*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
+#include <sys/arc.h>
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dbuf.h>
boolean_t is_metadata;
DB_DNODE_ENTER(db);
- is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata;
+ is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
DB_DNODE_EXIT(db);
return (is_metadata);
retry:
h->hash_table_mask = hsize - 1;
+#if defined(_KERNEL) && defined(HAVE_SPL)
+ /* Large allocations which do not require contiguous pages
+ * should use vmem_alloc() in the Linux kernel */
+ h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_PUSHPAGE);
+#else
h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
+#endif
if (h->hash_table == NULL) {
/* XXX - we should really return an error instead of assert */
ASSERT(hsize > (1ULL << 10));
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
+#if defined(_KERNEL) && defined(HAVE_SPL)
+ /* Large allocations which do not require contiguous pages
+ * should use vmem_free() in the Linux kernel */
+ vmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
+#else
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
+#endif
kmem_cache_destroy(dbuf_cache);
}
} else if (db->db_blkid == DMU_SPILL_BLKID) {
ASSERT(dn != NULL);
ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
- ASSERT3U(db->db.db_offset, ==, 0);
+ ASSERT0(db->db.db_offset);
} else {
ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
}
spa_t *spa;
zbookmark_t zb;
uint32_t aflags = ARC_NOWAIT;
- arc_buf_t *pbuf;
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
db->db.db_object, db->db_level, db->db_blkid);
dbuf_add_ref(db, NULL);
- /* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */
- if (db->db_parent)
- pbuf = db->db_parent->db_buf;
- else
- pbuf = db->db_objset->os_phys_buf;
-
- (void) dsl_read(zio, spa, db->db_blkptr, pbuf,
+ (void) arc_read(zio, spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb);
dbuf_release_bp(dmu_buf_impl_t *db)
{
objset_t *os;
- zbookmark_t zb;
DB_GET_OBJSET(&os, db);
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
list_link_active(&os->os_dsl_dataset->ds_synced_link));
ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));
- zb.zb_objset = os->os_dsl_dataset ?
- os->os_dsl_dataset->ds_object : 0;
- zb.zb_object = db->db.db_object;
- zb.zb_level = db->db_level;
- zb.zb_blkid = db->db_blkid;
- (void) arc_release_bp(db->db_buf, db,
- db->db_blkptr, os->os_spa, &zb);
+ (void) arc_release(db->db_buf, db);
}
dbuf_dirty_record_t *
dn->dn_dirtyctx =
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
ASSERT(dn->dn_dirtyctx_firstset == NULL);
- dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
+ dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_PUSHPAGE);
}
mutex_exit(&dn->dn_mtx);
* to make a copy of it so that the changes we make in this
* transaction group won't leak out when we sync the older txg.
*/
- dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
+ dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_PUSHPAGE);
list_link_init(&dr->dr_dirty_node);
if (db->db_level == 0) {
void *data_old = db->db_buf;
* it, since one of the current holders may be in the
* middle of an update. Note that users of dbuf_undirty()
* should not place a hold on the dbuf before the call.
+ * Also note: we can get here with a spill block, so
+ * test for that the same way dbuf_dirty() does.
*/
if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
mutex_exit(&db->db_mtx);
/* Make sure we don't toss this buffer at sync phase */
- mutex_enter(&dn->dn_mtx);
- dnode_clear_range(dn, db->db_blkid, 1, tx);
- mutex_exit(&dn->dn_mtx);
+ if (db->db_blkid != DMU_SPILL_BLKID) {
+ mutex_enter(&dn->dn_mtx);
+ dnode_clear_range(dn, db->db_blkid, 1, tx);
+ mutex_exit(&dn->dn_mtx);
+ }
DB_DNODE_EXIT(db);
return (0);
}
*drp = dr->dr_next;
+ /*
+ * Note that there are three places in dbuf_dirty()
+ * where this dirty record may be put on a list.
+ * Make sure to do a list_remove corresponding to
+ * every one of those list_insert calls.
+ */
if (dr->dr_parent) {
mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
list_remove(&dr->dr_parent->dt.di.dr_children, dr);
mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
- } else if (db->db_level+1 == dn->dn_nlevels) {
+ } else if (db->db_blkid == DMU_SPILL_BLKID ||
+ db->db_level+1 == dn->dn_nlevels) {
ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
mutex_enter(&dn->dn_mtx);
list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
ASSERT(dn->dn_type != DMU_OT_NONE);
- db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
+ db = kmem_cache_alloc(dbuf_cache, KM_PUSHPAGE);
db->db_objset = os;
db->db.db_object = dn->dn_object;
if (bp && !BP_IS_HOLE(bp)) {
int priority = dn->dn_type == DMU_OT_DDT_ZAP ?
ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ;
- arc_buf_t *pbuf;
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
zbookmark_t zb;
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
dn->dn_object, 0, blkid);
- if (db)
- pbuf = db->db_buf;
- else
- pbuf = dn->dn_objset->os_phys_buf;
-
- (void) dsl_read(NULL, dn->dn_objset->os_spa,
- bp, pbuf, NULL, NULL, priority,
+ (void) arc_read(NULL, dn->dn_objset->os_spa,
+ bp, NULL, NULL, priority,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
&aflags, &zb);
}
int error;
dh = kmem_zalloc(sizeof(struct dbuf_hold_impl_data) *
- DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
+ DBUF_HOLD_IMPL_MAX_DEPTH, KM_PUSHPAGE);
__dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0);
error = __dbuf_hold_impl(dh);
dbuf_evict(db);
} else {
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
- if (!DBUF_IS_CACHEABLE(db))
+
+ /*
+ * A dbuf will be eligible for eviction if either the
+ * 'primarycache' property is set or a duplicate
+ * copy of this buffer is already cached in the arc.
+ *
+ * In the case of the 'primarycache' property, a
+ * buffer is considered for eviction if it matches
+ * the criteria set in the property.
+ *
+ * To decide if our buffer is considered a
+ * duplicate, we must call into the arc to determine
+ * if multiple buffers are referencing the same
+ * block on-disk. If so, then we simply evict
+ * ourselves.
+ */
+ if (!DBUF_IS_CACHEABLE(db) ||
+ arc_buf_eviction_needed(db->db_buf))
dbuf_clear(db);
else
mutex_exit(&db->db_mtx);
dbuf_dirty_record_t **drp;
ASSERT(*datap != NULL);
- ASSERT3U(db->db_level, ==, 0);
+ ASSERT0(db->db_level);
ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
DB_DNODE_EXIT(db);
uint64_t txg = zio->io_txg;
dbuf_dirty_record_t **drp, *dr;
- ASSERT3U(zio->io_error, ==, 0);
+ ASSERT0(zio->io_error);
ASSERT(db->db_blkptr == bp);
if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
}
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(dbuf_find);
+EXPORT_SYMBOL(dbuf_is_metadata);
+EXPORT_SYMBOL(dbuf_evict);
+EXPORT_SYMBOL(dbuf_loan_arcbuf);
+EXPORT_SYMBOL(dbuf_whichblock);
+EXPORT_SYMBOL(dbuf_read);
+EXPORT_SYMBOL(dbuf_unoverride);
+EXPORT_SYMBOL(dbuf_free_range);
+EXPORT_SYMBOL(dbuf_new_size);
+EXPORT_SYMBOL(dbuf_release_bp);
+EXPORT_SYMBOL(dbuf_dirty);
+EXPORT_SYMBOL(dmu_buf_will_dirty);
+EXPORT_SYMBOL(dmu_buf_will_not_fill);
+EXPORT_SYMBOL(dmu_buf_will_fill);
+EXPORT_SYMBOL(dmu_buf_fill_done);
+EXPORT_SYMBOL(dmu_buf_rele);
+EXPORT_SYMBOL(dbuf_assign_arcbuf);
+EXPORT_SYMBOL(dbuf_clear);
+EXPORT_SYMBOL(dbuf_prefetch);
+EXPORT_SYMBOL(dbuf_hold_impl);
+EXPORT_SYMBOL(dbuf_hold);
+EXPORT_SYMBOL(dbuf_hold_level);
+EXPORT_SYMBOL(dbuf_create_bonus);
+EXPORT_SYMBOL(dbuf_spill_set_blksz);
+EXPORT_SYMBOL(dbuf_rm_spill);
+EXPORT_SYMBOL(dbuf_add_ref);
+EXPORT_SYMBOL(dbuf_rele);
+EXPORT_SYMBOL(dbuf_rele_and_unlock);
+EXPORT_SYMBOL(dbuf_refcount);
+EXPORT_SYMBOL(dbuf_sync_list);
+EXPORT_SYMBOL(dmu_buf_set_user);
+EXPORT_SYMBOL(dmu_buf_set_user_ie);
+EXPORT_SYMBOL(dmu_buf_update_user);
+EXPORT_SYMBOL(dmu_buf_get_user);
+EXPORT_SYMBOL(dmu_buf_freeable);
+#endif