X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdbuf.c;h=46f42dd7c934a1b6d0781dbdc8fc0ef639cc21c9;hb=b4f7f105275d996fbcb6abd65760307d2153a89b;hp=1ebac4ee6da7c4c14196b0167884d9b46e2f7703;hpb=c28b227942b421ebdc03c9df9a012642fb517223;p=zfs.git

diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c
index 1ebac4e..46f42dd 100644
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@@ -20,6 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -259,7 +261,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
 		boolean_t is_metadata;
 
 		DB_DNODE_ENTER(db);
-		is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata;
+		is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
 		DB_DNODE_EXIT(db);
 
 		return (is_metadata);
@@ -297,7 +299,7 @@ retry:
 #if defined(_KERNEL) && defined(HAVE_SPL)
 	/* Large allocations which do not require contiguous pages
 	 * should be using vmem_alloc() in the linux kernel */
-	h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP);
+	h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_PUSHPAGE);
 #else
 	h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
 #endif
@@ -371,7 +373,7 @@ dbuf_verify(dmu_buf_impl_t *db)
 	} else if (db->db_blkid == DMU_SPILL_BLKID) {
 		ASSERT(dn != NULL);
 		ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
-		ASSERT3U(db->db.db_offset, ==, 0);
+		ASSERT0(db->db.db_offset);
 	} else {
 		ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
 	}
@@ -557,7 +559,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
 	spa_t *spa;
 	zbookmark_t zb;
 	uint32_t aflags = ARC_NOWAIT;
-	arc_buf_t *pbuf;
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
@@ -619,14 +620,8 @@
 	    db->db.db_object, db->db_level, db->db_blkid);
 
 	dbuf_add_ref(db, NULL);
 
-	/* ZIO_FLAG_CANFAIL callers have to check the parent zio's error */
-	if (db->db_parent)
-		pbuf = db->db_parent->db_buf;
-	else
-		pbuf = db->db_objset->os_phys_buf;
-
-	(void) dsl_read(zio, spa, db->db_blkptr, pbuf,
+	(void) arc_read(zio, spa, db->db_blkptr,
 	    dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
 	    (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
 	    &aflags, &zb);
@@ -1024,7 +1019,6 @@ void
 dbuf_release_bp(dmu_buf_impl_t *db)
 {
 	objset_t *os;
-	zbookmark_t zb;
 
 	DB_GET_OBJSET(&os, db);
 	ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
@@ -1032,13 +1026,7 @@ dbuf_release_bp(dmu_buf_impl_t *db)
 	    list_link_active(&os->os_dsl_dataset->ds_synced_link));
 	ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));
 
-	zb.zb_objset = os->os_dsl_dataset ?
-	    os->os_dsl_dataset->ds_object : 0;
-	zb.zb_object = db->db.db_object;
-	zb.zb_level = db->db_level;
-	zb.zb_blkid = db->db_blkid;
-	(void) arc_release_bp(db->db_buf, db,
-	    db->db_blkptr, os->os_spa, &zb);
+	(void) arc_release(db->db_buf, db);
 }
 
 dbuf_dirty_record_t *
@@ -1095,7 +1083,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 		dn->dn_dirtyctx =
 		    (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
 		ASSERT(dn->dn_dirtyctx_firstset == NULL);
-		dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
+		dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_PUSHPAGE);
 	}
 	mutex_exit(&dn->dn_mtx);
 
@@ -1172,7 +1160,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	 * to make a copy of it so that the changes we make in this
 	 * transaction group won't leak out when we sync the older txg.
 	 */
-	dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
+	dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_PUSHPAGE);
 	list_link_init(&dr->dr_dirty_node);
 	if (db->db_level == 0) {
 		void *data_old = db->db_buf;
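The KM_SLEEP to KM_PUSHPAGE conversions above (and the two below in dbuf_create() and dbuf_hold_impl()) all follow one rule: allocations reachable from the txg sync path must not sleep in direct reclaim, because reclaim can re-enter ZFS writeback and deadlock against locks the caller already holds. KM_PUSHPAGE asks the SPL to satisfy the allocation without initiating filesystem I/O. A minimal sketch of the pattern, assuming the SPL's <sys/kmem.h>; sync_scratch_t and sync_scratch_alloc() are illustrative names, not from this patch:

#include <sys/types.h>
#include <sys/kmem.h>

typedef struct sync_scratch {
	uint64_t ss_txg;	/* hypothetical per-txg scratch state */
} sync_scratch_t;

/*
 * Called with dirty-buffer locks held during txg sync.  KM_SLEEP
 * here could block in reclaim, which may try to push dirty pages
 * back through ZFS and deadlock; KM_PUSHPAGE cannot start
 * filesystem I/O to satisfy the allocation.
 */
static sync_scratch_t *
sync_scratch_alloc(uint64_t txg)
{
	sync_scratch_t *ss;

	ss = kmem_zalloc(sizeof (sync_scratch_t), KM_PUSHPAGE);
	ss->ss_txg = txg;
	return (ss);
}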
@@ -1347,13 +1335,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 	 * it, since one of the current holders may be in the
 	 * middle of an update.  Note that users of dbuf_undirty()
 	 * should not place a hold on the dbuf before the call.
+	 * Also note: we can get here with a spill block, so
+	 * test for that similar to how dbuf_dirty does.
 	 */
 	if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
 		mutex_exit(&db->db_mtx);
 		/* Make sure we don't toss this buffer at sync phase */
-		mutex_enter(&dn->dn_mtx);
-		dnode_clear_range(dn, db->db_blkid, 1, tx);
-		mutex_exit(&dn->dn_mtx);
+		if (db->db_blkid != DMU_SPILL_BLKID) {
+			mutex_enter(&dn->dn_mtx);
+			dnode_clear_range(dn, db->db_blkid, 1, tx);
+			mutex_exit(&dn->dn_mtx);
+		}
 		DB_DNODE_EXIT(db);
 		return (0);
 	}
@@ -1366,11 +1358,18 @@
 
 	*drp = dr->dr_next;
 
+	/*
+	 * Note that there are three places in dbuf_dirty()
+	 * where this dirty record may be put on a list.
+	 * Make sure to do a list_remove corresponding to
+	 * every one of those list_insert calls.
+	 */
 	if (dr->dr_parent) {
 		mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
 		list_remove(&dr->dr_parent->dt.di.dr_children, dr);
 		mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
-	} else if (db->db_level+1 == dn->dn_nlevels) {
+	} else if (db->db_blkid == DMU_SPILL_BLKID ||
+	    db->db_level+1 == dn->dn_nlevels) {
 		ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
 		mutex_enter(&dn->dn_mtx);
 		list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
@@ -1707,7 +1706,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
 	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
 	ASSERT(dn->dn_type != DMU_OT_NONE);
 
-	db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
+	db = kmem_cache_alloc(dbuf_cache, KM_PUSHPAGE);
 
 	db->db_objset = os;
 	db->db.db_object = dn->dn_object;
@@ -1873,7 +1872,6 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid)
 		if (bp && !BP_IS_HOLE(bp)) {
 			int priority = dn->dn_type == DMU_OT_DDT_ZAP ?
 			    ZIO_PRIORITY_DDT_PREFETCH : ZIO_PRIORITY_ASYNC_READ;
-			arc_buf_t *pbuf;
 			dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 			uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
 			zbookmark_t zb;
@@ -1881,13 +1879,8 @@
 			SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 			    dn->dn_object, 0, blkid);
 
-			if (db)
-				pbuf = db->db_buf;
-			else
-				pbuf = dn->dn_objset->os_phys_buf;
-
-			(void) dsl_read(NULL, dn->dn_objset->os_spa,
-			    bp, pbuf, NULL, NULL, priority,
+			(void) arc_read(NULL, dn->dn_objset->os_spa,
+			    bp, NULL, NULL, priority,
 			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
 			    &aflags, &zb);
 		}
@@ -2007,7 +2000,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
 	int error;
 
 	dh = kmem_zalloc(sizeof(struct dbuf_hold_impl_data) *
-	    DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
+	    DBUF_HOLD_IMPL_MAX_DEPTH, KM_PUSHPAGE);
 
 	__dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0);
 	error = __dbuf_hold_impl(dh);
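A second recurring change: both read sites now call arc_read() directly instead of dsl_read(), dropping the parent arc buffer (pbuf) that dsl_read() threaded through, and dbuf_release_bp() above likewise stops hand-assembling a zbookmark for arc_release_bp(). What survives at every call site is the bookmark itself, the four-tuple that names a block. A standalone model of that tuple and of what SET_BOOKMARK does, using plain integer fields rather than the real zbookmark_t from ZFS's sys/zio.h (zbookmark_model_t and SET_BOOKMARK_MODEL are illustrative names):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for zbookmark_t. */
typedef struct zbookmark_model {
	uint64_t zb_objset;	/* objset the block belongs to */
	uint64_t zb_object;	/* object number within the objset */
	int64_t	 zb_level;	/* indirection level (0 = data) */
	uint64_t zb_blkid;	/* block id at that level */
} zbookmark_model_t;

/* Models SET_BOOKMARK: fill in the identifying four-tuple. */
#define	SET_BOOKMARK_MODEL(zb, os, obj, lvl, blk)	\
	do {						\
		(zb)->zb_objset = (os);			\
		(zb)->zb_object = (obj);		\
		(zb)->zb_level = (lvl);			\
		(zb)->zb_blkid = (blk);			\
	} while (0)

int
main(void)
{
	zbookmark_model_t zb;

	/* e.g. objset 5, object 42, level 0, block 7 */
	SET_BOOKMARK_MODEL(&zb, 5, 42, 0, 7);
	printf("<%llu, %llu, %lld, %llu>\n",
	    (unsigned long long)zb.zb_objset,
	    (unsigned long long)zb.zb_object,
	    (long long)zb.zb_level,
	    (unsigned long long)zb.zb_blkid);
	return (0);
}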
@@ -2176,7 +2169,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
 			dbuf_evict(db);
 		} else {
 			VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
-			if (!DBUF_IS_CACHEABLE(db))
+
+			/*
+			 * A dbuf will be eligible for eviction if either the
+			 * 'primarycache' property is set or a duplicate
+			 * copy of this buffer is already cached in the arc.
+			 *
+			 * In the case of the 'primarycache' a buffer
+			 * is considered for eviction if it matches the
+			 * criteria set in the property.
+			 *
+			 * To decide if our buffer is considered a
+			 * duplicate, we must call into the arc to determine
+			 * if multiple buffers are referencing the same
+			 * block on-disk. If so, then we simply evict
+			 * ourselves.
+			 */
+			if (!DBUF_IS_CACHEABLE(db) ||
+			    arc_buf_eviction_needed(db->db_buf))
 				dbuf_clear(db);
 			else
 				mutex_exit(&db->db_mtx);
@@ -2403,7 +2413,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
 		dbuf_dirty_record_t **drp;
 
 		ASSERT(*datap != NULL);
-		ASSERT3U(db->db_level, ==, 0);
+		ASSERT0(db->db_level);
 		ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
 		bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
 		DB_DNODE_EXIT(db);
@@ -2606,7 +2616,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 	uint64_t txg = zio->io_txg;
 	dbuf_dirty_record_t **drp, *dr;
 
-	ASSERT3U(zio->io_error, ==, 0);
+	ASSERT0(zio->io_error);
 	ASSERT(db->db_blkptr == bp);
 
 	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
@@ -2819,6 +2829,39 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
 }
 
 #if defined(_KERNEL) && defined(HAVE_SPL)
-EXPORT_SYMBOL(dmu_buf_rele);
+EXPORT_SYMBOL(dbuf_find);
+EXPORT_SYMBOL(dbuf_is_metadata);
+EXPORT_SYMBOL(dbuf_evict);
+EXPORT_SYMBOL(dbuf_loan_arcbuf);
+EXPORT_SYMBOL(dbuf_whichblock);
+EXPORT_SYMBOL(dbuf_read);
+EXPORT_SYMBOL(dbuf_unoverride);
+EXPORT_SYMBOL(dbuf_free_range);
+EXPORT_SYMBOL(dbuf_new_size);
+EXPORT_SYMBOL(dbuf_release_bp);
+EXPORT_SYMBOL(dbuf_dirty);
 EXPORT_SYMBOL(dmu_buf_will_dirty);
+EXPORT_SYMBOL(dmu_buf_will_not_fill);
+EXPORT_SYMBOL(dmu_buf_will_fill);
+EXPORT_SYMBOL(dmu_buf_fill_done);
+EXPORT_SYMBOL(dmu_buf_rele);
+EXPORT_SYMBOL(dbuf_assign_arcbuf);
+EXPORT_SYMBOL(dbuf_clear);
+EXPORT_SYMBOL(dbuf_prefetch);
+EXPORT_SYMBOL(dbuf_hold_impl);
+EXPORT_SYMBOL(dbuf_hold);
+EXPORT_SYMBOL(dbuf_hold_level);
+EXPORT_SYMBOL(dbuf_create_bonus);
+EXPORT_SYMBOL(dbuf_spill_set_blksz);
+EXPORT_SYMBOL(dbuf_rm_spill);
+EXPORT_SYMBOL(dbuf_add_ref);
+EXPORT_SYMBOL(dbuf_rele);
+EXPORT_SYMBOL(dbuf_rele_and_unlock);
+EXPORT_SYMBOL(dbuf_refcount);
+EXPORT_SYMBOL(dbuf_sync_list);
+EXPORT_SYMBOL(dmu_buf_set_user);
+EXPORT_SYMBOL(dmu_buf_set_user_ie);
+EXPORT_SYMBOL(dmu_buf_update_user);
+EXPORT_SYMBOL(dmu_buf_get_user);
+EXPORT_SYMBOL(dmu_buf_freeable);
 #endif
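The widened test in dbuf_rele_and_unlock() composes two predicates: keep the buffer only if the primarycache policy says it is cacheable and the ARC does not already hold another buffer for the same on-disk block. A standalone model of just that decision, with booleans standing in for the real DBUF_IS_CACHEABLE(db) and arc_buf_eviction_needed(db->db_buf) predicates (dbuf_should_clear is an illustrative name):

#include <stdbool.h>
#include <stdio.h>

/*
 * Models the post-patch test: clear (evict) the dbuf unless it is
 * cacheable AND no duplicate of its block is already in the ARC.
 */
static bool
dbuf_should_clear(bool cacheable, bool arc_duplicate)
{
	return (!cacheable || arc_duplicate);
}

int
main(void)
{
	int c, d;

	/* Print the full decision table. */
	for (c = 0; c <= 1; c++)
		for (d = 0; d <= 1; d++)
			printf("cacheable=%d duplicate=%d -> clear=%d\n",
			    c, d, dbuf_should_clear(c, d));
	return (0);
}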