X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdbuf.c;h=83a8035dd98a32eafd61b318ac1dca99a5c1c0d0;hb=389cf730cedd42dd1ef653e9358635c114e458d5;hp=1ebac4ee6da7c4c14196b0167884d9b46e2f7703;hpb=c28b227942b421ebdc03c9df9a012642fb517223;p=zfs.git diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 1ebac4e..83a8035 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -20,6 +20,8 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include @@ -259,7 +261,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db) boolean_t is_metadata; DB_DNODE_ENTER(db); - is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata; + is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type); DB_DNODE_EXIT(db); return (is_metadata); @@ -297,7 +299,7 @@ retry: #if defined(_KERNEL) && defined(HAVE_SPL) /* Large allocations which do not require contiguous pages * should be using vmem_alloc() in the linux kernel */ - h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP); + h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_PUSHPAGE); #else h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP); #endif @@ -371,7 +373,7 @@ dbuf_verify(dmu_buf_impl_t *db) } else if (db->db_blkid == DMU_SPILL_BLKID) { ASSERT(dn != NULL); ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); - ASSERT3U(db->db.db_offset, ==, 0); + ASSERT0(db->db.db_offset); } else { ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); } @@ -1095,7 +1097,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN); ASSERT(dn->dn_dirtyctx_firstset == NULL); - dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); + dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_PUSHPAGE); } mutex_exit(&dn->dn_mtx); @@ -1172,7 +1174,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * to make a copy of it so that the changes we make in this * transaction group won't leak out when we sync the older txg. */ - dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); + dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_PUSHPAGE); list_link_init(&dr->dr_dirty_node); if (db->db_level == 0) { void *data_old = db->db_buf; @@ -1347,13 +1349,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * it, since one of the current holders may be in the * middle of an update. Note that users of dbuf_undirty() * should not place a hold on the dbuf before the call. + * Also note: we can get here with a spill block, so + * test for that similar to how dbuf_dirty does. */ if (refcount_count(&db->db_holds) > db->db_dirtycnt) { mutex_exit(&db->db_mtx); /* Make sure we don't toss this buffer at sync phase */ - mutex_enter(&dn->dn_mtx); - dnode_clear_range(dn, db->db_blkid, 1, tx); - mutex_exit(&dn->dn_mtx); + if (db->db_blkid != DMU_SPILL_BLKID) { + mutex_enter(&dn->dn_mtx); + dnode_clear_range(dn, db->db_blkid, 1, tx); + mutex_exit(&dn->dn_mtx); + } DB_DNODE_EXIT(db); return (0); } @@ -1366,11 +1372,18 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) *drp = dr->dr_next; + /* + * Note that there are three places in dbuf_dirty() + * where this dirty record may be put on a list. + * Make sure to do a list_remove corresponding to + * every one of those list_insert calls. + */ if (dr->dr_parent) { mutex_enter(&dr->dr_parent->dt.di.dr_mtx); list_remove(&dr->dr_parent->dt.di.dr_children, dr); mutex_exit(&dr->dr_parent->dt.di.dr_mtx); - } else if (db->db_level+1 == dn->dn_nlevels) { + } else if (db->db_blkid == DMU_SPILL_BLKID || + db->db_level+1 == dn->dn_nlevels) { ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); mutex_enter(&dn->dn_mtx); list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); @@ -1707,7 +1720,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ASSERT(dn->dn_type != DMU_OT_NONE); - db = kmem_cache_alloc(dbuf_cache, KM_SLEEP); + db = kmem_cache_alloc(dbuf_cache, KM_PUSHPAGE); db->db_objset = os; db->db.db_object = dn->dn_object; @@ -2007,7 +2020,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, int error; dh = kmem_zalloc(sizeof(struct dbuf_hold_impl_data) * - DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP); + DBUF_HOLD_IMPL_MAX_DEPTH, KM_PUSHPAGE); __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0); error = __dbuf_hold_impl(dh); @@ -2176,7 +2189,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) dbuf_evict(db); } else { VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0); - if (!DBUF_IS_CACHEABLE(db)) + + /* + * A dbuf will be eligible for eviction if either the + * 'primarycache' property is set or a duplicate + * copy of this buffer is already cached in the arc. + * + * In the case of the 'primarycache' a buffer + * is considered for eviction if it matches the + * criteria set in the property. + * + * To decide if our buffer is considered a + * duplicate, we must call into the arc to determine + * if multiple buffers are referencing the same + * block on-disk. If so, then we simply evict + * ourselves. + */ + if (!DBUF_IS_CACHEABLE(db) || + arc_buf_eviction_needed(db->db_buf)) dbuf_clear(db); else mutex_exit(&db->db_mtx); @@ -2403,7 +2433,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) dbuf_dirty_record_t **drp; ASSERT(*datap != NULL); - ASSERT3U(db->db_level, ==, 0); + ASSERT0(db->db_level); ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); DB_DNODE_EXIT(db); @@ -2606,7 +2636,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) uint64_t txg = zio->io_txg; dbuf_dirty_record_t **drp, *dr; - ASSERT3U(zio->io_error, ==, 0); + ASSERT0(zio->io_error); ASSERT(db->db_blkptr == bp); if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { @@ -2819,6 +2849,39 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) } #if defined(_KERNEL) && defined(HAVE_SPL) -EXPORT_SYMBOL(dmu_buf_rele); +EXPORT_SYMBOL(dbuf_find); +EXPORT_SYMBOL(dbuf_is_metadata); +EXPORT_SYMBOL(dbuf_evict); +EXPORT_SYMBOL(dbuf_loan_arcbuf); +EXPORT_SYMBOL(dbuf_whichblock); +EXPORT_SYMBOL(dbuf_read); +EXPORT_SYMBOL(dbuf_unoverride); +EXPORT_SYMBOL(dbuf_free_range); +EXPORT_SYMBOL(dbuf_new_size); +EXPORT_SYMBOL(dbuf_release_bp); +EXPORT_SYMBOL(dbuf_dirty); EXPORT_SYMBOL(dmu_buf_will_dirty); +EXPORT_SYMBOL(dmu_buf_will_not_fill); +EXPORT_SYMBOL(dmu_buf_will_fill); +EXPORT_SYMBOL(dmu_buf_fill_done); +EXPORT_SYMBOL(dmu_buf_rele); +EXPORT_SYMBOL(dbuf_assign_arcbuf); +EXPORT_SYMBOL(dbuf_clear); +EXPORT_SYMBOL(dbuf_prefetch); +EXPORT_SYMBOL(dbuf_hold_impl); +EXPORT_SYMBOL(dbuf_hold); +EXPORT_SYMBOL(dbuf_hold_level); +EXPORT_SYMBOL(dbuf_create_bonus); +EXPORT_SYMBOL(dbuf_spill_set_blksz); +EXPORT_SYMBOL(dbuf_rm_spill); +EXPORT_SYMBOL(dbuf_add_ref); +EXPORT_SYMBOL(dbuf_rele); +EXPORT_SYMBOL(dbuf_rele_and_unlock); +EXPORT_SYMBOL(dbuf_refcount); +EXPORT_SYMBOL(dbuf_sync_list); +EXPORT_SYMBOL(dmu_buf_set_user); +EXPORT_SYMBOL(dmu_buf_set_user_ie); +EXPORT_SYMBOL(dmu_buf_update_user); +EXPORT_SYMBOL(dmu_buf_get_user); +EXPORT_SYMBOL(dmu_buf_freeable); #endif