X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdbuf.c;h=1b6f242dd7aa261761966a73ff0ccd25b9d9d4a3;hb=9babb37438b58e77bad04e820d5702e15b79e6a6;hp=d04610317a4ea42fdcf1becc7584513d09969ca3;hpb=172bb4bd5e4afef721dd4d2972d8680d983f144b;p=zfs.git diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index d046103..1b6f242 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -329,7 +329,7 @@ dbuf_verify(dmu_buf_impl_t *db) if (db->db_parent == dn->dn_dbuf) { /* db is pointed to by the dnode */ /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */ - if (db->db.db_object == DMU_META_DNODE_OBJECT) + if (DMU_OBJECT_IS_SPECIAL(db->db.db_object)) ASSERT(db->db_parent == NULL); else ASSERT(db->db_parent != NULL); @@ -465,15 +465,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) ASSERT(db->db_buf == NULL); if (db->db_blkid == DB_BONUS_BLKID) { - int bonuslen = dn->dn_bonuslen; + int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); ASSERT3U(bonuslen, <=, db->db.db_size); db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); - arc_space_consume(DN_MAX_BONUSLEN); + arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); if (bonuslen < DN_MAX_BONUSLEN) bzero(db->db.db_data, DN_MAX_BONUSLEN); - bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, - bonuslen); + if (bonuslen) + bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen); dbuf_update_data(db); db->db_state = DB_CACHED; mutex_exit(&db->db_mtx); @@ -665,7 +665,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) if (db->db_blkid == DB_BONUS_BLKID) { /* Note that the data bufs here are zio_bufs */ dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); - arc_space_consume(DN_MAX_BONUSLEN); + arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { int size = db->db.db_size; @@ -908,15 +908,11 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * Shouldn't dirty a regular buffer in syncing context. Private * objects may be dirtied in syncing context, but only if they * were already pre-dirtied in open context. - * XXX We may want to prohibit dirtying in syncing context even - * if they did pre-dirty. */ ASSERT(!dmu_tx_is_syncing(tx) || BP_IS_HOLE(dn->dn_objset->os_rootbp) || - dn->dn_object == DMU_META_DNODE_OBJECT || - dn->dn_objset->os_dsl_dataset == NULL || - dsl_dir_is_private(dn->dn_objset->os_dsl_dataset->ds_dir)); - + DMU_OBJECT_IS_SPECIAL(dn->dn_object) || + dn->dn_objset->os_dsl_dataset == NULL); /* * We make this assert for private objects as well, but after we * check if we're already dirty. They are allowed to re-dirty @@ -975,7 +971,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) /* * Only valid if not already dirty. */ - ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == + ASSERT(dn->dn_object == 0 || + dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); ASSERT3U(dn->dn_nlevels, >, db->db_level); @@ -987,15 +984,13 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) /* * We should only be dirtying in syncing context if it's the - * mos, a spa os, or we're initializing the os. However, we are - * allowed to dirty in syncing context provided we already - * dirtied it in open context. Hence we must make this - * assertion only if we're not already dirty. + * mos or we're initializing the os or it's a special object. + * However, we are allowed to dirty in syncing context provided + * we already dirtied it in open context. Hence we must make + * this assertion only if we're not already dirty. */ - ASSERT(!dmu_tx_is_syncing(tx) || - os->os_dsl_dataset == NULL || - !dsl_dir_is_private(os->os_dsl_dataset->ds_dir) || - !BP_IS_HOLE(os->os_rootbp)); + ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || + os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp)); ASSERT(db->db.db_size != 0); dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); @@ -1312,6 +1307,68 @@ dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx) } /* + * Directly assign a provided arc buf to a given dbuf if it's not referenced + * by anybody except our caller. Otherwise copy arcbuf's contents to dbuf. + */ +void +dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) +{ + ASSERT(!refcount_is_zero(&db->db_holds)); + ASSERT(db->db_dnode->dn_object != DMU_META_DNODE_OBJECT); + ASSERT(db->db_blkid != DB_BONUS_BLKID); + ASSERT(db->db_level == 0); + ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA); + ASSERT(buf != NULL); + ASSERT(arc_buf_size(buf) == db->db.db_size); + ASSERT(tx->tx_txg != 0); + + arc_return_buf(buf, db); + ASSERT(arc_released(buf)); + + mutex_enter(&db->db_mtx); + + while (db->db_state == DB_READ || db->db_state == DB_FILL) + cv_wait(&db->db_changed, &db->db_mtx); + + ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); + + if (db->db_state == DB_CACHED && + refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { + mutex_exit(&db->db_mtx); + (void) dbuf_dirty(db, tx); + bcopy(buf->b_data, db->db.db_data, db->db.db_size); + VERIFY(arc_buf_remove_ref(buf, db) == 1); + return; + } + + if (db->db_state == DB_CACHED) { + dbuf_dirty_record_t *dr = db->db_last_dirty; + + ASSERT(db->db_buf != NULL); + if (dr != NULL && dr->dr_txg == tx->tx_txg) { + ASSERT(dr->dt.dl.dr_data == db->db_buf); + if (!arc_released(db->db_buf)) { + ASSERT(dr->dt.dl.dr_override_state == + DR_OVERRIDDEN); + arc_release(db->db_buf, db); + } + dr->dt.dl.dr_data = buf; + VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) { + arc_release(db->db_buf, db); + VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1); + } + db->db_buf = NULL; + } + ASSERT(db->db_buf == NULL); + dbuf_set_data(db, buf); + db->db_state = DB_FILL; + mutex_exit(&db->db_mtx); + (void) dbuf_dirty(db, tx); + dbuf_fill_done(db, tx); +} + +/* * "Clear" the contents of this dbuf. This will mark the dbuf * EVICTING and clear *most* of its references. Unfortunetely, * when we are not holding the dn_dbufs_mtx, we can't clear the @@ -1341,7 +1398,7 @@ dbuf_clear(dmu_buf_impl_t *db) ASSERT(db->db.db_data != NULL); if (db->db_blkid == DB_BONUS_BLKID) { zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); - arc_space_return(DN_MAX_BONUSLEN); + arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); } db->db.db_data = NULL; db->db_state = DB_UNCACHED; @@ -1463,7 +1520,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, db->db.db_offset = DB_BONUS_BLKID; db->db_state = DB_UNCACHED; /* the bonus dbuf is not placed in the hash table */ - arc_space_consume(sizeof (dmu_buf_impl_t)); + arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); return (db); } else { int blocksize = @@ -1490,7 +1547,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, list_insert_head(&dn->dn_dbufs, db); db->db_state = DB_UNCACHED; mutex_exit(&dn->dn_dbufs_mtx); - arc_space_consume(sizeof (dmu_buf_impl_t)); + arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); if (parent && parent != dn->dn_dbuf) dbuf_add_ref(parent, db); @@ -1559,7 +1616,7 @@ dbuf_destroy(dmu_buf_impl_t *db) ASSERT(db->db_data_pending == NULL); kmem_cache_free(dbuf_cache, db); - arc_space_return(sizeof (dmu_buf_impl_t)); + arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); } void @@ -1855,6 +1912,19 @@ dmu_buf_get_user(dmu_buf_t *db_fake) return (db->db_user_ptr); } +boolean_t +dmu_buf_freeable(dmu_buf_t *dbuf) +{ + boolean_t res = B_FALSE; + dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; + + if (db->db_blkptr) + res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset, + db->db_blkptr->blk_birth); + + return (res); +} + static void dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) { @@ -1943,7 +2013,6 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) dnode_t *dn = db->db_dnode; objset_impl_t *os = dn->dn_objset; uint64_t txg = tx->tx_txg; - int blksz; ASSERT(dmu_tx_is_syncing(tx)); @@ -1980,7 +2049,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); if (*datap != db->db.db_data) { zio_buf_free(*datap, DN_MAX_BONUSLEN); - arc_space_return(DN_MAX_BONUSLEN); + arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); } db->db_data_pending = NULL; drp = &db->db_last_dirty; @@ -2049,32 +2118,25 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) return; } - if (db->db_state != DB_NOFILL) { - blksz = arc_buf_size(*datap); - - if (dn->dn_object != DMU_META_DNODE_OBJECT) { - /* - * If this buffer is currently "in use" (i.e., there - * are active holds and db_data still references it), - * then make a copy before we start the write so that - * any modifications from the open txg will not leak - * into this write. - * - * NOTE: this copy does not need to be made for - * objects only modified in the syncing context (e.g. - * DNONE_DNODE blocks). - */ - if (refcount_count(&db->db_holds) > 1 && - *datap == db->db_buf) { - arc_buf_contents_t type = - DBUF_GET_BUFC_TYPE(db); - *datap = - arc_buf_alloc(os->os_spa, blksz, db, type); - bcopy(db->db.db_data, (*datap)->b_data, blksz); - } - } - - ASSERT(*datap != NULL); + if (db->db_state != DB_NOFILL && + dn->dn_object != DMU_META_DNODE_OBJECT && + refcount_count(&db->db_holds) > 1 && + *datap == db->db_buf) { + /* + * If this buffer is currently "in use" (i.e., there + * are active holds and db_data still references it), + * then make a copy before we start the write so that + * any modifications from the open txg will not leak + * into this write. + * + * NOTE: this copy does not need to be made for + * objects only modified in the syncing context (e.g. + * DNONE_DNODE blocks). + */ + int blksz = arc_buf_size(*datap); + arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); + *datap = arc_buf_alloc(os->os_spa, blksz, db, type); + bcopy(db->db.db_data, (*datap)->b_data, blksz); } db->db_data_pending = dr;