X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fbpobj.c;h=1920da4408c8c5c4de83666d56045f213bc20a63;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=f81c48aca68b15196b929726c47a27050358ab13;hpb=428870ff734fdaccc342b33fc53cf94724409a46;p=zfs.git diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c index f81c48a..1920da4 100644 --- a/module/zfs/bpobj.c +++ b/module/zfs/bpobj.c @@ -20,11 +20,61 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include #include #include +#include +#include +#include + +/* + * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj). + */ +uint64_t +bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx) +{ + zfeature_info_t *empty_bpobj_feat = + &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ]; + spa_t *spa = dmu_objset_spa(os); + dsl_pool_t *dp = dmu_objset_pool(os); + + if (spa_feature_is_enabled(spa, empty_bpobj_feat)) { + if (!spa_feature_is_active(spa, empty_bpobj_feat)) { + ASSERT0(dp->dp_empty_bpobj); + dp->dp_empty_bpobj = + bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx); + VERIFY(zap_add(os, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, + &dp->dp_empty_bpobj, tx) == 0); + } + spa_feature_incr(spa, empty_bpobj_feat, tx); + ASSERT(dp->dp_empty_bpobj != 0); + return (dp->dp_empty_bpobj); + } else { + return (bpobj_alloc(os, blocksize, tx)); + } +} + +void +bpobj_decr_empty(objset_t *os, dmu_tx_t *tx) +{ + zfeature_info_t *empty_bpobj_feat = + &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ]; + dsl_pool_t *dp = dmu_objset_pool(os); + + spa_feature_decr(dmu_objset_spa(os), empty_bpobj_feat, tx); + if (!spa_feature_is_active(dmu_objset_spa(os), empty_bpobj_feat)) { + VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_EMPTY_BPOBJ, tx)); + VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx)); + dp->dp_empty_bpobj = 0; + } +} uint64_t bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) @@ -51,6 +101,7 @@ bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx) int epb; dmu_buf_t *dbuf = NULL; + ASSERT(obj != dmu_objset_pool(os)->dp_empty_bpobj); VERIFY3U(0, ==, bpobj_open(&bpo, os, obj)); mutex_enter(&bpo.bpo_lock); @@ -113,16 +164,15 @@ bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object) ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ); ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR); + err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf); + if (err) + return (err); + bpo->bpo_os = os; bpo->bpo_object = object; bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT; bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0); bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1); - - err = dmu_bonus_hold(bpo->bpo_os, - bpo->bpo_object, bpo, &bpo->bpo_dbuf); - if (err) - return (err); bpo->bpo_phys = bpo->bpo_dbuf->db_data; return (0); } @@ -140,6 +190,7 @@ bpobj_close(bpobj_t *bpo) bpo->bpo_dbuf = NULL; bpo->bpo_phys = NULL; bpo->bpo_cached_dbuf = NULL; + bpo->bpo_object = 0; mutex_destroy(&bpo->bpo_lock); } @@ -210,8 +261,10 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, ASSERT(bpo->bpo_havecomp); err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi); - if (err) + if (err) { + mutex_exit(&bpo->bpo_lock); return (err); + } epb = doi.doi_data_block_size / sizeof (uint64_t); for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) { @@ -252,7 +305,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, &used_after, &comp_after, &uncomp_after)); bpo->bpo_phys->bpo_bytes -= used_before - used_after; ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0); - bpo->bpo_phys->bpo_comp -= comp_before - used_after; + bpo->bpo_phys->bpo_comp -= comp_before - comp_after; bpo->bpo_phys->bpo_uncomp -= uncomp_before - uncomp_after; } @@ -312,17 +365,23 @@ void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) { bpobj_t subbpo; - uint64_t used, comp, uncomp; + uint64_t used, comp, uncomp, subsubobjs; ASSERT(bpo->bpo_havesubobj); ASSERT(bpo->bpo_havecomp); + ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj); + + if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) { + bpobj_decr_empty(bpo->bpo_os, tx); + return; + } VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); - bpobj_close(&subbpo); if (used == 0) { /* No point in having an empty subobj. */ + bpobj_close(&subbpo); bpobj_free(bpo->bpo_os, subobj, tx); return; } @@ -338,10 +397,41 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), sizeof (subobj), &subobj, tx); bpo->bpo_phys->bpo_num_subobjs++; + + /* + * If subobj has only one block of subobjs, then move subobj's + * subobjs to bpo's subobj list directly. This reduces + * recursion in bpobj_iterate due to nested subobjs. + */ + subsubobjs = subbpo.bpo_phys->bpo_subobjs; + if (subsubobjs != 0) { + dmu_object_info_t doi; + + VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi)); + if (doi.doi_max_offset == doi.doi_data_block_size) { + dmu_buf_t *subdb; + uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs; + + VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs, + 0, FTAG, &subdb, 0)); + dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, + bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), + numsubsub * sizeof (subobj), subdb->db_data, tx); + dmu_buf_rele(subdb, FTAG); + bpo->bpo_phys->bpo_num_subobjs += numsubsub; + + dmu_buf_will_dirty(subbpo.bpo_dbuf, tx); + subbpo.bpo_phys->bpo_subobjs = 0; + VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os, + subsubobjs, tx)); + } + } bpo->bpo_phys->bpo_bytes += used; bpo->bpo_phys->bpo_comp += comp; bpo->bpo_phys->bpo_uncomp += uncomp; mutex_exit(&bpo->bpo_lock); + + bpobj_close(&subbpo); } void @@ -353,6 +443,7 @@ bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx) blkptr_t *bparray; ASSERT(!BP_IS_HOLE(bp)); + ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj); /* We never need the fill count. */ stored_bp.blk_fill = 0; @@ -407,7 +498,10 @@ space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) struct space_range_arg *sra = arg; if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) { - sra->used += bp_get_dsize_sync(sra->spa, bp); + if (dsl_pool_sync_context(spa_get_dsl(sra->spa))) + sra->used += bp_get_dsize_sync(sra->spa, bp); + else + sra->used += bp_get_dsize(sra->spa, bp); sra->comp += BP_GET_PSIZE(bp); sra->uncomp += BP_GET_UCSIZE(bp); }