diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c
index f81c48a..1920da4 100644
--- a/module/zfs/bpobj.c
+++ b/module/zfs/bpobj.c
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
 #include <sys/bpobj.h>
 #include <sys/zfs_context.h>
 #include <sys/refcount.h>
+#include <sys/dsl_pool.h>
+#include <sys/zfeature.h>
+#include <sys/zap.h>
+
+/*
+ * Return an empty bpobj, preferably the empty dummy one (dp_empty_bpobj).
+ */
+uint64_t
+bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx)
+{
+       zfeature_info_t *empty_bpobj_feat =
+           &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ];
+       spa_t *spa = dmu_objset_spa(os);
+       dsl_pool_t *dp = dmu_objset_pool(os);
+
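+       /*
+        * On first use, create the shared empty bpobj and record it in
+        * the pool directory; each caller takes a feature refcount.
+        */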
+       if (spa_feature_is_enabled(spa, empty_bpobj_feat)) {
+               if (!spa_feature_is_active(spa, empty_bpobj_feat)) {
+                       ASSERT0(dp->dp_empty_bpobj);
+                       dp->dp_empty_bpobj =
+                           bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx);
+                       VERIFY(zap_add(os,
+                           DMU_POOL_DIRECTORY_OBJECT,
+                           DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
+                           &dp->dp_empty_bpobj, tx) == 0);
+               }
+               spa_feature_incr(spa, empty_bpobj_feat, tx);
+               ASSERT(dp->dp_empty_bpobj != 0);
+               return (dp->dp_empty_bpobj);
+       } else {
+               return (bpobj_alloc(os, blocksize, tx));
+       }
+}
+
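+/*
+ * Release one reference on the empty bpobj feature.  When it is no
+ * longer active, free dp_empty_bpobj and its pool-directory entry.
+ */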
+void
+bpobj_decr_empty(objset_t *os, dmu_tx_t *tx)
+{
+       zfeature_info_t *empty_bpobj_feat =
+           &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ];
+       dsl_pool_t *dp = dmu_objset_pool(os);
+
+       spa_feature_decr(dmu_objset_spa(os), empty_bpobj_feat, tx);
+       if (!spa_feature_is_active(dmu_objset_spa(os), empty_bpobj_feat)) {
+               VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
+                   DMU_POOL_DIRECTORY_OBJECT,
+                   DMU_POOL_EMPTY_BPOBJ, tx));
+               VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx));
+               dp->dp_empty_bpobj = 0;
+       }
+}
 
 uint64_t
 bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
@@ -51,6 +101,7 @@ bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
        int epb;
        dmu_buf_t *dbuf = NULL;
 
+       ASSERT(obj != dmu_objset_pool(os)->dp_empty_bpobj);
        VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));
 
        mutex_enter(&bpo.bpo_lock);
@@ -113,16 +164,15 @@ bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
        ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
        ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
 
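+       /* Hold the bonus buffer before filling in the rest of *bpo. */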
+       err = dmu_bonus_hold(os, object, bpo, &bpo->bpo_dbuf);
+       if (err)
+               return (err);
+
        bpo->bpo_os = os;
        bpo->bpo_object = object;
        bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
        bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
        bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
-
-       err = dmu_bonus_hold(bpo->bpo_os,
-           bpo->bpo_object, bpo, &bpo->bpo_dbuf);
-       if (err)
-               return (err);
        bpo->bpo_phys = bpo->bpo_dbuf->db_data;
        return (0);
 }
@@ -140,6 +190,7 @@ bpobj_close(bpobj_t *bpo)
        bpo->bpo_dbuf = NULL;
        bpo->bpo_phys = NULL;
        bpo->bpo_cached_dbuf = NULL;
+       bpo->bpo_object = 0;
 
        mutex_destroy(&bpo->bpo_lock);
 }
@@ -210,8 +261,10 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
 
        ASSERT(bpo->bpo_havecomp);
        err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
-       if (err)
+       if (err) {
+               mutex_exit(&bpo->bpo_lock);
                return (err);
+       }
        epb = doi.doi_data_block_size / sizeof (uint64_t);
 
        for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
@@ -252,7 +305,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
                            &used_after, &comp_after, &uncomp_after));
                        bpo->bpo_phys->bpo_bytes -= used_before - used_after;
                        ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
-                       bpo->bpo_phys->bpo_comp -= comp_before - used_after;
+                       bpo->bpo_phys->bpo_comp -= comp_before - comp_after;
                        bpo->bpo_phys->bpo_uncomp -=
                            uncomp_before - uncomp_after;
                }
@@ -312,17 +365,23 @@ void
 bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
 {
        bpobj_t subbpo;
-       uint64_t used, comp, uncomp;
+       uint64_t used, comp, uncomp, subsubobjs;
 
        ASSERT(bpo->bpo_havesubobj);
        ASSERT(bpo->bpo_havecomp);
+       ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
+
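+       /*
+        * The shared empty bpobj contributes no space; just drop our
+        * reference on it rather than enqueueing it.
+        */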
+       if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) {
+               bpobj_decr_empty(bpo->bpo_os, tx);
+               return;
+       }
 
        VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
        VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
-       bpobj_close(&subbpo);
 
        if (used == 0) {
                /* No point in having an empty subobj. */
+               bpobj_close(&subbpo);
                bpobj_free(bpo->bpo_os, subobj, tx);
                return;
        }
@@ -338,10 +397,41 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
            bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
            sizeof (subobj), &subobj, tx);
        bpo->bpo_phys->bpo_num_subobjs++;
+
+       /*
+        * If subobj has only one block of subobjs, then move subobj's
+        * subobjs to bpo's subobj list directly.  This reduces
+        * recursion in bpobj_iterate due to nested subobjs.
+        */
+       subsubobjs = subbpo.bpo_phys->bpo_subobjs;
+       if (subsubobjs != 0) {
+               dmu_object_info_t doi;
+
+               VERIFY3U(0, ==, dmu_object_info(bpo->bpo_os, subsubobjs, &doi));
+               if (doi.doi_max_offset == doi.doi_data_block_size) {
+                       dmu_buf_t *subdb;
+                       uint64_t numsubsub = subbpo.bpo_phys->bpo_num_subobjs;
+
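+                       /*
+                        * Append subobj's single block of sub-subobjs
+                        * directly to the end of bpo's subobj array.
+                        */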
+                       VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, subsubobjs,
+                           0, FTAG, &subdb, 0));
+                       dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
+                           bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
+                           numsubsub * sizeof (subobj), subdb->db_data, tx);
+                       dmu_buf_rele(subdb, FTAG);
+                       bpo->bpo_phys->bpo_num_subobjs += numsubsub;
+
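+                       /* The copied sub-subobj array is no longer needed. */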
+                       dmu_buf_will_dirty(subbpo.bpo_dbuf, tx);
+                       subbpo.bpo_phys->bpo_subobjs = 0;
+                       VERIFY3U(0, ==, dmu_object_free(bpo->bpo_os,
+                           subsubobjs, tx));
+               }
+       }
        bpo->bpo_phys->bpo_bytes += used;
        bpo->bpo_phys->bpo_comp += comp;
        bpo->bpo_phys->bpo_uncomp += uncomp;
        mutex_exit(&bpo->bpo_lock);
+
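+       /* subbpo's dbuf was still in use above, so only close it now. */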
+       bpobj_close(&subbpo);
 }
 
 void
@@ -353,6 +443,7 @@ bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
        blkptr_t *bparray;
 
        ASSERT(!BP_IS_HOLE(bp));
+       ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
 
        /* We never need the fill count. */
        stored_bp.blk_fill = 0;
@@ -407,7 +498,10 @@ space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
        struct space_range_arg *sra = arg;
 
        if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
-               sra->used += bp_get_dsize_sync(sra->spa, bp);
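+               /* In syncing context, use the variant that skips the config lock. */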
+               if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
+                       sra->used += bp_get_dsize_sync(sra->spa, bp);
+               else
+                       sra->used += bp_get_dsize(sra->spa, bp);
                sra->comp += BP_GET_PSIZE(bp);
                sra->uncomp += BP_GET_UCSIZE(bp);
        }