Update core ZFS code from build 121 to build 141.
diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c
index 06cb720..75ec545 100644
--- a/module/zfs/vdev_label.c
+++ b/module/zfs/vdev_label.c
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
 #include <sys/uberblock_impl.h>
 #include <sys/metaslab.h>
 #include <sys/zio.h>
+#include <sys/dsl_scan.h>
 #include <sys/fs/zfs.h>
 
 /*
@@ -208,7 +208,7 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
  */
 nvlist_t *
 vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
-    boolean_t isspare, boolean_t isl2cache)
+    vdev_config_flag_t flags)
 {
        nvlist_t *nv = NULL;
 
@@ -216,7 +216,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 
        VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
            vd->vdev_ops->vdev_op_type) == 0);
-       if (!isspare && !isl2cache)
+       if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
                VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id)
                    == 0);
        VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0);
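
Note on the signature change above: the two boolean_t arguments are folded into a single vdev_config_flag_t bitmask, which also makes room for the VDEV_CONFIG_REMOVING case used later in this function. For reference only, the flag type added in this update looks roughly like the sketch below (the exact values come from the header, not from this note), and the trailing call shows how a former (B_FALSE, B_FALSE) caller would now pass 0.

    typedef enum vdev_config_flag {
            VDEV_CONFIG_SPARE = 1 << 0,     /* replaces the isspare argument */
            VDEV_CONFIG_L2CACHE = 1 << 1,   /* replaces the isl2cache argument */
            VDEV_CONFIG_REMOVING = 1 << 2   /* emit only vdevs being removed */
    } vdev_config_flag_t;

    /* illustrative caller: full config, no spare/l2cache/removing filtering */
    nvlist_t *nv = vdev_config_generate(spa, vd, B_TRUE, 0);
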
@@ -270,7 +270,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
        if (vd->vdev_isspare)
                VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0);
 
-       if (!isspare && !isl2cache && vd == vd->vdev_top) {
+       if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
+           vd == vd->vdev_top) {
                VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
                    vd->vdev_ms_array) == 0);
                VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
@@ -281,39 +282,74 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
                    vd->vdev_asize) == 0);
                VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG,
                    vd->vdev_islog) == 0);
+               if (vd->vdev_removing)
+                       VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
+                           vd->vdev_removing) == 0);
        }
 
        if (vd->vdev_dtl_smo.smo_object != 0)
                VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
                    vd->vdev_dtl_smo.smo_object) == 0);
 
+       if (vd->vdev_crtxg)
+               VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG,
+                   vd->vdev_crtxg) == 0);
+
        if (getstats) {
                vdev_stat_t vs;
+               pool_scan_stat_t ps;
+
                vdev_get_stats(vd, &vs);
-               VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_STATS,
+               VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
                    (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0);
+
+               /* provide either current or previous scan information */
+               if (spa_scan_get_stats(spa, &ps) == 0) {
+                       VERIFY(nvlist_add_uint64_array(nv,
+                           ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps,
+                           sizeof (pool_scan_stat_t) / sizeof (uint64_t))
+                           == 0);
+               }
        }
 
        if (!vd->vdev_ops->vdev_op_leaf) {
                nvlist_t **child;
-               int c;
+               int c, idx;
+
+               ASSERT(!vd->vdev_ishole);
 
                child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
                    KM_SLEEP);
 
-               for (c = 0; c < vd->vdev_children; c++)
-                       child[c] = vdev_config_generate(spa, vd->vdev_child[c],
-                           getstats, isspare, isl2cache);
+               for (c = 0, idx = 0; c < vd->vdev_children; c++) {
+                       vdev_t *cvd = vd->vdev_child[c];
 
-               VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
-                   child, vd->vdev_children) == 0);
+                       /*
+                        * If we're generating an nvlist of removing
+                        * vdevs then skip over any device which is
+                        * not being removed.
+                        */
+                       if ((flags & VDEV_CONFIG_REMOVING) &&
+                           !cvd->vdev_removing)
+                               continue;
 
-               for (c = 0; c < vd->vdev_children; c++)
+                       child[idx++] = vdev_config_generate(spa, cvd,
+                           getstats, flags);
+               }
+
+               if (idx) {
+                       VERIFY(nvlist_add_nvlist_array(nv,
+                           ZPOOL_CONFIG_CHILDREN, child, idx) == 0);
+               }
+
+               for (c = 0; c < idx; c++)
                        nvlist_free(child[c]);
 
                kmem_free(child, vd->vdev_children * sizeof (nvlist_t *));
 
        } else {
+               const char *aux = NULL;
+
                if (vd->vdev_offline && !vd->vdev_tmpoffline)
                        VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE,
                            B_TRUE) == 0);
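
The scan statistics are packed into the config as a raw uint64_t array, mirroring the existing ZPOOL_CONFIG_VDEV_STATS handling, and are simply omitted when spa_scan_get_stats() has nothing to report. A consumer would pull them back out with nvlist_lookup_uint64_array() and cast, along these lines (a sketch; the nvroot variable name is illustrative):

    pool_scan_stat_t *ps = NULL;
    uint_t c;

    /* absent if the pool has never been scrubbed or resilvered */
    if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
        (uint64_t **)&ps, &c) == 0 && ps != NULL) {
            /* fields such as pss_func and pss_state describe the scan */
    }
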
@@ -329,11 +365,66 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
                if (vd->vdev_unspare)
                        VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE,
                            B_TRUE) == 0);
+               if (vd->vdev_ishole)
+                       VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE,
+                           B_TRUE) == 0);
+
+               switch (vd->vdev_stat.vs_aux) {
+               case VDEV_AUX_ERR_EXCEEDED:
+                       aux = "err_exceeded";
+                       break;
+
+               case VDEV_AUX_EXTERNAL:
+                       aux = "external";
+                       break;
+               }
+
+               if (aux != NULL)
+                       VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE,
+                           aux) == 0);
+
+               if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
+                       VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
+                           vd->vdev_orig_guid) == 0);
+               }
        }
 
        return (nv);
 }
 
+/*
+ * Generate a view of the top-level vdevs.  If we currently have holes
+ * in the namespace, then generate an array which contains a list of holey
+ * vdevs.  Additionally, add the number of top-level children that currently
+ * exist.
+ */
+void
+vdev_top_config_generate(spa_t *spa, nvlist_t *config)
+{
+       vdev_t *rvd = spa->spa_root_vdev;
+       uint64_t *array;
+       uint_t c, idx;
+
+       array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP);
+
+       for (c = 0, idx = 0; c < rvd->vdev_children; c++) {
+               vdev_t *tvd = rvd->vdev_child[c];
+
+               if (tvd->vdev_ishole)
+                       array[idx++] = c;
+       }
+
+       if (idx) {
+               VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
+                   array, idx) == 0);
+       }
+
+       VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
+           rvd->vdev_children) == 0);
+
+       kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
+}
+
 nvlist_t *
 vdev_label_read_config(vdev_t *vd)
 {
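
The hole array emitted by the new vdev_top_config_generate() above records which top-level slots are placeholder ("hole") vdevs, and ZPOOL_CONFIG_VDEV_CHILDREN pins the total width of the top-level vdev array so those slots can be recreated at the same indices on import. A reader would interpret the two entries roughly as follows (sketch only; the real consumer sits in the pool load/config code):

    uint64_t nchildren;
    uint64_t *holes;
    uint_t nholes, i;

    if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
        &nchildren) == 0 &&
        nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
        &holes, &nholes) == 0) {
            for (i = 0; i < nholes; i++) {
                    /* top-level index holes[i] (< nchildren) is a hole vdev */
            }
    }
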
@@ -516,6 +607,9 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
                    crtxg, reason)) != 0)
                        return (error);
 
+       /* Track the creation time for this vdev */
+       vd->vdev_crtxg = crtxg;
+
        if (!vd->vdev_ops->vdev_op_leaf)
                return (0);
 
@@ -528,7 +622,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
        /*
         * Determine if the vdev is in use.
         */
-       if (reason != VDEV_LABEL_REMOVE &&
+       if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT &&
            vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid))
                return (EBUSY);
 
@@ -554,7 +648,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
                 */
                if (reason == VDEV_LABEL_SPARE)
                        return (0);
-               ASSERT(reason == VDEV_LABEL_REPLACE);
+               ASSERT(reason == VDEV_LABEL_REPLACE ||
+                   reason == VDEV_LABEL_SPLIT);
        }
 
        if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE &&
@@ -619,7 +714,11 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
                VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID,
                    vd->vdev_guid) == 0);
        } else {
-               label = spa_config_generate(spa, vd, 0ULL, B_FALSE);
+               uint64_t txg = 0ULL;
+
+               if (reason == VDEV_LABEL_SPLIT)
+                       txg = spa->spa_uberblock.ub_txg;
+               label = spa_config_generate(spa, vd, txg, B_FALSE);
 
                /*
                 * Add our creation time.  This allows us to detect multiple
@@ -717,11 +816,6 @@ retry:
  */
 
 /*
- * For use by zdb and debugging purposes only
- */
-uint64_t ub_max_txg = UINT64_MAX;
-
-/*
  * Consider the following situation: txg is safely synced to disk.  We've
  * written the first uberblock for txg + 1, and then we lose power.  When we
  * come back up, we fail to see the uberblock for txg + 1 because, say,
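
For context on the hunk below: vdev_uberblock_compare() (not modified here) prefers the higher txg and, on a tie, the later timestamp; what changes is that the per-pool spa_load_max_txg now caps which candidates are considered at all, replacing the global ub_max_txg knob removed above. The comparison amounts to:

    /* illustrative restatement of vdev_uberblock_compare(), not part of this patch */
    if (ub1->ub_txg < ub2->ub_txg)
            return (-1);
    if (ub1->ub_txg > ub2->ub_txg)
            return (1);
    if (ub1->ub_timestamp < ub2->ub_timestamp)
            return (-1);
    if (ub1->ub_timestamp > ub2->ub_timestamp)
            return (1);
    return (0);
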
@@ -750,6 +844,7 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
 static void
 vdev_uberblock_load_done(zio_t *zio)
 {
+       spa_t *spa = zio->io_spa;
        zio_t *rio = zio->io_private;
        uberblock_t *ub = zio->io_data;
        uberblock_t *ubbest = rio->io_private;
@@ -758,7 +853,7 @@ vdev_uberblock_load_done(zio_t *zio)
 
        if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
                mutex_enter(&rio->io_lock);
-               if (ub->ub_txg <= ub_max_txg &&
+               if (ub->ub_txg <= spa->spa_load_max_txg &&
                    vdev_uberblock_compare(ub, ubbest) > 0)
                        *ubbest = *ub;
                mutex_exit(&rio->io_lock);
@@ -976,6 +1071,9 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
        for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) {
                uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t),
                    KM_SLEEP);
+
+               ASSERT(!vd->vdev_ishole);
+
                zio_t *vio = zio_null(zio, spa, NULL,
                    (vd->vdev_islog || vd->vdev_aux != NULL) ?
                    vdev_label_sync_ignore_done : vdev_label_sync_top_done,