Minor tweak to update script
[zfs.git] / zfs / lib / libzpool / vdev_label.c
index 7dcf1fa..bf93046 100644 (file)
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident  "@(#)vdev_label.c       1.18    07/12/12 SMI"
-
 /*
  * Virtual Device Labels
  * ---------------------
@@ -159,25 +157,45 @@ vdev_label_offset(uint64_t psize, int l, uint64_t offset)
            0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
 }
 
+/*
+ * Returns back the vdev label associated with the passed in offset.
+ */
+int
+vdev_label_number(uint64_t psize, uint64_t offset)
+{
+       int l;
+
+       if (offset >= psize - VDEV_LABEL_END_SIZE) {
+               offset -= psize - VDEV_LABEL_END_SIZE;
+               offset += (VDEV_LABELS / 2) * sizeof (vdev_label_t);
+       }
+       l = offset / sizeof (vdev_label_t);
+       return (l < VDEV_LABELS ? l : -1);
+}
+
 static void
 vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
-       uint64_t size, zio_done_func_t *done, void *private)
+       uint64_t size, zio_done_func_t *done, void *private, int flags)
 {
-       ASSERT(vd->vdev_children == 0);
+       ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) ==
+           SCL_STATE_ALL);
+       ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
 
        zio_nowait(zio_read_phys(zio, vd,
            vdev_label_offset(vd->vdev_psize, l, offset),
            size, buf, ZIO_CHECKSUM_LABEL, done, private,
-           ZIO_PRIORITY_SYNC_READ,
-           ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
-           B_TRUE));
+           ZIO_PRIORITY_SYNC_READ, flags, B_TRUE));
 }
 
 static void
 vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
        uint64_t size, zio_done_func_t *done, void *private, int flags)
 {
-       ASSERT(vd->vdev_children == 0);
+       ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL ||
+           (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
+           (SCL_CONFIG | SCL_STATE) &&
+           dsl_pool_sync_context(spa_get_dsl(zio->io_spa))));
+       ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
 
        zio_nowait(zio_write_phys(zio, vd,
            vdev_label_offset(vd->vdev_psize, l, offset),
@@ -317,24 +335,23 @@ vdev_label_read_config(vdev_t *vd)
        nvlist_t *config = NULL;
        vdev_phys_t *vp;
        zio_t *zio;
-       int l;
+       int flags =
+           ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
 
-       ASSERT(spa_config_held(spa, RW_READER) ||
-           spa_config_held(spa, RW_WRITER));
+       ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
 
        if (!vdev_readable(vd))
                return (NULL);
 
        vp = zio_buf_alloc(sizeof (vdev_phys_t));
 
-       for (l = 0; l < VDEV_LABELS; l++) {
+       for (int l = 0; l < VDEV_LABELS; l++) {
 
-               zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL |
-                   ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CONFIG_HELD);
+               zio = zio_root(spa, NULL, NULL, flags);
 
                vdev_label_read(zio, vd, l, vp,
                    offsetof(vdev_label_t, vl_vdev_phys),
-                   sizeof (vdev_phys_t), NULL, NULL);
+                   sizeof (vdev_phys_t), NULL, NULL, flags);
 
                if (zio_wait(zio) == 0 &&
                    nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist),
@@ -405,7 +422,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
         */
        if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
            !spa_guid_exists(pool_guid, device_guid) &&
-           !spa_spare_exists(device_guid, NULL) &&
+           !spa_spare_exists(device_guid, NULL, NULL) &&
            !spa_l2cache_exists(device_guid, NULL))
                return (B_FALSE);
 
@@ -425,7 +442,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
         * spa_has_spare() here because it may be on our pending list of spares
         * to add.  We also check if it is an l2cache device.
         */
-       if (spa_spare_exists(device_guid, &spare_pool) ||
+       if (spa_spare_exists(device_guid, &spare_pool, NULL) ||
            spa_has_spare(spa, device_guid)) {
                if (spare_guid)
                        *spare_guid = device_guid;
@@ -474,16 +491,15 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
        vdev_boot_header_t *vb;
        uberblock_t *ub;
        zio_t *zio;
-       int l, c, n;
        char *buf;
        size_t buflen;
        int error;
        uint64_t spare_guid, l2cache_guid;
-       int flags = ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL;
+       int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
 
-       ASSERT(spa_config_held(spa, RW_WRITER));
+       ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
 
-       for (c = 0; c < vd->vdev_children; c++)
+       for (int c = 0; c < vd->vdev_children; c++)
                if ((error = vdev_label_init(vd->vdev_child[c],
                    crtxg, reason)) != 0)
                        return (error);
@@ -515,14 +531,12 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
         */
        if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE &&
            spare_guid != 0ULL) {
-               vdev_t *pvd = vd->vdev_parent;
+               uint64_t guid_delta = spare_guid - vd->vdev_guid;
 
-               for (; pvd != NULL; pvd = pvd->vdev_parent) {
-                       pvd->vdev_guid_sum -= vd->vdev_guid;
-                       pvd->vdev_guid_sum += spare_guid;
-               }
+               vd->vdev_guid += guid_delta;
 
-               vd->vdev_guid = vd->vdev_guid_sum = spare_guid;
+               for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
+                       pvd->vdev_guid_sum += guid_delta;
 
                /*
                 * If this is a replacement, then we want to fallthrough to the
@@ -536,14 +550,12 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
 
        if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE &&
            l2cache_guid != 0ULL) {
-               vdev_t *pvd = vd->vdev_parent;
+               uint64_t guid_delta = l2cache_guid - vd->vdev_guid;
 
-               for (; pvd != NULL; pvd = pvd->vdev_parent) {
-                       pvd->vdev_guid_sum -= vd->vdev_guid;
-                       pvd->vdev_guid_sum += l2cache_guid;
-               }
+               vd->vdev_guid += guid_delta;
 
-               vd->vdev_guid = vd->vdev_guid_sum = l2cache_guid;
+               for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent)
+                       pvd->vdev_guid_sum += guid_delta;
 
                /*
                 * If this is a replacement, then we want to fallthrough to the
@@ -643,7 +655,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
         */
        zio = zio_root(spa, NULL, NULL, flags);
 
-       for (l = 0; l < VDEV_LABELS; l++) {
+       for (int l = 0; l < VDEV_LABELS; l++) {
 
                vdev_label_write(zio, vd, l, vp,
                    offsetof(vdev_label_t, vl_vdev_phys),
@@ -653,7 +665,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
                    offsetof(vdev_label_t, vl_boot_header),
                    sizeof (vdev_boot_header_t), NULL, NULL, flags);
 
-               for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+               for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
                        vdev_label_write(zio, vd, l, ub,
                            VDEV_UBERBLOCK_OFFSET(vd, n),
                            VDEV_UBERBLOCK_SIZE(vd), NULL, NULL, flags);
@@ -675,7 +687,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
         */
        if (error == 0 && !vd->vdev_isspare &&
            (reason == VDEV_LABEL_SPARE ||
-           spa_spare_exists(vd->vdev_guid, NULL)))
+           spa_spare_exists(vd->vdev_guid, NULL, NULL)))
                spa_spare_add(vd);
 
        if (error == 0 && !vd->vdev_isl2cache &&
@@ -721,17 +733,17 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
 static void
 vdev_uberblock_load_done(zio_t *zio)
 {
+       zio_t *rio = zio->io_private;
        uberblock_t *ub = zio->io_data;
-       uberblock_t *ubbest = zio->io_private;
-       spa_t *spa = zio->io_spa;
+       uberblock_t *ubbest = rio->io_private;
 
        ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
 
        if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
-               mutex_enter(&spa->spa_uberblock_lock);
+               mutex_enter(&rio->io_lock);
                if (vdev_uberblock_compare(ub, ubbest) > 0)
                        *ubbest = *ub;
-               mutex_exit(&spa->spa_uberblock_lock);
+               mutex_exit(&rio->io_lock);
        }
 
        zio_buf_free(zio->io_data, zio->io_size);
@@ -740,26 +752,39 @@ vdev_uberblock_load_done(zio_t *zio)
 void
 vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
 {
-       int l, c, n;
-
-       for (c = 0; c < vd->vdev_children; c++)
-               vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
+       spa_t *spa = vd->vdev_spa;
+       vdev_t *rvd = spa->spa_root_vdev;
+       int flags =
+           ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
+
+       if (vd == rvd) {
+               ASSERT(zio == NULL);
+               spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+               zio = zio_root(spa, NULL, ubbest, flags);
+               bzero(ubbest, sizeof (uberblock_t));
+       }
 
-       if (!vd->vdev_ops->vdev_op_leaf)
-               return;
+       ASSERT(zio != NULL);
 
-       if (vdev_is_dead(vd))
-               return;
+       for (int c = 0; c < vd->vdev_children; c++)
+               vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
 
-       for (l = 0; l < VDEV_LABELS; l++) {
-               for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
-                       vdev_label_read(zio, vd, l,
-                           zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)),
-                           VDEV_UBERBLOCK_OFFSET(vd, n),
-                           VDEV_UBERBLOCK_SIZE(vd),
-                           vdev_uberblock_load_done, ubbest);
+       if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
+               for (int l = 0; l < VDEV_LABELS; l++) {
+                       for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+                               vdev_label_read(zio, vd, l,
+                                   zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)),
+                                   VDEV_UBERBLOCK_OFFSET(vd, n),
+                                   VDEV_UBERBLOCK_SIZE(vd),
+                                   vdev_uberblock_load_done, zio, flags);
+                       }
                }
        }
+
+       if (vd == rvd) {
+               (void) zio_wait(zio);
+               spa_config_exit(spa, SCL_ALL, FTAG);
+       }
 }
 
 /*
@@ -779,18 +804,18 @@ vdev_uberblock_sync_done(zio_t *zio)
  * Write the uberblock to all labels of all leaves of the specified vdev.
  */
 static void
-vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd)
+vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
 {
-       int l, c, n;
        uberblock_t *ubbuf;
+       int n;
 
-       for (c = 0; c < vd->vdev_children; c++)
-               vdev_uberblock_sync(zio, ub, vd->vdev_child[c]);
+       for (int c = 0; c < vd->vdev_children; c++)
+               vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags);
 
        if (!vd->vdev_ops->vdev_op_leaf)
                return;
 
-       if (vdev_is_dead(vd))
+       if (!vdev_writeable(vd))
                return;
 
        n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
@@ -799,12 +824,11 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd)
        bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
        *ubbuf = *ub;
 
-       for (l = 0; l < VDEV_LABELS; l++)
+       for (int l = 0; l < VDEV_LABELS; l++)
                vdev_label_write(zio, vd, l, ubbuf,
-                   VDEV_UBERBLOCK_OFFSET(vd, n),
-                   VDEV_UBERBLOCK_SIZE(vd),
+                   VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
                    vdev_uberblock_sync_done, zio->io_private,
-                   ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE);
+                   flags | ZIO_FLAG_DONT_PROPAGATE);
 
        zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
 }
@@ -813,14 +837,13 @@ int
 vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
 {
        spa_t *spa = svd[0]->vdev_spa;
-       int v;
        zio_t *zio;
        uint64_t good_writes = 0;
 
        zio = zio_root(spa, NULL, &good_writes, flags);
 
-       for (v = 0; v < svdcount; v++)
-               vdev_uberblock_sync(zio, ub, svd[v]);
+       for (int v = 0; v < svdcount; v++)
+               vdev_uberblock_sync(zio, ub, svd[v], flags);
 
        (void) zio_wait(zio);
 
@@ -831,7 +854,7 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
         */
        zio = zio_root(spa, NULL, NULL, flags);
 
-       for (v = 0; v < svdcount; v++)
+       for (int v = 0; v < svdcount; v++)
                zio_flush(zio, svd[v]);
 
        (void) zio_wait(zio);
@@ -866,24 +889,32 @@ vdev_label_sync_top_done(zio_t *zio)
 }
 
 /*
+ * We ignore errors for log and cache devices, simply free the private data.
+ */
+static void
+vdev_label_sync_ignore_done(zio_t *zio)
+{
+       kmem_free(zio->io_private, sizeof (uint64_t));
+}
+
+/*
  * Write all even or odd labels to all leaves of the specified vdev.
  */
 static void
-vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg)
+vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags)
 {
        nvlist_t *label;
        vdev_phys_t *vp;
        char *buf;
        size_t buflen;
-       int c;
 
-       for (c = 0; c < vd->vdev_children; c++)
-               vdev_label_sync(zio, vd->vdev_child[c], l, txg);
+       for (int c = 0; c < vd->vdev_children; c++)
+               vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags);
 
        if (!vd->vdev_ops->vdev_op_leaf)
                return;
 
-       if (vdev_is_dead(vd))
+       if (!vdev_writeable(vd))
                return;
 
        /*
@@ -903,7 +934,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg)
                            offsetof(vdev_label_t, vl_vdev_phys),
                            sizeof (vdev_phys_t),
                            vdev_label_sync_done, zio->io_private,
-                           ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE);
+                           flags | ZIO_FLAG_DONT_PROPAGATE);
                }
        }
 
@@ -912,9 +943,9 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg)
 }
 
 int
-vdev_label_sync_list(spa_t *spa, int l, int flags, uint64_t txg)
+vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
 {
-       list_t *dl = &spa->spa_dirty_list;
+       list_t *dl = &spa->spa_config_dirty_list;
        vdev_t *vd;
        zio_t *zio;
        int error;
@@ -927,9 +958,11 @@ vdev_label_sync_list(spa_t *spa, int l, int flags, uint64_t txg)
        for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) {
                uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t),
                    KM_SLEEP);
-               zio_t *vio = zio_null(zio, spa, vdev_label_sync_top_done,
+               zio_t *vio = zio_null(zio, spa,
+                   (vd->vdev_islog || vd->vdev_aux != NULL) ?
+                   vdev_label_sync_ignore_done : vdev_label_sync_top_done,
                    good_writes, flags);
-               vdev_label_sync(vio, vd, l, txg);
+               vdev_label_sync(vio, vd, l, txg, flags);
                zio_nowait(vio);
        }
 
@@ -967,7 +1000,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
        vdev_t *vd;
        zio_t *zio;
        int error;
-       int flags = ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL;
+       int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
 
        ASSERT(ub->ub_txg <= txg);
 
@@ -979,7 +1012,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
         */
        if (ub->ub_txg < txg &&
            uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE &&
-           list_is_empty(&spa->spa_dirty_list))
+           list_is_empty(&spa->spa_config_dirty_list))
                return (0);
 
        if (txg > spa_freeze_txg(spa))
@@ -1010,7 +1043,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
         * the new labels to disk to ensure that all even-label updates
         * are committed to stable storage before the uberblock update.
         */
-       if ((error = vdev_label_sync_list(spa, 0, flags, txg)) != 0)
+       if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0)
                return (error);
 
        /*
@@ -1041,5 +1074,5 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
         * to disk to ensure that all odd-label updates are committed to
         * stable storage before the next transaction group begins.
         */
-       return (vdev_label_sync_list(spa, 1, flags, txg));
+       return (vdev_label_sync_list(spa, 1, txg, flags));
 }