X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=zfs%2Flib%2Flibzpool%2Fvdev_label.c;h=bf930466fbd64ba911ab8f7c394fd498f160b0b3;hb=2f40ac4d9e154f1d1ca4073ef8c3bfba92093c86;hp=7dcf1facdbd09f06998358c2c90f504bd2411fbe;hpb=34dc7c2f2553220ebc6e29ca195fb6d57155f95f;p=zfs.git diff --git a/zfs/lib/libzpool/vdev_label.c b/zfs/lib/libzpool/vdev_label.c index 7dcf1fa..bf93046 100644 --- a/zfs/lib/libzpool/vdev_label.c +++ b/zfs/lib/libzpool/vdev_label.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "@(#)vdev_label.c 1.18 07/12/12 SMI" - /* * Virtual Device Labels * --------------------- @@ -159,25 +157,45 @@ vdev_label_offset(uint64_t psize, int l, uint64_t offset) 0 : psize - VDEV_LABELS * sizeof (vdev_label_t))); } +/* + * Returns back the vdev label associated with the passed in offset. + */ +int +vdev_label_number(uint64_t psize, uint64_t offset) +{ + int l; + + if (offset >= psize - VDEV_LABEL_END_SIZE) { + offset -= psize - VDEV_LABEL_END_SIZE; + offset += (VDEV_LABELS / 2) * sizeof (vdev_label_t); + } + l = offset / sizeof (vdev_label_t); + return (l < VDEV_LABELS ? l : -1); +} + static void vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, - uint64_t size, zio_done_func_t *done, void *private) + uint64_t size, zio_done_func_t *done, void *private, int flags) { - ASSERT(vd->vdev_children == 0); + ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == + SCL_STATE_ALL); + ASSERT(flags & ZIO_FLAG_CONFIG_WRITER); zio_nowait(zio_read_phys(zio, vd, vdev_label_offset(vd->vdev_psize, l, offset), size, buf, ZIO_CHECKSUM_LABEL, done, private, - ZIO_PRIORITY_SYNC_READ, - ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, - B_TRUE)); + ZIO_PRIORITY_SYNC_READ, flags, B_TRUE)); } static void vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *private, int flags) { - ASSERT(vd->vdev_children == 0); + ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || + (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) == + (SCL_CONFIG | SCL_STATE) && + dsl_pool_sync_context(spa_get_dsl(zio->io_spa)))); + ASSERT(flags & ZIO_FLAG_CONFIG_WRITER); zio_nowait(zio_write_phys(zio, vd, vdev_label_offset(vd->vdev_psize, l, offset), @@ -317,24 +335,23 @@ vdev_label_read_config(vdev_t *vd) nvlist_t *config = NULL; vdev_phys_t *vp; zio_t *zio; - int l; + int flags = + ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; - ASSERT(spa_config_held(spa, RW_READER) || - spa_config_held(spa, RW_WRITER)); + ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); if (!vdev_readable(vd)) return (NULL); vp = zio_buf_alloc(sizeof (vdev_phys_t)); - for (l = 0; l < VDEV_LABELS; l++) { + for (int l = 0; l < VDEV_LABELS; l++) { - zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | - ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CONFIG_HELD); + zio = zio_root(spa, NULL, NULL, flags); vdev_label_read(zio, vd, l, vp, offsetof(vdev_label_t, vl_vdev_phys), - sizeof (vdev_phys_t), NULL, NULL); + sizeof (vdev_phys_t), NULL, NULL, flags); if (zio_wait(zio) == 0 && nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist), @@ -405,7 +422,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, */ if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && !spa_guid_exists(pool_guid, device_guid) && - !spa_spare_exists(device_guid, NULL) && + !spa_spare_exists(device_guid, NULL, NULL) && !spa_l2cache_exists(device_guid, NULL)) return (B_FALSE); @@ -425,7 +442,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason, * spa_has_spare() here because it may be on our pending list of spares * to add. We also check if it is an l2cache device. */ - if (spa_spare_exists(device_guid, &spare_pool) || + if (spa_spare_exists(device_guid, &spare_pool, NULL) || spa_has_spare(spa, device_guid)) { if (spare_guid) *spare_guid = device_guid; @@ -474,16 +491,15 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) vdev_boot_header_t *vb; uberblock_t *ub; zio_t *zio; - int l, c, n; char *buf; size_t buflen; int error; uint64_t spare_guid, l2cache_guid; - int flags = ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; - ASSERT(spa_config_held(spa, RW_WRITER)); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); - for (c = 0; c < vd->vdev_children; c++) + for (int c = 0; c < vd->vdev_children; c++) if ((error = vdev_label_init(vd->vdev_child[c], crtxg, reason)) != 0) return (error); @@ -515,14 +531,12 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) */ if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_L2CACHE && spare_guid != 0ULL) { - vdev_t *pvd = vd->vdev_parent; + uint64_t guid_delta = spare_guid - vd->vdev_guid; - for (; pvd != NULL; pvd = pvd->vdev_parent) { - pvd->vdev_guid_sum -= vd->vdev_guid; - pvd->vdev_guid_sum += spare_guid; - } + vd->vdev_guid += guid_delta; - vd->vdev_guid = vd->vdev_guid_sum = spare_guid; + for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) + pvd->vdev_guid_sum += guid_delta; /* * If this is a replacement, then we want to fallthrough to the @@ -536,14 +550,12 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE && l2cache_guid != 0ULL) { - vdev_t *pvd = vd->vdev_parent; + uint64_t guid_delta = l2cache_guid - vd->vdev_guid; - for (; pvd != NULL; pvd = pvd->vdev_parent) { - pvd->vdev_guid_sum -= vd->vdev_guid; - pvd->vdev_guid_sum += l2cache_guid; - } + vd->vdev_guid += guid_delta; - vd->vdev_guid = vd->vdev_guid_sum = l2cache_guid; + for (vdev_t *pvd = vd; pvd != NULL; pvd = pvd->vdev_parent) + pvd->vdev_guid_sum += guid_delta; /* * If this is a replacement, then we want to fallthrough to the @@ -643,7 +655,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) */ zio = zio_root(spa, NULL, NULL, flags); - for (l = 0; l < VDEV_LABELS; l++) { + for (int l = 0; l < VDEV_LABELS; l++) { vdev_label_write(zio, vd, l, vp, offsetof(vdev_label_t, vl_vdev_phys), @@ -653,7 +665,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) offsetof(vdev_label_t, vl_boot_header), sizeof (vdev_boot_header_t), NULL, NULL, flags); - for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { + for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { vdev_label_write(zio, vd, l, ub, VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), NULL, NULL, flags); @@ -675,7 +687,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) */ if (error == 0 && !vd->vdev_isspare && (reason == VDEV_LABEL_SPARE || - spa_spare_exists(vd->vdev_guid, NULL))) + spa_spare_exists(vd->vdev_guid, NULL, NULL))) spa_spare_add(vd); if (error == 0 && !vd->vdev_isl2cache && @@ -721,17 +733,17 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) static void vdev_uberblock_load_done(zio_t *zio) { + zio_t *rio = zio->io_private; uberblock_t *ub = zio->io_data; - uberblock_t *ubbest = zio->io_private; - spa_t *spa = zio->io_spa; + uberblock_t *ubbest = rio->io_private; ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd)); if (zio->io_error == 0 && uberblock_verify(ub) == 0) { - mutex_enter(&spa->spa_uberblock_lock); + mutex_enter(&rio->io_lock); if (vdev_uberblock_compare(ub, ubbest) > 0) *ubbest = *ub; - mutex_exit(&spa->spa_uberblock_lock); + mutex_exit(&rio->io_lock); } zio_buf_free(zio->io_data, zio->io_size); @@ -740,26 +752,39 @@ vdev_uberblock_load_done(zio_t *zio) void vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest) { - int l, c, n; - - for (c = 0; c < vd->vdev_children; c++) - vdev_uberblock_load(zio, vd->vdev_child[c], ubbest); + spa_t *spa = vd->vdev_spa; + vdev_t *rvd = spa->spa_root_vdev; + int flags = + ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE; + + if (vd == rvd) { + ASSERT(zio == NULL); + spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); + zio = zio_root(spa, NULL, ubbest, flags); + bzero(ubbest, sizeof (uberblock_t)); + } - if (!vd->vdev_ops->vdev_op_leaf) - return; + ASSERT(zio != NULL); - if (vdev_is_dead(vd)) - return; + for (int c = 0; c < vd->vdev_children; c++) + vdev_uberblock_load(zio, vd->vdev_child[c], ubbest); - for (l = 0; l < VDEV_LABELS; l++) { - for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { - vdev_label_read(zio, vd, l, - zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)), - VDEV_UBERBLOCK_OFFSET(vd, n), - VDEV_UBERBLOCK_SIZE(vd), - vdev_uberblock_load_done, ubbest); + if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { + for (int l = 0; l < VDEV_LABELS; l++) { + for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { + vdev_label_read(zio, vd, l, + zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)), + VDEV_UBERBLOCK_OFFSET(vd, n), + VDEV_UBERBLOCK_SIZE(vd), + vdev_uberblock_load_done, zio, flags); + } } } + + if (vd == rvd) { + (void) zio_wait(zio); + spa_config_exit(spa, SCL_ALL, FTAG); + } } /* @@ -779,18 +804,18 @@ vdev_uberblock_sync_done(zio_t *zio) * Write the uberblock to all labels of all leaves of the specified vdev. */ static void -vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd) +vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) { - int l, c, n; uberblock_t *ubbuf; + int n; - for (c = 0; c < vd->vdev_children; c++) - vdev_uberblock_sync(zio, ub, vd->vdev_child[c]); + for (int c = 0; c < vd->vdev_children; c++) + vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags); if (!vd->vdev_ops->vdev_op_leaf) return; - if (vdev_is_dead(vd)) + if (!vdev_writeable(vd)) return; n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); @@ -799,12 +824,11 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd) bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); *ubbuf = *ub; - for (l = 0; l < VDEV_LABELS; l++) + for (int l = 0; l < VDEV_LABELS; l++) vdev_label_write(zio, vd, l, ubbuf, - VDEV_UBERBLOCK_OFFSET(vd, n), - VDEV_UBERBLOCK_SIZE(vd), + VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), vdev_uberblock_sync_done, zio->io_private, - ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE); + flags | ZIO_FLAG_DONT_PROPAGATE); zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); } @@ -813,14 +837,13 @@ int vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) { spa_t *spa = svd[0]->vdev_spa; - int v; zio_t *zio; uint64_t good_writes = 0; zio = zio_root(spa, NULL, &good_writes, flags); - for (v = 0; v < svdcount; v++) - vdev_uberblock_sync(zio, ub, svd[v]); + for (int v = 0; v < svdcount; v++) + vdev_uberblock_sync(zio, ub, svd[v], flags); (void) zio_wait(zio); @@ -831,7 +854,7 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags) */ zio = zio_root(spa, NULL, NULL, flags); - for (v = 0; v < svdcount; v++) + for (int v = 0; v < svdcount; v++) zio_flush(zio, svd[v]); (void) zio_wait(zio); @@ -866,24 +889,32 @@ vdev_label_sync_top_done(zio_t *zio) } /* + * We ignore errors for log and cache devices, simply free the private data. + */ +static void +vdev_label_sync_ignore_done(zio_t *zio) +{ + kmem_free(zio->io_private, sizeof (uint64_t)); +} + +/* * Write all even or odd labels to all leaves of the specified vdev. */ static void -vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg) +vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) { nvlist_t *label; vdev_phys_t *vp; char *buf; size_t buflen; - int c; - for (c = 0; c < vd->vdev_children; c++) - vdev_label_sync(zio, vd->vdev_child[c], l, txg); + for (int c = 0; c < vd->vdev_children; c++) + vdev_label_sync(zio, vd->vdev_child[c], l, txg, flags); if (!vd->vdev_ops->vdev_op_leaf) return; - if (vdev_is_dead(vd)) + if (!vdev_writeable(vd)) return; /* @@ -903,7 +934,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg) offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t), vdev_label_sync_done, zio->io_private, - ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE); + flags | ZIO_FLAG_DONT_PROPAGATE); } } @@ -912,9 +943,9 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg) } int -vdev_label_sync_list(spa_t *spa, int l, int flags, uint64_t txg) +vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) { - list_t *dl = &spa->spa_dirty_list; + list_t *dl = &spa->spa_config_dirty_list; vdev_t *vd; zio_t *zio; int error; @@ -927,9 +958,11 @@ vdev_label_sync_list(spa_t *spa, int l, int flags, uint64_t txg) for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); - zio_t *vio = zio_null(zio, spa, vdev_label_sync_top_done, + zio_t *vio = zio_null(zio, spa, + (vd->vdev_islog || vd->vdev_aux != NULL) ? + vdev_label_sync_ignore_done : vdev_label_sync_top_done, good_writes, flags); - vdev_label_sync(vio, vd, l, txg); + vdev_label_sync(vio, vd, l, txg, flags); zio_nowait(vio); } @@ -967,7 +1000,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) vdev_t *vd; zio_t *zio; int error; - int flags = ZIO_FLAG_CONFIG_HELD | ZIO_FLAG_CANFAIL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; ASSERT(ub->ub_txg <= txg); @@ -979,7 +1012,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) */ if (ub->ub_txg < txg && uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE && - list_is_empty(&spa->spa_dirty_list)) + list_is_empty(&spa->spa_config_dirty_list)) return (0); if (txg > spa_freeze_txg(spa)) @@ -1010,7 +1043,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) * the new labels to disk to ensure that all even-label updates * are committed to stable storage before the uberblock update. */ - if ((error = vdev_label_sync_list(spa, 0, flags, txg)) != 0) + if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0) return (error); /* @@ -1041,5 +1074,5 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg) * to disk to ensure that all odd-label updates are committed to * stable storage before the next transaction group begins. */ - return (vdev_label_sync_list(spa, 1, flags, txg)); + return (vdev_label_sync_list(spa, 1, txg, flags)); }