X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=module%2Fzfs%2Fvdev_label.c;h=75ec545345fb706efa70942b1dae0a946a3a9f0c;hb=428870ff734fdaccc342b33fc53cf94724409a46;hp=06cb720128b6d0cc7b1dd75c891dabacc9601ea4;hpb=45d1cae3b8c949ecc391dd7a5b81963b34c71c29;p=zfs.git diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 06cb720..75ec545 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -141,6 +140,7 @@ #include #include #include +#include #include /* @@ -208,7 +208,7 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, */ nvlist_t * vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, - boolean_t isspare, boolean_t isl2cache) + vdev_config_flag_t flags) { nvlist_t *nv = NULL; @@ -216,7 +216,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type) == 0); - if (!isspare && !isl2cache) + if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE))) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id) == 0); VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); @@ -270,7 +270,8 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_isspare) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1) == 0); - if (!isspare && !isl2cache && vd == vd->vdev_top) { + if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) && + vd == vd->vdev_top) { VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, vd->vdev_ms_array) == 0); VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, @@ -281,39 +282,74 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, vd->vdev_asize) == 0); VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog) == 0); + if (vd->vdev_removing) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING, + vd->vdev_removing) == 0); } if (vd->vdev_dtl_smo.smo_object != 0) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_DTL, vd->vdev_dtl_smo.smo_object) == 0); + if (vd->vdev_crtxg) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, + vd->vdev_crtxg) == 0); + if (getstats) { vdev_stat_t vs; + pool_scan_stat_t ps; + vdev_get_stats(vd, &vs); - VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_STATS, + VERIFY(nvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t)) == 0); + + /* provide either current or previous scan information */ + if (spa_scan_get_stats(spa, &ps) == 0) { + VERIFY(nvlist_add_uint64_array(nv, + ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps, + sizeof (pool_scan_stat_t) / sizeof (uint64_t)) + == 0); + } } if (!vd->vdev_ops->vdev_op_leaf) { nvlist_t **child; - int c; + int c, idx; + + ASSERT(!vd->vdev_ishole); child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *), KM_SLEEP); - for (c = 0; c < vd->vdev_children; c++) - child[c] = vdev_config_generate(spa, vd->vdev_child[c], - getstats, isspare, isl2cache); + for (c = 0, idx = 0; c < vd->vdev_children; c++) { + vdev_t *cvd = vd->vdev_child[c]; - VERIFY(nvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - child, vd->vdev_children) == 0); + /* + * If we're generating an nvlist of removing + * vdevs then skip over any device which is + * not being removed. + */ + if ((flags & VDEV_CONFIG_REMOVING) && + !cvd->vdev_removing) + continue; - for (c = 0; c < vd->vdev_children; c++) + child[idx++] = vdev_config_generate(spa, cvd, + getstats, flags); + } + + if (idx) { + VERIFY(nvlist_add_nvlist_array(nv, + ZPOOL_CONFIG_CHILDREN, child, idx) == 0); + } + + for (c = 0; c < idx; c++) nvlist_free(child[c]); kmem_free(child, vd->vdev_children * sizeof (nvlist_t *)); } else { + const char *aux = NULL; + if (vd->vdev_offline && !vd->vdev_tmpoffline) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_OFFLINE, B_TRUE) == 0); @@ -329,11 +365,66 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, if (vd->vdev_unspare) VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_UNSPARE, B_TRUE) == 0); + if (vd->vdev_ishole) + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, + B_TRUE) == 0); + + switch (vd->vdev_stat.vs_aux) { + case VDEV_AUX_ERR_EXCEEDED: + aux = "err_exceeded"; + break; + + case VDEV_AUX_EXTERNAL: + aux = "external"; + break; + } + + if (aux != NULL) + VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, + aux) == 0); + + if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) { + VERIFY(nvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID, + vd->vdev_orig_guid) == 0); + } } return (nv); } +/* + * Generate a view of the top-level vdevs. If we currently have holes + * in the namespace, then generate an array which contains a list of holey + * vdevs. Additionally, add the number of top-level children that currently + * exist. + */ +void +vdev_top_config_generate(spa_t *spa, nvlist_t *config) +{ + vdev_t *rvd = spa->spa_root_vdev; + uint64_t *array; + uint_t c, idx; + + array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP); + + for (c = 0, idx = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + + if (tvd->vdev_ishole) + array[idx++] = c; + } + + if (idx) { + VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY, + array, idx) == 0); + } + + VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, + rvd->vdev_children) == 0); + + kmem_free(array, rvd->vdev_children * sizeof (uint64_t)); +} + nvlist_t * vdev_label_read_config(vdev_t *vd) { @@ -516,6 +607,9 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) crtxg, reason)) != 0) return (error); + /* Track the creation time for this vdev */ + vd->vdev_crtxg = crtxg; + if (!vd->vdev_ops->vdev_op_leaf) return (0); @@ -528,7 +622,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) /* * Determine if the vdev is in use. */ - if (reason != VDEV_LABEL_REMOVE && + if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPLIT && vdev_inuse(vd, crtxg, reason, &spare_guid, &l2cache_guid)) return (EBUSY); @@ -554,7 +648,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) */ if (reason == VDEV_LABEL_SPARE) return (0); - ASSERT(reason == VDEV_LABEL_REPLACE); + ASSERT(reason == VDEV_LABEL_REPLACE || + reason == VDEV_LABEL_SPLIT); } if (reason != VDEV_LABEL_REMOVE && reason != VDEV_LABEL_SPARE && @@ -619,7 +714,11 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); } else { - label = spa_config_generate(spa, vd, 0ULL, B_FALSE); + uint64_t txg = 0ULL; + + if (reason == VDEV_LABEL_SPLIT) + txg = spa->spa_uberblock.ub_txg; + label = spa_config_generate(spa, vd, txg, B_FALSE); /* * Add our creation time. This allows us to detect multiple @@ -717,11 +816,6 @@ retry: */ /* - * For use by zdb and debugging purposes only - */ -uint64_t ub_max_txg = UINT64_MAX; - -/* * Consider the following situation: txg is safely synced to disk. We've * written the first uberblock for txg + 1, and then we lose power. When we * come back up, we fail to see the uberblock for txg + 1 because, say, @@ -750,6 +844,7 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2) static void vdev_uberblock_load_done(zio_t *zio) { + spa_t *spa = zio->io_spa; zio_t *rio = zio->io_private; uberblock_t *ub = zio->io_data; uberblock_t *ubbest = rio->io_private; @@ -758,7 +853,7 @@ vdev_uberblock_load_done(zio_t *zio) if (zio->io_error == 0 && uberblock_verify(ub) == 0) { mutex_enter(&rio->io_lock); - if (ub->ub_txg <= ub_max_txg && + if (ub->ub_txg <= spa->spa_load_max_txg && vdev_uberblock_compare(ub, ubbest) > 0) *ubbest = *ub; mutex_exit(&rio->io_lock); @@ -976,6 +1071,9 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) for (vd = list_head(dl); vd != NULL; vd = list_next(dl, vd)) { uint64_t *good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); + + ASSERT(!vd->vdev_ishole); + zio_t *vio = zio_null(zio, spa, NULL, (vd->vdev_islog || vd->vdev_aux != NULL) ? vdev_label_sync_ignore_done : vdev_label_sync_top_done,