X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdsl_scrub.c;h=03ebb90bbdaf44d407d1535a7f47b0430fc74264;hb=3affbe6d7e23f26ef9b4e70043b9fb93bfe9ea32;hp=950a91f783a4745f2ec4c08e8502031adef58ec9;hpb=172bb4bd5e4afef721dd4d2972d8680d983f144b;p=zfs.git diff --git a/module/zfs/dsl_scrub.c b/module/zfs/dsl_scrub.c index 950a91f..03ebb90 100644 --- a/module/zfs/dsl_scrub.c +++ b/module/zfs/dsl_scrub.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -45,6 +45,8 @@ typedef int (scrub_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); static scrub_cb_t dsl_pool_scrub_clean_cb; static dsl_syncfunc_t dsl_pool_scrub_cancel_sync; +static void scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf, + uint64_t objset, uint64_t object); int zfs_scrub_min_time = 1; /* scrub for at least 1 sec each txg */ int zfs_resilver_min_time = 3; /* resilver for at least 3 sec each txg */ @@ -95,6 +97,9 @@ dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) ESC_ZFS_RESILVER_START); dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg, tx->tx_txg); + } else { + spa_event_notify(dp->dp_spa, NULL, + ESC_ZFS_SCRUB_START); } /* zero out the scrub stats in all vdev_stat_t's */ @@ -212,8 +217,9 @@ dsl_pool_scrub_cancel_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) */ vdev_dtl_reassess(dp->dp_spa->spa_root_vdev, tx->tx_txg, *completep ? dp->dp_scrub_max_txg : 0, B_TRUE); - if (dp->dp_scrub_min_txg && *completep) - spa_event_notify(dp->dp_spa, NULL, ESC_ZFS_RESILVER_FINISH); + if (*completep) + spa_event_notify(dp->dp_spa, NULL, dp->dp_scrub_min_txg ? + ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); spa_errlog_rotate(dp->dp_spa); /* @@ -344,6 +350,12 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) if (bp->blk_birth <= dp->dp_scrub_min_txg) return; + /* + * One block ("stubby") can be allocated a long time ago; we + * want to visit that one because it has been allocated + * (on-disk) even if it hasn't been claimed (even though for + * plain scrub there's nothing to do to it). + */ if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa)) return; @@ -369,6 +381,11 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg) if (bp->blk_birth <= dp->dp_scrub_min_txg) return; + /* + * birth can be < claim_txg if this record's txg is + * already txg sync'ed (but this log block contains + * other records that are not synced) + */ if (claim_txg == 0 || bp->blk_birth < claim_txg) return; @@ -391,7 +408,7 @@ traverse_zil(dsl_pool_t *dp, zil_header_t *zh) * We only want to visit blocks that have been claimed but not yet * replayed (or, in read-only mode, blocks that *would* be claimed). */ - if (claim_txg == 0 && (spa_mode & FWRITE)) + if (claim_txg == 0 && spa_writeable(dp->dp_spa)) return; zilog = zil_alloc(dp->dp_meta_objset, zh); @@ -409,9 +426,6 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp, int err; arc_buf_t *buf = NULL; - if (bp->blk_birth == 0) - return; - if (bp->blk_birth <= dp->dp_scrub_min_txg) return; @@ -471,7 +485,7 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp, } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { uint32_t flags = ARC_WAIT; dnode_phys_t *child_dnp; - int i, j; + int i; int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; err = arc_read(NULL, dp->dp_spa, bp, pbuf, @@ -486,20 +500,12 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp, child_dnp = buf->b_data; for (i = 0; i < epb; i++, child_dnp++) { - for (j = 0; j < child_dnp->dn_nblkptr; j++) { - zbookmark_t czb; - - SET_BOOKMARK(&czb, zb->zb_objset, - zb->zb_blkid * epb + i, - child_dnp->dn_nlevels - 1, j); - scrub_visitbp(dp, child_dnp, buf, - &child_dnp->dn_blkptr[j], &czb); - } + scrub_visitdnode(dp, child_dnp, buf, zb->zb_objset, + zb->zb_blkid * epb + i); } } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) { uint32_t flags = ARC_WAIT; objset_phys_t *osp; - int j; err = arc_read_nolock(NULL, dp->dp_spa, bp, arc_getbuf_func, &buf, @@ -515,13 +521,13 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp, traverse_zil(dp, &osp->os_zil_header); - for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) { - zbookmark_t czb; - - SET_BOOKMARK(&czb, zb->zb_objset, 0, - osp->os_meta_dnode.dn_nlevels - 1, j); - scrub_visitbp(dp, &osp->os_meta_dnode, buf, - &osp->os_meta_dnode.dn_blkptr[j], &czb); + scrub_visitdnode(dp, &osp->os_meta_dnode, + buf, zb->zb_objset, 0); + if (arc_buf_size(buf) >= sizeof (objset_phys_t)) { + scrub_visitdnode(dp, &osp->os_userused_dnode, + buf, zb->zb_objset, 0); + scrub_visitdnode(dp, &osp->os_groupused_dnode, + buf, zb->zb_objset, 0); } } @@ -531,6 +537,21 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp, } static void +scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf, + uint64_t objset, uint64_t object) +{ + int j; + + for (j = 0; j < dnp->dn_nblkptr; j++) { + zbookmark_t czb; + + SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); + scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb); + } + +} + +static void scrub_visit_rootbp(dsl_pool_t *dp, dsl_dataset_t *ds, blkptr_t *bp) { zbookmark_t zb; @@ -740,6 +761,7 @@ enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg) void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) { + spa_t *spa = dp->dp_spa; zap_cursor_t zc; zap_attribute_t za; boolean_t complete = B_TRUE; @@ -747,8 +769,10 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (dp->dp_scrub_func == SCRUB_FUNC_NONE) return; - /* If the spa is not fully loaded, don't bother. */ - if (dp->dp_spa->spa_load_state != SPA_LOAD_NONE) + /* + * If the pool is not loaded, or is trying to unload, leave it alone. + */ + if (spa->spa_load_state != SPA_LOAD_NONE || spa_shutting_down(spa)) return; if (dp->dp_scrub_restart) { @@ -757,13 +781,13 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) dsl_pool_scrub_setup_sync(dp, &func, kcred, tx); } - if (dp->dp_spa->spa_root_vdev->vdev_stat.vs_scrub_type == 0) { + if (spa->spa_root_vdev->vdev_stat.vs_scrub_type == 0) { /* * We must have resumed after rebooting; reset the vdev * stats to know that we're doing a scrub (although it * will think we're just starting now). */ - vdev_scrub_stat_update(dp->dp_spa->spa_root_vdev, + vdev_scrub_stat_update(spa->spa_root_vdev, dp->dp_scrub_min_txg ? POOL_SCRUB_RESILVER : POOL_SCRUB_EVERYTHING, B_FALSE); } @@ -771,7 +795,7 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) dp->dp_scrub_pausing = B_FALSE; dp->dp_scrub_start_time = lbolt64; dp->dp_scrub_isresilver = (dp->dp_scrub_min_txg != 0); - dp->dp_spa->spa_scrub_active = B_TRUE; + spa->spa_scrub_active = B_TRUE; if (dp->dp_scrub_bookmark.zb_objset == 0) { /* First do the MOS & ORIGIN */ @@ -779,8 +803,8 @@ dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (dp->dp_scrub_pausing) goto out; - if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) { - VERIFY(0 == dmu_objset_find_spa(dp->dp_spa, + if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) { + VERIFY(0 == dmu_objset_find_spa(spa, NULL, enqueue_cb, tx, DS_FIND_CHILDREN)); } else { scrub_visitds(dp, dp->dp_origin_snap->ds_object, tx); @@ -830,15 +854,13 @@ out: VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1, - &dp->dp_spa->spa_scrub_errors, tx)); + &spa->spa_scrub_errors, tx)); /* XXX this is scrub-clean specific */ - mutex_enter(&dp->dp_spa->spa_scrub_lock); - while (dp->dp_spa->spa_scrub_inflight > 0) { - cv_wait(&dp->dp_spa->spa_scrub_io_cv, - &dp->dp_spa->spa_scrub_lock); - } - mutex_exit(&dp->dp_spa->spa_scrub_lock); + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight > 0) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); + mutex_exit(&spa->spa_scrub_lock); } void @@ -920,13 +942,17 @@ static int dsl_pool_scrub_clean_cb(dsl_pool_t *dp, const blkptr_t *bp, const zbookmark_t *zb) { - size_t size = BP_GET_LSIZE(bp); - int d; + size_t size = BP_GET_PSIZE(bp); spa_t *spa = dp->dp_spa; boolean_t needs_io; - int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL; + int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; int zio_priority; + ASSERT(bp->blk_birth > dp->dp_scrub_min_txg); + + if (bp->blk_birth >= dp->dp_scrub_max_txg) + return (0); + count_block(dp->dp_blkstats, bp); if (dp->dp_scrub_isresilver == 0) { @@ -945,7 +971,7 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp, if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET) zio_flags |= ZIO_FLAG_SPECULATIVE; - for (d = 0; d < BP_GET_NDVAS(bp); d++) { + for (int d = 0; d < BP_GET_NDVAS(bp); d++) { vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[d])); @@ -963,16 +989,17 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp, if (DVA_GET_GANG(&bp->blk_dva[d])) { /* * Gang members may be spread across multiple - * vdevs, so the best we can do is look at the - * pool-wide DTL. + * vdevs, so the best estimate we have is the + * scrub range, which has already been checked. * XXX -- it would be better to change our - * allocation policy to ensure that this can't - * happen. + * allocation policy to ensure that all + * gang members reside on the same vdev. */ - vd = spa->spa_root_vdev; + needs_io = B_TRUE; + } else { + needs_io = vdev_dtl_contains(vd, DTL_PARTIAL, + bp->blk_birth, 1); } - needs_io = vdev_dtl_contains(&vd->vdev_dtl_map, - bp->blk_birth, 1); } } @@ -997,6 +1024,8 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp, int dsl_pool_scrub_clean(dsl_pool_t *dp) { + spa_t *spa = dp->dp_spa; + /* * Purge all vdev caches. We do this here rather than in sync * context because this requires a writer lock on the spa_config @@ -1004,11 +1033,11 @@ dsl_pool_scrub_clean(dsl_pool_t *dp) * spa_scrub_reopen flag indicates that vdev_open() should not * attempt to start another scrub. */ - spa_config_enter(dp->dp_spa, SCL_ALL, FTAG, RW_WRITER); - dp->dp_spa->spa_scrub_reopen = B_TRUE; - vdev_reopen(dp->dp_spa->spa_root_vdev); - dp->dp_spa->spa_scrub_reopen = B_FALSE; - spa_config_exit(dp->dp_spa, SCL_ALL, FTAG); + spa_vdev_state_enter(spa); + spa->spa_scrub_reopen = B_TRUE; + vdev_reopen(spa->spa_root_vdev); + spa->spa_scrub_reopen = B_FALSE; + (void) spa_vdev_state_exit(spa, NULL, 0); return (dsl_pool_scrub_setup(dp, SCRUB_FUNC_CLEAN)); }