* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
static scrub_cb_t dsl_pool_scrub_clean_cb;
static dsl_syncfunc_t dsl_pool_scrub_cancel_sync;
+static void scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+ uint64_t objset, uint64_t object);
int zfs_scrub_min_time = 1; /* scrub for at least 1 sec each txg */
int zfs_resilver_min_time = 3; /* resilver for at least 3 sec each txg */
ESC_ZFS_RESILVER_START);
dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg,
tx->tx_txg);
+ } else {
+ spa_event_notify(dp->dp_spa, NULL,
+ ESC_ZFS_SCRUB_START);
}
/* zero out the scrub stats in all vdev_stat_t's */
*/
vdev_dtl_reassess(dp->dp_spa->spa_root_vdev, tx->tx_txg,
*completep ? dp->dp_scrub_max_txg : 0, B_TRUE);
- if (dp->dp_scrub_min_txg && *completep)
- spa_event_notify(dp->dp_spa, NULL, ESC_ZFS_RESILVER_FINISH);
+ if (*completep)
+ spa_event_notify(dp->dp_spa, NULL, dp->dp_scrub_min_txg ?
+ ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
spa_errlog_rotate(dp->dp_spa);
/*
if (bp->blk_birth <= dp->dp_scrub_min_txg)
return;
+ /*
+ * One block ("stubby") can be allocated a long time ago; we
+ * want to visit that one because it has been allocated
+ * (on-disk) even if it hasn't been claimed (even though for
+ * plain scrub there's nothing to do to it).
+ */
if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa))
return;
if (bp->blk_birth <= dp->dp_scrub_min_txg)
return;
+ /*
+ * birth can be < claim_txg if this record's txg is
+ * already txg sync'ed (but this log block contains
+ * other records that are not synced)
+ */
if (claim_txg == 0 || bp->blk_birth < claim_txg)
return;
* We only want to visit blocks that have been claimed but not yet
* replayed (or, in read-only mode, blocks that *would* be claimed).
*/
- if (claim_txg == 0 && (spa_mode & FWRITE))
+ if (claim_txg == 0 && spa_writeable(dp->dp_spa))
return;
zilog = zil_alloc(dp->dp_meta_objset, zh);
int err;
arc_buf_t *buf = NULL;
- if (bp->blk_birth == 0)
- return;
-
if (bp->blk_birth <= dp->dp_scrub_min_txg)
return;
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
uint32_t flags = ARC_WAIT;
dnode_phys_t *child_dnp;
- int i, j;
+ int i;
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
err = arc_read(NULL, dp->dp_spa, bp, pbuf,
child_dnp = buf->b_data;
for (i = 0; i < epb; i++, child_dnp++) {
- for (j = 0; j < child_dnp->dn_nblkptr; j++) {
- zbookmark_t czb;
-
- SET_BOOKMARK(&czb, zb->zb_objset,
- zb->zb_blkid * epb + i,
- child_dnp->dn_nlevels - 1, j);
- scrub_visitbp(dp, child_dnp, buf,
- &child_dnp->dn_blkptr[j], &czb);
- }
+ scrub_visitdnode(dp, child_dnp, buf, zb->zb_objset,
+ zb->zb_blkid * epb + i);
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
uint32_t flags = ARC_WAIT;
objset_phys_t *osp;
- int j;
err = arc_read_nolock(NULL, dp->dp_spa, bp,
arc_getbuf_func, &buf,
traverse_zil(dp, &osp->os_zil_header);
- for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
- zbookmark_t czb;
-
- SET_BOOKMARK(&czb, zb->zb_objset, 0,
- osp->os_meta_dnode.dn_nlevels - 1, j);
- scrub_visitbp(dp, &osp->os_meta_dnode, buf,
- &osp->os_meta_dnode.dn_blkptr[j], &czb);
+ scrub_visitdnode(dp, &osp->os_meta_dnode,
+ buf, zb->zb_objset, 0);
+ if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
+ scrub_visitdnode(dp, &osp->os_userused_dnode,
+ buf, zb->zb_objset, 0);
+ scrub_visitdnode(dp, &osp->os_groupused_dnode,
+ buf, zb->zb_objset, 0);
}
}
}
static void
+/*
+ * Scrub helper: visit every top-level block pointer of one on-disk dnode.
+ * For each of the dnode's dn_nblkptr block pointers we build a bookmark at
+ * the dnode's highest indirection level (dn_nlevels - 1) and hand it to
+ * scrub_visitbp(), which walks down from there.  Factored out so both the
+ * DMU_OT_DNODE path and the objset meta/userused/groupused dnodes share it.
+ *
+ * dp      - pool being scrubbed
+ * dnp     - on-disk dnode whose block pointers are visited
+ * buf     - ARC buffer holding dnp (kept alive across the visit)
+ * objset  - objset id recorded in the bookmark
+ * object  - object number recorded in the bookmark
+ */
+scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
+ uint64_t objset, uint64_t object)
+{
+ int j;
+
+ for (j = 0; j < dnp->dn_nblkptr; j++) {
+ zbookmark_t czb;
+
+ /* level is dn_nlevels - 1: start at the dnode's top indirect level */
+ SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
+ scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb);
+ }
+
+}
+
+static void
scrub_visit_rootbp(dsl_pool_t *dp, dsl_dataset_t *ds, blkptr_t *bp)
{
zbookmark_t zb;
void
dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx)
{
+ spa_t *spa = dp->dp_spa;
zap_cursor_t zc;
zap_attribute_t za;
boolean_t complete = B_TRUE;
if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
return;
- /* If the spa is not fully loaded, don't bother. */
- if (dp->dp_spa->spa_load_state != SPA_LOAD_NONE)
+ /*
+ * If the pool is not loaded, or is trying to unload, leave it alone.
+ */
+ if (spa->spa_load_state != SPA_LOAD_NONE || spa_shutting_down(spa))
return;
if (dp->dp_scrub_restart) {
dsl_pool_scrub_setup_sync(dp, &func, kcred, tx);
}
- if (dp->dp_spa->spa_root_vdev->vdev_stat.vs_scrub_type == 0) {
+ if (spa->spa_root_vdev->vdev_stat.vs_scrub_type == 0) {
/*
* We must have resumed after rebooting; reset the vdev
* stats to know that we're doing a scrub (although it
* will think we're just starting now).
*/
- vdev_scrub_stat_update(dp->dp_spa->spa_root_vdev,
+ vdev_scrub_stat_update(spa->spa_root_vdev,
dp->dp_scrub_min_txg ? POOL_SCRUB_RESILVER :
POOL_SCRUB_EVERYTHING, B_FALSE);
}
dp->dp_scrub_pausing = B_FALSE;
dp->dp_scrub_start_time = lbolt64;
dp->dp_scrub_isresilver = (dp->dp_scrub_min_txg != 0);
- dp->dp_spa->spa_scrub_active = B_TRUE;
+ spa->spa_scrub_active = B_TRUE;
if (dp->dp_scrub_bookmark.zb_objset == 0) {
/* First do the MOS & ORIGIN */
if (dp->dp_scrub_pausing)
goto out;
- if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
- VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
+ if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) {
+ VERIFY(0 == dmu_objset_find_spa(spa,
NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
} else {
scrub_visitds(dp, dp->dp_origin_snap->ds_object, tx);
VERIFY(0 == zap_update(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
- &dp->dp_spa->spa_scrub_errors, tx));
+ &spa->spa_scrub_errors, tx));
/* XXX this is scrub-clean specific */
- mutex_enter(&dp->dp_spa->spa_scrub_lock);
- while (dp->dp_spa->spa_scrub_inflight > 0) {
- cv_wait(&dp->dp_spa->spa_scrub_io_cv,
- &dp->dp_spa->spa_scrub_lock);
- }
- mutex_exit(&dp->dp_spa->spa_scrub_lock);
+ mutex_enter(&spa->spa_scrub_lock);
+ while (spa->spa_scrub_inflight > 0)
+ cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+ mutex_exit(&spa->spa_scrub_lock);
}
void
dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
const blkptr_t *bp, const zbookmark_t *zb)
{
- size_t size = BP_GET_LSIZE(bp);
- int d;
+ size_t size = BP_GET_PSIZE(bp);
spa_t *spa = dp->dp_spa;
boolean_t needs_io;
- int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL;
+ int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
int zio_priority;
+ ASSERT(bp->blk_birth > dp->dp_scrub_min_txg);
+
+ if (bp->blk_birth >= dp->dp_scrub_max_txg)
+ return (0);
+
count_block(dp->dp_blkstats, bp);
if (dp->dp_scrub_isresilver == 0) {
if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
zio_flags |= ZIO_FLAG_SPECULATIVE;
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
+ for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
vdev_t *vd = vdev_lookup_top(spa,
DVA_GET_VDEV(&bp->blk_dva[d]));
if (DVA_GET_GANG(&bp->blk_dva[d])) {
/*
* Gang members may be spread across multiple
- * vdevs, so the best we can do is look at the
- * pool-wide DTL.
+ * vdevs, so the best estimate we have is the
+ * scrub range, which has already been checked.
* XXX -- it would be better to change our
- * allocation policy to ensure that this can't
- * happen.
+ * allocation policy to ensure that all
+ * gang members reside on the same vdev.
*/
- vd = spa->spa_root_vdev;
+ needs_io = B_TRUE;
+ } else {
+ needs_io = vdev_dtl_contains(vd, DTL_PARTIAL,
+ bp->blk_birth, 1);
}
- needs_io = vdev_dtl_contains(&vd->vdev_dtl_map,
- bp->blk_birth, 1);
}
}
int
dsl_pool_scrub_clean(dsl_pool_t *dp)
{
+ spa_t *spa = dp->dp_spa;
+
/*
* Purge all vdev caches. We do this here rather than in sync
* context because this requires a writer lock on the spa_config
* spa_scrub_reopen flag indicates that vdev_open() should not
* attempt to start another scrub.
*/
- spa_config_enter(dp->dp_spa, SCL_ALL, FTAG, RW_WRITER);
- dp->dp_spa->spa_scrub_reopen = B_TRUE;
- vdev_reopen(dp->dp_spa->spa_root_vdev);
- dp->dp_spa->spa_scrub_reopen = B_FALSE;
- spa_config_exit(dp->dp_spa, SCL_ALL, FTAG);
+ spa_vdev_state_enter(spa);
+ spa->spa_scrub_reopen = B_TRUE;
+ vdev_reopen(spa->spa_root_vdev);
+ spa->spa_scrub_reopen = B_FALSE;
+ (void) spa_vdev_state_exit(spa, NULL, 0);
return (dsl_pool_scrub_setup(dp, SCRUB_FUNC_CLEAN));
}