-static void
-spa_scrub_io_done(zio_t *zio)
-{
- spa_t *spa = zio->io_spa;
-
- arc_data_buf_free(zio->io_data, zio->io_size);
-
- mutex_enter(&spa->spa_scrub_lock);
- if (zio->io_error && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
- vdev_t *vd = zio->io_vd ? zio->io_vd : spa->spa_root_vdev;
- spa->spa_scrub_errors++;
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_scrub_errors++;
- mutex_exit(&vd->vdev_stat_lock);
- }
-
- if (--spa->spa_scrub_inflight < spa->spa_scrub_maxinflight)
- cv_broadcast(&spa->spa_scrub_io_cv);
-
- ASSERT(spa->spa_scrub_inflight >= 0);
-
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-static void
-spa_scrub_io_start(spa_t *spa, blkptr_t *bp, int priority, int flags,
- zbookmark_t *zb)
-{
- size_t size = BP_GET_LSIZE(bp);
- void *data;
-
- mutex_enter(&spa->spa_scrub_lock);
- /*
- * Do not give too much work to vdev(s).
- */
- while (spa->spa_scrub_inflight >= spa->spa_scrub_maxinflight) {
- cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
- }
- spa->spa_scrub_inflight++;
- mutex_exit(&spa->spa_scrub_lock);
-
- data = arc_data_buf_alloc(size);
-
- if (zb->zb_level == -1 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
- flags |= ZIO_FLAG_SPECULATIVE; /* intent log block */
-
- flags |= ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL;
-
- zio_nowait(zio_read(NULL, spa, bp, data, size,
- spa_scrub_io_done, NULL, priority, flags, zb));
-}
-
-/* ARGSUSED */
-static int
-spa_scrub_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
-{
- blkptr_t *bp = &bc->bc_blkptr;
- vdev_t *vd = spa->spa_root_vdev;
- dva_t *dva = bp->blk_dva;
- int needs_resilver = B_FALSE;
- int d;
-
- if (bc->bc_errno) {
- /*
- * We can't scrub this block, but we can continue to scrub
- * the rest of the pool. Note the error and move along.
- */
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_errors++;
- mutex_exit(&spa->spa_scrub_lock);
-
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_scrub_errors++;
- mutex_exit(&vd->vdev_stat_lock);
-
- return (ERESTART);
- }
-
- ASSERT(bp->blk_birth < spa->spa_scrub_maxtxg);
-
- for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[d]));
-
- ASSERT(vd != NULL);
-
- /*
- * Keep track of how much data we've examined so that
- * zpool(1M) status can make useful progress reports.
- */
- mutex_enter(&vd->vdev_stat_lock);
- vd->vdev_stat.vs_scrub_examined += DVA_GET_ASIZE(&dva[d]);
- mutex_exit(&vd->vdev_stat_lock);
-
- if (spa->spa_scrub_type == POOL_SCRUB_RESILVER) {
- if (DVA_GET_GANG(&dva[d])) {
- /*
- * Gang members may be spread across multiple
- * vdevs, so the best we can do is look at the
- * pool-wide DTL.
- * XXX -- it would be better to change our
- * allocation policy to ensure that this can't
- * happen.
- */
- vd = spa->spa_root_vdev;
- }
- if (vdev_dtl_contains(&vd->vdev_dtl_map,
- bp->blk_birth, 1))
- needs_resilver = B_TRUE;
- }
- }
-
- if (spa->spa_scrub_type == POOL_SCRUB_EVERYTHING)
- spa_scrub_io_start(spa, bp, ZIO_PRIORITY_SCRUB,
- ZIO_FLAG_SCRUB, &bc->bc_bookmark);
- else if (needs_resilver)
- spa_scrub_io_start(spa, bp, ZIO_PRIORITY_RESILVER,
- ZIO_FLAG_RESILVER, &bc->bc_bookmark);
-
- return (0);
-}
-
-static void
-spa_scrub_thread(spa_t *spa)
-{
- callb_cpr_t cprinfo;
- traverse_handle_t *th = spa->spa_scrub_th;
- vdev_t *rvd = spa->spa_root_vdev;
- pool_scrub_type_t scrub_type = spa->spa_scrub_type;
- int error = 0;
- boolean_t complete;
-
- CALLB_CPR_INIT(&cprinfo, &spa->spa_scrub_lock, callb_generic_cpr, FTAG);
-
- /*
- * If we're restarting due to a snapshot create/delete,
- * wait for that to complete.
- */
- txg_wait_synced(spa_get_dsl(spa), 0);
-
- dprintf("start %s mintxg=%llu maxtxg=%llu\n",
- scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub",
- spa->spa_scrub_mintxg, spa->spa_scrub_maxtxg);
-
- spa_config_enter(spa, RW_WRITER, FTAG);
- vdev_reopen(rvd); /* purge all vdev caches */
- vdev_config_dirty(rvd); /* rewrite all disk labels */
- vdev_scrub_stat_update(rvd, scrub_type, B_FALSE);
- spa_config_exit(spa, FTAG);
-
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_errors = 0;
- spa->spa_scrub_active = 1;
- ASSERT(spa->spa_scrub_inflight == 0);
-
- while (!spa->spa_scrub_stop) {
- CALLB_CPR_SAFE_BEGIN(&cprinfo);
- while (spa->spa_scrub_suspended) {
- spa->spa_scrub_active = 0;
- cv_broadcast(&spa->spa_scrub_cv);
- cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock);
- spa->spa_scrub_active = 1;
- }
- CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_scrub_lock);
-
- if (spa->spa_scrub_restart_txg != 0)
- break;
-
- mutex_exit(&spa->spa_scrub_lock);
- error = traverse_more(th);
- mutex_enter(&spa->spa_scrub_lock);
- if (error != EAGAIN)
- break;
- }
-
- while (spa->spa_scrub_inflight)
- cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
-
- spa->spa_scrub_active = 0;
- cv_broadcast(&spa->spa_scrub_cv);
-
- mutex_exit(&spa->spa_scrub_lock);
-
- spa_config_enter(spa, RW_WRITER, FTAG);
-
- mutex_enter(&spa->spa_scrub_lock);
-
- /*
- * Note: we check spa_scrub_restart_txg under both spa_scrub_lock
- * AND the spa config lock to synchronize with any config changes
- * that revise the DTLs under spa_vdev_enter() / spa_vdev_exit().
- */
- if (spa->spa_scrub_restart_txg != 0)
- error = ERESTART;
-
- if (spa->spa_scrub_stop)
- error = EINTR;
-
- /*
- * Even if there were uncorrectable errors, we consider the scrub
- * completed. The downside is that if there is a transient error during
- * a resilver, we won't resilver the data properly to the target. But
- * if the damage is permanent (more likely) we will resilver forever,
- * which isn't really acceptable. Since there is enough information for
- * the user to know what has failed and why, this seems like a more
- * tractable approach.
- */
- complete = (error == 0);
-
- dprintf("end %s to maxtxg=%llu %s, traverse=%d, %llu errors, stop=%u\n",
- scrub_type == POOL_SCRUB_RESILVER ? "resilver" : "scrub",
- spa->spa_scrub_maxtxg, complete ? "done" : "FAILED",
- error, spa->spa_scrub_errors, spa->spa_scrub_stop);
-
- mutex_exit(&spa->spa_scrub_lock);
-
- /*
- * If the scrub/resilver completed, update all DTLs to reflect this.
- * Whether it succeeded or not, vacate all temporary scrub DTLs.
- */
- vdev_dtl_reassess(rvd, spa_last_synced_txg(spa) + 1,
- complete ? spa->spa_scrub_maxtxg : 0, B_TRUE);
- vdev_scrub_stat_update(rvd, POOL_SCRUB_NONE, complete);
- spa_errlog_rotate(spa);
-
- if (scrub_type == POOL_SCRUB_RESILVER && complete)
- spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_FINISH);
-
- spa_config_exit(spa, FTAG);
-
- mutex_enter(&spa->spa_scrub_lock);
-
- /*
- * We may have finished replacing a device.
- * Let the async thread assess this and handle the detach.
- */
- spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
-
- /*
- * If we were told to restart, our final act is to start a new scrub.
- */
- if (error == ERESTART)
- spa_async_request(spa, scrub_type == POOL_SCRUB_RESILVER ?
- SPA_ASYNC_RESILVER : SPA_ASYNC_SCRUB);
-
- spa->spa_scrub_type = POOL_SCRUB_NONE;
- spa->spa_scrub_active = 0;
- spa->spa_scrub_thread = NULL;
- cv_broadcast(&spa->spa_scrub_cv);
- CALLB_CPR_EXIT(&cprinfo); /* drops &spa->spa_scrub_lock */
- thread_exit();
-}
-
-void
-spa_scrub_suspend(spa_t *spa)
-{
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_suspended++;
- while (spa->spa_scrub_active) {
- cv_broadcast(&spa->spa_scrub_cv);
- cv_wait(&spa->spa_scrub_cv, &spa->spa_scrub_lock);
- }
- while (spa->spa_scrub_inflight)
- cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-void
-spa_scrub_resume(spa_t *spa)
-{
- mutex_enter(&spa->spa_scrub_lock);
- ASSERT(spa->spa_scrub_suspended != 0);
- if (--spa->spa_scrub_suspended == 0)
- cv_broadcast(&spa->spa_scrub_cv);
- mutex_exit(&spa->spa_scrub_lock);
-}
-
-void
-spa_scrub_restart(spa_t *spa, uint64_t txg)
-{
- /*
- * Something happened (e.g. snapshot create/delete) that means
- * we must restart any in-progress scrubs. The itinerary will
- * fix this properly.
- */
- mutex_enter(&spa->spa_scrub_lock);
- spa->spa_scrub_restart_txg = txg;
- mutex_exit(&spa->spa_scrub_lock);
-}
-