X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fdsl_scan.c;h=d56aabbb14984d2d0ca5c4770f2317ee4012d0e0;hb=22872ff5da0d12fe393e40d3e4b3c5a58d93e360;hp=56d41083673eba5d2cf9d47baedb63f06e43364e;hpb=572e285762521df27fe5b026f409ba1a21abb7ac;p=zfs.git diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 56d4108..d56aabb 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -50,9 +50,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); -static scan_cb_t dsl_scan_defrag_cb; static scan_cb_t dsl_scan_scrub_cb; -static scan_cb_t dsl_scan_remove_cb; static dsl_syncfunc_t dsl_scan_cancel_sync; static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); @@ -64,8 +62,8 @@ int zfs_scan_idle = 50; /* idle window in clock ticks */ int zfs_scan_min_time_ms = 1000; /* min millisecs to scrub per txg */ int zfs_free_min_time_ms = 1000; /* min millisecs to free per txg */ int zfs_resilver_min_time_ms = 3000; /* min millisecs to resilver per txg */ -boolean_t zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ -boolean_t zfs_no_scrub_prefetch = B_FALSE; /* set to disable srub prefetching */ +int zfs_no_scrub_io = B_FALSE; /* set to disable scrub i/o */ +int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetching */ enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE; int dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */ @@ -194,9 +192,9 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (vdev_resilver_needed(spa->spa_root_vdev, &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { - spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_RESILVER_START); } else { - spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START); } spa->spa_scrub_started = B_TRUE; @@ -213,8 +211,8 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) /* back to
the generic stuff */ if (dp->dp_blkstats == NULL) { - dp->dp_blkstats = - kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP); + dp->dp_blkstats = kmem_alloc(sizeof (zfs_all_blkstats_t), + KM_SLEEP | KM_NODEBUG); } bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); @@ -297,7 +295,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE); if (complete) { spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ? - ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); + FM_EREPORT_ZFS_RESILVER_FINISH : + FM_EREPORT_ZFS_SCRUB_FINISH); } spa_errlog_rotate(spa); @@ -649,7 +648,7 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp, * Return nonzero on i/o error. * Return new buf to write out in *bufp. */ -static int +__attribute__((always_inline)) static int dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb, dmu_tx_t *tx, arc_buf_t **bufp) @@ -755,7 +754,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, return (0); } -static void +__attribute__((always_inline)) static void dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype, dnode_phys_t *dnp, arc_buf_t *buf, uint64_t object, dmu_tx_t *tx) @@ -792,29 +791,39 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, { dsl_pool_t *dp = scn->scn_dp; arc_buf_t *buf = NULL; - blkptr_t bp_toread = *bp; + blkptr_t *bp_toread; + + bp_toread = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); + *bp_toread = *bp; /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ if (dsl_scan_check_pause(scn, zb)) - return; + goto out; if (dsl_scan_check_resume(scn, dnp, zb)) - return; + goto out; if (bp->blk_birth == 0) - return; + goto out; scn->scn_visited_this_txg++; - dprintf_bp(bp, - "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p", - ds, ds ? 
ds->ds_object : 0, - zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, - pbuf, bp); + /* + * This debugging is commented out to conserve stack space. This + * function is called recursively and the debugging adds several + * bytes to the stack for each call. It can be commented back in + * if required to debug an issue in dsl_scan_visitbp(). + * + * dprintf_bp(bp, + * "visiting ds=%p/%llu zb=%llx/%llx/%llx/%llx buf=%p bp=%p", + * ds, ds ? ds->ds_object : 0, + * zb->zb_objset, zb->zb_object, zb->zb_level, zb->zb_blkid, + * pbuf, bp); + */ if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) - return; + goto out; if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) { /* @@ -829,12 +838,12 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, * it (original untranslated -> translations from * deleted snap -> now). */ - bp_toread = *bp; + *bp_toread = *bp; } - if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx, + if (dsl_scan_recurse(scn, ds, ostype, dnp, bp_toread, zb, tx, &buf) != 0) - return; + goto out; /* * If dsl_scan_ddt() has aready visited this block, it will have @@ -844,7 +853,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, if (ddt_class_contains(dp->dp_spa, scn->scn_phys.scn_ddt_class_max, bp)) { ASSERT(buf == NULL); - return; + goto out; } /* @@ -859,6 +868,8 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb, } if (buf) (void) arc_buf_remove_ref(buf, &buf); +out: + kmem_free(bp_toread, sizeof(blkptr_t)); } static void @@ -1076,6 +1087,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) dsl_pool_t *dp = scn->scn_dp; dsl_dataset_t *ds; objset_t *os; + char *dsname; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); @@ -1098,7 +1110,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_scan_visit_rootbp(scn, ds, &ds->ds_phys->ds_bp, tx); - char *dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP); + dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP);
dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " "pausing=%u", @@ -1250,10 +1262,12 @@ static void dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx) { ddt_bookmark_t *ddb = &scn->scn_phys.scn_ddt_bookmark; - ddt_entry_t dde = { 0 }; + ddt_entry_t dde; int error; uint64_t n = 0; + bzero(&dde, sizeof (ddt_entry_t)); + while ((error = ddt_walk(scn->scn_dp->dp_spa, ddb, &dde)) == 0) { ddt_t *ddt; @@ -1294,11 +1308,12 @@ dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum, ddt_phys_t *ddp = dde->dde_phys; blkptr_t bp; zbookmark_t zb = { 0 }; + int p; if (scn->scn_phys.scn_state != DSS_SCANNING) return; - for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { + for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0 || ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg) continue; @@ -1313,8 +1328,8 @@ static void dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) { dsl_pool_t *dp = scn->scn_dp; - zap_cursor_t zc; - zap_attribute_t za; + zap_cursor_t *zc; + zap_attribute_t *za; if (scn->scn_phys.scn_ddt_bookmark.ddb_class <= scn->scn_phys.scn_ddt_class_max) { @@ -1362,24 +1377,26 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) * bookmark so we don't think that we're still trying to resume. 
*/ bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_t)); + zc = kmem_alloc(sizeof(zap_cursor_t), KM_SLEEP); + za = kmem_alloc(sizeof(zap_attribute_t), KM_SLEEP); /* keep pulling things out of the zap-object-as-queue */ - while (zap_cursor_init(&zc, dp->dp_meta_objset, + while (zap_cursor_init(zc, dp->dp_meta_objset, scn->scn_phys.scn_queue_obj), - zap_cursor_retrieve(&zc, &za) == 0) { + zap_cursor_retrieve(zc, za) == 0) { dsl_dataset_t *ds; uint64_t dsobj; - dsobj = strtonum(za.za_name, NULL); + dsobj = strtonum(za->za_name, NULL); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, dsobj, tx)); /* Set up min/max txg */ VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); - if (za.za_first_integer != 0) { + if (za->za_first_integer != 0) { scn->scn_phys.scn_cur_min_txg = MAX(scn->scn_phys.scn_min_txg, - za.za_first_integer); + za->za_first_integer); } else { scn->scn_phys.scn_cur_min_txg = MAX(scn->scn_phys.scn_min_txg, @@ -1389,11 +1406,14 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) dsl_dataset_rele(ds, FTAG); dsl_scan_visitds(scn, dsobj, tx); - zap_cursor_fini(&zc); + zap_cursor_fini(zc); if (scn->scn_pausing) - return; + goto out; } - zap_cursor_fini(&zc); + zap_cursor_fini(zc); +out: + kmem_free(za, sizeof(zap_attribute_t)); + kmem_free(zc, sizeof(zap_cursor_t)); } static int @@ -1658,10 +1678,11 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, size_t size = BP_GET_PSIZE(bp); spa_t *spa = dp->dp_spa; uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp); - boolean_t needs_io; + boolean_t needs_io = B_FALSE; int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL; - int zio_priority; + int zio_priority = 0; int scan_delay = 0; + int d; if (phys_birth <= scn->scn_phys.scn_min_txg || phys_birth >= scn->scn_phys.scn_max_txg) @@ -1686,7 +1707,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, if (zb->zb_level == ZB_ZIL_LEVEL) zio_flags |= ZIO_FLAG_SPECULATIVE; - for (int d = 0; d < BP_GET_NDVAS(bp); d++) { + for (d = 0; d < 
BP_GET_NDVAS(bp); d++) { vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(&bp->blk_dva[d])); @@ -1764,3 +1785,35 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) return (dsl_sync_task_do(dp, dsl_scan_setup_check, dsl_scan_setup_sync, dp->dp_scan, &func, 0)); } + +#if defined(_KERNEL) && defined(HAVE_SPL) +module_param(zfs_top_maxinflight, int, 0644); +MODULE_PARM_DESC(zfs_top_maxinflight, "Max I/Os per top-level"); + +module_param(zfs_resilver_delay, int, 0644); +MODULE_PARM_DESC(zfs_resilver_delay, "Number of ticks to delay resilver"); + +module_param(zfs_scrub_delay, int, 0644); +MODULE_PARM_DESC(zfs_scrub_delay, "Number of ticks to delay scrub"); + +module_param(zfs_scan_idle, int, 0644); +MODULE_PARM_DESC(zfs_scan_idle, "Idle window in clock ticks"); + +module_param(zfs_scan_min_time_ms, int, 0644); +MODULE_PARM_DESC(zfs_scan_min_time_ms, "Min millisecs to scrub per txg"); + +module_param(zfs_free_min_time_ms, int, 0644); +MODULE_PARM_DESC(zfs_free_min_time_ms, "Min millisecs to free per txg"); + +module_param(zfs_resilver_min_time_ms, int, 0644); +MODULE_PARM_DESC(zfs_resilver_min_time_ms, "Min millisecs to resilver per txg"); + +module_param(zfs_no_scrub_io, int, 0644); +MODULE_PARM_DESC(zfs_no_scrub_io, "Set to disable scrub I/O"); + +module_param(zfs_no_scrub_prefetch, int, 0644); +MODULE_PARM_DESC(zfs_no_scrub_prefetch, "Set to disable scrub prefetching"); + +module_param(zfs_txg_timeout, int, 0644); +MODULE_PARM_DESC(zfs_txg_timeout, "Max seconds worth of delta per txg"); +#endif