X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzio.c;h=fda76123e4927315bd01d2f5cfb0f4278f368ca0;hb=55d85d5a8c45c4559a4a0e675c37b0c3afb19c2f;hp=66f228bc7807fbab4fae6b1dc9f0306e9df400f7;hpb=9ae529ec5dbdc828ff8326beae58062971d74b2e;p=zfs.git diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 66f228b..fda7612 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -85,6 +85,22 @@ extern vmem_t *zio_alloc_arena; extern int zfs_mg_alloc_failures; /* + * The following actions directly affect the spa's sync-to-convergence logic. + * The values below define the sync pass when we start performing the action. + * Care should be taken when changing these values as they directly impact + * spa_sync() performance. Tuning these values may introduce subtle performance + * pathologies and should only be done in the context of performance analysis. + * These tunables will eventually be removed and replaced with #defines once + * enough analysis has been done to determine optimal values. + * + * The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that + * regular blocks are not deferred. + */ +int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */ +int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */ +int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */ + +/* * An allocating zio is one that either currently has the DVA allocate * stage set or will have it later in its lifetime.
*/ @@ -210,6 +226,8 @@ zio_init(void) zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); zio_inject_init(); + + lz4_init(); } void @@ -238,6 +256,8 @@ zio_fini(void) kmem_cache_destroy(zio_cache); zio_inject_fini(); + + lz4_fini(); } /* @@ -605,6 +625,9 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio->io_vsd_ops = NULL; zio->io_offset = offset; zio->io_deadline = 0; + zio->io_timestamp = 0; + zio->io_delta = 0; + zio->io_delay = 0; zio->io_orig_data = zio->io_data = data; zio->io_orig_size = zio->io_size = size; zio->io_orig_flags = zio->io_flags = flags; @@ -616,7 +639,6 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio->io_bp_override = NULL; zio->io_walk_link = NULL; zio->io_transform_stack = NULL; - zio->io_delay = 0; zio->io_error = 0; zio->io_child_count = 0; zio->io_parent_count = 0; @@ -765,7 +787,7 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!BP_IS_HOLE(bp)); ASSERT(spa_syncing_txg(spa) == txg); - ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE); + ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free); zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp), NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags, @@ -1062,7 +1084,7 @@ zio_write_bp_init(zio_t *zio) ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL); ASSERT(!BP_GET_DEDUP(bp)); - if (pass > SYNC_PASS_DONT_COMPRESS) + if (pass >= zfs_sync_pass_dont_compress) compress = ZIO_COMPRESS_OFF; /* Make sure someone doesn't change their mind on overwrites */ @@ -1091,7 +1113,7 @@ zio_write_bp_init(zio_t *zio) * There should only be a handful of blocks after pass 1 in any case. 
*/ if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize && - pass > SYNC_PASS_REWRITE) { + pass >= zfs_sync_pass_rewrite) { enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES; ASSERT(psize != 0); zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages; @@ -1248,7 +1270,7 @@ __zio_execute(zio_t *zio) while (zio->io_stage < ZIO_STAGE_DONE) { enum zio_stage pipeline = zio->io_pipeline; enum zio_stage stage = zio->io_stage; - dsl_pool_t *dsl; + dsl_pool_t *dp; boolean_t cut; int rv; @@ -1262,7 +1284,7 @@ __zio_execute(zio_t *zio) ASSERT(stage <= ZIO_STAGE_DONE); - dsl = spa_get_dsl(zio->io_spa); + dp = spa_get_dsl(zio->io_spa); cut = (stage == ZIO_STAGE_VDEV_IO_START) ? zio_requeue_io_start_cut_in_line : B_FALSE; @@ -1272,20 +1294,30 @@ __zio_execute(zio_t *zio) * or may wait for an I/O that needs an interrupt thread * to complete, issue async to avoid deadlock. * - * If we are in the txg_sync_thread or being called - * during pool init issue async to minimize stack depth. - * Both of these call paths may be recursively called. - * * For VDEV_IO_START, we cut in line so that the io will * be sent to disk promptly. */ - if (((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && - zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) || - (dsl != NULL && dsl_pool_sync_context(dsl))) { + if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && + zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) { zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); return; } +#ifdef _KERNEL + /* + * If we are executing in the context of the tx_sync_thread, + * or we are performing pool initialization outside of a + * zio_taskq[ZIO_TASKQ_ISSUE] context, then issue the zio + * async to minimize stack usage for these deep call paths.
+ */ + if ((dp && curthread == dp->dp_tx.tx_sync_thread) || + (dp && spa_is_initializing(dp->dp_spa) && + !zio_taskq_member(zio, ZIO_TASKQ_ISSUE))) { + zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); + return; + } +#endif + zio->io_stage = stage; rv = zio_pipeline[highbit(stage) - 1](zio); @@ -2235,8 +2267,11 @@ zio_ddt_free(zio_t *zio) ddt_enter(ddt); freedde = dde = ddt_lookup(ddt, bp, B_TRUE); - ddp = ddt_phys_select(dde, bp); - ddt_phys_decref(ddp); + if (dde) { + ddp = ddt_phys_select(dde, bp); + if (ddp) + ddt_phys_decref(ddp); + } ddt_exit(ddt); return (ZIO_PIPELINE_CONTINUE); @@ -2889,11 +2924,11 @@ zio_done(zio_t *zio) vdev_stat_update(zio, zio->io_size); /* - * If this I/O is attached to a particular vdev is slow, exeeding + * If this I/O is attached to a particular vdev is slow, exceeding * 30 seconds to complete, post an error described the I/O delay. * We ignore these errors if the device is currently unavailable. */ - if (zio->io_delay >= zio_delay_max) { + if (zio->io_delay >= MSEC_TO_TICK(zio_delay_max)) { if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, zio->io_vd, zio, 0, 0); @@ -3043,7 +3078,7 @@ zio_done(zio_t *zio) * Hand it off to the otherwise-unused claim taskq. */ ASSERT(taskq_empty_ent(&zio->io_tqent)); - (void) taskq_dispatch_ent( + taskq_dispatch_ent( zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE], (task_func_t *)zio_reexecute, zio, 0, &zio->io_tqent);