X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzio.c;h=0fa823687b51531df188a3c536af55fbd80d87c2;hb=ef3c1dea7024b07b4ace6115de9f22a99c1394d8;hp=fb60bd8a06a9f07a53be8182eb78857a9d7080c9;hpb=c776b317e44a64d53217d34c3fa61d36fd5a32d5;p=zfs.git diff --git a/module/zfs/zio.c b/module/zfs/zio.c index fb60bd8..0fa8236 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -61,8 +61,7 @@ uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE] = { * ========================================================================== */ char *zio_type_name[ZIO_TYPES] = { - "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim", - "zio_ioctl" + "z_null", "z_rd", "z_wr", "z_fr", "z_cl", "z_ioctl" }; /* @@ -74,6 +73,8 @@ kmem_cache_t *zio_cache; kmem_cache_t *zio_link_cache; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; +int zio_bulk_flags = 0; +int zio_delay_max = ZIO_DELAY_MAX; #ifdef _KERNEL extern vmem_t *zio_alloc_arena; @@ -85,7 +86,7 @@ extern vmem_t *zio_alloc_arena; */ #define IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE) -boolean_t zio_requeue_io_start_cut_in_line = B_TRUE; +int zio_requeue_io_start_cut_in_line = 1; #ifdef ZFS_DEBUG int zio_buf_debug_limit = 16384; @@ -93,6 +94,8 @@ int zio_buf_debug_limit = 16384; int zio_buf_debug_limit = 0; #endif +static inline void __zio_execute(zio_t *zio); + void zio_init(void) { @@ -134,12 +137,14 @@ zio_init(void) (void) sprintf(name, "zio_buf_%lu", (ulong_t)size); zio_buf_cache[c] = kmem_cache_create(name, size, align, NULL, NULL, NULL, NULL, NULL, - size > zio_buf_debug_limit ? KMC_NODEBUG : 0); + (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) | + zio_bulk_flags); (void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size); zio_data_buf_cache[c] = kmem_cache_create(name, size, align, NULL, NULL, NULL, NULL, data_alloc_arena, - size > zio_buf_debug_limit ? 
KMC_NODEBUG : 0); + (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) | + zio_bulk_flags); } } @@ -250,7 +255,7 @@ static void zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, zio_transform_func_t *transform) { - zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); + zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_PUSHPAGE); zt->zt_orig_data = zio->io_data; zt->zt_orig_size = zio->io_size; @@ -365,7 +370,7 @@ zio_unique_parent(zio_t *cio) void zio_add_child(zio_t *pio, zio_t *cio) { - zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); + zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_PUSHPAGE); int w; /* @@ -451,7 +456,7 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait) if (--*countp == 0 && pio->io_stall == countp) { pio->io_stall = NULL; mutex_exit(&pio->io_lock); - zio_execute(pio); + __zio_execute(pio); } else { mutex_exit(&pio->io_lock); } @@ -486,7 +491,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(vd || stage == ZIO_STAGE_OPEN); - zio = kmem_cache_alloc(zio_cache, KM_SLEEP); + zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE); bzero(zio, sizeof (zio_t)); mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1122,14 +1127,31 @@ zio_interrupt(zio_t *zio) */ static zio_pipe_stage_t *zio_pipeline[]; +/* + * zio_execute() is a wrapper around the static function + * __zio_execute() so that we can force __zio_execute() to be + * inlined. This reduces stack overhead which is important + * because __zio_execute() is called recursively in several zio + * code paths. zio_execute() itself cannot be inlined because + * it is externally visible. 
+ */ void zio_execute(zio_t *zio) { + __zio_execute(zio); +} + +__attribute__((always_inline)) +static inline void +__zio_execute(zio_t *zio) +{ zio->io_executor = curthread; while (zio->io_stage < ZIO_STAGE_DONE) { enum zio_stage pipeline = zio->io_pipeline; enum zio_stage stage = zio->io_stage; + dsl_pool_t *dsl; + boolean_t cut; int rv; ASSERT(!MUTEX_HELD(&zio->io_lock)); @@ -1142,19 +1164,26 @@ zio_execute(zio_t *zio) ASSERT(stage <= ZIO_STAGE_DONE); + dsl = spa_get_dsl(zio->io_spa); + cut = (stage == ZIO_STAGE_VDEV_IO_START) ? + zio_requeue_io_start_cut_in_line : B_FALSE; + /* * If we are in interrupt context and this pipeline stage * will grab a config lock that is held across I/O, * or may wait for an I/O that needs an interrupt thread * to complete, issue async to avoid deadlock. * + * If we are in the txg_sync_thread or being called + * during pool init issue async to minimize stack depth. + * Both of these call paths may be recursively called. + * * For VDEV_IO_START, we cut in line so that the io will * be sent to disk promptly. */ - if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && - zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) { - boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ? 
- zio_requeue_io_start_cut_in_line : B_FALSE; + if (((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && + zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) || + (dsl != NULL && dsl_pool_sync_context(dsl))) { zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); return; } @@ -1169,6 +1198,7 @@ zio_execute(zio_t *zio) } } + /* * ========================================================================== * Initiate I/O, either sync or async @@ -1184,7 +1214,7 @@ zio_wait(zio_t *zio) zio->io_waiter = curthread; - zio_execute(zio); + __zio_execute(zio); mutex_enter(&zio->io_lock); while (zio->io_executor != NULL) @@ -1214,7 +1244,7 @@ zio_nowait(zio_t *zio) zio_add_child(spa->spa_async_zio_root, zio); } - zio_execute(zio); + __zio_execute(zio); } /* @@ -1269,7 +1299,7 @@ zio_reexecute(zio_t *pio) * responsibility of the caller to wait on him. */ if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) - zio_execute(pio); + __zio_execute(pio); } void @@ -1482,7 +1512,7 @@ zio_gang_node_alloc(zio_gang_node_t **gnpp) ASSERT(*gnpp == NULL); - gn = kmem_zalloc(sizeof (*gn), KM_SLEEP); + gn = kmem_zalloc(sizeof (*gn), KM_PUSHPAGE); gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE); *gnpp = gn; @@ -2733,6 +2763,17 @@ zio_done(zio_t *zio) vdev_stat_update(zio, zio->io_size); + /* + * If this I/O attached to a particular vdev is slow, exceeding + * 30 seconds to complete, post an error describing the I/O delay. + * We ignore these errors if the device is currently unavailable. 
+ */ + if (zio->io_delay >= zio_delay_max) { + if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) + zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, + zio->io_vd, zio, 0, 0); + } + if (zio->io_error) { /* * If this I/O is attached to a particular vdev, @@ -2957,3 +2998,25 @@ static zio_pipe_stage_t *zio_pipeline[] = { zio_checksum_verify, zio_done }; + +#if defined(_KERNEL) && defined(HAVE_SPL) +/* Fault injection */ +EXPORT_SYMBOL(zio_injection_enabled); +EXPORT_SYMBOL(zio_inject_fault); +EXPORT_SYMBOL(zio_inject_list_next); +EXPORT_SYMBOL(zio_clear_fault); +EXPORT_SYMBOL(zio_handle_fault_injection); +EXPORT_SYMBOL(zio_handle_device_injection); +EXPORT_SYMBOL(zio_handle_label_injection); +EXPORT_SYMBOL(zio_priority_table); +EXPORT_SYMBOL(zio_type_name); + +module_param(zio_bulk_flags, int, 0644); +MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers"); + +module_param(zio_delay_max, int, 0644); +MODULE_PARM_DESC(zio_delay_max, "Max zio millisec delay before posting event"); + +module_param(zio_requeue_io_start_cut_in_line, int, 0644); +MODULE_PARM_DESC(zio_requeue_io_start_cut_in_line, "Prioritize requeued I/O"); +#endif