X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzio.c;h=0022c64cc5c05e490fa987f70c1d41a1bcc9a2c4;hb=f5fc4acaa77e2c1782a9495bbf1a39884b4c3940;hp=9f34a23385f9f7bae43e05e04560a8e01b410f7a;hpb=a69052be7f9a4008e2b09578e9db5fdebc186111;p=zfs.git diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 9f34a23..0022c64 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011 by Delphix. All rights reserved. */ #include @@ -61,8 +62,7 @@ uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE] = { * ========================================================================== */ char *zio_type_name[ZIO_TYPES] = { - "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim", - "zio_ioctl" + "z_null", "z_rd", "z_wr", "z_fr", "z_cl", "z_ioctl" }; /* @@ -80,6 +80,7 @@ int zio_delay_max = ZIO_DELAY_MAX; #ifdef _KERNEL extern vmem_t *zio_alloc_arena; #endif +extern int zfs_mg_alloc_failures; /* * An allocating zio is one that either currently has the DVA allocate @@ -87,7 +88,7 @@ extern vmem_t *zio_alloc_arena; */ #define IO_IS_ALLOCATING(zio) ((zio)->io_orig_pipeline & ZIO_STAGE_DVA_ALLOCATE) -boolean_t zio_requeue_io_start_cut_in_line = B_TRUE; +int zio_requeue_io_start_cut_in_line = 1; #ifdef ZFS_DEBUG int zio_buf_debug_limit = 16384; @@ -159,6 +160,12 @@ zio_init(void) zio_data_buf_cache[c - 1] = zio_data_buf_cache[c]; } + /* + * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs + * to fail 3 times per txg or 8 failures, whichever is greater. + */ + zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); + zio_inject_init(); } @@ -256,7 +263,7 @@ static void zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, zio_transform_func_t *transform) { - zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); + zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_PUSHPAGE); zt->zt_orig_data = zio->io_data; zt->zt_orig_size = zio->io_size; @@ -371,7 +378,7 @@ zio_unique_parent(zio_t *cio) void zio_add_child(zio_t *pio, zio_t *cio) { - zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); + zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_PUSHPAGE); int w; /* @@ -492,7 +499,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(vd || stage == ZIO_STAGE_OPEN); - zio = kmem_cache_alloc(zio_cache, KM_SLEEP); + zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE); bzero(zio, sizeof (zio_t)); mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL); @@ -1151,6 +1158,8 @@ __zio_execute(zio_t *zio) while (zio->io_stage < ZIO_STAGE_DONE) { enum zio_stage pipeline = zio->io_pipeline; enum zio_stage stage = zio->io_stage; + dsl_pool_t *dsl; + boolean_t cut; int rv; ASSERT(!MUTEX_HELD(&zio->io_lock)); @@ -1163,19 +1172,26 @@ __zio_execute(zio_t *zio) ASSERT(stage <= ZIO_STAGE_DONE); + dsl = spa_get_dsl(zio->io_spa); + cut = (stage == ZIO_STAGE_VDEV_IO_START) ? + zio_requeue_io_start_cut_in_line : B_FALSE; + /* * If we are in interrupt context and this pipeline stage * will grab a config lock that is held across I/O, * or may wait for an I/O that needs an interrupt thread * to complete, issue async to avoid deadlock. * + * If we are in the txg_sync_thread or being called + * during pool init issue async to minimize stack depth. + * Both of these call paths may be recursively called. + * * For VDEV_IO_START, we cut in line so that the io will * be sent to disk promptly. */ - if ((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && - zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) { - boolean_t cut = (stage == ZIO_STAGE_VDEV_IO_START) ? - zio_requeue_io_start_cut_in_line : B_FALSE; + if (((stage & ZIO_BLOCKING_STAGES) && zio->io_vd == NULL && + zio_taskq_member(zio, ZIO_TASKQ_INTERRUPT)) || + (dsl != NULL && dsl_pool_sync_context(dsl))) { zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, cut); return; } @@ -1504,7 +1520,7 @@ zio_gang_node_alloc(zio_gang_node_t **gnpp) ASSERT(*gnpp == NULL); - gn = kmem_zalloc(sizeof (*gn), KM_SLEEP); + gn = kmem_zalloc(sizeof (*gn), KM_PUSHPAGE); gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE); *gnpp = gn; @@ -2143,6 +2159,7 @@ zio_dva_allocate(zio_t *zio) metaslab_class_t *mc = spa_normal_class(spa); blkptr_t *bp = zio->io_bp; int error; + int flags = 0; if (zio->io_gang_leader == NULL) { ASSERT(zio->io_child_type > ZIO_CHILD_GANG); @@ -2155,10 +2172,21 @@ zio_dva_allocate(zio_t *zio) ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa)); ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp)); + /* + * The dump device does not support gang blocks so allocation on + * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid + * the "fast" gang feature. + */ + flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0; + flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? + METASLAB_GANG_CHILD : 0; error = metaslab_alloc(spa, mc, zio->io_size, bp, - zio->io_prop.zp_copies, zio->io_txg, NULL, 0); + zio->io_prop.zp_copies, zio->io_txg, NULL, flags); if (error) { + spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, " + "size %llu, error %d", spa_name(spa), zio, zio->io_size, + error); if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE) return (zio_write_gang_block(zio)); zio->io_error = error; @@ -3007,5 +3035,8 @@ module_param(zio_bulk_flags, int, 0644); MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers"); module_param(zio_delay_max, int, 0644); -MODULE_PARM_DESC(zio_delay_max, "Max zio delay before posting an event (ms)"); +MODULE_PARM_DESC(zio_delay_max, "Max zio millisec delay before posting event"); + +module_param(zio_requeue_io_start_cut_in_line, int, 0644); +MODULE_PARM_DESC(zio_requeue_io_start_cut_in_line, "Prioritize requeued I/O"); #endif