* ==========================================================================
*/
char *zio_type_name[ZIO_TYPES] = {
- "zio_null", "zio_read", "zio_write", "zio_free", "zio_claim",
- "zio_ioctl"
+ "z_null", "z_rd", "z_wr", "z_fr", "z_cl", "z_ioctl"
};
/*
kmem_cache_t *zio_link_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
+int zio_bulk_flags = 0;
+int zio_delay_max = ZIO_DELAY_MAX;
#ifdef _KERNEL
extern vmem_t *zio_alloc_arena;
int zio_buf_debug_limit = 0;
#endif
+static inline void __zio_execute(zio_t *zio);
+
void
zio_init(void)
{
(void) sprintf(name, "zio_buf_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size,
align, NULL, NULL, NULL, NULL, NULL,
- size > zio_buf_debug_limit ? KMC_NODEBUG : 0);
+ (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) |
+ zio_bulk_flags);
(void) sprintf(name, "zio_data_buf_%lu", (ulong_t)size);
zio_data_buf_cache[c] = kmem_cache_create(name, size,
align, NULL, NULL, NULL, NULL, data_alloc_arena,
- size > zio_buf_debug_limit ? KMC_NODEBUG : 0);
+ (size > zio_buf_debug_limit ? KMC_NODEBUG : 0) |
+ zio_bulk_flags);
}
}
return (waiting);
}
-static void
+__attribute__((always_inline))
+static inline void
zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait)
{
uint64_t *countp = &pio->io_children[zio->io_child_type][wait];
if (--*countp == 0 && pio->io_stall == countp) {
pio->io_stall = NULL;
mutex_exit(&pio->io_lock);
- zio_execute(pio);
+ __zio_execute(pio);
} else {
mutex_exit(&pio->io_lock);
}
{
spa_t *spa = zio->io_spa;
zio_type_t t = zio->io_type;
- int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0);
+ int flags = TQ_NOSLEEP | (cutinline ? TQ_FRONT : 0);
/*
* If we're a config writer or a probe, the normal issue and
q++;
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
- (void) taskq_dispatch(spa->spa_zio_taskq[t][q],
- (task_func_t *)zio_execute, zio, flags);
+
+ while (taskq_dispatch(spa->spa_zio_taskq[t][q],
+ (task_func_t *)zio_execute, zio, flags) == 0); /* do nothing */
}
static boolean_t
*/
static zio_pipe_stage_t *zio_pipeline[];
+/*
+ * zio_execute() is a wrapper around the static function
+ * __zio_execute() so that we can force __zio_execute() to be
+ * inlined. This reduces stack overhead which is important
+ * because __zio_execute() is called recursively in several zio
+ * code paths. zio_execute() itself cannot be inlined because
+ * it is externally visible.
+ */
void
zio_execute(zio_t *zio)
{
+ __zio_execute(zio);
+}
+
+__attribute__((always_inline))
+static inline void
+__zio_execute(zio_t *zio)
+{
zio->io_executor = curthread;
while (zio->io_stage < ZIO_STAGE_DONE) {
}
}
+
/*
* ==========================================================================
* Initiate I/O, either sync or async
zio->io_waiter = curthread;
- zio_execute(zio);
+ __zio_execute(zio);
mutex_enter(&zio->io_lock);
while (zio->io_executor != NULL)
zio_add_child(spa->spa_async_zio_root, zio);
}
- zio_execute(zio);
+ __zio_execute(zio);
}
/*
* responsibility of the caller to wait on him.
*/
if (!(pio->io_flags & ZIO_FLAG_GODFATHER))
- zio_execute(pio);
+ __zio_execute(pio);
}
void
static int
zio_done(zio_t *zio)
{
- spa_t *spa = zio->io_spa;
- zio_t *lio = zio->io_logical;
- blkptr_t *bp = zio->io_bp;
- vdev_t *vd = zio->io_vd;
- uint64_t psize = zio->io_size;
zio_t *pio, *pio_next;
int c, w;
for (w = 0; w < ZIO_WAIT_TYPES; w++)
ASSERT(zio->io_children[c][w] == 0);
- if (bp != NULL) {
- ASSERT(bp->blk_pad[0] == 0);
- ASSERT(bp->blk_pad[1] == 0);
- ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 ||
- (bp == zio_unique_parent(zio)->io_bp));
- if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) &&
+ if (zio->io_bp != NULL) {
+ ASSERT(zio->io_bp->blk_pad[0] == 0);
+ ASSERT(zio->io_bp->blk_pad[1] == 0);
+ ASSERT(bcmp(zio->io_bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 ||
+ (zio->io_bp == zio_unique_parent(zio)->io_bp));
+ if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(zio->io_bp) &&
zio->io_bp_override == NULL &&
!(zio->io_flags & ZIO_FLAG_IO_REPAIR)) {
- ASSERT(!BP_SHOULD_BYTESWAP(bp));
- ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(bp));
- ASSERT(BP_COUNT_GANG(bp) == 0 ||
- (BP_COUNT_GANG(bp) == BP_GET_NDVAS(bp)));
+ ASSERT(!BP_SHOULD_BYTESWAP(zio->io_bp));
+ ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp));
+ ASSERT(BP_COUNT_GANG(zio->io_bp) == 0 ||
+ (BP_COUNT_GANG(zio->io_bp) == BP_GET_NDVAS(zio->io_bp)));
}
}
while (zio->io_cksum_report != NULL) {
zio_cksum_report_t *zcr = zio->io_cksum_report;
uint64_t align = zcr->zcr_align;
- uint64_t asize = P2ROUNDUP(psize, align);
+ uint64_t asize = P2ROUNDUP(zio->io_size, align);
char *abuf = zio->io_data;
- if (asize != psize) {
+ if (asize != zio->io_size) {
abuf = zio_buf_alloc(asize);
- bcopy(zio->io_data, abuf, psize);
- bzero(abuf + psize, asize - psize);
+ bcopy(zio->io_data, abuf, zio->io_size);
+ bzero(abuf + zio->io_size, asize - zio->io_size);
}
zio->io_cksum_report = zcr->zcr_next;
zcr->zcr_finish(zcr, abuf);
zfs_ereport_free_checksum(zcr);
- if (asize != psize)
+ if (asize != zio->io_size)
zio_buf_free(abuf, asize);
}
}
zio_pop_transforms(zio); /* note: may set zio->io_error */
- vdev_stat_update(zio, psize);
+ vdev_stat_update(zio, zio->io_size);
+
+ /*
+ * If this I/O is attached to a particular vdev and is slow, exceeding
+ * 30 seconds to complete, post an error describing the I/O delay.
+ * We ignore these errors if the device is currently unavailable.
+ */
+ if (zio->io_delay >= zio_delay_max) {
+ if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
+ zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
+ zio->io_vd, zio, 0, 0);
+ }
if (zio->io_error) {
/*
* at the block level. We ignore these errors if the
* device is currently unavailable.
*/
- if (zio->io_error != ECKSUM && vd != NULL && !vdev_is_dead(vd))
- zfs_ereport_post(FM_EREPORT_ZFS_IO, spa, vd, zio, 0, 0);
+ if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
+ !vdev_is_dead(zio->io_vd))
+ zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa,
+ zio->io_vd, zio, 0, 0);
if ((zio->io_error == EIO || !(zio->io_flags &
(ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&
- zio == lio) {
+ zio == zio->io_logical) {
/*
* For logical I/O requests, tell the SPA to log the
* error and generate a logical data ereport.
*/
- spa_log_error(spa, zio);
- zfs_ereport_post(FM_EREPORT_ZFS_DATA, spa, NULL, zio,
+ spa_log_error(zio->io_spa, zio);
+ zfs_ereport_post(FM_EREPORT_ZFS_DATA, zio->io_spa, NULL, zio,
0, 0);
}
}
- if (zio->io_error && zio == lio) {
+ if (zio->io_error && zio == zio->io_logical) {
/*
* Determine whether zio should be reexecuted. This will
* propagate all the way to the root via zio_notify_parent().
*/
- ASSERT(vd == NULL && bp != NULL);
+ ASSERT(zio->io_vd == NULL && zio->io_bp != NULL);
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
if (IO_IS_ALLOCATING(zio) &&
zio->io_type == ZIO_TYPE_FREE) &&
!(zio->io_flags & ZIO_FLAG_SCAN_THREAD) &&
zio->io_error == ENXIO &&
- spa_load_state(spa) == SPA_LOAD_NONE &&
- spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE)
+ spa_load_state(zio->io_spa) == SPA_LOAD_NONE &&
+ spa_get_failmode(zio->io_spa) != ZIO_FAILURE_MODE_CONTINUE)
zio->io_reexecute |= ZIO_REEXECUTE_SUSPEND;
if (!(zio->io_flags & ZIO_FLAG_CANFAIL) && !zio->io_reexecute)
if ((zio->io_error || zio->io_reexecute) &&
IO_IS_ALLOCATING(zio) && zio->io_gang_leader == zio &&
!(zio->io_flags & ZIO_FLAG_IO_REWRITE))
- zio_dva_unallocate(zio, zio->io_gang_tree, bp);
+ zio_dva_unallocate(zio, zio->io_gang_tree, zio->io_bp);
zio_gang_tree_free(&zio->io_gang_tree);
* We'd fail again if we reexecuted now, so suspend
* until conditions improve (e.g. device comes online).
*/
- zio_suspend(spa, zio);
+ zio_suspend(zio->io_spa, zio);
} else {
/*
* Reexecution is potentially a huge amount of work.
* Hand it off to the otherwise-unused claim taskq.
*/
(void) taskq_dispatch(
- spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
+ zio->io_spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
(task_func_t *)zio_reexecute, zio, TQ_SLEEP);
}
return (ZIO_PIPELINE_STOP);
zio_checksum_verify,
zio_done
};
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/* Fault injection */
+EXPORT_SYMBOL(zio_injection_enabled);
+EXPORT_SYMBOL(zio_inject_fault);
+EXPORT_SYMBOL(zio_inject_list_next);
+EXPORT_SYMBOL(zio_clear_fault);
+EXPORT_SYMBOL(zio_handle_fault_injection);
+EXPORT_SYMBOL(zio_handle_device_injection);
+EXPORT_SYMBOL(zio_handle_label_injection);
+EXPORT_SYMBOL(zio_priority_table);
+EXPORT_SYMBOL(zio_type_name);
+
+module_param(zio_bulk_flags, int, 0644);
+MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers");
+
+module_param(zio_delay_max, int, 0644);
+MODULE_PARM_DESC(zio_delay_max, "Max zio delay before posting an event (ms)");
+#endif