*/
kmem_cache_t *zio_cache;
kmem_cache_t *zio_link_cache;
+kmem_cache_t *zio_vdev_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
int zio_bulk_flags = 0;
zio_cons, zio_dest, NULL, NULL, NULL, KMC_KMEM);
zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, KMC_KMEM);
+ zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof(vdev_io_t),
+ PAGESIZE, NULL, NULL, NULL, NULL, NULL, KMC_VMEM);
/*
* For small buffers, we want a cache for each multiple of
zio_data_buf_cache[c] = NULL;
}
+ kmem_cache_destroy(zio_vdev_cache);
kmem_cache_destroy(zio_link_cache);
kmem_cache_destroy(zio_cache);
ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
- return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
+ return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE | KM_NODEBUG));
}
/*
ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
- return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
+ return (kmem_cache_alloc(zio_data_buf_cache[c],
+ KM_PUSHPAGE | KM_NODEBUG));
}
void
}
/*
+ * Dedicated I/O buffers to ensure that memory fragmentation never prevents
+ * or significantly delays the issuing of a zio. These buffers are used
+ * to aggregate I/O and could be used for raidz stripes.
+ *
+ * zio_vdev_alloc() takes no arguments and returns one object allocated
+ * from zio_vdev_cache with the KM_PUSHPAGE flag. That cache is created
+ * with an object size of sizeof (vdev_io_t) and PAGESIZE alignment.
+ *
+ * zio_vdev_free() hands buf back to zio_vdev_cache. buf is expected to
+ * be a buffer previously returned by zio_vdev_alloc().
+ */
+void *
+zio_vdev_alloc(void)
+{
+ return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE));
+}
+
+void
+zio_vdev_free(void *buf)
+{
+ kmem_cache_free(zio_vdev_cache, buf);
+
+}
+
+/*
* ==========================================================================
* Push and pop I/O transform buffers
* ==========================================================================
int
zio_wait(zio_t *zio)
{
+ uint64_t timeout;
int error;
ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
ASSERT(zio->io_executor == NULL);
zio->io_waiter = curthread;
+ timeout = ddi_get_lbolt() + (zio_delay_max / MILLISEC * hz);
__zio_execute(zio);
mutex_enter(&zio->io_lock);
- while (zio->io_executor != NULL)
- cv_wait(&zio->io_cv, &zio->io_lock);
+ while (zio->io_executor != NULL) {
+ /*
+ * Wake up periodically to prevent the kernel from complaining
+ * about a blocked task. On each wakeup, also check whether the
+ * I/O has exceeded zio_delay_max and, if so, post one ereport.
+ */
+ cv_timedwait_interruptible(&zio->io_cv, &zio->io_lock,
+ ddi_get_lbolt() + hz);
+
+ if (timeout && (ddi_get_lbolt() > timeout)) {
+ zio->io_delay = zio_delay_max;
+ zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
+ zio->io_spa, zio->io_vd, zio, 0, 0);
+ timeout = 0;
+ }
+ }
mutex_exit(&zio->io_lock);
error = zio->io_error;
*/
pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
+ /*
+ * We didn't allocate this bp, so make sure it doesn't get unmarked.
+ */
+ pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
+
zio_nowait(zio);
return (ZIO_PIPELINE_CONTINUE);
flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
METASLAB_GANG_CHILD : 0;
+ flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
error = metaslab_alloc(spa, mc, zio->io_size, bp,
zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
* Try to allocate an intent log block. Return 0 on success, errno on failure.
*/
int
-zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
- uint64_t size, boolean_t use_slog)
+zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
+ boolean_t use_slog)
{
int error = 1;
*/
if (use_slog) {
error = metaslab_alloc(spa, spa_log_class(spa), size,
- new_bp, 1, txg, old_bp,
- METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+ new_bp, 1, txg, NULL,
+ METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
}
if (error) {
error = metaslab_alloc(spa, spa_normal_class(spa), size,
- new_bp, 1, txg, old_bp,
- METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+ new_bp, 1, txg, NULL,
+ METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
}
if (error == 0) {
vdev_stat_update(zio, zio->io_size);
/*
- * If this I/O is attached to a particular vdev is slow, exeeding
- * 30 seconds to complete, post an error described the I/O delay.
- * We ignore these errors if the device is currently unavailable.
+ * When an I/O completes but was slow, post an ereport.
*/
- if (zio->io_delay >= zio_delay_max) {
- if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
- zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
- zio->io_vd, zio, 0, 0);
- }
+ if (zio->io_delay >= zio_delay_max)
+ zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
+ zio->io_vd, zio, 0, 0);
if (zio->io_error) {
/*
zfs_ereport_free_checksum(zcr);
}
+ if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
+ !BP_IS_HOLE(zio->io_bp)) {
+ metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
+ }
+
/*
* It is the responsibility of the done callback to ensure that this
* particular zio is no longer discoverable for adoption, and as