X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzio.c;h=bcfc081d1a01832ade32c7da8f5e61a738d897af;hb=9dcb97198338ba2d8764dd5604b278118612f74d;hp=ace72a0875be7db5dad1427c0bc6a3e2186e9d93;hpb=a5c20e2a0a9046c06d86615fbf51dc04f12bba14;p=zfs.git diff --git a/module/zfs/zio.c b/module/zfs/zio.c index ace72a0..bcfc081 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1305,18 +1305,34 @@ __zio_execute(zio_t *zio) int zio_wait(zio_t *zio) { + uint64_t timeout; int error; ASSERT(zio->io_stage == ZIO_STAGE_OPEN); ASSERT(zio->io_executor == NULL); zio->io_waiter = curthread; + timeout = ddi_get_lbolt() + (zio_delay_max / MILLISEC * hz); __zio_execute(zio); mutex_enter(&zio->io_lock); - while (zio->io_executor != NULL) - cv_wait(&zio->io_cv, &zio->io_lock); + while (zio->io_executor != NULL) { + /* + * Wake up periodically to prevent the kernel from complaining + * about a blocked task. However, check zio_delay_max to see + * if the I/O has exceeded the timeout and post an ereport. + */ + cv_timedwait_interruptible(&zio->io_cv, &zio->io_lock, + ddi_get_lbolt() + hz); + + if (timeout && (ddi_get_lbolt() > timeout)) { + zio->io_delay = zio_delay_max; + zfs_ereport_post(FM_EREPORT_ZFS_DELAY, + zio->io_spa, zio->io_vd, zio, 0, 0); + timeout = 0; + } + } mutex_exit(&zio->io_lock); error = zio->io_error; @@ -1861,6 +1877,11 @@ zio_write_gang_block(zio_t *pio) */ pio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + /* + * We didn't allocate this bp, so make sure it doesn't get unmarked. + */ + pio->io_flags &= ~ZIO_FLAG_FASTWRITE; + zio_nowait(zio); return (ZIO_PIPELINE_CONTINUE); @@ -2270,6 +2291,7 @@ zio_dva_allocate(zio_t *zio) flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0; flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ? METASLAB_GANG_CHILD : 0; + flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0; error = metaslab_alloc(spa, mc, zio->io_size, bp, zio->io_prop.zp_copies, zio->io_txg, NULL, flags); @@ -2333,8 +2355,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) * Try to allocate an intent log block. Return 0 on success, errno on failure. */ int -zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, - uint64_t size, boolean_t use_slog) +zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size, + boolean_t use_slog) { int error = 1; @@ -2347,14 +2369,14 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp, */ if (use_slog) { error = metaslab_alloc(spa, spa_log_class(spa), size, - new_bp, 1, txg, old_bp, - METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); + new_bp, 1, txg, NULL, + METASLAB_FASTWRITE | METASLAB_GANG_AVOID); } if (error) { error = metaslab_alloc(spa, spa_normal_class(spa), size, - new_bp, 1, txg, old_bp, - METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID); + new_bp, 1, txg, NULL, + METASLAB_FASTWRITE | METASLAB_GANG_AVOID); } if (error == 0) { @@ -2432,25 +2454,19 @@ zio_vdev_io_start(zio_t *zio) align = 1ULL << vd->vdev_top->vdev_ashift; - /* - * On Linux, we don't care about read alignment. The backing block - * device driver will take care of that for us. - * The only exception is raidz, which needs a full block for parity. - */ - if (P2PHASE(zio->io_size, align) != 0 && - (zio->io_type != ZIO_TYPE_READ || - vd->vdev_ops == &vdev_raidz_ops)) { + if (P2PHASE(zio->io_size, align) != 0) { uint64_t asize = P2ROUNDUP(zio->io_size, align); char *abuf = zio_buf_alloc(asize); + ASSERT(vd == vd->vdev_top); if (zio->io_type == ZIO_TYPE_WRITE) { bcopy(zio->io_data, abuf, zio->io_size); bzero(abuf + zio->io_size, asize - zio->io_size); } zio_push_transform(zio, abuf, asize, asize, zio_subblock); - ASSERT(P2PHASE(zio->io_size, align) == 0); } ASSERT(P2PHASE(zio->io_offset, align) == 0); + ASSERT(P2PHASE(zio->io_size, align) == 0); VERIFY(zio->io_type != ZIO_TYPE_WRITE || spa_writeable(spa)); /* @@ -2889,15 +2905,11 @@ zio_done(zio_t *zio) vdev_stat_update(zio, zio->io_size); /* - * If this I/O is attached to a particular vdev is slow, exeeding - * 30 seconds to complete, post an error described the I/O delay. - * We ignore these errors if the device is currently unavailable. + * When an I/O completes but was slow post an ereport. */ - if (zio->io_delay >= zio_delay_max) { - if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) - zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, - zio->io_vd, zio, 0, 0); - } + if (zio->io_delay >= zio_delay_max) + zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, + zio->io_vd, zio, 0, 0); if (zio->io_error) { /* @@ -3066,6 +3078,11 @@ zio_done(zio_t *zio) zfs_ereport_free_checksum(zcr); } + if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp && + !BP_IS_HOLE(zio->io_bp)) { + metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp); + } + /* * It is the responsibility of the done callback to ensure that this * particular zio is no longer discoverable for adoption, and as