Log I/Os longer than zio_delay_max (30s default)

[zfs.git] / module / zfs / zio.c
diff --git a/module/zfs/zio.c b/module/zfs/zio.c

index fe2bdc8..bcfc081 100644 (file)
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -259,7 +259,7 @@ zio_buf_alloc(size_t size)
  
         ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
  
-       return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
+       return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE | KM_NODEBUG));
  }
  
  /*
@@ -275,7 +275,8 @@ zio_data_buf_alloc(size_t size)
  
         ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
  
-       return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
+       return (kmem_cache_alloc(zio_data_buf_cache[c],
+           KM_PUSHPAGE | KM_NODEBUG));
  }
  
  void
@@ -1304,18 +1305,34 @@ __zio_execute(zio_t *zio)
  int
  zio_wait(zio_t *zio)
  {
+       uint64_t timeout;
         int error;
  
         ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
         ASSERT(zio->io_executor == NULL);
  
         zio->io_waiter = curthread;
+       timeout = ddi_get_lbolt() + (zio_delay_max / MILLISEC * hz);
  
         __zio_execute(zio);
  
         mutex_enter(&zio->io_lock);
-       while (zio->io_executor != NULL)
-               cv_wait(&zio->io_cv, &zio->io_lock);
+       while (zio->io_executor != NULL) {
+               /*
+                * Wake up periodically to prevent the kernel from complaining
+                * about a blocked task.  On each wakeup, post a single ereport
+                * if the I/O has been outstanding longer than zio_delay_max.
+                */
+               cv_timedwait_interruptible(&zio->io_cv, &zio->io_lock,
+                   ddi_get_lbolt() + hz);
+
+               if (timeout && (ddi_get_lbolt() > timeout)) {
+                       zio->io_delay = zio_delay_max;
+                       zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
+                           zio->io_spa, zio->io_vd, zio, 0, 0);
+                       timeout = 0;
+               }
+       }
         mutex_exit(&zio->io_lock);
  
         error = zio->io_error;
@@ -1860,6 +1877,11 @@ zio_write_gang_block(zio_t *pio)
          */
         pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
  
+       /*
+        * We didn't allocate this bp, so make sure it doesn't get unmarked.
+        */
+       pio->io_flags &= ~ZIO_FLAG_FASTWRITE;
+
         zio_nowait(zio);
  
         return (ZIO_PIPELINE_CONTINUE);
@@ -2269,6 +2291,7 @@ zio_dva_allocate(zio_t *zio)
         flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
         flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
             METASLAB_GANG_CHILD : 0;
+       flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
         error = metaslab_alloc(spa, mc, zio->io_size, bp,
             zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
  
@@ -2332,8 +2355,8 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
   * Try to allocate an intent log block.  Return 0 on success, errno on failure.
   */
  int
-zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
-    uint64_t size, boolean_t use_slog)
+zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
+    boolean_t use_slog)
  {
         int error = 1;
  
@@ -2346,14 +2369,14 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
          */
         if (use_slog) {
                 error = metaslab_alloc(spa, spa_log_class(spa), size,
-                   new_bp, 1, txg, old_bp,
-                   METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+                   new_bp, 1, txg, NULL,
+                   METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
         }
  
         if (error) {
                 error = metaslab_alloc(spa, spa_normal_class(spa), size,
-                   new_bp, 1, txg, old_bp,
-                   METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
+                   new_bp, 1, txg, NULL,
+                   METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
         }
  
         if (error == 0) {
@@ -2882,15 +2905,11 @@ zio_done(zio_t *zio)
         vdev_stat_update(zio, zio->io_size);
  
         /*
-        * If this I/O is attached to a particular vdev is slow, exeeding
-        * 30 seconds to complete, post an error described the I/O delay.
-        * We ignore these errors if the device is currently unavailable.
+        * When an I/O completes but was slow, post an ereport.
          */
-       if (zio->io_delay >= zio_delay_max) {
-               if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
-                       zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
-                                         zio->io_vd, zio, 0, 0);
-       }
+       if (zio->io_delay >= zio_delay_max)
+               zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
+                   zio->io_vd, zio, 0, 0);
  
         if (zio->io_error) {
                 /*
@@ -3059,6 +3078,11 @@ zio_done(zio_t *zio)
                 zfs_ereport_free_checksum(zcr);
         }
  
+       if (zio->io_flags & ZIO_FLAG_FASTWRITE && zio->io_bp &&
+           !BP_IS_HOLE(zio->io_bp)) {
+               metaslab_fastwrite_unmark(zio->io_spa, zio->io_bp);
+       }
+
         /*
          * It is the responsibility of the done callback to ensure that this
          * particular zio is no longer discoverable for adoption, and as