Add ZIL statistics.
author     Etienne Dechamps <etienne.dechamps@ovh.net>
Fri, 15 Jun 2012 14:22:14 +0000 (16:22 +0200)
committer  Brian Behlendorf <behlendorf1@llnl.gov>
Fri, 29 Jun 2012 16:56:51 +0000 (09:56 -0700)
The performance of the ZIL is usually the main bottleneck when dealing with
synchronous, write-heavy workloads (e.g. databases). Understanding the
behavior of the ZIL is required to diagnose performance issues for these
workloads, and to tune ZIL parameters (like zil_slog_limit) accordingly.

This commit adds a new kstat page dedicated to the ZIL, with counters that
will hopefully shed some light on what the ZIL is doing and how it is
doing it.

Currently, these statistics are available in /proc/spl/kstat/zfs/zil.
A description of the fields can be found in zil.h.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #786
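
For reference, here is a minimal userspace sketch (not part of this commit)
that dumps the new counters by reading the kstat page, equivalent to running
cat on /proc/spl/kstat/zfs/zil; it assumes the module is loaded and that the
default SPL kstat text layout is in place:

#include <stdio.h>

int
main(void)
{
	FILE *f = fopen("/proc/spl/kstat/zfs/zil", "r");
	char line[256];

	if (f == NULL) {
		perror("fopen /proc/spl/kstat/zfs/zil");
		return (1);
	}
	/* each data line shows: counter name, kstat type, value */
	while (fgets(line, sizeof (line), f) != NULL)
		fputs(line, stdout);
	fclose(f);
	return (0);
}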

include/sys/zil.h
module/zfs/zil.c

index 45900c9..c583887 100644
@@ -372,6 +372,65 @@ typedef struct itx {
        /* followed by type-specific part of lr_xx_t and its immediate data */
 } itx_t;
 
+/*
+ * Used for zil kstat.
+ */
+typedef struct zil_stats {
+       /*
+        * Number of times a ZIL commit (e.g. fsync) has been requested.
+        */
+       kstat_named_t zil_commit_count;
+
+       /*
+        * Number of times the ZIL has been flushed to stable storage.
+        * This is less than zil_commit_count when commits are "merged"
+        * (see the documentation above zil_commit()).
+        */
+       kstat_named_t zil_commit_writer_count;
+
+       /*
+        * Number of transactions (reads, writes, renames, etc.)
+        * that have been committed.
+        */
+       kstat_named_t zil_itx_count;
+
+       /*
+        * See the documentation for itx_wr_state_t above.
+        * Note that "bytes" accumulates the length of the transactions
+        * (i.e. data), not the actual log record sizes.
+        */
+       kstat_named_t zil_itx_indirect_count;
+       kstat_named_t zil_itx_indirect_bytes;
+       kstat_named_t zil_itx_copied_count;
+       kstat_named_t zil_itx_copied_bytes;
+       kstat_named_t zil_itx_needcopy_count;
+       kstat_named_t zil_itx_needcopy_bytes;
+
+       /*
+        * Transactions which have been allocated to the "normal"
+        * (i.e. not slog) storage pool. Note that "bytes" accumulates
+        * the actual log record sizes, which do not include the actual
+        * data in the case of indirect writes.
+        */
+       kstat_named_t zil_itx_metaslab_normal_count;
+       kstat_named_t zil_itx_metaslab_normal_bytes;
+
+       /*
+        * Transactions which have been allocated to the "slog" storage pool.
+        * If there are no separate log devices, this is the same as the
+        * "normal" pool.
+        */
+       kstat_named_t zil_itx_metaslab_slog_count;
+       kstat_named_t zil_itx_metaslab_slog_bytes;
+} zil_stats_t;
+
+extern zil_stats_t zil_stats;
+
+#define ZIL_STAT_INCR(stat, val) \
+    atomic_add_64(&zil_stats.stat.value.ui64, (val));
+#define ZIL_STAT_BUMP(stat) \
+    ZIL_STAT_INCR(stat, 1);
+
 typedef int zil_parse_blk_func_t(zilog_t *zilog, blkptr_t *bp, void *arg,
     uint64_t txg);
 typedef int zil_parse_lr_func_t(zilog_t *zilog, lr_t *lr, void *arg,
index fb05036..9ab02d7 100644
  */
 
 /*
+ * See zil.h for more information about these fields.
+ */
+zil_stats_t zil_stats = {
+       { "zil_commit_count",              KSTAT_DATA_UINT64 },
+       { "zil_commit_writer_count",       KSTAT_DATA_UINT64 },
+       { "zil_itx_count",                 KSTAT_DATA_UINT64 },
+       { "zil_itx_indirect_count",        KSTAT_DATA_UINT64 },
+       { "zil_itx_indirect_bytes",        KSTAT_DATA_UINT64 },
+       { "zil_itx_copied_count",          KSTAT_DATA_UINT64 },
+       { "zil_itx_copied_bytes",          KSTAT_DATA_UINT64 },
+       { "zil_itx_needcopy_count",        KSTAT_DATA_UINT64 },
+       { "zil_itx_needcopy_bytes",        KSTAT_DATA_UINT64 },
+       { "zil_itx_metaslab_normal_count", KSTAT_DATA_UINT64 },
+       { "zil_itx_metaslab_normal_bytes", KSTAT_DATA_UINT64 },
+       { "zil_itx_metaslab_slog_count",   KSTAT_DATA_UINT64 },
+       { "zil_itx_metaslab_slog_bytes",   KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *zil_ksp;
+
+/*
  * This global ZIL switch affects all pools
  */
 int zil_replay_disable = 0;    /* disable intent logging replay */
@@ -879,6 +900,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
        uint64_t txg;
        uint64_t zil_blksz, wsz;
        int i, error;
+       boolean_t use_slog;
 
        if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
                zilc = (zil_chain_t *)lwb->lwb_buf;
@@ -935,8 +957,19 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
 
        BP_ZERO(bp);
        /* pass the old blkptr in order to spread log blocks across devs */
+       use_slog = USE_SLOG(zilog);
        error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz,
-           USE_SLOG(zilog));
+           use_slog);
+       if (use_slog)
+       {
+               ZIL_STAT_BUMP(zil_itx_metaslab_slog_count);
+               ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused);
+       }
+       else
+       {
+               ZIL_STAT_BUMP(zil_itx_metaslab_normal_count);
+               ZIL_STAT_INCR(zil_itx_metaslab_normal_bytes, lwb->lwb_nused);
+       }
        if (!error) {
                ASSERT3U(bp->blk_birth, ==, txg);
                bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@@ -1022,13 +1055,18 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
        lrc = (lr_t *)lr_buf;
        lrw = (lr_write_t *)lrc;
 
+       ZIL_STAT_BUMP(zil_itx_count);
+
        /*
         * If it's a write, fetch the data or get its blkptr as appropriate.
         */
        if (lrc->lrc_txtype == TX_WRITE) {
                if (txg > spa_freeze_txg(zilog->zl_spa))
                        txg_wait_synced(zilog->zl_dmu_pool, txg);
-               if (itx->itx_wr_state != WR_COPIED) {
+               if (itx->itx_wr_state == WR_COPIED) {
+                       ZIL_STAT_BUMP(zil_itx_copied_count);
+                       ZIL_STAT_INCR(zil_itx_copied_bytes, lrw->lr_length);
+               } else {
                        char *dbuf;
                        int error;
 
@@ -1036,9 +1074,13 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
                                ASSERT(itx->itx_wr_state == WR_NEED_COPY);
                                dbuf = lr_buf + reclen;
                                lrw->lr_common.lrc_reclen += dlen;
+                               ZIL_STAT_BUMP(zil_itx_needcopy_count);
+                               ZIL_STAT_INCR(zil_itx_needcopy_bytes, lrw->lr_length);
                        } else {
                                ASSERT(itx->itx_wr_state == WR_INDIRECT);
                                dbuf = NULL;
+                               ZIL_STAT_BUMP(zil_itx_indirect_count);
+                               ZIL_STAT_INCR(zil_itx_indirect_bytes, lrw->lr_length);
                        }
                        error = zilog->zl_get_data(
                            itx->itx_private, lrw, dbuf, lwb->lwb_zio);
@@ -1497,6 +1539,8 @@ zil_commit(zilog_t *zilog, uint64_t foid)
        if (zilog->zl_sync == ZFS_SYNC_DISABLED)
                return;
 
+       ZIL_STAT_BUMP(zil_commit_count);
+
        /* move the async itxs for the foid to the sync queues */
        zil_async_to_sync(zilog, foid);
 
@@ -1512,6 +1556,7 @@ zil_commit(zilog_t *zilog, uint64_t foid)
 
        zilog->zl_next_batch++;
        zilog->zl_writer = B_TRUE;
+       ZIL_STAT_BUMP(zil_commit_writer_count);
        zil_commit_writer(zilog);
        zilog->zl_com_batch = mybatch;
        zilog->zl_writer = B_FALSE;
@@ -1600,12 +1645,26 @@ zil_init(void)
 {
        zil_lwb_cache = kmem_cache_create("zil_lwb_cache",
            sizeof (struct lwb), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+       zil_ksp = kstat_create("zfs", 0, "zil", "misc",
+           KSTAT_TYPE_NAMED, sizeof(zil_stats) / sizeof(kstat_named_t),
+           KSTAT_FLAG_VIRTUAL);
+
+       if (zil_ksp != NULL) {
+               zil_ksp->ks_data = &zil_stats;
+               kstat_install(zil_ksp);
+       }
 }
 
 void
 zil_fini(void)
 {
        kmem_cache_destroy(zil_lwb_cache);
+
+       if (zil_ksp != NULL) {
+               kstat_delete(zil_ksp);
+               zil_ksp = NULL;
+       }
 }
 
 void