Add linux zpios support
author Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 26 Aug 2010 18:58:00 +0000 (11:58 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Tue, 31 Aug 2010 20:42:01 +0000 (13:42 -0700)
Linux kernel implementation of PIOS test app.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
33 files changed:
cmd/Makefile.am
cmd/zpios/.gitignore [new file with mode: 0644]
cmd/zpios/Makefile.am [new file with mode: 0644]
cmd/zpios/zpios.h [new file with mode: 0644]
cmd/zpios/zpios_main.c [new file with mode: 0644]
cmd/zpios/zpios_util.c [new file with mode: 0644]
config/zfs-build.m4
configure.ac
module/Makefile.in
module/zpios/Makefile.in [new file with mode: 0644]
module/zpios/include/zpios-ctl.h [new file with mode: 0644]
module/zpios/include/zpios-internal.h [new file with mode: 0644]
module/zpios/zpios.c [new file with mode: 0644]
scripts/Makefile.am
scripts/common.sh.in
scripts/zpios-profile/zpios-profile-disk.sh [new file with mode: 0755]
scripts/zpios-profile/zpios-profile-pids.sh [new file with mode: 0755]
scripts/zpios-profile/zpios-profile-post.sh [new file with mode: 0755]
scripts/zpios-profile/zpios-profile-pre.sh [new file with mode: 0755]
scripts/zpios-profile/zpios-profile.sh [new file with mode: 0755]
scripts/zpios-sanity.sh [new file with mode: 0755]
scripts/zpios-survey.sh [new file with mode: 0755]
scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh [new file with mode: 0755]
scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh [new file with mode: 0755]
scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh [new file with mode: 0755]
scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh [new file with mode: 0755]
scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh [new file with mode: 0755]
scripts/zpios-test/large-thread-survey.sh [new symlink]
scripts/zpios-test/large.sh [new symlink]
scripts/zpios-test/medium.sh [new symlink]
scripts/zpios-test/small.sh [new symlink]
scripts/zpios-test/tiny.sh [new symlink]
scripts/zpios.sh [new file with mode: 0755]

index 954f856..7608547 100644 (file)
@@ -1 +1 @@
-SUBDIRS = zfs zpool zpool_id zpool_layout zdb zinject ztest
+SUBDIRS = zfs zpool zpool_id zpool_layout zdb zinject ztest zpios
diff --git a/cmd/zpios/.gitignore b/cmd/zpios/.gitignore
new file mode 100644 (file)
index 0000000..b83e1d0
--- /dev/null
@@ -0,0 +1 @@
+/zpios
diff --git a/cmd/zpios/Makefile.am b/cmd/zpios/Makefile.am
new file mode 100644 (file)
index 0000000..4e13a76
--- /dev/null
@@ -0,0 +1,12 @@
+include $(top_srcdir)/config/Rules.am
+
+DEFAULT_INCLUDES += \
+       -I${top_srcdir}/module/zpios/include
+
+sbin_PROGRAMS = zpios
+
+zpios_SOURCES = \
+       $(top_srcdir)/cmd/zpios/zpios_main.c \
+       $(top_srcdir)/cmd/zpios/zpios_util.c \
+       $(top_srcdir)/cmd/zpios/zpios.h
+
diff --git a/cmd/zpios/zpios.h b/cmd/zpios/zpios.h
new file mode 100644 (file)
index 0000000..ed97945
--- /dev/null
@@ -0,0 +1,121 @@
+/*****************************************************************************\
+ *  ZPIOS is a heavily modified version of the original PIOS test code.
+ *  It is designed to have the test code running in the Linux kernel
+ *  against ZFS while still being flexibly controlled from user space.
+ *
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  LLNL-CODE-403049
+ *
+ *  Original PIOS Test Code
+ *  Copyright (C) 2004 Cluster File Systems, Inc.
+ *  Written by Peter Braam <braam@clusterfs.com>
+ *             Atul Vidwansa <atul@clusterfs.com>
+ *             Milind Dumbare <milind@clusterfs.com>
+ *
+ *  This file is part of ZFS on Linux.
+ *  For details, see <http://github.com/behlendorf/zfs/>.
+ *
+ *  ZPIOS is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  ZPIOS is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#ifndef _ZPIOS_H
+#define _ZPIOS_H
+
+#include <zpios-ctl.h>
+
+/* Size of the buffer that holds the kernel version string */
+#define VERSION_SIZE           64
+
+/*
+ * Regular expressions handed to set_count()/set_lhi() as the patterns an
+ * option argument must match: a decimal number, a comma separated list
+ * of numbers, a size with a k/m/g/t suffix, and a comma separated list
+ * of such sizes.
+ */
+#define REGEX_NUMBERS          "^[0-9]*[0-9]$"
+#define REGEX_NUMBERS_COMMA    "^([0-9]+,)*[0-9]+$"
+#define REGEX_SIZE             "^[0-9][0-9]*[kmgt]$"
+#define REGEX_SIZE_COMMA       "^([0-9][0-9]*[kmgt]+,)*[0-9][0-9]*[kmgt]$"
+
+/* Flags for low, high, incr */
+#define FLAG_SET               0x01
+#define FLAG_LOW               0x02
+#define FLAG_HIGH              0x04
+#define FLAG_INCR              0x08
+
+#define TRUE                   1
+#define FALSE                  0
+
+/*
+ * Binary size multipliers.  KB, MB and GB fit in an int and keep that
+ * type.  TB is 2^40, which does not fit in a 32-bit int, so the final
+ * multiplication is carried out in unsigned long long to avoid signed
+ * integer overflow.
+ */
+#define KB                     (1024)
+#define MB                     (KB * 1024)
+#define GB                     (MB * 1024)
+#define TB                     (GB * 1024ULL)
+
+#define KMGT_SIZE              16
+
+/* All offsets, sizes and counts can be passed to the application in
+ * multiple ways.
+ * 1. a value (stored in val[0], val_count will be 1)
+ * 2. a comma separated list of values (stored in val[], using val_count)
+ * 3. a range and block sizes, low, high, factor (val_count must be 0)
+ *
+ * get_next() in zpios_main.c walks one of these descriptors and selects
+ * the mode from val_count alone: 0 means range mode, 1 means the single
+ * value in val[0], and anything larger means the list in val[].  In
+ * range mode each value is val_low plus next_val percent of val_low,
+ * and iteration stops once that value exceeds val_high.
+ */
+typedef struct pios_range_repeat {
+       uint64_t val[32];        /* Comma sep array, or low, high, inc */
+       uint64_t val_count;      /* Num of values */
+       uint64_t val_low;        /* Range mode: base value, produced first */
+       uint64_t val_high;       /* Range mode: largest value still produced */
+       uint64_t val_inc_perc;   /* Range mode: step, in percent of val_low */
+       uint64_t next_val;       /* Used for multiple runs in get_next() */
+} range_repeat_t;
+
+typedef struct cmd_args {           /* Parsed command line plus run state */
+       range_repeat_t T;           /* Thread count */
+       range_repeat_t N;           /* Region count */
+       range_repeat_t O;           /* Offset count */
+       range_repeat_t C;           /* Chunksize */
+       range_repeat_t S;           /* Regionsize */
+
+       const char *pool;           /* Pool (--pool); points at the argv text */
+       const char *name;           /* Name (--name); points at the argv text */
+       uint32_t flags;             /* Flags: DMU_* bits set by args_init() */
+       uint32_t io_type;           /* DMUIO only */
+       uint32_t verbose;           /* Verbose; incremented once per -v */
+       uint32_t human_readable;    /* Human readable output */
+
+       uint64_t regionnoise;       /* Region noise */
+       uint64_t chunknoise;        /* Chunk noise */
+       uint64_t thread_delay;      /* Thread delay */
+
+       char pre[ZPIOS_PATH_SIZE];  /* Pre-exec hook */
+       char post[ZPIOS_PATH_SIZE]; /* Post-exec hook */
+       char log[ZPIOS_PATH_SIZE];  /* Requested log dir */
+
+       /* Control: values selected for the run currently being issued */
+       int current_id;             /* Run id; bumped after every run_one() */
+       uint64_t current_T;         /* Thread count for this run */
+       uint64_t current_N;         /* Region count for this run */
+       uint64_t current_C;         /* Chunk size for this run */
+       uint64_t current_S;         /* Region size for this run */
+       uint64_t current_O;         /* Offset for this run */
+
+       uint32_t rc;                /* errno of a failed ZPIOS_CMD ioctl */
+} cmd_args_t;
+
+int set_count(char *pattern1, char *pattern2, range_repeat_t *range,
+             char *optarg, uint32_t *flags, char *arg); /* args_init() treats a non-zero return as a parse failure */
+int set_lhi(char *pattern, range_repeat_t *range, char *optarg,
+           int flag, uint32_t *flag_thread, char *arg); /* flag is FLAG_LOW, FLAG_HIGH or FLAG_INCR; non-zero return is a parse failure */
+int set_noise(uint64_t *noise, char *optarg, char *arg); /* used for --regionnoise, --chunknoise and --threaddelay */
+int set_load_params(cmd_args_t *args, char *optarg); /* handles the --load argument */
+int check_mutual_exclusive_command_lines(uint32_t flag, char *arg); /* NOTE(review): args_init() ignores this return value */
+void print_stats_header(cmd_args_t *args); /* called once by main() before the first run */
+void print_stats(cmd_args_t *args, zpios_cmd_t *cmd); /* called by run_one() after every ZPIOS_CMD ioctl */
+
+#endif /* _ZPIOS_H */
diff --git a/cmd/zpios/zpios_main.c b/cmd/zpios/zpios_main.c
new file mode 100644 (file)
index 0000000..14c37e7
--- /dev/null
@@ -0,0 +1,629 @@
+/*****************************************************************************\
+ *  ZPIOS is a heavily modified version of the original PIOS test code.
+ *  It is designed to have the test code running in the Linux kernel
+ *  against ZFS while still being flexibly controlled from user space.
+ *
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  LLNL-CODE-403049
+ *
+ *  Original PIOS Test Code
+ *  Copyright (C) 2004 Cluster File Systems, Inc.
+ *  Written by Peter Braam <braam@clusterfs.com>
+ *             Atul Vidwansa <atul@clusterfs.com>
+ *             Milind Dumbare <milind@clusterfs.com>
+ *
+ *  This file is part of ZFS on Linux.
+ *  For details, see <http://github.com/behlendorf/zfs/>.
+ *
+ *  ZPIOS is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  ZPIOS is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include "zpios.h"
+
+static const char short_opt[] = "t:l:h:e:n:i:j:k:o:m:q:r:c:a:b:g:s:A:B:C:"
+                                "L:p:M:xP:R:G:I:N:T:VzOfHv?"; /* getopt_long() short options; a trailing ':' means the option takes an argument */
+static const struct option long_opt[] = { /* long names mapped onto the short option letters above */
+       {"threadcount",         required_argument, 0, 't' },
+       {"threadcount_low",     required_argument, 0, 'l' },
+       {"threadcount_high",    required_argument, 0, 'h' }, /* note: -h is not help */
+       {"threadcount_incr",    required_argument, 0, 'e' },
+       {"regioncount",         required_argument, 0, 'n' },
+       {"regioncount_low",     required_argument, 0, 'i' },
+       {"regioncount_high",    required_argument, 0, 'j' },
+       {"regioncount_incr",    required_argument, 0, 'k' },
+       {"offset",              required_argument, 0, 'o' },
+       {"offset_low",          required_argument, 0, 'm' },
+       {"offset_high",         required_argument, 0, 'q' },
+       {"offset_incr",         required_argument, 0, 'r' },
+       {"chunksize",           required_argument, 0, 'c' },
+       {"chunksize_low",       required_argument, 0, 'a' },
+       {"chunksize_high",      required_argument, 0, 'b' },
+       {"chunksize_incr",      required_argument, 0, 'g' },
+       {"regionsize",          required_argument, 0, 's' },
+       {"regionsize_low",      required_argument, 0, 'A' },
+       {"regionsize_high",     required_argument, 0, 'B' },
+       {"regionsize_incr",     required_argument, 0, 'C' },
+       {"load",                required_argument, 0, 'L' },
+       {"pool",                required_argument, 0, 'p' },
+       {"name",                required_argument, 0, 'M' },
+       {"cleanup",             no_argument,       0, 'x' },
+       {"prerun",              required_argument, 0, 'P' },
+       {"postrun",             required_argument, 0, 'R' },
+       {"log",                 required_argument, 0, 'G' },
+       {"regionnoise",         required_argument, 0, 'I' },
+       {"chunknoise",          required_argument, 0, 'N' },
+       {"threaddelay",         required_argument, 0, 'T' },
+       {"verify",              no_argument,       0, 'V' },
+       {"zerocopy",            no_argument,       0, 'z' },
+       {"nowait",              no_argument,       0, 'O' },
+       {"noprefetch",          no_argument,       0, 'f' },
+       {"human-readable",      no_argument,       0, 'H' },
+       {"verbose",             no_argument,       0, 'v' },
+       {"help",                no_argument,       0, '?' }, /* args_init() answers '?' with the usage text */
+       { 0,                    0,                 0,  0  }, /* all-zero terminator entry */
+};
+
+/*
+ * Control file descriptor.  It starts out as -1 ("not open") because
+ * main() calls dev_fini() on every exit path, including the one taken
+ * when argument parsing fails before dev_init() has run.  With the
+ * implicit default of 0, dev_fini() would close descriptor 0 (stdin).
+ */
+static int zpiosctl_fd = -1;
+/*
+ * Kernel version string.
+ * NOTE(review): nothing in the code visible here writes to this buffer,
+ * so main() prints it as an empty string - confirm where it is filled.
+ */
+static char zpios_version[VERSION_SIZE];
+static char *zpios_buffer = NULL;              /* Scratch space area */
+static int zpios_buffer_size = 0;              /* Scratch space size */
+
+/*
+ * Write the option summary to stderr.
+ *
+ * Returns 0 unconditionally; callers in this file ignore the value.
+ */
+static int
+usage(void)
+{
+       static const char banner[] = "Usage: zpios\n";
+       static const char options[] =
+               "       --threadcount       -t    =values\n"
+               "       --threadcount_low   -l    =value\n"
+               "       --threadcount_high  -h    =value\n"
+               "       --threadcount_incr  -e    =value\n"
+               "       --regioncount       -n    =values\n"
+               "       --regioncount_low   -i    =value\n"
+               "       --regioncount_high  -j    =value\n"
+               "       --regioncount_incr  -k    =value\n"
+               "       --offset            -o    =values\n"
+               "       --offset_low        -m    =value\n"
+               "       --offset_high       -q    =value\n"
+               "       --offset_incr       -r    =value\n"
+               "       --chunksize         -c    =values\n"
+               "       --chunksize_low     -a    =value\n"
+               "       --chunksize_high    -b    =value\n"
+               "       --chunksize_incr    -g    =value\n"
+               "       --regionsize        -s    =values\n"
+               "       --regionsize_low    -A    =value\n"
+               "       --regionsize_high   -B    =value\n"
+               "       --regionsize_incr   -C    =value\n"
+               "       --load              -L    =dmuio|ssf|fpp\n"
+               "       --pool              -p    =pool name\n"
+               "       --name              -M    =test name\n"
+               "       --cleanup           -x\n"
+               "       --prerun            -P    =pre-command\n"
+               "       --postrun           -R    =post-command\n"
+               "       --log               -G    =log directory\n"
+               "       --regionnoise       -I    =shift\n"
+               "       --chunknoise        -N    =bytes\n"
+               "       --threaddelay       -T    =jiffies\n"
+               "       --verify            -V\n"
+               "       --zerocopy          -z\n"
+               "       --nowait            -O\n"
+               "       --noprefetch        -f\n"
+               "       --human-readable    -H\n"
+               "       --verbose           -v    =increase verbosity\n"
+               "       --help              -?    =this help\n\n";
+
+       /* Neither text holds a conversion, so fputs() emits the same bytes */
+       fputs(banner, stderr);
+       fputs(options, stderr);
+
+       return 0;
+}
+
+/*
+ * Release an argument structure obtained from args_init().
+ * Passing NULL is a programming error and trips the assertion.
+ */
+static void
+args_fini(cmd_args_t *args)
+{
+       assert(args != NULL);
+
+       free(args);
+}
+
+/*
+ * Build a cmd_args_t from the command line.
+ *
+ * Returns a zero-initialised, heap allocated structure filled in from
+ * argv, which the caller releases with args_fini().  Returns NULL when
+ * no arguments were given, when the allocation fails, when an option
+ * handler reports an error, when --pool is missing, or when --zerocopy
+ * is combined with --verify.  The usage text is printed in every NULL
+ * case except the allocation failure.
+ *
+ * The pool and name members point directly at the argv strings.
+ */
+static cmd_args_t *
+args_init(int argc, char **argv)
+{
+       /* One FLAG_* accumulator per parameter family */
+       uint32_t thread_fl = 0, rcount_fl = 0, offset_fl = 0;
+       uint32_t rsize_fl = 0, csize_fl = 0;
+       cmd_args_t *args;
+       int opt, err;
+
+       if (argc == 1) {
+               usage();
+               return NULL;
+       }
+
+       /* Zeroed storage, so the path buffers start out NUL filled */
+       args = calloc(1, sizeof(*args));
+       if (args == NULL)
+               return NULL;
+
+       for (;;) {
+               opt = getopt_long(argc, argv, short_opt, long_opt, NULL);
+               if (opt == -1)
+                       break;
+
+               err = 0;
+
+               switch (opt) {
+               /* Thread count */
+               case 't':
+                       err = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA,
+                           &args->T, optarg, &thread_fl, "threadcount");
+                       break;
+               case 'l':
+                       err = set_lhi(REGEX_NUMBERS, &args->T, optarg,
+                           FLAG_LOW, &thread_fl, "threadcount_low");
+                       break;
+               case 'h':
+                       err = set_lhi(REGEX_NUMBERS, &args->T, optarg,
+                           FLAG_HIGH, &thread_fl, "threadcount_high");
+                       break;
+               case 'e':
+                       err = set_lhi(REGEX_NUMBERS, &args->T, optarg,
+                           FLAG_INCR, &thread_fl, "threadcount_incr");
+                       break;
+
+               /* Region count */
+               case 'n':
+                       err = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA,
+                           &args->N, optarg, &rcount_fl, "regioncount");
+                       break;
+               case 'i':
+                       err = set_lhi(REGEX_NUMBERS, &args->N, optarg,
+                           FLAG_LOW, &rcount_fl, "regioncount_low");
+                       break;
+               case 'j':
+                       err = set_lhi(REGEX_NUMBERS, &args->N, optarg,
+                           FLAG_HIGH, &rcount_fl, "regioncount_high");
+                       break;
+               case 'k':
+                       err = set_lhi(REGEX_NUMBERS, &args->N, optarg,
+                           FLAG_INCR, &rcount_fl, "regioncount_incr");
+                       break;
+
+               /* Offset: sizes for value/low/high, plain number for incr */
+               case 'o':
+                       err = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
+                           &args->O, optarg, &offset_fl, "offset");
+                       break;
+               case 'm':
+                       err = set_lhi(REGEX_SIZE, &args->O, optarg,
+                           FLAG_LOW, &offset_fl, "offset_low");
+                       break;
+               case 'q':
+                       err = set_lhi(REGEX_SIZE, &args->O, optarg,
+                           FLAG_HIGH, &offset_fl, "offset_high");
+                       break;
+               case 'r':
+                       err = set_lhi(REGEX_NUMBERS, &args->O, optarg,
+                           FLAG_INCR, &offset_fl, "offset_incr");
+                       break;
+
+               /* Chunk size */
+               case 'c':
+                       err = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
+                           &args->C, optarg, &csize_fl, "chunksize");
+                       break;
+               case 'a':
+                       err = set_lhi(REGEX_SIZE, &args->C, optarg,
+                           FLAG_LOW, &csize_fl, "chunksize_low");
+                       break;
+               case 'b':
+                       err = set_lhi(REGEX_SIZE, &args->C, optarg,
+                           FLAG_HIGH, &csize_fl, "chunksize_high");
+                       break;
+               case 'g':
+                       err = set_lhi(REGEX_NUMBERS, &args->C, optarg,
+                           FLAG_INCR, &csize_fl, "chunksize_incr");
+                       break;
+
+               /* Region size */
+               case 's':
+                       err = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
+                           &args->S, optarg, &rsize_fl, "regionsize");
+                       break;
+               case 'A':
+                       err = set_lhi(REGEX_SIZE, &args->S, optarg,
+                           FLAG_LOW, &rsize_fl, "regionsize_low");
+                       break;
+               case 'B':
+                       err = set_lhi(REGEX_SIZE, &args->S, optarg,
+                           FLAG_HIGH, &rsize_fl, "regionsize_high");
+                       break;
+               case 'C':
+                       err = set_lhi(REGEX_NUMBERS, &args->S, optarg,
+                           FLAG_INCR, &rsize_fl, "regionsize_incr");
+                       break;
+
+               /* Load type, pool and test name */
+               case 'L':
+                       err = set_load_params(args, optarg);
+                       break;
+               case 'p':
+                       args->pool = optarg;
+                       break;
+               case 'M':
+                       args->name = optarg;
+                       break;
+
+               /* Hooks and log directory; copies are silently truncated */
+               case 'P':
+                       strncpy(args->pre, optarg, ZPIOS_PATH_SIZE - 1);
+                       break;
+               case 'R':
+                       strncpy(args->post, optarg, ZPIOS_PATH_SIZE - 1);
+                       break;
+               case 'G':
+                       strncpy(args->log, optarg, ZPIOS_PATH_SIZE - 1);
+                       break;
+
+               /* Noise and delay values */
+               case 'I':
+                       err = set_noise(&args->regionnoise, optarg,
+                           "regionnoise");
+                       break;
+               case 'N':
+                       err = set_noise(&args->chunknoise, optarg,
+                           "chunknoise");
+                       break;
+               case 'T':
+                       err = set_noise(&args->thread_delay, optarg,
+                           "threaddelay");
+                       break;
+
+               /* Simple switches */
+               case 'x':
+                       args->flags |= DMU_REMOVE;
+                       break;
+               case 'V':
+                       args->flags |= DMU_VERIFY;
+                       break;
+               case 'z':
+                       args->flags |= (DMU_WRITE_ZC | DMU_READ_ZC);
+                       break;
+               case 'O':
+                       args->flags |= DMU_WRITE_NOWAIT;
+                       break;
+               case 'f':
+                       args->flags |= DMU_READ_NOPF;
+                       break;
+               case 'H':
+                       args->human_readable = 1;
+                       break;
+               case 'v':
+                       args->verbose++;
+                       break;
+
+               /* --help, and getopt_long()'s own error indicator */
+               case '?':
+                       err = 1;
+                       break;
+               default:
+                       fprintf(stderr,"Unknown option '%s'\n",argv[optind-1]);
+                       err = EINVAL;
+                       break;
+               }
+
+               if (err)
+                       goto fail;
+       }
+
+       /* NOTE(review): the results of these checks are not acted upon */
+       check_mutual_exclusive_command_lines(thread_fl, "threadcount");
+       check_mutual_exclusive_command_lines(rcount_fl, "regioncount");
+       check_mutual_exclusive_command_lines(offset_fl, "offset");
+       check_mutual_exclusive_command_lines(rsize_fl, "regionsize");
+       check_mutual_exclusive_command_lines(csize_fl, "chunksize");
+
+       if (args->pool == NULL) {
+               fprintf(stderr, "Error: Pool not specificed\n");
+               goto fail;
+       }
+
+       if ((args->flags & DMU_VERIFY) &&
+           (args->flags & (DMU_WRITE_ZC | DMU_READ_ZC))) {
+               fprintf(stderr, "Error, --zerocopy incompatible --verify, "
+                   "used for performance analysis only\n");
+               goto fail;
+       }
+
+       return args;
+
+fail:
+       /* Common rejection path: show the help text and drop the struct */
+       usage();
+       args_fini(args);
+       return NULL;
+}
+
+/*
+ * Issue ZPIOS_CFG_BUFFER_CLEAR on the control device and rewind the
+ * descriptor to offset 0.
+ *
+ * Returns the ioctl() result.  A failure is reported on stderr together
+ * with errno; the rewind is attempted either way and its own result is
+ * not checked.
+ */
+static int
+dev_clear(void)
+{
+       zpios_cfg_t request;
+       int status;
+
+       memset(&request, 0, sizeof(request));
+       request.cfg_magic = ZPIOS_CFG_MAGIC;
+       request.cfg_cmd = ZPIOS_CFG_BUFFER_CLEAR;
+       request.cfg_arg1 = 0;
+
+       status = ioctl(zpiosctl_fd, ZPIOS_CFG, &request);
+       if (status != 0) {
+               fprintf(stderr, "Ioctl() error %lu / %d: %d\n",
+                       (unsigned long) ZPIOS_CFG, request.cfg_cmd, errno);
+       }
+
+       lseek(zpiosctl_fd, 0, SEEK_SET);
+
+       return status;
+}
+
+/*
+ * Issue ZPIOS_CFG_BUFFER_SIZE on the control device with the requested
+ * size as its argument.  Passing a size of zero simply results in
+ * querying the current size.
+ *
+ * Returns the cfg_rc1 field filled in by the ioctl on success.  On
+ * failure the ioctl() result is returned after the error and errno have
+ * been reported on stderr.
+ */
+static int
+dev_size(int size)
+{
+       zpios_cfg_t request;
+       int status;
+
+       memset(&request, 0, sizeof(request));
+       request.cfg_magic = ZPIOS_CFG_MAGIC;
+       request.cfg_cmd = ZPIOS_CFG_BUFFER_SIZE;
+       request.cfg_arg1 = size;
+
+       status = ioctl(zpiosctl_fd, ZPIOS_CFG, &request);
+       if (status == 0)
+               return request.cfg_rc1;
+
+       fprintf(stderr, "Ioctl() error %lu / %d: %d\n",
+               (unsigned long) ZPIOS_CFG, request.cfg_cmd, errno);
+
+       return status;
+}
+
+/*
+ * Release the scratch buffer and close the control device, unless the
+ * descriptor is -1.  A failing close() is reported on stderr together
+ * with errno.
+ */
+static void
+dev_fini(void)
+{
+       /* free(NULL) is a no-op, so no guard is required */
+       free(zpios_buffer);
+
+       if (zpiosctl_fd == -1)
+               return;
+
+       if (close(zpiosctl_fd) == -1)
+               fprintf(stderr, "Unable to close %s: %d\n",
+                       ZPIOS_DEV, errno);
+}
+
+/*
+ * Open the zpios control device and allocate the scratch buffer.
+ *
+ * Steps: open ZPIOS_DEV read-only, clear the kernel buffer with
+ * dev_clear(), query its size with dev_size(0), then allocate a zeroed
+ * user-space buffer of that size.
+ *
+ * Returns 0 on success.  On failure returns the errno from open(), the
+ * non-zero result of dev_clear(), the negative result of dev_size(), or
+ * ENOMEM.  On failure the descriptor is closed and zpiosctl_fd is set
+ * back to -1, so that a later dev_fini() does not close it again.
+ */
+static int
+dev_init(void)
+{
+       int rc;
+
+       zpiosctl_fd = open(ZPIOS_DEV, O_RDONLY);
+       if (zpiosctl_fd == -1) {
+               /* Save errno first: fprintf() is allowed to change it */
+               rc = errno;
+               fprintf(stderr, "Unable to open %s: %d\n"
+                       "Is the zpios module loaded?\n", ZPIOS_DEV, rc);
+               goto error;
+       }
+
+       if ((rc = dev_clear()))
+               goto error;
+
+       if ((rc = dev_size(0)) < 0)
+               goto error;
+
+       zpios_buffer_size = rc;
+       zpios_buffer = calloc(1, zpios_buffer_size);
+       if (zpios_buffer == NULL) {
+               rc = ENOMEM;
+               goto error;
+       }
+
+       return 0;
+error:
+       if (zpiosctl_fd != -1) {
+               if (close(zpiosctl_fd) == -1) {
+                       fprintf(stderr, "Unable to close %s: %d\n",
+                               ZPIOS_DEV, errno);
+               }
+
+               /* Mark it closed; main() still runs dev_fini() afterwards */
+               zpiosctl_fd = -1;
+       }
+
+       return rc;
+}
+
+/*
+ * Produce the next value of a range descriptor.
+ *
+ * val    receives the value when 1 is returned (in range mode it is
+ *        also written when the limit has been exceeded)
+ * range  descriptor to advance; next_val is the iteration cursor and
+ *        must be reset to 0 by the caller to restart the sequence
+ *
+ * Returns 1 when *val holds a new value and 0 when the sequence is
+ * exhausted.
+ *
+ * The mode is selected by val_count:
+ *   0   range mode: val_low + val_low * next_val / 100, stopping once
+ *       that exceeds val_high; next_val grows by val_inc_perc per call
+ *   1   the single value val[0], produced once
+ *   >1  the values val[0] .. val[val_count - 1], in order
+ */
+static int
+get_next(uint64_t *val, range_repeat_t *range)
+{
+       /* if low, incr, high is given */
+       if (range->val_count == 0) {
+               /*
+                * A zero increment or a zero base can never move the value
+                * past val_high, so stepping through such a range would not
+                * terminate.  Such a range contains exactly one value,
+                * val_low: produce it once and use a non-zero cursor to
+                * mark the range as exhausted.
+                */
+               if (range->val_inc_perc == 0 || range->val_low == 0) {
+                       if (range->next_val != 0)
+                               return 0; /* Already produced */
+
+                       *val = range->val_low;
+                       if (*val > range->val_high)
+                               return 0; /* No values, limit exceeded */
+
+                       range->next_val = 1;
+                       return 1;
+               }
+
+               *val = (range->val_low) +
+                      (range->val_low * range->next_val / 100);
+
+               if (*val > range->val_high)
+                       return 0; /* No more values, limit exceeded */
+
+               range->next_val += range->val_inc_perc;
+
+               return 1; /* more values to come */
+
+       /* if only one val is given */
+       } else if (range->val_count == 1) {
+               if (range->next_val)
+                       return 0; /* No more values, we only have one */
+
+               *val = range->val[0];
+               range->next_val = 1;
+               return 1; /* more values to come */
+       }
+
+       /* if comma separated values are given (val_count > 1) */
+       if (range->next_val > range->val_count - 1)
+               return 0; /* No more values, limit exceeded */
+
+       *val = range->val[range->next_val];
+       range->next_val++;
+       return 1; /* more values to come */
+}
+
+/*
+ * Execute a single test run in the kernel module and report it.
+ *
+ * args  parsed command line; supplies pool name, hooks, noise values
+ *       and flags, and receives errno in args->rc when the ioctl fails
+ * id    run identifier stored in the command
+ * T, N  thread count and region count (callers hold these as 64-bit
+ *       values, which are narrowed to 32 bits here)
+ * C, S  chunk size and region size
+ * O     offset
+ *
+ * The command is allocated with room for T + N + 1 zpios_stats_t
+ * records behind it, sent with the ZPIOS_CMD ioctl, and handed to
+ * print_stats() whether or not the ioctl succeeded.  In verbose mode
+ * the text the module left in its buffer is read back and printed.
+ *
+ * Returns ENOMEM if the command cannot be allocated, otherwise the
+ * ioctl() result.
+ */
+static int
+run_one(cmd_args_t *args, uint32_t id, uint32_t T, uint32_t N,
+        uint64_t C, uint64_t S, uint64_t O)
+{
+       zpios_cmd_t *cmd;
+       size_t data_size, cmd_size;
+       ssize_t nread;
+       int rc;
+
+       dev_clear();
+
+       /* Sizes are computed in size_t so they cannot wrap a 32-bit sum */
+       data_size = ((size_t)T + N + 1) * sizeof(zpios_stats_t);
+       cmd_size = sizeof(zpios_cmd_t) + data_size;
+
+       /* Zeroed storage keeps the strncpy() targets NUL terminated */
+       cmd = calloc(1, cmd_size);
+       if (cmd == NULL)
+               return ENOMEM;
+
+       cmd->cmd_magic = ZPIOS_CMD_MAGIC;
+       strncpy(cmd->cmd_pool, args->pool, ZPIOS_NAME_SIZE - 1);
+       strncpy(cmd->cmd_pre, args->pre, ZPIOS_PATH_SIZE - 1);
+       strncpy(cmd->cmd_post, args->post, ZPIOS_PATH_SIZE - 1);
+       strncpy(cmd->cmd_log, args->log, ZPIOS_PATH_SIZE - 1);
+       cmd->cmd_id           = id;
+       cmd->cmd_chunk_size   = C;
+       cmd->cmd_thread_count = T;
+       cmd->cmd_region_count = N;
+       cmd->cmd_region_size  = S;
+       cmd->cmd_offset       = O;
+       cmd->cmd_region_noise = args->regionnoise;
+       cmd->cmd_chunk_noise  = args->chunknoise;
+       cmd->cmd_thread_delay = args->thread_delay;
+       cmd->cmd_flags        = args->flags;
+       cmd->cmd_data_size    = data_size;
+
+       rc = ioctl(zpiosctl_fd, ZPIOS_CMD, cmd);
+       if (rc)
+               args->rc = errno;
+
+       print_stats(args, cmd);
+
+       /* One byte of the scratch buffer is reserved for the terminator */
+       if (args->verbose && zpios_buffer_size > 1) {
+               nread = read(zpiosctl_fd, zpios_buffer, zpios_buffer_size - 1);
+               if (nread < 0) {
+                       fprintf(stdout, "Error reading results: %d\n", errno);
+               } else {
+                       /*
+                        * Terminate at the length actually read.  The
+                        * buffer is reused between runs, so without this
+                        * a shorter result would be followed by leftover
+                        * text from an earlier, longer one.
+                        */
+                       zpios_buffer[nread] = '\0';
+
+                       if ((nread > 0) && (strlen(zpios_buffer) > 0)) {
+                               fprintf(stdout, "\n%s\n", zpios_buffer);
+                               fflush(stdout);
+                       }
+               }
+       }
+
+       free(cmd);
+
+       return rc;
+}
+
+/*
+ * Innermost sweep: issue one run per offset value using the thread
+ * count, region count, chunk size and region size currently selected
+ * in args.  The run id is advanced after every run, and the sweep stops
+ * at the first non-zero run_one() result.
+ *
+ * The offset cursor is rewound before returning so that an enclosing
+ * sweep can walk the offsets again.  Returns the last run_one() result,
+ * or 0 if no run was issued.
+ */
+static int
+run_offsets(cmd_args_t *args)
+{
+       int status = 0;
+
+       for (;;) {
+               /* Test the status first: get_next() advances the cursor */
+               if (status != 0)
+                       break;
+
+               if (!get_next(&args->current_O, &args->O))
+                       break;
+
+               status = run_one(args, args->current_id,
+                   args->current_T, args->current_N, args->current_C,
+                   args->current_S, args->current_O);
+               args->current_id++;
+       }
+
+       args->O.next_val = 0;
+
+       return status;
+}
+
+/*
+ * Sweep the region counts, running the full offset sweep for each one.
+ * Stops at the first non-zero result and rewinds the region count
+ * cursor before returning that result.
+ */
+static int
+run_region_counts(cmd_args_t *args)
+{
+       int status = 0;
+
+       for (;;) {
+               if (status != 0)
+                       break;
+
+               /* current_N is already a uint64_t; no cast is required */
+               if (!get_next(&args->current_N, &args->N))
+                       break;
+
+               status = run_offsets(args);
+       }
+
+       args->N.next_val = 0;
+
+       return status;
+}
+
+/*
+ * Sweep the region sizes, running the region count sweep for each one.
+ *
+ * A region size smaller than the currently selected chunk size is
+ * rejected with EINVAL.  Otherwise the sweep stops at the first
+ * non-zero result from run_region_counts().  The region size cursor is
+ * rewound on every exit path, including the rejection.
+ */
+static int
+run_region_sizes(cmd_args_t *args)
+{
+       int rc = 0;
+
+       while (rc == 0 && get_next(&args->current_S, &args->S)) {
+               if (args->current_S < args->current_C) {
+                       /* The check rejects regionsize < chunksize */
+                       fprintf(stderr, "Error: in any run regionsize can "
+                               "not be smaller than chunksize.\n");
+                       rc = EINVAL;
+                       break;
+               }
+
+               rc = run_region_counts(args);
+       }
+
+       args->S.next_val = 0;
+       return rc;
+}
+
+/*
+ * Sweep the chunk sizes, running the region size sweep for each one.
+ * Stops at the first non-zero result and rewinds the chunk size cursor
+ * before returning that result.
+ */
+static int
+run_chunk_sizes(cmd_args_t *args)
+{
+       int status = 0;
+
+       for (;;) {
+               if (status != 0)
+                       break;
+
+               if (!get_next(&args->current_C, &args->C))
+                       break;
+
+               status = run_region_sizes(args);
+       }
+
+       args->C.next_val = 0;
+
+       return status;
+}
+
+/*
+ * Outermost sweep: run the chunk size sweep once per thread count and
+ * stop at the first non-zero result.  Unlike the inner sweeps, the
+ * thread count cursor is not rewound afterwards.
+ */
+static int
+run_thread_counts(cmd_args_t *args)
+{
+       int status = 0;
+
+       for (;;) {
+               if (status != 0)
+                       break;
+
+               /* current_T is already a uint64_t; no cast is required */
+               if (!get_next(&args->current_T, &args->T))
+                       break;
+
+               status = run_chunk_sizes(args);
+       }
+
+       return status;
+}
+
+/*
+ * Program entry point: parse the arguments, open the control device,
+ * print the statistics header and run the full parameter sweep.
+ *
+ * Returns -1 when argument parsing fails, the dev_init() error when the
+ * device cannot be set up, and otherwise the result of the sweep.
+ * dev_fini() runs on every path, whether or not dev_init() was reached.
+ */
+int
+main(int argc, char **argv)
+{
+       cmd_args_t *args = args_init(argc, argv);
+       int status;
+
+       if (args == NULL) {
+               /* Argument init and parsing failed */
+               status = -1;
+       } else {
+               /* Device specific init */
+               status = dev_init();
+               if (status == 0) {
+                       /* Generic kernel version string */
+                       if (args->verbose)
+                               fprintf(stdout, "%s", zpios_version);
+
+                       print_stats_header(args);
+                       status = run_thread_counts(args);
+               }
+
+               args_fini(args);
+       }
+
+       dev_fini();
+
+       return status;
+}
diff --git a/cmd/zpios/zpios_util.c b/cmd/zpios/zpios_util.c
new file mode 100644 (file)
index 0000000..48a0a46
--- /dev/null
@@ -0,0 +1,454 @@
+/*****************************************************************************\
+ *  ZPIOS is a heavily modified version of the original PIOS test code.
+ *  It is designed to have the test code running in the Linux kernel
+ *  against ZFS while still being flexibly controlled from user space.
+ *
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  LLNL-CODE-403049
+ *
+ *  Original PIOS Test Code
+ *  Copyright (C) 2004 Cluster File Systems, Inc.
+ *  Written by Peter Braam <braam@clusterfs.com>
+ *             Atul Vidwansa <atul@clusterfs.com>
+ *             Milind Dumbare <milind@clusterfs.com>
+ *
+ *  This file is part of ZFS on Linux.
+ *  For details, see <http://github.com/behlendorf/zfs/>.
+ *
+ *  ZPIOS is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  ZPIOS is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <regex.h>
+#include "zpios.h"
+
+/*
+ * Parse an unsigned 64-bit integer with an optional K, M, G or T suffix
+ * (either case) from 'str' and store it, scaled by the matching power
+ * of 1024, in '*val'.  The numeric base is auto-detected (base 0).
+ * Only the first character following the number is examined.
+ *
+ * Returns 0 on success, or EINVAL when no number could be parsed or the
+ * character after the number is neither a known suffix nor the NUL.
+ */
+static int
+kmgt_to_uint64(const char *str, uint64_t *val)
+{
+       char *endptr;
+       int rc = 0;
+
+       /* The destination is unsigned, so parse it as unsigned: strtoll()
+        * would clamp anything above LLONG_MAX. */
+       *val = strtoull(str, &endptr, 0);
+       if ((str == endptr) && (*val == 0))
+               return EINVAL;
+
+       switch (endptr[0]) {
+               case 'k': case 'K':
+                       *val = (*val) << 10;
+                       break;
+               case 'm': case 'M':
+                       *val = (*val) << 20;
+                       break;
+               case 'g': case 'G':
+                       *val = (*val) << 30;
+                       break;
+               case 't': case 'T':
+                       *val = (*val) << 40;
+                       break;
+               case '\0':
+                       break;
+               default:
+                       rc = EINVAL;
+       }
+
+       return rc;
+}
+
+/*
+ * Format 'val' into 'str' as a whole number followed by a k/m/g/t unit
+ * letter, dividing by 1024 for as long as the value is at least KB.
+ * Values below KB get no unit; values that would need a unit past 't'
+ * are written as "inf".  At most KMGT_SIZE-1 bytes are written.
+ * Returns 'str'.
+ */
+static char *
+uint64_to_kmgt(char *str, uint64_t val)
+{
+       char units[] = "kmgt";
+       int idx;
+
+       /* idx is -1 for "no unit", 0..3 for k..t, 4 for overflow */
+       for (idx = -1; (val >= KB) && (idx < 4); idx++)
+               val >>= 10;
+
+       if (idx < 4)
+               (void)snprintf(str, KMGT_SIZE-1, "%lu%c", (unsigned long)val,
+                              (idx == -1) ? '\0' : units[idx]);
+       else
+               (void)snprintf(str, KMGT_SIZE-1, "inf");
+
+       return str;
+}
+
+/*
+ * Format the rate v/t into 'str' with two decimals and a k/m/g/t unit
+ * letter, dividing by KB for as long as the rate is at least KB.
+ * Rates below KB get no unit; rates that would need a unit past 't'
+ * are written as "inf".  At most KMGT_SIZE-1 bytes are written.
+ * Returns 'str'.
+ */
+static char *
+kmgt_per_sec(char *str, uint64_t v, double t)
+{
+       char units[] = "kmgt";
+       double rate = ((double)v) / t;
+       int idx;
+
+       /* idx is -1 for "no unit", 0..3 for k..t, 4 for overflow */
+       for (idx = -1; (rate >= (double)KB) && (idx < 4); idx++)
+               rate /= (double)KB;
+
+       if (idx < 4)
+               (void)snprintf(str, KMGT_SIZE-1, "%.2f%c", rate,
+                              (idx == -1) ? '\0' : units[idx]);
+       else
+               (void)snprintf(str, KMGT_SIZE-1, "inf");
+
+       return str;
+}
+
+/*
+ * Render the test flag bits as a fixed seven character string plus
+ * terminating NUL in 'str' (which must hold at least 8 bytes):
+ * w(rite) r(ead) v(erify) c (remove) p/s (DMU_FPP set/clear)
+ * z (either zero-copy bit) O (DMU_WRITE_NOWAIT).  A '-' marks a
+ * cleared flag in every position except the p/s one.  Returns 'str'.
+ */
+static char *
+print_flags(char *str, uint32_t flags)
+{
+       char *out = str;
+
+       *out++ = (flags & DMU_WRITE)  ? 'w' : '-';
+       *out++ = (flags & DMU_READ)   ? 'r' : '-';
+       *out++ = (flags & DMU_VERIFY) ? 'v' : '-';
+       *out++ = (flags & DMU_REMOVE) ? 'c' : '-';
+       *out++ = (flags & DMU_FPP)    ? 'p' : 's';
+       *out++ = (flags & (DMU_WRITE_ZC | DMU_READ_ZC)) ? 'z' : '-';
+       *out++ = (flags & DMU_WRITE_NOWAIT) ? 'O' : '-';
+       *out = '\0';
+
+       return str;
+}
+
+/*
+ * Match 'string' against the extended, case-insensitive regular
+ * expression 'pattern'.  Returns 0 on a match, the regexec() result
+ * when there is no match, or the regcomp() error code (after printing
+ * a message to stderr) when the pattern does not compile.
+ */
+static int
+regex_match(const char *string, char *pattern)
+{
+       regex_t preg = { 0 };
+       int err;
+
+       err = regcomp(&preg, pattern, REG_EXTENDED | REG_NOSUB | REG_ICASE);
+       if (err != 0) {
+               fprintf(stderr, "Error: Couldn't do regcomp, %d\n", err);
+               return err;
+       }
+
+       /* No sub-matches are requested, only the match/no-match result */
+       err = regexec(&preg, string, (size_t) 0, NULL, 0);
+       regfree(&preg);
+
+       return err;
+}
+
+/*
+ * Fill 'range' from a comma separated list of values in 'optarg'.
+ *
+ * 'optarg' must first match the regular expression 'pattern'.  It is
+ * then split on commas and each token is converted with
+ * kmgt_to_uint64() into range->val[], with range->val_count set to the
+ * number of tokens stored.  The conversion status is not checked here;
+ * the regex match is the validation step.
+ *
+ * Returns 0 on success, the non-zero regex_match() result when
+ * 'optarg' does not match, or ENOMEM when the working copy cannot be
+ * allocated.
+ */
+static int
+split_string(const char *optarg, char *pattern, range_repeat_t *range)
+{
+       const char comma[] = ",";
+       char *buf, *cp, *token[32];
+       int rc, i = 0;
+
+       if ((rc = regex_match(optarg, pattern)))
+               return rc;
+
+       /* strtok() modifies its input, so work on a private copy.  Keep
+        * the original pointer in 'buf': 'cp' is cleared inside the loop
+        * and freeing it would leak the copy. */
+       buf = strdup(optarg);
+       if (buf == NULL)
+               return ENOMEM;
+
+       cp = buf;
+       do {
+               /* STRTOK(3) Each subsequent call, with a null pointer as
+                * the value of the first argument, starts searching from
+                * the saved pointer and behaves as described above.
+                */
+               token[i] = strtok(cp, comma);
+               cp = NULL;
+       } while ((token[i++] != NULL) && (i < 32));
+
+       range->val_count = i - 1;
+
+       /* The tokens point into 'buf', so convert before freeing it */
+       for (i = 0; i < range->val_count; i++)
+               kmgt_to_uint64(token[i], &range->val[i]);
+
+       free(buf);
+       return 0;
+}
+
+/*
+ * Parse a count option that is either a single value or a comma
+ * separated list.
+ *
+ * If 'flags' is non-NULL, FLAG_SET is or'ed into it.  range->next_val
+ * is reset to 0.  When 'optarg' matches 'pattern1' it is stored as the
+ * only value of 'range'; otherwise it is handed to split_string() with
+ * 'pattern2'.  'arg' is the option name used in the error message.
+ *
+ * Returns 0 on success or EINVAL when 'optarg' fits neither pattern.
+ */
+int
+set_count(char *pattern1, char *pattern2, range_repeat_t *range,
+         char *optarg, uint32_t *flags, char *arg)
+{
+       if (flags)
+               *flags |= FLAG_SET;
+
+       range->next_val = 0;
+
+       if (regex_match(optarg, pattern1) == 0) {
+               kmgt_to_uint64(optarg, &range->val[0]);
+               range->val_count = 1;
+       } else if (split_string(optarg, pattern2, range) != 0) {
+               /* split_string() reports failure with positive codes
+                * (the regexec() result or ENOMEM), never negative ones,
+                * so any non-zero value has to be treated as an error. */
+               fprintf(stderr, "Error: Incorrect pattern for %s, '%s'\n",
+                       arg, optarg);
+               return EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Parse one of the low / high / increment values of a range option.
+ *
+ * 'optarg' is validated against the regular expression 'pattern' and
+ * then converted into range->val_low, range->val_high or
+ * range->val_inc_perc, selected by 'flag' (FLAG_LOW, FLAG_HIGH or
+ * FLAG_INCR).  Afterwards 'flag' is or'ed into '*flag_thread'.  'arg'
+ * is the option name used in the error message.
+ *
+ * Returns 0 on success or the non-zero regex_match() result.
+ */
+int
+set_lhi(char *pattern, range_repeat_t *range, char *optarg,
+        int flag, uint32_t *flag_thread, char *arg)
+{
+       int rc = regex_match(optarg, pattern);
+
+       if (rc != 0) {
+               fprintf(stderr, "Error: Wrong pattern in %s, '%s'\n",
+                       arg, optarg);
+               return rc;
+       }
+
+       if (flag == FLAG_LOW)
+               kmgt_to_uint64(optarg, &range->val_low);
+       else if (flag == FLAG_HIGH)
+               kmgt_to_uint64(optarg, &range->val_high);
+       else if (flag == FLAG_INCR)
+               kmgt_to_uint64(optarg, &range->val_inc_perc);
+       else
+               assert(0);
+
+       *flag_thread |= flag;
+
+       return 0;
+}
+
+/*
+ * Parse a noise option: 'optarg' must match REGEX_NUMBERS and is then
+ * converted into '*noise'.  'arg' is the option name used in the error
+ * message.  Returns 0 on success or EINVAL on a pattern mismatch.
+ */
+int
+set_noise(uint64_t *noise, char *optarg, char *arg)
+{
+       if (regex_match(optarg, REGEX_NUMBERS) != 0) {
+               fprintf(stderr, "Error: Incorrect pattern for %s\n", arg);
+               return EINVAL;
+       }
+
+       kmgt_to_uint64(optarg, noise);
+       return 0;
+}
+
+/*
+ * Parse the comma separated load option in 'optarg' and update 'args':
+ *   "fpp"   sets DMU_FPP in args->flags (file per process/thread)
+ *   "ssf"   clears DMU_FPP in args->flags (single shared file)
+ *   "dmuio" sets DMU_IO in args->io_type and DMU_WRITE | DMU_READ in
+ *           args->flags
+ * Unknown keywords are reported on stderr; parsing continues with the
+ * remaining keywords.
+ *
+ * Returns 0 on success, EINVAL if any keyword was unknown, or ENOMEM
+ * when the working copy cannot be allocated.
+ */
+int
+set_load_params(cmd_args_t *args, char *optarg)
+{
+       char *param, *buf, *search, comma[] = ",";
+       int rc = 0;
+
+       /* strtok() modifies its input, so work on a private copy.  Keep
+        * the original pointer in 'buf': 'search' is cleared inside the
+        * loop and freeing it would leak the copy. */
+       buf = strdup(optarg);
+       if (buf == NULL)
+               return ENOMEM;
+
+       search = buf;
+       while ((param = strtok(search, comma)) != NULL) {
+               search = NULL;
+
+               if (strcmp("fpp", param) == 0) {
+                       args->flags |= DMU_FPP; /* File Per Process/Thread */
+               } else if (strcmp("ssf", param) == 0) {
+                       args->flags &= ~DMU_FPP; /* Single Shared File */
+               } else if (strcmp("dmuio", param) == 0) {
+                       args->io_type |= DMU_IO;
+                       args->flags |= (DMU_WRITE | DMU_READ);
+               } else {
+                       fprintf(stderr, "Invalid load: %s\n", param);
+                       rc = EINVAL;
+               }
+       }
+
+       free(buf);
+
+       return rc;
+}
+
+
+/*
+ * Check that a single-value option and its _low/_high/_incr variants
+ * were not mixed, e.g. threadcount is mutually exclusive with
+ * threadcount_low, ..._high and ..._incr, and that when the range
+ * variants are used all three were given.  'flag' holds the FLAG_SET /
+ * FLAG_LOW / FLAG_HIGH / FLAG_INCR bits collected for the option and
+ * 'arg' is the option name used in the error messages.
+ *
+ * Returns 1 when the combination is acceptable and 0 otherwise.
+ */
+int
+check_mutual_exclusive_command_lines(uint32_t flag, char *arg)
+{
+       int single = (flag & FLAG_SET) != 0;
+       int ranged = (flag & (FLAG_LOW | FLAG_HIGH | FLAG_INCR)) != 0;
+
+       if (single && ranged) {
+               fprintf(stderr, "Error: --%s can not be given with --%s_low, "
+                       "--%s_high or --%s_incr.\n", arg, arg, arg, arg);
+               return 0;
+       }
+
+       /* A range needs all three of low, high and incr */
+       if (ranged && !single &&
+           (flag != (FLAG_LOW | FLAG_HIGH | FLAG_INCR))) {
+               fprintf(stderr, "Error: One or more values missing "
+                       "from --%s_low, --%s_high, --%s_incr.\n",
+                       arg, arg, arg);
+               return 0;
+       }
+
+       return 1;
+}
+
+/*
+ * Print the two-line column header for the results table on stdout.
+ * The verbose form lists every run parameter and timing column, the
+ * default form only the status, name, id and data/bandwidth columns.
+ */
+void
+print_stats_header(cmd_args_t *args)
+{
+       if (!args->verbose) {
+               printf("status    name        id\t"
+                      "wr-data\twr-ch\twr-bw\t"
+                      "rd-data\trd-ch\trd-bw\n");
+               printf("-----------------------------------------"
+                      "--------------------------------------\n");
+               return;
+       }
+
+       printf("status    name        id\tth-cnt\trg-cnt\trg-sz\t"
+              "ch-sz\toffset\trg-no\tch-no\tth-dly\tflags\ttime\t"
+              "cr-time\trm-time\twr-time\trd-time\twr-data\twr-ch\t"
+              "wr-bw\trd-data\trd-ch\trd-bw\n");
+       printf("------------------------------------------------"
+              "------------------------------------------------"
+              "------------------------------------------------"
+              "----------------------------------------------\n");
+}
+
+/*
+ * Print one result row with sizes and bandwidths scaled to k/m/g/t
+ * units.  The row starts with PASS or FAIL (plus args->rc), the test
+ * name (ZPIOS_NAME when args->name is unset) and the run id.  In
+ * verbose mode the run parameters and, on success, the timings follow.
+ * A failed run ends the row after the parameters; a successful run
+ * appends the write and read totals taken from the zpios_stats_t
+ * stored in cmd->cmd_data_str.
+ */
+static void
+print_stats_human_readable(cmd_args_t *args, zpios_cmd_t *cmd)
+{
+       zpios_stats_t *stats;
+       double total, creating, removing, writing, reading;
+       char buf[KMGT_SIZE];
+
+       if (args->rc == 0)
+               printf("PASS:     ");
+       else
+               printf("FAIL: %3d ", args->rc);
+
+       printf("%-12s", args->name ? args->name : ZPIOS_NAME);
+       printf("%2u\t", cmd->cmd_id);
+
+       if (args->verbose) {
+               printf("%u\t%u\t", cmd->cmd_thread_count,
+                      cmd->cmd_region_count);
+               /* 'buf' is reused, so each field needs its own printf */
+               printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_region_size));
+               printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_chunk_size));
+               printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_offset));
+               printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_region_noise));
+               printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_chunk_noise));
+               printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_thread_delay));
+               printf("%s\t", print_flags(buf, cmd->cmd_flags));
+       }
+
+       if (args->rc != 0) {
+               printf("\n");
+               return;
+       }
+
+       stats    = (zpios_stats_t *)cmd->cmd_data_str;
+       total    = zpios_timespec_to_double(stats->total_time.delta);
+       writing  = zpios_timespec_to_double(stats->wr_time.delta);
+       reading  = zpios_timespec_to_double(stats->rd_time.delta);
+       creating = zpios_timespec_to_double(stats->cr_time.delta);
+       removing = zpios_timespec_to_double(stats->rm_time.delta);
+
+       if (args->verbose)
+               printf("%.2f\t%.3f\t%.3f\t%.2f\t%.2f\t",
+                      total, creating, removing, writing, reading);
+
+       printf("%s\t", uint64_to_kmgt(buf, stats->wr_data));
+       printf("%s\t", uint64_to_kmgt(buf, stats->wr_chunks));
+       printf("%s\t", kmgt_per_sec(buf, stats->wr_data, writing));
+
+       printf("%s\t", uint64_to_kmgt(buf, stats->rd_data));
+       printf("%s\t", uint64_to_kmgt(buf, stats->rd_chunks));
+       printf("%s\n", kmgt_per_sec(buf, stats->rd_data, reading));
+       fflush(stdout);
+}
+
+/*
+ * Print one result row with raw, unscaled numbers.  The row starts
+ * with PASS or FAIL (plus args->rc), the test name (ZPIOS_NAME when
+ * args->name is unset) and the run id.  In verbose mode the run
+ * parameters and, on success, the timings follow.  A failed run ends
+ * the row after the parameters; a successful run appends the write and
+ * read totals taken from the zpios_stats_t stored in
+ * cmd->cmd_data_str, with bandwidth in bytes per second.
+ */
+static void
+print_stats_table(cmd_args_t *args, zpios_cmd_t *cmd)
+{
+       zpios_stats_t *summary_stats;
+       double wr_time, rd_time;
+
+       if (args->rc)
+               printf("FAIL: %3d ", args->rc);
+       else
+               printf("PASS:     ");
+
+       printf("%-12s", args->name ? args->name : ZPIOS_NAME);
+       printf("%2u\t", cmd->cmd_id);
+
+       if (args->verbose) {
+               printf("%u\t", cmd->cmd_thread_count);
+               printf("%u\t", cmd->cmd_region_count);
+               printf("%llu\t", (long long unsigned)cmd->cmd_region_size);
+               printf("%llu\t", (long long unsigned)cmd->cmd_chunk_size);
+               printf("%llu\t", (long long unsigned)cmd->cmd_offset);
+               printf("%u\t", cmd->cmd_region_noise);
+               printf("%u\t", cmd->cmd_chunk_noise);
+               printf("%u\t", cmd->cmd_thread_delay);
+               printf("0x%x\t", cmd->cmd_flags);
+       }
+
+       if (args->rc) {
+               printf("\n");
+               return;
+       }
+
+       summary_stats = (zpios_stats_t *)cmd->cmd_data_str;
+       wr_time = zpios_timespec_to_double(summary_stats->wr_time.delta);
+       rd_time = zpios_timespec_to_double(summary_stats->rd_time.delta);
+
+       /* ts_nsec is a nanosecond count, so the fractional part must be
+        * zero padded to nine digits; a narrower pad would print 5000000
+        * ns as ".5000000" and shift the decimal value. */
+       if (args->verbose) {
+               printf("%ld.%09ld\t",
+                      (long)summary_stats->total_time.delta.ts_sec,
+                      (long)summary_stats->total_time.delta.ts_nsec);
+               printf("%ld.%09ld\t",
+                      (long)summary_stats->cr_time.delta.ts_sec,
+                      (long)summary_stats->cr_time.delta.ts_nsec);
+               printf("%ld.%09ld\t",
+                      (long)summary_stats->rm_time.delta.ts_sec,
+                      (long)summary_stats->rm_time.delta.ts_nsec);
+               printf("%ld.%09ld\t",
+                      (long)summary_stats->wr_time.delta.ts_sec,
+                      (long)summary_stats->wr_time.delta.ts_nsec);
+               printf("%ld.%09ld\t",
+                      (long)summary_stats->rd_time.delta.ts_sec,
+                      (long)summary_stats->rd_time.delta.ts_nsec);
+       }
+
+       /* The counters are unsigned 64-bit, so print them with %llu */
+       printf("%llu\t", (long long unsigned)summary_stats->wr_data);
+       printf("%llu\t", (long long unsigned)summary_stats->wr_chunks);
+       printf("%.4f\t", (double)summary_stats->wr_data / wr_time);
+
+       printf("%llu\t", (long long unsigned)summary_stats->rd_data);
+       printf("%llu\t", (long long unsigned)summary_stats->rd_chunks);
+       printf("%.4f\n", (double)summary_stats->rd_data / rd_time);
+       fflush(stdout);
+}
+
+/*
+ * Print the result row for one run, in the scaled human readable
+ * format when args->human_readable is set and as raw numbers
+ * otherwise.
+ */
+void
+print_stats(cmd_args_t *args, zpios_cmd_t *cmd)
+{
+       if (!args->human_readable) {
+               print_stats_table(args, cmd);
+               return;
+       }
+
+       print_stats_human_readable(args, cmd);
+}
index b880035..fbaffca 100644 (file)
@@ -53,6 +53,8 @@ SCRIPTDIR=${SCRIPTDIR}
 ETCDIR=\${TOPDIR}/etc
 DEVDIR=\${TOPDIR}/dev
 ZPOOLDIR=\${TOPDIR}/scripts/zpool-config
+ZPIOSDIR=\${TOPDIR}/scripts/zpios-test
+ZPIOSPROFILEDIR=\${TOPDIR}/scripts/zpios-profile
 
 ZDB=\${CMDDIR}/zdb/zdb
 ZFS=\${CMDDIR}/zfs/zfs
@@ -60,10 +62,13 @@ ZINJECT=\${CMDDIR}/zinject/zinject
 ZPOOL=\${CMDDIR}/zpool/zpool
 ZPOOL_ID=\${CMDDIR}/zpool_id/zpool_id
 ZTEST=\${CMDDIR}/ztest/ztest
+ZPIOS=\${CMDDIR}/zpios/zpios
 
 COMMON_SH=\${SCRIPTDIR}/common.sh
 ZFS_SH=\${SCRIPTDIR}/zfs.sh
 ZPOOL_CREATE_SH=\${SCRIPTDIR}/zpool-create.sh
+ZPIOS_SH=\${SCRIPTDIR}/zpios.sh
+ZPIOS_SURVEY_SH=\${SCRIPTDIR}/zpios-survey.sh
 
 INTREE=1
 LDMOD=/sbin/insmod
@@ -85,6 +90,10 @@ ZFS_MODULES=(                                         \\
         \${MODDIR}/zfs/zfs.ko                          \\
 )
 
+ZPIOS_MODULES=(                                       \\
+        \${MODDIR}/zpios/zpios.ko                      \\
+)
+
 MODULES=(                                             \\
         \${KERNEL_MODULES[[*]]}                          \\
         \${SPL_MODULES[[*]]}                             \\
index 0b7ea81..3963d23 100644 (file)
@@ -81,12 +81,14 @@ AC_CONFIG_FILES([
        cmd/zpool_id/Makefile
        cmd/zpool_layout/Makefile
        cmd/ztest/Makefile
+       cmd/zpios/Makefile
        module/Makefile
        module/avl/Makefile
        module/nvpair/Makefile
        module/unicode/Makefile
        module/zcommon/Makefile
        module/zfs/Makefile
+       module/zpios/Makefile
        scripts/Makefile
        scripts/common.sh
        zfs.spec
index 489119a..f59f044 100644 (file)
@@ -3,6 +3,7 @@ subdir-m += nvpair
 subdir-m += unicode
 subdir-m += zcommon
 subdir-m += zfs
+subdir-m += zpios
 
 modules:
        # Make the exported SPL symbols available to these modules.
diff --git a/module/zpios/Makefile.in b/module/zpios/Makefile.in
new file mode 100644 (file)
index 0000000..4924082
--- /dev/null
@@ -0,0 +1,11 @@
+# Kernel build description for the zpios module.  The @...@ tokens are
+# placeholders that are substituted when the Makefile is generated
+# from this template.
+MODULE := zpios
+
+# Header search paths of the sibling modules, plus this module's own
+# include directory.
+EXTRA_CFLAGS  = -I@MODDIR@/zfs/include
+EXTRA_CFLAGS += -I@MODDIR@/zcommon/include
+EXTRA_CFLAGS += -I@MODDIR@/avl/include
+EXTRA_CFLAGS += -I@MODDIR@/nvpair/include
+EXTRA_CFLAGS += -I@MODDIR@/unicode/include
+EXTRA_CFLAGS += -I@MODDIR@/zpios/include
+EXTRA_CFLAGS += @KERNELCPPFLAGS@
+
+# Build ${MODULE}.o as a loadable module
+obj-m := ${MODULE}.o
diff --git a/module/zpios/include/zpios-ctl.h b/module/zpios/include/zpios-ctl.h
new file mode 100644 (file)
index 0000000..234e96c
--- /dev/null
@@ -0,0 +1,198 @@
+/*****************************************************************************\
+ *  ZPIOS is a heavily modified version of the original PIOS test code.
+ *  It is designed to have the test code running in the Linux kernel
+ *  against ZFS while still being flexibly controlled from user space.
+ *
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  LLNL-CODE-403049
+ *
+ *  Original PIOS Test Code
+ *  Copyright (C) 2004 Cluster File Systems, Inc.
+ *  Written by Peter Braam <braam@clusterfs.com>
+ *             Atul Vidwansa <atul@clusterfs.com>
+ *             Milind Dumbare <milind@clusterfs.com>
+ *
+ *  This file is part of ZFS on Linux.
+ *  For details, see <http://github.com/behlendorf/zfs/>.
+ *
+ *  ZPIOS is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  ZPIOS is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#ifndef _ZPIOS_CTL_H
+#define _ZPIOS_CTL_H
+
+/* Contains shared definitions which both the userspace
+ * and kernelspace portions of zpios must agree on.
+ */
+#ifndef _KERNEL
+#include <stdint.h>
+#endif
+
+/* Device identification: major number, minor count, name and node path */
+#define ZPIOS_MAJOR                    232 /* XXX - Arbitrary */
+#define ZPIOS_MINORS                   1
+#define ZPIOS_NAME                     "zpios"
+#define ZPIOS_DEV                      "/dev/zpios"
+
+/* I/O type bit; the user space option parser stores it in io_type */
+#define DMU_IO                         0x01
+
+/* Test flag bits, carried in zpios_cmd_t.cmd_flags */
+#define DMU_WRITE                      0x0001
+#define DMU_READ                       0x0002
+#define DMU_VERIFY                     0x0004
+#define DMU_REMOVE                     0x0008
+#define DMU_FPP                                0x0010
+#define DMU_WRITE_ZC                   0x0020 /* Incompatible w/DMU_VERIFY */
+#define DMU_READ_ZC                    0x0040 /* Incompatible w/DMU_VERIFY */
+#define DMU_WRITE_NOWAIT               0x0080
+#define DMU_READ_NOPF                  0x0100
+
+/* Sizes of the fixed name and path buffers in zpios_cmd_t */
+#define ZPIOS_NAME_SIZE                        16
+#define ZPIOS_PATH_SIZE                        128
+
+/* Phase name strings.
+ * NOTE(review): presumably the 'phase' argument given to the pre/post
+ * hooks -- confirm against zpios_upcall(). */
+#define PHASE_PRE_RUN                  "pre-run"
+#define PHASE_PRE_CREATE               "pre-create"
+#define PHASE_PRE_WRITE                        "pre-write"
+#define PHASE_PRE_READ                 "pre-read"
+#define PHASE_PRE_REMOVE               "pre-remove"
+#define PHASE_POST_RUN                 "post-run"
+#define PHASE_POST_CREATE              "post-create"
+#define PHASE_POST_WRITE               "post-write"
+#define PHASE_POST_READ                        "post-read"
+#define PHASE_POST_REMOVE              "post-remove"
+
+/* Configuration request, the argument type of the ZPIOS_CFG ioctl.
+ * ZPIOS_CFG_MAGIC is the value expected in cfg_magic. */
+#define        ZPIOS_CFG_MAGIC                 0x87237190U
+typedef struct zpios_cfg {
+       uint32_t cfg_magic;             /* Unique magic */
+       int32_t cfg_cmd;                /* Config command */
+       int32_t cfg_arg1;               /* Config command arg 1 */
+       int32_t cfg_rc1;                /* Config response 1 */
+} zpios_cfg_t;
+
+/* Fixed-width time value: whole seconds plus nanoseconds.  Both fields
+ * are unsigned 32-bit, independent of the platform's struct timespec. */
+typedef struct zpios_timespec {
+       uint32_t ts_sec;
+       uint32_t ts_nsec;
+} zpios_timespec_t;
+
+/* One timed interval: its start and stop stamps and the elapsed time
+ * (delta) that the reporting code converts and prints. */
+typedef struct zpios_time {
+       zpios_timespec_t start;
+       zpios_timespec_t stop;
+       zpios_timespec_t delta;
+} zpios_time_t;
+
+/* Statistics for a run: timings for the whole run (total) and for the
+ * cr, rm, wr and rd phases (reported as cr-time, rm-time, wr-time and
+ * rd-time), plus the amount of data and number of chunks written and
+ * read. */
+typedef struct zpios_stats {
+       zpios_time_t total_time;
+       zpios_time_t cr_time;
+       zpios_time_t rm_time;
+       zpios_time_t wr_time;
+       zpios_time_t rd_time;
+       uint64_t wr_data;
+       uint64_t wr_chunks;
+       uint64_t rd_data;
+       uint64_t rd_chunks;
+} zpios_stats_t;
+
+/* Description of one test run, the argument type of the ZPIOS_CMD
+ * ioctl.  ZPIOS_CMD_MAGIC is the value expected in cmd_magic.  The
+ * structure ends in a zero-length array: cmd_data_size bytes of opaque
+ * data follow it, which the user space reporting code reads as a
+ * zpios_stats_t. */
+#define        ZPIOS_CMD_MAGIC                 0x49715385U
+typedef struct zpios_cmd {
+       uint32_t cmd_magic;             /* Unique magic */
+       uint32_t cmd_id;                /* Run ID */
+       char cmd_pool[ZPIOS_NAME_SIZE]; /* Pool name */
+       uint64_t cmd_chunk_size;        /* Chunk size */
+       uint32_t cmd_thread_count;      /* Thread count */
+       uint32_t cmd_region_count;      /* Region count */
+       uint64_t cmd_region_size;       /* Region size */
+       uint64_t cmd_offset;            /* Region offset */
+       uint32_t cmd_region_noise;      /* Region noise */
+       uint32_t cmd_chunk_noise;       /* Chunk noise */
+       uint32_t cmd_thread_delay;      /* Thread delay */
+       uint32_t cmd_flags;             /* Test flags */
+        char cmd_pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */
+        char cmd_post[ZPIOS_PATH_SIZE];        /* Post-exec hook */
+       char cmd_log[ZPIOS_PATH_SIZE];  /* Requested log dir */
+       uint64_t cmd_data_size;         /* Opaque data size */
+       char cmd_data_str[0];           /* Opaque data region */
+} zpios_cmd_t;
+
+/* Valid ioctls; each one both reads and writes its argument structure */
+#define ZPIOS_CFG                      _IOWR('f', 101, zpios_cfg_t)
+#define ZPIOS_CMD                      _IOWR('f', 102, zpios_cmd_t)
+
+/* Valid configuration commands (values for zpios_cfg_t.cfg_cmd) */
+#define ZPIOS_CFG_BUFFER_CLEAR         0x001   /* Clear text buffer */
+#define ZPIOS_CFG_BUFFER_SIZE          0x002   /* Resize text buffer */
+
+/* Nanoseconds per second; only defined here when the environment has
+ * not already provided it. */
+#ifndef NSEC_PER_SEC
+#define NSEC_PER_SEC    1000000000L
+#endif
+
+/*
+ * Store sec/nsec in '*ts' after folding 'nsec' into the range
+ * [0, NSEC_PER_SEC), carrying into or borrowing from 'sec'.
+ *
+ * 'nsec' is read as a signed 32-bit quantity: zpios_timespec_sub()
+ * passes the difference of two unsigned nanosecond fields, which wraps
+ * to a huge unsigned value when the result is negative.  Testing the
+ * unsigned value against 0 can never be true, so the borrow would be
+ * skipped and the wrapped value carried upward instead.  Sums and
+ * differences of normalized fields always fit in 32 signed bits.
+ */
+static inline
+void zpios_timespec_normalize(zpios_timespec_t *ts, uint32_t sec, uint32_t nsec)
+{
+       int32_t ns = (int32_t)nsec;
+
+       while (ns >= NSEC_PER_SEC) {
+               ns -= NSEC_PER_SEC;
+               sec++;
+       }
+       while (ns < 0) {
+               ns += NSEC_PER_SEC;
+               sec--;
+       }
+       ts->ts_sec = sec;
+       ts->ts_nsec = (uint32_t)ns;
+}
+
+/*
+ * Return lhs + rhs: the fields are added separately and the result is
+ * passed through zpios_timespec_normalize().
+ */
+static inline
+zpios_timespec_t zpios_timespec_add(zpios_timespec_t lhs, zpios_timespec_t rhs)
+{
+       zpios_timespec_t sum;
+       uint32_t sec = lhs.ts_sec + rhs.ts_sec;
+       uint32_t nsec = lhs.ts_nsec + rhs.ts_nsec;
+
+       zpios_timespec_normalize(&sum, sec, nsec);
+       return sum;
+}
+
+/*
+ * Return lhs - rhs as a normalized time value.
+ *
+ * The nanosecond fields are unsigned, so when lhs has fewer
+ * nanoseconds than rhs one second is borrowed here before
+ * subtracting; a plain unsigned subtraction would wrap around to a
+ * value close to 2^32.
+ */
+static inline
+zpios_timespec_t zpios_timespec_sub(zpios_timespec_t lhs, zpios_timespec_t rhs)
+{
+       zpios_timespec_t ts_delta;
+       uint32_t sec = lhs.ts_sec - rhs.ts_sec;
+       uint32_t nsec;
+
+       if (lhs.ts_nsec < rhs.ts_nsec) {
+               sec--;
+               nsec = (uint32_t)(lhs.ts_nsec + NSEC_PER_SEC - rhs.ts_nsec);
+       } else {
+               nsec = lhs.ts_nsec - rhs.ts_nsec;
+       }
+
+       zpios_timespec_normalize(&ts_delta, sec, nsec);
+       return ts_delta;
+}
+
+#ifdef _KERNEL
+
+/*
+ * Return the value of current_kernel_time() copied into a
+ * zpios_timespec_t.
+ */
+static inline
+zpios_timespec_t zpios_timespec_now(void)
+{
+       struct timespec now = current_kernel_time();
+       zpios_timespec_t stamp;
+
+       stamp.ts_sec  = now.tv_sec;
+       stamp.ts_nsec = now.tv_nsec;
+
+       return stamp;
+}
+
+#else
+
+/*
+ * Convert a time value to seconds as a double: whole seconds plus the
+ * nanosecond field divided by NSEC_PER_SEC.
+ */
+static inline
+double zpios_timespec_to_double(zpios_timespec_t ts)
+{
+       double whole = (double)(ts.ts_sec);
+       double frac = (double)(ts.ts_nsec) / (double)(NSEC_PER_SEC);
+
+       return (whole + frac);
+}
+
+#endif /* _KERNEL */
+
+#endif /* _ZPIOS_CTL_H */
diff --git a/module/zpios/include/zpios-internal.h b/module/zpios/include/zpios-internal.h
new file mode 100644 (file)
index 0000000..c9b6e00
--- /dev/null
@@ -0,0 +1,138 @@
+/*****************************************************************************\
+ *  ZPIOS is a heavily modified version of the original PIOS test code.
+ *  It is designed to have the test code running in the Linux kernel
+ *  against ZFS while still being flexibly controlled from user space.
+ *
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  LLNL-CODE-403049
+ *
+ *  Original PIOS Test Code
+ *  Copyright (C) 2004 Cluster File Systems, Inc.
+ *  Written by Peter Braam <braam@clusterfs.com>
+ *             Atul Vidwansa <atul@clusterfs.com>
+ *             Milind Dumbare <milind@clusterfs.com>
+ *
+ *  This file is part of ZFS on Linux.
+ *  For details, see <http://github.com/behlendorf/zfs/>.
+ *
+ *  ZPIOS is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  ZPIOS is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#ifndef _ZPIOS_INTERNAL_H
+#define _ZPIOS_INTERNAL_H
+
+#include "zpios-ctl.h"
+
+/* NOTE(review): OBJ_SIZE is not used in this header; its unit and
+ * purpose have to be confirmed where it is used. */
+#define OBJ_SIZE       64
+
+/* Defined below; declared early so thread_data_t can point to it */
+struct run_args;
+
+/* An object set paired with a 64-bit object number in it */
+typedef struct dmu_obj {
+       objset_t *os;
+       uint64_t obj;
+} dmu_obj_t;
+
+/* Per-thread state for a thread doing the IO: a back pointer to the
+ * run it belongs to, its thread number, a result code, its own
+ * statistics and a mutex. */
+typedef struct thread_data {
+       struct run_args *run_args;
+       int thread_no;
+       int rc;
+       zpios_stats_t stats;
+        kmutex_t lock;
+} thread_data_t;
+
+/* Per-region state for the IO data: write, read, initial and maximum
+ * offsets, the object the region refers to, its own statistics and a
+ * mutex. */
+typedef struct zpios_region {
+       __u64 wr_offset;
+       __u64 rd_offset;
+       __u64 init_offset;
+       __u64 max_offset;
+       dmu_obj_t obj;
+       zpios_stats_t stats;
+        kmutex_t lock;
+} zpios_region_t;
+
+/* Arguments and state for one run.  The config members mirror the
+ * fields of zpios_cmd_t.  The structure ends in a zero-length array of
+ * regions, so it has to be allocated with room for the regions behind
+ * it. */
+typedef struct run_args {
+       /* Config args */
+       int id;
+       char pool[ZPIOS_NAME_SIZE];
+       __u64 chunk_size;
+       __u32 thread_count;
+       __u32 region_count;
+       __u64 region_size;
+       __u64 offset;
+       __u32 region_noise;
+       __u32 chunk_noise;
+       __u32 thread_delay;
+       __u32 flags;
+       char pre[ZPIOS_PATH_SIZE];
+       char post[ZPIOS_PATH_SIZE];
+       char log[ZPIOS_PATH_SIZE];
+
+       /* Control data */
+       objset_t *os;
+        wait_queue_head_t waitq;
+       volatile uint64_t threads_done;
+        kmutex_t lock_work;
+       kmutex_t lock_ctl;
+       __u32 region_next;
+
+       /* Results data */
+       struct file *file;
+       zpios_stats_t stats;
+
+       thread_data_t **threads;
+       zpios_region_t regions[0]; /* Must be last element */
+} run_args_t;
+
+/* Size of the text buffer and of the red zone at its end; zpios_print()
+ * refuses to start a write once the head is inside the red zone. */
+#define ZPIOS_INFO_BUFFER_SIZE          65536
+#define ZPIOS_INFO_BUFFER_REDZONE       1024
+
+/* Text buffer state reached through file->private_data: the lock
+ * guarding it, the buffer size, the buffer itself and the position
+ * where the next message is written. */
+typedef struct zpios_info {
+        spinlock_t info_lock;
+        int info_size;
+        char *info_buffer;
+        char *info_head;        /* Internal kernel use only */
+} zpios_info_t;
+
+/*
+ * Append a formatted message to the text buffer attached to 'file'
+ * (file->private_data must point to a zpios_info_t with a buffer).
+ * The write happens at info_head while holding info_lock, and
+ * info_head is advanced by the number of characters written.
+ *
+ * Evaluates to the sprintf() result, or to -EOVERFLOW without writing
+ * when info_head has already moved into the red zone at the end of
+ * the buffer.  At least one argument must follow 'format'.
+ *
+ * NOTE(review): the write itself is an unbounded sprintf(), so the
+ * red zone only protects messages shorter than
+ * ZPIOS_INFO_BUFFER_REDZONE.
+ */
+#define zpios_print(file, format, args...)                              \
+({      zpios_info_t *_info_ = (zpios_info_t *)file->private_data;      \
+        int _rc_;                                                       \
+                                                                        \
+        ASSERT(_info_);                                                 \
+        ASSERT(_info_->info_buffer);                                    \
+                                                                        \
+        spin_lock(&_info_->info_lock);                                  \
+                                                                        \
+        /* Don't allow the kernel to start a write in the red zone */   \
+        if ((int)(_info_->info_head - _info_->info_buffer) >            \
+            (_info_->info_size - ZPIOS_INFO_BUFFER_REDZONE))      {     \
+                _rc_ = -EOVERFLOW;                                      \
+        } else {                                                        \
+                _rc_ = sprintf(_info_->info_head, format, args);        \
+                if (_rc_ >= 0)                                          \
+                        _info_->info_head += _rc_;                      \
+        }                                                               \
+                                                                        \
+        spin_unlock(&_info_->info_lock);                                \
+        _rc_;                                                           \
+})
+
+/* Like zpios_print(), but prefixes the message with the string 'test'
+ * right-aligned in a field of ZPIOS_NAME_SIZE characters and ": ". */
+#define zpios_vprint(file, test, format, args...)                       \
+        zpios_print(file, "%*s: " format, ZPIOS_NAME_SIZE, test, args)
+
+#endif /* _ZPIOS_INTERNAL_H */
diff --git a/module/zpios/zpios.c b/module/zpios/zpios.c
new file mode 100644 (file)
index 0000000..3edc161
--- /dev/null
@@ -0,0 +1,1331 @@
+/*****************************************************************************\
+ *  ZPIOS is a heavily modified version of the original PIOS test code.
+ *  It is designed to have the test code running in the Linux kernel
+ *  against ZFS while still being flexibly controlled from user space.
+ *
+ *  Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
+ *  LLNL-CODE-403049
+ *
+ *  Original PIOS Test Code
+ *  Copyright (C) 2004 Cluster File Systems, Inc.
+ *  Written by Peter Braam <braam@clusterfs.com>
+ *             Atul Vidwansa <atul@clusterfs.com>
+ *             Milind Dumbare <milind@clusterfs.com>
+ *
+ *  This file is part of ZFS on Linux.
+ *  For details, see <http://github.com/behlendorf/zfs/>.
+ *
+ *  ZPIOS is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ *  ZPIOS is distributed in the hope that it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#include <sys/zfs_context.h>
+#include <sys/dmu.h>
+#include <sys/txg.h>
+#include <linux/cdev.h>
+#include "zpios-internal.h"
+
+
+/*
+ * Class and device handles for the zpios control device.
+ * NOTE(review): presumably populated by the module init code, which is
+ * outside this part of the file -- confirm.
+ */
+static spl_class *zpios_class;
+static spl_device *zpios_device;
+/* Owner tag passed to dmu_objset_own() and dmu_objset_disown() */
+static char *zpios_tag = "zpios_tag";
+
+/*
+ * Invoke the user space helper registered at 'path' for test phase
+ * 'phase'.  The run parameters from 'run_args' and the phase result
+ * 'rc' are passed to the helper as strings on its command line.
+ *
+ * Returns -ENOENT when no helper is registered (NULL or empty path),
+ * otherwise whatever call_usermodehelper() returns.
+ */
+static
+int zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
+{
+       /* This is stack heavy but it should be OK since we are only
+        * making the upcall between tests when the stack is shallow.
+        * The buffers for values printed as 64-bit quantities are large
+        * enough for a full 20 digit number so nothing is truncated.
+        */
+       char id[16], chunk_size[24], region_size[24], thread_count[16];
+       char region_count[16], offset[24], region_noise[16], chunk_noise[16];
+       char thread_delay[16], flags[16], result[16];
+       char *argv[16], *envp[4];
+
+       if ((path == NULL) || (strlen(path) == 0))
+               return -ENOENT;
+
+       snprintf(id, sizeof(id), "%d", run_args->id);
+       snprintf(chunk_size, sizeof(chunk_size), "%llu",
+                (unsigned long long)run_args->chunk_size);
+       snprintf(region_size, sizeof(region_size), "%llu",
+                (unsigned long long)run_args->region_size);
+       snprintf(thread_count, sizeof(thread_count), "%u",
+                run_args->thread_count);
+       snprintf(region_count, sizeof(region_count), "%u",
+                run_args->region_count);
+       snprintf(offset, sizeof(offset), "%llu",
+                (unsigned long long)run_args->offset);
+       snprintf(region_noise, sizeof(region_noise), "%u",
+                run_args->region_noise);
+       snprintf(chunk_noise, sizeof(chunk_noise), "%u",
+                run_args->chunk_noise);
+       snprintf(thread_delay, sizeof(thread_delay), "%u",
+                run_args->thread_delay);
+       snprintf(flags, sizeof(flags), "0x%x", run_args->flags);
+       snprintf(result, sizeof(result), "%d", rc);
+
+       /* Passing 15 args to registered pre/post upcall */
+       argv[0] = path;
+       argv[1] = phase;
+       argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
+       argv[3] = id;
+       argv[4] = run_args->pool;
+       argv[5] = chunk_size;
+       argv[6] = region_size;
+       argv[7] = thread_count;
+       argv[8] = region_count;
+       argv[9] = offset;
+       argv[10] = region_noise;
+       argv[11] = chunk_noise;
+       argv[12] = thread_delay;
+       argv[13] = flags;
+       argv[14] = result;
+       argv[15] = NULL;
+
+       /* Passing environment for user space upcall */
+       envp[0] = "HOME=/";
+       envp[1] = "TERM=linux";
+       envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
+       envp[3] = NULL;
+
+       /* NOTE(review): the literal 1 is the helper wait mode; presumably
+        * "wait for the process to finish" -- confirm for the target kernel.
+        */
+       return call_usermodehelper(path, argv, envp, 1);
+}
+
+/*
+ * Allocate a new DMU_OT_UINT64_OTHER object in 'os' and request a
+ * 128 KiB block size for it.
+ *
+ * Returns the new object number, or 0 when the transaction could not
+ * be assigned.  A dmu_object_set_blocksize() failure is logged but the
+ * already allocated object is still returned to the caller.
+ */
+static uint64_t
+zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
+{
+       struct dmu_tx *tx;
+       uint64_t obj;
+       int rc;
+
+       tx = dmu_tx_create(os);
+       dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
+       rc = dmu_tx_assign(tx, TXG_WAIT);
+       if (rc) {
+               zpios_print(run_args->file,
+                           "dmu_tx_assign() failed: %d\n", rc);
+               /* Never assigned, so aborting is the correct teardown */
+               dmu_tx_abort(tx);
+               return 0ULL;
+       }
+
+       obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
+                              DMU_OT_NONE, 0, tx);
+       rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
+       if (rc)
+               zpios_print(run_args->file,
+                           "dmu_object_set_blocksize() failed: %d\n", rc);
+
+       /* Once a tx has been assigned it can no longer be aborted, it
+        * must be committed on both the success and the error path.
+        */
+       dmu_tx_commit(tx);
+
+       return obj;
+}
+
+/*
+ * Free object 'obj' in 'os' inside its own transaction.
+ *
+ * Returns 0 on success, or the error from dmu_tx_assign() or
+ * dmu_object_free().  Failures are also logged with zpios_print().
+ */
+static int
+zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
+{
+       struct dmu_tx *tx;
+       int rc;
+
+       tx = dmu_tx_create(os);
+       dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
+       rc = dmu_tx_assign(tx, TXG_WAIT);
+       if (rc) {
+               zpios_print(run_args->file,
+                           "dmu_tx_assign() failed: %d\n", rc);
+               /* Never assigned, so aborting is the correct teardown */
+               dmu_tx_abort(tx);
+               return rc;
+       }
+
+       rc = dmu_object_free(os, obj, tx);
+       if (rc)
+               zpios_print(run_args->file,
+                           "dmu_object_free() failed: %d\n", rc);
+
+       /* Once a tx has been assigned it can no longer be aborted, it
+        * must be committed on both the success and the error path.
+        */
+       dmu_tx_commit(tx);
+
+       return rc;
+}
+
+/*
+ * Create and own the "<pool>/id_<id>" objset for this run, create the
+ * object(s) the test will write to, and initialise every region.
+ *
+ * With DMU_FPP set each region gets its own object; otherwise all
+ * regions share one object and are spaced run_args->offset apart.
+ * The elapsed time is recorded in run_args->stats.cr_time and the
+ * pre/post create upcalls are issued around the work.
+ *
+ * Returns 0 on success with run_args->os set.  On failure everything
+ * created here is torn down again (region locks destroyed, objset
+ * disowned and destroyed) and a non-zero error is returned.
+ */
+static int
+zpios_dmu_setup(run_args_t *run_args)
+{
+       zpios_time_t *t = &(run_args->stats.cr_time);
+       objset_t *os;
+       char name[32];
+       uint64_t obj = 0ULL;
+       int i, rc = 0, rc2;
+
+       (void)zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
+       t->start = zpios_timespec_now();
+
+       (void)snprintf(name, sizeof(name), "%s/id_%d",
+                      run_args->pool, run_args->id);
+       rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
+       if (rc) {
+               zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
+                           "failed: %d\n", name, rc);
+               goto out;
+       }
+
+       rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
+       if (rc) {
+               zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
+                           "failed: %d\n", name, rc);
+               goto out_destroy;
+       }
+
+       if (!(run_args->flags & DMU_FPP)) {
+               obj = zpios_dmu_object_create(run_args, os);
+               if (obj == 0) {
+                       rc = -EBADF;
+                       zpios_print(run_args->file, "Error zpios_dmu_"
+                                   "object_create() failed, %d\n", rc);
+                       goto out_disown;
+               }
+       }
+
+       for (i = 0; i < run_args->region_count; i++) {
+               zpios_region_t *region;
+
+               region = &run_args->regions[i];
+               mutex_init(&region->lock, NULL, MUTEX_DEFAULT, NULL);
+
+               if (run_args->flags & DMU_FPP) {
+                       /* File per process */
+                       region->obj.os  = os;
+                       region->obj.obj = zpios_dmu_object_create(run_args, os);
+                       if (region->obj.obj == 0) {
+                               rc = -EBADF;
+                               zpios_print(run_args->file, "Error zpios_dmu_"
+                                           "object_create() failed, %d\n", rc);
+
+                               /* Unwind the region locks set up so far,
+                                * including the one for this region.
+                                */
+                               for (; i >= 0; i--)
+                                       mutex_destroy(
+                                           &run_args->regions[i].lock);
+
+                               goto out_disown;
+                       }
+                       region->wr_offset   = run_args->offset;
+                       region->rd_offset   = run_args->offset;
+                       region->init_offset = run_args->offset;
+                       region->max_offset  = run_args->offset +
+                                             run_args->region_size;
+               } else {
+                       /* Single shared file */
+                       region->obj.os  = os;
+                       region->obj.obj = obj;
+                       region->wr_offset   = run_args->offset * i;
+                       region->rd_offset   = run_args->offset * i;
+                       region->init_offset = run_args->offset * i;
+                       region->max_offset  = run_args->offset *
+                                             i + run_args->region_size;
+               }
+       }
+
+       run_args->os = os;
+       goto out;
+
+out_disown:
+       /* The objset must be released before it can be destroyed */
+       dmu_objset_disown(os, zpios_tag);
+out_destroy:
+       rc2 = dmu_objset_destroy(name, B_FALSE);
+       if (rc2)
+               zpios_print(run_args->file, "Error dmu_objset_destroy"
+                           "(%s, ...) failed: %d\n", name, rc2);
+out:
+       t->stop  = zpios_timespec_now();
+       t->delta = zpios_timespec_sub(t->stop, t->start);
+       (void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);
+
+       return rc;
+}
+
+/*
+ * Build the run_args_t for a single test run from the user supplied
+ * command, issue the pre-run upcall and let zpios_dmu_setup() create
+ * the backing objset and objects.
+ *
+ * On success 0 is returned and *run_args points at the new structure.
+ * If zpios_dmu_setup() fails the structure is released again,
+ * *run_args is reset to NULL and the error is returned.
+ */
+static int
+zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
+{
+       run_args_t *args;
+       int bytes, error;
+
+       /* One allocation covers the fixed part plus one entry per region */
+       bytes = sizeof(*args) + kcmd->cmd_region_count * sizeof(zpios_region_t);
+
+       args = vmem_zalloc(bytes, KM_SLEEP);
+       if (args == NULL) {
+               zpios_print(file, "Unable to vmem_zalloc() %d bytes "
+                           "for regions\n", bytes);
+               return -ENOMEM;
+       }
+
+       *run_args = args;
+
+       /* Copy at most size - 1 bytes; the allocation is already zeroed */
+       strncpy(args->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
+       strncpy(args->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1);
+       strncpy(args->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1);
+       strncpy(args->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1);
+
+       /* Test parameters taken verbatim from the command */
+       args->id           = kcmd->cmd_id;
+       args->chunk_size   = kcmd->cmd_chunk_size;
+       args->thread_count = kcmd->cmd_thread_count;
+       args->region_count = kcmd->cmd_region_count;
+       args->region_size  = kcmd->cmd_region_size;
+       args->offset       = kcmd->cmd_offset;
+       args->region_noise = kcmd->cmd_region_noise;
+       args->chunk_noise  = kcmd->cmd_chunk_noise;
+       args->thread_delay = kcmd->cmd_thread_delay;
+       args->flags        = kcmd->cmd_flags;
+
+       /* Run state always starts from zero */
+       args->stats.wr_data   = 0;
+       args->stats.wr_chunks = 0;
+       args->stats.rd_data   = 0;
+       args->stats.rd_chunks = 0;
+       args->region_next     = 0;
+       args->file            = file;
+
+       mutex_init(&args->lock_work, NULL, MUTEX_DEFAULT, NULL);
+       mutex_init(&args->lock_ctl, NULL, MUTEX_DEFAULT, NULL);
+
+       (void)zpios_upcall(args->pre, PHASE_PRE_RUN, args, 0);
+
+       error = zpios_dmu_setup(args);
+       if (error == 0)
+               return 0;
+
+       /* Setup failed, release everything acquired above */
+       mutex_destroy(&args->lock_ctl);
+       mutex_destroy(&args->lock_work);
+       vmem_free(args, bytes);
+       *run_args = NULL;
+
+       return error;
+}
+
+/*
+ * Hand out the next chunk of work for a write (DMU_WRITE set in 'flags')
+ * or read (otherwise) pass.
+ *
+ * Starting at run_args->region_next the regions are scanned round-robin
+ * for one which still has room for *chunk_size bytes before max_offset.
+ * When one is found *region, *obj and *offset describe the chunk, the
+ * region's write/read offset is advanced by *chunk_size, region_next is
+ * moved on (by a random step when region_noise is set) and 1 is returned.
+ * When every region is exhausted 0 is returned.
+ *
+ * The whole selection runs under run_args->lock_work.
+ */
+static int
+zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
+                   __u32 *chunk_size, zpios_region_t **region, __u32 flags)
+{
+       int i, j, count = 0;
+       unsigned int random_int;
+
+       /* NOTE(review): this value is regenerated below before it is
+        * ever read, so this call looks redundant.
+        */
+       get_random_bytes(&random_int, sizeof(unsigned int));
+
+       mutex_enter(&run_args->lock_work);
+       i = run_args->region_next;
+
+       /* XXX: I don't much care for this chunk selection mechanism
+        * there's the potential to burn a lot of time here doing nothing
+        * useful while holding the global lock.  This could give some
+        * misleading performance results.  I'll fix it later.
+        */
+       while (count < run_args->region_count) {
+               __u64 *rw_offset;
+               zpios_time_t *rw_time;
+
+               /* region_next may exceed region_count, wrap it here */
+               j = i % run_args->region_count;
+               *region = &(run_args->regions[j]);
+
+               /* Select the write or the read cursor and timer */
+               if (flags & DMU_WRITE) {
+                       rw_offset = &((*region)->wr_offset);
+                       rw_time = &((*region)->stats.wr_time);
+               } else {
+                       rw_offset = &((*region)->rd_offset);
+                       rw_time = &((*region)->stats.rd_time);
+               }
+
+               /* test if region is fully written */
+               if (*rw_offset + *chunk_size > (*region)->max_offset) {
+                       i++;
+                       count++;
+
+                       /* Stamp the stop time only while it is still zero,
+                        * i.e. the first time the region is found full.
+                        */
+                       if (unlikely(rw_time->stop.ts_sec == 0) &&
+                           unlikely(rw_time->stop.ts_nsec == 0))
+                               rw_time->stop = zpios_timespec_now();
+
+                       continue;
+               }
+
+               /* Hand the chunk to the caller and advance the cursor */
+               *offset = *rw_offset;
+               *obj = (*region)->obj;
+               *rw_offset += *chunk_size;
+
+               /* update ctl structure */
+               if (run_args->region_noise) {
+                       get_random_bytes(&random_int, sizeof(unsigned int));
+                       run_args->region_next += random_int % run_args->region_noise;
+               } else {
+                       run_args->region_next++;
+               }
+
+               mutex_exit(&run_args->lock_work);
+               return 1;
+       }
+
+       /* nothing left to do */
+       mutex_exit(&run_args->lock_work);
+
+       return 0;
+}
+
+/*
+ * Free the object backing a single region, logging any failure.
+ * Returns the result of zpios_dmu_object_free().
+ */
+static int
+zpios_remove_region_object(run_args_t *run_args, zpios_region_t *region)
+{
+       int error;
+
+       error = zpios_dmu_object_free(run_args, region->obj.os,
+                                     region->obj.obj);
+       if (error)
+               zpios_print(run_args->file, "Error "
+                           "removing object %d, %d\n",
+                           (int)region->obj.obj, error);
+
+       return error;
+}
+
+/*
+ * Tear down the objset used by a run.  The objset is always disowned;
+ * the objects and the objset itself are only removed when DMU_REMOVE
+ * is set.  The elapsed time is stored in run_args->stats.rm_time and
+ * the pre/post remove upcalls are issued around the work.
+ */
+static void
+zpios_remove_objset(run_args_t *run_args)
+{
+       zpios_time_t *timer = &(run_args->stats.rm_time);
+       char dsname[32];
+       int error = 0, idx;
+
+       (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
+       timer->start = zpios_timespec_now();
+
+       (void)snprintf(dsname, 32, "%s/id_%d", run_args->pool, run_args->id);
+
+       if (run_args->flags & DMU_REMOVE) {
+               if (run_args->flags & DMU_FPP) {
+                       /* File per process, every region has an object */
+                       for (idx = 0; idx < run_args->region_count; idx++)
+                               error = zpios_remove_region_object(run_args,
+                                   &run_args->regions[idx]);
+               } else {
+                       /* Shared file, region 0 refers to the one object */
+                       error = zpios_remove_region_object(run_args,
+                           &run_args->regions[0]);
+               }
+       }
+
+       dmu_objset_disown(run_args->os, zpios_tag);
+
+       if (run_args->flags & DMU_REMOVE) {
+               error = dmu_objset_destroy(dsname, B_FALSE);
+               if (error)
+                       zpios_print(run_args->file, "Error dmu_objset_destroy"
+                                   "(%s, ...) failed: %d\n", dsname, error);
+       }
+
+       timer->stop  = zpios_timespec_now();
+       timer->delta = zpios_timespec_sub(timer->stop, timer->start);
+       (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, error);
+}
+
+/*
+ * Release everything hanging off a run_args_t: the per-thread data and
+ * the array pointing at it, all region locks, the two control locks and
+ * finally the structure itself.  A NULL run_args is ignored.
+ */
+static void
+zpios_cleanup_run(run_args_t *run_args)
+{
+       thread_data_t *thr;
+       int n, regions_size;
+
+       if (run_args == NULL)
+               return;
+
+       if (run_args->threads != NULL) {
+               for (n = 0; n < run_args->thread_count; n++) {
+                       thr = run_args->threads[n];
+                       if (!thr)
+                               continue;
+
+                       mutex_destroy(&thr->lock);
+                       kmem_free(thr, sizeof(thread_data_t));
+               }
+
+               kmem_free(run_args->threads,
+                         sizeof(thread_data_t *) * run_args->thread_count);
+       }
+
+       for (n = 0; n < run_args->region_count; n++)
+               mutex_destroy(&run_args->regions[n].lock);
+
+       mutex_destroy(&run_args->lock_work);
+       mutex_destroy(&run_args->lock_ctl);
+
+       /* Matches the size of the allocation made in zpios_setup_run() */
+       regions_size = run_args->region_count * sizeof(zpios_region_t);
+       vmem_free(run_args, sizeof(*run_args) + regions_size);
+}
+
+/*
+ * Write 'size' bytes from 'buf' to 'object' at 'offset' inside a new
+ * transaction.
+ *
+ * The tx is assigned with TXG_WAIT, or with TXG_NOWAIT when
+ * DMU_WRITE_NOWAIT is set; in the latter case an ERESTART result is
+ * handled by waiting on the tx and retrying with a fresh one.
+ * DMU_WRITE_ZC is not acted on here.
+ *
+ * Returns 0 on success or the dmu_tx_assign() error.
+ */
+static int
+zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
+               uint64_t offset, uint64_t size, const void *buf)
+{
+       struct dmu_tx *tx;
+       int rc, how = TXG_WAIT;
+
+       if (run_args->flags & DMU_WRITE_NOWAIT)
+               how = TXG_NOWAIT;
+
+       while (1) {
+               tx = dmu_tx_create(os);
+               dmu_tx_hold_write(tx, object, offset, size);
+               rc = dmu_tx_assign(tx, how);
+               if (rc == 0)
+                       break;
+
+               if (rc == ERESTART && how == TXG_NOWAIT) {
+                       /* Wait on this tx, drop it and retry with a new one */
+                       dmu_tx_wait(tx);
+                       dmu_tx_abort(tx);
+                       continue;
+               }
+
+               zpios_print(run_args->file,
+                           "Error in dmu_tx_assign(), %d\n", rc);
+               dmu_tx_abort(tx);
+               return rc;
+       }
+
+       dmu_write(os, object, offset, size, buf, tx);
+       dmu_tx_commit(tx);
+
+       return 0;
+}
+
+/*
+ * Read 'size' bytes of 'object' at 'offset' into 'buf'.  Prefetch is
+ * turned off for the read when DMU_READ_NOPF is set in the run flags.
+ * Returns the result of dmu_read().
+ */
+static int
+zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
+              uint64_t offset, uint64_t size, void *buf)
+{
+       int read_flags;
+
+       if (run_args->flags & DMU_READ_NOPF)
+               read_flags = DMU_READ_NO_PREFETCH;
+       else
+               read_flags = 0;
+
+       return dmu_read(os, object, offset, size, buf, read_flags);
+}
+
+/*
+ * Body of each zpios I/O kthread; 'data' is the thread_data_t of the
+ * thread.
+ *
+ * The thread runs a write phase, parks itself until it is woken again,
+ * and then either exits (when thr->rc was set while it was parked) or
+ * runs a read phase.  Work is pulled from zpios_get_work_item() and
+ * per-thread and per-region statistics are accumulated under their
+ * respective locks.  At the end of each phase run_args->threads_done
+ * is incremented and run_args->waitq is woken.
+ *
+ * The thread terminates through do_exit(0), its result is reported
+ * through thr->rc.
+ */
+static int
+zpios_thread_main(void *data)
+{
+       thread_data_t *thr = (thread_data_t *)data;
+       run_args_t *run_args = thr->run_args;
+       zpios_time_t t;
+       dmu_obj_t obj;
+       __u64 offset;
+       __u32 chunk_size;
+       zpios_region_t *region;
+       char *buf;
+       unsigned int random_int;
+       int chunk_noise = run_args->chunk_noise;
+       int chunk_noise_tmp = 0;
+       int thread_delay = run_args->thread_delay;
+       int thread_delay_tmp = 0;
+       int i, rc = 0;
+
+       /* Pick a random adjustment to the chunk size for this thread,
+        * the modulo result minus chunk_noise.
+        */
+       if (chunk_noise) {
+               get_random_bytes(&random_int, sizeof(unsigned int));
+               chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise;
+       }
+
+       /* It's OK to vmem_alloc() this memory because it will be copied
+        * in to the slab and pointers to the slab copy will be setup in
+        * the bio when the IO is submitted.  This of course is not ideal
+        * since we want a zero-copy IO path if possible.  It would be nice
+        * to have direct access to those slab entries.
+        */
+       chunk_size = run_args->chunk_size + chunk_noise_tmp;
+       buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
+       ASSERT(buf);
+
+       /* Trivial data verification pattern for now. */
+       if (run_args->flags & DMU_VERIFY)
+               memset(buf, 'z', chunk_size);
+
+       /* Write phase */
+       mutex_enter(&thr->lock);
+       thr->stats.wr_time.start = zpios_timespec_now();
+       mutex_exit(&thr->lock);
+
+       while (zpios_get_work_item(run_args, &obj, &offset,
+                                  &chunk_size, &region, DMU_WRITE)) {
+               /* Optional random pause before each chunk */
+               if (thread_delay) {
+                       get_random_bytes(&random_int, sizeof(unsigned int));
+                       thread_delay_tmp = random_int % thread_delay;
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_timeout(thread_delay_tmp); /* In jiffies */
+               }
+
+               /* Time just the write itself */
+               t.start = zpios_timespec_now();
+               rc = zpios_dmu_write(run_args, obj.os, obj.obj,
+                                    offset, chunk_size, buf);
+               t.stop  = zpios_timespec_now();
+               t.delta = zpios_timespec_sub(t.stop, t.start);
+
+               if (rc) {
+                       zpios_print(run_args->file, "IO error while doing "
+                                   "dmu_write(): %d\n", rc);
+                       break;
+               }
+
+               mutex_enter(&thr->lock);
+               thr->stats.wr_data += chunk_size;
+               thr->stats.wr_chunks++;
+               thr->stats.wr_time.delta = zpios_timespec_add(
+                       thr->stats.wr_time.delta, t.delta);
+               mutex_exit(&thr->lock);
+
+               mutex_enter(&region->lock);
+               region->stats.wr_data += chunk_size;
+               region->stats.wr_chunks++;
+               region->stats.wr_time.delta = zpios_timespec_add(
+                       region->stats.wr_time.delta, t.delta);
+
+               /* First time region was accessed */
+               if (region->init_offset == offset)
+                       region->stats.wr_time.start = t.start;
+
+               mutex_exit(&region->lock);
+       }
+
+       /* Report this thread as finished with the write phase */
+       mutex_enter(&run_args->lock_ctl);
+       run_args->threads_done++;
+       mutex_exit(&run_args->lock_ctl);
+
+       mutex_enter(&thr->lock);
+       thr->rc = rc;
+       thr->stats.wr_time.stop = zpios_timespec_now();
+       mutex_exit(&thr->lock);
+       wake_up(&run_args->waitq);
+
+       /* Park until woken with wake_up_process() for the next phase.
+        * NOTE(review): the waitq is woken before the task state is
+        * changed, so a wake_up_process() arriving in between would be
+        * lost -- confirm whether the ordering needs tightening.
+        */
+       set_current_state(TASK_UNINTERRUPTIBLE);
+       schedule();
+
+       /* Check if we should exit */
+       mutex_enter(&thr->lock);
+       rc = thr->rc;
+       mutex_exit(&thr->lock);
+       if (rc)
+               goto out;
+
+       /* Read phase */
+       mutex_enter(&thr->lock);
+       thr->stats.rd_time.start = zpios_timespec_now();
+       mutex_exit(&thr->lock);
+
+       while (zpios_get_work_item(run_args, &obj, &offset,
+                                  &chunk_size, &region, DMU_READ)) {
+               /* Optional random pause before each chunk */
+               if (thread_delay) {
+                       get_random_bytes(&random_int, sizeof(unsigned int));
+                       thread_delay_tmp = random_int % thread_delay;
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_timeout(thread_delay_tmp); /* In jiffies */
+               }
+
+               /* Clear the buffer so stale data can not pass the check */
+               if (run_args->flags & DMU_VERIFY)
+                       memset(buf, 0, chunk_size);
+
+               /* Time just the read itself */
+               t.start = zpios_timespec_now();
+               rc = zpios_dmu_read(run_args, obj.os, obj.obj,
+                                   offset, chunk_size, buf);
+               t.stop  = zpios_timespec_now();
+               t.delta = zpios_timespec_sub(t.stop, t.start);
+
+               if (rc) {
+                       zpios_print(run_args->file, "IO error while doing "
+                                   "dmu_read(): %d\n", rc);
+                       break;
+               }
+
+               /* Trivial data verification, expensive! */
+               /* A mismatch is only logged, rc is left unchanged */
+               if (run_args->flags & DMU_VERIFY) {
+                       for (i = 0; i < chunk_size; i++) {
+                               if (buf[i] != 'z') {
+                                       zpios_print(run_args->file,
+                                                   "IO verify error: %d/%d/%d\n",
+                                                   (int)obj.obj, (int)offset,
+                                                   (int)chunk_size);
+                                       break;
+                               }
+                       }
+               }
+
+               mutex_enter(&thr->lock);
+               thr->stats.rd_data += chunk_size;
+               thr->stats.rd_chunks++;
+               thr->stats.rd_time.delta = zpios_timespec_add(
+                       thr->stats.rd_time.delta, t.delta);
+               mutex_exit(&thr->lock);
+
+               mutex_enter(&region->lock);
+               region->stats.rd_data += chunk_size;
+               region->stats.rd_chunks++;
+               region->stats.rd_time.delta = zpios_timespec_add(
+                       region->stats.rd_time.delta, t.delta);
+
+               /* First time region was accessed */
+               if (region->init_offset == offset)
+                       region->stats.rd_time.start = t.start;
+
+               mutex_exit(&region->lock);
+       }
+
+       /* Report this thread as finished with the read phase */
+       mutex_enter(&run_args->lock_ctl);
+       run_args->threads_done++;
+       mutex_exit(&run_args->lock_ctl);
+
+       mutex_enter(&thr->lock);
+       thr->rc = rc;
+       thr->stats.rd_time.stop = zpios_timespec_now();
+       mutex_exit(&thr->lock);
+       wake_up(&run_args->waitq);
+
+out:
+       vmem_free(buf, chunk_size);
+       do_exit(0);
+
+       return rc; /* Unreachable, due to do_exit() */
+}
+
+/*
+ * Wait condition used by zpios_threads_run(): non-zero once every
+ * thread has reported the current phase as done.
+ */
+static int
+zpios_thread_done(run_args_t *run_args)
+{
+       int all_done;
+
+       ASSERT(run_args->threads_done <= run_args->thread_count);
+       all_done = (run_args->threads_done == run_args->thread_count);
+
+       return all_done;
+}
+
+/*
+ * Create run_args->thread_count I/O threads and drive them through the
+ * write phase and then the read phase, issuing the pre/post upcalls
+ * around each phase and folding the per-thread totals into
+ * run_args->stats.
+ *
+ * If any thread reports an error after the write phase, every thread
+ * has its rc set to that error and is woken so that it exits instead
+ * of starting the read phase.
+ *
+ * Returns 0 on success, -ENOMEM or -EINVAL when the threads could not
+ * be set up, or the first error reported by a thread.  The per-thread
+ * data stays attached to run_args->threads for the caller to release.
+ */
+static int
+zpios_threads_run(run_args_t *run_args)
+{
+       struct task_struct *tsk, **tsks;
+       thread_data_t *thr = NULL;
+       zpios_time_t *tt = &(run_args->stats.total_time);
+       zpios_time_t *tw = &(run_args->stats.wr_time);
+       zpios_time_t *tr = &(run_args->stats.rd_time);
+       int i, rc = 0, tc = run_args->thread_count;
+
+       tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP);
+       if (tsks == NULL) {
+               rc = -ENOMEM;
+               goto cleanup2;
+       }
+
+       run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP);
+       if (run_args->threads == NULL) {
+               rc = -ENOMEM;
+               goto cleanup;
+       }
+
+       init_waitqueue_head(&run_args->waitq);
+       run_args->threads_done = 0;
+
+       /* Create all the needed threads which will sleep until awoken */
+       for (i = 0; i < tc; i++) {
+               thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP);
+               if (thr == NULL) {
+                       rc = -ENOMEM;
+                       goto taskerr;
+               }
+
+               thr->thread_no = i;
+               thr->run_args = run_args;
+               thr->rc = 0;
+               mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
+               run_args->threads[i] = thr;
+
+               tsk = kthread_create(zpios_thread_main, (void *)thr,
+                                    "%s/%d", "zpios_io", i);
+               if (IS_ERR(tsk)) {
+                       rc = -EINVAL;
+                       goto taskerr;
+               }
+
+               tsks[i] = tsk;
+       }
+
+       tt->start = zpios_timespec_now();
+
+       /* Wake up all threads for write phase */
+       (void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
+       for (i = 0; i < tc; i++)
+               wake_up_process(tsks[i]);
+
+       /* Wait for write phase to complete */
+       tw->start = zpios_timespec_now();
+       wait_event(run_args->waitq, zpios_thread_done(run_args));
+       tw->stop = zpios_timespec_now();
+       (void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
+
+       /* Collect the write totals and the first thread error, if any */
+       for (i = 0; i < tc; i++) {
+               thr = run_args->threads[i];
+
+               mutex_enter(&thr->lock);
+
+               if (!rc && thr->rc)
+                       rc = thr->rc;
+
+               run_args->stats.wr_data += thr->stats.wr_data;
+               run_args->stats.wr_chunks += thr->stats.wr_chunks;
+               mutex_exit(&thr->lock);
+       }
+
+       if (rc) {
+               /* Wake up all threads and tell them to exit */
+               for (i = 0; i < tc; i++) {
+                       /* Each thread checks its own rc after waking */
+                       thr = run_args->threads[i];
+
+                       mutex_enter(&thr->lock);
+                       thr->rc = rc;
+                       mutex_exit(&thr->lock);
+
+                       wake_up_process(tsks[i]);
+               }
+               goto out;
+       }
+
+       /* Reset the completion counter for the read phase */
+       mutex_enter(&run_args->lock_ctl);
+       ASSERT(run_args->threads_done == run_args->thread_count);
+       run_args->threads_done = 0;
+       mutex_exit(&run_args->lock_ctl);
+
+       /* Wake up all threads for read phase */
+       (void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
+       for (i = 0; i < tc; i++)
+               wake_up_process(tsks[i]);
+
+       /* Wait for read phase to complete */
+       tr->start = zpios_timespec_now();
+       wait_event(run_args->waitq, zpios_thread_done(run_args));
+       tr->stop = zpios_timespec_now();
+       (void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
+
+       /* Collect the read totals and the first thread error, if any */
+       for (i = 0; i < tc; i++) {
+               thr = run_args->threads[i];
+
+               mutex_enter(&thr->lock);
+
+               if (!rc && thr->rc)
+                       rc = thr->rc;
+
+               run_args->stats.rd_data += thr->stats.rd_data;
+               run_args->stats.rd_chunks += thr->stats.rd_chunks;
+               mutex_exit(&thr->lock);
+       }
+out:
+       tt->stop  = zpios_timespec_now();
+       tt->delta = zpios_timespec_sub(tt->stop, tt->start);
+       tw->delta = zpios_timespec_sub(tw->stop, tw->start);
+       tr->delta = zpios_timespec_sub(tr->stop, tr->start);
+
+cleanup:
+       kmem_free(tsks, sizeof(struct task_struct *) * tc);
+cleanup2:
+       /* Returns first encountered thread error (if any) */
+       return rc;
+
+taskerr:
+       /* Destroy all threads that were created successfully */
+       for (i = 0; i < tc; i++)
+               if (tsks[i] != NULL)
+                       (void) kthread_stop(tsks[i]);
+
+       goto cleanup;
+}
+
+/*
+ * Validate a test command, execute one complete run (setup, write and
+ * read phases, removal) and copy the collected statistics into 'data'.
+ *
+ * Returns 0 on success, -EINVAL for an invalid command, -ENOSPC when
+ * 'data_size' is too small for the statistics, or the error from the
+ * setup or the threads.
+ */
+static int
+zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
+                 int data_size, void *data)
+{
+       run_args_t *run_args = NULL;
+       zpios_stats_t *stats = (zpios_stats_t *)data;
+       int i, n, m, size, rc;
+
+       if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
+           (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
+               zpios_print(file, "Invalid chunk_size, region_size, "
+                           "thread_count, or region_count, %d\n", -EINVAL);
+               return -EINVAL;
+       }
+
+       if (!(kcmd->cmd_flags & DMU_WRITE) ||
+           !(kcmd->cmd_flags & DMU_READ)) {
+               zpios_print(file, "Invalid flags, minimally DMU_WRITE "
+                           "and DMU_READ must be set, %d\n", -EINVAL);
+               return -EINVAL;
+       }
+
+       if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
+           (kcmd->cmd_flags & DMU_VERIFY)) {
+               zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
+                           "with DMU_VERIFY, used for performance analysis "
+                           "only, %d\n", -EINVAL);
+               return -EINVAL;
+       }
+
+       /* Opaque data on return contains structs of the following form:
+        *
+        * zpios_stat_t stats[];
+        * stats[0]     = run_args->stats;
+        * stats[1-N]   = threads[N]->stats;
+        * stats[N+1-M] = regions[M]->stats;
+        *
+        * Where N is the number of threads, and M is the number of regions.
+        */
+       size = (sizeof(zpios_stats_t) +
+              (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) +
+              (kcmd->cmd_region_count * sizeof(zpios_stats_t)));
+       if (data_size < size) {
+               zpios_print(file, "Invalid size, command data buffer "
+                           "size too small, (%d < %d)\n", data_size, size);
+               return -ENOSPC;
+       }
+
+       rc = zpios_setup_run(&run_args, kcmd, file);
+       if (rc)
+               return rc;
+
+       rc = zpios_threads_run(run_args);
+       zpios_remove_objset(run_args);
+       if (rc)
+               goto cleanup;
+
+       if (stats) {
+               n = 1;
+               m = 1 + kcmd->cmd_thread_count;
+               stats[0] = run_args->stats;
+
+               for (i = 0; i < kcmd->cmd_thread_count; i++)
+                       stats[n+i] = run_args->threads[i]->stats;
+
+               for (i = 0; i < kcmd->cmd_region_count; i++)
+                       stats[m+i] = run_args->regions[i].stats;
+       }
+
+cleanup:
+       /* The upcall reads run_args, so it has to be issued before
+        * zpios_cleanup_run() frees the structure.
+        */
+       (void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
+
+       zpios_cleanup_run(run_args);
+
+       return rc;
+}
+
+/*
+ * Open handler for the zpios character device.
+ *
+ * Allocates the per-open zpios_info_t together with its message buffer of
+ * ZPIOS_INFO_BUFFER_SIZE bytes and stores the info structure in
+ * file->private_data for the other file operations.  The buffer is zeroed
+ * here because zpios_read() copies it to user space verbatim and the
+ * allocation is not guaranteed to return cleared memory.
+ *
+ * Returns 0 on success, -ENXIO for an out of range minor, or -ENOMEM when
+ * either allocation fails.
+ */
+static int
+zpios_open(struct inode *inode, struct file *file)
+{
+       unsigned int minor = iminor(inode);
+       zpios_info_t *info;
+
+       if (minor >= ZPIOS_MINORS)
+               return -ENXIO;
+
+       info = (zpios_info_t *)kmem_alloc(sizeof(*info), KM_SLEEP);
+       if (info == NULL)
+               return -ENOMEM;
+
+       spin_lock_init(&info->info_lock);
+       info->info_size = ZPIOS_INFO_BUFFER_SIZE;
+       info->info_buffer = (char *)vmem_alloc(info->info_size, KM_SLEEP);
+       if (info->info_buffer == NULL) {
+               kmem_free(info, sizeof(*info));
+               return -ENOMEM;
+       }
+
+       /* Never expose stale kernel memory through zpios_read() */
+       memset(info->info_buffer, 0, info->info_size);
+       info->info_head = info->info_buffer;
+       file->private_data = (void *)info;
+
+       return 0;
+}
+
+/*
+ * Release handler: free the per-open message buffer and info structure
+ * allocated by zpios_open().
+ *
+ * The buffer may have been resized through ZPIOS_CFG_BUFFER_SIZE, so it is
+ * freed using the size recorded in info->info_size rather than the initial
+ * ZPIOS_INFO_BUFFER_SIZE constant.
+ *
+ * Returns 0 on success or -ENXIO for an out of range minor.
+ */
+static int
+zpios_release(struct inode *inode, struct file *file)
+{
+       unsigned int minor = iminor(inode);
+       zpios_info_t *info = (zpios_info_t *)file->private_data;
+
+       if (minor >= ZPIOS_MINORS)
+               return -ENXIO;
+
+       ASSERT(info);
+       ASSERT(info->info_buffer);
+
+       vmem_free(info->info_buffer, info->info_size);
+       kmem_free(info, sizeof(*info));
+
+       return 0;
+}
+
+/*
+ * ZPIOS_CFG_BUFFER_CLEAR handler: zero the whole per-open message buffer
+ * and rewind its head pointer to the start, all under info_lock.
+ * The kcfg and arg parameters are not used.  Always returns 0.
+ */
+static int
+zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
+{
+       zpios_info_t *info;
+
+       info = (zpios_info_t *)file->private_data;
+       ASSERT(info);
+       ASSERT(info->info_buffer);
+
+       spin_lock(&info->info_lock);
+       /* Rewind first, then wipe starting from the rewound head */
+       info->info_head = info->info_buffer;
+       memset(info->info_head, 0, info->info_size);
+       spin_unlock(&info->info_lock);
+
+       return 0;
+}
+
+/*
+ * ZPIOS_CFG_BUFFER_SIZE handler: query or resize the per-open buffer.
+ *
+ *   kcfg->cfg_arg1 == 0  query only
+ *   kcfg->cfg_arg1  > 0  replace the buffer with one of that many bytes;
+ *                        existing contents are copied, truncated or zero
+ *                        padded as needed, and the head is rewound
+ *
+ * The resulting size is stored in kcfg->cfg_rc1 and the whole kcfg is
+ * copied back to the user structure at 'arg'.
+ *
+ * Anything that may sleep (the KM_SLEEP allocation, the free of the old
+ * buffer and the copy to user space) is done outside of info_lock; only
+ * the pointer swap and the bounded memcpy() happen under the spinlock.
+ *
+ * Returns 0 on success, -ENOMEM if the new buffer cannot be allocated, or
+ * -EFAULT if the result cannot be copied to user space.
+ */
+static int
+zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
+{
+       zpios_info_t *info = (zpios_info_t *)file->private_data;
+       char *buf = NULL, *old_buf = NULL;
+       int copy, size = 0, old_size = 0;
+
+       ASSERT(info);
+       ASSERT(info->info_buffer);
+
+       if (kcfg->cfg_arg1 > 0) {
+               size = kcfg->cfg_arg1;
+               buf = (char *)vmem_alloc(size, KM_SLEEP);
+               if (buf == NULL)
+                       return -ENOMEM;
+
+               /* Zero fill so any tail beyond the copied data is clean */
+               memset(buf, 0, size);
+       }
+
+       spin_lock(&info->info_lock);
+       if (buf != NULL) {
+               /* Truncate contents when copying into a smaller buffer */
+               copy = ((size < info->info_size) ? size : info->info_size);
+               memcpy(buf, info->info_buffer, copy);
+
+               old_buf = info->info_buffer;
+               old_size = info->info_size;
+               info->info_size = size;
+               info->info_buffer = buf;
+               info->info_head = info->info_buffer;
+       }
+
+       kcfg->cfg_rc1 = info->info_size;
+       spin_unlock(&info->info_lock);
+
+       if (old_buf != NULL)
+               vmem_free(old_buf, old_size);
+
+       if (copy_to_user((struct zpios_cfg_t __user *)arg, kcfg, sizeof(*kcfg)))
+               return -EFAULT;
+
+       return 0;
+}
+
+/*
+ * ZPIOS_CFG ioctl: copy the zpios_cfg_t in from user space, validate its
+ * magic and dispatch on cfg_cmd.
+ *
+ * Returns the handler's result, -EFAULT if the structure cannot be read,
+ * or -EINVAL for a bad magic or an unknown command.
+ */
+static int
+zpios_ioctl_cfg(struct file *file, unsigned long arg)
+{
+       zpios_cfg_t kcfg;
+
+       if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg)))
+               return -EFAULT;
+
+       if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
+               zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
+                           kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
+               return -EINVAL;
+       }
+
+       /* cfg_arg1 - Unused
+        * cfg_rc1  - Unused
+        */
+       if (kcfg.cfg_cmd == ZPIOS_CFG_BUFFER_CLEAR)
+               return zpios_buffer_clear(file, &kcfg, arg);
+
+       /* cfg_arg1 - 0 - query size; >0 resize
+        * cfg_rc1  - Set to current buffer size
+        */
+       if (kcfg.cfg_cmd == ZPIOS_CFG_BUFFER_SIZE)
+               return zpios_buffer_size(file, &kcfg, arg);
+
+       zpios_print(file, "Bad config command %d\n", kcfg.cfg_cmd);
+
+       return -EINVAL;
+}
+
+/*
+ * ZPIOS_CMD ioctl: run one test described by the user's zpios_cmd_t.
+ *
+ * The fixed size command header is copied in first.  When cmd_data_size
+ * is non-zero the opaque data area located at cmd_data_str in the user
+ * structure is copied in as well, handed to zpios_do_one_run(), and copied
+ * back out to the same location if the run succeeded.
+ *
+ * Returns the result of zpios_do_one_run(), -ENOMEM on allocation failure,
+ * -EINVAL for a bad magic, or -EFAULT when a user copy fails.  The
+ * copy_*_user() helpers return the number of bytes left uncopied, so their
+ * result is translated to -EFAULT rather than being returned as is.
+ *
+ * NOTE(review): cmd_data_size comes straight from user space and is used
+ * as an allocation size without an upper bound - confirm whether a limit
+ * should be enforced here.
+ */
+static int
+zpios_ioctl_cmd(struct file *file, unsigned long arg)
+{
+       zpios_cmd_t *kcmd;
+       void *data = NULL;
+       void *udata = (void *)(arg + offsetof(zpios_cmd_t, cmd_data_str));
+       int rc;
+
+       kcmd = kmem_alloc(sizeof(zpios_cmd_t), KM_SLEEP);
+       if (kcmd == NULL) {
+               zpios_print(file, "Unable to kmem_alloc() %ld byte for "
+                           "zpios_cmd_t\n", (long int)sizeof(zpios_cmd_t));
+               return -ENOMEM;
+       }
+
+       if (copy_from_user(kcmd, (zpios_cmd_t *)arg, sizeof(zpios_cmd_t))) {
+               rc = -EFAULT;
+               zpios_print(file, "Unable to copy command structure "
+                           "from user to kernel memory, %d\n", rc);
+               goto out_cmd;
+       }
+
+       if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) {
+               zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
+                           kcmd->cmd_magic, ZPIOS_CMD_MAGIC);
+               rc = -EINVAL;
+               goto out_cmd;
+       }
+
+       /* Allocate memory for any opaque data the caller needed to pass on */
+       if (kcmd->cmd_data_size > 0) {
+               data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
+               if (data == NULL) {
+                       zpios_print(file, "Unable to vmem_alloc() %ld "
+                                   "bytes for data buffer\n",
+                                   (long)kcmd->cmd_data_size);
+                       rc = -ENOMEM;
+                       goto out_cmd;
+               }
+
+               if (copy_from_user(data, udata, kcmd->cmd_data_size)) {
+                       rc = -EFAULT;
+                       zpios_print(file, "Unable to copy data buffer "
+                                   "from user to kernel memory, %d\n", rc);
+                       goto out_data;
+               }
+       }
+
+       rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data);
+
+       /* If the test failed, or there is no buffer, do not copy out stats */
+       if (rc || data == NULL)
+               goto out_data;
+
+       if (copy_to_user(udata, data, kcmd->cmd_data_size)) {
+               rc = -EFAULT;
+               zpios_print(file, "Unable to copy data buffer "
+                           "from kernel to user memory, %d\n", rc);
+       }
+
+out_data:
+       if (data != NULL)
+               vmem_free(data, kcmd->cmd_data_size);
+out_cmd:
+       kmem_free(kcmd, sizeof(zpios_cmd_t));
+
+       return rc;
+}
+
+/*
+ * Top level ioctl entry point.  Rejects tty ioctls ('T' in bits 8-15 with
+ * all higher bits clear) with -ENOTTY and out of range minors with -ENXIO,
+ * then routes ZPIOS_CFG and ZPIOS_CMD to their handlers.  Any other
+ * command is logged and fails with -EINVAL.
+ */
+static int
+zpios_ioctl(struct inode *inode, struct file *file,
+            unsigned int cmd, unsigned long arg)
+{
+       /* Ignore tty ioctls */
+       if ((cmd & 0xffffff00) == (((int)'T') << 8))
+               return -ENOTTY;
+
+       if (iminor(inode) >= ZPIOS_MINORS)
+               return -ENXIO;
+
+       switch (cmd) {
+       case ZPIOS_CFG:
+               return zpios_ioctl_cfg(file, arg);
+       case ZPIOS_CMD:
+               return zpios_ioctl_cmd(file, arg);
+       default:
+               break;
+       }
+
+       zpios_print(file, "Bad ioctl command %d\n", cmd);
+
+       return -EINVAL;
+}
+
+#ifdef CONFIG_COMPAT
+/* Compatibility handler for ioctls from 32-bit ELF binaries */
+static long
+zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct inode *inode = file->f_dentry->d_inode;
+
+       /* Forward unchanged to the native ioctl handler */
+       return zpios_ioctl(inode, file, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
+/* I'm not sure why you would want to write in to this buffer from
+ * user space since its principal use is to pass test status info
+ * back to the user space, but I don't see any reason to prevent it.
+ *
+ * Data is stored at the current file offset (*ppos), matching the way
+ * zpios_read() reads it back.  A write starting at or beyond the end of
+ * the buffer fails with -EFBIG; a write crossing the end is shortened.
+ * Returns the number of bytes written, -ENXIO for an out of range minor,
+ * or -EFAULT if the user buffer cannot be read.
+ *
+ * NOTE(review): copy_from_user() is called with info_lock (a spinlock)
+ * held; the user access can fault - confirm whether a bounce buffer or a
+ * sleeping lock is wanted here.
+ */
+static ssize_t
+zpios_write(struct file *file, const char __user *buf,
+            size_t count, loff_t *ppos)
+{
+       unsigned int minor = iminor(file->f_dentry->d_inode);
+       zpios_info_t *info = (zpios_info_t *)file->private_data;
+       int rc = 0;
+
+       if (minor >= ZPIOS_MINORS)
+               return -ENXIO;
+
+       ASSERT(info);
+       ASSERT(info->info_buffer);
+
+       spin_lock(&info->info_lock);
+
+       /* Write beyond EOF */
+       if (*ppos >= info->info_size) {
+               rc = -EFBIG;
+               goto out;
+       }
+
+       /* Resize count if beyond EOF */
+       if (*ppos + count > info->info_size)
+               count = info->info_size - *ppos;
+
+       /* Honor the file offset instead of always overwriting the start */
+       if (copy_from_user(info->info_buffer + *ppos, buf, count)) {
+               rc = -EFAULT;
+               goto out;
+       }
+
+       *ppos += count;
+       rc = count;
+out:
+       spin_unlock(&info->info_lock);
+       return rc;
+}
+
+/*
+ * Read handler: copy up to 'count' bytes of the per-open message buffer,
+ * starting at *ppos, to user space and advance *ppos.
+ *
+ * Returns the number of bytes copied, 0 when *ppos is at or past the end
+ * of the buffer, -ENXIO for an out of range minor, or -EFAULT if the user
+ * buffer cannot be written.
+ *
+ * NOTE(review): copy_to_user() runs with info_lock (a spinlock) held -
+ * confirm this is acceptable for the targeted kernels.
+ */
+static ssize_t
+zpios_read(struct file *file, char __user *buf,
+                       size_t count, loff_t *ppos)
+{
+       zpios_info_t *info = (zpios_info_t *)file->private_data;
+       int rc = 0;
+
+       if (iminor(file->f_dentry->d_inode) >= ZPIOS_MINORS)
+               return -ENXIO;
+
+       ASSERT(info);
+       ASSERT(info->info_buffer);
+
+       spin_lock(&info->info_lock);
+
+       /* Only offsets inside the buffer yield data; beyond EOF reads 0 */
+       if (*ppos < info->info_size) {
+               /* Resize count if beyond EOF */
+               if (*ppos + count > info->info_size)
+                       count = info->info_size - *ppos;
+
+               if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
+                       rc = -EFAULT;
+               } else {
+                       *ppos += count;
+                       rc = count;
+               }
+       }
+
+       spin_unlock(&info->info_lock);
+
+       return rc;
+}
+
+/*
+ * llseek handler for the per-open message buffer.
+ *
+ * Supports SEEK_SET (0), SEEK_CUR (1) and SEEK_END (2, relative to
+ * info_size).  Any other origin, or a resulting negative offset, fails
+ * with -EINVAL and leaves the file position untouched.  The result is
+ * kept in a loff_t so large offsets are not truncated through an int.
+ *
+ * Returns the new offset, -EINVAL as described above, or -ENXIO for an
+ * out of range minor.
+ */
+static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
+{
+       unsigned int minor = iminor(file->f_dentry->d_inode);
+       zpios_info_t *info = (zpios_info_t *)file->private_data;
+       loff_t rc = -EINVAL;
+
+       if (minor >= ZPIOS_MINORS)
+               return -ENXIO;
+
+       ASSERT(info);
+       ASSERT(info->info_buffer);
+
+       spin_lock(&info->info_lock);
+
+       switch (origin) {
+       case 0: /* SEEK_SET - No-op just do it */
+               break;
+       case 1: /* SEEK_CUR - Seek from current */
+               offset = file->f_pos + offset;
+               break;
+       case 2: /* SEEK_END - Seek from end */
+               offset = info->info_size + offset;
+               break;
+       default: /* Unsupported origin */
+               goto out;
+       }
+
+       if (offset >= 0) {
+               file->f_pos = offset;
+               file->f_version = 0;
+               rc = offset;
+       }
+out:
+       spin_unlock(&info->info_lock);
+
+       return rc;
+}
+
+/* Character device object, initialised and added in zpios_init() */
+static struct cdev zpios_cdev;
+/* File operations installed on zpios_cdev by cdev_init() in zpios_init() */
+static struct file_operations zpios_fops = {
+       .owner          = THIS_MODULE,
+       .open           = zpios_open,
+       .release        = zpios_release,
+       .ioctl          = zpios_ioctl,
+#ifdef CONFIG_COMPAT
+       /* 32-bit callers are forwarded to zpios_ioctl() */
+       .compat_ioctl   = zpios_compat_ioctl,
+#endif
+       .read           = zpios_read,
+       .write          = zpios_write,
+       .llseek         = zpios_seek,
+};
+
+/*
+ * Module init: reserve the ZPIOS_MAJOR device number range, add the
+ * character device, then create the class and device entries used for
+ * udev/sysfs.  Each failure undoes the steps completed before it and
+ * returns the error code; 0 is returned on success.
+ *
+ * NOTE(review): the result of spl_device_create() is stored but not
+ * checked - confirm whether a failure there should be handled.
+ */
+static int
+zpios_init(void)
+{
+       dev_t dev = MKDEV(ZPIOS_MAJOR, 0);
+       int rc;
+
+       rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME);
+       if (rc)
+               goto error;
+
+       /* Support for registering a character driver */
+       cdev_init(&zpios_cdev, &zpios_fops);
+       zpios_cdev.owner = THIS_MODULE;
+       kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME);
+       rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS);
+       if (rc) {
+               printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc);
+               kobject_put(&zpios_cdev.kobj);
+               goto error_region;
+       }
+
+       /* Support for udev make driver info available in sysfs */
+       zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME);
+       if (IS_ERR(zpios_class)) {
+               rc = PTR_ERR(zpios_class);
+               printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc);
+               cdev_del(&zpios_cdev);
+               goto error_region;
+       }
+
+       zpios_device = spl_device_create(zpios_class, NULL,
+                                        dev, NULL, ZPIOS_NAME);
+       return 0;
+
+error_region:
+       unregister_chrdev_region(dev, ZPIOS_MINORS);
+error:
+       printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc);
+       return rc;
+}
+
+/*
+ * Module exit: tear down everything zpios_init() set up, in reverse
+ * order (device, class, cdev, device number range).  Always returns 0.
+ */
+static int
+zpios_fini(void)
+{
+       dev_t first;
+
+       first = MKDEV(ZPIOS_MAJOR, 0);
+
+       spl_device_destroy(zpios_class, zpios_device, first);
+       spl_class_destroy(zpios_class);
+
+       cdev_del(&zpios_cdev);
+       unregister_chrdev_region(first, ZPIOS_MINORS);
+
+       return 0;
+}
+
+/* Hook zpios_init()/zpios_fini() up as the module's load/unload entry
+ * points (presumably SPL wrappers around module_init/module_exit - TODO
+ * confirm) and declare the module metadata.
+ */
+spl_module_init(zpios_init);
+spl_module_exit(zpios_fini);
+
+MODULE_AUTHOR("LLNL / Sun");
+MODULE_DESCRIPTION("Kernel PIOS implementation");
+MODULE_LICENSE("GPL");
index a1dfc38..ed6e502 100644 (file)
@@ -4,11 +4,17 @@ nobase_pkglibexec_SCRIPTS += zconfig.sh
 nobase_pkglibexec_SCRIPTS += zfs.sh
 nobase_pkglibexec_SCRIPTS += zpool-create.sh
 nobase_pkglibexec_SCRIPTS += zpool-config/*
+nobase_pkglibexec_SCRIPTS += zpios.sh
+nobase_pkglibexec_SCRIPTS += zpios-sanity.sh
+nobase_pkglibexec_SCRIPTS += zpios-survey.sh
+nobase_pkglibexec_SCRIPTS += zpios-test/*
+nobase_pkglibexec_SCRIPTS += zpios-profile/*
 EXTRA_DIST = zfs-update.sh $(nobase_pkglibexec_SCRIPTS)
 
 ZFS=${top_srcdir}/scripts/zfs.sh
 ZCONFIG=${top_srcdir}/scripts/zconfig.sh
 ZTEST=${top_builddir}/cmd/ztest/ztest
+ZPIOS_SANITY=${top_srcdir}/scripts/zpios-sanity.sh
 
 check:
        @echo
@@ -27,3 +33,11 @@ check:
        @echo
        @$(ZCONFIG)
        @echo
+       @echo -n "===================================="
+       @echo -n " ZPIOS "
+       @echo    "===================================="
+       @echo
+       @$(ZFS)
+       @$(ZPIOS_SANITY)
+       @$(ZFS) -u
+       @echo
index 0a8399f..102952f 100644 (file)
@@ -39,6 +39,8 @@ sbindir=@sbindir@
 ETCDIR=${ETCDIR:-/etc}
 DEVDIR=${DEVDIR:-/dev/disk/zpool}
 ZPOOLDIR=${ZPOOLDIR:-${pkglibexecdir}/zpool-config}
+ZPIOSDIR=${ZPIOSDIR:-${pkglibexecdir}/zpios-test}
+ZPIOSPROFILEDIR=${ZPIOSPROFILEDIR:-${pkglibexecdir}/zpios-profile}
 
 ZDB=${ZDB:-${sbindir}/zdb}
 ZFS=${ZFS:-${sbindir}/zfs}
@@ -46,10 +48,13 @@ ZINJECT=${ZINJECT:-${sbindir}/zinject}
 ZPOOL=${ZPOOL:-${sbindir}/zpool}
 ZPOOL_ID=${ZPOOL_ID:-${bindir}/zpool_id}
 ZTEST=${ZTEST:-${sbindir}/ztest}
+ZPIOS=${ZPIOS:-${sbindir}/zpios}
 
 COMMON_SH=${COMMON_SH:-${pkglibexecdir}/common.sh}
 ZFS_SH=${ZFS_SH:-${pkglibexecdir}/zfs.sh}
 ZPOOL_CREATE_SH=${ZPOOL_CREATE_SH:-${pkglibexecdir}/zpool-create.sh}
+ZPIOS_SH=${ZPIOS_SH:-${pkglibexecdir}/zpios.sh}
+ZPIOS_SURVEY_SH=${ZPIOS_SURVEY_SH:-${pkglibexecdir}/zpios-survey.sh}
 
 LDMOD=${LDMOD:-/sbin/modprobe}
 LSMOD=${LSMOD:-/sbin/lsmod}
diff --git a/scripts/zpios-profile/zpios-profile-disk.sh b/scripts/zpios-profile/zpios-profile-disk.sh
new file mode 100755 (executable)
index 0000000..b56ee1e
--- /dev/null
@@ -0,0 +1,129 @@
+#!/bin/bash
+#
+# /proc/diskstats <after skipping major/minor>
+# Field 1 -- device name
+# Field 2 -- # of reads issued
+# Field 3 -- # of reads merged
+# Field 4 -- # of sectors read
+# Field 5 -- # of milliseconds spent reading
+# Field 6 -- # of writes completed
+# Field 7 -- # of writes merged
+# Field 8 -- # of sectors written
+# Field 9 -- # of milliseconds spent writing
+# Field 10 -- # of I/Os currently in progress
+# Field 11 -- # of milliseconds spent doing I/Os
+# Field 12 -- weighted # of milliseconds spent doing I/Os
+
+PROG=zpios-profile-disk.sh
+
+RUN_PIDS=${0}
+RUN_LOG_DIR=${1}
+RUN_ID=${2}
+
+# Print a CSV table of per-step deltas for one statistics field.
+#   $1 - field number to extract (numbered as in the header comment,
+#        i.e. counted after the first 10 characters of each line are cut)
+# Walks ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]* snapshots oldest first
+# (ls sorted by ctime, reversed).  The first snapshot only produces the
+# header row; every later snapshot produces one row holding, per device
+# line matching "sd", the difference to the previous snapshot, followed
+# by the sum of those differences.
+create_table() {
+       local FIELD=$1
+       local ROW_M=()
+       local ROW_N=()
+       local HEADER=1
+       local STEP=1
+
+       for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do
+               # ROW_M is the previous snapshot, ROW_N the current one
+               ROW_M=( ${ROW_N[@]} )
+               ROW_N=( `cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` )
+
+               if [ $HEADER -eq 1 ]; then
+                       echo -n "step, "
+                       cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", "
+                       echo "total"
+                       HEADER=0
+               fi
+
+               # Nothing to diff against on the first snapshot
+               if [ ${#ROW_M[@]} -eq 0 ]; then
+                       continue
+               fi
+
+               # The set of devices must not change between snapshots
+               if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+                       echo "Badly formatted profile data in ${DISK_FILE}"
+                       break
+               fi
+
+               TOTAL=0
+               echo -n "${STEP}, "
+               for (( i=0; i<${#ROW_N[@]}; i++ )); do
+                       DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc`
+                       let TOTAL=${TOTAL}+${DELTA}
+                       echo -n "${DELTA}, "
+               done
+               echo "${TOTAL}, "
+
+               let STEP=${STEP}+1
+       done
+}
+
+# Same walk as create_table, but each delta is converted to MB/s.
+#   $1 - field number holding a sector count
+#   $2 - seconds between two snapshots
+# Each delta is multiplied by 512 (bytes per sector as assumed by this
+# script), divided by $2 and then by 1024*1024, using bc with scale=2.
+create_table_mbs() {
+       local FIELD=$1
+       local TIME=$2
+       local ROW_M=()
+       local ROW_N=()
+       local HEADER=1
+       local STEP=1
+
+       for DISK_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*`; do
+               # ROW_M is the previous snapshot, ROW_N the current one
+               ROW_M=( ${ROW_N[@]} )
+               ROW_N=( `cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t"` )
+
+               if [ $HEADER -eq 1 ]; then
+                       echo -n "step, "
+                       cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", "
+                       echo "total"
+                       HEADER=0
+               fi
+
+               # Nothing to diff against on the first snapshot
+               if [ ${#ROW_M[@]} -eq 0 ]; then
+                       continue
+               fi
+
+               # The set of devices must not change between snapshots
+               if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+                       echo "Badly formatted profile data in ${DISK_FILE}"
+                       break
+               fi
+
+               TOTAL=0
+               echo -n "${STEP}, "
+               for (( i=0; i<${#ROW_N[@]}; i++ )); do
+                       DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc`
+                       MBS=`echo "scale=2; ((${DELTA}*512)/${TIME})/(1024*1024)" | bc`
+                       TOTAL=`echo "scale=2; ${TOTAL}+${MBS}" | bc`
+                       echo -n "${MBS}, "
+               done
+               echo "${TOTAL}, "
+
+               let STEP=${STEP}+1
+       done
+}
+
+echo
+echo "Reads issued per device"
+create_table 2
+echo
+echo "Reads merged per device"
+create_table 3
+echo
+echo "Sectors read per device"
+create_table 4
+echo "MB/s per device"
+create_table_mbs 4 3
+
+echo
+echo "Writes issued per device"
+create_table 6
+echo
+echo "Writes merged per device"
+create_table 7
+echo
+echo "Sectors written per device"
+create_table 8
+echo "MB/s per device"
+create_table_mbs 8 3
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile-pids.sh b/scripts/zpios-profile/zpios-profile-pids.sh
new file mode 100755 (executable)
index 0000000..3514b38
--- /dev/null
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+PROG=zpios-profile-pids.sh
+
+RUN_PIDS=${0}
+RUN_LOG_DIR=${1}
+RUN_ID=${2}
+
+ROW_M=()
+ROW_N=()
+ROW_N_SCHED=()
+ROW_N_WAIT=()
+
+HEADER=1
+STEP=1
+
+# Walk the pids-[0-9]* snapshots oldest first (ls sorted by ctime,
+# reversed).  For each snapshot, fields 15 and 17 of every line are added
+# together and accumulated into one of 19 buckets chosen by the process
+# name (field 2, stripped of parentheses and any "/N" suffix).  From the
+# second snapshot on, one CSV row of per-bucket deltas is printed,
+# followed by the remaining "idle" share.
+for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do
+       # ROW_M is the previous snapshot's buckets, ROW_N the current ones
+       ROW_M=( ${ROW_N[@]} )
+       ROW_N=( 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 )
+       ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` )
+       ROW_N_WAIT=(  `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` )
+       ROW_N_NAMES=( `cat ${PID_FILE} | cut -f2  -d' ' | cut -f2 -d'(' | 
+                       cut -f1 -d')'   | cut -f1  -d'/' | tr "\n" "\t"` )
+
+       for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
+               SUM=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc`
+
+               # Map the thread name to its bucket; unknown names are skipped
+               case ${ROW_N_NAMES[${i}]} in
+                       zio_taskq)      IDX=0;;
+                       zio_req_nul)    IDX=1;;
+                       zio_irq_nul)    IDX=2;;
+                       zio_req_rd)     IDX=3;;
+                       zio_irq_rd)     IDX=4;;
+                       zio_req_wr)     IDX=5;;
+                       zio_irq_wr)     IDX=6;;
+                       zio_req_fr)     IDX=7;;
+                       zio_irq_fr)     IDX=8;;
+                       zio_req_cm)     IDX=9;;
+                       zio_irq_cm)     IDX=10;;
+                       zio_req_ctl)    IDX=11;;
+                       zio_irq_ctl)    IDX=12;;
+                       txg_quiesce)    IDX=13;;
+                       txg_sync)       IDX=14;;
+                       txg_timelimit)  IDX=15;;
+                       arc_reclaim)    IDX=16;;
+                       l2arc_feed)     IDX=17;;
+                       zpios_io)       IDX=18;;
+                       *)              continue;;
+               esac
+
+               let ROW_N[${IDX}]=${ROW_N[${IDX}]}+${SUM}
+       done
+
+       if [ $HEADER -eq 1 ]; then
+               echo "step, zio_taskq, zio_req_nul, zio_irq_nul, "        \
+                     "zio_req_rd, zio_irq_rd, zio_req_wr, zio_irq_wr, "   \
+                     "zio_req_fr, zio_irq_fr, zio_req_cm, zio_irq_cm, "   \
+                     "zio_req_ctl, zio_irq_ctl, txg_quiesce, txg_sync, "  \
+                     "txg_timelimit, arc_reclaim, l2arc_feed, zpios_io, " \
+                    "idle"
+               HEADER=0
+       fi
+
+       # Nothing to diff against on the first snapshot
+       if [ ${#ROW_M[@]} -eq 0 ]; then
+               continue
+       fi
+
+       if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+               echo "Badly formatted profile data in ${PID_FILE}"
+               break
+       fi
+
+       # Original values are in jiffies and we expect HZ to be 1000
+       # on most 2.6 systems thus we divide by 10 to get a percentage.
+       # IDLE starts at 1000 and has every bucket's delta subtracted.
+       IDLE=1000
+        echo -n "${STEP}, "
+       for (( i=0; i<${#ROW_N[@]}; i++ )); do
+               DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc`
+               DELTA_PERCENT=`echo "scale=1; ${DELTA}/10" | bc`
+               let IDLE=${IDLE}-${DELTA}
+               echo -n "${DELTA_PERCENT}, "
+       done
+       ILDE_PERCENT=`echo "scale=1; ${IDLE}/10" | bc`
+       echo "${ILDE_PERCENT}"
+
+       let STEP=${STEP}+1
+done
+
+# The script ends here with the status of the last command run.
+# NOTE(review): everything below this exit is unreachable.  It looks like
+# an alternative per-pid report that was left disabled - confirm whether
+# it should be removed or re-enabled.
+exit
+
+echo
+echo "Percent of total system time per pid"
+for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do
+       ROW_M=( ${ROW_N[@]} )
+       ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` )
+       ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` )
+
+       # One entry per line of the snapshot rather than per named bucket
+       for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
+               ROW_N[${i}]=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc`
+       done
+
+       if [ $HEADER -eq 1 ]; then
+               echo -n "step, "
+               cat ${PID_FILE} | cut -f2 -d' ' | tr "\n" ", "
+               echo
+               HEADER=0
+       fi
+
+       if [ ${#ROW_M[@]} -eq 0 ]; then
+               continue
+       fi
+
+       if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
+               echo "Badly formatted profile data in ${PID_FILE}"
+               break
+       fi
+
+       # Original values are in jiffies and we expect HZ to be 1000
+       # on most 2.6 systems thus we divide by 10 to get a percentage.
+        echo -n "${STEP}, "
+       for (( i=0; i<${#ROW_N[@]}; i++ )); do
+               DELTA=`echo "scale=1; (${ROW_N[${i}]}-${ROW_M[${i}]})/10" | bc`
+               echo -n "${DELTA}, "
+       done
+
+       echo
+       let STEP=${STEP}+1
+done
+
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile-post.sh b/scripts/zpios-profile/zpios-profile-post.sh
new file mode 100755 (executable)
index 0000000..3a454ba
--- /dev/null
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+PROG=zpios-profile-post.sh
+
+RUN_POST=${0}
+RUN_PHASE=${1}
+RUN_DIR=${2}
+RUN_ID=${3}
+RUN_POOL=${4}
+RUN_CHUNK_SIZE=${5}
+RUN_REGION_SIZE=${6}
+RUN_THRD_COUNT=${7}
+RUN_REGION_COUNT=${8}
+RUN_OFFSET=${9}
+RUN_REGION_NOISE=${10}
+RUN_CHUNK_NOISE=${11}
+RUN_THRD_DELAY=${12}
+RUN_FLAGS=${13}
+RUN_RESULT=${14}
+
+# Summarize system time per process
+# Runs ${PROFILE_PIDS} once per phase (create, write, read, remove) and
+# redirects its output to that phase's pids CSV file.
+# NOTE(review): only one argument is passed, while zpios-profile-pids.sh
+# in this change reads a log directory from $1 and a run id from $2 -
+# confirm which helper ${PROFILE_PIDS} points to.
+zpios_profile_post_pids() {
+       ${PROFILE_PIDS} ${PROFILE_RUN_CR_PIDS_LOG} >${PROFILE_RUN_CR_PIDS_CSV}
+       ${PROFILE_PIDS} ${PROFILE_RUN_WR_PIDS_LOG} >${PROFILE_RUN_WR_PIDS_CSV}
+       ${PROFILE_PIDS} ${PROFILE_RUN_RD_PIDS_LOG} >${PROFILE_RUN_RD_PIDS_CSV}
+       ${PROFILE_PIDS} ${PROFILE_RUN_RM_PIDS_LOG} >${PROFILE_RUN_RM_PIDS_CSV}
+}
+
+# Summarize per device performance
+zpios_profile_post_disk() {
+       ${PROFILE_DISK} ${PROFILE_RUN_CR_DISK_LOG} >${PROFILE_RUN_CR_DISK_CSV}
+       ${PROFILE_DISK} ${PROFILE_RUN_WR_DISK_LOG} >${PROFILE_RUN_WR_DISK_CSV}
+       ${PROFILE_DISK} ${PROFILE_RUN_RD_DISK_LOG} >${PROFILE_RUN_RD_DISK_CSV}
+       ${PROFILE_DISK} ${PROFILE_RUN_RM_DISK_LOG} >${PROFILE_RUN_RM_DISK_CSV}
+}
+
+
+# Stop a user defined profiling script which is gathering additional data
+#   $1 - path of the pid file written when the profiler was started
+# Sends SIGHUP to the recorded pid, then polls until the pid file
+# disappears (presumably removed by the profiler on exit - confirm).
+zpios_profile_post_stop() {
+       local PROFILE_PID=$1
+
+       kill -s SIGHUP `cat ${PROFILE_PID}`
+
+
+       # Sleep waiting for profile script to exit
+       while [ -f ${PROFILE_PID} ]; do
+               sleep 0.01
+       done
+}
+
+# Save the ARC and vdev cache statistics files, when they exist, into
+# the given phase directory.
+#   $1 - directory receiving arcstats.txt and vdev_cache_stats.txt
+zpios_profile_post_proc_stop() {
+       local PROC_DIR=$1
+
+       if [ -f ${PROFILE_ARC_PROC} ]; then
+               cat ${PROFILE_ARC_PROC} >${PROC_DIR}/arcstats.txt
+       fi
+
+       if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then
+               cat ${PROFILE_VDEV_CACHE_PROC} >${PROC_DIR}/vdev_cache_stats.txt
+       fi
+}
+
+# Stop oprofile, dump its samples, write the report and archive it.
+#   $1 - log file for this phase
+# NOTE(review): the opreport line redirects with '>' and therefore
+# truncates the output that the two opcontrol calls just appended to
+# the same file - confirm this is intended.
+zpios_profile_post_oprofile_stop() {
+       local OPROFILE_LOG=$1
+       local OPROFILE_ARGS="-a -g -l -p ${OPROFILE_KERNEL_DIR},${OPROFILE_SPL_DIR},${OPROFILE_ZFS_DIR}"
+
+       /usr/bin/opcontrol --stop >>${OPROFILE_LOG} 2>&1
+       /usr/bin/opcontrol --dump >>${OPROFILE_LOG} 2>&1
+       /usr/bin/opreport ${OPROFILE_ARGS} >${OPROFILE_LOG} 2>&1
+       /usr/bin/oparchive
+}
+
+# post-create: stop oprofile, save proc stats, stop the user profiler
+zpios_profile_post_create() {
+       zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_OPROFILE_LOG}
+       zpios_profile_post_proc_stop ${PROFILE_RUN_CR_DIR}
+       zpios_profile_post_stop ${PROFILE_RUN_CR_PID}
+}
+
+# post-write: stop oprofile, save proc stats, stop the user profiler
+zpios_profile_post_write() {
+       zpios_profile_post_oprofile_stop ${PROFILE_RUN_WR_OPROFILE_LOG}
+       zpios_profile_post_proc_stop ${PROFILE_RUN_WR_DIR}
+       zpios_profile_post_stop ${PROFILE_RUN_WR_PID}
+}
+
+zpios_profile_post_read() {
+       zpios_profile_post_oprofile_stop ${PROFILE_RUN_CR_RD_LOG}
+       zpios_profile_post_proc_stop ${PROFILE_RUN_RD_DIR}
+       zpios_profile_post_stop ${PROFILE_RUN_RD_PID}
+}
+
+# post-remove: stop oprofile, save proc stats, stop the user profiler
+zpios_profile_post_remove() {
+       zpios_profile_post_oprofile_stop ${PROFILE_RUN_RM_OPROFILE_LOG}
+       zpios_profile_post_proc_stop ${PROFILE_RUN_RM_DIR}
+       zpios_profile_post_stop ${PROFILE_RUN_RM_PID}
+}
+
+# Source global zpios test configuration
+if [ -f ${RUN_DIR}/zpios-config.sh ]; then
+       . ${RUN_DIR}/zpios-config.sh
+fi
+
+# Source global per-run test configuration
+if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then
+       . ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh
+fi
+
+# Dispatch on the phase name passed as $1; unknown phases print the
+# usage line and exit with status 1.
+case "${RUN_PHASE}" in
+       post-run)
+               zpios_profile_post_pids
+               zpios_profile_post_disk
+               ;;
+       post-create)
+               zpios_profile_post_create
+               ;;
+       post-write)
+               zpios_profile_post_write
+               ;;
+       post-read)
+               zpios_profile_post_read
+               ;;
+       post-remove)
+               zpios_profile_post_remove
+               ;;
+       *)
+               echo "Usage: ${PROG} {post-run|post-create|post-write|post-read|post-remove}"
+               exit 1
+esac
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile-pre.sh b/scripts/zpios-profile/zpios-profile-pre.sh
new file mode 100755 (executable)
index 0000000..a2a8857
--- /dev/null
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+PROG=zpios-profile-pre.sh
+
+PROFILE_RDY=0
+trap "PROFILE_RDY=1" SIGHUP
+
+RUN_PRE=${0}
+RUN_PHASE=${1}
+RUN_DIR=${2}
+RUN_ID=${3}
+RUN_POOL=${4}
+RUN_CHUNK_SIZE=${5}
+RUN_REGION_SIZE=${6}
+RUN_THRD_COUNT=${7}
+RUN_REGION_COUNT=${8}
+RUN_OFFSET=${9}
+RUN_REGION_NOISE=${10}
+RUN_CHUNK_NOISE=${11}
+RUN_THRD_DELAY=${12}
+RUN_FLAGS=${13}
+RUN_RESULT=${14}
+
+# Write ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh, the per-run settings
+# file sourced by the pre and post scripts.  The here-document is
+# unquoted, so RUN_DIR and RUN_ID are expanded when the file is written
+# and the generated file holds literal paths for every phase.
+zpios_profile_pre_run_cfg() {
+cat > ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh << EOF
+#
+# Zpios Profiling Configuration for Run ${RUN_ID}
+#
+
+PROFILE_RUN_DIR=${RUN_DIR}/${RUN_ID}
+
+PROFILE_RUN_CR_DIR=${RUN_DIR}/${RUN_ID}/create
+PROFILE_RUN_CR_PID=${RUN_DIR}/${RUN_ID}/create/profile.pid
+PROFILE_RUN_CR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/create/oprofile.txt
+PROFILE_RUN_CR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/create/pids.txt
+PROFILE_RUN_CR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/create/pids.csv
+PROFILE_RUN_CR_DISK_LOG=${RUN_DIR}/${RUN_ID}/create/disk.txt
+PROFILE_RUN_CR_DISK_CSV=${RUN_DIR}/${RUN_ID}/create/disk.csv
+
+PROFILE_RUN_WR_DIR=${RUN_DIR}/${RUN_ID}/write
+PROFILE_RUN_WR_PID=${RUN_DIR}/${RUN_ID}/write/profile.pid
+PROFILE_RUN_WR_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/write/oprofile.txt
+PROFILE_RUN_WR_PIDS_LOG=${RUN_DIR}/${RUN_ID}/write/pids.txt
+PROFILE_RUN_WR_PIDS_CSV=${RUN_DIR}/${RUN_ID}/write/pids.csv
+PROFILE_RUN_WR_DISK_LOG=${RUN_DIR}/${RUN_ID}/write/disk.txt
+PROFILE_RUN_WR_DISK_CSV=${RUN_DIR}/${RUN_ID}/write/disk.csv
+
+PROFILE_RUN_RD_DIR=${RUN_DIR}/${RUN_ID}/read
+PROFILE_RUN_RD_PID=${RUN_DIR}/${RUN_ID}/read/profile.pid
+PROFILE_RUN_RD_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/read/oprofile.txt
+PROFILE_RUN_RD_PIDS_LOG=${RUN_DIR}/${RUN_ID}/read/pids.txt
+PROFILE_RUN_RD_PIDS_CSV=${RUN_DIR}/${RUN_ID}/read/pids.csv
+PROFILE_RUN_RD_DISK_LOG=${RUN_DIR}/${RUN_ID}/read/disk.txt
+PROFILE_RUN_RD_DISK_CSV=${RUN_DIR}/${RUN_ID}/read/disk.csv
+
+PROFILE_RUN_RM_DIR=${RUN_DIR}/${RUN_ID}/remove
+PROFILE_RUN_RM_PID=${RUN_DIR}/${RUN_ID}/remove/profile.pid
+PROFILE_RUN_RM_OPROFILE_LOG=${RUN_DIR}/${RUN_ID}/remove/oprofile.txt
+PROFILE_RUN_RM_PIDS_LOG=${RUN_DIR}/${RUN_ID}/remove/pids.txt
+PROFILE_RUN_RM_PIDS_CSV=${RUN_DIR}/${RUN_ID}/remove/pids.csv
+PROFILE_RUN_RM_DISK_LOG=${RUN_DIR}/${RUN_ID}/remove/disk.txt
+PROFILE_RUN_RM_DISK_CSV=${RUN_DIR}/${RUN_ID}/remove/disk.csv
+
+# PROFILE_PIDS_LOG=${RUN_DIR}/${RUN_ID}/pids-summary.csv
+# PROFILE_DISK_LOG=${RUN_DIR}/${RUN_ID}/disk-summary.csv
+EOF
+}
+
+# Record the arguments this script was invoked with in
+# ${RUN_DIR}/${RUN_ID}/zpios-args.txt, one NAME=value pair per line.
+zpios_profile_pre_run_args() {
+cat > ${RUN_DIR}/${RUN_ID}/zpios-args.txt << EOF
+#
+# Zpios Arguments for Run ${RUN_ID}
+#
+
+DIR=${RUN_DIR}
+ID=${RUN_ID}
+POOL=${RUN_POOL}
+CHUNK_SIZE=${RUN_CHUNK_SIZE}
+REGION_SIZE=${RUN_REGION_SIZE}
+THRD_COUNT=${RUN_THRD_COUNT}
+REGION_COUNT=${RUN_REGION_COUNT}
+OFFSET=${RUN_OFFSET}
+REGION_NOISE=${RUN_REGION_NOISE}
+CHUNK_NOISE=${RUN_CHUNK_NOISE}
+THRD_DELAY=${RUN_THRD_DELAY}
+FLAGS=${RUN_FLAGS}
+RESULT=${RUN_RESULT}
+EOF
+}
+
+# Spawn a user defined profiling script to gather additional data
+#   $1 - pid file that receives the pid of the background profiler
+# Blocks until PROFILE_RDY becomes non-zero, which the SIGHUP trap at
+# the top of this script sets.
+zpios_profile_pre_start() {
+       local PROFILE_PID=$1
+
+       ${PROFILE_USER} ${RUN_PHASE} ${RUN_DIR} ${RUN_ID} &
+       echo "$!" >${PROFILE_PID}
+
+       # Sleep waiting for profile script to be ready, it will
+       # signal us via SIGHUP when it is ready to start profiling.
+       while [ ${PROFILE_RDY} -eq 0 ]; do
+               sleep 0.01
+       done
+}
+
+# Write 0 to the ARC and vdev cache statistics files, when they exist,
+# before a phase starts (presumably this resets the counters - confirm).
+# NOTE(review): despite the "post" in its name this helper is only
+# called from the pre-phase functions of this script.
+zpios_profile_post_proc_start() { 
+
+        if [ -f ${PROFILE_ARC_PROC} ]; then
+                echo 0 >${PROFILE_ARC_PROC}
+        fi
+
+        if [ -f ${PROFILE_VDEV_CACHE_PROC} ]; then
+                echo 0 >${PROFILE_VDEV_CACHE_PROC}
+        fi
+}
+
+# Reset and start oprofile, appending its output to the given log.
+#   $1 - log file for this phase
+zpios_profile_pre_oprofile_start() {
+       local OPROFILE_LOG=$1
+
+       /usr/bin/opcontrol --reset >>${OPROFILE_LOG} 2>&1
+       /usr/bin/opcontrol --start >>${OPROFILE_LOG} 2>&1
+}
+
+# pre-create: make the phase directory, start the user profiler, reset
+# the proc statistics and start oprofile
+zpios_profile_pre_create() {
+       mkdir ${PROFILE_RUN_CR_DIR}
+       zpios_profile_pre_start ${PROFILE_RUN_CR_PID}
+       zpios_profile_post_proc_start
+       zpios_profile_pre_oprofile_start ${PROFILE_RUN_CR_OPROFILE_LOG}
+}
+
+# pre-write: make the phase directory, start the user profiler, reset
+# the proc statistics and start oprofile
+zpios_profile_pre_write() {
+       mkdir ${PROFILE_RUN_WR_DIR}
+       zpios_profile_pre_start ${PROFILE_RUN_WR_PID}
+       zpios_profile_post_proc_start
+       zpios_profile_pre_oprofile_start ${PROFILE_RUN_WR_OPROFILE_LOG}
+}
+
+zpios_profile_pre_read() {
+       mkdir ${PROFILE_RUN_RD_DIR}
+       zpios_profile_pre_start ${PROFILE_RUN_RD_PID}
+       zpios_profile_post_proc_start
+       zpios_profile_pre_oprofile_start ${PROFILE_RUN_CR_RD_LOG}
+}
+
+# pre-remove: make the phase directory, start the user profiler, reset
+# the proc statistics and start oprofile
+zpios_profile_pre_remove() {
+       mkdir ${PROFILE_RUN_RM_DIR}
+       zpios_profile_pre_start ${PROFILE_RUN_RM_PID}
+       zpios_profile_post_proc_start
+       zpios_profile_pre_oprofile_start ${PROFILE_RUN_RM_OPROFILE_LOG}
+}
+
+# Source global zpios test configuration
+if [ -f ${RUN_DIR}/zpios-config.sh ]; then
+       . ${RUN_DIR}/zpios-config.sh
+fi
+
+# Source global per-run test configuration
+# (absent during pre-run, which is the phase that creates it)
+if [ -f ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh ]; then
+       . ${RUN_DIR}/${RUN_ID}/zpios-config-run.sh
+fi
+
+# Dispatch on the phase name passed as $1; unknown phases print the
+# usage line and exit with status 1.
+case "${RUN_PHASE}" in
+       pre-run)
+               mkdir -p ${RUN_DIR}/${RUN_ID}/
+               zpios_profile_pre_run_cfg
+               zpios_profile_pre_run_args
+               ;;
+       pre-create)
+               zpios_profile_pre_create
+               ;;
+       pre-write)
+               zpios_profile_pre_write
+               ;;
+       pre-read)
+               zpios_profile_pre_read
+               ;;
+       pre-remove)
+               zpios_profile_pre_remove
+               ;;
+       *)
+               echo "Usage: ${PROG} {pre-run|pre-create|pre-write|pre-read|pre-remove}"
+               exit 1
+esac
+
+exit 0
diff --git a/scripts/zpios-profile/zpios-profile.sh b/scripts/zpios-profile/zpios-profile.sh
new file mode 100755 (executable)
index 0000000..f4f0ee9
--- /dev/null
@@ -0,0 +1,226 @@
+#!/bin/bash
+
+
+PROG=zpios-profile.sh
+
+trap "RUN_DONE=1" SIGHUP
+
+RUN_PHASE=${1}
+RUN_LOG_DIR=${2}
+RUN_ID=${3}
+RUN_DONE=0
+
+POLL_INTERVAL=2.99
+
+# Log these pids, the exact pid numbers will vary from system to system
+# so I harvest pid for all the following type of processes from /proc/<pid>/
+#
+# zio_taskq/#
+# spa_zio_issue/#
+# spa_zio_intr/#
+# txg_quiesce_thr
+# txg_sync_thread
+# txg_timelimit_t
+# arc_reclaim_thr
+# l2arc_feed_thre
+# zpios_io/#
+
+ZIO_TASKQ_PIDS=()
+ZIO_REQ_NUL_PIDS=() 
+ZIO_IRQ_NUL_PIDS=() 
+ZIO_REQ_RD_PIDS=() 
+ZIO_IRQ_RD_PIDS=() 
+ZIO_REQ_WR_PIDS=() 
+ZIO_IRQ_WR_PIDS=()
+ZIO_REQ_FR_PIDS=() 
+ZIO_IRQ_FR_PIDS=()
+ZIO_REQ_CM_PIDS=() 
+ZIO_IRQ_CM_PIDS=()
+ZIO_REQ_CTL_PIDS=() 
+ZIO_IRQ_CTL_PIDS=()
+
+TXG_QUIESCE_PIDS=()
+TXG_SYNC_PIDS=() 
+TXG_TIMELIMIT_PIDS=()
+
+ARC_RECLAIM_PIDS=()
+L2ARC_FEED_PIDS=()
+
+ZPIOS_IO_PIDS=()
+
+show_pids() {
+       echo "* zio_taskq:     { ${ZIO_TASKQ_PIDS[@]} } = ${#ZIO_TASKQ_PIDS[@]}"
+       echo "* zio_req_nul:   { ${ZIO_REQ_NUL_PIDS[@]} } = ${#ZIO_REQ_NUL_PIDS[@]}"
+       echo "* zio_irq_nul:   { ${ZIO_IRQ_NUL_PIDS[@]} } = ${#ZIO_IRQ_NUL_PIDS[@]}"
+       echo "* zio_req_rd:    { ${ZIO_REQ_RD_PIDS[@]} } = ${#ZIO_REQ_RD_PIDS[@]}"
+       echo "* zio_irq_rd:    { ${ZIO_IRQ_RD_PIDS[@]} } = ${#ZIO_IRQ_RD_PIDS[@]}"
+       echo "* zio_req_wr:    { ${ZIO_REQ_WR_PIDS[@]} } = ${#ZIO_REQ_WR_PIDS[@]}"
+       echo "* zio_irq_wr:    { ${ZIO_IRQ_WR_PIDS[@]} } = ${#ZIO_IRQ_WR_PIDS[@]}"
+       echo "* zio_req_fr:    { ${ZIO_REQ_FR_PIDS[@]} } = ${#ZIO_REQ_FR_PIDS[@]}"
+       echo "* zio_irq_fr:    { ${ZIO_IRQ_FR_PIDS[@]} } = ${#ZIO_IRQ_FR_PIDS[@]}"
+       echo "* zio_req_cm:    { ${ZIO_REQ_CM_PIDS[@]} } = ${#ZIO_REQ_CM_PIDS[@]}"
+       echo "* zio_irq_cm:    { ${ZIO_IRQ_CM_PIDS[@]} } = ${#ZIO_IRQ_CM_PIDS[@]}"
+       echo "* zio_req_ctl:   { ${ZIO_REQ_CTL_PIDS[@]} } = ${#ZIO_REQ_CTL_PIDS[@]}"
+       echo "* zio_irq_ctl:   { ${ZIO_IRQ_CTL_PIDS[@]} } = ${#ZIO_IRQ_CTL_PIDS[@]}"
+       echo "* txg_quiesce:   { ${TXG_QUIESCE_PIDS[@]} } = ${#TXG_QUIESCE_PIDS[@]}"
+       echo "* txg_sync:      { ${TXG_SYNC_PIDS[@]} } = ${#TXG_SYNC_PIDS[@]}"
+       echo "* txg_timelimit: { ${TXG_TIMELIMIT_PIDS[@]} } = ${#TXG_TIMELIMIT_PIDS[@]}"
+       echo "* arc_reclaim:   { ${ARC_RECLAIM_PIDS[@]} } = ${#ARC_RECLAIM_PIDS[@]}"
+       echo "* l2arc_feed:    { ${L2ARC_FEED_PIDS[@]} } = ${#L2ARC_FEED_PIDS[@]}"
+       echo "* zpios_io:      { ${ZPIOS_IO_PIDS[@]} } = ${#ZPIOS_IO_PIDS[@]}"
+}
+
+check_pid() {
+       local PID=$1
+       local NAME=$2
+       local TYPE=$3
+       local PIDS=( "$4" )
+        local NAME_STRING=`echo ${NAME} | cut -f1 -d'/'`
+        local NAME_NUMBER=`echo ${NAME} | cut -f2 -d'/'`
+
+       if [ "${NAME_STRING}" == "${TYPE}" ]; then
+               if [ -n "${NAME_NUMBER}" ]; then
+                       PIDS[${NAME_NUMBER}]=${PID}
+               else
+                       PIDS[${#PIDS[@]}]=${PID}
+
+               fi
+       fi
+
+       echo "${PIDS[@]}"
+}
+
+# NOTE: This whole process is crazy slow but it will do for now
+aquire_pids() {
+       echo "--- Aquiring ZFS pids ---"
+
+       for PID in `ls /proc/ | grep [0-9] | sort -n -u`; do
+               if [ ! -e /proc/${PID}/status ]; then
+                       continue
+               fi
+
+               NAME=`cat /proc/${PID}/status  | head -n1 | cut -f2`
+
+               ZIO_TASKQ_PIDS=( `check_pid ${PID} ${NAME} "zio_taskq" \
+                                "$(echo "${ZIO_TASKQ_PIDS[@]}")"` )
+
+               ZIO_REQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_nul" \
+                                  "$(echo "${ZIO_REQ_NUL_PIDS[@]}")"` )
+
+               ZIO_IRQ_NUL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_nul" \
+                                  "$(echo "${ZIO_IRQ_NUL_PIDS[@]}")"` )
+
+               ZIO_REQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_req_rd" \
+                                  "$(echo "${ZIO_REQ_RD_PIDS[@]}")"` )
+
+               ZIO_IRQ_RD_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_rd" \
+                                  "$(echo "${ZIO_IRQ_RD_PIDS[@]}")"` )
+
+               ZIO_REQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_wr" \
+                                  "$(echo "${ZIO_REQ_WR_PIDS[@]}")"` )
+
+               ZIO_IRQ_WR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_wr" \
+                                  "$(echo "${ZIO_IRQ_WR_PIDS[@]}")"` )
+
+               ZIO_REQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_req_fr" \
+                                  "$(echo "${ZIO_REQ_FR_PIDS[@]}")"` )
+
+               ZIO_IRQ_FR_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_fr" \
+                                  "$(echo "${ZIO_IRQ_FR_PIDS[@]}")"` )
+
+               ZIO_REQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_req_cm" \
+                                  "$(echo "${ZIO_REQ_CM_PIDS[@]}")"` )
+
+               ZIO_IRQ_CM_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_cm" \
+                                  "$(echo "${ZIO_IRQ_CM_PIDS[@]}")"` )
+
+               ZIO_REQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_req_ctl" \
+                                  "$(echo "${ZIO_REQ_CTL_PIDS[@]}")"` )
+
+               ZIO_IRQ_CTL_PIDS=( `check_pid ${PID} ${NAME} "zio_irq_ctl" \
+                                  "$(echo "${ZIO_IRQ_CTL_PIDS[@]}")"` )
+
+               TXG_QUIESCE_PIDS=( `check_pid ${PID} ${NAME} "txg_quiesce" \
+                                  "$(echo "${TXG_QUIESCE_PIDS[@]}")"` )
+
+               TXG_SYNC_PIDS=( `check_pid ${PID} ${NAME} "txg_sync" \
+                               "$(echo "${TXG_SYNC_PIDS[@]}")"` )
+
+               TXG_TIMELIMIT_PIDS=( `check_pid ${PID} ${NAME} "txg_timelimit" \
+                                    "$(echo "${TXG_TIMELIMIT_PIDS[@]}")"` )
+
+               ARC_RECLAIM_PIDS=( `check_pid ${PID} ${NAME} "arc_reclaim" \
+                                     "$(echo "${ARC_RECLAIM_PIDS[@]}")"` )
+
+               L2ARC_FEED_PIDS=( `check_pid ${PID} ${NAME} "l2arc_feed" \
+                                  "$(echo "${L2ARC_FEED_PIDS[@]}")"` )
+       done
+
+       # Wait for zpios_io threads to start
+       kill -s SIGHUP ${PPID}
+       echo "* Waiting for zpios_io threads to start"
+       while [ ${RUN_DONE} -eq 0 ]; do
+               ZPIOS_IO_PIDS=( `ps ax | grep zpios_io | grep -v grep | \
+                                 sed 's/^ *//g' | cut -f1 -d' '` )
+               if [ ${#ZPIOS_IO_PIDS[@]} -gt 0 ]; then
+                       break;
+               fi
+               sleep 0.1
+       done
+
+       echo "`show_pids`" >${RUN_LOG_DIR}/${RUN_ID}/pids.txt
+}
+
+log_pids() {
+       echo "--- Logging ZFS profile to ${RUN_LOG_DIR}/${RUN_ID}/ ---"
+       ALL_PIDS=( ${ZIO_TASKQ_PIDS[@]}     \
+                   ${ZIO_REQ_NUL_PIDS[@]}   \
+                   ${ZIO_IRQ_NUL_PIDS[@]}   \
+                   ${ZIO_REQ_RD_PID[@]}     \
+                   ${ZIO_IRQ_RD_PIDS[@]}    \
+                   ${ZIO_REQ_WR_PIDS[@]}    \
+                   ${ZIO_IRQ_WR_PIDS[@]}    \
+                   ${ZIO_REQ_FR_PIDS[@]}    \ 
+                   ${ZIO_IRQ_FR_PIDS[@]}    \
+                   ${ZIO_REQ_CM_PIDS[@]}    \ 
+                   ${ZIO_IRQ_CM_PIDS[@]}    \
+                   ${ZIO_REQ_CTL_PIDS[@]}   \
+                   ${ZIO_IRQ_CTL_PIDS[@]}   \
+                   ${TXG_QUIESCE_PIDS[@]}   \
+                   ${TXG_SYNC_PIDS[@]}      \
+                   ${TXG_TIMELIMIT_PIDS[@]} \
+                   ${ARC_RECLAIM_PIDS[@]}   \
+                   ${L2ARC_FEED_PIDS[@]}    \
+                   ${ZPIOS_IO_PIDS[@]} )
+
+       while [ ${RUN_DONE} -eq 0 ]; do
+               NOW=`date +%s.%N`
+               LOG_PIDS="${RUN_LOG_DIR}/${RUN_ID}/pids-${NOW}"
+               LOG_DISK="${RUN_LOG_DIR}/${RUN_ID}/disk-${NOW}"
+
+               for PID in "${ALL_PIDS[@]}"; do
+                       if [ -z ${PID} ]; then
+                               continue;
+                       fi
+
+                       if [ -e /proc/${PID}/stat ]; then
+                               cat /proc/${PID}/stat | head -n1 >>${LOG_PIDS}
+                       else
+                               echo "<${PID} exited>" >>${LOG_PIDS}
+                       fi
+               done
+
+               cat /proc/diskstats >${LOG_DISK}
+
+               NOW2=`date +%s.%N`
+               DELTA=`echo "${POLL_INTERVAL}-(${NOW2}-${NOW})" | bc`
+               sleep ${DELTA}
+       done
+}
+
+aquire_pids
+log_pids
+
+# rm ${PROFILE_PID}
+
+exit 0
diff --git a/scripts/zpios-sanity.sh b/scripts/zpios-sanity.sh
new file mode 100755 (executable)
index 0000000..194ae82
--- /dev/null
@@ -0,0 +1,158 @@
+#!/bin/bash
+#
+# ZFS/ZPOOL configuration test script.
+
+basedir="$(dirname $0)"
+
+SCRIPT_COMMON=common.sh
+if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then
+. "${basedir}/${SCRIPT_COMMON}"
+else
+echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
+fi
+
+PROG=zpios-sanity.sh
+HEADER=
+
+# Print the help text on stdout.  $0 is expanded inside the here-document,
+# so the synopsis shows the path the script was invoked with.
+usage() {
+cat << EOF
+USAGE:
+$0 [hvxfc]
+
+DESCRIPTION:
+        ZPIOS sanity tests
+
+OPTIONS:
+        -h      Show this message
+        -v      Verbose
+        -x      Destructive hd/sd/md/dm/ram tests
+       -f      Don't prompt due to -x
+       -c      Cleanup lo+file devices at start
+
+EOF
+}
+
+while getopts 'hvxfc?' OPTION; do
+       case $OPTION in
+       h)
+               usage
+               exit 1
+               ;;
+       v)
+               VERBOSE=1
+               ;;
+       x)
+               DANGEROUS=1
+               ;;
+       f)
+               FORCE=1
+               ;;
+       c)
+               CLEANUP=1
+               ;;
+       ?)
+               usage
+               exit
+               ;;
+       esac
+done
+
+if [ $(id -u) != 0 ]; then
+       die "Must run as root"
+fi
+
+# Perform pre-cleanup is requested
+if [ ${CLEANUP} ]; then
+       cleanup_loop_devices
+       rm -f /tmp/zpool.cache.*
+fi
+
+zpios_test() {
+       CONFIG=$1
+       TEST=$2
+       LOG=`mktemp`
+
+       ${ZPIOS_SH} -f -c ${CONFIG} -t ${TEST} &>${LOG}
+       if [ $? -ne 0 ]; then
+               if [ ${VERBOSE} ]; then
+                       printf "FAIL:     %-13s\n" ${CONFIG}
+                       cat ${LOG}
+               else
+                       if [ ! ${HEADER} ]; then
+                               head -2 ${LOG}
+                               HEADER=1
+                       fi
+
+                       printf "FAIL:     %-13s" ${CONFIG}
+                       tail -1 ${LOG}
+               fi
+       else
+               if [ ${VERBOSE} ]; then
+                       cat ${LOG}
+               else
+                       if [ ! ${HEADER} ]; then
+                               head -2 ${LOG}
+                               HEADER=1
+                       fi
+
+                       tail -1 ${LOG}
+               fi
+       fi
+
+       rm -f ${LOG}
+}
+
+if [ ${DANGEROUS} ] && [ ! ${FORCE} ]; then
+       cat << EOF
+The -x option was passed which will result in UNRECOVERABLE DATA LOSS
+on on the following block devices:
+
+  /dev/sd[abcd]
+  /dev/hda
+  /dev/ram0
+  /dev/md0
+  /dev/dm-0
+
+To continue please confirm by entering YES:
+EOF
+       read CONFIRM
+       if [ ${CONFIRM} != "YES" ] && [ ${CONFIRM} != "yes" ]; then
+               exit 0;
+       fi
+fi
+
+#
+# These configurations are all safe and pose no risk to any data on
+# the system which runs them.  They will confine all their IO to a
+# file in /tmp or a loopback device configured to use a file in /tmp.
+#
+SAFE_CONFIGS=(                                         \
+       file-raid0 file-raid10 file-raidz file-raidz2   \
+       lo-raid0 lo-raid10 lo-raidz lo-raidz2           \
+)
+
+#
+# These configurations are down right dangerous.  They will attempt
+# to use various real block devices on your system which may contain
+# data you car about.  You are STRONGLY advised not to run this unless
+# you are certain there is no data on the system you care about.
+#
+DANGEROUS_CONFIGS=(                                    \
+       hda-raid0                                       \
+       sda-raid0                                       \
+       ram0-raid0                                      \
+       md0-raid10 md0-raid5                            \
+       dm0-raid0                                       \
+)
+
+for CONFIG in ${SAFE_CONFIGS[*]}; do
+       zpios_test $CONFIG tiny
+done
+
+if [ ${DANGEROUS} ]; then
+       for CONFIG in ${DANGEROUS_CONFIGS[*]}; do
+               zpios_test $CONFIG tiny
+       done
+fi
+
+exit 0
diff --git a/scripts/zpios-survey.sh b/scripts/zpios-survey.sh
new file mode 100755 (executable)
index 0000000..cb751b4
--- /dev/null
@@ -0,0 +1,215 @@
+#!/bin/bash
+#
+# Wrapper script for easily running a survey of zpios based tests
+#
+
+basedir="$(dirname $0)"
+
+SCRIPT_COMMON=common.sh
+if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then
+. "${basedir}/${SCRIPT_COMMON}"
+else
+echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
+fi
+
+PROG=zpios-survey.sh
+
+# Print the help text on stdout.  $0 is expanded inside the here-document,
+# so the synopsis shows the path the script was invoked with.
+# NOTE(review): -l is listed under OPTIONS and accepted by the getopts
+# loop, but it is missing from the synopsis line.
+usage() {
+cat << EOF
+USAGE:
+$0 [hvp] <-c config> <-t test>
+
+DESCRIPTION:
+        Helper script for easy zpios survey benchmarking.
+
+OPTIONS:
+        -h      Show this message
+        -v      Verbose
+        -p      Enable profiling
+        -c      Zpool configuration
+        -t      Zpios test
+        -l      Zpios survey log
+
+EOF
+}
+
+print_header() {
+tee -a ${ZPIOS_SURVEY_LOG} << EOF
+
+================================================================
+Test: $1
+EOF
+}
+
+# Baseline performance for an out of the box config with no manual tuning.
+# Ideally, we want everything to be automatically tuned for your system and
+# for this to perform reasonably well.
+zpios_survey_base() {
+       TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+baseline"
+       print_header ${TEST_NAME}
+
+       ${ZFS_SH} ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZFS_SH} -u ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Disable ZFS's prefetching.  For some reason that is still not clear,
+# the current prefetching policy is quite bad for a random workload.
+# Allowing the algorithm to detect a random workload and not do anything
+# may be the way to address this issue.
+#
+# Runs ${ZFS_SH}, the zpios test with "--noprefetch", then ${ZFS_SH} -u,
+# appending the output of each step to the survey log.  The first step
+# previously lacked the pipe, so "tee -a <log>" was handed to ${ZFS_SH}
+# as arguments and its output never reached the log.
+zpios_survey_prefetch() {
+       TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+prefetch"
+       print_header ${TEST_NAME}
+
+       ${ZFS_SH} ${VERBOSE_FLAG} | tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+               -o "--noprefetch" |
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZFS_SH} -u ${VERBOSE_FLAG} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Simulating a zerocopy IO path should improve performance by freeing up
+# lots of CPU which is wasted move data between buffers.
+zpios_survey_zerocopy() {
+       TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+zerocopy"
+       print_header ${TEST_NAME}
+
+       ${ZFS_SH} ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+               -o "--zerocopy" |                                      \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZFS_SH} -u ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Disabling checksumming should show some (if small) improvement
+# simply due to freeing up a modest amount of CPU.
+zpios_survey_checksum() {
+       TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+checksum"
+       print_header ${TEST_NAME}
+
+       ${ZFS_SH} ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+               -s "set checksum=off" |                                \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZFS_SH} -u ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Increase the pending IO depth.  This also seems to improve things,
+# likely at the expense of latency.  It should be explored further: the
+# impact observed is much bigger than expected, so there may be some low
+# hanging fruit to be found here.
+#
+# Runs ${ZFS_SH} with zfs="zfs_vdev_max_pending=1024", the zpios test,
+# then ${ZFS_SH} -u, appending the output of each step to the survey log.
+zpios_survey_pending() {
+       TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+pending"
+       print_header ${TEST_NAME}
+
+       ${ZFS_SH} ${VERBOSE_FLAG} zfs="zfs_vdev_max_pending=1024" |
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} |
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZFS_SH} -u ${VERBOSE_FLAG} | tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# To avoid memory fragmentation issues our slab implementation can be
+# based on a virtual address space.  Interestingly, we take a pretty
+# substantial performance penalty for this somewhere in the low level
+# IO drivers.  If we back the slab with kmem pages we see far better
+# read performance numbers at the cost of memory fragmention and general
+# system instability due to large allocations.  This may be because of
+# an optimization in the low level drivers due to the contigeous kmem
+# based memory.  This needs to be explained.  The good news here is that
+# with zerocopy interfaces added at the DMU layer we could gaurentee
+# kmem based memory for a pool of pages.
+#
+# 0x100 = KMC_KMEM - Force kmem_* based slab
+# 0x200 = KMC_VMEM - Force vmem_* based slab
+zpios_survey_kmem() {
+       TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+kmem"
+       print_header ${TEST_NAME}
+
+       ${ZFS_SH} ${VERBOSE_FLAG}             \  
+               zfs="zio_bulk_flags=0x100" | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZFS_SH} -u ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+# Apply all possible turning concurrently to get a best case number
+zpios_survey_all() {
+       TEST_NAME="${ZPOOL_CONFIG}+${ZPIOS_TEST}+all"
+       print_header ${TEST_NAME}
+
+       ${ZFS_SH} ${VERBOSE_FLAG}                \  
+               zfs="zfs_vdev_max_pending=1024" \
+               zfs="zio_bulk_flags=0x100" |    \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZPIOS_SH} ${VERBOSE_FLAG} -c ${ZPOOL_CONFIG} -t ${ZPIOS_TEST} \
+               -o "--noprefetch --zerocopy"                           \
+               -s "set checksum=off" |                                \
+               tee -a ${ZPIOS_SURVEY_LOG}
+       ${ZFS_SH} -u ${VERBOSE_FLAG} | \
+               tee -a ${ZPIOS_SURVEY_LOG}
+}
+
+
+PROFILE=
+ZPOOL_NAME=zpios-survey
+ZPOOL_CONFIG=zpool-config.sh
+ZPIOS_TEST=zpios-test.sh
+ZPIOS_SURVEY_LOG=/dev/null
+
+while getopts 'hvpc:t:l:' OPTION; do
+       case $OPTION in
+       h)
+               usage
+               exit 1
+               ;;
+       v)
+               VERBOSE=1
+               VERBOSE_FLAG="-v"
+               ;;
+       p)
+               PROFILE=1
+               PROFILE_FLAG="-p"
+               ;;
+       c)
+               ZPOOL_CONFIG=${OPTARG}
+               ;;
+       t)
+               ZPIOS_TEST=${OPTARG}
+               ;;
+       l)
+               ZPIOS_SURVEY_LOG=${OPTARG}
+               ;;
+       ?)
+               usage
+               exit
+               ;;
+       esac
+done
+
+if [ $(id -u) != 0 ]; then
+       die "Must run as root"
+fi
+
+zpios_survey_base
+zpios_survey_prefetch
+zpios_survey_zerocopy
+zpios_survey_checksum
+zpios_survey_pending
+zpios_survey_kmem
+zpios_survey_all
+
+exit 0
diff --git a/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh b/scripts/zpios-test/16th-8192rc-4rs-1cs-4off.sh
new file mode 100755 (executable)
index 0000000..cbd9c69
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+#        --threadcount       -t    =values
+#        --threadcount_low   -l    =value
+#        --threadcount_high  -h    =value
+#        --threadcount_incr  -e    =value
+#        --regioncount       -n    =values
+#        --regioncount_low   -i    =value
+#        --regioncount_high  -j    =value
+#        --regioncount_incr  -k    =value
+#        --offset            -o    =values
+#        --offset_low        -m    =value
+#        --offset_high       -q    =value
+#        --offset_incr       -r    =value
+#        --chunksize         -c    =values
+#        --chunksize_low     -a    =value
+#        --chunksize_high    -b    =value
+#        --chunksize_incr    -g    =value
+#        --regionsize        -s    =values
+#        --regionsize_low    -A    =value
+#        --regionsize_high   -B    =value
+#        --regionsize_incr   -C    =value
+#        --load              -L    =dmuio|ssf|fpp
+#        --pool              -p    =pool name
+#        --name              -M    =test name
+#        --cleanup           -x
+#        --prerun            -P    =pre-command
+#        --postrun           -R    =post-command
+#        --log               -G    =log directory
+#        --regionnoise       -I    =shift
+#        --chunknoise        -N    =bytes
+#        --threaddelay       -T    =jiffies
+#        --verify            -V
+#        --zerocopy          -z
+#        --nowait            -O
+#        --human-readable    -H
+#        --verbose           -v    =increase verbosity
+#        --help              -?    =this help
+
+ZPIOS_CMD="${ZPIOS}                                              \
+       --load=dmuio                                             \
+       --pool=${ZPOOL_NAME}                                     \
+       --name=${ZPOOL_CONFIG}                                   \
+       --threadcount=16                                         \
+       --regioncount=8192                                       \
+       --regionsize=4M                                          \
+       --chunksize=1M                                           \
+       --offset=4M                                              \
+       --cleanup                                                \
+       --human-readable                                         \
+       ${ZPIOS_OPTIONS}"
+
+zpios_start() {
+       if [ ${VERBOSE} ]; then
+               ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+               echo ${ZPIOS_CMD}
+       fi
+
+       ${ZPIOS_CMD} || exit 1
+}
+
+# Counterpart of zpios_start (its caller is not visible here); the only
+# thing it does is print an empty line when VERBOSE is set.
+# NOTE(review): with VERBOSE unset the failed [ ] test is the last
+# command, so the function returns non-zero -- confirm that callers do
+# not treat this as an error.
+zpios_stop() {
+       [ ${VERBOSE} ] && echo
+}
diff --git a/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh b/scripts/zpios-test/1th-16rc-4rs-1cs-4off.sh
new file mode 100755 (executable)
index 0000000..cd3c50b
--- /dev/null
@@ -0,0 +1,66 @@
+#!/bin/bash
+#
+# Usage: zpios
+#        --threadcount       -t    =values
+#        --threadcount_low   -l    =value
+#        --threadcount_high  -h    =value
+#        --threadcount_incr  -e    =value
+#        --regioncount       -n    =values
+#        --regioncount_low   -i    =value
+#        --regioncount_high  -j    =value
+#        --regioncount_incr  -k    =value
+#        --offset            -o    =values
+#        --offset_low        -m    =value
+#        --offset_high       -q    =value
+#        --offset_incr       -r    =value
+#        --chunksize         -c    =values
+#        --chunksize_low     -a    =value
+#        --chunksize_high    -b    =value
+#        --chunksize_incr    -g    =value
+#        --regionsize        -s    =values
+#        --regionsize_low    -A    =value
+#        --regionsize_high   -B    =value
+#        --regionsize_incr   -C    =value
+#        --load              -L    =dmuio|ssf|fpp
+#        --pool              -p    =pool name
+#        --name              -M    =test name
+#        --cleanup           -x
+#        --prerun            -P    =pre-command
+#        --postrun           -R    =post-command
+#        --log               -G    =log directory
+#        --regionnoise       -I    =shift
+#        --chunknoise        -N    =bytes
+#        --threaddelay       -T    =jiffies
+#        --verify            -V
+#        --zerocopy          -z
+#        --nowait            -O
+#        --human-readable    -H
+#        --verbose           -v    =increase verbosity
+#        --help              -?    =this help
+
+
+ZPIOS_CMD="${ZPIOS}                                              \
+       --load=dmuio                                             \
+       --pool=${ZPOOL_NAME}                                     \
+       --name=${ZPOOL_CONFIG}                                   \
+       --threadcount=1                                          \
+       --regioncount=16                                         \
+       --regionsize=4M                                          \
+       --chunksize=1M                                           \
+       --offset=4M                                              \
+       --cleanup                                                \
+       --human-readable                                         \
+       ${ZPIOS_OPTIONS}"
+
+zpios_start() {
+       if [ ${VERBOSE} ]; then
+               ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+               echo ${ZPIOS_CMD}
+       fi
+
+       ${ZPIOS_CMD} || exit 1
+}
+
+# Counterpart of zpios_start (its caller is not visible here); the only
+# thing it does is print an empty line when VERBOSE is set.
+# NOTE(review): with VERBOSE unset the failed [ ] test is the last
+# command, so the function returns non-zero -- confirm that callers do
+# not treat this as an error.
+zpios_stop() {
+       [ ${VERBOSE} ] && echo
+}
diff --git a/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/1x256th-65536rc-4rs-1cs-4off.sh
new file mode 100755 (executable)
index 0000000..743e97b
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+#        --threadcount       -t    =values
+#        --threadcount_low   -l    =value
+#        --threadcount_high  -h    =value
+#        --threadcount_incr  -e    =value
+#        --regioncount       -n    =values
+#        --regioncount_low   -i    =value
+#        --regioncount_high  -j    =value
+#        --regioncount_incr  -k    =value
+#        --offset            -o    =values
+#        --offset_low        -m    =value
+#        --offset_high       -q    =value
+#        --offset_incr       -r    =value
+#        --chunksize         -c    =values
+#        --chunksize_low     -a    =value
+#        --chunksize_high    -b    =value
+#        --chunksize_incr    -g    =value
+#        --regionsize        -s    =values
+#        --regionsize_low    -A    =value
+#        --regionsize_high   -B    =value
+#        --regionsize_incr   -C    =value
+#        --load              -L    =dmuio|ssf|fpp
+#        --pool              -p    =pool name
+#        --name              -M    =test name
+#        --cleanup           -x
+#        --prerun            -P    =pre-command
+#        --postrun           -R    =post-command
+#        --log               -G    =log directory
+#        --regionnoise       -I    =shift
+#        --chunknoise        -N    =bytes
+#        --threaddelay       -T    =jiffies
+#        --verify            -V
+#        --zerocopy          -z
+#        --nowait            -O
+#        --human-readable    -H
+#        --verbose           -v    =increase verbosity
+#        --help              -?    =this help
+
+ZPIOS_CMD="${ZPIOS}                                              \
+       --load=dmuio                                             \
+       --pool=${ZPOOL_NAME}                                     \
+       --name=${ZPOOL_CONFIG}                                   \
+       --threadcount=1,2,4,8,16,32,64,128,256                   \
+       --regioncount=65536                                      \
+       --regionsize=4M                                          \
+       --chunksize=1M                                           \
+       --offset=4M                                              \
+        --cleanup                                                \
+       --human-readable                                         \
+       ${ZPIOS_OPTIONS}"
+
+zpios_start() {
+       if [ ${VERBOSE} ]; then
+               ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+               echo ${ZPIOS_CMD}
+       fi
+
+       ${ZPIOS_CMD} || exit 1
+}
+
+# Counterpart of zpios_start (its caller is not visible here); the only
+# thing it does is print an empty line when VERBOSE is set.
+# NOTE(review): with VERBOSE unset the failed [ ] test is the last
+# command, so the function returns non-zero -- confirm that callers do
+# not treat this as an error.
+zpios_stop() {
+       [ ${VERBOSE} ] && echo
+}
diff --git a/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh b/scripts/zpios-test/256th-65536rc-4rs-1cs-4off.sh
new file mode 100755 (executable)
index 0000000..92a3b77
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+#        --threadcount       -t    =values
+#        --threadcount_low   -l    =value
+#        --threadcount_high  -h    =value
+#        --threadcount_incr  -e    =value
+#        --regioncount       -n    =values
+#        --regioncount_low   -i    =value
+#        --regioncount_high  -j    =value
+#        --regioncount_incr  -k    =value
+#        --offset            -o    =values
+#        --offset_low        -m    =value
+#        --offset_high       -q    =value
+#        --offset_incr       -r    =value
+#        --chunksize         -c    =values
+#        --chunksize_low     -a    =value
+#        --chunksize_high    -b    =value
+#        --chunksize_incr    -g    =value
+#        --regionsize        -s    =values
+#        --regionsize_low    -A    =value
+#        --regionsize_high   -B    =value
+#        --regionsize_incr   -C    =value
+#        --load              -L    =dmuio|ssf|fpp
+#        --pool              -p    =pool name
+#        --name              -M    =test name
+#        --cleanup           -x
+#        --prerun            -P    =pre-command
+#        --postrun           -R    =post-command
+#        --log               -G    =log directory
+#        --regionnoise       -I    =shift
+#        --chunknoise        -N    =bytes
+#        --threaddelay       -T    =jiffies
+#        --verify            -V
+#        --zerocopy          -z
+#        --nowait            -O
+#        --human-readable    -H
+#        --verbose           -v    =increase verbosity
+#        --help              -?    =this help
+
+ZPIOS_CMD="${ZPIOS}                                              \
+       --load=dmuio                                             \
+       --pool=${ZPOOL_NAME}                                     \
+       --name=${ZPOOL_CONFIG}                                   \
+       --threadcount=256                                        \
+       --regioncount=65536                                      \
+       --regionsize=4M                                          \
+       --chunksize=1M                                           \
+       --offset=4M                                              \
+        --cleanup                                                \
+       --human-readable                                         \
+       ${ZPIOS_OPTIONS}"
+
+zpios_start() {
+       if [ ${VERBOSE} ]; then
+               ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+               echo ${ZPIOS_CMD}
+       fi
+
+       ${ZPIOS_CMD} || exit 1
+}
+
+# Counterpart of zpios_start (its caller is not visible here); the only
+# thing it does is print an empty line when VERBOSE is set.
+# NOTE(review): with VERBOSE unset the failed [ ] test is the last
+# command, so the function returns non-zero -- confirm that callers do
+# not treat this as an error.
+zpios_stop() {
+       [ ${VERBOSE} ] && echo
+}
diff --git a/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh b/scripts/zpios-test/4th-1024rc-4rs-1cs-4off.sh
new file mode 100755 (executable)
index 0000000..0db952c
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Usage: zpios
+#        --threadcount       -t    =values
+#        --threadcount_low   -l    =value
+#        --threadcount_high  -h    =value
+#        --threadcount_incr  -e    =value
+#        --regioncount       -n    =values
+#        --regioncount_low   -i    =value
+#        --regioncount_high  -j    =value
+#        --regioncount_incr  -k    =value
+#        --offset            -o    =values
+#        --offset_low        -m    =value
+#        --offset_high       -q    =value
+#        --offset_incr       -r    =value
+#        --chunksize         -c    =values
+#        --chunksize_low     -a    =value
+#        --chunksize_high    -b    =value
+#        --chunksize_incr    -g    =value
+#        --regionsize        -s    =values
+#        --regionsize_low    -A    =value
+#        --regionsize_high   -B    =value
+#        --regionsize_incr   -C    =value
+#        --load              -L    =dmuio|ssf|fpp
+#        --pool              -p    =pool name
+#        --name              -M    =test name
+#        --cleanup           -x
+#        --prerun            -P    =pre-command
+#        --postrun           -R    =post-command
+#        --log               -G    =log directory
+#        --regionnoise       -I    =shift
+#        --chunknoise        -N    =bytes
+#        --threaddelay       -T    =jiffies
+#        --verify            -V
+#        --zerocopy          -z
+#        --nowait            -O
+#        --human-readable    -H
+#        --verbose           -v    =increase verbosity
+#        --help              -?    =this help
+
+ZPIOS_CMD="${ZPIOS}                                              \
+       --load=dmuio                                             \
+       --pool=${ZPOOL_NAME}                                     \
+       --name=${ZPOOL_CONFIG}                                   \
+       --threadcount=4                                          \
+       --regioncount=1024                                       \
+       --regionsize=4M                                          \
+       --chunksize=1M                                           \
+       --offset=4M                                              \
+       --cleanup                                                \
+       --human-readable                                         \
+       ${ZPIOS_OPTIONS}"       # expanded now, at source time; set by 'zpios.sh -o'
+
+zpios_start() {
+       if [ ${VERBOSE} ]; then
+               ZPIOS_CMD="${ZPIOS_CMD} --verbose"
+               echo ${ZPIOS_CMD}
+       fi
+
+       ${ZPIOS_CMD} || exit 1
+}
+
+zpios_stop() {
+       [ ${VERBOSE} ] && echo
+}
diff --git a/scripts/zpios-test/large-thread-survey.sh b/scripts/zpios-test/large-thread-survey.sh
new file mode 120000 (symlink)
index 0000000..90b6e3c
--- /dev/null
@@ -0,0 +1 @@
+1x256th-65536rc-4rs-1cs-4off.sh
\ No newline at end of file
diff --git a/scripts/zpios-test/large.sh b/scripts/zpios-test/large.sh
new file mode 120000 (symlink)
index 0000000..b8e22bf
--- /dev/null
@@ -0,0 +1 @@
+256th-65536rc-4rs-1cs-4off.sh
\ No newline at end of file
diff --git a/scripts/zpios-test/medium.sh b/scripts/zpios-test/medium.sh
new file mode 120000 (symlink)
index 0000000..d81027b
--- /dev/null
@@ -0,0 +1 @@
+16th-8192rc-4rs-1cs-4off.sh
\ No newline at end of file
diff --git a/scripts/zpios-test/small.sh b/scripts/zpios-test/small.sh
new file mode 120000 (symlink)
index 0000000..cbf03b5
--- /dev/null
@@ -0,0 +1 @@
+4th-1024rc-4rs-1cs-4off.sh
\ No newline at end of file
diff --git a/scripts/zpios-test/tiny.sh b/scripts/zpios-test/tiny.sh
new file mode 120000 (symlink)
index 0000000..ba8b7cd
--- /dev/null
@@ -0,0 +1 @@
+1th-16rc-4rs-1cs-4off.sh
\ No newline at end of file
diff --git a/scripts/zpios.sh b/scripts/zpios.sh
new file mode 100755 (executable)
index 0000000..e16a58a
--- /dev/null
@@ -0,0 +1,272 @@
+#!/bin/bash
+#
+# Wrapper script for easily running zpios based tests
+#
+
+basedir="$(dirname $0)"
+
+SCRIPT_COMMON=common.sh
+if [ -f "${basedir}/${SCRIPT_COMMON}" ]; then
+. "${basedir}/${SCRIPT_COMMON}"
+else
+echo "Missing helper script ${SCRIPT_COMMON}" && exit 1
+fi
+
+PROG=zpios.sh
+DATE=`date +%Y%m%d-%H%M%S`
+if [ "${ZPIOS_MODULES}" ]; then
+       MODULES=(${ZPIOS_MODULES[*]})
+else
+       MODULES=(zpios)
+fi
+
+usage() {
+cat << EOF
+USAGE:
+$0 [hvp] <-c config> <-t test>
+
+DESCRIPTION:
+        Helper script for easy zpios benchmarking.
+
+OPTIONS:
+        -h      Show this message
+        -v      Verbose
+        -f      Force everything
+        -p      Enable profiling
+        -c      Zpool configuration
+        -t      Zpios test
+        -o      Additional zpios options
+        -l      Additional zpool options
+        -s      Additional zfs options
+
+EOF
+}
+
+print_header() {
+       echo --------------------- ZPIOS RESULTS ----------------------------
+       echo -n "Date: "; date
+       echo -n "Kernel: "; uname -r
+       dmesg | grep "Loaded Solaris Porting Layer" | tail -n1
+       dmesg | grep "Loaded ZFS Filesystem" | tail -n1
+       echo
+}
+
+print_spl_info() {
+       echo --------------------- SPL Tunings ------------------------------
+       ${SYSCTL} -A | grep spl
+
+       if [ -d /sys/module/spl/parameters ]; then
+               grep [0-9] /sys/module/spl/parameters/*
+       else
+               grep [0-9] /sys/module/spl/*
+       fi
+
+       echo
+}
+
+print_zfs_info() {
+       echo --------------------- ZFS Tunings ------------------------------
+       ${SYSCTL} -A | grep zfs
+
+       if [ -d /sys/module/zfs/parameters ]; then
+               grep [0-9] /sys/module/zfs/parameters/*
+       else
+               grep [0-9] /sys/module/zfs/*
+       fi
+
+       echo
+}
+
+print_stats() {
+       echo ---------------------- Statistics -------------------------------
+       ${SYSCTL} -A | grep spl | grep stack_max
+
+       if [ -d /proc/spl/kstat/ ]; then
+               if [ -f /proc/spl/kstat/zfs/arcstats ]; then
+                       echo "* ARC"
+                       cat /proc/spl/kstat/zfs/arcstats
+                       echo
+               fi
+
+               if [ -f /proc/spl/kstat/zfs/vdev_cache_stats ]; then
+                       echo "* VDEV Cache"
+                       cat /proc/spl/kstat/zfs/vdev_cache_stats
+                       echo
+               fi
+       fi
+
+       if [ -f /proc/spl/kmem/slab ]; then
+               echo "* SPL SLAB"
+               cat /proc/spl/kmem/slab
+               echo
+       fi
+
+       echo
+}
+
+check_test() {
+
+       if [ ! -f ${ZPIOS_TEST} ]; then
+               local NAME=`basename ${ZPIOS_TEST} .sh`
+               ERROR="Unknown test '${NAME}', available tests are:\n"
+
+               for TST in `ls ${ZPIOSDIR}/ | grep ".sh"`; do
+                       local NAME=`basename ${TST} .sh`
+                       ERROR="${ERROR}${NAME}\n"
+               done
+
+               return 1
+       fi
+
+       return 0
+}
+
+zpios_profile_config() {        # Write the settings file that zpios_profile_start sources
+cat > ${PROFILE_DIR}/zpios-config.sh << EOF     # unquoted EOF: variables and the uname -r substitutions expand as the file is written
+#
+# Zpios Profiling Configuration
+#
+
+PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE}
+PROFILE_PRE=${ZPIOSPROFILEDIR}/zpios-profile-pre.sh
+PROFILE_POST=${ZPIOSPROFILEDIR}/zpios-profile-post.sh
+PROFILE_USER=${ZPIOSPROFILEDIR}/zpios-profile.sh
+PROFILE_PIDS=${ZPIOSPROFILEDIR}/zpios-profile-pids.sh
+PROFILE_DISK=${ZPIOSPROFILEDIR}/zpios-profile-disk.sh
+PROFILE_ARC_PROC=/proc/spl/kstat/zfs/arcstats
+PROFILE_VDEV_CACHE_PROC=/proc/spl/kstat/zfs/vdev_cache_stats
+
+OPROFILE_KERNEL="/boot/vmlinux-`uname -r`"
+OPROFILE_KERNEL_DIR="/lib/modules/`uname -r`/kernel/"
+OPROFILE_SPL_DIR=${SPLBUILD}/module/
+OPROFILE_ZFS_DIR=${MODDIR}
+
+EOF
+}
+
+zpios_profile_start() {
+       PROFILE_DIR=/tmp/zpios/${ZPOOL_CONFIG}+${ZPIOS_TEST_ARG}+${DATE}
+
+       mkdir -p ${PROFILE_DIR}
+       zpios_profile_config
+       . ${PROFILE_DIR}/zpios-config.sh
+
+       ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --log=${PROFILE_DIR}"
+       ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --prerun=${PROFILE_PRE}"
+       ZPIOS_OPTIONS="${ZPIOS_OPTIONS} --postrun=${PROFILE_POST}"
+
+       /usr/bin/opcontrol --init
+       /usr/bin/opcontrol --setup --vmlinux=${OPROFILE_KERNEL}
+}
+
+zpios_profile_stop() {
+       /usr/bin/opcontrol --shutdown
+       /usr/bin/opcontrol --deinit
+}
+
+PROFILE=""                      # -p: non-empty enables the oprofile hooks
+ZPOOL_CONFIG="zpool-config.sh"  # -c: pool layout handed to ${ZPOOL_CREATE_SH}
+ZPIOS_TEST="zpios-test.sh"      # -t: test script that gets sourced
+ZPOOL_NAME="zpios"              # pool name used for create, run and destroy
+ZPIOS_OPTIONS=""                # -o: extra arguments for the zpios command
+ZPOOL_OPTIONS=""                # -l: forwarded to ${ZPOOL_CREATE_SH}
+ZFS_OPTIONS=""                  # -s: forwarded to ${ZPOOL_CREATE_SH}
+
+while getopts 'hvfpc:t:o:l:s:' OPTION; do
+       case $OPTION in
+       h)
+               usage
+               exit 1
+               ;;
+       v)
+               VERBOSE=1
+               VERBOSE_FLAG="-v"
+               ;;
+       f)
+               FORCE=1
+               FORCE_FLAG="-f"
+               ;;
+       p)
+               PROFILE=1
+               ;;
+       c)
+               ZPOOL_CONFIG=${OPTARG}
+               ;;
+       t)
+               ZPIOS_TEST_ARG=${OPTARG}
+               ZPIOS_TEST=${ZPIOSDIR}/${OPTARG}.sh
+               ;;
+       o)
+               ZPIOS_OPTIONS=${OPTARG}
+               ;;
+       l)      # Passed through to zpool-create.sh 
+               ZPOOL_OPTIONS=${OPTARG}
+               ;;
+       s)      # Passed through to zpool-create.sh
+               ZFS_OPTIONS=${OPTARG}
+               ;;
+       ?)
+               usage
+               exit
+               ;;
+       esac
+done
+
+if [ $(id -u) != 0 ]; then
+        die "Must run as root"
+fi
+
+# Validate and source your test config
+check_test || die "${ERROR}"
+. ${ZPIOS_TEST}
+
+# Pull in the zpios test module is not loaded.  If this fails it is
+# likely because the full module stack was not yet loaded with zfs.sh
+if check_modules; then
+       if ! load_modules; then
+               die "Run 'zfs.sh' to ensure the full module stack is loaded"
+       fi
+fi
+
+# Wait for device creation
+while [ ! -c /dev/zpios ]; do
+       sleep 1
+done
+
+if [ ${VERBOSE} ]; then
+       print_header
+       print_spl_info
+       print_zfs_info
+fi
+
+# Create the zpool configuration
+${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} ${FORCE_FLAG} \
+       -p ${ZPOOL_NAME} -c ${ZPOOL_CONFIG} \
+       -l "${ZPOOL_OPTIONS}" -s "${ZFS_OPTIONS}" || exit 1
+
+if [ ${PROFILE} ]; then
+       zpios_profile_start
+fi
+
+zpios_start
+zpios_stop
+
+if [ ${PROFILE} ]; then
+       zpios_profile_stop
+fi
+
+if [ ${VERBOSE} ]; then
+       print_stats
+fi
+
+# Destroy the zpool configuration
+${ZPOOL_CREATE_SH} ${VERBOSE_FLAG} ${FORCE_FLAG} \
+       -p ${ZPOOL_NAME} -c ${ZPOOL_CONFIG} -d || exit 1
+
+# Unload the test module stack and wait for device removal
+unload_modules
+while [ -c /dev/zpios ]; do
+       sleep 1
+done
+
+exit 0