1 /*****************************************************************************\
2  *  ZPIOS is a heavily modified version of the original PIOS test code.
3  *  It is designed to have the test code running in the Linux kernel
4  *  against ZFS while still being flexibly controlled from user space.
5  *
6  *  Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
9  *  LLNL-CODE-403049
10  *
11  *  Original PIOS Test Code
12  *  Copyright (C) 2004 Cluster File Systems, Inc.
13  *  Written by Peter Braam <braam@clusterfs.com>
14  *             Atul Vidwansa <atul@clusterfs.com>
15  *             Milind Dumbare <milind@clusterfs.com>
16  *
17  *  This file is part of ZFS on Linux.
18  *  For details, see <http://zfsonlinux.org/>.
19  *
20  *  ZPIOS is free software; you can redistribute it and/or modify it
21  *  under the terms of the GNU General Public License as published by the
22  *  Free Software Foundation; either version 2 of the License, or (at your
23  *  option) any later version.
24  *
25  *  ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28  *  for more details.
29  *
30  *  You should have received a copy of the GNU General Public License along
31  *  with ZPIOS.  If not, see <http://www.gnu.org/licenses/>.
32 \*****************************************************************************/
33
34 #include <sys/zfs_context.h>
35 #include <sys/dmu.h>
36 #include <sys/txg.h>
37 #include <linux/cdev.h>
38 #include "zpios-internal.h"
39
40
41 static spl_class *zpios_class;
42 static spl_device *zpios_device;
43 static char *zpios_tag = "zpios_tag";
44
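/* Launch the registered user space helper (if any) via
 * call_usermodehelper(), passing the run parameters and result code as
 * argv[] strings so pre/post hooks can log or act on each test phase.
 */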
45 static int
46 zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
47 {
48         /* This is stack heavy but it should be OK since we are only
49          * making the upcall between tests when the stack is shallow.
50          */
51         char id[16], chunk_size[16], region_size[16], thread_count[16];
52         char region_count[16], offset[16], region_noise[16], chunk_noise[16];
53         char thread_delay[16], flags[16], result[8];
54         char *argv[16], *envp[4];
55
56         if ((path == NULL) || (strlen(path) == 0))
57                 return -ENOENT;
58
59         snprintf(id, 15, "%d", run_args->id);
60         snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size);
61         snprintf(region_size, 15, "%lu", (long unsigned)run_args->region_size);
62         snprintf(thread_count, 15, "%u", run_args->thread_count);
63         snprintf(region_count, 15, "%u", run_args->region_count);
64         snprintf(offset, 15, "%lu", (long unsigned)run_args->offset);
65         snprintf(region_noise, 15, "%u", run_args->region_noise);
66         snprintf(chunk_noise, 15, "%u", run_args->chunk_noise);
67         snprintf(thread_delay, 15, "%u", run_args->thread_delay);
68         snprintf(flags, 15, "0x%x", run_args->flags);
69         snprintf(result, 7, "%d", rc);
70
71         /* Passing 15 args to registered pre/post upcall */
72         argv[0] = path;
73         argv[1] = phase;
74         argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
75         argv[3] = id;
76         argv[4] = run_args->pool;
77         argv[5] = chunk_size;
78         argv[6] = region_size;
79         argv[7] = thread_count;
80         argv[8] = region_count;
81         argv[9] = offset;
82         argv[10] = region_noise;
83         argv[11] = chunk_noise;
84         argv[12] = thread_delay;
85         argv[13] = flags;
86         argv[14] = result;
87         argv[15] = NULL;
88
89         /* Passing environment for user space upcall */
90         envp[0] = "HOME=/";
91         envp[1] = "TERM=linux";
92         envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
93         envp[3] = NULL;
94
95         return call_usermodehelper(path, argv, envp, UMH_WAIT_PROC);
96 }
97
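/* Allocate a new DMU object with a 128K block size inside a TXG_WAIT
 * assigned transaction.  Returns the object number, or 0 if the
 * transaction could not be assigned.
 */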
98 static uint64_t
99 zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
100 {
101         struct dmu_tx *tx;
102         uint64_t obj = 0ULL;
103         int rc;
104
105         tx = dmu_tx_create(os);
106         dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
107         rc = dmu_tx_assign(tx, TXG_WAIT);
108         if (rc) {
109                 zpios_print(run_args->file,
110                             "dmu_tx_assign() failed: %d\n", rc);
111                 dmu_tx_abort(tx);
112                 return obj;
113         }
114
115         obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
116                                DMU_OT_NONE, 0, tx);
117         rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
118         if (rc) {
119                 zpios_print(run_args->file,
120                             "dmu_object_set_blocksize() failed: %d\n", rc);
121                 dmu_tx_abort(tx);
122                 return obj;
123         }
124
125         dmu_tx_commit(tx);
126
127         return obj;
128 }
129
130 static int
131 zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
132 {
133         struct dmu_tx *tx;
134         int rc;
135
136         tx = dmu_tx_create(os);
137         dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
138         rc = dmu_tx_assign(tx, TXG_WAIT);
139         if (rc) {
140                 zpios_print(run_args->file,
141                             "dmu_tx_assign() failed: %d\n", rc);
142                 dmu_tx_abort(tx);
143                 return rc;
144         }
145
146         rc = dmu_object_free(os, obj, tx);
147         if (rc) {
148                 zpios_print(run_args->file,
149                             "dmu_object_free() failed: %d\n", rc);
150                 dmu_tx_abort(tx);
151                 return rc;
152         }
153
154         dmu_tx_commit(tx);
155
156         return 0;
157 }
158
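/* Create and own the <pool>/id_<id> objset for this run, then initialize
 * each region's object and offsets for either the file-per-process or
 * single shared file layout.  Timing is recorded in stats.cr_time.
 */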
159 static int
160 zpios_dmu_setup(run_args_t *run_args)
161 {
162         zpios_time_t *t = &(run_args->stats.cr_time);
163         objset_t *os;
164         char name[32];
165         uint64_t obj = 0ULL;
166         int i, rc = 0, rc2;
167
168         (void)zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
169         t->start = zpios_timespec_now();
170
171         (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
172         rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
173         if (rc) {
174                 zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
175                             "failed: %d\n", name, rc);
176                 goto out;
177         }
178
179         rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
180         if (rc) {
181                 zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
182                             "failed: %d\n", name, rc);
183                 goto out_destroy;
184         }
185
186         if (!(run_args->flags & DMU_FPP)) {
187                 obj = zpios_dmu_object_create(run_args, os);
188                 if (obj == 0) {
189                         rc = -EBADF;
190                         zpios_print(run_args->file, "Error zpios_dmu_"
191                                     "object_create() failed, %d\n", rc);
192                         goto out_destroy;
193                 }
194         }
195
196         for (i = 0; i < run_args->region_count; i++) {
197                 zpios_region_t *region;
198
199                 region = &run_args->regions[i];
200                 mutex_init(&region->lock, NULL, MUTEX_DEFAULT, NULL);
201
202                 if (run_args->flags & DMU_FPP) {
203                         /* File per process */
204                         region->obj.os  = os;
205                         region->obj.obj = zpios_dmu_object_create(run_args, os);
206                         ASSERT(region->obj.obj > 0); /* XXX - Handle this */
207                         region->wr_offset   = run_args->offset;
208                         region->rd_offset   = run_args->offset;
209                         region->init_offset = run_args->offset;
210                         region->max_offset  = run_args->offset +
211                                               run_args->region_size;
212                 } else {
213                         /* Single shared file */
214                         region->obj.os  = os;
215                         region->obj.obj = obj;
216                         region->wr_offset   = run_args->offset * i;
217                         region->rd_offset   = run_args->offset * i;
218                         region->init_offset = run_args->offset * i;
219                         region->max_offset  = run_args->offset *
220                                               i + run_args->region_size;
221                 }
222         }
223
224         run_args->os = os;
225 out_destroy:
226         if (rc) {
227                 rc2 = dmu_objset_destroy(name, B_FALSE);
228                 if (rc2)
229                         zpios_print(run_args->file, "Error dmu_objset_destroy"
230                                     "(%s, ...) failed: %d\n", name, rc2);
231         }
232 out:
233         t->stop  = zpios_timespec_now();
234         t->delta = zpios_timespec_sub(t->stop, t->start);
235         (void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);
236
237         return rc;
238 }
239
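/* Allocate and populate the run_args_t for one run, including one
 * zpios_region_t per requested region, then perform the DMU setup.
 */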
240 static int
241 zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
242 {
243         run_args_t *ra;
244         int rc, size;
245
246         size = sizeof(*ra) + kcmd->cmd_region_count * sizeof(zpios_region_t);
247
248         ra = vmem_zalloc(size, KM_SLEEP);
249         if (ra == NULL) {
250                 zpios_print(file, "Unable to vmem_zalloc() %d bytes "
251                             "for regions\n", size);
252                 return -ENOMEM;
253         }
254
255         *run_args = ra;
256         strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
257         strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1);
258         strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1);
259         strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1);
260         ra->id              = kcmd->cmd_id;
261         ra->chunk_size      = kcmd->cmd_chunk_size;
262         ra->thread_count    = kcmd->cmd_thread_count;
263         ra->region_count    = kcmd->cmd_region_count;
264         ra->region_size     = kcmd->cmd_region_size;
265         ra->offset          = kcmd->cmd_offset;
266         ra->region_noise    = kcmd->cmd_region_noise;
267         ra->chunk_noise     = kcmd->cmd_chunk_noise;
268         ra->thread_delay    = kcmd->cmd_thread_delay;
269         ra->flags           = kcmd->cmd_flags;
270         ra->stats.wr_data   = 0;
271         ra->stats.wr_chunks = 0;
272         ra->stats.rd_data   = 0;
273         ra->stats.rd_chunks = 0;
274         ra->region_next     = 0;
275         ra->file            = file;
276         mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL);
277         mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL);
278
279         (void)zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0);
280
281         rc = zpios_dmu_setup(ra);
282         if (rc) {
283                 mutex_destroy(&ra->lock_ctl);
284                 mutex_destroy(&ra->lock_work);
285                 vmem_free(ra, size);
286                 *run_args = NULL;
287         }
288
289         return rc;
290 }
291
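/* Hand out the next chunk of work (object, offset, size, region) under
 * lock_work.  Returns 1 if a chunk was assigned, or 0 when every region
 * has been fully written or read.
 */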
292 static int
293 zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
294                     __u32 *chunk_size, zpios_region_t **region, __u32 flags)
295 {
296         int i, j, count = 0;
297         unsigned int random_int;
298
299         get_random_bytes(&random_int, sizeof(unsigned int));
300
301         mutex_enter(&run_args->lock_work);
302         i = run_args->region_next;
303
304         /* XXX: I don't much care for this chunk selection mechanism;
305          * there's the potential to burn a lot of time here doing nothing
306          * useful while holding the global lock.  This could give some
307          * misleading performance results.  I'll fix it later.
308          */
309         while (count < run_args->region_count) {
310                 __u64 *rw_offset;
311                 zpios_time_t *rw_time;
312
313                 j = i % run_args->region_count;
314                 *region = &(run_args->regions[j]);
315
316                 if (flags & DMU_WRITE) {
317                         rw_offset = &((*region)->wr_offset);
318                         rw_time = &((*region)->stats.wr_time);
319                 } else {
320                         rw_offset = &((*region)->rd_offset);
321                         rw_time = &((*region)->stats.rd_time);
322                 }
323
324                 /* test if region is fully written */
325                 if (*rw_offset + *chunk_size > (*region)->max_offset) {
326                         i++;
327                         count++;
328
329                         if (unlikely(rw_time->stop.ts_sec == 0) &&
330                             unlikely(rw_time->stop.ts_nsec == 0))
331                                 rw_time->stop = zpios_timespec_now();
332
333                         continue;
334                 }
335
336                 *offset = *rw_offset;
337                 *obj = (*region)->obj;
338                 *rw_offset += *chunk_size;
339
340                 /* update ctl structure */
341                 if (run_args->region_noise) {
342                         get_random_bytes(&random_int, sizeof(unsigned int));
343                         run_args->region_next += random_int % run_args->region_noise;
344                 } else {
345                         run_args->region_next++;
346                 }
347
348                 mutex_exit(&run_args->lock_work);
349                 return 1;
350         }
351
352         /* nothing left to do */
353         mutex_exit(&run_args->lock_work);
354
355         return 0;
356 }
357
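/* Optionally free the per-region objects, disown the objset, and (when
 * DMU_REMOVE is set) destroy it.  Timing is recorded in stats.rm_time.
 */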
358 static void
359 zpios_remove_objset(run_args_t *run_args)
360 {
361         zpios_time_t *t = &(run_args->stats.rm_time);
362         zpios_region_t *region;
363         char name[32];
364         int rc = 0, i;
365
366         (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
367         t->start = zpios_timespec_now();
368
369         (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
370
371         if (run_args->flags & DMU_REMOVE) {
372                 if (run_args->flags & DMU_FPP) {
373                         for (i = 0; i < run_args->region_count; i++) {
374                                 region = &run_args->regions[i];
375                                 rc = zpios_dmu_object_free(run_args,
376                                                            region->obj.os,
377                                                            region->obj.obj);
378                                 if (rc)
379                                         zpios_print(run_args->file, "Error "
380                                                     "removing object %d, %d\n",
381                                                     (int)region->obj.obj, rc);
382                         }
383                 } else {
384                         region = &run_args->regions[0];
385                         rc = zpios_dmu_object_free(run_args,
386                                                    region->obj.os,
387                                                    region->obj.obj);
388                         if (rc)
389                                 zpios_print(run_args->file, "Error "
390                                             "removing object %d, %d\n",
391                                             (int)region->obj.obj, rc);
392                 }
393         }
394
395         dmu_objset_disown(run_args->os, zpios_tag);
396
397         if (run_args->flags & DMU_REMOVE) {
398                 rc = dmu_objset_destroy(name, B_FALSE);
399                 if (rc)
400                         zpios_print(run_args->file, "Error dmu_objset_destroy"
401                                     "(%s, ...) failed: %d\n", name, rc);
402         }
403
404         t->stop  = zpios_timespec_now();
405         t->delta = zpios_timespec_sub(t->stop, t->start);
406         (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc);
407 }
408
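/* Release all per-thread and per-region resources along with the
 * run_args_t allocation itself.
 */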
409 static void
410 zpios_cleanup_run(run_args_t *run_args)
411 {
412         int i, size = 0;
413
414         if (run_args == NULL)
415                 return;
416
417         if (run_args->threads != NULL) {
418                 for (i = 0; i < run_args->thread_count; i++) {
419                         if (run_args->threads[i]) {
420                                 mutex_destroy(&run_args->threads[i]->lock);
421                                 kmem_free(run_args->threads[i],
422                                           sizeof(thread_data_t));
423                         }
424                 }
425
426                 kmem_free(run_args->threads,
427                           sizeof(thread_data_t *) * run_args->thread_count);
428         }
429
430         for (i = 0; i < run_args->region_count; i++)
431                 mutex_destroy(&run_args->regions[i].lock);
432
433         mutex_destroy(&run_args->lock_work);
434         mutex_destroy(&run_args->lock_ctl);
435         size = run_args->region_count * sizeof(zpios_region_t);
436
437         vmem_free(run_args, sizeof(*run_args) + size);
438 }
439
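/* Issue a single synchronous DMU write inside its own transaction,
 * retrying the assignment when TXG_NOWAIT returns ERESTART.
 */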
440 static int
441 zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
442                 uint64_t offset, uint64_t size, const void *buf)
443 {
444         struct dmu_tx *tx;
445         int rc, how = TXG_WAIT;
446 //      int flags = 0;
447
448         if (run_args->flags & DMU_WRITE_NOWAIT)
449                 how = TXG_NOWAIT;
450
451         while (1) {
452                 tx = dmu_tx_create(os);
453                 dmu_tx_hold_write(tx, object, offset, size);
454                 rc = dmu_tx_assign(tx, how);
455
456                 if (rc) {
457                         if (rc == ERESTART && how == TXG_NOWAIT) {
458                                 dmu_tx_wait(tx);
459                                 dmu_tx_abort(tx);
460                                 continue;
461                         }
462                         zpios_print(run_args->file,
463                                     "Error in dmu_tx_assign(), %d", rc);
464                         dmu_tx_abort(tx);
465                         return rc;
466                 }
467                 break;
468         }
469
470 //      if (run_args->flags & DMU_WRITE_ZC)
471 //              flags |= DMU_WRITE_ZEROCOPY;
472
473         dmu_write(os, object, offset, size, buf, tx);
474         dmu_tx_commit(tx);
475
476         return 0;
477 }
478
479 static int
480 zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
481                uint64_t offset, uint64_t size, void *buf)
482 {
483         int flags = 0;
484
485 //      if (run_args->flags & DMU_READ_ZC)
486 //              flags |= DMU_READ_ZEROCOPY;
487
488         if (run_args->flags & DMU_READ_NOPF)
489                 flags |= DMU_READ_NO_PREFETCH;
490
491         return dmu_read(os, object, offset, size, buf, flags);
492 }
493
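/* Per-thread I/O worker: performs the write phase over the assigned
 * chunks, sleeps until rewoken, then performs the read phase (with
 * optional data verification), accumulating per-thread and per-region
 * statistics along the way.
 */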
494 static int
495 zpios_thread_main(void *data)
496 {
497         thread_data_t *thr = (thread_data_t *)data;
498         run_args_t *run_args = thr->run_args;
499         zpios_time_t t;
500         dmu_obj_t obj;
501         __u64 offset;
502         __u32 chunk_size;
503         zpios_region_t *region;
504         char *buf;
505         unsigned int random_int;
506         int chunk_noise = run_args->chunk_noise;
507         int chunk_noise_tmp = 0;
508         int thread_delay = run_args->thread_delay;
509         int thread_delay_tmp = 0;
510         int i, rc = 0;
511
512         if (chunk_noise) {
513                 get_random_bytes(&random_int, sizeof(unsigned int));
514                 chunk_noise_tmp = (random_int % (chunk_noise * 2)) - chunk_noise;
515         }
516
517         /* It's OK to vmem_alloc() this memory because it will be copied
518          * into the slab and pointers to the slab copy will be set up in
519          * the bio when the IO is submitted.  This of course is not ideal
520          * since we want a zero-copy IO path if possible.  It would be nice
521          * to have direct access to those slab entries.
522          */
523         chunk_size = run_args->chunk_size + chunk_noise_tmp;
524         buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
525         ASSERT(buf);
526
527         /* Trivial data verification pattern for now. */
528         if (run_args->flags & DMU_VERIFY)
529                 memset(buf, 'z', chunk_size);
530
531         /* Write phase */
532         mutex_enter(&thr->lock);
533         thr->stats.wr_time.start = zpios_timespec_now();
534         mutex_exit(&thr->lock);
535
536         while (zpios_get_work_item(run_args, &obj, &offset,
537                                    &chunk_size, &region, DMU_WRITE)) {
538                 if (thread_delay) {
539                         get_random_bytes(&random_int, sizeof(unsigned int));
540                         thread_delay_tmp = random_int % thread_delay;
541                         set_current_state(TASK_UNINTERRUPTIBLE);
542                         schedule_timeout(thread_delay_tmp); /* In jiffies */
543                 }
544
545                 t.start = zpios_timespec_now();
546                 rc = zpios_dmu_write(run_args, obj.os, obj.obj,
547                                      offset, chunk_size, buf);
548                 t.stop  = zpios_timespec_now();
549                 t.delta = zpios_timespec_sub(t.stop, t.start);
550
551                 if (rc) {
552                         zpios_print(run_args->file, "IO error while doing "
553                                     "dmu_write(): %d\n", rc);
554                         break;
555                 }
556
557                 mutex_enter(&thr->lock);
558                 thr->stats.wr_data += chunk_size;
559                 thr->stats.wr_chunks++;
560                 thr->stats.wr_time.delta = zpios_timespec_add(
561                         thr->stats.wr_time.delta, t.delta);
562                 mutex_exit(&thr->lock);
563
564                 mutex_enter(&region->lock);
565                 region->stats.wr_data += chunk_size;
566                 region->stats.wr_chunks++;
567                 region->stats.wr_time.delta = zpios_timespec_add(
568                         region->stats.wr_time.delta, t.delta);
569
570                 /* First time region was accessed */
571                 if (region->init_offset == offset)
572                         region->stats.wr_time.start = t.start;
573
574                 mutex_exit(&region->lock);
575         }
576
577         mutex_enter(&run_args->lock_ctl);
578         run_args->threads_done++;
579         mutex_exit(&run_args->lock_ctl);
580
581         mutex_enter(&thr->lock);
582         thr->rc = rc;
583         thr->stats.wr_time.stop = zpios_timespec_now();
584         mutex_exit(&thr->lock);
585         wake_up(&run_args->waitq);
586
587         set_current_state(TASK_UNINTERRUPTIBLE);
588         schedule();
589
590         /* Check if we should exit */
591         mutex_enter(&thr->lock);
592         rc = thr->rc;
593         mutex_exit(&thr->lock);
594         if (rc)
595                 goto out;
596
597         /* Read phase */
598         mutex_enter(&thr->lock);
599         thr->stats.rd_time.start = zpios_timespec_now();
600         mutex_exit(&thr->lock);
601
602         while (zpios_get_work_item(run_args, &obj, &offset,
603                                    &chunk_size, &region, DMU_READ)) {
604                 if (thread_delay) {
605                         get_random_bytes(&random_int, sizeof(unsigned int));
606                         thread_delay_tmp = random_int % thread_delay;
607                         set_current_state(TASK_UNINTERRUPTIBLE);
608                         schedule_timeout(thread_delay_tmp); /* In jiffies */
609                 }
610
611                 if (run_args->flags & DMU_VERIFY)
612                         memset(buf, 0, chunk_size);
613
614                 t.start = zpios_timespec_now();
615                 rc = zpios_dmu_read(run_args, obj.os, obj.obj,
616                                     offset, chunk_size, buf);
617                 t.stop  = zpios_timespec_now();
618                 t.delta = zpios_timespec_sub(t.stop, t.start);
619
620                 if (rc) {
621                         zpios_print(run_args->file, "IO error while doing "
622                                     "dmu_read(): %d\n", rc);
623                         break;
624                 }
625
626                 /* Trivial data verification, expensive! */
627                 if (run_args->flags & DMU_VERIFY) {
628                         for (i = 0; i < chunk_size; i++) {
629                                 if (buf[i] != 'z') {
630                                         zpios_print(run_args->file,
631                                                     "IO verify error: %d/%d/%d\n",
632                                                     (int)obj.obj, (int)offset,
633                                                     (int)chunk_size);
634                                         break;
635                                 }
636                         }
637                 }
638
639                 mutex_enter(&thr->lock);
640                 thr->stats.rd_data += chunk_size;
641                 thr->stats.rd_chunks++;
642                 thr->stats.rd_time.delta = zpios_timespec_add(
643                         thr->stats.rd_time.delta, t.delta);
644                 mutex_exit(&thr->lock);
645
646                 mutex_enter(&region->lock);
647                 region->stats.rd_data += chunk_size;
648                 region->stats.rd_chunks++;
649                 region->stats.rd_time.delta = zpios_timespec_add(
650                         region->stats.rd_time.delta, t.delta);
651
652                 /* First time region was accessed */
653                 if (region->init_offset == offset)
654                         region->stats.rd_time.start = t.start;
655
656                 mutex_exit(&region->lock);
657         }
658
659         mutex_enter(&run_args->lock_ctl);
660         run_args->threads_done++;
661         mutex_exit(&run_args->lock_ctl);
662
663         mutex_enter(&thr->lock);
664         thr->rc = rc;
665         thr->stats.rd_time.stop = zpios_timespec_now();
666         mutex_exit(&thr->lock);
667         wake_up(&run_args->waitq);
668
669 out:
670         vmem_free(buf, chunk_size);
671         do_exit(0);
672
673         return rc; /* Unreachable, due to do_exit() */
674 }
675
676 static int
677 zpios_thread_done(run_args_t *run_args)
678 {
679         ASSERT(run_args->threads_done <= run_args->thread_count);
680         return (run_args->threads_done == run_args->thread_count);
681 }
682
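/* Create the worker threads, drive the write and read phases by waking
 * them and waiting on threads_done, and aggregate their statistics into
 * the run-wide totals.
 */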
683 static int
684 zpios_threads_run(run_args_t *run_args)
685 {
686         struct task_struct *tsk, **tsks;
687         thread_data_t *thr = NULL;
688         zpios_time_t *tt = &(run_args->stats.total_time);
689         zpios_time_t *tw = &(run_args->stats.wr_time);
690         zpios_time_t *tr = &(run_args->stats.rd_time);
691         int i, rc = 0, tc = run_args->thread_count;
692
693         tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP);
694         if (tsks == NULL) {
695                 rc = -ENOMEM;
696                 goto cleanup2;
697         }
698
699         run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP);
700         if (run_args->threads == NULL) {
701                 rc = -ENOMEM;
702                 goto cleanup;
703         }
704
705         init_waitqueue_head(&run_args->waitq);
706         run_args->threads_done = 0;
707
708         /* Create all the needed threads which will sleep until awoken */
709         for (i = 0; i < tc; i++) {
710                 thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP);
711                 if (thr == NULL) {
712                         rc = -ENOMEM;
713                         goto taskerr;
714                 }
715
716                 thr->thread_no = i;
717                 thr->run_args = run_args;
718                 thr->rc = 0;
719                 mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
720                 run_args->threads[i] = thr;
721
722                 tsk = kthread_create(zpios_thread_main, (void *)thr,
723                                      "%s/%d", "zpios_io", i);
724                 if (IS_ERR(tsk)) {
725                         rc = -EINVAL;
726                         goto taskerr;
727                 }
728
729                 tsks[i] = tsk;
730         }
731
732         tt->start = zpios_timespec_now();
733
734         /* Wake up all threads for write phase */
735         (void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
736         for (i = 0; i < tc; i++)
737                 wake_up_process(tsks[i]);
738
739         /* Wait for write phase to complete */
740         tw->start = zpios_timespec_now();
741         wait_event(run_args->waitq, zpios_thread_done(run_args));
742         tw->stop = zpios_timespec_now();
743         (void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
744
745         for (i = 0; i < tc; i++) {
746                 thr = run_args->threads[i];
747
748                 mutex_enter(&thr->lock);
749
750                 if (!rc && thr->rc)
751                         rc = thr->rc;
752
753                 run_args->stats.wr_data += thr->stats.wr_data;
754                 run_args->stats.wr_chunks += thr->stats.wr_chunks;
755                 mutex_exit(&thr->lock);
756         }
757
758         if (rc) {
759                 /* Wake up all threads and tell them to exit */
760                 for (i = 0; i < tc; i++) {
761                         thr = run_args->threads[i];
762                         mutex_enter(&thr->lock);
763                         thr->rc = rc;
764                         mutex_exit(&thr->lock);
765                         wake_up_process(tsks[i]);
766                 }
767                 goto out;
768         }
769
770         mutex_enter(&run_args->lock_ctl);
771         ASSERT(run_args->threads_done == run_args->thread_count);
772         run_args->threads_done = 0;
773         mutex_exit(&run_args->lock_ctl);
774
775         /* Wake up all threads for read phase */
776         (void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
777         for (i = 0; i < tc; i++)
778                 wake_up_process(tsks[i]);
779
780         /* Wait for read phase to complete */
781         tr->start = zpios_timespec_now();
782         wait_event(run_args->waitq, zpios_thread_done(run_args));
783         tr->stop = zpios_timespec_now();
784         (void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
785
786         for (i = 0; i < tc; i++) {
787                 thr = run_args->threads[i];
788
789                 mutex_enter(&thr->lock);
790
791                 if (!rc && thr->rc)
792                         rc = thr->rc;
793
794                 run_args->stats.rd_data += thr->stats.rd_data;
795                 run_args->stats.rd_chunks += thr->stats.rd_chunks;
796                 mutex_exit(&thr->lock);
797         }
798 out:
799         tt->stop  = zpios_timespec_now();
800         tt->delta = zpios_timespec_sub(tt->stop, tt->start);
801         tw->delta = zpios_timespec_sub(tw->stop, tw->start);
802         tr->delta = zpios_timespec_sub(tr->stop, tr->start);
803
804 cleanup:
805         kmem_free(tsks, sizeof(struct task_struct *) * tc);
806 cleanup2:
807         /* Returns first encountered thread error (if any) */
808         return rc;
809
810 taskerr:
811         /* Destroy all threads that were created successfully */
812         for (i = 0; i < tc; i++)
813                 if (tsks[i] != NULL)
814                         (void) kthread_stop(tsks[i]);
815
816         goto cleanup;
817 }
818
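/* Validate the command, execute one complete create/write/read/remove
 * cycle, and (if a data buffer was supplied) return the collected
 * statistics to the caller.
 */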
819 static int
820 zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
821                  int data_size, void *data)
822 {
823         run_args_t *run_args = NULL;
824         zpios_stats_t *stats = (zpios_stats_t *)data;
825         int i, n, m, size, rc;
826
827         if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
828             (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
829                 zpios_print(file, "Invalid chunk_size, region_size, "
830                             "thread_count, or region_count, %d\n", -EINVAL);
831                 return -EINVAL;
832         }
833
834         if (!(kcmd->cmd_flags & DMU_WRITE) ||
835             !(kcmd->cmd_flags & DMU_READ)) {
836                 zpios_print(file, "Invalid flags, minimally DMU_WRITE "
837                             "and DMU_READ must be set, %d\n", -EINVAL);
838                 return -EINVAL;
839         }
840
841         if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
842             (kcmd->cmd_flags & DMU_VERIFY)) {
843                 zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
844                             "with DMU_VERIFY, used for performance analysis "
845                             "only, %d\n", -EINVAL);
846                 return -EINVAL;
847         }
848
849         /* Opaque data on return contains structs of the following form:
850          *
851          * zpios_stats_t stats[];
852          * stats[0]     = run_args->stats;
853          * stats[1-N]   = threads[N]->stats;
854          * stats[N+1-M] = regions[M]->stats;
855          *
856          * Where N is the number of threads, and M is the number of regions.
857          */
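        /* As a sketch (hypothetical user space consumer, not part of this
         * module), the returned buffer could be indexed as:
         *
         *   zpios_stats_t *summary = &stats[0];
         *   zpios_stats_t *thr_st  = &stats[1 + t];      thread t of N
         *   zpios_stats_t *reg_st  = &stats[1 + N + r];  region r of M
         */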
858         size = (sizeof(zpios_stats_t) +
859                (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) +
860                (kcmd->cmd_region_count * sizeof(zpios_stats_t)));
861         if (data_size < size) {
862                 zpios_print(file, "Invalid size, command data buffer "
863                             "size too small, (%d < %d)\n", data_size, size);
864                 return -ENOSPC;
865         }
866
867         rc = zpios_setup_run(&run_args, kcmd, file);
868         if (rc)
869                 return rc;
870
871         rc = zpios_threads_run(run_args);
872         zpios_remove_objset(run_args);
873         if (rc)
874                 goto cleanup;
875
876         if (stats) {
877                 n = 1;
878                 m = 1 + kcmd->cmd_thread_count;
879                 stats[0] = run_args->stats;
880
881                 for (i = 0; i < kcmd->cmd_thread_count; i++)
882                         stats[n+i] = run_args->threads[i]->stats;
883
884                 for (i = 0; i < kcmd->cmd_region_count; i++)
885                         stats[m+i] = run_args->regions[i].stats;
886         }
887
888 cleanup:
889         (void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
890
891         zpios_cleanup_run(run_args);
892
893         return rc;
894 }
895
896 static int
897 zpios_open(struct inode *inode, struct file *file)
898 {
899         unsigned int minor = iminor(inode);
900         zpios_info_t *info;
901
902         if (minor >= ZPIOS_MINORS)
903                 return -ENXIO;
904
905         info = (zpios_info_t *)kmem_alloc(sizeof(*info), KM_SLEEP);
906         if (info == NULL)
907                 return -ENOMEM;
908
909         spin_lock_init(&info->info_lock);
910         info->info_size = ZPIOS_INFO_BUFFER_SIZE;
911         info->info_buffer = (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
912         if (info->info_buffer == NULL) {
913                 kmem_free(info, sizeof(*info));
914                 return -ENOMEM;
915         }
916
917         info->info_head = info->info_buffer;
918         file->private_data = (void *)info;
919
920         return 0;
921 }
922
923 static int
924 zpios_release(struct inode *inode, struct file *file)
925 {
926         unsigned int minor = iminor(inode);
927         zpios_info_t *info = (zpios_info_t *)file->private_data;
928
929         if (minor >= ZPIOS_MINORS)
930                 return -ENXIO;
931
932         ASSERT(info);
933         ASSERT(info->info_buffer);
934
935         vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE);
936         kmem_free(info, sizeof(*info));
937
938         return 0;
939 }
940
941 static int
942 zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
943 {
944         zpios_info_t *info = (zpios_info_t *)file->private_data;
945
946         ASSERT(info);
947         ASSERT(info->info_buffer);
948
949         spin_lock(&info->info_lock);
950         memset(info->info_buffer, 0, info->info_size);
951         info->info_head = info->info_buffer;
952         spin_unlock(&info->info_lock);
953
954         return 0;
955 }
956
957 static int
958 zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
959 {
960         zpios_info_t *info = (zpios_info_t *)file->private_data;
961         char *buf;
962         int min, size, rc = 0;
963
964         ASSERT(info);
965         ASSERT(info->info_buffer);
966
967         spin_lock(&info->info_lock);
968         if (kcfg->cfg_arg1 > 0) {
969
970                 size = kcfg->cfg_arg1;
971                 buf = (char *)vmem_alloc(size, KM_SLEEP);
972                 if (buf == NULL) {
973                         rc = -ENOMEM;
974                         goto out;
975                 }
976
977                 /* Zero fill and truncate contents when copying the buffer */
978                 min = ((size < info->info_size) ? size : info->info_size);
979                 memset(buf, 0, size);
980                 memcpy(buf, info->info_buffer, min);
981                 vmem_free(info->info_buffer, info->info_size);
982                 info->info_size = size;
983                 info->info_buffer = buf;
984                 info->info_head = info->info_buffer;
985         }
986
987         kcfg->cfg_rc1 = info->info_size;
988
989         if (copy_to_user((zpios_cfg_t __user *)arg, kcfg, sizeof(*kcfg)))
990                 rc = -EFAULT;
991 out:
992         spin_unlock(&info->info_lock);
993
994         return rc;
995 }
996
997 static int
998 zpios_ioctl_cfg(struct file *file, unsigned long arg)
999 {
1000         zpios_cfg_t kcfg;
1001         int rc = 0;
1002
1003         if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg)))
1004                 return -EFAULT;
1005
1006         if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
1007                 zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
1008                             kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
1009                 return -EINVAL;
1010         }
1011
1012         switch (kcfg.cfg_cmd) {
1013                 case ZPIOS_CFG_BUFFER_CLEAR:
1014                         /* cfg_arg1 - Unused
1015                          * cfg_rc1  - Unused
1016                          */
1017                         rc = zpios_buffer_clear(file, &kcfg, arg);
1018                         break;
1019                 case ZPIOS_CFG_BUFFER_SIZE:
1020                         /* cfg_arg1 - 0 - query size; >0 resize
1021                          * cfg_rc1  - Set to current buffer size
1022                          */
1023                         rc = zpios_buffer_size(file, &kcfg, arg);
1024                         break;
1025                 default:
1026                         zpios_print(file, "Bad config command %d\n",
1027                                     kcfg.cfg_cmd);
1028                         rc = -EINVAL;
1029                         break;
1030         }
1031
1032         return rc;
1033 }
1034
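/* Handle ZPIOS_CMD: copy the zpios_cmd_t and its optional opaque data
 * buffer in from user space, run the test, and copy the resulting stats
 * buffer back out on success.
 */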
1035 static int
1036 zpios_ioctl_cmd(struct file *file, unsigned long arg)
1037 {
1038         zpios_cmd_t *kcmd;
1039         void *data = NULL;
1040         int rc = -EINVAL;
1041
1042         kcmd = kmem_alloc(sizeof(zpios_cmd_t), KM_SLEEP);
1043         if (kcmd == NULL) {
1044                 zpios_print(file, "Unable to kmem_alloc() %ld bytes for "
1045                             "zpios_cmd_t\n", (long int)sizeof(zpios_cmd_t));
1046                 return -ENOMEM;
1047         }
1048
1049         rc = copy_from_user(kcmd, (zpios_cmd_t *)arg, sizeof(zpios_cmd_t));
1050         if (rc) {
1051                 zpios_print(file, "Unable to copy command structure "
1052                             "from user to kernel memory, %d\n", rc);
1053                 goto out_cmd;
1054         }
1055
1056         if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) {
1057                 zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
1058                             kcmd->cmd_magic, ZPIOS_CMD_MAGIC);
1059                 rc = -EINVAL;
1060                 goto out_cmd;
1061         }
1062
1063         /* Allocate memory for any opaque data the caller needed to pass on */
1064         if (kcmd->cmd_data_size > 0) {
1065                 data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
1066                 if (data == NULL) {
1067                         zpios_print(file, "Unable to vmem_alloc() %ld "
1068                                     "bytes for data buffer\n",
1069                                     (long)kcmd->cmd_data_size);
1070                         rc = -ENOMEM;
1071                         goto out_cmd;
1072                 }
1073
1074                 rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
1075                                     cmd_data_str)), kcmd->cmd_data_size);
1076                 if (rc) {
1077                         zpios_print(file, "Unable to copy data buffer "
1078                                     "from user to kernel memory, %d\n", rc);
1079                         goto out_data;
1080                 }
1081         }
1082
1083         rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data);
1084
1085         if (data != NULL) {
1086                 /* If the test failed do not print out the stats */
1087                 if (rc)
1088                         goto out_data;
1089
1090                 rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t,
1091                                   cmd_data_str)), data, kcmd->cmd_data_size);
1092                 if (rc) {
1093                         zpios_print(file, "Unable to copy data buffer "
1094                                     "from kernel to user memory, %d\n", rc);
1095                         rc = -EFAULT;
1096                 }
1097
1098 out_data:
1099                 vmem_free(data, kcmd->cmd_data_size);
1100         }
1101 out_cmd:
1102         kmem_free(kcmd, sizeof(zpios_cmd_t));
1103
1104         return rc;
1105 }
1106
1107 static long
1108 zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1109 {
1110         unsigned int minor = iminor(file->f_dentry->d_inode);
1111         int rc = 0;
1112
1113         /* Ignore tty ioctls */
1114         if ((cmd & 0xffffff00) == ((int)'T') << 8)
1115                 return -ENOTTY;
1116
1117         if (minor >= ZPIOS_MINORS)
1118                 return -ENXIO;
1119
1120         switch (cmd) {
1121                 case ZPIOS_CFG:
1122                         rc = zpios_ioctl_cfg(file, arg);
1123                         break;
1124                 case ZPIOS_CMD:
1125                         rc = zpios_ioctl_cmd(file, arg);
1126                         break;
1127                 default:
1128                         zpios_print(file, "Bad ioctl command %d\n", cmd);
1129                         rc = -EINVAL;
1130                         break;
1131         }
1132
1133         return rc;
1134 }
1135
1136 #ifdef CONFIG_COMPAT
1137 /* Compatibility handler for ioctls from 32-bit ELF binaries */
1138 static long
1139 zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1140 {
1141         return zpios_unlocked_ioctl(file, cmd, arg);
1142 }
1143 #endif /* CONFIG_COMPAT */
1144
1145 /* I'm not sure why you would want to write into this buffer from
1146  * user space since its principal use is to pass test status info
1147  * back to user space, but I don't see any reason to prevent it.
1148  */
1149 static ssize_t
1150 zpios_write(struct file *file, const char __user *buf,
1151             size_t count, loff_t *ppos)
1152 {
1153         unsigned int minor = iminor(file->f_dentry->d_inode);
1154         zpios_info_t *info = (zpios_info_t *)file->private_data;
1155         int rc = 0;
1156
1157         if (minor >= ZPIOS_MINORS)
1158                 return -ENXIO;
1159
1160         ASSERT(info);
1161         ASSERT(info->info_buffer);
1162
1163         spin_lock(&info->info_lock);
1164
1165         /* Write beyond EOF */
1166         if (*ppos >= info->info_size) {
1167                 rc = -EFBIG;
1168                 goto out;
1169         }
1170
1171         /* Resize count if beyond EOF */
1172         if (*ppos + count > info->info_size)
1173                 count = info->info_size - *ppos;
1174
1175         if (copy_from_user(info->info_buffer + *ppos, buf, count)) {
1176                 rc = -EFAULT;
1177                 goto out;
1178         }
1179
1180         *ppos += count;
1181         rc = count;
1182 out:
1183         spin_unlock(&info->info_lock);
1184         return rc;
1185 }
1186
1187 static ssize_t
1188 zpios_read(struct file *file, char __user *buf,
1189                         size_t count, loff_t *ppos)
1190 {
1191         unsigned int minor = iminor(file->f_dentry->d_inode);
1192         zpios_info_t *info = (zpios_info_t *)file->private_data;
1193         int rc = 0;
1194
1195         if (minor >= ZPIOS_MINORS)
1196                 return -ENXIO;
1197
1198         ASSERT(info);
1199         ASSERT(info->info_buffer);
1200
1201         spin_lock(&info->info_lock);
1202
1203         /* Read beyond EOF */
1204         if (*ppos >= info->info_size)
1205                 goto out;
1206
1207         /* Resize count if beyond EOF */
1208         if (*ppos + count > info->info_size)
1209                 count = info->info_size - *ppos;
1210
1211         if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
1212                 rc = -EFAULT;
1213                 goto out;
1214         }
1215
1216         *ppos += count;
1217         rc = count;
1218 out:
1219         spin_unlock(&info->info_lock);
1220         return rc;
1221 }
1222
1223 static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
1224 {
1225         unsigned int minor = iminor(file->f_dentry->d_inode);
1226         zpios_info_t *info = (zpios_info_t *)file->private_data;
1227         int rc = -EINVAL;
1228
1229         if (minor >= ZPIOS_MINORS)
1230                 return -ENXIO;
1231
1232         ASSERT(info);
1233         ASSERT(info->info_buffer);
1234
1235         spin_lock(&info->info_lock);
1236
1237         switch (origin) {
1238         case 0: /* SEEK_SET - No-op just do it */
1239                 break;
1240         case 1: /* SEEK_CUR - Seek from current */
1241                 offset = file->f_pos + offset;
1242                 break;
1243         case 2: /* SEEK_END - Seek from end */
1244                 offset = info->info_size + offset;
1245                 break;
1246         }
1247
1248         if (offset >= 0) {
1249                 file->f_pos = offset;
1250                 file->f_version = 0;
1251                 rc = offset;
1252         }
1253
1254         spin_unlock(&info->info_lock);
1255
1256         return rc;
1257 }
1258
1259 static struct cdev zpios_cdev;
1260 static struct file_operations zpios_fops = {
1261         .owner          = THIS_MODULE,
1262         .open           = zpios_open,
1263         .release        = zpios_release,
1264         .unlocked_ioctl = zpios_unlocked_ioctl,
1265 #ifdef CONFIG_COMPAT
1266         .compat_ioctl   = zpios_compat_ioctl,
1267 #endif
1268         .read           = zpios_read,
1269         .write          = zpios_write,
1270         .llseek         = zpios_seek,
1271 };
1272
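/* Module load: register the zpios character device region, add the cdev,
 * and create the corresponding class/device entries so the driver is
 * visible via sysfs/udev.
 */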
1273 static int
1274 zpios_init(void)
1275 {
1276         dev_t dev;
1277         int rc;
1278
1279         dev = MKDEV(ZPIOS_MAJOR, 0);
1280         if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME)))
1281                 goto error;
1282
1283         /* Support for registering a character driver */
1284         cdev_init(&zpios_cdev, &zpios_fops);
1285         zpios_cdev.owner = THIS_MODULE;
1286         kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME);
1287         if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) {
1288                 printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc);
1289                 kobject_put(&zpios_cdev.kobj);
1290                 unregister_chrdev_region(dev, ZPIOS_MINORS);
1291                 goto error;
1292         }
1293
1294         /* Support for udev make driver info available in sysfs */
1295         zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME);
1296         if (IS_ERR(zpios_class)) {
1297                 rc = PTR_ERR(zpios_class);
1298                 printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc);
1299                 cdev_del(&zpios_cdev);
1300                 unregister_chrdev_region(dev, ZPIOS_MINORS);
1301                 goto error;
1302         }
1303
1304         zpios_device = spl_device_create(zpios_class, NULL,
1305                                          dev, NULL, ZPIOS_NAME);
1306         return 0;
1307 error:
1308         printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc);
1309         return rc;
1310 }
1311
1312 static int
1313 zpios_fini(void)
1314 {
1315         dev_t dev = MKDEV(ZPIOS_MAJOR, 0);
1316
1317         spl_device_destroy(zpios_class, zpios_device, dev);
1318         spl_class_destroy(zpios_class);
1319         cdev_del(&zpios_cdev);
1320         unregister_chrdev_region(dev, ZPIOS_MINORS);
1321
1322         return 0;
1323 }
1324
1325 spl_module_init(zpios_init);
1326 spl_module_exit(zpios_fini);
1327
1328 MODULE_AUTHOR("LLNL / Sun");
1329 MODULE_DESCRIPTION("Kernel PIOS implementation");
1330 MODULE_LICENSE("GPL");