1 /*****************************************************************************\
2 * ZPIOS is a heavily modified version of the original PIOS test code.
3 * It is designed to have the test code running in the Linux kernel
4 * against ZFS while still being flexibly controlled from user space.
6 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
11 * Original PIOS Test Code
12 * Copyright (C) 2004 Cluster File Systems, Inc.
13 * Written by Peter Braam <braam@clusterfs.com>
14 * Atul Vidwansa <atul@clusterfs.com>
15 * Milind Dumbare <milind@clusterfs.com>
17 * This file is part of ZFS on Linux.
18 * For details, see <http://github.com/behlendorf/zfs/>.
20 * ZPIOS is free software; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version.
25 * ZPIOS is distributed in the hope that it will be useful, but WITHOUT
26 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
27 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
30 * You should have received a copy of the GNU General Public License along
31 * with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
32 \*****************************************************************************/
34 #include <sys/zfs_context.h>
37 #include <linux/cdev.h>
38 #include "zpios-internal.h"
41 static spl_class *zpios_class;
42 static spl_device *zpios_device;
43 static char *zpios_tag = "zpios_tag";
46 int zpios_upcall(char *path, char *phase, run_args_t *run_args, int rc)
48 /* This is stack heavy but it should be OK since we are only
49 * making the upcall between tests when the stack is shallow.
51 char id[16], chunk_size[16], region_size[16], thread_count[16];
52 char region_count[16], offset[16], region_noise[16], chunk_noise[16];
53 char thread_delay[16], flags[16], result[8];
54 char *argv[16], *envp[4];
56 if ((path == NULL) || (strlen(path) == 0))
59 snprintf(id, 15, "%d", run_args->id);
60 snprintf(chunk_size, 15, "%lu", (long unsigned)run_args->chunk_size);
61 snprintf(region_size, 15, "%lu",(long unsigned) run_args->region_size);
62 snprintf(thread_count, 15, "%u", run_args->thread_count);
63 snprintf(region_count, 15, "%u", run_args->region_count);
64 snprintf(offset, 15, "%lu", (long unsigned)run_args->offset);
65 snprintf(region_noise, 15, "%u", run_args->region_noise);
66 snprintf(chunk_noise, 15, "%u", run_args->chunk_noise);
67 snprintf(thread_delay, 15, "%u", run_args->thread_delay);
68 snprintf(flags, 15, "0x%x", run_args->flags);
69 snprintf(result, 7, "%d", rc);
71 /* Passing 15 args to registered pre/post upcall */
74 argv[2] = strlen(run_args->log) ? run_args->log : "<none>";
76 argv[4] = run_args->pool;
78 argv[6] = region_size;
79 argv[7] = thread_count;
80 argv[8] = region_count;
82 argv[10] = region_noise;
83 argv[11] = chunk_noise;
84 argv[12] = thread_delay;
89 /* Passing environment for user space upcall */
91 envp[1] = "TERM=linux";
92 envp[2] = "PATH=/sbin:/usr/sbin:/bin:/usr/bin";
95 return call_usermodehelper(path, argv, envp, 1);
99 zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
105 tx = dmu_tx_create(os);
106 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
107 rc = dmu_tx_assign(tx, TXG_WAIT);
109 zpios_print(run_args->file,
110 "dmu_tx_assign() failed: %d\n", rc);
115 obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
117 rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
119 zpios_print(run_args->file,
120 "dmu_object_set_blocksize() failed: %d\n", rc);
131 zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
136 tx = dmu_tx_create(os);
137 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
138 rc = dmu_tx_assign(tx, TXG_WAIT);
140 zpios_print(run_args->file,
141 "dmu_tx_assign() failed: %d\n", rc);
146 rc = dmu_object_free(os, obj, tx);
148 zpios_print(run_args->file,
149 "dmu_object_free() failed: %d\n", rc);
160 zpios_dmu_setup(run_args_t *run_args)
162 zpios_time_t *t = &(run_args->stats.cr_time);
168 (void)zpios_upcall(run_args->pre, PHASE_PRE_CREATE, run_args, 0);
169 t->start = zpios_timespec_now();
171 (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
172 rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
174 zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
175 "failed: %d\n", name, rc);
179 rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
181 zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
182 "failed: %d\n", name, rc);
186 if (!(run_args->flags & DMU_FPP)) {
187 obj = zpios_dmu_object_create(run_args, os);
190 zpios_print(run_args->file, "Error zpios_dmu_"
191 "object_create() failed, %d\n", rc);
196 for (i = 0; i < run_args->region_count; i++) {
197 zpios_region_t *region;
199 region = &run_args->regions[i];
200 mutex_init(®ion->lock, NULL, MUTEX_DEFAULT, NULL);
202 if (run_args->flags & DMU_FPP) {
203 /* File per process */
205 region->obj.obj = zpios_dmu_object_create(run_args, os);
206 ASSERT(region->obj.obj > 0); /* XXX - Handle this */
207 region->wr_offset = run_args->offset;
208 region->rd_offset = run_args->offset;
209 region->init_offset = run_args->offset;
210 region->max_offset = run_args->offset +
211 run_args->region_size;
213 /* Single shared file */
215 region->obj.obj = obj;
216 region->wr_offset = run_args->offset * i;
217 region->rd_offset = run_args->offset * i;
218 region->init_offset = run_args->offset * i;
219 region->max_offset = run_args->offset *
220 i + run_args->region_size;
227 rc2 = dmu_objset_destroy(name, B_FALSE);
229 zpios_print(run_args->file, "Error dmu_objset_destroy"
230 "(%s, ...) failed: %d\n", name, rc2);
233 t->stop = zpios_timespec_now();
234 t->delta = zpios_timespec_sub(t->stop, t->start);
235 (void)zpios_upcall(run_args->post, PHASE_POST_CREATE, run_args, rc);
241 zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
246 size = sizeof(*ra) + kcmd->cmd_region_count * sizeof(zpios_region_t);
248 ra = vmem_zalloc(size, KM_SLEEP);
250 zpios_print(file, "Unable to vmem_zalloc() %d bytes "
251 "for regions\n", size);
256 strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
257 strncpy(ra->pre, kcmd->cmd_pre, ZPIOS_PATH_SIZE - 1);
258 strncpy(ra->post, kcmd->cmd_post, ZPIOS_PATH_SIZE - 1);
259 strncpy(ra->log, kcmd->cmd_log, ZPIOS_PATH_SIZE - 1);
260 ra->id = kcmd->cmd_id;
261 ra->chunk_size = kcmd->cmd_chunk_size;
262 ra->thread_count = kcmd->cmd_thread_count;
263 ra->region_count = kcmd->cmd_region_count;
264 ra->region_size = kcmd->cmd_region_size;
265 ra->offset = kcmd->cmd_offset;
266 ra->region_noise = kcmd->cmd_region_noise;
267 ra->chunk_noise = kcmd->cmd_chunk_noise;
268 ra->thread_delay = kcmd->cmd_thread_delay;
269 ra->flags = kcmd->cmd_flags;
270 ra->stats.wr_data = 0;
271 ra->stats.wr_chunks = 0;
272 ra->stats.rd_data = 0;
273 ra->stats.rd_chunks = 0;
276 mutex_init(&ra->lock_work, NULL, MUTEX_DEFAULT, NULL);
277 mutex_init(&ra->lock_ctl, NULL, MUTEX_DEFAULT, NULL);
279 (void)zpios_upcall(ra->pre, PHASE_PRE_RUN, ra, 0);
281 rc = zpios_dmu_setup(ra);
283 mutex_destroy(&ra->lock_ctl);
284 mutex_destroy(&ra->lock_work);
293 zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
294 __u32 *chunk_size, zpios_region_t **region, __u32 flags)
297 unsigned int random_int;
299 get_random_bytes(&random_int, sizeof(unsigned int));
301 mutex_enter(&run_args->lock_work);
302 i = run_args->region_next;
304 /* XXX: I don't much care for this chunk selection mechansim
305 * there's the potential to burn a lot of time here doing nothing
306 * useful while holding the global lock. This could give some
307 * misleading performance results. I'll fix it latter.
309 while (count < run_args->region_count) {
311 zpios_time_t *rw_time;
313 j = i % run_args->region_count;
314 *region = &(run_args->regions[j]);
316 if (flags & DMU_WRITE) {
317 rw_offset = &((*region)->wr_offset);
318 rw_time = &((*region)->stats.wr_time);
320 rw_offset = &((*region)->rd_offset);
321 rw_time = &((*region)->stats.rd_time);
324 /* test if region is fully written */
325 if (*rw_offset + *chunk_size > (*region)->max_offset) {
329 if (unlikely(rw_time->stop.ts_sec == 0) &&
330 unlikely(rw_time->stop.ts_nsec == 0))
331 rw_time->stop = zpios_timespec_now();
336 *offset = *rw_offset;
337 *obj = (*region)->obj;
338 *rw_offset += *chunk_size;
340 /* update ctl structure */
341 if (run_args->region_noise) {
342 get_random_bytes(&random_int, sizeof(unsigned int));
343 run_args->region_next += random_int % run_args->region_noise;
345 run_args->region_next++;
348 mutex_exit(&run_args->lock_work);
352 /* nothing left to do */
353 mutex_exit(&run_args->lock_work);
359 zpios_remove_objset(run_args_t *run_args)
361 zpios_time_t *t = &(run_args->stats.rm_time);
362 zpios_region_t *region;
366 (void)zpios_upcall(run_args->pre, PHASE_PRE_REMOVE, run_args, 0);
367 t->start = zpios_timespec_now();
369 (void)snprintf(name, 32, "%s/id_%d", run_args->pool, run_args->id);
371 if (run_args->flags & DMU_REMOVE) {
372 if (run_args->flags & DMU_FPP) {
373 for (i = 0; i < run_args->region_count; i++) {
374 region = &run_args->regions[i];
375 rc = zpios_dmu_object_free(run_args,
379 zpios_print(run_args->file, "Error "
380 "removing object %d, %d\n",
381 (int)region->obj.obj, rc);
384 region = &run_args->regions[0];
385 rc = zpios_dmu_object_free(run_args,
389 zpios_print(run_args->file, "Error "
390 "removing object %d, %d\n",
391 (int)region->obj.obj, rc);
395 dmu_objset_disown(run_args->os, zpios_tag);
397 if (run_args->flags & DMU_REMOVE) {
398 rc = dmu_objset_destroy(name, B_FALSE);
400 zpios_print(run_args->file, "Error dmu_objset_destroy"
401 "(%s, ...) failed: %d\n", name, rc);
404 t->stop = zpios_timespec_now();
405 t->delta = zpios_timespec_sub(t->stop, t->start);
406 (void)zpios_upcall(run_args->post, PHASE_POST_REMOVE, run_args, rc);
410 zpios_cleanup_run(run_args_t *run_args)
414 if (run_args == NULL)
417 if (run_args->threads != NULL) {
418 for (i = 0; i < run_args->thread_count; i++) {
419 if (run_args->threads[i]) {
420 mutex_destroy(&run_args->threads[i]->lock);
421 kmem_free(run_args->threads[i],
422 sizeof(thread_data_t));
426 kmem_free(run_args->threads,
427 sizeof(thread_data_t *) * run_args->thread_count);
430 for (i = 0; i < run_args->region_count; i++)
431 mutex_destroy(&run_args->regions[i].lock);
433 mutex_destroy(&run_args->lock_work);
434 mutex_destroy(&run_args->lock_ctl);
435 size = run_args->region_count * sizeof(zpios_region_t);
437 vmem_free(run_args, sizeof(*run_args) + size);
441 zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
442 uint64_t offset, uint64_t size, const void *buf)
445 int rc, how = TXG_WAIT;
448 if (run_args->flags & DMU_WRITE_NOWAIT)
452 tx = dmu_tx_create(os);
453 dmu_tx_hold_write(tx, object, offset, size);
454 rc = dmu_tx_assign(tx, how);
457 if (rc == ERESTART && how == TXG_NOWAIT) {
462 zpios_print(run_args->file,
463 "Error in dmu_tx_assign(), %d", rc);
470 // if (run_args->flags & DMU_WRITE_ZC)
471 // flags |= DMU_WRITE_ZEROCOPY;
473 dmu_write(os, object, offset, size, buf, tx);
480 zpios_dmu_read(run_args_t *run_args, objset_t *os, uint64_t object,
481 uint64_t offset, uint64_t size, void *buf)
485 // if (run_args->flags & DMU_READ_ZC)
486 // flags |= DMU_READ_ZEROCOPY;
488 if (run_args->flags & DMU_READ_NOPF)
489 flags |= DMU_READ_NO_PREFETCH;
491 return dmu_read(os, object, offset, size, buf, flags);
495 zpios_thread_main(void *data)
497 thread_data_t *thr = (thread_data_t *)data;
498 run_args_t *run_args = thr->run_args;
503 zpios_region_t *region;
505 unsigned int random_int;
506 int chunk_noise = run_args->chunk_noise;
507 int chunk_noise_tmp = 0;
508 int thread_delay = run_args->thread_delay;
509 int thread_delay_tmp = 0;
513 get_random_bytes(&random_int, sizeof(unsigned int));
514 chunk_noise_tmp = (random_int % (chunk_noise * 2))-chunk_noise;
517 /* It's OK to vmem_alloc() this memory because it will be copied
518 * in to the slab and pointers to the slab copy will be setup in
519 * the bio when the IO is submitted. This of course is not ideal
520 * since we want a zero-copy IO path if possible. It would be nice
521 * to have direct access to those slab entries.
523 chunk_size = run_args->chunk_size + chunk_noise_tmp;
524 buf = (char *)vmem_alloc(chunk_size, KM_SLEEP);
527 /* Trivial data verification pattern for now. */
528 if (run_args->flags & DMU_VERIFY)
529 memset(buf, 'z', chunk_size);
532 mutex_enter(&thr->lock);
533 thr->stats.wr_time.start = zpios_timespec_now();
534 mutex_exit(&thr->lock);
536 while (zpios_get_work_item(run_args, &obj, &offset,
537 &chunk_size, ®ion, DMU_WRITE)) {
539 get_random_bytes(&random_int, sizeof(unsigned int));
540 thread_delay_tmp = random_int % thread_delay;
541 set_current_state(TASK_UNINTERRUPTIBLE);
542 schedule_timeout(thread_delay_tmp); /* In jiffies */
545 t.start = zpios_timespec_now();
546 rc = zpios_dmu_write(run_args, obj.os, obj.obj,
547 offset, chunk_size, buf);
548 t.stop = zpios_timespec_now();
549 t.delta = zpios_timespec_sub(t.stop, t.start);
552 zpios_print(run_args->file, "IO error while doing "
553 "dmu_write(): %d\n", rc);
557 mutex_enter(&thr->lock);
558 thr->stats.wr_data += chunk_size;
559 thr->stats.wr_chunks++;
560 thr->stats.wr_time.delta = zpios_timespec_add(
561 thr->stats.wr_time.delta, t.delta);
562 mutex_exit(&thr->lock);
564 mutex_enter(®ion->lock);
565 region->stats.wr_data += chunk_size;
566 region->stats.wr_chunks++;
567 region->stats.wr_time.delta = zpios_timespec_add(
568 region->stats.wr_time.delta, t.delta);
570 /* First time region was accessed */
571 if (region->init_offset == offset)
572 region->stats.wr_time.start = t.start;
574 mutex_exit(®ion->lock);
577 mutex_enter(&run_args->lock_ctl);
578 run_args->threads_done++;
579 mutex_exit(&run_args->lock_ctl);
581 mutex_enter(&thr->lock);
583 thr->stats.wr_time.stop = zpios_timespec_now();
584 mutex_exit(&thr->lock);
585 wake_up(&run_args->waitq);
587 set_current_state(TASK_UNINTERRUPTIBLE);
590 /* Check if we should exit */
591 mutex_enter(&thr->lock);
593 mutex_exit(&thr->lock);
598 mutex_enter(&thr->lock);
599 thr->stats.rd_time.start = zpios_timespec_now();
600 mutex_exit(&thr->lock);
602 while (zpios_get_work_item(run_args, &obj, &offset,
603 &chunk_size, ®ion, DMU_READ)) {
605 get_random_bytes(&random_int, sizeof(unsigned int));
606 thread_delay_tmp = random_int % thread_delay;
607 set_current_state(TASK_UNINTERRUPTIBLE);
608 schedule_timeout(thread_delay_tmp); /* In jiffies */
611 if (run_args->flags & DMU_VERIFY)
612 memset(buf, 0, chunk_size);
614 t.start = zpios_timespec_now();
615 rc = zpios_dmu_read(run_args, obj.os, obj.obj,
616 offset, chunk_size, buf);
617 t.stop = zpios_timespec_now();
618 t.delta = zpios_timespec_sub(t.stop, t.start);
621 zpios_print(run_args->file, "IO error while doing "
622 "dmu_read(): %d\n", rc);
626 /* Trivial data verification, expensive! */
627 if (run_args->flags & DMU_VERIFY) {
628 for (i = 0; i < chunk_size; i++) {
630 zpios_print(run_args->file,
631 "IO verify error: %d/%d/%d\n",
632 (int)obj.obj, (int)offset,
639 mutex_enter(&thr->lock);
640 thr->stats.rd_data += chunk_size;
641 thr->stats.rd_chunks++;
642 thr->stats.rd_time.delta = zpios_timespec_add(
643 thr->stats.rd_time.delta, t.delta);
644 mutex_exit(&thr->lock);
646 mutex_enter(®ion->lock);
647 region->stats.rd_data += chunk_size;
648 region->stats.rd_chunks++;
649 region->stats.rd_time.delta = zpios_timespec_add(
650 region->stats.rd_time.delta, t.delta);
652 /* First time region was accessed */
653 if (region->init_offset == offset)
654 region->stats.rd_time.start = t.start;
656 mutex_exit(®ion->lock);
659 mutex_enter(&run_args->lock_ctl);
660 run_args->threads_done++;
661 mutex_exit(&run_args->lock_ctl);
663 mutex_enter(&thr->lock);
665 thr->stats.rd_time.stop = zpios_timespec_now();
666 mutex_exit(&thr->lock);
667 wake_up(&run_args->waitq);
670 vmem_free(buf, chunk_size);
673 return rc; /* Unreachable, due to do_exit() */
677 zpios_thread_done(run_args_t *run_args)
679 ASSERT(run_args->threads_done <= run_args->thread_count);
680 return (run_args->threads_done == run_args->thread_count);
684 zpios_threads_run(run_args_t *run_args)
686 struct task_struct *tsk, **tsks;
687 thread_data_t *thr = NULL;
688 zpios_time_t *tt = &(run_args->stats.total_time);
689 zpios_time_t *tw = &(run_args->stats.wr_time);
690 zpios_time_t *tr = &(run_args->stats.rd_time);
691 int i, rc = 0, tc = run_args->thread_count;
693 tsks = kmem_zalloc(sizeof(struct task_struct *) * tc, KM_SLEEP);
699 run_args->threads = kmem_zalloc(sizeof(thread_data_t *) * tc, KM_SLEEP);
700 if (run_args->threads == NULL) {
705 init_waitqueue_head(&run_args->waitq);
706 run_args->threads_done = 0;
708 /* Create all the needed threads which will sleep until awoken */
709 for (i = 0; i < tc; i++) {
710 thr = kmem_zalloc(sizeof(thread_data_t), KM_SLEEP);
717 thr->run_args = run_args;
719 mutex_init(&thr->lock, NULL, MUTEX_DEFAULT, NULL);
720 run_args->threads[i] = thr;
722 tsk = kthread_create(zpios_thread_main, (void *)thr,
723 "%s/%d", "zpios_io", i);
732 tt->start = zpios_timespec_now();
734 /* Wake up all threads for write phase */
735 (void)zpios_upcall(run_args->pre, PHASE_PRE_WRITE, run_args, 0);
736 for (i = 0; i < tc; i++)
737 wake_up_process(tsks[i]);
739 /* Wait for write phase to complete */
740 tw->start = zpios_timespec_now();
741 wait_event(run_args->waitq, zpios_thread_done(run_args));
742 tw->stop = zpios_timespec_now();
743 (void)zpios_upcall(run_args->post, PHASE_POST_WRITE, run_args, rc);
745 for (i = 0; i < tc; i++) {
746 thr = run_args->threads[i];
748 mutex_enter(&thr->lock);
753 run_args->stats.wr_data += thr->stats.wr_data;
754 run_args->stats.wr_chunks += thr->stats.wr_chunks;
755 mutex_exit(&thr->lock);
759 /* Wake up all threads and tell them to exit */
760 for (i = 0; i < tc; i++) {
761 mutex_enter(&thr->lock);
763 mutex_exit(&thr->lock);
765 wake_up_process(tsks[i]);
770 mutex_enter(&run_args->lock_ctl);
771 ASSERT(run_args->threads_done == run_args->thread_count);
772 run_args->threads_done = 0;
773 mutex_exit(&run_args->lock_ctl);
775 /* Wake up all threads for read phase */
776 (void)zpios_upcall(run_args->pre, PHASE_PRE_READ, run_args, 0);
777 for (i = 0; i < tc; i++)
778 wake_up_process(tsks[i]);
780 /* Wait for read phase to complete */
781 tr->start = zpios_timespec_now();
782 wait_event(run_args->waitq, zpios_thread_done(run_args));
783 tr->stop = zpios_timespec_now();
784 (void)zpios_upcall(run_args->post, PHASE_POST_READ, run_args, rc);
786 for (i = 0; i < tc; i++) {
787 thr = run_args->threads[i];
789 mutex_enter(&thr->lock);
794 run_args->stats.rd_data += thr->stats.rd_data;
795 run_args->stats.rd_chunks += thr->stats.rd_chunks;
796 mutex_exit(&thr->lock);
799 tt->stop = zpios_timespec_now();
800 tt->delta = zpios_timespec_sub(tt->stop, tt->start);
801 tw->delta = zpios_timespec_sub(tw->stop, tw->start);
802 tr->delta = zpios_timespec_sub(tr->stop, tr->start);
805 kmem_free(tsks, sizeof(struct task_struct *) * tc);
807 /* Returns first encountered thread error (if any) */
811 /* Destroy all threads that were created successfully */
812 for (i = 0; i < tc; i++)
814 (void) kthread_stop(tsks[i]);
820 zpios_do_one_run(struct file *file, zpios_cmd_t *kcmd,
821 int data_size, void *data)
823 run_args_t *run_args = { 0 };
824 zpios_stats_t *stats = (zpios_stats_t *)data;
825 int i, n, m, size, rc;
827 if ((!kcmd->cmd_chunk_size) || (!kcmd->cmd_region_size) ||
828 (!kcmd->cmd_thread_count) || (!kcmd->cmd_region_count)) {
829 zpios_print(file, "Invalid chunk_size, region_size, "
830 "thread_count, or region_count, %d\n", -EINVAL);
834 if (!(kcmd->cmd_flags & DMU_WRITE) ||
835 !(kcmd->cmd_flags & DMU_READ)) {
836 zpios_print(file, "Invalid flags, minimally DMU_WRITE "
837 "and DMU_READ must be set, %d\n", -EINVAL);
841 if ((kcmd->cmd_flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
842 (kcmd->cmd_flags & DMU_VERIFY)) {
843 zpios_print(file, "Invalid flags, DMU_*_ZC incompatible "
844 "with DMU_VERIFY, used for performance analysis "
845 "only, %d\n", -EINVAL);
849 /* Opaque data on return contains structs of the following form:
851 * zpios_stat_t stats[];
852 * stats[0] = run_args->stats;
853 * stats[1-N] = threads[N]->stats;
854 * stats[N+1-M] = regions[M]->stats;
856 * Where N is the number of threads, and M is the number of regions.
858 size = (sizeof(zpios_stats_t) +
859 (kcmd->cmd_thread_count * sizeof(zpios_stats_t)) +
860 (kcmd->cmd_region_count * sizeof(zpios_stats_t)));
861 if (data_size < size) {
862 zpios_print(file, "Invalid size, command data buffer "
863 "size too small, (%d < %d)\n", data_size, size);
867 rc = zpios_setup_run(&run_args, kcmd, file);
871 rc = zpios_threads_run(run_args);
872 zpios_remove_objset(run_args);
878 m = 1 + kcmd->cmd_thread_count;
879 stats[0] = run_args->stats;
881 for (i = 0; i < kcmd->cmd_thread_count; i++)
882 stats[n+i] = run_args->threads[i]->stats;
884 for (i = 0; i < kcmd->cmd_region_count; i++)
885 stats[m+i] = run_args->regions[i].stats;
889 zpios_cleanup_run(run_args);
891 (void)zpios_upcall(kcmd->cmd_post, PHASE_POST_RUN, run_args, 0);
897 zpios_open(struct inode *inode, struct file *file)
899 unsigned int minor = iminor(inode);
902 if (minor >= ZPIOS_MINORS)
905 info = (zpios_info_t *)kmem_alloc(sizeof(*info), KM_SLEEP);
909 spin_lock_init(&info->info_lock);
910 info->info_size = ZPIOS_INFO_BUFFER_SIZE;
911 info->info_buffer = (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE,KM_SLEEP);
912 if (info->info_buffer == NULL) {
913 kmem_free(info, sizeof(*info));
917 info->info_head = info->info_buffer;
918 file->private_data = (void *)info;
924 zpios_release(struct inode *inode, struct file *file)
926 unsigned int minor = iminor(inode);
927 zpios_info_t *info = (zpios_info_t *)file->private_data;
929 if (minor >= ZPIOS_MINORS)
933 ASSERT(info->info_buffer);
935 vmem_free(info->info_buffer, ZPIOS_INFO_BUFFER_SIZE);
936 kmem_free(info, sizeof(*info));
942 zpios_buffer_clear(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
944 zpios_info_t *info = (zpios_info_t *)file->private_data;
947 ASSERT(info->info_buffer);
949 spin_lock(&info->info_lock);
950 memset(info->info_buffer, 0, info->info_size);
951 info->info_head = info->info_buffer;
952 spin_unlock(&info->info_lock);
958 zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
960 zpios_info_t *info = (zpios_info_t *)file->private_data;
962 int min, size, rc = 0;
965 ASSERT(info->info_buffer);
967 spin_lock(&info->info_lock);
968 if (kcfg->cfg_arg1 > 0) {
970 size = kcfg->cfg_arg1;
971 buf = (char *)vmem_alloc(size, KM_SLEEP);
977 /* Zero fill and truncate contents when coping buffer */
978 min = ((size < info->info_size) ? size : info->info_size);
979 memset(buf, 0, size);
980 memcpy(buf, info->info_buffer, min);
981 vmem_free(info->info_buffer, info->info_size);
982 info->info_size = size;
983 info->info_buffer = buf;
984 info->info_head = info->info_buffer;
987 kcfg->cfg_rc1 = info->info_size;
989 if (copy_to_user((struct zpios_cfg_t __user *)arg, kcfg, sizeof(*kcfg)))
992 spin_unlock(&info->info_lock);
998 zpios_ioctl_cfg(struct file *file, unsigned long arg)
1003 if (copy_from_user(&kcfg, (zpios_cfg_t *)arg, sizeof(kcfg)))
1006 if (kcfg.cfg_magic != ZPIOS_CFG_MAGIC) {
1007 zpios_print(file, "Bad config magic 0x%x != 0x%x\n",
1008 kcfg.cfg_magic, ZPIOS_CFG_MAGIC);
1012 switch (kcfg.cfg_cmd) {
1013 case ZPIOS_CFG_BUFFER_CLEAR:
1014 /* cfg_arg1 - Unused
1017 rc = zpios_buffer_clear(file, &kcfg, arg);
1019 case ZPIOS_CFG_BUFFER_SIZE:
1020 /* cfg_arg1 - 0 - query size; >0 resize
1021 * cfg_rc1 - Set to current buffer size
1023 rc = zpios_buffer_size(file, &kcfg, arg);
1026 zpios_print(file, "Bad config command %d\n",
1036 zpios_ioctl_cmd(struct file *file, unsigned long arg)
1042 kcmd = kmem_alloc(sizeof(zpios_cmd_t), KM_SLEEP);
1044 zpios_print(file, "Unable to kmem_alloc() %ld byte for "
1045 "zpios_cmd_t\n", (long int)sizeof(zpios_cmd_t));
1049 rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof(zpios_cmd_t));
1051 zpios_print(file, "Unable to copy command structure "
1052 "from user to kernel memory, %d\n", rc);
1056 if (kcmd->cmd_magic != ZPIOS_CMD_MAGIC) {
1057 zpios_print(file, "Bad command magic 0x%x != 0x%x\n",
1058 kcmd->cmd_magic, ZPIOS_CFG_MAGIC);
1063 /* Allocate memory for any opaque data the caller needed to pass on */
1064 if (kcmd->cmd_data_size > 0) {
1065 data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
1067 zpios_print(file, "Unable to vmem_alloc() %ld "
1068 "bytes for data buffer\n",
1069 (long)kcmd->cmd_data_size);
1074 rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
1075 cmd_data_str)), kcmd->cmd_data_size);
1077 zpios_print(file, "Unable to copy data buffer "
1078 "from user to kernel memory, %d\n", rc);
1083 rc = zpios_do_one_run(file, kcmd, kcmd->cmd_data_size, data);
1086 /* If the test failed do not print out the stats */
1090 rc = copy_to_user((void *)(arg + offsetof(zpios_cmd_t,
1091 cmd_data_str)), data, kcmd->cmd_data_size);
1093 zpios_print(file, "Unable to copy data buffer "
1094 "from kernel to user memory, %d\n", rc);
1099 vmem_free(data, kcmd->cmd_data_size);
1102 kmem_free(kcmd, sizeof(zpios_cmd_t));
1108 zpios_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1110 unsigned int minor = iminor(file->f_dentry->d_inode);
1113 /* Ignore tty ioctls */
1114 if ((cmd & 0xffffff00) == ((int)'T') << 8)
1117 if (minor >= ZPIOS_MINORS)
1122 rc = zpios_ioctl_cfg(file, arg);
1125 rc = zpios_ioctl_cmd(file, arg);
1128 zpios_print(file, "Bad ioctl command %d\n", cmd);
1136 #ifdef CONFIG_COMPAT
1137 /* Compatibility handler for ioctls from 32-bit ELF binaries */
1139 zpios_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1141 return zpios_unlocked_ioctl(file, cmd, arg);
1143 #endif /* CONFIG_COMPAT */
1145 /* I'm not sure why you would want to write in to this buffer from
1146 * user space since its principle use is to pass test status info
1147 * back to the user space, but I don't see any reason to prevent it.
1150 zpios_write(struct file *file, const char __user *buf,
1151 size_t count, loff_t *ppos)
1153 unsigned int minor = iminor(file->f_dentry->d_inode);
1154 zpios_info_t *info = (zpios_info_t *)file->private_data;
1157 if (minor >= ZPIOS_MINORS)
1161 ASSERT(info->info_buffer);
1163 spin_lock(&info->info_lock);
1165 /* Write beyond EOF */
1166 if (*ppos >= info->info_size) {
1171 /* Resize count if beyond EOF */
1172 if (*ppos + count > info->info_size)
1173 count = info->info_size - *ppos;
1175 if (copy_from_user(info->info_buffer, buf, count)) {
1183 spin_unlock(&info->info_lock);
1188 zpios_read(struct file *file, char __user *buf,
1189 size_t count, loff_t *ppos)
1191 unsigned int minor = iminor(file->f_dentry->d_inode);
1192 zpios_info_t *info = (zpios_info_t *)file->private_data;
1195 if (minor >= ZPIOS_MINORS)
1199 ASSERT(info->info_buffer);
1201 spin_lock(&info->info_lock);
1203 /* Read beyond EOF */
1204 if (*ppos >= info->info_size)
1207 /* Resize count if beyond EOF */
1208 if (*ppos + count > info->info_size)
1209 count = info->info_size - *ppos;
1211 if (copy_to_user(buf, info->info_buffer + *ppos, count)) {
1219 spin_unlock(&info->info_lock);
1223 static loff_t zpios_seek(struct file *file, loff_t offset, int origin)
1225 unsigned int minor = iminor(file->f_dentry->d_inode);
1226 zpios_info_t *info = (zpios_info_t *)file->private_data;
1229 if (minor >= ZPIOS_MINORS)
1233 ASSERT(info->info_buffer);
1235 spin_lock(&info->info_lock);
1238 case 0: /* SEEK_SET - No-op just do it */
1240 case 1: /* SEEK_CUR - Seek from current */
1241 offset = file->f_pos + offset;
1243 case 2: /* SEEK_END - Seek from end */
1244 offset = info->info_size + offset;
1249 file->f_pos = offset;
1250 file->f_version = 0;
1254 spin_unlock(&info->info_lock);
1259 static struct cdev zpios_cdev;
1260 static struct file_operations zpios_fops = {
1261 .owner = THIS_MODULE,
1263 .release = zpios_release,
1264 .unlocked_ioctl = zpios_unlocked_ioctl,
1265 #ifdef CONFIG_COMPAT
1266 .compat_ioctl = zpios_compat_ioctl,
1269 .write = zpios_write,
1270 .llseek = zpios_seek,
1279 dev = MKDEV(ZPIOS_MAJOR, 0);
1280 if ((rc = register_chrdev_region(dev, ZPIOS_MINORS, ZPIOS_NAME)))
1283 /* Support for registering a character driver */
1284 cdev_init(&zpios_cdev, &zpios_fops);
1285 zpios_cdev.owner = THIS_MODULE;
1286 kobject_set_name(&zpios_cdev.kobj, ZPIOS_NAME);
1287 if ((rc = cdev_add(&zpios_cdev, dev, ZPIOS_MINORS))) {
1288 printk(KERN_ERR "ZPIOS: Error adding cdev, %d\n", rc);
1289 kobject_put(&zpios_cdev.kobj);
1290 unregister_chrdev_region(dev, ZPIOS_MINORS);
1294 /* Support for udev make driver info available in sysfs */
1295 zpios_class = spl_class_create(THIS_MODULE, ZPIOS_NAME);
1296 if (IS_ERR(zpios_class)) {
1297 rc = PTR_ERR(zpios_class);
1298 printk(KERN_ERR "ZPIOS: Error creating zpios class, %d\n", rc);
1299 cdev_del(&zpios_cdev);
1300 unregister_chrdev_region(dev, ZPIOS_MINORS);
1304 zpios_device = spl_device_create(zpios_class, NULL,
1305 dev, NULL, ZPIOS_NAME);
1308 printk(KERN_ERR "ZPIOS: Error registering zpios device, %d\n", rc);
1315 dev_t dev = MKDEV(ZPIOS_MAJOR, 0);
1317 spl_device_destroy(zpios_class, zpios_device, dev);
1318 spl_class_destroy(zpios_class);
1319 cdev_del(&zpios_cdev);
1320 unregister_chrdev_region(dev, ZPIOS_MINORS);
1325 spl_module_init(zpios_init);
1326 spl_module_exit(zpios_fini);
1328 MODULE_AUTHOR("LLNL / Sun");
1329 MODULE_DESCRIPTION("Kernel PIOS implementation");
1330 MODULE_LICENSE("GPL");