X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fvdev_disk.c;h=5cb3fe279c9de21c01f0f1f4c5c6a35f1429fe3f;hb=e2448b0e62f73f8b9574d74c5b327707b67b703a;hp=3e59bd22615efbb417f2f8e02a2ba7844526f7fe;hpb=e06be586410cdad14d2dce76af4f2d43eebe7c83;p=zfs.git diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 3e59bd2..5cb3fe2 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -33,6 +33,8 @@ #include #include +char *zfs_vdev_scheduler = VDEV_SCHEDULER; + /* * Virtual device vector for disks. */ @@ -102,6 +104,60 @@ vdev_disk_error(zio_t *zio) #endif } +/* + * Use the Linux 'noop' elevator for zfs managed block devices. This + * strikes the ideal balance by allowing the zfs elevator to do all + * request ordering and prioritization. While allowing the Linux + * elevator to do the maximum front/back merging allowed by the + * physical device. This yields the largest possible requests for + * the device with the lowest total overhead. + * + * Unfortunately we cannot directly call the elevator_switch() function + * because it is not exported from the block layer. This means we have + * to use the sysfs interface and a user space upcall. Pools will be + * automatically imported on module load so we must do this at device + * open time from the kernel. + */ +#define SET_SCHEDULER_CMD \ + "exec 0/sys/block/%s/queue/scheduler " \ + " 2>/dev/null; " \ + "echo %s" + +static int +vdev_elevator_switch(vdev_t *v, char *elevator) +{ + vdev_disk_t *vd = v->vdev_tsd; + struct block_device *bdev = vd->vd_bdev; + struct request_queue *q = bdev_get_queue(bdev); + char *device = bdev->bd_disk->disk_name; + char *argv[] = { "/bin/sh", "-c", NULL, NULL }; + char *envp[] = { NULL }; + int error; + + /* Skip devices which are not whole disks (partitions) */ + if (!v->vdev_wholedisk) + return (0); + + /* Skip devices without schedulers (loop, ram, dm, etc) */ + if (!q->elevator || !blk_queue_stackable(q)) + return (0); + + /* Leave existing scheduler when set to "none" */ + if (!strncmp(elevator, "none", 4) && (strlen(elevator) == 4)) + return (0); + + argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); + error = call_usermodehelper(argv[0], argv, envp, 1); + if (error) + printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n", + elevator, v->vdev_path, device, error); + + strfree(argv[2]); + + return (error); +} + static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) { @@ -167,6 +223,9 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) /* Based on the minimum sector size set the block size */ *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1; + /* Try to set the io scheduler elevator algorithm */ + (void) vdev_elevator_switch(v, zfs_vdev_scheduler); + return 0; } @@ -702,3 +761,6 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) return 0; } + +module_param(zfs_vdev_scheduler, charp, 0644); +MODULE_PARM_DESC(zfs_vdev_scheduler, "IO Scheduler (noop)");