#include <sys/zfs_rlock.h>
#include <sys/zfs_znode.h>
#include <sys/zvol.h>
+#include <linux/blkdev_compat.h>
+unsigned int zvol_inhibit_dev = 0; /* when nonzero, minor creation/removal returns early */
unsigned int zvol_major = ZVOL_MAJOR;
-unsigned int zvol_threads = 0;
+unsigned int zvol_threads = 32; /* thread count handed to taskq_create() for zvol_taskq */
+unsigned long zvol_max_discard_blocks = 16384; /* discard limit in volume blocks; scaled by zv_volblocksize for the queue */
static taskq_t *zvol_taskq;
static kmutex_t zvol_state_lock;
* The in-core state of each volume.
*/
typedef struct zvol_state {
- char zv_name[DISK_NAME_LEN]; /* name */
+ char zv_name[MAXNAMELEN]; /* name */
uint64_t zv_volsize; /* advertised space */
uint64_t zv_volblocksize;/* volume block size */
objset_t *zv_objset; /* objset handle */
ASSERT(MUTEX_HELD(&zvol_state_lock));
for (zv = list_head(&zvol_state_list); zv != NULL;
zv = list_next(&zvol_state_list, zv)) {
- if (!strncmp(zv->zv_name, name, DISK_NAME_LEN))
+ if (!strncmp(zv->zv_name, name, MAXNAMELEN))
return zv;
}
* Ensure the zap is flushed then inform the VFS of the capacity change.
*/
static int
-zvol_update_volsize(zvol_state_t *zv, uint64_t volsize)
+zvol_update_volsize(zvol_state_t *zv, uint64_t volsize, objset_t *os)
{
struct block_device *bdev;
dmu_tx_t *tx;
ASSERT(MUTEX_HELD(&zvol_state_lock));
- tx = dmu_tx_create(zv->zv_objset);
+ tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
return (error);
}
- error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1,
+ error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
&volsize, tx);
dmu_tx_commit(tx);
if (error)
return (error);
- error = dmu_free_long_range(zv->zv_objset,
+ error = dmu_free_long_range(os,
ZVOL_OBJ, volsize, DMU_OBJECT_END);
if (error)
return (error);
- zv->zv_volsize = volsize;
- zv->zv_changed = 1;
-
bdev = bdget_disk(zv->zv_disk, 0);
if (!bdev)
- return EIO;
+ return (EIO);
+/*
+ * 2.6.28 API change
+ * Added check_disk_size_change() helper function.
+ */
+#ifdef HAVE_CHECK_DISK_SIZE_CHANGE
+ set_capacity(zv->zv_disk, volsize >> 9);
+ zv->zv_volsize = volsize;
+ check_disk_size_change(zv->zv_disk, bdev);
+#else
+ zv->zv_volsize = volsize;
+ zv->zv_changed = 1;
+ (void) check_disk_change(bdev);
+#endif /* HAVE_CHECK_DISK_SIZE_CHANGE */
- error = check_disk_change(bdev);
- ASSERT3U(error, !=, 0);
bdput(bdev);
return (0);
goto out_doi;
}
- error = zvol_update_volsize(zv, volsize);
+ error = zvol_update_volsize(zv, volsize, os);
out_doi:
kmem_free(doi, sizeof(dmu_object_info_t));
out:
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
boolean_t slogging;
+ ssize_t immediate_write_sz;
if (zil_replaying(zilog, tx))
return;
- slogging = spa_has_slogs(zilog->zl_spa);
+ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ ? 0 : zvol_immediate_write_sz;
+ slogging = spa_has_slogs(zilog->zl_spa) &&
+ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
while (size) {
itx_t *itx;
* Unlike zfs_log_write() we can be called with
* up to DMU_MAX_ACCESS/2 (5MB) writes.
*/
- if (blocksize > zvol_immediate_write_sz && !slogging &&
+ if (blocksize > immediate_write_sz && !slogging &&
size >= blocksize && offset % blocksize == 0) {
write_state = WR_INDIRECT; /* uses dmu_sync */
len = blocksize;
dmu_tx_t *tx;
rl_t *rl;
+ /*
+ * Annotate this call path with a flag that indicates that it is
+ * unsafe to use KM_SLEEP during memory allocations due to the
+ * potential for a deadlock. KM_PUSHPAGE should be used instead.
+ */
+ ASSERT(!(current->flags & PF_NOFS));
+ current->flags |= PF_NOFS;
+
+ if (req->cmd_flags & VDEV_REQ_FLUSH)
+ zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+ /*
+ * Some requests are just for flush and nothing else.
+ */
+ if (size == 0) {
+ blk_end_request(req, 0, size);
+ goto out;
+ }
+
rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
tx = dmu_tx_create(zv->zv_objset);
dmu_tx_abort(tx);
zfs_range_unlock(rl);
blk_end_request(req, -error, size);
- return;
+ goto out;
}
error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx);
if (error == 0)
- zvol_log_write(zv, tx, offset, size, rq_is_sync(req));
+ zvol_log_write(zv, tx, offset, size,
+ req->cmd_flags & VDEV_REQ_FUA);
dmu_tx_commit(tx);
zfs_range_unlock(rl);
- if (rq_is_sync(req))
+ if ((req->cmd_flags & VDEV_REQ_FUA) ||
+ zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
blk_end_request(req, -error, size);
+out:
+ current->flags &= ~PF_NOFS;
+}
+
+#ifdef HAVE_BLK_QUEUE_DISCARD
+static void
+zvol_discard(void *arg)
+{
+ struct request *req = (struct request *)arg; /* arg is the block request to service */
+ struct request_queue *q = req->q;
+ zvol_state_t *zv = q->queuedata; /* per-volume state hung off the request queue */
+ uint64_t offset = blk_rq_pos(req) << 9; /* request position scaled by 512 (<< 9) to bytes */
+ uint64_t size = blk_rq_bytes(req); /* request length in bytes */
+ int error;
+ rl_t *rl;
+
+ /*
+ * Discard handler: frees the byte range [offset, offset + size) of
+ * the volume's data object (ZVOL_OBJ) and then completes the request.
+ * The request dispatcher hands requests flagged VDEV_REQ_DISCARD to
+ * this function through zvol_dispatch().
+ *
+ * Annotate this call path with a flag that indicates that it is
+ * unsafe to use KM_SLEEP during memory allocations due to the
+ * potential for a deadlock. KM_PUSHPAGE should be used instead.
+ * The flag must not already be set on entry (asserted below) and is
+ * cleared again at the "out" label on every exit path.
+ */
+ ASSERT(!(current->flags & PF_NOFS));
+ current->flags |= PF_NOFS;
+
+ if (offset + size > zv->zv_volsize) { /* range runs past the volume size */
+ blk_end_request(req, -EIO, size);
+ goto out;
+ }
+
+ if (size == 0) { /* nothing to free: complete with success */
+ blk_end_request(req, 0, size);
+ goto out;
+ }
+
+ rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER); /* writer lock over the range being freed */
+
+ error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, size);
+
+ /*
+ * TODO: maybe we should add the operation to the log.
+ * As written, no log record is created for the freed range in this
+ * function.
+ */
+
+ zfs_range_unlock(rl);
+
+ blk_end_request(req, -error, size); /* error is negated before being reported to the block layer */
+out:
+ current->flags &= ~PF_NOFS; /* undo the annotation made on entry */
}
+#endif /* HAVE_BLK_QUEUE_DISCARD */
/*
* Common read path running under the zvol taskq context. This function
int error;
rl_t *rl;
+ if (size == 0) {
+ blk_end_request(req, 0, size);
+ return;
+ }
+
rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req);
while ((req = blk_fetch_request(q)) != NULL) {
size = blk_rq_bytes(req);
- if (blk_rq_pos(req) + blk_rq_sectors(req) >
+ if (size != 0 && blk_rq_pos(req) + blk_rq_sectors(req) >
get_capacity(zv->zv_disk)) {
printk(KERN_INFO
"%s: bad access: block=%llu, count=%lu\n",
break;
}
+#ifdef HAVE_BLK_QUEUE_DISCARD
+ if (req->cmd_flags & VDEV_REQ_DISCARD) {
+ zvol_dispatch(zvol_discard, req);
+ break;
+ }
+#endif /* HAVE_BLK_QUEUE_DISCARD */
+
zvol_dispatch(zvol_write, req);
break;
default:
ASSERT(zio != NULL);
ASSERT(size != 0);
- zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
+ zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_PUSHPAGE);
zgd->zgd_zilog = zv->zv_zilog;
zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
case BLKFLSBUF:
zil_commit(zv->zv_zilog, ZVOL_OBJ);
break;
+ case BLKZNAME:
+ error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN);
+ break;
default:
error = -ENOTTY;
mutex_enter(&zvol_state_lock);
zv = zvol_find_by_dev(dev);
- kobj = zv ? get_disk(zv->zv_disk) : ERR_PTR(-ENOENT);
+ kobj = zv ? get_disk(zv->zv_disk) : NULL;
mutex_exit(&zvol_state_lock);
return kobj;
if (zv->zv_queue == NULL)
goto out_kmem;
+#ifdef HAVE_BLK_QUEUE_FLUSH
+ blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA);
+#else
+ blk_queue_ordered(zv->zv_queue, QUEUE_ORDERED_DRAIN, NULL);
+#endif /* HAVE_BLK_QUEUE_FLUSH */
+
zv->zv_disk = alloc_disk(ZVOL_MINORS);
if (zv->zv_disk == NULL)
goto out_queue;
zv->zv_queue->queuedata = zv;
zv->zv_dev = dev;
zv->zv_open_count = 0;
- strlcpy(zv->zv_name, name, DISK_NAME_LEN);
+ strlcpy(zv->zv_name, name, MAXNAMELEN);
mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
zv->zv_disk->fops = &zvol_ops;
zv->zv_disk->private_data = zv;
zv->zv_disk->queue = zv->zv_queue;
- snprintf(zv->zv_disk->disk_name, DISK_NAME_LEN, "%s", name);
+ snprintf(zv->zv_disk->disk_name, DISK_NAME_LEN, "%s%d",
+ ZVOL_DEV_NAME, (dev & MINORMASK));
return zv;
set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
+ blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX);
+ blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
+ blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
+ blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
+ blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
+#ifdef HAVE_BLK_QUEUE_DISCARD
+ blk_queue_max_discard_sectors(zv->zv_queue,
+ (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
+ blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
+#endif
+#ifdef HAVE_BLK_QUEUE_NONROT
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);
+#endif
+
if (zil_replay_disable)
zil_destroy(dmu_objset_zil(os), B_FALSE);
else
spa_t *spa = NULL;
int error = 0;
+ if (zvol_inhibit_dev)
+ return (0);
+
mutex_enter(&zvol_state_lock);
if (pool) {
error = dmu_objset_find_spa(NULL, pool, zvol_create_minors_cb,
zvol_state_t *zv, *zv_next;
char *str;
- str = kmem_zalloc(DISK_NAME_LEN, KM_SLEEP);
+ if (zvol_inhibit_dev)
+ return;
+
+ str = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
if (pool) {
(void) strncpy(str, pool, strlen(pool));
(void) strcat(str, "/");
}
}
mutex_exit(&zvol_state_lock);
- kmem_free(str, DISK_NAME_LEN);
+ kmem_free(str, MAXNAMELEN);
}
int
{
int error;
- if (!zvol_threads)
- zvol_threads = num_online_cpus();
-
zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_threads, maxclsyspri,
zvol_threads, INT_MAX, TASKQ_PREPOPULATE);
if (zvol_taskq == NULL) {
list_destroy(&zvol_state_list);
}
-module_param(zvol_major, uint, 0);
+module_param(zvol_inhibit_dev, uint, 0644); /* only parameter here registered with a write bit (0644) */
+MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
+
+module_param(zvol_major, uint, 0444); /* read-only mode bits; previously registered with mode 0 */
MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
-module_param(zvol_threads, uint, 0);
+module_param(zvol_threads, uint, 0444); /* read-only mode bits; previously registered with mode 0 */
MODULE_PARM_DESC(zvol_threads, "Number of threads for zvol device");
+
+module_param(zvol_max_discard_blocks, ulong, 0444); /* read-only mode bits */
+MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard at once");