X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzvol.c;h=60ff64ebe7b03dfa41e96de42e611ecf02da7986;hb=8630650a8d9cfba379a5b73bd95e903d577e0d8d;hp=9dda04077e7d0073dddcb2ce6c63b5160d261974;hpb=56c34bac44d47898809c46db3e5444511bbe0ef6;p=zfs.git

diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 9dda040..60ff64e 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -46,8 +46,10 @@
 #include <sys/zvol.h>
 #include <linux/blkdev_compat.h>
 
+unsigned int zvol_inhibit_dev = 0;
 unsigned int zvol_major = ZVOL_MAJOR;
-unsigned int zvol_threads = 0;
+unsigned int zvol_threads = 32;
+unsigned long zvol_max_discard_blocks = 16384;
 
 static taskq_t *zvol_taskq;
 static kmutex_t zvol_state_lock;
@@ -459,11 +461,15 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx,
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
 	boolean_t slogging;
+	ssize_t immediate_write_sz;
 
 	if (zil_replaying(zilog, tx))
 		return;
 
-	slogging = spa_has_slogs(zilog->zl_spa);
+	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+		? 0 : zvol_immediate_write_sz;
+	slogging = spa_has_slogs(zilog->zl_spa) &&
+		(zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
 
 	while (size) {
 		itx_t *itx;
@@ -475,7 +481,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx,
 		 * Unlike zfs_log_write() we can be called with
 		 * up to DMU_MAX_ACCESS/2 (5MB) writes.
 		 */
-		if (blocksize > zvol_immediate_write_sz && !slogging &&
+		if (blocksize > immediate_write_sz && !slogging &&
 		    size >= blocksize && offset % blocksize == 0) {
 			write_state = WR_INDIRECT; /* uses dmu_sync */
 			len = blocksize;
@@ -534,6 +540,25 @@ zvol_write(void *arg)
 	dmu_tx_t *tx;
 	rl_t *rl;
 
+	/*
+	 * Annotate this call path with a flag that indicates that it is
+	 * unsafe to use KM_SLEEP during memory allocations due to the
+	 * potential for a deadlock.  KM_PUSHPAGE should be used instead.
+	 */
+	ASSERT(!(current->flags & PF_NOFS));
+	current->flags |= PF_NOFS;
+
+	if (req->cmd_flags & VDEV_REQ_FLUSH)
+		zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+	/*
+	 * Some requests are just for flush and nothing else.
+	 */
+	if (size == 0) {
+		blk_end_request(req, 0, size);
+		goto out;
+	}
+
 	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
 
 	tx = dmu_tx_create(zv->zv_objset);
@@ -545,22 +570,72 @@ zvol_write(void *arg)
 		dmu_tx_abort(tx);
 		zfs_range_unlock(rl);
 		blk_end_request(req, -error, size);
-		return;
+		goto out;
 	}
 
 	error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx);
 	if (error == 0)
-		zvol_log_write(zv, tx, offset, size, rq_is_sync(req));
+		zvol_log_write(zv, tx, offset, size,
+		    req->cmd_flags & VDEV_REQ_FUA);
 
 	dmu_tx_commit(tx);
 	zfs_range_unlock(rl);
 
-	if (rq_is_sync(req) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
+	if ((req->cmd_flags & VDEV_REQ_FUA) ||
+	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 
 	blk_end_request(req, -error, size);
+out:
+	current->flags &= ~PF_NOFS;
 }
 
+#ifdef HAVE_BLK_QUEUE_DISCARD
+static void
+zvol_discard(void *arg)
+{
+	struct request *req = (struct request *)arg;
+	struct request_queue *q = req->q;
+	zvol_state_t *zv = q->queuedata;
+	uint64_t offset = blk_rq_pos(req) << 9;
+	uint64_t size = blk_rq_bytes(req);
+	int error;
+	rl_t *rl;
+
+	/*
+	 * Set PF_NOFS on the current task for the life of this request (it is
+	 * cleared at 'out').  The flag marks this call path as unsafe for
+	 * KM_SLEEP allocations, which may deadlock; use KM_PUSHPAGE instead.
+	 */
+	ASSERT(!(current->flags & PF_NOFS));
+	current->flags |= PF_NOFS;
+
+	if (offset + size > zv->zv_volsize) {
+		blk_end_request(req, -EIO, size);
+		goto out;
+	}
+
+	if (size == 0) {
+		blk_end_request(req, 0, size);
+		goto out;
+	}
+
+	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
+
+	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, size);
+
+	/*
+	 * TODO: this free is not added to the log; decide whether it should be.
+	 */
+
+	zfs_range_unlock(rl);
+
+	blk_end_request(req, -error, size);
+out:
+	current->flags &= ~PF_NOFS;
+}
+#endif /* HAVE_BLK_QUEUE_DISCARD */
+
 /*
  * Common read path running under the zvol taskq context.  This function
  * is responsible for copying the requested data out of the DMU and in to
@@ -578,6 +653,11 @@ zvol_read(void *arg)
 	int error;
 	rl_t *rl;
 
+	if (size == 0) {
+		blk_end_request(req, 0, size);
+		return;
+	}
+
 	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
 
 	error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req);
@@ -627,7 +707,7 @@ zvol_request(struct request_queue *q)
 	while ((req = blk_fetch_request(q)) != NULL) {
 		size = blk_rq_bytes(req);
 
-		if (blk_rq_pos(req) + blk_rq_sectors(req) >
+		if (size != 0 && blk_rq_pos(req) + blk_rq_sectors(req) >
 		    get_capacity(zv->zv_disk)) {
 			printk(KERN_INFO
 			       "%s: bad access: block=%llu, count=%lu\n",
@@ -656,6 +736,13 @@ zvol_request(struct request_queue *q)
 				break;
 			}
 
+#ifdef HAVE_BLK_QUEUE_DISCARD
+			if (req->cmd_flags & VDEV_REQ_DISCARD) {
+				zvol_dispatch(zvol_discard, req);
+				break;
+			}
+#endif /* HAVE_BLK_QUEUE_DISCARD */
+
 			zvol_dispatch(zvol_write, req);
 			break;
 		default:
@@ -1062,6 +1149,12 @@ zvol_alloc(dev_t dev, const char *name)
 	if (zv->zv_queue == NULL)
 		goto out_kmem;
 
+#ifdef HAVE_BLK_QUEUE_FLUSH
+	blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA);
+#else
+	blk_queue_ordered(zv->zv_queue, QUEUE_ORDERED_DRAIN, NULL);
+#endif /* HAVE_BLK_QUEUE_FLUSH */
+
 	zv->zv_disk = alloc_disk(ZVOL_MINORS);
 	if (zv->zv_disk == NULL)
 		goto out_queue;
@@ -1164,6 +1257,21 @@ __zvol_create_minor(const char *name)
 
 	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
 
+	blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX);
+	blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
+	blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
+	blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
+	blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
+#ifdef HAVE_BLK_QUEUE_DISCARD
+	blk_queue_max_discard_sectors(zv->zv_queue,
+	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
+	blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
+#endif
+#ifdef HAVE_BLK_QUEUE_NONROT
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);
+#endif
+
 	if (zil_replay_disable)
 		zil_destroy(dmu_objset_zil(os), B_FALSE);
 	else
@@ -1257,6 +1365,9 @@ zvol_create_minors(const char *pool)
 	spa_t *spa = NULL;
 	int error = 0;
 
+	if (zvol_inhibit_dev)
+		return (0);
+
 	mutex_enter(&zvol_state_lock);
 	if (pool) {
 		error = dmu_objset_find_spa(NULL, pool, zvol_create_minors_cb,
@@ -1286,6 +1397,9 @@ zvol_remove_minors(const char *pool)
 	zvol_state_t *zv, *zv_next;
 	char *str;
 
+	if (zvol_inhibit_dev)
+		return;
+
 	str = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
 	if (pool) {
 		(void) strncpy(str, pool, strlen(pool));
@@ -1310,9 +1424,6 @@ zvol_init(void)
 {
 	int error;
 
-	if (!zvol_threads)
-		zvol_threads = num_online_cpus();
-
 	zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_threads, maxclsyspri,
 		                  zvol_threads, INT_MAX, TASKQ_PREPOPULATE);
 	if (zvol_taskq == NULL) {
@@ -1350,8 +1461,14 @@ zvol_fini(void)
 	list_destroy(&zvol_state_list);
 }
 
+module_param(zvol_inhibit_dev, uint, 0644);
+MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
+
 module_param(zvol_major, uint, 0444);
 MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
 
 module_param(zvol_threads, uint, 0444);
 MODULE_PARM_DESC(zvol_threads, "Number of threads for zvol device");
+
+module_param(zvol_max_discard_blocks, ulong, 0444);
+MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard at once");