Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe: - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ was initially a fork of CFQ, but subsequently changed to implement fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant to be used on desktop type single drives, providing good fairness. From Paolo. - Add Kyber IO scheduler. This is a full multiqueue aware scheduler, using a scalable token based algorithm that throttles IO based on live completion IO stats, similary to blk-wbt. From Omar. - A series from Jan, moving users to separately allocated backing devices. This continues the work of separating backing device life times, solving various problems with hot removal. - A series of updates for lightnvm, mostly from Javier. Includes a 'pblk' target that exposes an open channel SSD as a physical block device. - A series of fixes and improvements for nbd from Josef. - A series from Omar, removing queue sharing between devices on mostly legacy drivers. This helps us clean up other bits, if we know that a queue only has a single device backing. This has been overdue for more than a decade. - Fixes for the blk-stats, and improvements to unify the stats and user windows. This both improves blk-wbt, and enables other users to register a need to receive IO stats for a device. From Omar. - blk-throttle improvements from Shaohua. This provides a scalable framework for implementing scalable priotization - particularly for blk-mq, but applicable to any type of block device. The interface is marked experimental for now. - Bucketized IO stats for IO polling from Stephen Bates. This improves efficiency of polled workloads in the presence of mixed block size IO. - A few fixes for opal, from Scott. - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics. From a variety of folks, mostly Sagi and James Smart. - A series from Bart, improving our exposed info and capabilities from the blk-mq debugfs support. - A series from Christoph, cleaning up how handle WRITE_ZEROES. - A series from Christoph, cleaning up the block layer handling of how we track errors in a request. On top of being a nice cleanup, it also shrinks the size of struct request a bit. - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was never used by platforms, and the latter has outlived it's usefulness. - Various little bug fixes and cleanups from a wide variety of folks. * 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits) block: hide badblocks attribute by default blk-mq: unify hctx delay_work and run_work block: add kblock_mod_delayed_work_on() blk-mq: unify hctx delayed_run_work and run_work nbd: fix use after free on module unload MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler blk-mq-sched: alloate reserved tags out of normal pool mtip32xx: use runtime tag to initialize command header scsi: Implement blk_mq_ops.show_rq() blk-mq: Add blk_mq_ops.show_rq() blk-mq: Show operation, cmd_flags and rq_flags names blk-mq: Make blk_flags_show() callers append a newline character blk-mq: Move the "state" debugfs attribute one level down blk-mq: Unregister debugfs attributes earlier blk-mq: Only unregister hctxs for which registration succeeded blk-mq-debugfs: Rename functions for registering and unregistering the mq directory blk-mq: Let blk_mq_debugfs_register() look up the queue name blk-mq: Register <dev>/queue/mq after having registered <dev>/queue ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset ide-pm: always pass 0 error to __blk_end_request_all ..
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-01 10:39:57 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-01 10:39:57 -0700
commit: 694752922b12bd318aa80191bd9d8c3dcfb39055 (patch)
tree: 5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /drivers/nvme/host/core.c
parent: a351e9b9fc24e982ec2f0e76379a49826036da12 (diff)
parent: 9438b3e080beccf6022138ea62192d55cc7dc4ed (diff)
download: linux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.gz
linux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.xz
1 files changed, 76 insertions, 18 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index eeb409c287b8..bf6729b1d8bf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5;
 module_param(shutdown_timeout, byte, 0644);
 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
 
-unsigned int nvme_max_retries = 5;
-module_param_named(max_retries, nvme_max_retries, uint, 0644);
+static u8 nvme_max_retries = 5;
+module_param_named(max_retries, nvme_max_retries, byte, 0644);
 MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
-EXPORT_SYMBOL_GPL(nvme_max_retries);
 
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
@@ -67,6 +66,57 @@ static DEFINE_SPINLOCK(dev_list_lock);
 
 static struct class *nvme_class;
 
+static int nvme_error_status(struct request *req)
+{
+	switch (nvme_req(req)->status & 0x7ff) {
+	case NVME_SC_SUCCESS:
+		return 0;
+	case NVME_SC_CAP_EXCEEDED:
+		return -ENOSPC;
+	default:
+		return -EIO;
+
+	/*
+	 * XXX: these errors are a nasty side-band protocol to
+	 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
+	 * anywhere..
+	 */
+	case NVME_SC_CMD_SEQ_ERROR:
+		return -EILSEQ;
+	case NVME_SC_ONCS_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case NVME_SC_WRITE_FAULT:
+	case NVME_SC_READ_ERROR:
+	case NVME_SC_UNWRITTEN_BLOCK:
+		return -ENODATA;
+	}
+}
+
+static inline bool nvme_req_needs_retry(struct request *req)
+{
+	if (blk_noretry_request(req))
+		return false;
+	if (nvme_req(req)->status & NVME_SC_DNR)
+		return false;
+	if (jiffies - req->start_time >= req->timeout)
+		return false;
+	if (nvme_req(req)->retries >= nvme_max_retries)
+		return false;
+	return true;
+}
+
+void nvme_complete_rq(struct request *req)
+{
+	if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
+		nvme_req(req)->retries++;
+		blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+		return;
+	}
+
+	blk_mq_end_request(req, nvme_error_status(req));
+}
+EXPORT_SYMBOL_GPL(nvme_complete_rq);
+
 void nvme_cancel_request(struct request *req, void *data, bool reserved)
 {
 	int status;
@@ -80,7 +130,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved)
 	status = NVME_SC_ABORT_REQ;
 	if (blk_queue_dying(req->q))
 		status |= NVME_SC_DNR;
-	blk_mq_complete_request(req, status);
+	nvme_req(req)->status = status;
+	blk_mq_complete_request(req);
+
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);
 
@@ -205,12 +257,6 @@ fail:
 	return NULL;
 }
 
-void nvme_requeue_req(struct request *req)
-{
-	blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
-}
-EXPORT_SYMBOL_GPL(nvme_requeue_req);
-
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid)
 {
@@ -327,6 +373,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 {
 	int ret = BLK_MQ_RQ_QUEUE_OK;
 
+	if (!(req->rq_flags & RQF_DONTPREP)) {
+		nvme_req(req)->retries = 0;
+		nvme_req(req)->flags = 0;
+		req->rq_flags |= RQF_DONTPREP;
+	}
+
 	switch (req_op(req)) {
 	case REQ_OP_DRV_IN:
 	case REQ_OP_DRV_OUT:
@@ -335,6 +387,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 	case REQ_OP_FLUSH:
 		nvme_setup_flush(ns, cmd);
 		break;
+	case REQ_OP_WRITE_ZEROES:
+		/* currently only aliased to deallocate for a few ctrls: */
 	case REQ_OP_DISCARD:
 		ret = nvme_setup_discard(ns, req, cmd);
 		break;
@@ -378,7 +432,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	blk_execute_rq(req->q, NULL, req, at_head);
 	if (result)
 		*result = nvme_req(req)->result;
-	ret = req->errors;
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		ret = -EINTR;
+	else
+		ret = nvme_req(req)->status;
  out:
 	blk_mq_free_request(req);
 	return ret;
@@ -463,7 +520,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
 	}
  submit:
 	blk_execute_rq(req->q, disk, req, 0);
-	ret = req->errors;
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		ret = -EINTR;
+	else
+		ret = nvme_req(req)->status;
 	if (result)
 		*result = le32_to_cpu(nvme_req(req)->result.u32);
 	if (meta && !ret && !write) {
@@ -900,16 +960,14 @@ static void nvme_config_discard(struct nvme_ns *ns)
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
 
-	if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
-		ns->queue->limits.discard_zeroes_data = 1;
-	else
-		ns->queue->limits.discard_zeroes_data = 0;
-
 	ns->queue->limits.discard_alignment = logical_block_size;
 	ns->queue->limits.discard_granularity = logical_block_size;
 	blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
 	blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+
+	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+		blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
 }
 
 static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
@@ -2393,7 +2451,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
-		blk_mq_freeze_queue_start(ns->queue);
+		blk_freeze_queue_start(ns->queue);
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
author	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-01 10:39:57 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-01 10:39:57 -0700
commit	694752922b12bd318aa80191bd9d8c3dcfb39055 (patch)
tree	5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /drivers/nvme/host/core.c
parent	a351e9b9fc24e982ec2f0e76379a49826036da12 (diff)
parent	9438b3e080beccf6022138ea62192d55cc7dc4ed (diff)
download	linux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.gz linux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.xz