summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-01 10:39:57 -0700
commit694752922b12bd318aa80191bd9d8c3dcfb39055 (patch)
tree5afe83fd99100bea546dd5a1c1f778c58f41e5c0 /drivers/nvme/host/core.c
parenta351e9b9fc24e982ec2f0e76379a49826036da12 (diff)
parent9438b3e080beccf6022138ea62192d55cc7dc4ed (diff)
downloadlinux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.gz
linux-694752922b12bd318aa80191bd9d8c3dcfb39055.tar.xz
Merge branch 'for-4.12/block' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: - Add BFQ IO scheduler under the new blk-mq scheduling framework. BFQ was initially a fork of CFQ, but subsequently changed to implement fairness based on B-WF2Q+, a modified variant of WF2Q. BFQ is meant to be used on desktop type single drives, providing good fairness. From Paolo. - Add Kyber IO scheduler. This is a full multiqueue aware scheduler, using a scalable token based algorithm that throttles IO based on live completion IO stats, similary to blk-wbt. From Omar. - A series from Jan, moving users to separately allocated backing devices. This continues the work of separating backing device life times, solving various problems with hot removal. - A series of updates for lightnvm, mostly from Javier. Includes a 'pblk' target that exposes an open channel SSD as a physical block device. - A series of fixes and improvements for nbd from Josef. - A series from Omar, removing queue sharing between devices on mostly legacy drivers. This helps us clean up other bits, if we know that a queue only has a single device backing. This has been overdue for more than a decade. - Fixes for the blk-stats, and improvements to unify the stats and user windows. This both improves blk-wbt, and enables other users to register a need to receive IO stats for a device. From Omar. - blk-throttle improvements from Shaohua. This provides a scalable framework for implementing scalable priotization - particularly for blk-mq, but applicable to any type of block device. The interface is marked experimental for now. - Bucketized IO stats for IO polling from Stephen Bates. This improves efficiency of polled workloads in the presence of mixed block size IO. - A few fixes for opal, from Scott. - A few pulls for NVMe, including a lot of fixes for NVMe-over-fabrics. From a variety of folks, mostly Sagi and James Smart. - A series from Bart, improving our exposed info and capabilities from the blk-mq debugfs support. - A series from Christoph, cleaning up how handle WRITE_ZEROES. - A series from Christoph, cleaning up the block layer handling of how we track errors in a request. On top of being a nice cleanup, it also shrinks the size of struct request a bit. - Removal of mg_disk and hd (sorry Linus) by Christoph. The former was never used by platforms, and the latter has outlived it's usefulness. - Various little bug fixes and cleanups from a wide variety of folks. * 'for-4.12/block' of git://git.kernel.dk/linux-block: (329 commits) block: hide badblocks attribute by default blk-mq: unify hctx delay_work and run_work block: add kblock_mod_delayed_work_on() blk-mq: unify hctx delayed_run_work and run_work nbd: fix use after free on module unload MAINTAINERS: bfq: Add Paolo as maintainer for the BFQ I/O scheduler blk-mq-sched: alloate reserved tags out of normal pool mtip32xx: use runtime tag to initialize command header scsi: Implement blk_mq_ops.show_rq() blk-mq: Add blk_mq_ops.show_rq() blk-mq: Show operation, cmd_flags and rq_flags names blk-mq: Make blk_flags_show() callers append a newline character blk-mq: Move the "state" debugfs attribute one level down blk-mq: Unregister debugfs attributes earlier blk-mq: Only unregister hctxs for which registration succeeded blk-mq-debugfs: Rename functions for registering and unregistering the mq directory blk-mq: Let blk_mq_debugfs_register() look up the queue name blk-mq: Register <dev>/queue/mq after having registered <dev>/queue ide-pm: always pass 0 error to ide_complete_rq in ide_do_devset ide-pm: always pass 0 error to __blk_end_request_all ..
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--drivers/nvme/host/core.c94
1 files changed, 76 insertions, 18 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index eeb409c287b8..bf6729b1d8bf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -49,10 +49,9 @@ unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-unsigned int nvme_max_retries = 5;
-module_param_named(max_retries, nvme_max_retries, uint, 0644);
+static u8 nvme_max_retries = 5;
+module_param_named(max_retries, nvme_max_retries, byte, 0644);
MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
-EXPORT_SYMBOL_GPL(nvme_max_retries);
static int nvme_char_major;
module_param(nvme_char_major, int, 0);
@@ -67,6 +66,57 @@ static DEFINE_SPINLOCK(dev_list_lock);
static struct class *nvme_class;
+static int nvme_error_status(struct request *req)
+{
+ switch (nvme_req(req)->status & 0x7ff) {
+ case NVME_SC_SUCCESS:
+ return 0;
+ case NVME_SC_CAP_EXCEEDED:
+ return -ENOSPC;
+ default:
+ return -EIO;
+
+ /*
+ * XXX: these errors are a nasty side-band protocol to
+ * drivers/md/dm-mpath.c:noretry_error() that aren't documented
+ * anywhere..
+ */
+ case NVME_SC_CMD_SEQ_ERROR:
+ return -EILSEQ;
+ case NVME_SC_ONCS_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case NVME_SC_WRITE_FAULT:
+ case NVME_SC_READ_ERROR:
+ case NVME_SC_UNWRITTEN_BLOCK:
+ return -ENODATA;
+ }
+}
+
+static inline bool nvme_req_needs_retry(struct request *req)
+{
+ if (blk_noretry_request(req))
+ return false;
+ if (nvme_req(req)->status & NVME_SC_DNR)
+ return false;
+ if (jiffies - req->start_time >= req->timeout)
+ return false;
+ if (nvme_req(req)->retries >= nvme_max_retries)
+ return false;
+ return true;
+}
+
+void nvme_complete_rq(struct request *req)
+{
+ if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
+ nvme_req(req)->retries++;
+ blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
+ return;
+ }
+
+ blk_mq_end_request(req, nvme_error_status(req));
+}
+EXPORT_SYMBOL_GPL(nvme_complete_rq);
+
void nvme_cancel_request(struct request *req, void *data, bool reserved)
{
int status;
@@ -80,7 +130,9 @@ void nvme_cancel_request(struct request *req, void *data, bool reserved)
status = NVME_SC_ABORT_REQ;
if (blk_queue_dying(req->q))
status |= NVME_SC_DNR;
- blk_mq_complete_request(req, status);
+ nvme_req(req)->status = status;
+ blk_mq_complete_request(req);
+
}
EXPORT_SYMBOL_GPL(nvme_cancel_request);
@@ -205,12 +257,6 @@ fail:
return NULL;
}
-void nvme_requeue_req(struct request *req)
-{
- blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
-}
-EXPORT_SYMBOL_GPL(nvme_requeue_req);
-
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags, int qid)
{
@@ -327,6 +373,12 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
{
int ret = BLK_MQ_RQ_QUEUE_OK;
+ if (!(req->rq_flags & RQF_DONTPREP)) {
+ nvme_req(req)->retries = 0;
+ nvme_req(req)->flags = 0;
+ req->rq_flags |= RQF_DONTPREP;
+ }
+
switch (req_op(req)) {
case REQ_OP_DRV_IN:
case REQ_OP_DRV_OUT:
@@ -335,6 +387,8 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd);
break;
+ case REQ_OP_WRITE_ZEROES:
+ /* currently only aliased to deallocate for a few ctrls: */
case REQ_OP_DISCARD:
ret = nvme_setup_discard(ns, req, cmd);
break;
@@ -378,7 +432,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
blk_execute_rq(req->q, NULL, req, at_head);
if (result)
*result = nvme_req(req)->result;
- ret = req->errors;
+ if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+ ret = -EINTR;
+ else
+ ret = nvme_req(req)->status;
out:
blk_mq_free_request(req);
return ret;
@@ -463,7 +520,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
}
submit:
blk_execute_rq(req->q, disk, req, 0);
- ret = req->errors;
+ if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+ ret = -EINTR;
+ else
+ ret = nvme_req(req)->status;
if (result)
*result = le32_to_cpu(nvme_req(req)->result.u32);
if (meta && !ret && !write) {
@@ -900,16 +960,14 @@ static void nvme_config_discard(struct nvme_ns *ns)
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES);
- if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
- ns->queue->limits.discard_zeroes_data = 1;
- else
- ns->queue->limits.discard_zeroes_data = 0;
-
ns->queue->limits.discard_alignment = logical_block_size;
ns->queue->limits.discard_granularity = logical_block_size;
blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+
+ if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+ blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
}
static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
@@ -2393,7 +2451,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list)
- blk_mq_freeze_queue_start(ns->queue);
+ blk_freeze_queue_start(ns->queue);
mutex_unlock(&ctrl->namespaces_mutex);
}
EXPORT_SYMBOL_GPL(nvme_start_freeze);