Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r-- | block/blk-mq-sched.c | 203
1 file changed, 131 insertions, 72 deletions
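The new blk_mq_do_dispatch_sched() and blk_mq_do_dispatch_ctx() helpers in the diff below share one protocol: acquire a budget unit before dequeuing a request, hand the budget back if nothing was dequeued, and otherwise let the dequeued request own it until .queue_rq() (or the driver's completion path) releases it. A minimal userspace sketch of that ownership rule, assuming a plain counter as the budget and an integer pending count (both illustrative stand-ins, not kernel API):

/*
 * Userspace model of the "rq owns the budget" rule from
 * blk_mq_do_dispatch_sched()/blk_mq_do_dispatch_ctx(). The budget
 * and pending counters below are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

static int budget = 2;		/* e.g. remaining device queue depth */
static int pending = 5;		/* requests waiting to be dispatched */

static bool get_dispatch_budget(void)
{
	if (budget == 0)
		return false;
	budget--;
	return true;
}

static void put_dispatch_budget(void)
{
	budget++;
}

static bool dispatch_one(int rq)
{
	/* In the kernel this is .queue_rq(); the driver now owns
	 * the budget and releases it when the request completes. */
	printf("dispatched rq %d (budget left: %d)\n", rq, budget);
	return true;
}

int main(void)
{
	do {
		if (!get_dispatch_budget())
			break;			/* device full: stop, don't spin */

		if (pending == 0) {
			put_dispatch_budget();	/* nothing dequeued: return budget */
			break;
		}
		/* The dequeued request owns the budget from here on. */
	} while (dispatch_one(pending--));

	printf("left pending: %d, budget: %d\n", pending, budget);
	return 0;
}

The property the kernel comments insist on is visible in the output: budget is consumed only by requests that actually reach dispatch_one(), and the loop stops rather than spins once the budget is exhausted.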
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 4ab69435708c..c117bd8fd1f6 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -81,20 +81,103 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	} else
 		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	if (blk_mq_hctx_has_pending(hctx)) {
-		blk_mq_run_hw_queue(hctx, true);
-		return true;
-	}
+	return blk_mq_run_hw_queue(hctx, true);
+}
 
-	return false;
+/*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+ * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ */
+static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	struct elevator_queue *e = q->elevator;
+	LIST_HEAD(rq_list);
+
+	do {
+		struct request *rq;
+
+		if (e->type->ops.mq.has_work &&
+				!e->type->ops.mq.has_work(hctx))
+			break;
+
+		if (!blk_mq_get_dispatch_budget(hctx))
+			break;
+
+		rq = e->type->ops.mq.dispatch_request(hctx);
+		if (!rq) {
+			blk_mq_put_dispatch_budget(hctx);
+			break;
+		}
+
+		/*
+		 * Now this rq owns the budget which has to be released
+		 * if this rq won't be queued to driver via .queue_rq()
+		 * in blk_mq_dispatch_rq_list().
+		 */
+		list_add(&rq->queuelist, &rq_list);
+	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
 }
 
+static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
+					  struct blk_mq_ctx *ctx)
+{
+	unsigned idx = ctx->index_hw;
+
+	if (++idx == hctx->nr_ctx)
+		idx = 0;
+
+	return hctx->ctxs[idx];
+}
+
+/*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+ * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
+ */
+static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	LIST_HEAD(rq_list);
+	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
+
+	do {
+		struct request *rq;
+
+		if (!sbitmap_any_bit_set(&hctx->ctx_map))
+			break;
+
+		if (!blk_mq_get_dispatch_budget(hctx))
+			break;
+
+		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
+		if (!rq) {
+			blk_mq_put_dispatch_budget(hctx);
+			break;
+		}
+
+		/*
+		 * Now this rq owns the budget which has to be released
+		 * if this rq won't be queued to driver via .queue_rq()
+		 * in blk_mq_dispatch_rq_list().
+		 */
+		list_add(&rq->queuelist, &rq_list);
+
+		/* round robin for fair dispatch */
+		ctx = blk_mq_next_ctx(hctx, rq->mq_ctx);
+
+	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+	WRITE_ONCE(hctx->dispatch_from, ctx);
+}
+
+/* return true if hw queue need to be run again */
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
 	struct elevator_queue *e = q->elevator;
 	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
-	bool did_work = false;
 	LIST_HEAD(rq_list);
 
 	/* RCU or SRCU read lock is needed before checking quiesced flag */
@@ -122,29 +205,34 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	 * scheduler, we can no longer merge or sort them. So it's best to
 	 * leave them there for as long as we can. Mark the hw queue as
 	 * needing a restart in that case.
+	 *
+	 * We want to dispatch from the scheduler if there was nothing
+	 * on the dispatch list or we were able to dispatch from the
+	 * dispatch list.
 	 */
 	if (!list_empty(&rq_list)) {
 		blk_mq_sched_mark_restart_hctx(hctx);
-		did_work = blk_mq_dispatch_rq_list(q, &rq_list);
-	} else if (!has_sched_dispatch) {
+		if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
+			if (has_sched_dispatch)
+				blk_mq_do_dispatch_sched(hctx);
+			else
+				blk_mq_do_dispatch_ctx(hctx);
+		}
+	} else if (has_sched_dispatch) {
+		blk_mq_do_dispatch_sched(hctx);
+	} else if (q->mq_ops->get_budget) {
+		/*
+		 * If we need to get budget before queuing request, we
+		 * dequeue request one by one from sw queue for avoiding
+		 * to mess up I/O merge when dispatch runs out of resource.
+		 *
+		 * TODO: get more budgets, and dequeue more requests in
+		 * one time.
+		 */
+		blk_mq_do_dispatch_ctx(hctx);
+	} else {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
-		blk_mq_dispatch_rq_list(q, &rq_list);
-	}
-
-	/*
-	 * We want to dispatch from the scheduler if we had no work left
-	 * on the dispatch list, OR if we did have work but weren't able
-	 * to make progress.
-	 */
-	if (!did_work && has_sched_dispatch) {
-		do {
-			struct request *rq;
-
-			rq = e->type->ops.mq.dispatch_request(hctx);
-			if (!rq)
-				break;
-			list_add(&rq->queuelist, &rq_list);
-		} while (blk_mq_dispatch_rq_list(q, &rq_list));
+		blk_mq_dispatch_rq_list(q, &rq_list, false);
 	}
 }
 
@@ -260,21 +348,21 @@ void blk_mq_sched_request_inserted(struct request *rq)
 EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
 
 static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
+				       bool has_sched,
 				       struct request *rq)
 {
-	if (rq->tag == -1) {
-		rq->rq_flags |= RQF_SORTED;
-		return false;
+	/* dispatch flush rq directly */
+	if (rq->rq_flags & RQF_FLUSH_SEQ) {
+		spin_lock(&hctx->lock);
+		list_add(&rq->queuelist, &hctx->dispatch);
+		spin_unlock(&hctx->lock);
+		return true;
 	}
 
-	/*
-	 * If we already have a real request tag, send directly to
-	 * the dispatch list.
-	 */
-	spin_lock(&hctx->lock);
-	list_add(&rq->queuelist, &hctx->dispatch);
-	spin_unlock(&hctx->lock);
-	return true;
+	if (has_sched)
+		rq->rq_flags |= RQF_SORTED;
+
+	return false;
 }
 
 /**
@@ -339,21 +427,6 @@ done:
 	}
 }
 
-/*
- * Add flush/fua to the queue. If we fail getting a driver tag, then
- * punt to the requeue list. Requeue will re-invoke us from a context
- * that's safe to block from.
- */
-static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
-				      struct request *rq, bool can_block)
-{
-	if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
-		blk_insert_flush(rq);
-		blk_mq_run_hw_queue(hctx, true);
-	} else
-		blk_mq_add_to_requeue_list(rq, false, true);
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async, bool can_block)
 {
@@ -362,12 +435,15 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 
-	if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
-		blk_mq_sched_insert_flush(hctx, rq, can_block);
-		return;
+	/* flush rq in flush machinery need to be dispatched directly */
+	if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
+		blk_insert_flush(rq);
+		goto run;
 	}
 
-	if (e && blk_mq_sched_bypass_insert(hctx, rq))
+	WARN_ON(e && (rq->tag != -1));
+
+	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
 		goto run;
 
 	if (e && e->type->ops.mq.insert_requests) {
@@ -393,23 +469,6 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
 	struct elevator_queue *e = hctx->queue->elevator;
 
-	if (e) {
-		struct request *rq, *next;
-
-		/*
-		 * We bypass requests that already have a driver tag assigned,
-		 * which should only be flushes. Flushes are only ever inserted
-		 * as single requests, so we shouldn't ever hit the
-		 * WARN_ON_ONCE() below (but let's handle it just in case).
-		 */
-		list_for_each_entry_safe(rq, next, list, queuelist) {
-			if (WARN_ON_ONCE(rq->tag != -1)) {
-				list_del_init(&rq->queuelist);
-				blk_mq_sched_bypass_insert(hctx, rq);
-			}
-		}
-	}
-
 	if (e && e->type->ops.mq.insert_requests)
 		e->type->ops.mq.insert_requests(hctx, list, false);
 	else
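hctx->dispatch_from plus blk_mq_next_ctx() give blk_mq_do_dispatch_ctx() round-robin fairness when it dequeues one request at a time under budget pressure. A standalone sketch of that rotation, with NR_CTX and the two-dequeues-per-run limit invented for illustration:

/*
 * Userspace model of blk_mq_next_ctx(): resume dispatch from the
 * ctx after the one we last dequeued from, so one busy sw queue
 * cannot starve the others across dispatch runs. Only the
 * wrap-around logic mirrors the patch; the rest is illustrative.
 */
#include <stdio.h>

#define NR_CTX 4

static unsigned next_ctx(unsigned idx)
{
	if (++idx == NR_CTX)
		idx = 0;
	return idx;
}

int main(void)
{
	unsigned dispatch_from = 0;	/* persists across dispatch runs */
	int run, i;

	for (run = 0; run < 2; run++) {
		unsigned ctx = dispatch_from;

		/* dequeue at most two requests per run (budget-limited) */
		for (i = 0; i < 2; i++) {
			printf("run %d: dequeue from ctx %u\n", run, ctx);
			ctx = next_ctx(ctx);
		}
		dispatch_from = ctx;	/* remember where to resume */
	}
	return 0;
}

Because dispatch_from persists across runs (READ_ONCE/WRITE_ONCE on hctx->dispatch_from in the patch), a budget shortage that cuts a run short does not restart dispatch at ctx 0 every time, so higher-indexed software queues still get served.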