From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@fb.com>, linux-block@vger.kernel.org
Cc: Omar Sandoval <osandov@fb.com>, Ming Lei <ming.lei@redhat.com>
Subject: [PATCH 6/6] blk-mq: don't allocate driver tag beforehand for flush rq
Date: Fri, 15 Sep 2017 00:42:13 +0800 [thread overview]
Message-ID: <20170914164213.17859-7-ming.lei@redhat.com> (raw)
In-Reply-To: <20170914164213.17859-1-ming.lei@redhat.com>
The idea behind this patch is simple:
1) for none scheduler, driver tag has to be borrowed for flush
rq, otherwise we may run out of tag, and IO hang is caused.
get/put driver tag is actually a nop, so reorder tags isn't
necessary at all.
2) for a real I/O scheduler, we needn't allocate a driver tag
beforehand for flush rq, and it works just fine to follow the
way for normal requests: allocate driver tag for each rq just
before calling .queue_rq().
One visible change to driver is that the driver tag isn't
shared in the flush request sequence, that won't be a
problem, since we always do that in legacy path.
Then the flush rq isn't treated specially wrt. get/put driver tag,
and the code gets cleaned up a lot: for example, reorder_tags_to_front()
is removed, and we needn't worry about request order in the
dispatch list for avoiding I/O deadlock.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-flush.c | 41 +++++++++++++++++++++++++++++++++++------
block/blk-mq-sched.c | 36 ++----------------------------------
block/blk-mq.c | 38 ++++----------------------------------
3 files changed, 41 insertions(+), 74 deletions(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a9773d2075ac..b091e298b97a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -216,6 +216,17 @@ static bool blk_flush_complete_seq(struct request *rq,
return kicked | queued;
}
+/*
+ * We don't share tag between flush rq and data rq in case of
+ * IO scheduler, so have to release tag and restart queue
+ */
+static void put_flush_driver_tag(struct blk_mq_hw_ctx *hctx,
+ struct request *rq)
+{
+ __blk_mq_put_driver_tag(hctx, rq);
+ blk_mq_sched_restart(hctx);
+}
+
static void flush_end_io(struct request *flush_rq, blk_status_t error)
{
struct request_queue *q = flush_rq->q;
@@ -231,8 +242,13 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
/* release the tag's ownership to the req cloned from */
spin_lock_irqsave(&fq->mq_flush_lock, flags);
hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
- blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
- flush_rq->tag = -1;
+ if (!q->elevator) {
+ blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+ flush_rq->tag = -1;
+ } else {
+ put_flush_driver_tag(hctx, flush_rq);
+ flush_rq->internal_tag = -1;
+ }
}
running = &fq->flush_queue[fq->flush_running_idx];
@@ -321,16 +337,24 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
* Borrow tag from the first request since they can't
* be in flight at the same time. And acquire the tag's
* ownership for flush req.
+ *
+ * In case of io scheduler, flush rq need to borrow
+ * scheduler tag for making put/get driver tag workable.
+ * In case of none, flush rq need to borrow driver tag.
*/
if (q->mq_ops) {
struct blk_mq_hw_ctx *hctx;
flush_rq->mq_ctx = first_rq->mq_ctx;
- flush_rq->tag = first_rq->tag;
- fq->orig_rq = first_rq;
- hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
- blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+ if (!q->elevator) {
+ fq->orig_rq = first_rq;
+ flush_rq->tag = first_rq->tag;
+ hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
+ blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+ } else {
+ flush_rq->internal_tag = first_rq->internal_tag;
+ }
}
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -394,6 +418,11 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
hctx = blk_mq_map_queue(q, ctx->cpu);
+ if (q->elevator) {
+ WARN_ON(rq->tag < 0);
+ put_flush_driver_tag(hctx, rq);
+ }
+
/*
* After populating an empty queue, kick it to avoid stall. Read
* the comment in flush_end_io().
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index b6399d99438e..d2c675c089b3 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -340,21 +340,6 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
}
}
-/*
- * Add flush/fua to the queue. If we fail getting a driver tag, then
- * punt to the requeue list. Requeue will re-invoke us from a context
- * that's safe to block from.
- */
-static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
- struct request *rq, bool can_block)
-{
- if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
- blk_insert_flush(rq);
- blk_mq_run_hw_queue(hctx, true);
- } else
- blk_mq_add_to_requeue_list(rq, false, true);
-}
-
void blk_mq_sched_insert_request(struct request *rq, bool at_head,
bool run_queue, bool async, bool can_block)
{
@@ -365,8 +350,8 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
/* flush rq in flush machinery need to be dispatched directly */
if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
- blk_mq_sched_insert_flush(hctx, rq, can_block);
- return;
+ blk_insert_flush(rq);
+ goto run;
}
if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
@@ -395,23 +380,6 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
struct elevator_queue *e = hctx->queue->elevator;
- if (e) {
- struct request *rq, *next;
-
- /*
- * We bypass requests that already have a driver tag assigned,
- * which should only be flushes. Flushes are only ever inserted
- * as single requests, so we shouldn't ever hit the
- * WARN_ON_ONCE() below (but let's handle it just in case).
- */
- list_for_each_entry_safe(rq, next, list, queuelist) {
- if (WARN_ON_ONCE(rq->tag != -1)) {
- list_del_init(&rq->queuelist);
- blk_mq_sched_bypass_insert(hctx, true, rq);
- }
- }
- }
-
if (e && e->type->ops.mq.insert_requests)
e->type->ops.mq.insert_requests(hctx, list, false);
else
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 46d561e26774..68c165f8f0f8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -940,30 +940,6 @@ static void blk_mq_put_driver_tag(struct request *rq)
__blk_mq_put_driver_tag(hctx, rq);
}
-/*
- * If we fail getting a driver tag because all the driver tags are already
- * assigned and on the dispatch list, BUT the first entry does not have a
- * tag, then we could deadlock. For that case, move entries with assigned
- * driver tags to the front, leaving the set of tagged requests in the
- * same order, and the untagged set in the same order.
- */
-static bool reorder_tags_to_front(struct list_head *list)
-{
- struct request *rq, *tmp, *first = NULL;
-
- list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
- if (rq == first)
- break;
- if (rq->tag != -1) {
- list_move(&rq->queuelist, list);
- if (!first)
- first = rq;
- }
- }
-
- return first != NULL;
-}
-
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
void *key)
{
@@ -1021,9 +997,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
rq = list_first_entry(list, struct request, queuelist);
if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
- if (!queued && reorder_tags_to_front(list))
- continue;
-
/*
* The initial allocation attempt failed, so we need to
* rerun the hardware queue when a tag is freed.
@@ -1630,13 +1603,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
if (unlikely(is_flush_fua)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);
- if (q->elevator) {
- blk_mq_sched_insert_request(rq, false, true, true,
- true);
- } else {
- blk_insert_flush(rq);
- blk_mq_run_hw_queue(data.hctx, true);
- }
+
+ /* bypass scheduler for flush rq */
+ blk_insert_flush(rq);
+ blk_mq_run_hw_queue(data.hctx, true);
} else if (plug && q->nr_hw_queues == 1) {
struct request *last = NULL;
--
2.9.5
next prev parent reply other threads:[~2017-09-14 16:43 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-14 16:42 [PATCH 0/6] blk-mq: don't allocate driver tag beforehand for flush rq Ming Lei
2017-09-14 16:42 ` [PATCH 1/6] blk-flush: don't run queue for requests of bypassing flush Ming Lei
2017-09-14 16:42 ` [PATCH 2/6] block: pass 'run_queue' to blk_mq_request_bypass_insert Ming Lei
2017-09-14 16:42 ` [PATCH 3/6] blk-flush: use blk_mq_request_bypass_insert() Ming Lei
2017-09-14 16:42 ` [PATCH 4/6] blk-mq: decide how to handle flush rq via RQF_FLUSH_SEQ Ming Lei
2017-09-14 16:42 ` [PATCH 5/6] blk-mq: move __blk_mq_put_driver_tag() into blk-mq.h Ming Lei
2017-09-14 16:42 ` Ming Lei [this message]
2017-09-14 18:51 ` [PATCH 0/6] blk-mq: don't allocate driver tag beforehand for flush rq Jens Axboe
2017-09-15 2:20 ` Ming Lei
2017-09-15 14:29 ` Jens Axboe
2017-09-15 18:06 ` Jens Axboe
2017-09-16 1:55 ` Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170914164213.17859-7-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=axboe@fb.com \
--cc=linux-block@vger.kernel.org \
--cc=osandov@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).