From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@fb.com>,
linux-block@vger.kernel.org,
Christoph Hellwig <hch@infradead.org>
Cc: Bart Van Assche <bart.vanassche@sandisk.com>,
Laurence Oberman <loberman@redhat.com>,
Paolo Valente <paolo.valente@linaro.org>,
Oleksandr Natalenko <oleksandr@natalenko.name>,
Tom Nguyen <tom81094@gmail.com>,
linux-kernel@vger.kernel.org, linux-scsi@vger.kernel.org,
Omar Sandoval <osandov@fb.com>,
John Garry <john.garry@huawei.com>,
Ming Lei <ming.lei@redhat.com>
Subject: [PATCH V7 4/6] blk-mq: introduce .get_budget and .put_budget in blk_mq_ops
Date: Fri, 13 Oct 2017 02:37:02 +0800 [thread overview]
Message-ID: <20171012183704.22326-5-ming.lei@redhat.com> (raw)
In-Reply-To: <20171012183704.22326-1-ming.lei@redhat.com>
For SCSI devices, there is often a per-request-queue depth, which needs
to be respected before queuing a request.
The current blk-mq always dequeues one request first, then calls .queue_rq()
to dispatch the request to the LLD. One obvious issue with this approach is
that I/O merging may suffer: when the per-request-queue depth can't be
respected, .queue_rq() has to return BLK_STS_RESOURCE, and the request then
has to stay in the hctx->dispatch list, where it never gets a chance to
participate in I/O merging.
This patch introduces the .get_budget and .put_budget callbacks in blk_mq_ops,
so that we can try to reserve budget before dequeuing a request.
If we can't get budget for queueing I/O, we don't need to dequeue a request
at all, so I/O merging can be improved a lot.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq-sched.c | 39 +++++++++++++++++++++++++++++++--------
block/blk-mq.c | 35 ++++++++++++++++++++++++++++++++---
block/blk-mq.h | 2 +-
include/linux/blk-mq.h | 10 ++++++++++
4 files changed, 74 insertions(+), 12 deletions(-)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index be29ba849408..bcf4773ee1ca 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -89,19 +89,32 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
return false;
}
-static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct elevator_queue *e = q->elevator;
LIST_HEAD(rq_list);
do {
- struct request *rq = e->type->ops.mq.dispatch_request(hctx);
+ struct request *rq;
- if (!rq)
+ if (e->type->ops.mq.has_work &&
+ !e->type->ops.mq.has_work(hctx))
break;
+
+ if (q->mq_ops->get_budget && !q->mq_ops->get_budget(hctx))
+ return true;
+
+ rq = e->type->ops.mq.dispatch_request(hctx);
+ if (!rq) {
+ if (q->mq_ops->put_budget)
+ q->mq_ops->put_budget(hctx);
+ break;
+ }
list_add(&rq->queuelist, &rq_list);
- } while (blk_mq_dispatch_rq_list(q, &rq_list));
+ } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
+
+ return false;
}
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
@@ -110,6 +123,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
struct elevator_queue *e = q->elevator;
const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
LIST_HEAD(rq_list);
+ bool run_queue = false;
/* RCU or SRCU read lock is needed before checking quiesced flag */
if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
@@ -143,13 +157,22 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
*/
if (!list_empty(&rq_list)) {
blk_mq_sched_mark_restart_hctx(hctx);
- if (blk_mq_dispatch_rq_list(q, &rq_list) && has_sched_dispatch)
- blk_mq_do_dispatch_sched(hctx);
+ if (blk_mq_dispatch_rq_list(q, &rq_list, false) &&
+ has_sched_dispatch)
+ run_queue = blk_mq_do_dispatch_sched(hctx);
} else if (has_sched_dispatch) {
- blk_mq_do_dispatch_sched(hctx);
+ run_queue = blk_mq_do_dispatch_sched(hctx);
} else {
blk_mq_flush_busy_ctxs(hctx, &rq_list);
- blk_mq_dispatch_rq_list(q, &rq_list);
+ blk_mq_dispatch_rq_list(q, &rq_list, false);
+ }
+
+ if (run_queue) {
+ if (!blk_mq_sched_needs_restart(hctx) &&
+ !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state)) {
+ blk_mq_sched_mark_restart_hctx(hctx);
+ blk_mq_run_hw_queue(hctx, true);
+ }
}
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 076cbab9c3e0..e6e50a91f170 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1045,7 +1045,16 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
return true;
}
-bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
+static void blk_mq_put_budget(struct blk_mq_hw_ctx *hctx, bool got_budget)
+{
+ struct request_queue *q = hctx->queue;
+
+ if (q->mq_ops->put_budget && got_budget)
+ q->mq_ops->put_budget(hctx);
+}
+
+bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
+ bool got_budget)
{
struct blk_mq_hw_ctx *hctx;
struct request *rq;
@@ -1054,6 +1063,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
if (list_empty(list))
return false;
+ WARN_ON(!list_is_singular(list) && got_budget);
+
/*
* Now process all the entries, sending them to the driver.
*/
@@ -1071,15 +1082,25 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
* The initial allocation attempt failed, so we need to
* rerun the hardware queue when a tag is freed.
*/
- if (!blk_mq_dispatch_wait_add(hctx))
+ if (!blk_mq_dispatch_wait_add(hctx)) {
+ blk_mq_put_budget(hctx, got_budget);
break;
+ }
/*
* It's possible that a tag was freed in the window
* between the allocation failure and adding the
* hardware queue to the wait queue.
*/
- if (!blk_mq_get_driver_tag(rq, &hctx, false))
+ if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+ blk_mq_put_budget(hctx, got_budget);
+ break;
+ }
+ }
+
+ if (!got_budget) {
+ if (q->mq_ops->get_budget &&
+ !q->mq_ops->get_budget(hctx))
break;
}
@@ -1577,6 +1598,11 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if (!blk_mq_get_driver_tag(rq, NULL, false))
goto insert;
+ if (q->mq_ops->get_budget && !q->mq_ops->get_budget(hctx)) {
+ blk_mq_put_driver_tag(rq);
+ goto insert;
+ }
+
new_cookie = request_to_qc_t(hctx, rq);
/*
@@ -2577,6 +2603,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (!set->ops->queue_rq)
return -EINVAL;
+ if ((!!set->ops->get_budget) != (!!set->ops->put_budget))
+ return -EINVAL;
+
if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
pr_info("blk-mq: reduced tag depth to %u\n",
BLK_MQ_MAX_DEPTH);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ef15b3414da5..d1bfce3e69ad 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -30,7 +30,7 @@ void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
-bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *);
+bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 50c6485cb04f..f8c4ba2682ec 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -90,6 +90,8 @@ struct blk_mq_queue_data {
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
const struct blk_mq_queue_data *);
+typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
+typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -112,6 +114,14 @@ struct blk_mq_ops {
queue_rq_fn *queue_rq;
/*
+ * Reserve budget before queue request, once .queue_rq is
+ * run, it is driver's responsibility to release the
+ * reserved budget.
+ */
+ get_budget_fn *get_budget;
+ put_budget_fn *put_budget;
+
+ /*
* Called on request timeout
*/
timeout_fn *timeout;
--
2.9.5
next prev parent reply other threads:[~2017-10-12 18:37 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-12 18:36 [PATCH V7 0/6] blk-mq-sched: improve sequential I/O performance Ming Lei
2017-10-12 18:36 ` [PATCH V7 1/6] blk-mq-sched: fix scheduler bad performance Ming Lei
2017-10-12 18:37 ` [PATCH V7 2/6] blk-mq-sched: move actual dispatching into one helper Ming Lei
2017-10-12 18:37 ` [PATCH V7 3/6] sbitmap: introduce __sbitmap_for_each_set() Ming Lei
2017-10-12 18:37 ` Ming Lei [this message]
2017-10-12 18:46 ` [PATCH V7 4/6] blk-mq: introduce .get_budget and .put_budget in blk_mq_ops Jens Axboe
2017-10-13 0:19 ` Ming Lei
2017-10-13 14:44 ` Jens Axboe
2017-10-13 16:07 ` Ming Lei
2017-10-13 16:19 ` Jens Axboe
2017-10-13 16:21 ` Ming Lei
2017-10-13 16:28 ` Jens Axboe
2017-10-13 16:31 ` Bart Van Assche
2017-10-13 16:31 ` Bart Van Assche
2017-10-13 16:33 ` Jens Axboe
2017-10-13 16:45 ` Ming Lei
2017-10-13 17:08 ` Bart Van Assche
2017-10-13 17:08 ` Bart Van Assche
2017-10-13 17:29 ` Ming Lei
2017-10-13 17:47 ` Bart Van Assche
2017-10-13 17:47 ` Bart Van Assche
2017-10-16 11:30 ` Hannes Reinecke
2017-10-16 16:06 ` Bart Van Assche
2017-10-16 16:06 ` Bart Van Assche
2017-10-17 1:29 ` Ming Lei
2017-10-17 6:38 ` Hannes Reinecke
2017-10-17 9:36 ` Ming Lei
2017-10-17 18:09 ` Bart Van Assche
2017-10-13 16:17 ` Ming Lei
2017-10-13 16:20 ` Jens Axboe
2017-10-13 16:22 ` Ming Lei
2017-10-13 16:28 ` Jens Axboe
2017-10-12 18:37 ` [PATCH V7 5/6] blk-mq-sched: improve dispatching from sw queue Ming Lei
2017-10-12 18:48 ` Bart Van Assche
2017-10-13 0:20 ` Ming Lei
2017-10-12 18:37 ` [PATCH V7 6/6] SCSI: implement .get_budget and .put_budget for blk-mq Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171012183704.22326-5-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=axboe@fb.com \
--cc=bart.vanassche@sandisk.com \
--cc=hch@infradead.org \
--cc=john.garry@huawei.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=loberman@redhat.com \
--cc=oleksandr@natalenko.name \
--cc=osandov@fb.com \
--cc=paolo.valente@linaro.org \
--cc=tom81094@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.