From: Christoph Hellwig <hch@lst.de>
To: Jens Axboe <axboe@kernel.dk>, Ming Lei <ming.lei@redhat.com>
Cc: linux-block@vger.kernel.org
Subject: [PATCH 3/3] blk-mq: make sure elevator callbacks aren't called for passthrough request
Date: Thu, 18 May 2023 07:31:01 +0200 [thread overview]
Message-ID: <20230518053101.760632-4-hch@lst.de> (raw)
In-Reply-To: <20230518053101.760632-1-hch@lst.de>
When q->elevator is set, passthrough requests can still be marked as
RQF_ELV, so some elevator callbacks will be called for them.
Fix this by splitting RQF_ELV into RQF_SCHED_TAGS, which is set for all
requests that are issued on a queue that uses an I/O scheduler, and
RQF_USE_SCHED, which is set only for non-flush, non-passthrough requests
on such a queue.
Roughly based on two different patches from
Ming Lei <ming.lei@redhat.com>.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-mq-debugfs.c | 3 ++-
block/blk-mq-sched.h | 6 ++---
block/blk-mq.c | 53 +++++++++++++++++++++++-------------------
block/blk-mq.h | 6 ++---
include/linux/blk-mq.h | 12 ++++++----
5 files changed, 44 insertions(+), 36 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 588b7048342bee..1178d8696dcc05 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -246,6 +246,8 @@ static const char *const rqf_name[] = {
RQF_NAME(MIXED_MERGE),
RQF_NAME(MQ_INFLIGHT),
RQF_NAME(DONTPREP),
+ RQF_NAME(SCHED_TAGS),
+ RQF_NAME(USE_SCHED),
RQF_NAME(FAILED),
RQF_NAME(QUIET),
RQF_NAME(IO_STAT),
@@ -255,7 +257,6 @@ static const char *const rqf_name[] = {
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
RQF_NAME(TIMED_OUT),
- RQF_NAME(ELV),
RQF_NAME(RESV),
};
#undef RQF_NAME
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 4d8d2cd3b47396..1326526bb7338c 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -37,7 +37,7 @@ static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (rq->rq_flags & RQF_ELV) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = q->elevator;
if (e->type->ops.allow_merge)
@@ -48,7 +48,7 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
{
- if (rq->rq_flags & RQF_ELV) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = rq->q->elevator;
if (e->type->ops.completed_request)
@@ -58,7 +58,7 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
static inline void blk_mq_sched_requeue_request(struct request *rq)
{
- if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags)) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7470c6636dc4f7..e021740154feae 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -354,12 +354,12 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
data->rq_flags |= RQF_IO_STAT;
rq->rq_flags = data->rq_flags;
- if (!(data->rq_flags & RQF_ELV)) {
- rq->tag = tag;
- rq->internal_tag = BLK_MQ_NO_TAG;
- } else {
+ if (data->rq_flags & RQF_SCHED_TAGS) {
rq->tag = BLK_MQ_NO_TAG;
rq->internal_tag = tag;
+ } else {
+ rq->tag = tag;
+ rq->internal_tag = BLK_MQ_NO_TAG;
}
rq->timeout = 0;
@@ -386,14 +386,13 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
WRITE_ONCE(rq->deadline, 0);
req_ref_set(rq, 1);
- if (rq->rq_flags & RQF_ELV) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = data->q->elevator;
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
- if (!op_is_flush(data->cmd_flags) &&
- e->type->ops.prepare_request)
+ if (e->type->ops.prepare_request)
e->type->ops.prepare_request(rq);
}
@@ -447,26 +446,32 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
data->flags |= BLK_MQ_REQ_NOWAIT;
if (q->elevator) {
- struct elevator_queue *e = q->elevator;
-
- data->rq_flags |= RQF_ELV;
+ /*
+ * All requests use scheduler tags when an I/O scheduler is
+ * enabled for the queue.
+ */
+ data->rq_flags |= RQF_SCHED_TAGS;
/*
* Flush/passthrough requests are special and go directly to the
- * dispatch list. Don't include reserved tags in the
- * limiting, as it isn't useful.
+ * dispatch list.
*/
if (!op_is_flush(data->cmd_flags) &&
- !blk_op_is_passthrough(data->cmd_flags) &&
- e->type->ops.limit_depth &&
- !(data->flags & BLK_MQ_REQ_RESERVED))
- e->type->ops.limit_depth(data->cmd_flags, data);
+ !blk_op_is_passthrough(data->cmd_flags)) {
+ struct elevator_mq_ops *ops = &q->elevator->type->ops;
+
+ WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED);
+
+ data->rq_flags |= RQF_USE_SCHED;
+ if (ops->limit_depth)
+ ops->limit_depth(data->cmd_flags, data);
+ }
}
retry:
data->ctx = blk_mq_get_ctx(q);
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
- if (!(data->rq_flags & RQF_ELV))
+ if (!(data->rq_flags & RQF_SCHED_TAGS))
blk_mq_tag_busy(data->hctx);
if (data->flags & BLK_MQ_REQ_RESERVED)
@@ -646,10 +651,10 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
goto out_queue_exit;
data.ctx = __blk_mq_get_ctx(q, cpu);
- if (!q->elevator)
- blk_mq_tag_busy(data.hctx);
+ if (q->elevator)
+ data.rq_flags |= RQF_SCHED_TAGS;
else
- data.rq_flags |= RQF_ELV;
+ blk_mq_tag_busy(data.hctx);
if (flags & BLK_MQ_REQ_RESERVED)
data.rq_flags |= RQF_RESV;
@@ -694,7 +699,7 @@ void blk_mq_free_request(struct request *rq)
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
- if ((rq->rq_flags & RQF_ELV) && !op_is_flush(rq->cmd_flags) &&
+ if ((rq->rq_flags & RQF_USE_SCHED) &&
q->elevator->type->ops.finish_request)
q->elevator->type->ops.finish_request(rq);
@@ -1268,7 +1273,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
if (!plug->multiple_queues && last && last->q != rq->q)
plug->multiple_queues = true;
- if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+ if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
plug->has_elevator = true;
rq->rq_next = NULL;
rq_list_add(&plug->mq_list, rq);
@@ -2620,7 +2625,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
return;
}
- if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) {
+ if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, false);
return;
@@ -2983,7 +2988,7 @@ void blk_mq_submit_bio(struct bio *bio)
}
hctx = rq->mq_hctx;
- if ((rq->rq_flags & RQF_ELV) ||
+ if ((rq->rq_flags & RQF_USE_SCHED) ||
(hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, true);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e876584d351634..d15981db34b958 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -226,9 +226,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags)
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{
- if (!(data->rq_flags & RQF_ELV))
- return data->hctx->tags;
- return data->hctx->sched_tags;
+ if (data->rq_flags & RQF_SCHED_TAGS)
+ return data->hctx->sched_tags;
+ return data->hctx->tags;
}
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5529e7d28ae6bb..888b79633692fc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -38,6 +38,10 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
/* don't call prep for this one */
#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
+/* use hctx->sched_tags */
+#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << 8))
+/* use an I/O scheduler for this request */
+#define RQF_USE_SCHED ((__force req_flags_t)(1 << 9))
/* vaguely specified driver internal error. Ignored by the block layer */
#define RQF_FAILED ((__force req_flags_t)(1 << 10))
/* don't warn about errors */
@@ -57,9 +61,7 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
-/* queue has elevator attached */
-#define RQF_ELV ((__force req_flags_t)(1 << 22))
-#define RQF_RESV ((__force req_flags_t)(1 << 23))
+#define RQF_RESV ((__force req_flags_t)(1 << 23))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
@@ -842,7 +844,7 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
*/
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
- return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
+ return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
}
static inline bool blk_mq_is_reserved_rq(struct request *rq)
@@ -858,7 +860,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
struct io_comp_batch *iob, int ioerror,
void (*complete)(struct io_comp_batch *))
{
- if (!iob || (req->rq_flags & RQF_ELV) || ioerror ||
+ if (!iob || (req->rq_flags & RQF_USE_SCHED) || ioerror ||
(req->end_io && !blk_rq_is_passthrough(req)))
return false;
--
2.39.2
next prev parent reply other threads:[~2023-05-18 5:31 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-05-18 5:30 keep passthrough request out of the I/O schedulers Christoph Hellwig
2023-05-18 5:30 ` [PATCH 1/3] blk-mq: don't queue plugged passthrough requests into scheduler Christoph Hellwig
2023-05-18 17:50 ` Bart Van Assche
2023-05-19 1:41 ` Jens Axboe
2023-05-18 5:31 ` [PATCH 2/3] blk-mq: remove RQF_ELVPRIV Christoph Hellwig
2023-05-18 7:05 ` Ming Lei
2023-05-18 7:11 ` Ming Lei
2023-05-18 13:06 ` Christoph Hellwig
2023-05-18 13:20 ` Ming Lei
2023-05-19 1:39 ` Jens Axboe
2023-05-18 17:52 ` Bart Van Assche
2023-05-18 5:31 ` Christoph Hellwig [this message]
2023-05-18 13:23 ` [PATCH 3/3] blk-mq: make sure elevator callbacks aren't called for passthrough request Ming Lei
2023-05-18 17:58 ` Bart Van Assche
2023-05-19 1:39 ` keep passthrough request out of the I/O schedulers Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230518053101.760632-4-hch@lst.de \
--to=hch@lst.de \
--cc=axboe@kernel.dk \
--cc=linux-block@vger.kernel.org \
--cc=ming.lei@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).