* [PATCH v2 1/5] blk-mq: account active requests when get driver tag
2023-09-13 15:16 [PATCH v2 0/5] blk-mq: optimize queue_rqs() support chengming.zhou
@ 2023-09-13 15:16 ` chengming.zhou
2023-09-16 9:23 ` Ming Lei
2023-09-16 12:39 ` Ming Lei
2023-09-13 15:16 ` [PATCH v2 2/5] blk-mq: remove RQF_MQ_INFLIGHT chengming.zhou
` (4 subsequent siblings)
5 siblings, 2 replies; 15+ messages in thread
From: chengming.zhou @ 2023-09-13 15:16 UTC
To: axboe, hch, ming.lei, bvanassche
Cc: kbusch, mst, damien.lemoal, linux-block, linux-kernel,
chengming.zhou, Chengming Zhou
From: Chengming Zhou <zhouchengming@bytedance.com>
There is a limitation that batched queue_rqs() can't work on a shared
tags queue, since the accounting of active requests can't be done there.

Currently we account active requests only in blk_mq_get_driver_tag(),
which is not actually where we get the driver tag (with the none elevator).

To support batched queue_rqs() on shared tags queues, move the
accounting of active requests to where we get the driver tag:

1. none elevator: blk_mq_get_tags() and blk_mq_get_tag()
2. other elevators: __blk_mq_alloc_driver_tag()

This is clearer and matches the unaccounting side, which happens only
when we put the driver tag.

Another benefit is that we no longer need the RQF_MQ_INFLIGHT trick,
which was used to avoid double accounting of the flush request.
Since we now account only when we actually get the driver tag, that
problem goes away. RQF_MQ_INFLIGHT will be removed in the next patch.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
block/blk-mq.c | 33 +++++++++++------------------
block/blk-mq.h | 56 ++++++++++++++++++++++++++++++++++++--------------
2 files changed, 53 insertions(+), 36 deletions(-)
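For orientation, here is a condensed map of the accounting pairing this
patch establishes, summarized from the hunks below (a sketch for the
reader, not part of the diff itself):

/*
 * Account when the driver tag is obtained:
 *
 *   none elevator (driver tag allocated with the request):
 *     __blk_mq_alloc_requests_batch() -> blk_mq_add_active_requests()
 *     __blk_mq_alloc_requests()       -> blk_mq_inc_active_requests()
 *     blk_mq_alloc_request_hctx()     -> blk_mq_inc_active_requests()
 *
 *   other elevators (driver tag allocated at dispatch):
 *     __blk_mq_alloc_driver_tag()     -> blk_mq_inc_active_requests()
 *
 * Unaccount when the driver tag is put:
 *
 *     __blk_mq_free_request()         -> blk_mq_dec_active_requests()
 *     __blk_mq_put_driver_tag()       -> blk_mq_dec_active_requests()
 *     blk_mq_flush_tag_batch()        -> blk_mq_sub_active_requests()
 *
 * All of these wrappers are no-ops unless BLK_MQ_F_TAG_QUEUE_SHARED is
 * set on the hctx, so non-shared queues pay no atomic cost.
 */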
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1fafd54dce3c..e776388decc3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -426,6 +426,8 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
rq_list_add(data->cached_rq, rq);
nr++;
}
+ if (!(data->rq_flags & RQF_SCHED_TAGS))
+ blk_mq_add_active_requests(data->hctx, nr);
/* caller already holds a reference, add for remainder */
percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
data->nr_tags -= nr;
@@ -510,6 +512,8 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
goto retry;
}
+ if (!(data->rq_flags & RQF_SCHED_TAGS))
+ blk_mq_inc_active_requests(data->hctx);
rq = blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag);
blk_mq_rq_time_init(rq, alloc_time_ns);
return rq;
@@ -669,6 +673,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
tag = blk_mq_get_tag(&data);
if (tag == BLK_MQ_NO_TAG)
goto out_queue_exit;
+ if (!(data.rq_flags & RQF_SCHED_TAGS))
+ blk_mq_inc_active_requests(data.hctx);
rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag);
blk_mq_rq_time_init(rq, alloc_time_ns);
rq->__data_len = 0;
@@ -708,11 +714,10 @@ static void __blk_mq_free_request(struct request *rq)
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
- if (rq->rq_flags & RQF_MQ_INFLIGHT)
- __blk_mq_dec_active_requests(hctx);
-
- if (rq->tag != BLK_MQ_NO_TAG)
+ if (rq->tag != BLK_MQ_NO_TAG) {
+ blk_mq_dec_active_requests(hctx);
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
+ }
if (sched_tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
blk_mq_sched_restart(hctx);
@@ -1065,8 +1070,7 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
* All requests should have been marked as RQF_MQ_INFLIGHT, so
* update hctx->nr_active in batch
*/
- if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
- __blk_mq_sub_active_requests(hctx, nr_tags);
+ blk_mq_sub_active_requests(hctx, nr_tags);
blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
percpu_ref_put_many(&q->q_usage_counter, nr_tags);
@@ -1748,7 +1752,7 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
return data.rq;
}
-static bool __blk_mq_alloc_driver_tag(struct request *rq)
+bool __blk_mq_alloc_driver_tag(struct request *rq)
{
struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
@@ -1769,20 +1773,7 @@ static bool __blk_mq_alloc_driver_tag(struct request *rq)
return false;
rq->tag = tag + tag_offset;
- return true;
-}
-
-bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq)
-{
- if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
- return false;
-
- if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
- !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
- rq->rq_flags |= RQF_MQ_INFLIGHT;
- __blk_mq_inc_active_requests(hctx);
- }
- hctx->tags->rqs[rq->tag] = rq;
+ blk_mq_inc_active_requests(rq->mq_hctx);
return true;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 1743857e0b01..560a76df290a 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -271,12 +271,18 @@ static inline int blk_mq_get_rq_budget_token(struct request *rq)
return -1;
}
-static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
+static inline void __blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx,
+ int val)
{
if (blk_mq_is_shared_tags(hctx->flags))
- atomic_inc(&hctx->queue->nr_active_requests_shared_tags);
+ atomic_add(val, &hctx->queue->nr_active_requests_shared_tags);
else
- atomic_inc(&hctx->nr_active);
+ atomic_add(val, &hctx->nr_active);
+}
+
+static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
+{
+ __blk_mq_add_active_requests(hctx, 1);
}
static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
@@ -293,6 +299,32 @@ static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
__blk_mq_sub_active_requests(hctx, 1);
}
+static inline void blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx,
+ int val)
+{
+ if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+ __blk_mq_add_active_requests(hctx, val);
+}
+
+static inline void blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
+{
+ if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+ __blk_mq_inc_active_requests(hctx);
+}
+
+static inline void blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
+ int val)
+{
+ if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+ __blk_mq_sub_active_requests(hctx, val);
+}
+
+static inline void blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
+{
+ if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+ __blk_mq_dec_active_requests(hctx);
+}
+
static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
{
if (blk_mq_is_shared_tags(hctx->flags))
@@ -302,13 +334,9 @@ static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
+ blk_mq_dec_active_requests(hctx);
blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
rq->tag = BLK_MQ_NO_TAG;
-
- if (rq->rq_flags & RQF_MQ_INFLIGHT) {
- rq->rq_flags &= ~RQF_MQ_INFLIGHT;
- __blk_mq_dec_active_requests(hctx);
- }
}
static inline void blk_mq_put_driver_tag(struct request *rq)
@@ -319,19 +347,17 @@ static inline void blk_mq_put_driver_tag(struct request *rq)
__blk_mq_put_driver_tag(rq->mq_hctx, rq);
}
-bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq);
+bool __blk_mq_alloc_driver_tag(struct request *rq);
static inline bool blk_mq_get_driver_tag(struct request *rq)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
- if (rq->tag != BLK_MQ_NO_TAG &&
- !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
- hctx->tags->rqs[rq->tag] = rq;
- return true;
- }
+ if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
+ return false;
- return __blk_mq_get_driver_tag(hctx, rq);
+ hctx->tags->rqs[rq->tag] = rq;
+ return true;
}
static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
--
2.40.1
* Re: [PATCH v2 1/5] blk-mq: account active requests when get driver tag
2023-09-13 15:16 ` [PATCH v2 1/5] blk-mq: account active requests when get driver tag chengming.zhou
@ 2023-09-16 9:23 ` Ming Lei
2023-09-16 9:32 ` Chengming Zhou
2023-09-16 12:39 ` Ming Lei
1 sibling, 1 reply; 15+ messages in thread
From: Ming Lei @ 2023-09-16 9:23 UTC
To: chengming.zhou
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou, ming.lei
On Wed, Sep 13, 2023 at 03:16:12PM +0000, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> There is a limitation that batched queue_rqs() can't work on a shared
> tags queue, since the accounting of active requests can't be done there.
>
> Currently we account active requests only in blk_mq_get_driver_tag(),
> which is not actually where we get the driver tag (with the none elevator).
>
> To support batched queue_rqs() on shared tags queues, move the
> accounting of active requests to where we get the driver tag:
>
> 1. none elevator: blk_mq_get_tags() and blk_mq_get_tag()
> 2. other elevators: __blk_mq_alloc_driver_tag()
>
> This is clearer and matches the unaccounting side, which happens only
> when we put the driver tag.
>
> Another benefit is that we no longer need the RQF_MQ_INFLIGHT trick,
> which was used to avoid double accounting of the flush request.
> Since we now account only when we actually get the driver tag, that
> problem goes away. RQF_MQ_INFLIGHT will be removed in the next patch.
RQF_MQ_INFLIGHT is only set for BLK_MQ_F_TAG_QUEUE_SHARED, so we can
avoid the extra atomic accounting for !BLK_MQ_F_TAG_QUEUE_SHARED.

But now your patch switches to accounting unconditionally by removing
RQF_MQ_INFLIGHT, which is not friendly for !BLK_MQ_F_TAG_QUEUE_SHARED.
Thanks,
Ming
* Re: [PATCH v2 1/5] blk-mq: account active requests when get driver tag
2023-09-16 9:23 ` Ming Lei
@ 2023-09-16 9:32 ` Chengming Zhou
0 siblings, 0 replies; 15+ messages in thread
From: Chengming Zhou @ 2023-09-16 9:32 UTC
To: Ming Lei
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou
On 2023/9/16 17:23, Ming Lei wrote:
> On Wed, Sep 13, 2023 at 03:16:12PM +0000, chengming.zhou@linux.dev wrote:
>> From: Chengming Zhou <zhouchengming@bytedance.com>
>>
>> There is a limitation that batched queue_rqs() can't work on a shared
>> tags queue, since the accounting of active requests can't be done there.
>>
>> Currently we account active requests only in blk_mq_get_driver_tag(),
>> which is not actually where we get the driver tag (with the none elevator).
>>
>> To support batched queue_rqs() on shared tags queues, move the
>> accounting of active requests to where we get the driver tag:
>>
>> 1. none elevator: blk_mq_get_tags() and blk_mq_get_tag()
>> 2. other elevators: __blk_mq_alloc_driver_tag()
>>
>> This is clearer and matches the unaccounting side, which happens only
>> when we put the driver tag.
>>
>> Another benefit is that we no longer need the RQF_MQ_INFLIGHT trick,
>> which was used to avoid double accounting of the flush request.
>> Since we now account only when we actually get the driver tag, that
>> problem goes away. RQF_MQ_INFLIGHT will be removed in the next patch.
>
> RQF_MQ_INFLIGHT is only set for BLK_MQ_F_TAG_QUEUE_SHARED, so we can
> avoid the extra atomic accounting for !BLK_MQ_F_TAG_QUEUE_SHARED.
>
> But now your patch switches to accounting unconditionally by removing
> RQF_MQ_INFLIGHT, which is not friendly for !BLK_MQ_F_TAG_QUEUE_SHARED.
>
Hi Ming, blk_mq_add_active_requests() checks hctx->flags before doing
the atomic accounting, and only accounts when BLK_MQ_F_TAG_QUEUE_SHARED
is set. Yes, we don't need any atomic accounting on a non-shared queue.
Thanks.
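The wrapper in question, copied from patch 1's blk-mq.h hunk above with
a comment added: for a queue without BLK_MQ_F_TAG_QUEUE_SHARED the cost
is a single flag test and branch, with no atomic operation.

static inline void blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx,
					      int val)
{
	/* the atomic counter is only touched when the tag set is shared */
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_add_active_requests(hctx, val);
}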
* Re: [PATCH v2 1/5] blk-mq: account active requests when get driver tag
2023-09-13 15:16 ` [PATCH v2 1/5] blk-mq: account active requests when get driver tag chengming.zhou
2023-09-16 9:23 ` Ming Lei
@ 2023-09-16 12:39 ` Ming Lei
1 sibling, 0 replies; 15+ messages in thread
From: Ming Lei @ 2023-09-16 12:39 UTC
To: chengming.zhou
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou
On Wed, Sep 13, 2023 at 03:16:12PM +0000, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> There is a limitation that batched queue_rqs() can't work on a shared
> tags queue, since the accounting of active requests can't be done there.
>
> Currently we account active requests only in blk_mq_get_driver_tag(),
> which is not actually where we get the driver tag (with the none elevator).
>
> To support batched queue_rqs() on shared tags queues, move the
> accounting of active requests to where we get the driver tag:
>
> 1. none elevator: blk_mq_get_tags() and blk_mq_get_tag()
> 2. other elevators: __blk_mq_alloc_driver_tag()
>
> This is clearer and matches the unaccounting side, which happens only
> when we put the driver tag.
>
> Another benefit is that we no longer need the RQF_MQ_INFLIGHT trick,
> which was used to avoid double accounting of the flush request.
> Since we now account only when we actually get the driver tag, that
> problem goes away. RQF_MQ_INFLIGHT will be removed in the next patch.
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
Nice cleanup,
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks,
Ming
* [PATCH v2 2/5] blk-mq: remove RQF_MQ_INFLIGHT
2023-09-13 15:16 [PATCH v2 0/5] blk-mq: optimize queue_rqs() support chengming.zhou
2023-09-13 15:16 ` [PATCH v2 1/5] blk-mq: account active requests when get driver tag chengming.zhou
@ 2023-09-13 15:16 ` chengming.zhou
2023-09-16 12:40 ` Ming Lei
2023-09-13 15:16 ` [PATCH v2 3/5] blk-mq: support batched queue_rqs() on shared tags queue chengming.zhou
` (3 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: chengming.zhou @ 2023-09-13 15:16 UTC
To: axboe, hch, ming.lei, bvanassche
Cc: kbusch, mst, damien.lemoal, linux-block, linux-kernel,
chengming.zhou, Chengming Zhou
From: Chengming Zhou <zhouchengming@bytedance.com>
Since the previous patch changed to only account active requests when
we really allocate the driver tag, RQF_MQ_INFLIGHT can be removed
without reintroducing the double accounting problem:

1. none elevator: the flush request reuses the first pending request's
driver tag, so it won't be double accounted.

2. other elevators: the flush request is accounted when the driver tag
is allocated at issue time, and unaccounted when it puts the driver tag.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
block/blk-flush.c | 11 ++---------
block/blk-mq-debugfs.c | 1 -
block/blk-mq.c | 4 ----
include/linux/blk-mq.h | 2 --
4 files changed, 2 insertions(+), 16 deletions(-)
diff --git a/block/blk-flush.c b/block/blk-flush.c
index e73dc22d05c1..3f4d41952ef2 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -323,16 +323,9 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
flush_rq->mq_ctx = first_rq->mq_ctx;
flush_rq->mq_hctx = first_rq->mq_hctx;
- if (!q->elevator) {
+ if (!q->elevator)
flush_rq->tag = first_rq->tag;
-
- /*
- * We borrow data request's driver tag, so have to mark
- * this flush request as INFLIGHT for avoiding double
- * account of this driver tag
- */
- flush_rq->rq_flags |= RQF_MQ_INFLIGHT;
- } else
+ else
flush_rq->internal_tag = first_rq->internal_tag;
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index c3b5930106b2..5cbeb9344f2f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -246,7 +246,6 @@ static const char *const rqf_name[] = {
RQF_NAME(STARTED),
RQF_NAME(FLUSH_SEQ),
RQF_NAME(MIXED_MERGE),
- RQF_NAME(MQ_INFLIGHT),
RQF_NAME(DONTPREP),
RQF_NAME(SCHED_TAGS),
RQF_NAME(USE_SCHED),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e776388decc3..c209a7dddee3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1066,10 +1066,6 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
{
struct request_queue *q = hctx->queue;
- /*
- * All requests should have been marked as RQF_MQ_INFLIGHT, so
- * update hctx->nr_active in batch
- */
blk_mq_sub_active_requests(hctx, nr_tags);
blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 958ed7e89b30..1ab3081c82ed 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -32,8 +32,6 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4))
/* merge of different types, fail separately */
#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5))
-/* track inflight for MQ */
-#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6))
/* don't call prep for this one */
#define RQF_DONTPREP ((__force req_flags_t)(1 << 7))
/* use hctx->sched_tags */
--
2.40.1
* Re: [PATCH v2 2/5] blk-mq: remove RQF_MQ_INFLIGHT
2023-09-13 15:16 ` [PATCH v2 2/5] blk-mq: remove RQF_MQ_INFLIGHT chengming.zhou
@ 2023-09-16 12:40 ` Ming Lei
0 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2023-09-16 12:40 UTC
To: chengming.zhou
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou
On Wed, Sep 13, 2023 at 03:16:13PM +0000, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> Since the previous patch changed to only account active requests when
> we really allocate the driver tag, RQF_MQ_INFLIGHT can be removed
> without reintroducing the double accounting problem:
>
> 1. none elevator: the flush request reuses the first pending request's
> driver tag, so it won't be double accounted.
>
> 2. other elevators: the flush request is accounted when the driver tag
> is allocated at issue time, and unaccounted when it puts the driver tag.
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks,
Ming
* [PATCH v2 3/5] blk-mq: support batched queue_rqs() on shared tags queue
2023-09-13 15:16 [PATCH v2 0/5] blk-mq: optimize queue_rqs() support chengming.zhou
2023-09-13 15:16 ` [PATCH v2 1/5] blk-mq: account active requests when get driver tag chengming.zhou
2023-09-13 15:16 ` [PATCH v2 2/5] blk-mq: remove RQF_MQ_INFLIGHT chengming.zhou
@ 2023-09-13 15:16 ` chengming.zhou
2023-09-22 8:36 ` Ming Lei
2023-09-13 15:16 ` [PATCH v2 4/5] blk-mq: update driver tags request table when start request chengming.zhou
` (2 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: chengming.zhou @ 2023-09-13 15:16 UTC
To: axboe, hch, ming.lei, bvanassche
Cc: kbusch, mst, damien.lemoal, linux-block, linux-kernel,
chengming.zhou, Chengming Zhou
From: Chengming Zhou <zhouchengming@bytedance.com>
Since active requests are now accounted when we allocate driver tags,
we can remove this limitation.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
block/blk-mq.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c209a7dddee3..68ce9357463b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2781,13 +2781,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
* If we do, we can dispatch the whole plug list in one go. We
* already know at this point that all requests belong to the
* same queue, caller must ensure that's the case.
- *
- * Since we pass off the full list to the driver at this point,
- * we do not increment the active request count for the queue.
- * Bypass shared tags for now because of that.
*/
- if (q->mq_ops->queue_rqs &&
- !(rq->mq_hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
+ if (q->mq_ops->queue_rqs) {
blk_mq_run_dispatch_ops(q,
__blk_mq_flush_plug_list(q, plug));
if (rq_list_empty(plug->mq_list))
--
2.40.1
* Re: [PATCH v2 3/5] blk-mq: support batched queue_rqs() on shared tags queue
2023-09-13 15:16 ` [PATCH v2 3/5] blk-mq: support batched queue_rqs() on shared tags queue chengming.zhou
@ 2023-09-22 8:36 ` Ming Lei
0 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2023-09-22 8:36 UTC
To: chengming.zhou
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou
On Wed, Sep 13, 2023 at 03:16:14PM +0000, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> Since active requests are now accounted when we allocate driver tags,
> we can remove this limitation.
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
> block/blk-mq.c | 7 +------
> 1 file changed, 1 insertion(+), 6 deletions(-)
>
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index c209a7dddee3..68ce9357463b 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2781,13 +2781,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
> * If we do, we can dispatch the whole plug list in one go. We
> * already know at this point that all requests belong to the
> * same queue, caller must ensure that's the case.
> - *
> - * Since we pass off the full list to the driver at this point,
> - * we do not increment the active request count for the queue.
> - * Bypass shared tags for now because of that.
> */
> - if (q->mq_ops->queue_rqs &&
> - !(rq->mq_hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
> + if (q->mq_ops->queue_rqs) {
> blk_mq_run_dispatch_ops(q,
> __blk_mq_flush_plug_list(q, plug));
> if (rq_list_empty(plug->mq_list))
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks,
Ming
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH v2 4/5] blk-mq: update driver tags request table when start request
2023-09-13 15:16 [PATCH v2 0/5] blk-mq: optimize queue_rqs() support chengming.zhou
` (2 preceding siblings ...)
2023-09-13 15:16 ` [PATCH v2 3/5] blk-mq: support batched queue_rqs() on shared tags queue chengming.zhou
@ 2023-09-13 15:16 ` chengming.zhou
2023-09-22 8:49 ` Ming Lei
2023-09-13 15:16 ` [PATCH v2 5/5] block/null_blk: add queue_rqs() support chengming.zhou
2023-09-22 14:52 ` [PATCH v2 0/5] blk-mq: optimize " Jens Axboe
5 siblings, 1 reply; 15+ messages in thread
From: chengming.zhou @ 2023-09-13 15:16 UTC
To: axboe, hch, ming.lei, bvanassche
Cc: kbusch, mst, damien.lemoal, linux-block, linux-kernel,
chengming.zhou, Chengming Zhou
From: Chengming Zhou <zhouchengming@bytedance.com>
Currently we update the driver tags request table in blk_mq_get_driver_tag(),
so drivers that support queue_rqs() have to update that inflight
table by themselves.

Move it to blk_mq_start_request(), which is a better place: that is
where we set up the deadline for the request timeout check, and it is
exactly where the request becomes inflight.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
block/blk-mq.c | 1 +
block/blk-mq.h | 3 ---
drivers/block/virtio_blk.c | 2 --
drivers/nvme/host/pci.c | 1 -
4 files changed, 1 insertion(+), 6 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 68ce9357463b..e2d11183f62e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1259,6 +1259,7 @@ void blk_mq_start_request(struct request *rq)
blk_add_timer(rq);
WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
+ rq->mq_hctx->tags->rqs[rq->tag] = rq;
#ifdef CONFIG_BLK_DEV_INTEGRITY
if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 560a76df290a..f75a9ecfebde 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -351,12 +351,9 @@ bool __blk_mq_alloc_driver_tag(struct request *rq);
static inline bool blk_mq_get_driver_tag(struct request *rq)
{
- struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
-
if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
return false;
- hctx->tags->rqs[rq->tag] = rq;
return true;
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1fe011676d07..4689ac2e0c0e 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -470,8 +470,6 @@ static bool virtblk_prep_rq_batch(struct request *req)
struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
- req->mq_hctx->tags->rqs[req->tag] = req;
-
return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2f57da12d983..c2e942808eff 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -924,7 +924,6 @@ static bool nvme_prep_rq_batch(struct nvme_queue *nvmeq, struct request *req)
if (unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true)))
return false;
- req->mq_hctx->tags->rqs[req->tag] = req;
return nvme_prep_rq(nvmeq->dev, req) == BLK_STS_OK;
}
--
2.40.1
* Re: [PATCH v2 4/5] blk-mq: update driver tags request table when start request
2023-09-13 15:16 ` [PATCH v2 4/5] blk-mq: update driver tags request table when start request chengming.zhou
@ 2023-09-22 8:49 ` Ming Lei
0 siblings, 0 replies; 15+ messages in thread
From: Ming Lei @ 2023-09-22 8:49 UTC
To: chengming.zhou
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou
On Wed, Sep 13, 2023 at 03:16:15PM +0000, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> Currently we update the driver tags request table in blk_mq_get_driver_tag(),
> so drivers that support queue_rqs() have to update that inflight
> table by themselves.
>
> Move it to blk_mq_start_request(), which is a better place: that is
> where we set up the deadline for the request timeout check, and it is
> exactly where the request becomes inflight.
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks
Ming
* [PATCH v2 5/5] block/null_blk: add queue_rqs() support
2023-09-13 15:16 [PATCH v2 0/5] blk-mq: optimize queue_rqs() support chengming.zhou
` (3 preceding siblings ...)
2023-09-13 15:16 ` [PATCH v2 4/5] blk-mq: update driver tags request table when start request chengming.zhou
@ 2023-09-13 15:16 ` chengming.zhou
2023-09-22 8:54 ` Ming Lei
2023-09-22 14:52 ` [PATCH v2 0/5] blk-mq: optimize " Jens Axboe
5 siblings, 1 reply; 15+ messages in thread
From: chengming.zhou @ 2023-09-13 15:16 UTC
To: axboe, hch, ming.lei, bvanassche
Cc: kbusch, mst, damien.lemoal, linux-block, linux-kernel,
chengming.zhou, Chengming Zhou
From: Chengming Zhou <zhouchengming@bytedance.com>
Add batched mq_ops.queue_rqs() support in null_blk for testing. The
implementation is simple since null_blk doesn't have commit_rqs().

We just handle each request one by one; if errors are encountered,
the failed requests are left in the passed-in list and returned.

There is about a 3.6% improvement in IOPS of fio/t/io_uring on null_blk
with hw_queue_depth=256 on my test VM, from 1.09M to 1.13M.
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
drivers/block/null_blk/main.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 968090935eb2..79d6cd3c3d41 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1750,6 +1750,25 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
}
+static void null_queue_rqs(struct request **rqlist)
+{
+ struct request *requeue_list = NULL;
+ struct request **requeue_lastp = &requeue_list;
+ struct blk_mq_queue_data bd = { };
+ blk_status_t ret;
+
+ do {
+ struct request *rq = rq_list_pop(rqlist);
+
+ bd.rq = rq;
+ ret = null_queue_rq(rq->mq_hctx, &bd);
+ if (ret != BLK_STS_OK)
+ rq_list_add_tail(&requeue_lastp, rq);
+ } while (!rq_list_empty(*rqlist));
+
+ *rqlist = requeue_list;
+}
+
static void cleanup_queue(struct nullb_queue *nq)
{
bitmap_free(nq->tag_map);
@@ -1802,6 +1821,7 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
+ .queue_rqs = null_queue_rqs,
.complete = null_complete_rq,
.timeout = null_timeout_rq,
.poll = null_poll,
--
2.40.1
* Re: [PATCH v2 5/5] block/null_blk: add queue_rqs() support
2023-09-13 15:16 ` [PATCH v2 5/5] block/null_blk: add queue_rqs() support chengming.zhou
@ 2023-09-22 8:54 ` Ming Lei
2023-09-23 7:01 ` Chengming Zhou
0 siblings, 1 reply; 15+ messages in thread
From: Ming Lei @ 2023-09-22 8:54 UTC
To: chengming.zhou
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou
On Wed, Sep 13, 2023 at 03:16:16PM +0000, chengming.zhou@linux.dev wrote:
> From: Chengming Zhou <zhouchengming@bytedance.com>
>
> Add batched mq_ops.queue_rqs() support in null_blk for testing. The
> implementation is simple since null_blk doesn't have commit_rqs().
>
> We just handle each request one by one; if errors are encountered,
> the failed requests are left in the passed-in list and returned.
>
> There is about a 3.6% improvement in IOPS of fio/t/io_uring on null_blk
> with hw_queue_depth=256 on my test VM, from 1.09M to 1.13M.
I guess you pass 'shared_tags' to null_blk for the verification?
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
> drivers/block/null_blk/main.c | 20 ++++++++++++++++++++
> 1 file changed, 20 insertions(+)
>
> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> index 968090935eb2..79d6cd3c3d41 100644
> --- a/drivers/block/null_blk/main.c
> +++ b/drivers/block/null_blk/main.c
> @@ -1750,6 +1750,25 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
> return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
> }
>
> +static void null_queue_rqs(struct request **rqlist)
> +{
> + struct request *requeue_list = NULL;
> + struct request **requeue_lastp = &requeue_list;
> + struct blk_mq_queue_data bd = { };
> + blk_status_t ret;
> +
> + do {
> + struct request *rq = rq_list_pop(rqlist);
> +
> + bd.rq = rq;
> + ret = null_queue_rq(rq->mq_hctx, &bd);
> + if (ret != BLK_STS_OK)
> + rq_list_add_tail(&requeue_lastp, rq);
> + } while (!rq_list_empty(*rqlist));
> +
> + *rqlist = requeue_list;
> +}
> +
null_blk may not be a perfect example for showing queue_rqs(), which
is usually meant for handling requests in batch, but for test or demo
purposes it is fine:
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Thanks
Ming
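To illustrate the batching Ming refers to: a driver with a real
doorbell/commit step can prepare the whole list and notify the hardware
once at the end. Below is a rough sketch with hypothetical mydrv_*
helpers (not from this series), following the same rq_list pattern as
the null_blk code above:

static void mydrv_queue_rqs(struct request **rqlist)
{
	struct request *requeue_list = NULL;
	struct request **requeue_lastp = &requeue_list;
	bool queued = false;

	do {
		struct request *rq = rq_list_pop(rqlist);

		if (mydrv_prep_rq(rq) == BLK_STS_OK) {
			/* place on the hardware ring, no doorbell yet */
			mydrv_add_to_ring(rq);
			queued = true;
		} else {
			rq_list_add_tail(&requeue_lastp, rq);
		}
	} while (!rq_list_empty(*rqlist));

	/* a single doorbell write covers the whole batch */
	if (queued)
		mydrv_ring_doorbell();

	*rqlist = requeue_list;
}

This is roughly the shape of the virtio_blk and nvme-pci queue_rqs()
implementations; amortizing the hardware notification over the batch is
where the win comes from.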
2023-09-22 8:54 ` Ming Lei
@ 2023-09-23 7:01 ` Chengming Zhou
0 siblings, 0 replies; 15+ messages in thread
From: Chengming Zhou @ 2023-09-23 7:01 UTC
To: Ming Lei
Cc: axboe, hch, bvanassche, kbusch, mst, damien.lemoal, linux-block,
linux-kernel, Chengming Zhou
On 2023/9/22 16:54, Ming Lei wrote:
> On Wed, Sep 13, 2023 at 03:16:16PM +0000, chengming.zhou@linux.dev wrote:
>> From: Chengming Zhou <zhouchengming@bytedance.com>
>>
>> Add batched mq_ops.queue_rqs() support in null_blk for testing. The
>> implementation is simple since null_blk doesn't have commit_rqs().
>>
>> We just handle each request one by one; if errors are encountered,
>> the failed requests are left in the passed-in list and returned.
>>
>> There is about a 3.6% improvement in IOPS of fio/t/io_uring on null_blk
>> with hw_queue_depth=256 on my test VM, from 1.09M to 1.13M.
>
> I guess you pass 'shared_tags' to null_blk for the verification?
IIRC it should be "modprobe null_blk hw_queue_depth=256 nr_devices=2 shared_tags=1".
>
>>
>> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
>> ---
>> drivers/block/null_blk/main.c | 20 ++++++++++++++++++++
>> 1 file changed, 20 insertions(+)
>>
>> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
>> index 968090935eb2..79d6cd3c3d41 100644
>> --- a/drivers/block/null_blk/main.c
>> +++ b/drivers/block/null_blk/main.c
>> @@ -1750,6 +1750,25 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
>> return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
>> }
>>
>> +static void null_queue_rqs(struct request **rqlist)
>> +{
>> + struct request *requeue_list = NULL;
>> + struct request **requeue_lastp = &requeue_list;
>> + struct blk_mq_queue_data bd = { };
>> + blk_status_t ret;
>> +
>> + do {
>> + struct request *rq = rq_list_pop(rqlist);
>> +
>> + bd.rq = rq;
>> + ret = null_queue_rq(rq->mq_hctx, &bd);
>> + if (ret != BLK_STS_OK)
>> + rq_list_add_tail(&requeue_lastp, rq);
>> + } while (!rq_list_empty(*rqlist));
>> +
>> + *rqlist = requeue_list;
>> +}
>> +
>
> null_blk may not be a perfect example for showing queue_rqs(), which
> is usually meant for handling requests in batch, but for test or demo
> purposes it is fine:
>
> Reviewed-by: Ming Lei <ming.lei@redhat.com>
>
Yes, some other "real" drivers should be better choice that we can
handle more things in batch to improve performance. Maybe ublk driver
can benefit from this too.
Thanks!
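For reference, a plausible end-to-end reproduction of the benchmark in
the commit message, combining the modprobe line above with fio's
t/io_uring; the exact t/io_uring flags are an assumption, as the thread
does not spell them out:

modprobe null_blk hw_queue_depth=256 nr_devices=2 shared_tags=1
# from a fio checkout; flags are illustrative: 512B blocks, QD 128,
# batched submit/complete, polled IO
./t/io_uring -b512 -d128 -s32 -c32 -p1 /dev/nullb0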
* Re: [PATCH v2 0/5] blk-mq: optimize queue_rqs() support
2023-09-13 15:16 [PATCH v2 0/5] blk-mq: optimize queue_rqs() support chengming.zhou
` (4 preceding siblings ...)
2023-09-13 15:16 ` [PATCH v2 5/5] block/null_blk: add queue_rqs() support chengming.zhou
@ 2023-09-22 14:52 ` Jens Axboe
5 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2023-09-22 14:52 UTC
To: hch, ming.lei, bvanassche, chengming.zhou
Cc: kbusch, mst, damien.lemoal, linux-block, linux-kernel,
Chengming Zhou
On Wed, 13 Sep 2023 15:16:11 +0000, chengming.zhou@linux.dev wrote:
> Changes in v2:
> - Drop the patch that fixes a potential race in request timeout
> from this series.
> - Rebased on the newest block/for-next branch.
>
> The current queue_rqs() support has the limitation that it can't work
> on a shared tags queue, which is resolved by patches 1-3. We move the
> accounting of active requests to where we really allocate the driver tag.
>
> [...]
Applied, thanks!
[1/5] blk-mq: account active requests when get driver tag
commit: b8643d682669994b3f57c3440df3d4f9cb735f35
[2/5] blk-mq: remove RQF_MQ_INFLIGHT
commit: 48554df6bf2b1e83f70749bf4b4d7914f8b3c01d
[3/5] blk-mq: support batched queue_rqs() on shared tags queue
commit: 434097ee375fff36bc5037524609ffd6199f11da
[4/5] blk-mq: update driver tags request table when start request
commit: 217b613a53d3a430aa2e5d1523819dc271f02ff0
[5/5] block/null_blk: add queue_rqs() support
commit: d78bfa1346ab1fe04d20aa45a0678d1fc866f37c
Best regards,
--
Jens Axboe