From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
Christoph Hellwig <hch@lst.de>,
Damien Le Moal <dlemoal@kernel.org>,
Bart Van Assche <bvanassche@acm.org>
Subject: [PATCH v22 03/14] blk-mq: Restore the zone write order when requeuing
Date: Thu, 24 Jul 2025 14:56:52 -0700 [thread overview]
Message-ID: <20250724215703.2910510-4-bvanassche@acm.org> (raw)
In-Reply-To: <20250724215703.2910510-1-bvanassche@acm.org>
Zoned writes may be requeued. This happens when a block driver returns
BLK_STS_RESOURCE, when SCSI unit attentions are handled, or when the SCSI
error handler requeues requests after error handling has finished. A later
patch enables write pipelining and increases the number of pending writes
per zone. If multiple writes are pending per zone, write requests may be
requeued in a different order than they were submitted. Restore the request
order if requests are requeued. Add RQF_DONTPREP to RQF_NOMERGE_FLAGS
because this patch may cause RQF_DONTPREP requests to be sent to the code
that checks whether a request can be merged. RQF_DONTPREP requests must not
be merged.
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
block/bfq-iosched.c | 2 ++
block/blk-mq.c | 20 +++++++++++++++++++-
block/blk-mq.h | 2 ++
block/kyber-iosched.c | 2 ++
block/mq-deadline.c | 7 ++++++-
include/linux/blk-mq.h | 2 +-
6 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0cb1e9873aab..1bd3afe5d779 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -6276,6 +6276,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
if (flags & BLK_MQ_INSERT_AT_HEAD) {
list_add(&rq->queuelist, &bfqd->dispatch);
+ } else if (flags & BLK_MQ_INSERT_ORDERED) {
+ blk_mq_insert_ordered(rq, &bfqd->dispatch);
} else if (!bfqq) {
list_add_tail(&rq->queuelist, &bfqd->dispatch);
} else {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 445f2275eddb..37217d1f19bb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1580,7 +1580,9 @@ static void blk_mq_requeue_work(struct work_struct *work)
* already. Insert it into the hctx dispatch list to avoid
* block layer merges for the request.
*/
- if (rq->rq_flags & RQF_DONTPREP)
+ if (blk_mq_preserve_order(rq))
+ blk_mq_insert_request(rq, BLK_MQ_INSERT_ORDERED);
+ else if (rq->rq_flags & RQF_DONTPREP)
blk_mq_request_bypass_insert(rq, 0);
else
blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
@@ -2614,6 +2616,20 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
blk_mq_run_hw_queue(hctx, run_queue_async);
}
+void blk_mq_insert_ordered(struct request *rq, struct list_head *list)
+{
+ struct request_queue *q = rq->q;
+ struct request *rq2;
+
+ list_for_each_entry(rq2, list, queuelist)
+ if (rq2->q == q && blk_rq_pos(rq2) > blk_rq_pos(rq))
+ break;
+
+ /* Insert rq before rq2. If rq2 is the list head, append at the end. */
+ list_add_tail(&rq->queuelist, &rq2->queuelist);
+}
+EXPORT_SYMBOL_GPL(blk_mq_insert_ordered);
+
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
{
struct request_queue *q = rq->q;
@@ -2668,6 +2684,8 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
spin_lock(&ctx->lock);
if (flags & BLK_MQ_INSERT_AT_HEAD)
list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]);
+ else if (flags & BLK_MQ_INSERT_ORDERED)
+ blk_mq_insert_ordered(rq, &ctx->rq_lists[hctx->type]);
else
list_add_tail(&rq->queuelist,
&ctx->rq_lists[hctx->type]);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index affb2e14b56e..393660311a56 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -40,8 +40,10 @@ enum {
typedef unsigned int __bitwise blk_insert_t;
#define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01)
+#define BLK_MQ_INSERT_ORDERED ((__force blk_insert_t)0x02)
void blk_mq_submit_bio(struct bio *bio);
+void blk_mq_insert_ordered(struct request *rq, struct list_head *list);
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
unsigned int flags);
void blk_mq_exit_queue(struct request_queue *q);
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 4dba8405bd01..051c05ceafd7 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -603,6 +603,8 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
trace_block_rq_insert(rq);
if (flags & BLK_MQ_INSERT_AT_HEAD)
list_move(&rq->queuelist, head);
+ else if (flags & BLK_MQ_INSERT_ORDERED)
+ blk_mq_insert_ordered(rq, head);
else
list_move_tail(&rq->queuelist, head);
sbitmap_set_bit(&khd->kcq_map[sched_domain],
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 2edf1cac06d5..110fef65b829 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -710,7 +710,12 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
* set expire time and add to fifo list
*/
rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
- list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
+ if (flags & BLK_MQ_INSERT_ORDERED)
+ blk_mq_insert_ordered(rq,
+ &per_prio->fifo_list[data_dir]);
+ else
+ list_add_tail(&rq->queuelist,
+ &per_prio->fifo_list[data_dir]);
}
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 30d7cd1b0484..1c516151fff0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -86,7 +86,7 @@ enum rqf_flags {
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
- (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+ (RQF_STARTED | RQF_FLUSH_SEQ | RQF_DONTPREP | RQF_SPECIAL_PAYLOAD)
enum mq_rq_state {
MQ_RQ_IDLE = 0,
next prev parent reply other threads:[~2025-07-24 21:58 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-24 21:56 [PATCH v22 00/14] Improve write performance for zoned UFS devices Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 01/14] block: Support block devices that preserve the order of write requests Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 02/14] blk-mq: Always insert sequential zoned writes into a software queue Bart Van Assche
2025-07-24 21:56 ` Bart Van Assche [this message]
2025-07-24 21:56 ` [PATCH v22 04/14] blk-zoned: Add an argument to blk_zone_plug_bio() Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 05/14] blk-zoned: Split an if-statement Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 06/14] blk-zoned: Move code from disk_zone_wplug_add_bio() into its caller Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 07/14] blk-zoned: Introduce a loop in blk_zone_wplug_bio_work() Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 08/14] blk-zoned: Support pipelining of zoned writes Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 09/14] null_blk: Add the preserves_write_order attribute Bart Van Assche
2025-07-24 21:56 ` [PATCH v22 10/14] scsi: core: Retry unaligned zoned writes Bart Van Assche
2025-07-24 21:57 ` [PATCH v22 11/14] scsi: sd: Increase retry count for " Bart Van Assche
2025-07-24 21:57 ` [PATCH v22 12/14] scsi: scsi_debug: Add the preserves_write_order module parameter Bart Van Assche
2025-07-24 21:57 ` [PATCH v22 13/14] scsi: scsi_debug: Support injecting unaligned write errors Bart Van Assche
2025-07-24 21:57 ` [PATCH v22 14/14] ufs: core: Inform the block layer about write ordering Bart Van Assche
2025-08-01 8:04 ` Can Guo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250724215703.2910510-4-bvanassche@acm.org \
--to=bvanassche@acm.org \
--cc=axboe@kernel.dk \
--cc=dlemoal@kernel.org \
--cc=hch@lst.de \
--cc=linux-block@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).