From: Damien Le Moal <dlemoal@kernel.org>
To: Bart Van Assche <bvanassche@acm.org>, Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
Christoph Hellwig <hch@lst.de>, Jaegeuk Kim <jaegeuk@kernel.org>
Subject: Re: [PATCH v16 14/26] blk-mq: Restore the zoned write order when requeuing
Date: Tue, 19 Nov 2024 16:52:02 +0900 [thread overview]
Message-ID: <db6d72c5-5221-413f-a355-df8ab414f63e@kernel.org> (raw)
In-Reply-To: <20241119002815.600608-15-bvanassche@acm.org>
On 11/19/24 09:28, Bart Van Assche wrote:
> Zoned writes may be requeued, e.g. if a block driver returns
> BLK_STS_RESOURCE. Requests may be requeued in another order than
> submitted. Restore the request order if requests are requeued.
>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
> block/bfq-iosched.c | 2 ++
> block/blk-mq.c | 20 +++++++++++++++++++-
> block/blk-mq.h | 2 ++
> block/kyber-iosched.c | 2 ++
> block/mq-deadline.c | 7 ++++++-
> include/linux/blk-mq.h | 2 +-
> 6 files changed, 32 insertions(+), 3 deletions(-)
>
> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
> index 0747d9d0e48c..13bedbf03bd2 100644
> --- a/block/bfq-iosched.c
> +++ b/block/bfq-iosched.c
> @@ -6265,6 +6265,8 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>
> if (flags & BLK_MQ_INSERT_AT_HEAD) {
> list_add(&rq->queuelist, &bfqd->dispatch);
> + } else if (flags & BLK_MQ_INSERT_ORDERED) {
> + blk_mq_insert_ordered(rq, &bfqd->dispatch);
> } else if (!bfqq) {
> list_add_tail(&rq->queuelist, &bfqd->dispatch);
> } else {
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index f134d5e1c4a1..1302ccbf2a7d 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1564,7 +1564,9 @@ static void blk_mq_requeue_work(struct work_struct *work)
> * already. Insert it into the hctx dispatch list to avoid
> * block layer merges for the request.
> */
> - if (rq->rq_flags & RQF_DONTPREP)
> + if (blk_rq_is_seq_zoned_write(rq))
> + blk_mq_insert_request(rq, BLK_MQ_INSERT_ORDERED);
Is this OK to do without any starvation prevention? A high-LBA write that
constantly gets requeued behind low-LBA writes could end up timing out,
no?
> + else if (rq->rq_flags & RQF_DONTPREP)
> blk_mq_request_bypass_insert(rq, 0);
> else
> blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
> @@ -2599,6 +2601,20 @@ static void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx,
> blk_mq_run_hw_queue(hctx, run_queue_async);
> }
>
> +void blk_mq_insert_ordered(struct request *rq, struct list_head *list)
> +{
> + struct request_queue *q = rq->q;
> + struct request *rq2;
> +
> + list_for_each_entry(rq2, list, queuelist)
> + if (rq2->q == q && blk_rq_pos(rq2) > blk_rq_pos(rq))
> + break;
> +
> + /* Insert rq before rq2. If rq2 is the list head, append at the end. */
> + list_add_tail(&rq->queuelist, &rq2->queuelist);
> +}
> +EXPORT_SYMBOL_GPL(blk_mq_insert_ordered);
> +
> static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
> {
> struct request_queue *q = rq->q;
> @@ -2653,6 +2669,8 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
> spin_lock(&ctx->lock);
> if (flags & BLK_MQ_INSERT_AT_HEAD)
> list_add(&rq->queuelist, &ctx->rq_lists[hctx->type]);
> + else if (flags & BLK_MQ_INSERT_ORDERED)
> + blk_mq_insert_ordered(rq, &ctx->rq_lists[hctx->type]);
> else
> list_add_tail(&rq->queuelist,
> &ctx->rq_lists[hctx->type]);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 309db553aba6..10b9fb3ca762 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -40,8 +40,10 @@ enum {
>
> typedef unsigned int __bitwise blk_insert_t;
> #define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01)
> +#define BLK_MQ_INSERT_ORDERED ((__force blk_insert_t)0x02)
>
> void blk_mq_submit_bio(struct bio *bio);
> +void blk_mq_insert_ordered(struct request *rq, struct list_head *list);
> int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
> unsigned int flags);
> void blk_mq_exit_queue(struct request_queue *q);
> diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
> index 4155594aefc6..77bb41bab68d 100644
> --- a/block/kyber-iosched.c
> +++ b/block/kyber-iosched.c
> @@ -603,6 +603,8 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
> trace_block_rq_insert(rq);
> if (flags & BLK_MQ_INSERT_AT_HEAD)
> list_move(&rq->queuelist, head);
> + else if (flags & BLK_MQ_INSERT_ORDERED)
> + blk_mq_insert_ordered(rq, head);
> else
> list_move_tail(&rq->queuelist, head);
> sbitmap_set_bit(&khd->kcq_map[sched_domain],
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 2edf84b1bc2a..200e5a2928ce 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -711,7 +711,12 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
> * set expire time and add to fifo list
> */
> rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
> - list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
> + if (flags & BLK_MQ_INSERT_ORDERED)
> + blk_mq_insert_ordered(rq,
> + &per_prio->fifo_list[data_dir]);
> + else
> + list_add_tail(&rq->queuelist,
> + &per_prio->fifo_list[data_dir]);
> }
> }
>
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index ac05974f08f9..f7514eefccfd 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -85,7 +85,7 @@ enum {
>
> /* flags that prevent us from merging requests: */
> #define RQF_NOMERGE_FLAGS \
> - (RQF_STARTED | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
> + (RQF_STARTED | RQF_FLUSH_SEQ | RQF_DONTPREP | RQF_SPECIAL_PAYLOAD)
>
> enum mq_rq_state {
> MQ_RQ_IDLE = 0,
--
Damien Le Moal
Western Digital Research
next prev parent reply other threads:[~2024-11-19 7:52 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-19 0:27 [PATCH v16 00/26] Improve write performance for zoned UFS devices Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 01/26] blk-zoned: Fix a reference count leak Bart Van Assche
2024-11-19 2:23 ` Damien Le Moal
2024-11-19 20:21 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 02/26] blk-zoned: Split disk_zone_wplugs_work() Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 03/26] blk-zoned: Split queue_zone_wplugs_show() Bart Van Assche
2024-11-19 2:25 ` Damien Le Moal
2024-11-19 0:27 ` [PATCH v16 04/26] blk-zoned: Only handle errors after pending zoned writes have completed Bart Van Assche
2024-11-19 2:50 ` Damien Le Moal
2024-11-19 20:51 ` Bart Van Assche
2024-11-21 3:23 ` Damien Le Moal
2024-11-21 17:43 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 05/26] blk-zoned: Fix a deadlock triggered by unaligned writes Bart Van Assche
2024-11-19 2:57 ` Damien Le Moal
2024-11-19 21:04 ` Bart Van Assche
2024-11-21 3:32 ` Damien Le Moal
2024-11-21 17:51 ` Bart Van Assche
2024-11-25 4:00 ` Damien Le Moal
2024-11-25 4:19 ` Damien Le Moal
2025-01-09 19:11 ` Bart Van Assche
2025-01-10 5:07 ` Damien Le Moal
2025-01-10 18:17 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 06/26] blk-zoned: Fix requeuing of zoned writes Bart Van Assche
2024-11-19 3:00 ` Damien Le Moal
2024-11-19 21:06 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 07/26] block: Support block drivers that preserve the order of write requests Bart Van Assche
2024-11-19 7:37 ` Damien Le Moal
2024-11-19 21:08 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 08/26] dm-linear: Report to the block layer that the write order is preserved Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 09/26] mq-deadline: Remove a local variable Bart Van Assche
2024-11-19 7:38 ` Damien Le Moal
2024-11-19 21:11 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 10/26] blk-mq: Clean up blk_mq_requeue_work() Bart Van Assche
2024-11-19 7:39 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 11/26] block: Optimize blk_mq_submit_bio() for the cache hit scenario Bart Van Assche
2024-11-19 7:40 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 12/26] block: Rework request allocation in blk_mq_submit_bio() Bart Van Assche
2024-11-19 7:44 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 13/26] block: Support allocating from a specific software queue Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 14/26] blk-mq: Restore the zoned write order when requeuing Bart Van Assche
2024-11-19 7:52 ` Damien Le Moal [this message]
2024-11-19 21:16 ` Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 15/26] blk-zoned: Document the locking order Bart Van Assche
2024-11-19 7:52 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 16/26] blk-zoned: Document locking assumptions Bart Van Assche
2024-11-19 7:53 ` Damien Le Moal
2024-11-19 21:18 ` Bart Van Assche
2024-11-21 3:34 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 17/26] blk-zoned: Uninline functions that are not in the hot path Bart Van Assche
2024-11-19 7:55 ` Damien Le Moal
2024-11-19 21:20 ` Bart Van Assche
2024-11-21 3:36 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 18/26] blk-zoned: Make disk_should_remove_zone_wplug() more robust Bart Van Assche
2024-11-19 7:58 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 19/26] blk-zoned: Add an argument to blk_zone_plug_bio() Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 20/26] blk-zoned: Support pipelining of zoned writes Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 21/26] scsi: core: Retry unaligned " Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 22/26] scsi: sd: Increase retry count for " Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 23/26] scsi: scsi_debug: Add the preserves_write_order module parameter Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 24/26] scsi: scsi_debug: Support injecting unaligned write errors Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 25/26] scsi: scsi_debug: Skip host/bus reset settle delay Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 26/26] scsi: ufs: Inform the block layer about write ordering Bart Van Assche
[not found] ` <37f95f44-ab1d-20db-e0c7-94946cb9d4eb@quicinc.com>
2024-11-22 18:20 ` Bart Van Assche
2024-11-23 0:34 ` Can Guo
2024-11-19 8:01 ` [PATCH v16 00/26] Improve write performance for zoned UFS devices Damien Le Moal
2024-11-19 19:08 ` Bart Van Assche
2024-11-21 3:20 ` Damien Le Moal
2024-11-21 18:00 ` Bart Van Assche
2024-11-25 3:59 ` Damien Le Moal
2025-01-09 19:02 ` Bart Van Assche
2025-01-10 5:10 ` Damien Le Moal
2024-11-19 12:25 ` Christoph Hellwig
2024-11-19 18:52 ` Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=db6d72c5-5221-413f-a355-df8ab414f63e@kernel.org \
--to=dlemoal@kernel.org \
--cc=axboe@kernel.dk \
--cc=bvanassche@acm.org \
--cc=hch@lst.de \
--cc=jaegeuk@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.