From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
Christoph Hellwig <hch@lst.de>,
Damien Le Moal <dlemoal@kernel.org>,
Jaegeuk Kim <jaegeuk@kernel.org>,
Bart Van Assche <bvanassche@acm.org>
Subject: [PATCH v16 20/26] blk-zoned: Support pipelining of zoned writes
Date: Mon, 18 Nov 2024 16:28:09 -0800 [thread overview]
Message-ID: <20241119002815.600608-21-bvanassche@acm.org> (raw)
In-Reply-To: <20241119002815.600608-1-bvanassche@acm.org>
Support pipelining of zoned writes if the block driver preserves the write
order per hardware queue. For each zone, track the software queue to which
writes have been queued. If zoned writes are pipelined, submit new writes
to the same software queue as the writes that are already in progress.
This prevents the reordering that would occur if requests for the same
zone were submitted to different software or hardware queues.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
block/blk-zoned.c | 40 ++++++++++++++++++++++++++++------------
1 file changed, 28 insertions(+), 12 deletions(-)
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 8d0fac14c837..f934c0cb5fdd 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -8,6 +8,7 @@
* Copyright (c) 2016, Damien Le Moal
* Copyright (c) 2016, Western Digital
* Copyright (c) 2024, Western Digital Corporation or its affiliates.
+ * Copyright 2024 Google LLC
*/
#include <linux/kernel.h>
@@ -59,6 +60,8 @@ static const char *const zone_cond_name[] = {
* as a number of 512B sectors.
* @wp_offset_compl: End offset for completed zoned writes as a number of 512
* byte sectors.
+ * @swq_cpu: Software queue to submit writes to for drivers that preserve the
+ * write order.
* @bio_list: The list of BIOs that are currently plugged.
* @bio_work: Work struct to handle issuing of plugged BIOs
* @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -73,6 +76,7 @@ struct blk_zone_wplug {
unsigned int zone_no;
unsigned int wp_offset;
unsigned int wp_offset_compl;
+ int swq_cpu;
struct bio_list bio_list;
struct work_struct bio_work;
struct rcu_head rcu_head;
@@ -82,8 +86,7 @@ struct blk_zone_wplug {
/*
* Zone write plug flags bits:
* - BLK_ZONE_WPLUG_PLUGGED: Indicates that the zone write plug is plugged,
- * that is, that write BIOs are being throttled due to a write BIO already
- * being executed or the zone write plug bio list is not empty.
+ * that is, that write BIOs are being throttled.
* - BLK_ZONE_WPLUG_ERROR: Indicates that a write error happened which will be
* recovered with a report zone to update the zone write pointer offset.
* - BLK_ZONE_WPLUG_UNHASHED: Indicates that the zone write plug was removed
@@ -535,6 +538,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
zwplug->zone_no = zno;
zwplug->wp_offset = sector & (disk->queue->limits.chunk_sectors - 1);
zwplug->wp_offset_compl = 0;
+ zwplug->swq_cpu = -1;
bio_list_init(&zwplug->bio_list);
INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
zwplug->disk = disk;
@@ -973,7 +977,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
return false;
}
-static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
+static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs,
+ int *swq_cpu)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
sector_t sector = bio->bi_iter.bi_sector;
@@ -1017,11 +1022,19 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
bio_set_flag(bio, BIO_ZONE_WRITE_PLUGGING);
/*
- * If the zone is already plugged or has a pending error, add the BIO
- * to the plug BIO list. Otherwise, plug and let the BIO execute.
+ * If the zone has a pending error or is already plugged, add the BIO
+ * to the plug BIO list. Otherwise, execute the BIO and plug if not yet
+ * plugged and if the write order is not preserved.
*/
- if (zwplug->flags & BLK_ZONE_WPLUG_BUSY)
+ if (zwplug->flags & BLK_ZONE_WPLUG_BUSY) {
goto plug;
+ } else if (disk->queue->limits.driver_preserves_write_order) {
+ if (zwplug->swq_cpu < 0)
+ zwplug->swq_cpu = raw_smp_processor_id();
+ *swq_cpu = zwplug->swq_cpu;
+ } else {
+ zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
+ }
/*
* If an error is detected when preparing the BIO, add it to the BIO
@@ -1030,8 +1043,6 @@ static bool blk_zone_wplug_handle_write(struct bio *bio, unsigned int nr_segs)
if (!blk_zone_wplug_prepare_bio(zwplug, bio))
goto plug;
- zwplug->flags |= BLK_ZONE_WPLUG_PLUGGED;
-
spin_unlock_irqrestore(&zwplug->lock, flags);
return false;
@@ -1107,7 +1118,7 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs, int *swq_cpu)
fallthrough;
case REQ_OP_WRITE:
case REQ_OP_WRITE_ZEROES:
- return blk_zone_wplug_handle_write(bio, nr_segs);
+ return blk_zone_wplug_handle_write(bio, nr_segs, swq_cpu);
case REQ_OP_ZONE_RESET:
return blk_zone_wplug_handle_reset_or_finish(bio, 0);
case REQ_OP_ZONE_FINISH:
@@ -1131,7 +1142,6 @@ static void disk_zone_wplug_schedule_bio_work(struct gendisk *disk,
* of the next plugged BIO. blk_zone_wplug_bio_work() will release the
* reference we take here.
*/
- WARN_ON_ONCE(!(zwplug->flags & BLK_ZONE_WPLUG_PLUGGED));
refcount_inc(&zwplug->ref);
queue_work(disk->zone_wplugs_wq, &zwplug->bio_work);
}
@@ -1185,6 +1195,9 @@ static void disk_zone_wplug_unplug_bio(struct gendisk *disk,
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
+ if (refcount_read(&zwplug->ref) == 2)
+ zwplug->swq_cpu = -1;
+
/*
* If the zone is full (it was fully written or finished, or empty
* (it was reset), remove its zone write plug from the hash table.
@@ -1937,6 +1950,7 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
unsigned int zwp_zone_no, zwp_ref;
unsigned int zwp_bio_list_size;
unsigned long flags;
+ int swq_cpu;
spin_lock_irqsave(&zwplug->lock, flags);
zwp_zone_no = zwplug->zone_no;
@@ -1945,13 +1959,15 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
zwp_wp_offset = zwplug->wp_offset;
zwp_wp_offset_compl = zwplug->wp_offset_compl;
zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
+ swq_cpu = zwplug->swq_cpu;
spin_unlock_irqrestore(&zwplug->lock, flags);
bool all_zwr_inserted = blk_zone_all_zwr_inserted(zwplug);
- seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u wp_offset_compl %u bio_list_size %u all_zwr_inserted %d\n",
+ seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u wp_offset_compl %u bio_list_size %u all_zwr_inserted %d swq_cpu %d\n",
zwp_zone_no, zwp_flags, zwp_ref, zwp_wp_offset,
- zwp_wp_offset_compl, zwp_bio_list_size, all_zwr_inserted);
+ zwp_wp_offset_compl, zwp_bio_list_size, all_zwr_inserted,
+ swq_cpu);
}
int queue_zone_wplugs_show(void *data, struct seq_file *m)
next prev parent reply other threads:[~2024-11-19 0:29 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-19 0:27 [PATCH v16 00/26] Improve write performance for zoned UFS devices Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 01/26] blk-zoned: Fix a reference count leak Bart Van Assche
2024-11-19 2:23 ` Damien Le Moal
2024-11-19 20:21 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 02/26] blk-zoned: Split disk_zone_wplugs_work() Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 03/26] blk-zoned: Split queue_zone_wplugs_show() Bart Van Assche
2024-11-19 2:25 ` Damien Le Moal
2024-11-19 0:27 ` [PATCH v16 04/26] blk-zoned: Only handle errors after pending zoned writes have completed Bart Van Assche
2024-11-19 2:50 ` Damien Le Moal
2024-11-19 20:51 ` Bart Van Assche
2024-11-21 3:23 ` Damien Le Moal
2024-11-21 17:43 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 05/26] blk-zoned: Fix a deadlock triggered by unaligned writes Bart Van Assche
2024-11-19 2:57 ` Damien Le Moal
2024-11-19 21:04 ` Bart Van Assche
2024-11-21 3:32 ` Damien Le Moal
2024-11-21 17:51 ` Bart Van Assche
2024-11-25 4:00 ` Damien Le Moal
2024-11-25 4:19 ` Damien Le Moal
2025-01-09 19:11 ` Bart Van Assche
2025-01-10 5:07 ` Damien Le Moal
2025-01-10 18:17 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 06/26] blk-zoned: Fix requeuing of zoned writes Bart Van Assche
2024-11-19 3:00 ` Damien Le Moal
2024-11-19 21:06 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 07/26] block: Support block drivers that preserve the order of write requests Bart Van Assche
2024-11-19 7:37 ` Damien Le Moal
2024-11-19 21:08 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 08/26] dm-linear: Report to the block layer that the write order is preserved Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 09/26] mq-deadline: Remove a local variable Bart Van Assche
2024-11-19 7:38 ` Damien Le Moal
2024-11-19 21:11 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 10/26] blk-mq: Clean up blk_mq_requeue_work() Bart Van Assche
2024-11-19 7:39 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 11/26] block: Optimize blk_mq_submit_bio() for the cache hit scenario Bart Van Assche
2024-11-19 7:40 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 12/26] block: Rework request allocation in blk_mq_submit_bio() Bart Van Assche
2024-11-19 7:44 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 13/26] block: Support allocating from a specific software queue Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 14/26] blk-mq: Restore the zoned write order when requeuing Bart Van Assche
2024-11-19 7:52 ` Damien Le Moal
2024-11-19 21:16 ` Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 15/26] blk-zoned: Document the locking order Bart Van Assche
2024-11-19 7:52 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 16/26] blk-zoned: Document locking assumptions Bart Van Assche
2024-11-19 7:53 ` Damien Le Moal
2024-11-19 21:18 ` Bart Van Assche
2024-11-21 3:34 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 17/26] blk-zoned: Uninline functions that are not in the hot path Bart Van Assche
2024-11-19 7:55 ` Damien Le Moal
2024-11-19 21:20 ` Bart Van Assche
2024-11-21 3:36 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 18/26] blk-zoned: Make disk_should_remove_zone_wplug() more robust Bart Van Assche
2024-11-19 7:58 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 19/26] blk-zoned: Add an argument to blk_zone_plug_bio() Bart Van Assche
2024-11-19 0:28 ` Bart Van Assche [this message]
2024-11-19 0:28 ` [PATCH v16 21/26] scsi: core: Retry unaligned zoned writes Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 22/26] scsi: sd: Increase retry count for " Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 23/26] scsi: scsi_debug: Add the preserves_write_order module parameter Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 24/26] scsi: scsi_debug: Support injecting unaligned write errors Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 25/26] scsi: scsi_debug: Skip host/bus reset settle delay Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 26/26] scsi: ufs: Inform the block layer about write ordering Bart Van Assche
[not found] ` <37f95f44-ab1d-20db-e0c7-94946cb9d4eb@quicinc.com>
2024-11-22 18:20 ` Bart Van Assche
2024-11-23 0:34 ` Can Guo
2024-11-19 8:01 ` [PATCH v16 00/26] Improve write performance for zoned UFS devices Damien Le Moal
2024-11-19 19:08 ` Bart Van Assche
2024-11-21 3:20 ` Damien Le Moal
2024-11-21 18:00 ` Bart Van Assche
2024-11-25 3:59 ` Damien Le Moal
2025-01-09 19:02 ` Bart Van Assche
2025-01-10 5:10 ` Damien Le Moal
2024-11-19 12:25 ` Christoph Hellwig
2024-11-19 18:52 ` Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241119002815.600608-21-bvanassche@acm.org \
--to=bvanassche@acm.org \
--cc=axboe@kernel.dk \
--cc=dlemoal@kernel.org \
--cc=hch@lst.de \
--cc=jaegeuk@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox