From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
Christoph Hellwig <hch@lst.de>,
Damien Le Moal <dlemoal@kernel.org>,
Jaegeuk Kim <jaegeuk@kernel.org>,
Bart Van Assche <bvanassche@acm.org>
Subject: [PATCH v16 05/26] blk-zoned: Fix a deadlock triggered by unaligned writes
Date: Mon, 18 Nov 2024 16:27:54 -0800 [thread overview]
Message-ID: <20241119002815.600608-6-bvanassche@acm.org> (raw)
In-Reply-To: <20241119002815.600608-1-bvanassche@acm.org>
If the queue is filled with unaligned writes then the following
deadlock occurs:
Call Trace:
<TASK>
__schedule+0x8cc/0x2190
schedule+0xdd/0x2b0
blk_queue_enter+0x2ce/0x4f0
blk_mq_alloc_request+0x303/0x810
scsi_execute_cmd+0x3f4/0x7b0
sd_zbc_do_report_zones+0x19e/0x4c0
sd_zbc_report_zones+0x304/0x920
disk_zone_wplug_handle_error+0x237/0x920
disk_zone_wplugs_work+0x17e/0x430
process_one_work+0xdd0/0x1490
worker_thread+0x5eb/0x1010
kthread+0x2e5/0x3b0
ret_from_fork+0x3a/0x80
</TASK>
Fix this deadlock by removing the disk->fops->report_zones() call and by
deriving the write pointer information from successfully completed zoned
writes.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
block/blk-zoned.c | 69 +++++++++++++++++++----------------------------
block/blk.h | 4 ++-
2 files changed, 31 insertions(+), 42 deletions(-)
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index b570b773e65f..284820b29285 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -56,6 +56,8 @@ static const char *const zone_cond_name[] = {
* @zone_no: The number of the zone the plug is managing.
* @wp_offset: The zone write pointer location relative to the start of the zone
* as a number of 512B sectors.
+ * @wp_offset_compl: End offset for completed zoned writes as a number of 512
+ * byte sectors.
* @bio_list: The list of BIOs that are currently plugged.
* @bio_work: Work struct to handle issuing of plugged BIOs
* @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -69,6 +71,7 @@ struct blk_zone_wplug {
unsigned int flags;
unsigned int zone_no;
unsigned int wp_offset;
+ unsigned int wp_offset_compl;
struct bio_list bio_list;
struct work_struct bio_work;
struct rcu_head rcu_head;
@@ -531,6 +534,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
zwplug->flags = 0;
zwplug->zone_no = zno;
zwplug->wp_offset = sector & (disk->queue->limits.chunk_sectors - 1);
+ zwplug->wp_offset_compl = 0;
bio_list_init(&zwplug->bio_list);
INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
zwplug->disk = disk;
@@ -1226,6 +1230,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
struct gendisk *disk = bio->bi_bdev->bd_disk;
struct blk_zone_wplug *zwplug =
disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
+ unsigned int end_sector;
unsigned long flags;
if (WARN_ON_ONCE(!zwplug))
@@ -1243,11 +1248,24 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
bio->bi_opf |= REQ_OP_ZONE_APPEND;
}
- /*
- * If the BIO failed, mark the plug as having an error to trigger
- * recovery.
- */
- if (bio->bi_status != BLK_STS_OK) {
+ if (bio->bi_status == BLK_STS_OK) {
+ switch (bio_op(bio)) {
+ case REQ_OP_WRITE:
+ case REQ_OP_ZONE_APPEND:
+ case REQ_OP_WRITE_ZEROES:
+ end_sector = bdev_offset_from_zone_start(disk->part0,
+ bio->bi_iter.bi_sector + bio_sectors(bio));
+ if (end_sector > zwplug->wp_offset_compl)
+ zwplug->wp_offset_compl = end_sector;
+ break;
+ default:
+ break;
+ }
+ } else {
+ /*
+ * If the BIO failed, mark the plug as having an error to
+ * trigger recovery.
+ */
spin_lock_irqsave(&zwplug->lock, flags);
disk_zone_wplug_set_error(disk, zwplug);
spin_unlock_irqrestore(&zwplug->lock, flags);
@@ -1388,30 +1406,10 @@ static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
}
}
-static int blk_zone_wplug_report_zone_cb(struct blk_zone *zone,
- unsigned int idx, void *data)
-{
- struct blk_zone *zonep = data;
-
- *zonep = *zone;
- return 0;
-}
-
static void disk_zone_wplug_handle_error(struct gendisk *disk,
struct blk_zone_wplug *zwplug)
{
- sector_t zone_start_sector =
- bdev_zone_sectors(disk->part0) * zwplug->zone_no;
- unsigned int noio_flag;
- struct blk_zone zone;
unsigned long flags;
- int ret;
-
- /* Get the current zone information from the device. */
- noio_flag = memalloc_noio_save();
- ret = disk->fops->report_zones(disk, zone_start_sector, 1,
- blk_zone_wplug_report_zone_cb, &zone);
- memalloc_noio_restore(noio_flag);
spin_lock_irqsave(&zwplug->lock, flags);
@@ -1425,19 +1423,8 @@ static void disk_zone_wplug_handle_error(struct gendisk *disk,
zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
- if (ret != 1) {
- /*
- * We failed to get the zone information, meaning that something
- * is likely really wrong with the device. Abort all remaining
- * plugged BIOs as otherwise we could endup waiting forever on
- * plugged BIOs to complete if there is a queue freeze on-going.
- */
- disk_zone_wplug_abort(zwplug);
- goto unplug;
- }
-
/* Update the zone write pointer offset. */
- zwplug->wp_offset = blk_zone_wp_offset(&zone);
+ zwplug->wp_offset = zwplug->wp_offset_compl;
disk_zone_wplug_abort_unaligned(disk, zwplug);
/* Restart BIO submission if we still have any BIO left. */
@@ -1446,7 +1433,6 @@ static void disk_zone_wplug_handle_error(struct gendisk *disk,
goto unlock;
}
-unplug:
zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
if (disk_should_remove_zone_wplug(disk, zwplug))
disk_remove_zone_wplug(disk, zwplug);
@@ -1978,7 +1964,7 @@ EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
struct seq_file *m)
{
- unsigned int zwp_wp_offset, zwp_flags;
+ unsigned int zwp_wp_offset, zwp_wp_offset_compl, zwp_flags;
unsigned int zwp_zone_no, zwp_ref;
unsigned int zwp_bio_list_size;
unsigned long flags;
@@ -1988,14 +1974,15 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
zwp_flags = zwplug->flags;
zwp_ref = refcount_read(&zwplug->ref);
zwp_wp_offset = zwplug->wp_offset;
+ zwp_wp_offset_compl = zwplug->wp_offset_compl;
zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
spin_unlock_irqrestore(&zwplug->lock, flags);
bool all_zwr_inserted = blk_zone_all_zwr_inserted(zwplug);
- seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u bio_list_size %u all_zwr_inserted %d\n",
+ seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u wp_offset_compl %u bio_list_size %u all_zwr_inserted %d\n",
zwp_zone_no, zwp_flags, zwp_ref, zwp_wp_offset,
- zwp_bio_list_size, all_zwr_inserted);
+ zwp_wp_offset_compl, zwp_bio_list_size, all_zwr_inserted);
}
int queue_zone_wplugs_show(void *data, struct seq_file *m)
diff --git a/block/blk.h b/block/blk.h
index be945db6298d..88a6e258eafe 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -470,8 +470,10 @@ static inline void blk_zone_update_request_bio(struct request *rq,
* the original BIO sector so that blk_zone_write_plug_bio_endio() can
* lookup the zone write plug.
*/
- if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio))
+ if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio)) {
bio->bi_iter.bi_sector = rq->__sector;
+ bio->bi_iter.bi_size = rq->__data_len;
+ }
}
void blk_zone_write_plug_requeue_request(struct request *rq);
next prev parent reply other threads:[~2024-11-19 0:28 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-19 0:27 [PATCH v16 00/26] Improve write performance for zoned UFS devices Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 01/26] blk-zoned: Fix a reference count leak Bart Van Assche
2024-11-19 2:23 ` Damien Le Moal
2024-11-19 20:21 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 02/26] blk-zoned: Split disk_zone_wplugs_work() Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 03/26] blk-zoned: Split queue_zone_wplugs_show() Bart Van Assche
2024-11-19 2:25 ` Damien Le Moal
2024-11-19 0:27 ` [PATCH v16 04/26] blk-zoned: Only handle errors after pending zoned writes have completed Bart Van Assche
2024-11-19 2:50 ` Damien Le Moal
2024-11-19 20:51 ` Bart Van Assche
2024-11-21 3:23 ` Damien Le Moal
2024-11-21 17:43 ` Bart Van Assche
2024-11-19 0:27 ` Bart Van Assche [this message]
2024-11-19 2:57 ` [PATCH v16 05/26] blk-zoned: Fix a deadlock triggered by unaligned writes Damien Le Moal
2024-11-19 21:04 ` Bart Van Assche
2024-11-21 3:32 ` Damien Le Moal
2024-11-21 17:51 ` Bart Van Assche
2024-11-25 4:00 ` Damien Le Moal
2024-11-25 4:19 ` Damien Le Moal
2025-01-09 19:11 ` Bart Van Assche
2025-01-10 5:07 ` Damien Le Moal
2025-01-10 18:17 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 06/26] blk-zoned: Fix requeuing of zoned writes Bart Van Assche
2024-11-19 3:00 ` Damien Le Moal
2024-11-19 21:06 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 07/26] block: Support block drivers that preserve the order of write requests Bart Van Assche
2024-11-19 7:37 ` Damien Le Moal
2024-11-19 21:08 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 08/26] dm-linear: Report to the block layer that the write order is preserved Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 09/26] mq-deadline: Remove a local variable Bart Van Assche
2024-11-19 7:38 ` Damien Le Moal
2024-11-19 21:11 ` Bart Van Assche
2024-11-19 0:27 ` [PATCH v16 10/26] blk-mq: Clean up blk_mq_requeue_work() Bart Van Assche
2024-11-19 7:39 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 11/26] block: Optimize blk_mq_submit_bio() for the cache hit scenario Bart Van Assche
2024-11-19 7:40 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 12/26] block: Rework request allocation in blk_mq_submit_bio() Bart Van Assche
2024-11-19 7:44 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 13/26] block: Support allocating from a specific software queue Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 14/26] blk-mq: Restore the zoned write order when requeuing Bart Van Assche
2024-11-19 7:52 ` Damien Le Moal
2024-11-19 21:16 ` Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 15/26] blk-zoned: Document the locking order Bart Van Assche
2024-11-19 7:52 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 16/26] blk-zoned: Document locking assumptions Bart Van Assche
2024-11-19 7:53 ` Damien Le Moal
2024-11-19 21:18 ` Bart Van Assche
2024-11-21 3:34 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 17/26] blk-zoned: Uninline functions that are not in the hot path Bart Van Assche
2024-11-19 7:55 ` Damien Le Moal
2024-11-19 21:20 ` Bart Van Assche
2024-11-21 3:36 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 18/26] blk-zoned: Make disk_should_remove_zone_wplug() more robust Bart Van Assche
2024-11-19 7:58 ` Damien Le Moal
2024-11-19 0:28 ` [PATCH v16 19/26] blk-zoned: Add an argument to blk_zone_plug_bio() Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 20/26] blk-zoned: Support pipelining of zoned writes Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 21/26] scsi: core: Retry unaligned " Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 22/26] scsi: sd: Increase retry count for " Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 23/26] scsi: scsi_debug: Add the preserves_write_order module parameter Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 24/26] scsi: scsi_debug: Support injecting unaligned write errors Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 25/26] scsi: scsi_debug: Skip host/bus reset settle delay Bart Van Assche
2024-11-19 0:28 ` [PATCH v16 26/26] scsi: ufs: Inform the block layer about write ordering Bart Van Assche
[not found] ` <37f95f44-ab1d-20db-e0c7-94946cb9d4eb@quicinc.com>
2024-11-22 18:20 ` Bart Van Assche
2024-11-23 0:34 ` Can Guo
2024-11-19 8:01 ` [PATCH v16 00/26] Improve write performance for zoned UFS devices Damien Le Moal
2024-11-19 19:08 ` Bart Van Assche
2024-11-21 3:20 ` Damien Le Moal
2024-11-21 18:00 ` Bart Van Assche
2024-11-25 3:59 ` Damien Le Moal
2025-01-09 19:02 ` Bart Van Assche
2025-01-10 5:10 ` Damien Le Moal
2024-11-19 12:25 ` Christoph Hellwig
2024-11-19 18:52 ` Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241119002815.600608-6-bvanassche@acm.org \
--to=bvanassche@acm.org \
--cc=axboe@kernel.dk \
--cc=dlemoal@kernel.org \
--cc=hch@lst.de \
--cc=jaegeuk@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox