All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dm: Fix deadlock when reloading a multipath table
@ 2025-10-09  3:04 Benjamin Marzinski
  2025-10-09  9:57 ` Martin Wilck
  0 siblings, 1 reply; 11+ messages in thread
From: Benjamin Marzinski @ 2025-10-09  3:04 UTC (permalink / raw)
  To: Mikulas Patocka, Mike Snitzer; +Cc: Martin Wilck, dm-devel

Request-based devices (dm-multipath) queue I/O in blk-mq on noflush
suspends. Any queued IO will make it impossible to freeze the queue. If
a process attempts to update the queue limits while there is queued IO,
it can be get stuck holding the limits lock, while unable to freeze the
queue. If device-mapper then attempts to update the limits during a
table swap, it will deadlock trying to grab the limits lock while making
it impossible to flush the IO.

Disallow updating the queue limits during a table swap, when updating an
immutable request-based dm device (dm-multipath) during a noflush
suspend. It is userspace's responsibility to make sure that the new
table uses the same limits as the existing table if it asks for a
noflush suspend.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 drivers/md/dm-table.c |  4 ++++
 drivers/md/dm-thin.c  |  7 ++-----
 drivers/md/dm.c       | 35 +++++++++++++++++++++++------------
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index ad0a60a07b93..0522cd700e0e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2043,6 +2043,10 @@ bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size,
 	return true;
 }
 
+/*
+ * This function will be skipped by noflush reloads of immutable request
+ * based devices (dm-mpath).
+ */
 int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			      struct queue_limits *limits)
 {
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c84149ba4e38..6f98936f0e05 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4383,11 +4383,8 @@ static void thin_postsuspend(struct dm_target *ti)
 {
 	struct thin_c *tc = ti->private;
 
-	/*
-	 * The dm_noflush_suspending flag has been cleared by now, so
-	 * unfortunately we must always run this.
-	 */
-	noflush_work(tc, do_noflush_stop);
+	if (dm_noflush_suspending(ti))
+		noflush_work(tc, do_noflush_stop);
 }
 
 static int thin_preresume(struct dm_target *ti)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3ede5ba4cf7e..87e65c48dd04 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2439,7 +2439,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 {
 	struct dm_table *old_map;
 	sector_t size, old_size;
-	int ret;
 
 	lockdep_assert_held(&md->suspend_lock);
 
@@ -2454,11 +2453,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 
 	set_capacity(md->disk, size);
 
-	ret = dm_table_set_restrictions(t, md->queue, limits);
-	if (ret) {
-		set_capacity(md->disk, old_size);
-		old_map = ERR_PTR(ret);
-		goto out;
+	if (limits) {
+		int ret = dm_table_set_restrictions(t, md->queue, limits);
+		if (ret) {
+			set_capacity(md->disk, old_size);
+			old_map = ERR_PTR(ret);
+			goto out;
+		}
 	}
 
 	/*
@@ -2836,6 +2837,7 @@ static void dm_wq_work(struct work_struct *work)
 
 static void dm_queue_flush(struct mapped_device *md)
 {
+	clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
 	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
 	smp_mb__after_atomic();
 	queue_work(md->wq, &md->work);
@@ -2848,6 +2850,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 {
 	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
 	struct queue_limits limits;
+	bool update_limits = true;
 	int r;
 
 	mutex_lock(&md->suspend_lock);
@@ -2856,20 +2859,31 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 	if (!dm_suspended_md(md))
 		goto out;
 
+	/*
+	 * To avoid a potential deadlock locking the queue limits, disallow
+	 * updating the queue limits during a table swap, when updating an
+	 * immutable request-based dm device (dm-multipath) during a noflush
+	 * suspend. It is userspace's responsibility to make sure that the new
+	 * table uses the same limits as the existing table, if it asks for a
+	 * noflush suspend.
+	 */
+	if (dm_request_based(md) && md->immutable_target &&
+	    __noflush_suspending(md))
+		update_limits = false;
 	/*
 	 * If the new table has no data devices, retain the existing limits.
 	 * This helps multipath with queue_if_no_path if all paths disappear,
 	 * then new I/O is queued based on these limits, and then some paths
 	 * reappear.
 	 */
-	if (dm_table_has_no_data_devices(table)) {
+	else if (dm_table_has_no_data_devices(table)) {
 		live_map = dm_get_live_table_fast(md);
 		if (live_map)
 			limits = md->queue->limits;
 		dm_put_live_table_fast(md);
 	}
 
-	if (!live_map) {
+	if (update_limits && !live_map) {
 		r = dm_calculate_queue_limits(table, &limits);
 		if (r) {
 			map = ERR_PTR(r);
@@ -2877,7 +2891,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 		}
 	}
 
-	map = __bind(md, table, &limits);
+	map = __bind(md, table, update_limits ? &limits : NULL);
 	dm_issue_global_event();
 
 out:
@@ -2930,7 +2944,6 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 
 	/*
 	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
-	 * This flag is cleared before dm_suspend returns.
 	 */
 	if (noflush)
 		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
@@ -2993,8 +3006,6 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	if (!r)
 		set_bit(dmf_suspended_flag, &md->flags);
 
-	if (noflush)
-		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
 	if (map)
 		synchronize_srcu(&md->io_barrier);
 
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-10-23 17:54 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-09  3:04 [PATCH] dm: Fix deadlock when reloading a multipath table Benjamin Marzinski
2025-10-09  9:57 ` Martin Wilck
2025-10-09 22:25   ` Benjamin Marzinski
2025-10-10 10:19     ` Martin Wilck
2025-10-21 19:16       ` Benjamin Marzinski
2025-10-22 14:11         ` Martin Wilck
2025-10-23 17:39           ` Benjamin Marzinski
2025-10-23 17:54             ` Bart Van Assche
2025-10-09 23:29   ` Bart Van Assche
2025-10-10  0:39     ` Benjamin Marzinski
2025-10-10  9:23     ` Martin Wilck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.