public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] md/raid10: fix deadlock with check operation and nowait requests
@ 2026-02-10  5:09 Josh Hunt
  2026-02-10 10:17 ` kernel test robot
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Josh Hunt @ 2026-02-10  5:09 UTC (permalink / raw)
  To: song, yukuai, linan122, linux-raid; +Cc: ncroxon, Josh Hunt, stable

When an array check is running it will raise the barrier at which point
normal requests will become blocked and increment the nr_pending value to
signal there is work pending inside of wait_barrier(). NOWAIT requests
do not block and so will return immediately with an error, and additionally
do not increment nr_pending in wait_barrier(). Upstream change
43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") added a
call to raid_end_bio_io() to fix a memory leak when NOWAIT requests hit
this condition. raid_end_bio_io() eventually calls allow_barrier() and
it will unconditionally do an atomic_dec_and_test(&conf->nr_pending) even
though the corresponding increment on nr_pending didn't happen in the
NOWAIT case.

This can be easily seen by starting a check operation while an application
is doing nowait IO on the same array. This results in a deadlocked state
because the nr_pending value underflows, so the md resync thread gets
stuck waiting for nr_pending to reach 0.

Output of r10conf state of the array when we hit this condition:

  crash> struct r10conf.barrier,nr_pending,nr_waiting,nr_queued <addr of r10conf>
    barrier = 1,
    nr_pending = {
      counter = -41
    },
    nr_waiting = 15,
    nr_queued = 0,

Example of md_sync thread stuck waiting on raise_barrier() and other
requests stuck in wait_barrier():

md1_resync
[<0>] raise_barrier+0xce/0x1c0
[<0>] raid10_sync_request+0x1ca/0x1ed0
[<0>] md_do_sync+0x779/0x1110
[<0>] md_thread+0x90/0x160
[<0>] kthread+0xbe/0xf0
[<0>] ret_from_fork+0x34/0x50
[<0>] ret_from_fork_asm+0x1a/0x30

kworker/u1040:2+flush-253:4
[<0>] wait_barrier+0x1de/0x220
[<0>] regular_request_wait+0x30/0x180
[<0>] raid10_make_request+0x261/0x1000
[<0>] md_handle_request+0x13b/0x230
[<0>] __submit_bio+0x107/0x1f0
[<0>] submit_bio_noacct_nocheck+0x16f/0x390
[<0>] ext4_io_submit+0x24/0x40
[<0>] ext4_do_writepages+0x254/0xc80
[<0>] ext4_writepages+0x84/0x120
[<0>] do_writepages+0x7a/0x260
[<0>] __writeback_single_inode+0x3d/0x300
[<0>] writeback_sb_inodes+0x1dd/0x470
[<0>] __writeback_inodes_wb+0x4c/0xe0
[<0>] wb_writeback+0x18b/0x2d0
[<0>] wb_workfn+0x2a1/0x400
[<0>] process_one_work+0x149/0x330
[<0>] worker_thread+0x2d2/0x410
[<0>] kthread+0xbe/0xf0
[<0>] ret_from_fork+0x34/0x50
[<0>] ret_from_fork_asm+0x1a/0x30

Fixes: 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Hunt <johunt@akamai.com>
---
 drivers/md/raid10.c | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9debb20cf129..184b5b3906d1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -68,6 +68,7 @@
  */
 
 static void allow_barrier(struct r10conf *conf);
+static void allow_barrier_nowait(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
 static int _enough(struct r10conf *conf, int previous, int ignore);
 static int enough(struct r10conf *conf, int ignore);
@@ -317,7 +318,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
-static void raid_end_bio_io(struct r10bio *r10_bio)
+static void raid_end_bio_io(struct r10bio *r10_bio, bool adjust_pending)
 {
 	struct bio *bio = r10_bio->master_bio;
 	struct r10conf *conf = r10_bio->mddev->private;
@@ -332,7 +333,10 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.
 	 */
-	allow_barrier(conf);
+	if (adjust_pending)
+		allow_barrier(conf);
+	else
+		allow_barrier_nowait(conf);
 
 	free_r10bio(r10_bio);
 }
@@ -414,7 +418,7 @@ static void raid10_end_read_request(struct bio *bio)
 			uptodate = 1;
 	}
 	if (uptodate) {
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, true);
 		rdev_dec_pending(rdev, conf->mddev);
 	} else {
 		/*
@@ -446,7 +450,7 @@ static void one_write_done(struct r10bio *r10_bio)
 			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
 				reschedule_retry(r10_bio);
 			else
-				raid_end_bio_io(r10_bio);
+				raid_end_bio_io(r10_bio, true);
 		}
 	}
 }
@@ -1030,13 +1034,23 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
 	return ret;
 }
 
-static void allow_barrier(struct r10conf *conf)
+static void __allow_barrier(struct r10conf *conf, bool adjust_pending)
 {
-	if ((atomic_dec_and_test(&conf->nr_pending)) ||
+	if ((adjust_pending && atomic_dec_and_test(&conf->nr_pending)) ||
 			(conf->array_freeze_pending))
 		wake_up_barrier(conf);
 }
 
+static void allow_barrier(struct r10conf *conf)
+{
+	__allow_barrier(conf, true);
+}
+
+static void allow_barrier_nowait(struct r10conf *conf)
+{
+	__allow_barrier(conf, false);
+}
+
 static void freeze_array(struct r10conf *conf, int extra)
 {
 	/* stop syncio and normal IO and wait for everything to
@@ -1184,7 +1198,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	}
 
 	if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, false);
 		return;
 	}
 
@@ -1195,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 					    mdname(mddev), b,
 					    (unsigned long long)r10_bio->sector);
 		}
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, true);
 		return;
 	}
 	if (err_rdev)
@@ -1372,7 +1386,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
 	sectors = r10_bio->sectors;
 	if (!regular_request_wait(mddev, conf, bio, sectors)) {
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, false);
 		return;
 	}
 
@@ -2952,7 +2966,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			if (test_bit(R10BIO_WriteError,
 				     &r10_bio->state))
 				close_write(r10_bio);
-			raid_end_bio_io(r10_bio);
+			raid_end_bio_io(r10_bio, true);
 		}
 	}
 }
@@ -2987,7 +3001,7 @@ static void raid10d(struct md_thread *thread)
 			if (test_bit(R10BIO_WriteError,
 				     &r10_bio->state))
 				close_write(r10_bio);
-			raid_end_bio_io(r10_bio);
+			raid_end_bio_io(r10_bio, true);
 		}
 	}
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-02-10 12:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-10  5:09 [PATCH] md/raid10: fix deadlock with check operation and nowait requests Josh Hunt
2026-02-10 10:17 ` kernel test robot
2026-02-10 10:18 ` kernel test robot
2026-02-10 11:14 ` kernel test robot
2026-02-10 11:56   ` Josh Hunt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox