public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] md/raid10: fix deadlock with check operation and nowait requests
@ 2026-02-10  5:09 Josh Hunt
  2026-02-10 10:17 ` kernel test robot
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Josh Hunt @ 2026-02-10  5:09 UTC (permalink / raw)
  To: song, yukuai, linan122, linux-raid; +Cc: ncroxon, Josh Hunt, stable

When an array check is running it will raise the barrier at which point
normal requests will become blocked and increment the nr_pending value to
signal there is work pending inside of wait_barrier(). NOWAIT requests
do not block and so will return immediately with an error, and additionally
do not increment nr_pending in wait_barrier(). Upstream change
43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") added a
call to raid_end_bio_io() to fix a memory leak when NOWAIT requests hit
this condition. raid_end_bio_io() eventually calls allow_barrier() and
it will unconditionally do an atomic_dec_and_test(&conf->nr_pending) even
though the corresponding increment on nr_pending didn't happen in the
NOWAIT case.

This can be easily seen by starting a check operation while an application
is doing nowait IO on the same array. This results in a deadlocked state
because the nr_pending value underflows, so the md resync thread gets
stuck waiting for nr_pending to reach 0.

Output of r10conf state of the array when we hit this condition:

  crash> struct r10conf.barrier,nr_pending,nr_waiting,nr_queued <addr of r10conf>
    barrier = 1,
    nr_pending = {
      counter = -41
    },
    nr_waiting = 15,
    nr_queued = 0,

Example of md_sync thread stuck waiting on raise_barrier() and other
requests stuck in wait_barrier():

md1_resync
[<0>] raise_barrier+0xce/0x1c0
[<0>] raid10_sync_request+0x1ca/0x1ed0
[<0>] md_do_sync+0x779/0x1110
[<0>] md_thread+0x90/0x160
[<0>] kthread+0xbe/0xf0
[<0>] ret_from_fork+0x34/0x50
[<0>] ret_from_fork_asm+0x1a/0x30

kworker/u1040:2+flush-253:4
[<0>] wait_barrier+0x1de/0x220
[<0>] regular_request_wait+0x30/0x180
[<0>] raid10_make_request+0x261/0x1000
[<0>] md_handle_request+0x13b/0x230
[<0>] __submit_bio+0x107/0x1f0
[<0>] submit_bio_noacct_nocheck+0x16f/0x390
[<0>] ext4_io_submit+0x24/0x40
[<0>] ext4_do_writepages+0x254/0xc80
[<0>] ext4_writepages+0x84/0x120
[<0>] do_writepages+0x7a/0x260
[<0>] __writeback_single_inode+0x3d/0x300
[<0>] writeback_sb_inodes+0x1dd/0x470
[<0>] __writeback_inodes_wb+0x4c/0xe0
[<0>] wb_writeback+0x18b/0x2d0
[<0>] wb_workfn+0x2a1/0x400
[<0>] process_one_work+0x149/0x330
[<0>] worker_thread+0x2d2/0x410
[<0>] kthread+0xbe/0xf0
[<0>] ret_from_fork+0x34/0x50
[<0>] ret_from_fork_asm+0x1a/0x30

Fixes: 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Hunt <johunt@akamai.com>
---
 drivers/md/raid10.c | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9debb20cf129..184b5b3906d1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -68,6 +68,7 @@
  */
 
 static void allow_barrier(struct r10conf *conf);
+static void allow_barrier_nowait(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
 static int _enough(struct r10conf *conf, int previous, int ignore);
 static int enough(struct r10conf *conf, int ignore);
@@ -317,7 +318,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
  * operation and are ready to return a success/failure code to the buffer
  * cache layer.
  */
-static void raid_end_bio_io(struct r10bio *r10_bio)
+static void raid_end_bio_io(struct r10bio *r10_bio, bool adjust_pending)
 {
 	struct bio *bio = r10_bio->master_bio;
 	struct r10conf *conf = r10_bio->mddev->private;
@@ -332,7 +333,10 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.
 	 */
-	allow_barrier(conf);
+	if (adjust_pending)
+		allow_barrier(conf);
+	else
+		allow_barrier_nowait(conf);
 
 	free_r10bio(r10_bio);
 }
@@ -414,7 +418,7 @@ static void raid10_end_read_request(struct bio *bio)
 			uptodate = 1;
 	}
 	if (uptodate) {
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, true);
 		rdev_dec_pending(rdev, conf->mddev);
 	} else {
 		/*
@@ -446,7 +450,7 @@ static void one_write_done(struct r10bio *r10_bio)
 			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
 				reschedule_retry(r10_bio);
 			else
-				raid_end_bio_io(r10_bio);
+				raid_end_bio_io(r10_bio, true);
 		}
 	}
 }
@@ -1030,13 +1034,23 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
 	return ret;
 }
 
-static void allow_barrier(struct r10conf *conf)
+static void __allow_barrier(struct r10conf *conf, bool adjust_pending)
 {
-	if ((atomic_dec_and_test(&conf->nr_pending)) ||
+	if ((adjust_pending && atomic_dec_and_test(&conf->nr_pending)) ||
 			(conf->array_freeze_pending))
 		wake_up_barrier(conf);
 }
 
+static void allow_barrier(struct r10conf *conf)
+{
+	__allow_barrier(conf, true);
+}
+
+static void allow_barrier_nowait(struct r10conf *conf)
+{
+	__allow_barrier(conf, false);
+}
+
 static void freeze_array(struct r10conf *conf, int extra)
 {
 	/* stop syncio and normal IO and wait for everything to
@@ -1184,7 +1198,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 	}
 
 	if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, false);
 		return;
 	}
 
@@ -1195,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
 					    mdname(mddev), b,
 					    (unsigned long long)r10_bio->sector);
 		}
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, true);
 		return;
 	}
 	if (err_rdev)
@@ -1372,7 +1386,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
 	sectors = r10_bio->sectors;
 	if (!regular_request_wait(mddev, conf, bio, sectors)) {
-		raid_end_bio_io(r10_bio);
+		raid_end_bio_io(r10_bio, false);
 		return;
 	}
 
@@ -2952,7 +2966,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 			if (test_bit(R10BIO_WriteError,
 				     &r10_bio->state))
 				close_write(r10_bio);
-			raid_end_bio_io(r10_bio);
+			raid_end_bio_io(r10_bio, true);
 		}
 	}
 }
@@ -2987,7 +3001,7 @@ static void raid10d(struct md_thread *thread)
 			if (test_bit(R10BIO_WriteError,
 				     &r10_bio->state))
 				close_write(r10_bio);
-			raid_end_bio_io(r10_bio);
+			raid_end_bio_io(r10_bio, true);
 		}
 	}
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-02-10 12:47 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-10  5:09 [PATCH] md/raid10: fix deadlock with check operation and nowait requests Josh Hunt
2026-02-10 10:17 ` kernel test robot
2026-02-10 10:18 ` kernel test robot
2026-02-10 11:14 ` kernel test robot
2026-02-10 11:56   ` Josh Hunt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox