* [PATCH v2] md/raid10: fix deadlock with check operation and nowait requests
@ 2026-02-10 16:51 Josh Hunt
2026-02-26 5:24 ` Yu Kuai
0 siblings, 1 reply; 3+ messages in thread
From: Josh Hunt @ 2026-02-10 16:51 UTC (permalink / raw)
To: song, yukuai, linan122, linux-raid; +Cc: ncroxon, Josh Hunt, stable
When an array check is running it will raise the barrier at which point
normal requests will become blocked and increment the nr_pending value to
signal there is work pending inside of wait_barrier(). NOWAIT requests
do not block and so will return immediately with an error, and additionally
do not increment nr_pending in wait_barrier(). Upstream change
43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") added a
call to raid_end_bio_io() to fix a memory leak when NOWAIT requests hit
this condition. raid_end_bio_io() eventually calls allow_barrier() and
it will unconditionally do an atomic_dec_and_test(&conf->nr_pending) even
though the corresponding increment on nr_pending didn't happen in the
NOWAIT case.
This can be easily seen by starting a check operation while an application is
doing nowait IO on the same array. This results in a deadlocked state due to
nr_pending value underflowing and so the md resync thread gets stuck waiting
for nr_pending to == 0.
Output of r10conf state of the array when we hit this condition:
crash> struct r10conf.barrier,nr_pending,nr_waiting,nr_queued <addr of r10conf>
barrier = 1,
nr_pending = {
counter = -41
},
nr_waiting = 15,
nr_queued = 0,
Example of md_sync thread stuck waiting on raise_barrier() and other requests
stuck in wait_barrier():
md1_resync
[<0>] raise_barrier+0xce/0x1c0
[<0>] raid10_sync_request+0x1ca/0x1ed0
[<0>] md_do_sync+0x779/0x1110
[<0>] md_thread+0x90/0x160
[<0>] kthread+0xbe/0xf0
[<0>] ret_from_fork+0x34/0x50
[<0>] ret_from_fork_asm+0x1a/0x30
kworker/u1040:2+flush-253:4
[<0>] wait_barrier+0x1de/0x220
[<0>] regular_request_wait+0x30/0x180
[<0>] raid10_make_request+0x261/0x1000
[<0>] md_handle_request+0x13b/0x230
[<0>] __submit_bio+0x107/0x1f0
[<0>] submit_bio_noacct_nocheck+0x16f/0x390
[<0>] ext4_io_submit+0x24/0x40
[<0>] ext4_do_writepages+0x254/0xc80
[<0>] ext4_writepages+0x84/0x120
[<0>] do_writepages+0x7a/0x260
[<0>] __writeback_single_inode+0x3d/0x300
[<0>] writeback_sb_inodes+0x1dd/0x470
[<0>] __writeback_inodes_wb+0x4c/0xe0
[<0>] wb_writeback+0x18b/0x2d0
[<0>] wb_workfn+0x2a1/0x400
[<0>] process_one_work+0x149/0x330
[<0>] worker_thread+0x2d2/0x410
[<0>] kthread+0xbe/0xf0
[<0>] ret_from_fork+0x34/0x50
[<0>] ret_from_fork_asm+0x1a/0x30
Fixes: 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Hunt <johunt@akamai.com>
---
drivers/md/raid10.c | 40 +++++++++++++++++++++++++++-------------
1 file changed, 27 insertions(+), 13 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9debb20cf129..b05066dde693 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -68,6 +68,7 @@
*/
static void allow_barrier(struct r10conf *conf);
+static void allow_barrier_nowait(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf);
static int _enough(struct r10conf *conf, int previous, int ignore);
static int enough(struct r10conf *conf, int ignore);
@@ -317,7 +318,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
-static void raid_end_bio_io(struct r10bio *r10_bio)
+static void raid_end_bio_io(struct r10bio *r10_bio, bool adjust_pending)
{
struct bio *bio = r10_bio->master_bio;
struct r10conf *conf = r10_bio->mddev->private;
@@ -332,7 +333,10 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
* Wake up any possible resync thread that waits for the device
* to go idle.
*/
- allow_barrier(conf);
+ if (adjust_pending)
+ allow_barrier(conf);
+ else
+ allow_barrier_nowait(conf);
free_r10bio(r10_bio);
}
@@ -414,7 +418,7 @@ static void raid10_end_read_request(struct bio *bio)
uptodate = 1;
}
if (uptodate) {
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, true);
rdev_dec_pending(rdev, conf->mddev);
} else {
/*
@@ -446,7 +450,7 @@ static void one_write_done(struct r10bio *r10_bio)
if (test_bit(R10BIO_MadeGood, &r10_bio->state))
reschedule_retry(r10_bio);
else
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, true);
}
}
}
@@ -1030,13 +1034,23 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
return ret;
}
-static void allow_barrier(struct r10conf *conf)
+static void __allow_barrier(struct r10conf *conf, bool adjust_pending)
{
- if ((atomic_dec_and_test(&conf->nr_pending)) ||
+ if ((adjust_pending && atomic_dec_and_test(&conf->nr_pending)) ||
(conf->array_freeze_pending))
wake_up_barrier(conf);
}
+static void allow_barrier(struct r10conf *conf)
+{
+ __allow_barrier(conf, true);
+}
+
+static void allow_barrier_nowait(struct r10conf *conf)
+{
+ __allow_barrier(conf, false);
+}
+
static void freeze_array(struct r10conf *conf, int extra)
{
/* stop syncio and normal IO and wait for everything to
@@ -1184,7 +1198,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, false);
return;
}
@@ -1195,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
mdname(mddev), b,
(unsigned long long)r10_bio->sector);
}
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, true);
return;
}
if (err_rdev)
@@ -1240,7 +1254,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
return;
err_handle:
atomic_dec(&rdev->nr_pending);
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, true);
}
static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
@@ -1372,7 +1386,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
sectors = r10_bio->sectors;
if (!regular_request_wait(mddev, conf, bio, sectors)) {
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, false);
return;
}
@@ -1523,7 +1537,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
}
}
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, true);
}
static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
@@ -2952,7 +2966,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
if (test_bit(R10BIO_WriteError,
&r10_bio->state))
close_write(r10_bio);
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, true);
}
}
}
@@ -2987,7 +3001,7 @@ static void raid10d(struct md_thread *thread)
if (test_bit(R10BIO_WriteError,
&r10_bio->state))
close_write(r10_bio);
- raid_end_bio_io(r10_bio);
+ raid_end_bio_io(r10_bio, true);
}
}
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH v2] md/raid10: fix deadlock with check operation and nowait requests
2026-02-10 16:51 [PATCH v2] md/raid10: fix deadlock with check operation and nowait requests Josh Hunt
@ 2026-02-26 5:24 ` Yu Kuai
2026-03-03 0:54 ` Josh Hunt
0 siblings, 1 reply; 3+ messages in thread
From: Yu Kuai @ 2026-02-26 5:24 UTC (permalink / raw)
To: Josh Hunt, song, linan122, linux-raid, yukuai; +Cc: ncroxon, stable
Hi,
在 2026/2/11 0:51, Josh Hunt 写道:
> When an array check is running it will raise the barrier at which point
> normal requests will become blocked and increment the nr_pending value to
> signal there is work pending inside of wait_barrier(). NOWAIT requests
> do not block and so will return immediately with an error, and additionally
> do not increment nr_pending in wait_barrier(). Upstream change
> 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") added a
> call to raid_end_bio_io() to fix a memory leak when NOWAIT requests hit
> this condition. raid_end_bio_io() eventually calls allow_barrier() and
> it will unconditionally do an atomic_dec_and_test(&conf->nr_pending) even
> though the corresponding increment on nr_pending didn't happen in the
> NOWAIT case.
>
> This can be easily seen by starting a check operation while an application is
> doing nowait IO on the same array. This results in a deadlocked state due to
> nr_pending value underflowing and so the md resync thread gets stuck waiting
> for nr_pending to == 0.
>
> Output of r10conf state of the array when we hit this condition:
>
> crash> struct r10conf.barrier,nr_pending,nr_waiting,nr_queued <addr of r10conf>
> barrier = 1,
> nr_pending = {
> counter = -41
> },
> nr_waiting = 15,
> nr_queued = 0,
>
> Example of md_sync thread stuck waiting on raise_barrier() and other requests
> stuck in wait_barrier():
>
> md1_resync
> [<0>] raise_barrier+0xce/0x1c0
> [<0>] raid10_sync_request+0x1ca/0x1ed0
> [<0>] md_do_sync+0x779/0x1110
> [<0>] md_thread+0x90/0x160
> [<0>] kthread+0xbe/0xf0
> [<0>] ret_from_fork+0x34/0x50
> [<0>] ret_from_fork_asm+0x1a/0x30
>
> kworker/u1040:2+flush-253:4
> [<0>] wait_barrier+0x1de/0x220
> [<0>] regular_request_wait+0x30/0x180
> [<0>] raid10_make_request+0x261/0x1000
> [<0>] md_handle_request+0x13b/0x230
> [<0>] __submit_bio+0x107/0x1f0
> [<0>] submit_bio_noacct_nocheck+0x16f/0x390
> [<0>] ext4_io_submit+0x24/0x40
> [<0>] ext4_do_writepages+0x254/0xc80
> [<0>] ext4_writepages+0x84/0x120
> [<0>] do_writepages+0x7a/0x260
> [<0>] __writeback_single_inode+0x3d/0x300
> [<0>] writeback_sb_inodes+0x1dd/0x470
> [<0>] __writeback_inodes_wb+0x4c/0xe0
> [<0>] wb_writeback+0x18b/0x2d0
> [<0>] wb_workfn+0x2a1/0x400
> [<0>] process_one_work+0x149/0x330
> [<0>] worker_thread+0x2d2/0x410
> [<0>] kthread+0xbe/0xf0
> [<0>] ret_from_fork+0x34/0x50
> [<0>] ret_from_fork_asm+0x1a/0x30
>
> Fixes: 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request")
> Cc: stable@vger.kernel.org
> Signed-off-by: Josh Hunt <johunt@akamai.com>
> ---
> drivers/md/raid10.c | 40 +++++++++++++++++++++++++++-------------
> 1 file changed, 27 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 9debb20cf129..b05066dde693 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -68,6 +68,7 @@
> */
>
> static void allow_barrier(struct r10conf *conf);
> +static void allow_barrier_nowait(struct r10conf *conf);
> static void lower_barrier(struct r10conf *conf);
> static int _enough(struct r10conf *conf, int previous, int ignore);
> static int enough(struct r10conf *conf, int ignore);
> @@ -317,7 +318,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
> * operation and are ready to return a success/failure code to the buffer
> * cache layer.
> */
> -static void raid_end_bio_io(struct r10bio *r10_bio)
> +static void raid_end_bio_io(struct r10bio *r10_bio, bool adjust_pending)
> {
> struct bio *bio = r10_bio->master_bio;
> struct r10conf *conf = r10_bio->mddev->private;
> @@ -332,7 +333,10 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
> * Wake up any possible resync thread that waits for the device
> * to go idle.
> */
> - allow_barrier(conf);
> + if (adjust_pending)
> + allow_barrier(conf);
> + else
> + allow_barrier_nowait(conf);
>
> free_r10bio(r10_bio);
> }
> @@ -414,7 +418,7 @@ static void raid10_end_read_request(struct bio *bio)
> uptodate = 1;
> }
> if (uptodate) {
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, true);
> rdev_dec_pending(rdev, conf->mddev);
> } else {
> /*
> @@ -446,7 +450,7 @@ static void one_write_done(struct r10bio *r10_bio)
> if (test_bit(R10BIO_MadeGood, &r10_bio->state))
> reschedule_retry(r10_bio);
> else
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, true);
> }
> }
> }
> @@ -1030,13 +1034,23 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
> return ret;
> }
>
> -static void allow_barrier(struct r10conf *conf)
> +static void __allow_barrier(struct r10conf *conf, bool adjust_pending)
> {
> - if ((atomic_dec_and_test(&conf->nr_pending)) ||
> + if ((adjust_pending && atomic_dec_and_test(&conf->nr_pending)) ||
> (conf->array_freeze_pending))
> wake_up_barrier(conf);
> }
>
> +static void allow_barrier(struct r10conf *conf)
> +{
> + __allow_barrier(conf, true);
> +}
> +
> +static void allow_barrier_nowait(struct r10conf *conf)
> +{
> + __allow_barrier(conf, false);
> +}
> +
> static void freeze_array(struct r10conf *conf, int extra)
> {
> /* stop syncio and normal IO and wait for everything to
> @@ -1184,7 +1198,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
> }
>
> if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, false);
> return;
> }
>
> @@ -1195,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
> mdname(mddev), b,
> (unsigned long long)r10_bio->sector);
> }
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, true);
> return;
> }
> if (err_rdev)
> @@ -1240,7 +1254,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
> return;
> err_handle:
> atomic_dec(&rdev->nr_pending);
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, true);
> }
>
> static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
> @@ -1372,7 +1386,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
>
> sectors = r10_bio->sectors;
> if (!regular_request_wait(mddev, conf, bio, sectors)) {
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, false);
There really is a problem; however, the analysis seems a bit wrong.
The master_bio is already handled with bio_wouldblock_error(), it's wrong
to call raid_end_bio_io() directly. Looks like this problem can be fixed by
calling free_r10bio() instead.
> return;
> }
>
> @@ -1523,7 +1537,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
> }
> }
>
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, true);
> }
>
> static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
> @@ -2952,7 +2966,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
> if (test_bit(R10BIO_WriteError,
> &r10_bio->state))
> close_write(r10_bio);
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, true);
> }
> }
> }
> @@ -2987,7 +3001,7 @@ static void raid10d(struct md_thread *thread)
> if (test_bit(R10BIO_WriteError,
> &r10_bio->state))
> close_write(r10_bio);
> - raid_end_bio_io(r10_bio);
> + raid_end_bio_io(r10_bio, true);
> }
> }
>
--
Thanks,
Kuai
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [PATCH v2] md/raid10: fix deadlock with check operation and nowait requests
2026-02-26 5:24 ` Yu Kuai
@ 2026-03-03 0:54 ` Josh Hunt
0 siblings, 0 replies; 3+ messages in thread
From: Josh Hunt @ 2026-03-03 0:54 UTC (permalink / raw)
To: yukuai, song, linan122, linux-raid; +Cc: ncroxon, stable
On 2/25/26 9:24 PM, Yu Kuai wrote:
> !-------------------------------------------------------------------|
> This Message Is From an External Sender
> This message came from outside your organization.
> |-------------------------------------------------------------------!
>
> Hi,
>
> 在 2026/2/11 0:51, Josh Hunt 写道:
>> When an array check is running it will raise the barrier at which point
>> normal requests will become blocked and increment the nr_pending value to
>> signal there is work pending inside of wait_barrier(). NOWAIT requests
>> do not block and so will return immediately with an error, and additionally
>> do not increment nr_pending in wait_barrier(). Upstream change
>> 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request") added a
>> call to raid_end_bio_io() to fix a memory leak when NOWAIT requests hit
>> this condition. raid_end_bio_io() eventually calls allow_barrier() and
>> it will unconditionally do an atomic_dec_and_test(&conf->nr_pending) even
>> though the corresponding increment on nr_pending didn't happen in the
>> NOWAIT case.
>>
>> This can be easily seen by starting a check operation while an application is
>> doing nowait IO on the same array. This results in a deadlocked state due to
>> nr_pending value underflowing and so the md resync thread gets stuck waiting
>> for nr_pending to == 0.
>>
>> Output of r10conf state of the array when we hit this condition:
>>
>> crash> struct r10conf.barrier,nr_pending,nr_waiting,nr_queued <addr of r10conf>
>> barrier = 1,
>> nr_pending = {
>> counter = -41
>> },
>> nr_waiting = 15,
>> nr_queued = 0,
>>
>> Example of md_sync thread stuck waiting on raise_barrier() and other requests
>> stuck in wait_barrier():
>>
>> md1_resync
>> [<0>] raise_barrier+0xce/0x1c0
>> [<0>] raid10_sync_request+0x1ca/0x1ed0
>> [<0>] md_do_sync+0x779/0x1110
>> [<0>] md_thread+0x90/0x160
>> [<0>] kthread+0xbe/0xf0
>> [<0>] ret_from_fork+0x34/0x50
>> [<0>] ret_from_fork_asm+0x1a/0x30
>>
>> kworker/u1040:2+flush-253:4
>> [<0>] wait_barrier+0x1de/0x220
>> [<0>] regular_request_wait+0x30/0x180
>> [<0>] raid10_make_request+0x261/0x1000
>> [<0>] md_handle_request+0x13b/0x230
>> [<0>] __submit_bio+0x107/0x1f0
>> [<0>] submit_bio_noacct_nocheck+0x16f/0x390
>> [<0>] ext4_io_submit+0x24/0x40
>> [<0>] ext4_do_writepages+0x254/0xc80
>> [<0>] ext4_writepages+0x84/0x120
>> [<0>] do_writepages+0x7a/0x260
>> [<0>] __writeback_single_inode+0x3d/0x300
>> [<0>] writeback_sb_inodes+0x1dd/0x470
>> [<0>] __writeback_inodes_wb+0x4c/0xe0
>> [<0>] wb_writeback+0x18b/0x2d0
>> [<0>] wb_workfn+0x2a1/0x400
>> [<0>] process_one_work+0x149/0x330
>> [<0>] worker_thread+0x2d2/0x410
>> [<0>] kthread+0xbe/0xf0
>> [<0>] ret_from_fork+0x34/0x50
>> [<0>] ret_from_fork_asm+0x1a/0x30
>>
>> Fixes: 43806c3d5b9b ("raid10: cleanup memleak at raid10_make_request")
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Josh Hunt <johunt@akamai.com>
>> ---
>> drivers/md/raid10.c | 40 +++++++++++++++++++++++++++-------------
>> 1 file changed, 27 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index 9debb20cf129..b05066dde693 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -68,6 +68,7 @@
>> */
>>
>> static void allow_barrier(struct r10conf *conf);
>> +static void allow_barrier_nowait(struct r10conf *conf);
>> static void lower_barrier(struct r10conf *conf);
>> static int _enough(struct r10conf *conf, int previous, int ignore);
>> static int enough(struct r10conf *conf, int ignore);
>> @@ -317,7 +318,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
>> * operation and are ready to return a success/failure code to the buffer
>> * cache layer.
>> */
>> -static void raid_end_bio_io(struct r10bio *r10_bio)
>> +static void raid_end_bio_io(struct r10bio *r10_bio, bool adjust_pending)
>> {
>> struct bio *bio = r10_bio->master_bio;
>> struct r10conf *conf = r10_bio->mddev->private;
>> @@ -332,7 +333,10 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
>> * Wake up any possible resync thread that waits for the device
>> * to go idle.
>> */
>> - allow_barrier(conf);
>> + if (adjust_pending)
>> + allow_barrier(conf);
>> + else
>> + allow_barrier_nowait(conf);
>>
>> free_r10bio(r10_bio);
>> }
>> @@ -414,7 +418,7 @@ static void raid10_end_read_request(struct bio *bio)
>> uptodate = 1;
>> }
>> if (uptodate) {
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, true);
>> rdev_dec_pending(rdev, conf->mddev);
>> } else {
>> /*
>> @@ -446,7 +450,7 @@ static void one_write_done(struct r10bio *r10_bio)
>> if (test_bit(R10BIO_MadeGood, &r10_bio->state))
>> reschedule_retry(r10_bio);
>> else
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, true);
>> }
>> }
>> }
>> @@ -1030,13 +1034,23 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
>> return ret;
>> }
>>
>> -static void allow_barrier(struct r10conf *conf)
>> +static void __allow_barrier(struct r10conf *conf, bool adjust_pending)
>> {
>> - if ((atomic_dec_and_test(&conf->nr_pending)) ||
>> + if ((adjust_pending && atomic_dec_and_test(&conf->nr_pending)) ||
>> (conf->array_freeze_pending))
>> wake_up_barrier(conf);
>> }
>>
>> +static void allow_barrier(struct r10conf *conf)
>> +{
>> + __allow_barrier(conf, true);
>> +}
>> +
>> +static void allow_barrier_nowait(struct r10conf *conf)
>> +{
>> + __allow_barrier(conf, false);
>> +}
>> +
>> static void freeze_array(struct r10conf *conf, int extra)
>> {
>> /* stop syncio and normal IO and wait for everything to
>> @@ -1184,7 +1198,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>> }
>>
>> if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors)) {
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, false);
>> return;
>> }
>>
>> @@ -1195,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>> mdname(mddev), b,
>> (unsigned long long)r10_bio->sector);
>> }
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, true);
>> return;
>> }
>> if (err_rdev)
>> @@ -1240,7 +1254,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
>> return;
>> err_handle:
>> atomic_dec(&rdev->nr_pending);
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, true);
>> }
>>
>> static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
>> @@ -1372,7 +1386,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
>>
>> sectors = r10_bio->sectors;
>> if (!regular_request_wait(mddev, conf, bio, sectors)) {
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, false);
>
> There really is a problem; however, the analysis seems a bit wrong.
>
> The master_bio is already handled with bio_wouldblock_error(), it's wrong
> to call raid_end_bio_io() directly. Looks like this problem can be fixed by
> calling free_r10bio() instead.
Thanks for the feedback. I tried this and indeed it does also fix the
issue for me so will send over a v3 here shortly. Appreciate the help!
>
>> return;
>> }
>>
>> @@ -1523,7 +1537,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
>> }
>> }
>>
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, true);
>> }
>>
>> static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
>> @@ -2952,7 +2966,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
>> if (test_bit(R10BIO_WriteError,
>> &r10_bio->state))
>> close_write(r10_bio);
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, true);
>> }
>> }
>> }
>> @@ -2987,7 +3001,7 @@ static void raid10d(struct md_thread *thread)
>> if (test_bit(R10BIO_WriteError,
>> &r10_bio->state))
>> close_write(r10_bio);
>> - raid_end_bio_io(r10_bio);
>> + raid_end_bio_io(r10_bio, true);
>> }
>> }
>>
>
Josh
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-03-03 1:34 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-10 16:51 [PATCH v2] md/raid10: fix deadlock with check operation and nowait requests Josh Hunt
2026-02-26 5:24 ` Yu Kuai
2026-03-03 0:54 ` Josh Hunt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox