* [PATCH] md/raid1: honor REQ_NOWAIT when waiting for behind writes
@ 2026-06-11 8:35 Abd-Alrhman Masalkhi
2026-06-11 8:49 ` sashiko-bot
0 siblings, 1 reply; 3+ messages in thread
From: Abd-Alrhman Masalkhi @ 2026-06-11 8:35 UTC (permalink / raw)
To: song, yukuai, magiclinan, xiao, vverma, axboe
Cc: linux-raid, linux-kernel, Abd-Alrhman Masalkhi
raid1 supports REQ_NOWAIT reads by avoiding waits in the barrier path
through wait_read_barrier(). However, a read can still block on a
WriteMostly device when the array uses a bitmap and there are
outstanding behind writes.
In that case raid1 unconditionally calls wait_behind_writes(), which
may sleep until all behind writes complete. As a result, a REQ_NOWAIT
read can block despite the caller explicitly requesting non-blocking
behavior.
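Add a nowait argument to the wait_behind_writes() bitmap operation so
that it returns false instead of sleeping when behind writes are still
outstanding, and make raid1_read_request() complete the bio with
bio_wouldblock_error() in that case. This ensures that raid1
consistently honors REQ_NOWAIT reads across all paths that may
otherwise wait for behind writes.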
Fixes: 5aa705039c4f ("md: raid1 add nowait support")
Signed-off-by: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
---
drivers/md/md-bitmap.c | 9 +++++++--
drivers/md/md-bitmap.h | 2 +-
drivers/md/md-llbitmap.c | 13 ++++++++-----
drivers/md/md.c | 2 +-
drivers/md/raid1.c | 10 +++++++---
5 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 028b9ca8ce52..1206e31f323a 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2063,18 +2063,23 @@ static void bitmap_end_behind_write(struct mddev *mddev)
bitmap->mddev->bitmap_info.max_write_behind);
}
-static void bitmap_wait_behind_writes(struct mddev *mddev)
+static bool bitmap_wait_behind_writes(struct mddev *mddev, bool nowait)
{
struct bitmap *bitmap = mddev->bitmap;
/* wait for behind writes to complete */
if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
+ if (nowait)
+ return false;
+
pr_debug("md:%s: behind writes in progress - waiting to stop.\n",
mdname(mddev));
/* need to kick something here to make sure I/O goes? */
wait_event(bitmap->behind_wait,
atomic_read(&bitmap->behind_writes) == 0);
}
+
+ return true;
}
static void bitmap_destroy(struct mddev *mddev)
@@ -2084,7 +2089,7 @@ static void bitmap_destroy(struct mddev *mddev)
if (!bitmap) /* there was no bitmap */
return;
- bitmap_wait_behind_writes(mddev);
+ bitmap_wait_behind_writes(mddev, false);
if (!test_bit(MD_SERIALIZE_POLICY, &mddev->flags))
mddev_destroy_serial_pool(mddev, NULL);
diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
index 214f623c7e79..f46674bdfeb9 100644
--- a/drivers/md/md-bitmap.h
+++ b/drivers/md/md-bitmap.h
@@ -98,7 +98,7 @@ struct bitmap_operations {
void (*start_behind_write)(struct mddev *mddev);
void (*end_behind_write)(struct mddev *mddev);
- void (*wait_behind_writes)(struct mddev *mddev);
+ bool (*wait_behind_writes)(struct mddev *mddev, bool nowait);
md_bitmap_fn *start_write;
md_bitmap_fn *end_write;
diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 1adc5b117821..5a4e2abaa757 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -1574,16 +1574,19 @@ static void llbitmap_end_behind_write(struct mddev *mddev)
wake_up(&llbitmap->behind_wait);
}
-static void llbitmap_wait_behind_writes(struct mddev *mddev)
+static bool llbitmap_wait_behind_writes(struct mddev *mddev, bool nowait)
{
struct llbitmap *llbitmap = mddev->bitmap;
- if (!llbitmap)
- return;
+ if (llbitmap && atomic_read(&llbitmap->behind_writes) > 0) {
+ if (nowait)
+ return false;
- wait_event(llbitmap->behind_wait,
- atomic_read(&llbitmap->behind_writes) == 0);
+ wait_event(llbitmap->behind_wait,
+ atomic_read(&llbitmap->behind_writes) == 0);
+ }
+ return true;
}
static ssize_t bits_show(struct mddev *mddev, char *page)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 096bb64e87bd..d1465bcd86c8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7050,7 +7050,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
static void mddev_detach(struct mddev *mddev)
{
if (md_bitmap_enabled(mddev, false))
- mddev->bitmap_ops->wait_behind_writes(mddev);
+ mddev->bitmap_ops->wait_behind_writes(mddev, false);
if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b1ed4cc6ade4..b2d7c13b64bd 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1341,6 +1341,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
int max_sectors;
int rdisk;
bool r1bio_existed = !!r1_bio;
+ bool nowait = bio->bi_opf & REQ_NOWAIT;
/*
* An md cloned bio indicates we are in the error path.
@@ -1360,8 +1361,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
* Still need barrier for READ in case that whole
* array is frozen.
*/
- if (!wait_read_barrier(conf, bio->bi_iter.bi_sector,
- bio->bi_opf & REQ_NOWAIT)) {
+ if (!wait_read_barrier(conf, bio->bi_iter.bi_sector, nowait)) {
bio_wouldblock_error(bio);
return;
}
@@ -1402,7 +1402,11 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
* over-take any writes that are 'behind'
*/
mddev_add_trace_msg(mddev, "raid1 wait behind writes");
- mddev->bitmap_ops->wait_behind_writes(mddev);
+ if (!mddev->bitmap_ops->wait_behind_writes(mddev, nowait)) {
+ bio_wouldblock_error(bio);
+ set_bit(R1BIO_Returned, &r1_bio->state);
+ goto err_handle;
+ }
}
if (max_sectors < bio_sectors(bio)) {
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] md/raid1: honor REQ_NOWAIT when waiting for behind writes
2026-06-11 8:35 [PATCH] md/raid1: honor REQ_NOWAIT when waiting for behind writes Abd-Alrhman Masalkhi
@ 2026-06-11 8:49 ` sashiko-bot
2026-06-11 10:49 ` Abd-Alrhman Masalkhi
0 siblings, 1 reply; 3+ messages in thread
From: sashiko-bot @ 2026-06-11 8:49 UTC (permalink / raw)
To: Abd-Alrhman Masalkhi; +Cc: yukuai, linux-raid
Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
Pre-existing issues:
- [High] Leaking pre-allocated `r1_bio` when `REQ_NOWAIT` reads fail `wait_read_barrier()` during retry.
- [High] Permanent barrier leak if a REQ_NOWAIT write encounters a blocked device.
- [Medium] REQ_NOWAIT semantics violation: memory allocations use GFP_NOIO unconditionally.
--
commit 7fed7389f4fe20e79eece974eaf3b8d317519e27
Author: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
md/raid1: honor REQ_NOWAIT when waiting for behind writes
This commit ensures that raid1 consistently honors REQ_NOWAIT reads
across paths that may otherwise wait for behind writes. It modifies
bitmap_wait_behind_writes() to return early instead of sleeping if
the nowait flag is set.
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 5b9368bd9e700..d03da046a5920 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
[ ... ]
> @@ -1360,8 +1361,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
> * Still need barrier for READ in case that whole
> * array is frozen.
> */
> - if (!wait_read_barrier(conf, bio->bi_iter.bi_sector,
> - bio->bi_opf & REQ_NOWAIT)) {
> + if (!wait_read_barrier(conf, bio->bi_iter.bi_sector, nowait)) {
> bio_wouldblock_error(bio);
> return;
> }
[Severity: High]
This is a pre-existing issue, but does this early return leak the
pre-allocated r1_bio when wait_read_barrier() fails during a retry?
When a REQ_NOWAIT read fails its initial attempt, it is queued to the
retry_list and handled by the raid1d thread via handle_read_error().
That function passes the existing, pre-allocated r1_bio back to
raid1_read_request():
drivers/md/raid1.c:handle_read_error() {
...
raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio);
allow_barrier(conf, sector);
...
}
If the array is concurrently frozen, wait_read_barrier() returns false
since REQ_NOWAIT is still set on the master bio. The code handles this
by returning immediately, bypassing the err_handle cleanup path and
failing to call raid_end_bio_io().
Since handle_read_error() assumes ownership was taken and only releases
its old barrier, does this result in a permanent memory leak of the
r1_bio from the r1bio_pool mempool?
[Severity: Medium]
This is another pre-existing issue, but does the allocation path for
REQ_NOWAIT bios violate block layer non-blocking semantics by
unconditionally using GFP_NOIO?
Even when REQ_NOWAIT is requested, alloc_r1bio() uses GFP_NOIO
unconditionally:
drivers/md/raid1.c:alloc_r1bio() {
...
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
...
}
Similarly, bio_alloc_clone() in raid1_read_request() unconditionally
uses gfp flags that include __GFP_DIRECT_RECLAIM.
Since mempool-backed allocations will sleep under memory pressure
instead of failing immediately when these flags are used, will this
cause unexpected high latencies and potential priority inversion in
asynchronous contexts?
[Severity: High]
This isn't a bug introduced by this patch, but does
raid1_write_request() permanently leak the barrier if a REQ_NOWAIT
write encounters a blocked device?
In raid1_write_request(), the code successfully acquires a barrier by
calling wait_barrier(), which internally increments conf->nr_pending.
Immediately after, it calls wait_blocked_rdev():
drivers/md/raid1.c:raid1_write_request() {
...
if (!wait_blocked_rdev(mddev, bio)) {
bio_wouldblock_error(bio);
return;
}
...
}
If the device is blocked and the request is REQ_NOWAIT,
wait_blocked_rdev() returns false and the function returns early.
However, it fails to call allow_barrier() before returning to drop
the previously acquired barrier increment.
Because freeze_array() waits for nr_pending to reach 0, could this
permanently prevent the array from ever freezing again, deadlocking
resyncs, reconfiguration, and array teardown?
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260611083514.754922-1-abd.masalkhi@gmail.com?part=1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] md/raid1: honor REQ_NOWAIT when waiting for behind writes
2026-06-11 8:49 ` sashiko-bot
@ 2026-06-11 10:49 ` Abd-Alrhman Masalkhi
0 siblings, 0 replies; 3+ messages in thread
From: Abd-Alrhman Masalkhi @ 2026-06-11 10:49 UTC (permalink / raw)
To: sashiko-reviews; +Cc: yukuai, linux-raid
On Thu, Jun 11, 2026 at 08:49 +0000, sashiko-bot@kernel.org wrote:
> Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider:
>
> Pre-existing issues:
> - [High] Leaking pre-allocated `r1_bio` when `REQ_NOWAIT` reads fail `wait_read_barrier()` during retry.
> - [High] Permanent barrier leak if a REQ_NOWAIT write encounters a blocked device.
> - [Medium] REQ_NOWAIT semantics violation: memory allocations use GFP_NOIO unconditionally.
These appear to be pre-existing issues rather than being introduced by
this patch. I plan to address them in separate patches.
> --
>
> commit 7fed7389f4fe20e79eece974eaf3b8d317519e27
> Author: Abd-Alrhman Masalkhi <abd.masalkhi@gmail.com>
>
> md/raid1: honor REQ_NOWAIT when waiting for behind writes
>
> This commit ensures that raid1 consistently honors REQ_NOWAIT reads
> across paths that may otherwise wait for behind writes. It modifies
> bitmap_wait_behind_writes() to return early instead of sleeping if
> the nowait flag is set.
>
>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> index 5b9368bd9e700..d03da046a5920 100644
>> --- a/drivers/md/raid1.c
>> +++ b/drivers/md/raid1.c
> [ ... ]
>> @@ -1360,8 +1361,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
>> * Still need barrier for READ in case that whole
>> * array is frozen.
>> */
>> - if (!wait_read_barrier(conf, bio->bi_iter.bi_sector,
>> - bio->bi_opf & REQ_NOWAIT)) {
>> + if (!wait_read_barrier(conf, bio->bi_iter.bi_sector, nowait)) {
>> bio_wouldblock_error(bio);
>> return;
>> }
>
> [Severity: High]
> This is a pre-existing issue, but does this early return leak the
> pre-allocated r1_bio when wait_read_barrier() fails during a retry?
>
> When a REQ_NOWAIT read fails its initial attempt, it is queued to the
> retry_list and handled by the raid1d thread via handle_read_error().
>
> That function passes the existing, pre-allocated r1_bio back to
> raid1_read_request():
>
> drivers/md/raid1.c:handle_read_error() {
> ...
> raid1_read_request(mddev, bio, r1_bio->sectors, r1_bio);
> allow_barrier(conf, sector);
> ...
> }
>
> If the array is concurrently frozen, wait_read_barrier() returns false
> since REQ_NOWAIT is still set on the master bio. The code handles this
> by returning immediately, bypassing the err_handle cleanup path and
> failing to call raid_end_bio_io().
>
> Since handle_read_error() assumes ownership was taken and only releases
> its old barrier, does this result in a permanent memory leak of the
> r1_bio from the r1bio_pool mempool?
>
> [Severity: Medium]
> This is another pre-existing issue, but does the allocation path for
> REQ_NOWAIT bios violate block layer non-blocking semantics by
> unconditionally using GFP_NOIO?
>
> Even when REQ_NOWAIT is requested, alloc_r1bio() uses GFP_NOIO
> unconditionally:
>
> drivers/md/raid1.c:alloc_r1bio() {
> ...
> r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
> ...
> }
>
> Similarly, bio_alloc_clone() in raid1_read_request() unconditionally
> uses gfp flags that include __GFP_DIRECT_RECLAIM.
>
> Since mempool-backed allocations will sleep under memory pressure
> instead of failing immediately when these flags are used, will this
> cause unexpected high latencies and potential priority inversion in
> asynchronous contexts?
>
> [Severity: High]
> This isn't a bug introduced by this patch, but does
> raid1_write_request() permanently leak the barrier if a REQ_NOWAIT
> write encounters a blocked device?
>
> In raid1_write_request(), the code successfully acquires a barrier by
> calling wait_barrier(), which internally increments conf->nr_pending.
>
> Immediately after, it calls wait_blocked_rdev():
>
> drivers/md/raid1.c:raid1_write_request() {
> ...
> if (!wait_blocked_rdev(mddev, bio)) {
> bio_wouldblock_error(bio);
> return;
> }
> ...
> }
>
> If the device is blocked and the request is REQ_NOWAIT,
> wait_blocked_rdev() returns false and the function returns early.
> However, it fails to call allow_barrier() before returning to drop
> the previously acquired barrier increment.
>
> Because freeze_array() waits for nr_pending to reach 0, could this
> permanently prevent the array from ever freezing again, deadlocking
> resyncs, reconfiguration, and array teardown?
>
> --
> Sashiko AI review · https://sashiko.dev/#/patchset/20260611083514.754922-1-abd.masalkhi@gmail.com?part=1
--
Best Regards,
Abd-Alrhman
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-06-11 10:49 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-06-11 8:35 [PATCH] md/raid1: honor REQ_NOWAIT when waiting for behind writes Abd-Alrhman Masalkhi
2026-06-11 8:49 ` sashiko-bot
2026-06-11 10:49 ` Abd-Alrhman Masalkhi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.