public inbox for linux-raid@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] md/raid0,linear: move error handling from make_request to IO completion
@ 2026-03-04 14:29 Chen Cheng
  2026-03-05 17:27 ` Yu Kuai
  0 siblings, 1 reply; 2+ messages in thread
From: Chen Cheng @ 2026-03-04 14:29 UTC (permalink / raw)
  To: Song Liu, Yu Kuai; +Cc: Li Nan, linux-raid, Chen Cheng

raid0 and linear call md_error() in the submission path after checking
is_rdev_broken() (i.e. disk_live() == false). This was intended as a
safety fast-fail for removed disks, but is unnecessary: the block layer
already handles GD_DEAD disks in __bio_queue_enter() and fails IO
immediately without hanging. More importantly, it misses the offline
case where the disk still exists but rejects IO — in that scenario, IO
fails silently and md_error() is never called.

Remove the submission-path check and instead call md_error() from
md_end_clone_io() on any IO failure. Store the target rdev in
md_io_clone so the completion handler knows which device failed. This
uniformly handles disk removal, offline status, and any other IO error.

Since raid0 and linear have no redundancy, any disk IO failure means
data is already lost, so marking the array broken on any error is
correct.

Signed-off-by: Chen Cheng <chencheng@fnnas.com>
---
 drivers/md/md-linear.c |  7 +------
 drivers/md/md.c        |  5 +++++
 drivers/md/md.h        | 11 ++++++-----
 drivers/md/raid0.c     |  7 +------
 4 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index fdff250d0d51..2ba181a1297d 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -251,12 +251,6 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		     bio_sector < start_sector))
 		goto out_of_bounds;
 
-	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
-		md_error(mddev, tmp_dev->rdev);
-		bio_io_error(bio);
-		return true;
-	}
-
 	if (unlikely(bio_end_sector(bio) > end_sector)) {
 		/* This bio crosses a device boundary, so we have to split it */
 		bio = bio_submit_split_bioset(bio, end_sector - bio_sector,
@@ -269,6 +263,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 	bio_set_dev(bio, tmp_dev->rdev->bdev);
 	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
 		start_sector + data_offset;
+	md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev->rdev);
 
 	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
 		     !bdev_max_discard_sectors(bio->bi_bdev))) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3ce6f9e9d38e..7caa24c4919d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9218,6 +9218,7 @@ static void md_end_clone_io(struct bio *bio)
 	struct md_io_clone *md_io_clone = bio->bi_private;
 	struct bio *orig_bio = md_io_clone->orig_bio;
 	struct mddev *mddev = md_io_clone->mddev;
+	enum md_submodule_id id = mddev->pers->head.id;
 
 	if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false))
 		md_bitmap_end(mddev, md_io_clone);
@@ -9225,6 +9226,10 @@ static void md_end_clone_io(struct bio *bio)
 	if (bio->bi_status && !orig_bio->bi_status)
 		orig_bio->bi_status = bio->bi_status;
 
+	if (bio->bi_status && md_io_clone->rdev &&
+	    (id == ID_LINEAR || id == ID_RAID0))
+		md_error(mddev, md_io_clone->rdev);
+
 	if (md_io_clone->start_time)
 		bio_end_io_acct(orig_bio, md_io_clone->start_time);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ac84289664cd..625f8304de3b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -872,6 +872,7 @@ struct md_thread {
 
 struct md_io_clone {
 	struct mddev	*mddev;
+	struct md_rdev	*rdev;
 	struct bio	*orig_bio;
 	unsigned long	start_time;
 	sector_t	offset;
@@ -917,6 +918,11 @@ extern void md_finish_reshape(struct mddev *mddev);
 void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
 			struct bio *bio, sector_t start, sector_t size);
 void md_account_bio(struct mddev *mddev, struct bio **bio);
+static inline void md_set_clone_rdev(struct md_io_clone *clone,
+				     struct md_rdev *rdev)
+{
+	clone->rdev = rdev;
+}
 void md_free_cloned_bio(struct bio *bio);
 
 extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
@@ -961,11 +967,6 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev,
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
 
-static inline bool is_rdev_broken(struct md_rdev *rdev)
-{
-	return !disk_live(rdev->bdev->bd_disk);
-}
-
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
 {
 	int faulty = test_bit(Faulty, &rdev->flags);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ef0045db409f..e933abeb0d70 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -576,15 +576,10 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
 		return;
 	}
 
-	if (unlikely(is_rdev_broken(tmp_dev))) {
-		bio_io_error(bio);
-		md_error(mddev, tmp_dev);
-		return;
-	}
-
 	bio_set_dev(bio, tmp_dev->bdev);
 	bio->bi_iter.bi_sector = sector + zone->dev_start +
 		tmp_dev->data_offset;
+	md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev);
 	mddev_trace_remap(mddev, bio, bio_sector);
 	mddev_check_write_zeroes(mddev, bio);
 	submit_bio_noacct(bio);
-- 
2.51.0

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] md/raid0,linear: move error handling from make_request to IO completion
  2026-03-04 14:29 [PATCH] md/raid0,linear: move error handling from make_request to IO completion Chen Cheng
@ 2026-03-05 17:27 ` Yu Kuai
  0 siblings, 0 replies; 2+ messages in thread
From: Yu Kuai @ 2026-03-05 17:27 UTC (permalink / raw)
  To: Chen Cheng, Song Liu, yukuai; +Cc: Li Nan, linux-raid

Hi,

在 2026/3/4 22:29, Chen Cheng 写道:
> raid0 and linear call md_error() in the submission path after checking
> is_rdev_broken() (i.e. disk_live() == false). This was intended as a
> safety fast-fail for removed disks, but is unnecessary: the block layer
> already handles GD_DEAD disks in __bio_queue_enter() and fails IO
> immediately without hanging. More importantly, it misses the offline
> case where the disk still exists but rejects IO — in that scenario, IO
> fails silently and md_error() is never called.
>
> Remove the submission-path check and instead call md_error() from
> md_end_clone_io() on any IO failure. Store the target rdev in
> md_io_clone so the completion handler knows which device failed. This
> uniformly handles disk removal, offline status, and any other IO error.
>
> Since raid0 and linear have no redundancy, any disk IO failure means
> data is already lost, so marking the array broken on any error is
> correct.

I won't say it's correct, this will make the array more fragile, the
lost data can be insignificant for filesystem and user, and in this
case the array can still be used.

>
> Signed-off-by: Chen Cheng <chencheng@fnnas.com>
> ---
>   drivers/md/md-linear.c |  7 +------
>   drivers/md/md.c        |  5 +++++
>   drivers/md/md.h        | 11 ++++++-----
>   drivers/md/raid0.c     |  7 +------
>   4 files changed, 13 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> index fdff250d0d51..2ba181a1297d 100644
> --- a/drivers/md/md-linear.c
> +++ b/drivers/md/md-linear.c
> @@ -251,12 +251,6 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>   		     bio_sector < start_sector))
>   		goto out_of_bounds;
>   
> -	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> -		md_error(mddev, tmp_dev->rdev);
> -		bio_io_error(bio);
> -		return true;
> -	}
> -
>   	if (unlikely(bio_end_sector(bio) > end_sector)) {
>   		/* This bio crosses a device boundary, so we have to split it */
>   		bio = bio_submit_split_bioset(bio, end_sector - bio_sector,
> @@ -269,6 +263,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>   	bio_set_dev(bio, tmp_dev->rdev->bdev);
>   	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
>   		start_sector + data_offset;
> +	md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev->rdev);
>   
>   	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
>   		     !bdev_max_discard_sectors(bio->bi_bdev))) {
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 3ce6f9e9d38e..7caa24c4919d 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -9218,6 +9218,7 @@ static void md_end_clone_io(struct bio *bio)
>   	struct md_io_clone *md_io_clone = bio->bi_private;
>   	struct bio *orig_bio = md_io_clone->orig_bio;
>   	struct mddev *mddev = md_io_clone->mddev;
> +	enum md_submodule_id id = mddev->pers->head.id;
>   
>   	if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false))
>   		md_bitmap_end(mddev, md_io_clone);
> @@ -9225,6 +9226,10 @@ static void md_end_clone_io(struct bio *bio)
>   	if (bio->bi_status && !orig_bio->bi_status)
>   		orig_bio->bi_status = bio->bi_status;
>   
> +	if (bio->bi_status && md_io_clone->rdev &&
> +	    (id == ID_LINEAR || id == ID_RAID0))
> +		md_error(mddev, md_io_clone->rdev);

I'll suggest to introduce badblocks to linear and raid0 as well, in the case when
metadata update succeed, only call md_error() when there is too much badblocks.

> +
>   	if (md_io_clone->start_time)
>   		bio_end_io_acct(orig_bio, md_io_clone->start_time);
>   
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index ac84289664cd..625f8304de3b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -872,6 +872,7 @@ struct md_thread {
>   
>   struct md_io_clone {
>   	struct mddev	*mddev;
> +	struct md_rdev	*rdev;
>   	struct bio	*orig_bio;
>   	unsigned long	start_time;
>   	sector_t	offset;
> @@ -917,6 +918,11 @@ extern void md_finish_reshape(struct mddev *mddev);
>   void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
>   			struct bio *bio, sector_t start, sector_t size);
>   void md_account_bio(struct mddev *mddev, struct bio **bio);
> +static inline void md_set_clone_rdev(struct md_io_clone *clone,
> +				     struct md_rdev *rdev)
> +{
> +	clone->rdev = rdev;
> +}
>   void md_free_cloned_bio(struct bio *bio);
>   
>   extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
> @@ -961,11 +967,6 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev,
>   struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
>   struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
>   
> -static inline bool is_rdev_broken(struct md_rdev *rdev)
> -{
> -	return !disk_live(rdev->bdev->bd_disk);
> -}
> -
>   static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
>   {
>   	int faulty = test_bit(Faulty, &rdev->flags);
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index ef0045db409f..e933abeb0d70 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -576,15 +576,10 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
>   		return;
>   	}
>   
> -	if (unlikely(is_rdev_broken(tmp_dev))) {
> -		bio_io_error(bio);
> -		md_error(mddev, tmp_dev);
> -		return;
> -	}
> -
>   	bio_set_dev(bio, tmp_dev->bdev);
>   	bio->bi_iter.bi_sector = sector + zone->dev_start +
>   		tmp_dev->data_offset;
> +	md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev);
>   	mddev_trace_remap(mddev, bio, bio_sector);
>   	mddev_check_write_zeroes(mddev, bio);
>   	submit_bio_noacct(bio);

-- 
Thansk,
Kuai

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-03-05 17:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-04 14:29 [PATCH] md/raid0,linear: move error handling from make_request to IO completion Chen Cheng
2026-03-05 17:27 ` Yu Kuai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox