public inbox for linux-raid@vger.kernel.org
 help / color / mirror / Atom feed
From: "Yu Kuai" <yukuai@fnnas.com>
To: "Chen Cheng" <chencheng@fnnas.com>, "Song Liu" <song@kernel.org>,
	 <yukuai@fnnas.com>
Cc: "Li Nan" <linan122@huawei.com>, <linux-raid@vger.kernel.org>
Subject: Re: [PATCH] md/raid0,linear: move error handling from make_request to IO completion
Date: Fri, 6 Mar 2026 01:27:50 +0800	[thread overview]
Message-ID: <11ee7aea-e9cf-47f3-8297-09ec607ec2ca@fnnas.com> (raw)
In-Reply-To: <20260304142907.3791-1-chencheng@fnnas.com>

Hi,

在 2026/3/4 22:29, Chen Cheng 写道:
> raid0 and linear call md_error() in the submission path after checking
> is_rdev_broken() (i.e. disk_live() == false). This was intended as a
> safety fast-fail for removed disks, but is unnecessary: the block layer
> already handles GD_DEAD disks in __bio_queue_enter() and fails IO
> immediately without hanging. More importantly, it misses the offline
> case where the disk still exists but rejects IO — in that scenario, IO
> fails silently and md_error() is never called.
>
> Remove the submission-path check and instead call md_error() from
> md_end_clone_io() on any IO failure. Store the target rdev in
> md_io_clone so the completion handler knows which device failed. This
> uniformly handles disk removal, offline status, and any other IO error.
>
> Since raid0 and linear have no redundancy, any disk IO failure means
> data is already lost, so marking the array broken on any error is
> correct.

I won't say it's correct, this will make the array more fragile, the
lost data can be insignificant for filesystem and user, and in this
case the array can still be used.

>
> Signed-off-by: Chen Cheng <chencheng@fnnas.com>
> ---
>   drivers/md/md-linear.c |  7 +------
>   drivers/md/md.c        |  5 +++++
>   drivers/md/md.h        | 11 ++++++-----
>   drivers/md/raid0.c     |  7 +------
>   4 files changed, 13 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> index fdff250d0d51..2ba181a1297d 100644
> --- a/drivers/md/md-linear.c
> +++ b/drivers/md/md-linear.c
> @@ -251,12 +251,6 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>   		     bio_sector < start_sector))
>   		goto out_of_bounds;
>   
> -	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> -		md_error(mddev, tmp_dev->rdev);
> -		bio_io_error(bio);
> -		return true;
> -	}
> -
>   	if (unlikely(bio_end_sector(bio) > end_sector)) {
>   		/* This bio crosses a device boundary, so we have to split it */
>   		bio = bio_submit_split_bioset(bio, end_sector - bio_sector,
> @@ -269,6 +263,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
>   	bio_set_dev(bio, tmp_dev->rdev->bdev);
>   	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
>   		start_sector + data_offset;
> +	md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev->rdev);
>   
>   	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
>   		     !bdev_max_discard_sectors(bio->bi_bdev))) {
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 3ce6f9e9d38e..7caa24c4919d 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -9218,6 +9218,7 @@ static void md_end_clone_io(struct bio *bio)
>   	struct md_io_clone *md_io_clone = bio->bi_private;
>   	struct bio *orig_bio = md_io_clone->orig_bio;
>   	struct mddev *mddev = md_io_clone->mddev;
> +	enum md_submodule_id id = mddev->pers->head.id;
>   
>   	if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false))
>   		md_bitmap_end(mddev, md_io_clone);
> @@ -9225,6 +9226,10 @@ static void md_end_clone_io(struct bio *bio)
>   	if (bio->bi_status && !orig_bio->bi_status)
>   		orig_bio->bi_status = bio->bi_status;
>   
> +	if (bio->bi_status && md_io_clone->rdev &&
> +	    (id == ID_LINEAR || id == ID_RAID0))
> +		md_error(mddev, md_io_clone->rdev);

I'll suggest to introduce badblocks to linear and raid0 as well, in the case when
metadata update succeed, only call md_error() when there is too much badblocks.

> +
>   	if (md_io_clone->start_time)
>   		bio_end_io_acct(orig_bio, md_io_clone->start_time);
>   
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index ac84289664cd..625f8304de3b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -872,6 +872,7 @@ struct md_thread {
>   
>   struct md_io_clone {
>   	struct mddev	*mddev;
> +	struct md_rdev	*rdev;
>   	struct bio	*orig_bio;
>   	unsigned long	start_time;
>   	sector_t	offset;
> @@ -917,6 +918,11 @@ extern void md_finish_reshape(struct mddev *mddev);
>   void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
>   			struct bio *bio, sector_t start, sector_t size);
>   void md_account_bio(struct mddev *mddev, struct bio **bio);
> +static inline void md_set_clone_rdev(struct md_io_clone *clone,
> +				     struct md_rdev *rdev)
> +{
> +	clone->rdev = rdev;
> +}
>   void md_free_cloned_bio(struct bio *bio);
>   
>   extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
> @@ -961,11 +967,6 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev,
>   struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
>   struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
>   
> -static inline bool is_rdev_broken(struct md_rdev *rdev)
> -{
> -	return !disk_live(rdev->bdev->bd_disk);
> -}
> -
>   static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
>   {
>   	int faulty = test_bit(Faulty, &rdev->flags);
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index ef0045db409f..e933abeb0d70 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -576,15 +576,10 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
>   		return;
>   	}
>   
> -	if (unlikely(is_rdev_broken(tmp_dev))) {
> -		bio_io_error(bio);
> -		md_error(mddev, tmp_dev);
> -		return;
> -	}
> -
>   	bio_set_dev(bio, tmp_dev->bdev);
>   	bio->bi_iter.bi_sector = sector + zone->dev_start +
>   		tmp_dev->data_offset;
> +	md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev);
>   	mddev_trace_remap(mddev, bio, bio_sector);
>   	mddev_check_write_zeroes(mddev, bio);
>   	submit_bio_noacct(bio);

-- 
Thansk,
Kuai

      reply	other threads:[~2026-03-05 17:29 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-04 14:29 [PATCH] md/raid0,linear: move error handling from make_request to IO completion Chen Cheng
2026-03-05 17:27 ` Yu Kuai [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11ee7aea-e9cf-47f3-8297-09ec607ec2ca@fnnas.com \
    --to=yukuai@fnnas.com \
    --cc=chencheng@fnnas.com \
    --cc=linan122@huawei.com \
    --cc=linux-raid@vger.kernel.org \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox