From: "Yu Kuai" <yukuai@fnnas.com>
To: "Chen Cheng" <chencheng@fnnas.com>, "Song Liu" <song@kernel.org>,
<yukuai@fnnas.com>
Cc: "Li Nan" <linan122@huawei.com>, <linux-raid@vger.kernel.org>
Subject: Re: [PATCH] md/raid0,linear: move error handling from make_request to IO completion
Date: Fri, 6 Mar 2026 01:27:50 +0800 [thread overview]
Message-ID: <11ee7aea-e9cf-47f3-8297-09ec607ec2ca@fnnas.com> (raw)
In-Reply-To: <20260304142907.3791-1-chencheng@fnnas.com>
Hi,
在 2026/3/4 22:29, Chen Cheng 写道:
> raid0 and linear call md_error() in the submission path after checking
> is_rdev_broken() (i.e. disk_live() == false). This was intended as a
> safety fast-fail for removed disks, but is unnecessary: the block layer
> already handles GD_DEAD disks in __bio_queue_enter() and fails IO
> immediately without hanging. More importantly, it misses the offline
> case where the disk still exists but rejects IO — in that scenario, IO
> fails silently and md_error() is never called.
>
> Remove the submission-path check and instead call md_error() from
> md_end_clone_io() on any IO failure. Store the target rdev in
> md_io_clone so the completion handler knows which device failed. This
> uniformly handles disk removal, offline status, and any other IO error.
>
> Since raid0 and linear have no redundancy, any disk IO failure means
> data is already lost, so marking the array broken on any error is
> correct.
I won't say it's correct, this will make the array more fragile, the
lost data can be insignificant for filesystem and user, and in this
case the array can still be used.
>
> Signed-off-by: Chen Cheng <chencheng@fnnas.com>
> ---
> drivers/md/md-linear.c | 7 +------
> drivers/md/md.c | 5 +++++
> drivers/md/md.h | 11 ++++++-----
> drivers/md/raid0.c | 7 +------
> 4 files changed, 13 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
> index fdff250d0d51..2ba181a1297d 100644
> --- a/drivers/md/md-linear.c
> +++ b/drivers/md/md-linear.c
> @@ -251,12 +251,6 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
> bio_sector < start_sector))
> goto out_of_bounds;
>
> - if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
> - md_error(mddev, tmp_dev->rdev);
> - bio_io_error(bio);
> - return true;
> - }
> -
> if (unlikely(bio_end_sector(bio) > end_sector)) {
> /* This bio crosses a device boundary, so we have to split it */
> bio = bio_submit_split_bioset(bio, end_sector - bio_sector,
> @@ -269,6 +263,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
> bio_set_dev(bio, tmp_dev->rdev->bdev);
> bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
> start_sector + data_offset;
> + md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev->rdev);
>
> if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
> !bdev_max_discard_sectors(bio->bi_bdev))) {
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 3ce6f9e9d38e..7caa24c4919d 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -9218,6 +9218,7 @@ static void md_end_clone_io(struct bio *bio)
> struct md_io_clone *md_io_clone = bio->bi_private;
> struct bio *orig_bio = md_io_clone->orig_bio;
> struct mddev *mddev = md_io_clone->mddev;
> + enum md_submodule_id id = mddev->pers->head.id;
>
> if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false))
> md_bitmap_end(mddev, md_io_clone);
> @@ -9225,6 +9226,10 @@ static void md_end_clone_io(struct bio *bio)
> if (bio->bi_status && !orig_bio->bi_status)
> orig_bio->bi_status = bio->bi_status;
>
> + if (bio->bi_status && md_io_clone->rdev &&
> + (id == ID_LINEAR || id == ID_RAID0))
> + md_error(mddev, md_io_clone->rdev);
I'll suggest to introduce badblocks to linear and raid0 as well, in the case when
metadata update succeed, only call md_error() when there is too much badblocks.
> +
> if (md_io_clone->start_time)
> bio_end_io_acct(orig_bio, md_io_clone->start_time);
>
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index ac84289664cd..625f8304de3b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -872,6 +872,7 @@ struct md_thread {
>
> struct md_io_clone {
> struct mddev *mddev;
> + struct md_rdev *rdev;
> struct bio *orig_bio;
> unsigned long start_time;
> sector_t offset;
> @@ -917,6 +918,11 @@ extern void md_finish_reshape(struct mddev *mddev);
> void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
> struct bio *bio, sector_t start, sector_t size);
> void md_account_bio(struct mddev *mddev, struct bio **bio);
> +static inline void md_set_clone_rdev(struct md_io_clone *clone,
> + struct md_rdev *rdev)
> +{
> + clone->rdev = rdev;
> +}
> void md_free_cloned_bio(struct bio *bio);
>
> extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
> @@ -961,11 +967,6 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev,
> struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
> struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
>
> -static inline bool is_rdev_broken(struct md_rdev *rdev)
> -{
> - return !disk_live(rdev->bdev->bd_disk);
> -}
> -
> static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
> {
> int faulty = test_bit(Faulty, &rdev->flags);
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index ef0045db409f..e933abeb0d70 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -576,15 +576,10 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
> return;
> }
>
> - if (unlikely(is_rdev_broken(tmp_dev))) {
> - bio_io_error(bio);
> - md_error(mddev, tmp_dev);
> - return;
> - }
> -
> bio_set_dev(bio, tmp_dev->bdev);
> bio->bi_iter.bi_sector = sector + zone->dev_start +
> tmp_dev->data_offset;
> + md_set_clone_rdev((struct md_io_clone *)bio->bi_private, tmp_dev);
> mddev_trace_remap(mddev, bio, bio_sector);
> mddev_check_write_zeroes(mddev, bio);
> submit_bio_noacct(bio);
--
Thansk,
Kuai
prev parent reply other threads:[~2026-03-05 17:29 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-04 14:29 [PATCH] md/raid0,linear: move error handling from make_request to IO completion Chen Cheng
2026-03-05 17:27 ` Yu Kuai [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=11ee7aea-e9cf-47f3-8297-09ec607ec2ca@fnnas.com \
--to=yukuai@fnnas.com \
--cc=chencheng@fnnas.com \
--cc=linan122@huawei.com \
--cc=linux-raid@vger.kernel.org \
--cc=song@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox