Linux RAID subsystem development
 help / color / mirror / Atom feed
From: "Chen Cheng" <chencheng@fnnas.com>
To: "Yu Kuai" <yukuai@fnnas.com>, <linux-raid@vger.kernel.org>
Cc: "Chen Cheng" <chencheng@fnnas.com>, <linux-kernel@vger.kernel.org>
Subject: [PATCH] md/linear,raid0: introduce badblocks handling
Date: Fri, 15 May 2026 20:00:12 +0800	[thread overview]
Message-ID: <20260515120012.3699839-1-chencheng@fnnas.com> (raw)

From: Chen Cheng <chencheng@fnnas.com>

md/linear and raid0 do not currently consult rdev badblocks, so I/O
can still be submitted to ranges that are already known to be bad.

The existing submit-path disk_live() fast-fail only covers devices that
have been removed. It does not help when a member device is still
present but a mapped read or write fails, and immediately calling
md_error() for every I/O failure would make these arrays unnecessarily
fragile.

Add badblocks handling to both the raid0 and linear personalities.

Before submitting a mapped read or write bio, check the target rdev's
badblocks. If the bio starts on a known bad range, fail it immediately.
If it crosses into a bad range, split it so that only the leading good
sectors are submitted.

Also remember the mapped target rdev and sector range in md_io_clone so
that md_end_clone_io() can record badblocks on linear/raid0 I/O failures.
This allows later I/O to fail fast on known bad sectors while avoiding
escalation to md_error() on every read or write failure. If the bad
range cannot be recorded, rdev_set_badblocks() will still trigger
md_error().

Signed-off-by: Chen Cheng <chencheng@fnnas.com>
---
 drivers/md/md-linear.c | 33 +++++++++++++++++++++++++++------
 drivers/md/md.c        | 16 ++++++++++++++++
 drivers/md/md.h        | 11 +++++++++--
 drivers/md/raid0.c     | 32 ++++++++++++++++++++++++++------
 4 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index fdff250d0d51..c6695658b698 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -237,6 +237,12 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 	struct dev_info *tmp_dev;
 	sector_t start_sector, end_sector, data_offset;
 	sector_t bio_sector = bio->bi_iter.bi_sector;
+	sector_t first_bad, bad_sectors, good_sectors;
+	sector_t target_start_sector, bio_start_sector;
+	struct md_io_clone *md_io_clone;
+	unsigned int target_nr_sectors;
+	enum req_op op = bio_op(bio);
+	bool is_rw = (op == REQ_OP_READ || op == REQ_OP_WRITE);
 
 	if (unlikely(bio->bi_opf & REQ_PREFLUSH)
 	    && md_flush_request(mddev, bio))
@@ -251,12 +257,6 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 		     bio_sector < start_sector))
 		goto out_of_bounds;
 
-	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
-		md_error(mddev, tmp_dev->rdev);
-		bio_io_error(bio);
-		return true;
-	}
-
 	if (unlikely(bio_end_sector(bio) > end_sector)) {
 		/* This bio crosses a device boundary, so we have to split it */
 		bio = bio_submit_split_bioset(bio, end_sector - bio_sector,
@@ -265,10 +265,31 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 			return true;
 	}
 
+	bio_start_sector = bio->bi_iter.bi_sector - start_sector;
+
+	if (is_rw && is_badblock(tmp_dev->rdev, bio_start_sector,
+				 bio_sectors(bio), &first_bad, &bad_sectors)) {
+		if (first_bad == bio_start_sector) {
+			bio_io_error(bio);
+			return true;
+		}
+
+		good_sectors = first_bad - bio_start_sector;
+		bio = bio_submit_split_bioset(bio, good_sectors, &mddev->bio_set);
+		if (!bio)
+			return true;
+	}
+
+	target_start_sector = bio->bi_iter.bi_sector - start_sector;
+	target_nr_sectors = bio_sectors(bio);
+
 	md_account_bio(mddev, &bio);
 	bio_set_dev(bio, tmp_dev->rdev->bdev);
 	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
 		start_sector + data_offset;
+	md_io_clone = bio->bi_private;
+	md_set_clone_target(md_io_clone, tmp_dev->rdev,
+			    target_start_sector, target_nr_sectors);
 
 	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
 		     !bdev_max_discard_sectors(bio->bi_bdev))) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3ce6f9e9d38e..995a8fa5f6a3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9218,6 +9218,13 @@ static void md_end_clone_io(struct bio *bio)
 	struct md_io_clone *md_io_clone = bio->bi_private;
 	struct bio *orig_bio = md_io_clone->orig_bio;
 	struct mddev *mddev = md_io_clone->mddev;
+	struct md_rdev *target_rdev = md_io_clone->target_rdev;
+	sector_t target_start_sector = md_io_clone->target_start_sector;
+	unsigned int target_nr_sectors = md_io_clone->target_nr_sectors;
+	enum md_submodule_id id = mddev->pers->head.id;
+	bool is_raid0_or_linear = (id == ID_LINEAR || id == ID_RAID0);
+	enum req_op op = bio_op(orig_bio);
+	bool is_rw = (op == REQ_OP_READ || op == REQ_OP_WRITE);
 
 	if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false))
 		md_bitmap_end(mddev, md_io_clone);
@@ -9225,6 +9232,12 @@ static void md_end_clone_io(struct bio *bio)
 	if (bio->bi_status && !orig_bio->bi_status)
 		orig_bio->bi_status = bio->bi_status;
 
+	if (bio->bi_status && target_rdev && target_nr_sectors &&
+	    is_raid0_or_linear && is_rw) {
+		rdev_set_badblocks(target_rdev, target_start_sector,
+				   target_nr_sectors, 0);
+	}
+
 	if (md_io_clone->start_time)
 		bio_end_io_acct(orig_bio, md_io_clone->start_time);
 
@@ -9243,6 +9256,9 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
 	md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
 	md_io_clone->orig_bio = *bio;
 	md_io_clone->mddev = mddev;
+	md_io_clone->target_rdev = NULL;
+	md_io_clone->target_start_sector = 0;
+	md_io_clone->target_nr_sectors = 0;
 	if (blk_queue_io_stat(bdev->bd_disk->queue))
 		md_io_clone->start_time = bio_start_io_acct(*bio);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ac84289664cd..3122c66ef379 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -872,6 +872,9 @@ struct md_thread {
 
 struct md_io_clone {
 	struct mddev	*mddev;
+	struct md_rdev	*target_rdev;
+	sector_t	target_start_sector;
+	unsigned int	target_nr_sectors;
 	struct bio	*orig_bio;
 	unsigned long	start_time;
 	sector_t	offset;
@@ -961,9 +964,13 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev,
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
 struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
 
-static inline bool is_rdev_broken(struct md_rdev *rdev)
+static inline void
+md_set_clone_target(struct md_io_clone *clone, struct md_rdev *rdev,
+		    sector_t start_sector, unsigned int nr_sectors)
 {
-	return !disk_live(rdev->bdev->bd_disk);
+	clone->target_rdev = rdev;
+	clone->target_start_sector = start_sector;
+	clone->target_nr_sectors = nr_sectors;
 }
 
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ef0045db409f..b95a16139fcd 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -559,8 +559,12 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
 	struct md_rdev *tmp_dev;
 	sector_t bio_sector = bio->bi_iter.bi_sector;
 	sector_t sector = bio_sector;
-
-	md_account_bio(mddev, &bio);
+	sector_t bio_start_sector, target_start_sector;
+	sector_t first_bad, bad_sectors, good_sectors;
+	unsigned int target_nr_sectors;
+	struct md_io_clone *md_io_clone;
+	enum req_op op = bio_op(bio);
+	bool is_rw = (op == REQ_OP_READ || op == REQ_OP_WRITE);
 
 	zone = find_zone(mddev->private, &sector);
 	switch (conf->layout) {
@@ -576,13 +580,29 @@ static void raid0_map_submit_bio(struct mddev *mddev, struct bio *bio)
 		return;
 	}
 
-	if (unlikely(is_rdev_broken(tmp_dev))) {
-		bio_io_error(bio);
-		md_error(mddev, tmp_dev);
-		return;
+	bio_start_sector = sector + zone->dev_start;
+
+	if (is_rw && is_badblock(tmp_dev, bio_start_sector, bio_sectors(bio),
+				 &first_bad, &bad_sectors)) {
+		if (first_bad == bio_start_sector) {
+			bio_io_error(bio);
+			return;
+		}
+
+		good_sectors = first_bad - bio_start_sector;
+		bio = bio_submit_split_bioset(bio, good_sectors, &mddev->bio_set);
+		if (!bio)
+			return;
 	}
 
+	target_start_sector = sector + zone->dev_start;
+	target_nr_sectors = bio_sectors(bio);
+
+	md_account_bio(mddev, &bio);
 	bio_set_dev(bio, tmp_dev->bdev);
+	md_io_clone = bio->bi_private;
+	md_set_clone_target(md_io_clone, tmp_dev, target_start_sector,
+			    target_nr_sectors);
 	bio->bi_iter.bi_sector = sector + zone->dev_start +
 		tmp_dev->data_offset;
 	mddev_trace_remap(mddev, bio, bio_sector);
-- 
2.54.0

                 reply	other threads:[~2026-05-15 12:00 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260515120012.3699839-1-chencheng@fnnas.com \
    --to=chencheng@fnnas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=yukuai@fnnas.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox