From: Kenta Akagi <k@mgml.me>
To: Song Liu <song@kernel.org>, Yu Kuai <yukuai3@huawei.com>,
Mariusz Tkaczyk <mtkaczyk@kernel.org>, Shaohua Li <shli@fb.com>,
Guoqing Jiang <jgq516@gmail.com>
Cc: linux-raid@vger.kernel.org, linux-kernel@vger.kernel.org,
Kenta Akagi <k@mgml.me>
Subject: [PATCH v4 1/9] md/raid1,raid10: Set the LastDev flag when the configuration changes
Date: Mon, 15 Sep 2025 12:42:02 +0900 [thread overview]
Message-ID: <20250915034210.8533-2-k@mgml.me> (raw)
In-Reply-To: <20250915034210.8533-1-k@mgml.me>
Currently, the LastDev flag is set on an rdev that failed a failfast
metadata write and called md_error, but did not become Faulty. It is
cleared when the metadata write retry succeeds. This has problems for
the following reasons:
* Despite its name, the flag is only set during a metadata write window.
* Unlike when LastDev and Failfast were introduced, md_error on the last
rdev of a RAID1/10 array now sets MD_BROKEN. Thus when LastDev is set,
the array is already unwritable.
A following commit will prevent failfast bios from breaking the array,
which requires knowing from outside the personality whether an rdev is
the last one. For that purpose, LastDev should be set on rdevs that must
not be lost.
This commit ensures that LastDev is set on the indispensable rdev in a
degraded RAID1/10 array.
Signed-off-by: Kenta Akagi <k@mgml.me>
---
drivers/md/md.c | 4 +---
drivers/md/md.h | 6 +++---
drivers/md/raid1.c | 34 +++++++++++++++++++++++++++++++++-
drivers/md/raid10.c | 34 +++++++++++++++++++++++++++++++++-
4 files changed, 70 insertions(+), 8 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4e033c26fdd4..268410b66b83 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1007,10 +1007,8 @@ static void super_written(struct bio *bio)
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
- set_bit(LastDev, &rdev->flags);
}
- } else
- clear_bit(LastDev, &rdev->flags);
+ }
bio_put(bio);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 51af29a03079..ec598f9a8381 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -281,9 +281,9 @@ enum flag_bits {
* It is expects that no bad block log
* is present.
*/
- LastDev, /* Seems to be the last working dev as
- * it didn't fail, so don't use FailFast
- * any more for metadata
+ LastDev, /* This is the last working rdev,
+ * so don't use FailFast any more for
+ * metadata.
*/
CollisionCheck, /*
* check if there is collision between raid1
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index bf44878ec640..32ad6b102ff7 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1733,6 +1733,33 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, "]");
}
+/**
+ * update_lastdev - Set or clear LastDev flag for all rdevs in array
+ * @conf: pointer to r1conf
+ *
+ * Sets LastDev if the device is In_sync and cannot be lost for the array.
+ * Otherwise, clear it.
+ *
+ * Caller must hold ->device_lock.
+ */
+static void update_lastdev(struct r1conf *conf)
+{
+ int i;
+ int alive_disks = conf->raid_disks - conf->mddev->degraded;
+
+ for (i = 0; i < conf->raid_disks; i++) {
+ struct md_rdev *rdev = conf->mirrors[i].rdev;
+
+ if (rdev) {
+ if (test_bit(In_sync, &rdev->flags) &&
+ alive_disks == 1)
+ set_bit(LastDev, &rdev->flags);
+ else
+ clear_bit(LastDev, &rdev->flags);
+ }
+ }
+}
+
/**
* raid1_error() - RAID1 error handler.
* @mddev: affected md device.
@@ -1767,8 +1794,10 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
}
}
set_bit(Blocked, &rdev->flags);
- if (test_and_clear_bit(In_sync, &rdev->flags))
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
mddev->degraded++;
+ update_lastdev(conf);
+ }
set_bit(Faulty, &rdev->flags);
spin_unlock_irqrestore(&conf->device_lock, flags);
/*
@@ -1864,6 +1893,7 @@ static int raid1_spare_active(struct mddev *mddev)
}
}
mddev->degraded -= count;
+ update_lastdev(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
print_conf(conf);
@@ -3290,6 +3320,7 @@ static int raid1_run(struct mddev *mddev)
rcu_assign_pointer(conf->thread, NULL);
mddev->private = conf;
set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+ update_lastdev(conf);
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
@@ -3427,6 +3458,7 @@ static int raid1_reshape(struct mddev *mddev)
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (raid_disks - conf->raid_disks);
+ update_lastdev(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b60c30bfb6c7..dc4edd4689f8 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1983,6 +1983,33 @@ static int enough(struct r10conf *conf, int ignore)
_enough(conf, 1, ignore);
}
+/**
+ * update_lastdev - Set or clear LastDev flag for all rdevs in array
+ * @conf: pointer to r10conf
+ *
+ * Sets LastDev if the device is In_sync and cannot be lost for the array.
+ * Otherwise, clear it.
+ *
+ * Caller must hold ->reconfig_mutex or ->device_lock.
+ */
+static void update_lastdev(struct r10conf *conf)
+{
+ int i;
+ int raid_disks = max(conf->geo.raid_disks, conf->prev.raid_disks);
+
+ for (i = 0; i < raid_disks; i++) {
+ struct md_rdev *rdev = conf->mirrors[i].rdev;
+
+ if (rdev) {
+ if (test_bit(In_sync, &rdev->flags) &&
+ !enough(conf, i))
+ set_bit(LastDev, &rdev->flags);
+ else
+ clear_bit(LastDev, &rdev->flags);
+ }
+ }
+}
+
/**
* raid10_error() - RAID10 error handler.
* @mddev: affected md device.
@@ -2013,8 +2040,10 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
return;
}
}
- if (test_and_clear_bit(In_sync, &rdev->flags))
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
mddev->degraded++;
+ update_lastdev(conf);
+ }
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(Blocked, &rdev->flags);
@@ -2102,6 +2131,7 @@ static int raid10_spare_active(struct mddev *mddev)
}
spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded -= count;
+ update_lastdev(conf);
spin_unlock_irqrestore(&conf->device_lock, flags);
print_conf(conf);
@@ -4159,6 +4189,7 @@ static int raid10_run(struct mddev *mddev)
md_set_array_sectors(mddev, size);
mddev->resync_max_sectors = size;
set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
+ update_lastdev(conf);
if (md_integrity_register(mddev))
goto out_free_conf;
@@ -4567,6 +4598,7 @@ static int raid10_start_reshape(struct mddev *mddev)
*/
spin_lock_irq(&conf->device_lock);
mddev->degraded = calc_degraded(conf);
+ update_lastdev(conf);
spin_unlock_irq(&conf->device_lock);
mddev->raid_disks = conf->geo.raid_disks;
mddev->reshape_position = conf->reshape_progress;
--
2.50.1
next prev parent reply other threads:[~2025-09-15 3:43 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-15 3:42 [PATCH v4 0/9] Don't set MD_BROKEN on failfast bio failure Kenta Akagi
2025-09-15 3:42 ` Kenta Akagi [this message]
2025-09-18 1:00 ` [PATCH v4 1/9] md/raid1,raid10: Set the LastDev flag when the configuration changes Yu Kuai
2025-09-18 14:02 ` Kenta Akagi
2025-09-21 7:54 ` Xiao Ni
2025-09-21 14:48 ` Kenta Akagi
2025-09-15 3:42 ` [PATCH v4 2/9] md: serialize md_error() Kenta Akagi
2025-09-18 1:04 ` Yu Kuai
2025-09-21 6:11 ` Kenta Akagi
2025-09-15 3:42 ` [PATCH v4 3/9] md: introduce md_bio_failure_error() Kenta Akagi
2025-09-18 1:09 ` Yu Kuai
2025-09-18 14:56 ` Kenta Akagi
2025-09-15 3:42 ` [PATCH v4 4/9] md/raid1,raid10: Don't set MD_BROKEN on failfast bio failure Kenta Akagi
2025-09-18 1:26 ` Yu Kuai
2025-09-18 15:22 ` Kenta Akagi
2025-09-19 1:36 ` Yu Kuai
2025-09-20 6:30 ` Kenta Akagi
2025-09-20 9:51 ` Yu Kuai
2025-09-23 15:54 ` Kenta Akagi
2025-09-15 3:42 ` [PATCH v4 5/9] md/raid1,raid10: Set R{1,10}BIO_Uptodate when successful retry of a failed bio Kenta Akagi
2025-09-17 9:24 ` Li Nan
2025-09-17 13:20 ` Kenta Akagi
2025-09-18 6:39 ` Li Nan
2025-09-18 15:36 ` Kenta Akagi
2025-09-19 1:37 ` Li Nan
2025-09-15 3:42 ` [PATCH v4 6/9] md/raid1,raid10: Fix missing retries Failfast write bios on no-bbl rdevs Kenta Akagi
2025-09-17 10:06 ` Li Nan
2025-09-17 13:33 ` Kenta Akagi
2025-09-18 6:58 ` Li Nan
2025-09-18 16:23 ` Kenta Akagi
2025-09-19 1:28 ` Li Nan
2025-09-15 3:42 ` [PATCH v4 7/9] md/raid10: fix failfast read error not rescheduled Kenta Akagi
2025-09-18 7:38 ` Li Nan
2025-09-18 16:12 ` Kenta Akagi
2025-09-19 1:20 ` Li Nan
2025-09-15 3:42 ` [PATCH v4 8/9] md/raid1,raid10: Add error message when setting MD_BROKEN Kenta Akagi
2025-09-15 3:42 ` [PATCH v4 9/9] md/raid1,raid10: Fix: Operation continuing on 0 devices Kenta Akagi
2025-09-15 7:19 ` Paul Menzel
2025-09-15 8:19 ` Kenta Akagi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250915034210.8533-2-k@mgml.me \
--to=k@mgml.me \
--cc=jgq516@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=mtkaczyk@kernel.org \
--cc=shli@fb.com \
--cc=song@kernel.org \
--cc=yukuai3@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox