From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Alex Lyakas <alex.lyakas@zadara.com>, Song Liu <song@kernel.org>,
Sasha Levin <sashal@kernel.org>,
linux-raid@vger.kernel.org
Subject: [PATCH AUTOSEL 5.10 34/44] md: When assembling the array, consult the superblock of the freshest device
Date: Tue, 16 Jan 2024 15:00:03 -0500 [thread overview]
Message-ID: <20240116200044.258335-34-sashal@kernel.org> (raw)
In-Reply-To: <20240116200044.258335-1-sashal@kernel.org>
From: Alex Lyakas <alex.lyakas@zadara.com>
[ Upstream commit dc1cc22ed58f11d58d8553c5ec5f11cbfc3e3039 ]
Upon assembling the array, both the kernel and mdadm allow devices to have an event
counter difference of 1, and still consider them up-to-date.
However, a device whose event count is behind by 1 may in fact not be up-to-date,
and array resync with such a device may cause data corruption.
To avoid this, consult the superblock of the freshest device about the status
of any device whose event counter is behind by 1.
Signed-off-by: Alex Lyakas <alex.lyakas@zadara.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/1702470271-16073-1-git-send-email-alex.lyakas@zadara.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/md/md.c | 54 ++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 44 insertions(+), 10 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6efe49f7bdf5..03d2e31dda2f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1179,6 +1179,7 @@ struct super_type {
struct md_rdev *refdev,
int minor_version);
int (*validate_super)(struct mddev *mddev,
+ struct md_rdev *freshest,
struct md_rdev *rdev);
void (*sync_super)(struct mddev *mddev,
struct md_rdev *rdev);
@@ -1317,8 +1318,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
/*
* validate_super for 0.90.0
+ * note: we are not using "freshest" for 0.9 superblock
*/
-static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
+static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
{
mdp_disk_t *desc;
mdp_super_t *sb = page_address(rdev->sb_page);
@@ -1833,7 +1835,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
return ret;
}
-static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
+static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
{
struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
__u64 ev1 = le64_to_cpu(sb->events);
@@ -1929,13 +1931,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
}
} else if (mddev->pers == NULL) {
/* Insist of good event counter while assembling, except for
- * spares (which don't need an event count) */
- ++ev1;
+ * spares (which don't need an event count).
+ * Similar to mdadm, we allow event counter difference of 1
+ * from the freshest device.
+ */
if (rdev->desc_nr >= 0 &&
rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
- if (ev1 < mddev->events)
+ if (ev1 + 1 < mddev->events)
return -EINVAL;
} else if (mddev->bitmap) {
/* If adding to array with a bitmap, then we can accept an
@@ -1956,8 +1960,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
role = MD_DISK_ROLE_SPARE;
rdev->desc_nr = -1;
- } else
+ } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
+ /*
+ * If we are assembling, and our event counter is smaller than the
+ * highest event counter, we cannot trust our superblock about the role.
+ * It could happen that our rdev was marked as Faulty, and all other
+ * superblocks were updated with +1 event counter.
+ * Then, before the next superblock update, which typically happens when
+ * remove_and_add_spares() removes the device from the array, there was
+ * a crash or reboot.
+ * If we allow current rdev without consulting the freshest superblock,
+ * we could cause data corruption.
+ * Note that in this case our event counter is smaller by 1 than the
+ * highest, otherwise, this rdev would not be allowed into array;
+ * both kernel and mdadm allow event counter difference of 1.
+ */
+ struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
+ u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);
+
+ if (rdev->desc_nr >= freshest_max_dev) {
+ /* this is unexpected, better not proceed */
+ pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
+ mdname(mddev), rdev->bdev, rdev->desc_nr,
+ freshest->bdev, freshest_max_dev);
+ return -EUCLEAN;
+ }
+
+ role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
+ pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n",
+ mdname(mddev), rdev->bdev, role, role, freshest->bdev);
+ } else {
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
+ }
switch(role) {
case MD_DISK_ROLE_SPARE: /* spare */
break;
@@ -2896,7 +2930,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
* and should be added immediately.
*/
super_types[mddev->major_version].
- validate_super(mddev, rdev);
+ validate_super(mddev, NULL/*freshest*/, rdev);
if (add_journal)
mddev_suspend(mddev);
err = mddev->pers->hot_add_disk(mddev, rdev);
@@ -3814,7 +3848,7 @@ static int analyze_sbs(struct mddev *mddev)
}
super_types[mddev->major_version].
- validate_super(mddev, freshest);
+ validate_super(mddev, NULL/*freshest*/, freshest);
i = 0;
rdev_for_each_safe(rdev, tmp, mddev) {
@@ -3829,7 +3863,7 @@ static int analyze_sbs(struct mddev *mddev)
}
if (rdev != freshest) {
if (super_types[mddev->major_version].
- validate_super(mddev, rdev)) {
+ validate_super(mddev, freshest, rdev)) {
pr_warn("md: kicking non-fresh %s from array!\n",
bdevname(rdev->bdev,b));
md_kick_rdev_from_array(rdev);
@@ -6817,7 +6851,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
rdev->saved_raid_disk = rdev->raid_disk;
} else
super_types[mddev->major_version].
- validate_super(mddev, rdev);
+ validate_super(mddev, NULL/*freshest*/, rdev);
if ((info->state & (1<<MD_DISK_SYNC)) &&
rdev->raid_disk != info->raid_disk) {
/* This was a hot-add request, but events doesn't
--
2.43.0
next prev parent reply other threads:[~2024-01-16 20:02 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-16 19:59 [PATCH AUTOSEL 5.10 01/44] wifi: rt2x00: restart beacon queue when hardware reset Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 02/44] selftests/bpf: satisfy compiler by having explicit return in btf test Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 03/44] selftests/bpf: Fix pyperf180 compilation failure with clang18 Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 04/44] scsi: lpfc: Fix possible file string name overflow when updating firmware Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 05/44] PCI: Add no PM reset quirk for NVIDIA Spectrum devices Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 06/44] bonding: return -ENOMEM instead of BUG in alb_upper_dev_walk Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 07/44] wifi: ieee80211: fix PV1 frame control field name Sasha Levin
2024-01-16 21:31 ` Johannes Berg
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 08/44] scsi: arcmsr: Support new PCI device IDs 1883 and 1886 Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 09/44] ARM: dts: imx7d: Fix coresight funnel ports Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 10/44] ARM: dts: imx7s: Fix lcdif compatible Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 11/44] ARM: dts: imx7s: Fix nand-controller #size-cells Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 12/44] wifi: ath9k: Fix potential array-index-out-of-bounds read in ath9k_htc_txstatus() Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 13/44] bpf: Add map and need_defer parameters to .map_fd_put_ptr() Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 14/44] bpf: Set need_defer as false when clearing fd array during map free Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 15/44] scsi: libfc: Don't schedule abort twice Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 16/44] scsi: libfc: Fix up timeout error in fc_fcp_rec_error() Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 17/44] net: mvmdio: Avoid excessive sleeps in polled mode Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 18/44] bpf: Guard stack limits against 32bit overflow Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 19/44] bpf: Set uattr->batch.count as zero before batched update or deletion Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 20/44] ARM: dts: rockchip: fix rk3036 hdmi ports node Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 21/44] ARM: dts: imx25/27-eukrea: Fix RTC node name Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 22/44] ARM: dts: imx: Use flash@0,0 pattern Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 23/44] ARM: dts: imx27: Fix sram node Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 24/44] ARM: dts: imx1: " Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 25/44] ionic: pass opcode to devcmd_wait Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 26/44] block/rnbd-srv: Check for unlikely string overflow Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 27/44] ARM: dts: imx25: Fix the iim compatible string Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 28/44] ARM: dts: imx25/27: Pass timing0 Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 29/44] ARM: dts: imx27-apf27dev: Fix LED name Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 19:59 ` [PATCH AUTOSEL 5.10 30/44] ARM: dts: imx23-sansa: Use preferred i2c-gpios properties Sasha Levin
2024-01-16 19:59 ` Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 31/44] ARM: dts: imx23/28: Fix the DMA controller node name Sasha Levin
2024-01-16 20:00 ` Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 32/44] net: dsa: mv88e6xxx: Fix mv88e6352_serdes_get_stats error path Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 33/44] block: prevent an integer overflow in bvec_try_merge_hw_page Sasha Levin
2024-01-16 20:00 ` Sasha Levin [this message]
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 35/44] arm64: dts: qcom: msm8996: Fix 'in-ports' is a required property Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 36/44] arm64: dts: qcom: msm8998: Fix 'out-ports' " Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 37/44] wifi: rtl8xxxu: Add additional USB IDs for RTL8192EU devices Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 38/44] libbpf: Fix NULL pointer dereference in bpf_object__collect_prog_relos Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 39/44] wifi: rtlwifi: add calculate_bit_shift() Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 40/44] wifi: rtlwifi: rtl8723{be,ae}: using calculate_bit_shift() Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 41/44] wifi: cfg80211: free beacon_ies when overridden from hidden BSS Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 42/44] Bluetooth: qca: Set both WIDEBAND_SPEECH and LE_STATES quirks for QCA2066 Sasha Levin
2024-01-16 20:00 ` [PATCH AUTOSEL 5.10 43/44] Bluetooth: L2CAP: Fix possible multiple reject send Sasha Levin
2024-01-16 20:00 ` [Intel-wired-lan] [PATCH AUTOSEL 5.10 44/44] i40e: Fix VF disable behavior to block all traffic Sasha Levin
2024-01-16 20:00 ` Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240116200044.258335-34-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=alex.lyakas@zadara.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=song@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.