From: "Chen Cheng" <chencheng@fnnas.com>
To: <linux-raid@vger.kernel.org>, <yukuai@fygo.io>, <yukuai@fnnas.com>
Cc: <chencheng@fnnas.com>, <linux-kernel@vger.kernel.org>
Subject: [PATCH v2] md: use READ_ONCE() for lockless reads of sb_flags
Date: Tue, 23 Jun 2026 19:16:17 +0800 [thread overview]
Message-ID: <20260623111617.2500313-1-chencheng@fnnas.com> (raw)
From: Chen Cheng <chencheng@fnnas.com>
sb_flags is checked without a lock in md, raid1, raid5, and raid10.
KCSAN reports these reads as data races.
The write side uses atomic bit ops.
The read side still has plain loads in a few places.
Use READ_ONCE() for the lockless reads of sb_flags.
v1 -> v2:
- Also convert the lockless sb_flags reads in the other array levels.
KCSAN report #1:
======================================
BUG: KCSAN: data-race in md_check_recovery / md_write_start
write (marked) to 0xffff8e39f897f030 of 8 bytes by task 248146 on cpu 8:
md_write_start+0x5dd/0x910
raid10_make_request+0x9b/0x1080
md_handle_request+0x4a2/0xa40
[........]
read to 0xffff8e39f897f030 of 8 bytes by task 250445 on cpu 11:
md_check_recovery+0x574/0x900
raid10d+0xb7/0x2950
[........]
KCSAN report #2:
======================================
BUG: KCSAN: data-race in md_check_recovery / md_write_start
write (marked) to 0xffff8e39e953f030 of 8 bytes by task 540091 on cpu 11:
md_write_start+0x5dd/0x910
raid1_make_request+0x141/0x1990
[........]
read to 0xffff8e39e953f030 of 8 bytes by task 580822 on cpu 0:
md_check_recovery+0x574/0x900
raid1d+0xcc/0x3840
[........]
value changed: 0x0000000000000002 -> 0x0000000000000006
KCSAN report #3:
======================================
BUG: KCSAN: data-race in md_check_recovery / md_do_sync.cold
write (marked) to 0xffff8e39e9404030 of 8 bytes by task 492473 on cpu 6:
md_do_sync.cold+0x3f6/0x1686
[........]
read to 0xffff8e39e9404030 of 8 bytes by task 492402 on cpu 3:
md_check_recovery+0x16d/0x900
raid1d+0xcc/0x3840
[........]
value changed: 0x0000000000000000 -> 0x0000000000000002
KCSAN report #4:
======================================
BUG: KCSAN: data-race in md_do_sync.cold / raid5d
write (marked) to 0xffff8e39c35cb030 of 8 bytes by task 192196 on cpu 10:
md_do_sync.cold+0x3f6/0x1686
md_thread+0x15a/0x2d0
[........]
read to 0xffff8e39c35cb030 of 8 bytes by task 190759 on cpu 5:
raid5d+0x7f9/0xba0
md_thread+0x15a/0x2d0
[........]
value changed: 0x0000000000000000 -> 0x0000000000000002
Signed-off-by: Chen Cheng <chencheng@fnnas.com>
---
drivers/md/md.c | 8 ++++----
drivers/md/raid1.c | 2 +-
drivers/md/raid10.c | 4 ++--
drivers/md/raid5.c | 7 ++++---
4 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 096bb64e87bd..c5c50640b684 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6830,11 +6830,11 @@ int md_run(struct mddev *mddev)
* via sysfs - until a lack of spares is confirmed.
*/
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- if (mddev->sb_flags)
+ if (READ_ONCE(mddev->sb_flags))
md_update_sb(mddev, 0);
if (IS_ENABLED(CONFIG_MD_BITMAP) && !mddev->bitmap_info.file &&
!mddev->bitmap_info.offset)
md_bitmap_set_none(mddev);
@@ -7024,11 +7024,11 @@ static void __md_stop_writes(struct mddev *mddev)
mddev->bitmap_ops->flush(mddev);
}
if (md_is_rdwr(mddev) &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
- mddev->sb_flags)) {
+ READ_ONCE(mddev->sb_flags))) {
/* mark array as shutdown cleanly */
if (!mddev_is_clustered(mddev))
mddev->in_sync = 1;
md_update_sb(mddev, 1);
}
@@ -10294,11 +10294,11 @@ static bool md_should_do_recovery(struct mddev *mddev)
/*
* MD_SB_CHANGE_PENDING indicates that the array is switching from clean to
* active, and no action is needed for now.
* All other MD_SB_* flags require to update the superblock.
*/
- if (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING))
+ if (READ_ONCE(mddev->sb_flags) & ~ (1<<MD_SB_CHANGE_PENDING))
return true;
/*
* If the array is not using external metadata and there has been no data
* written for some time, then the array's status needs to be set to
@@ -10423,11 +10423,11 @@ void md_check_recovery(struct mddev *mddev)
spin_lock(&mddev->lock);
set_in_sync(mddev);
spin_unlock(&mddev->lock);
}
- if (mddev->sb_flags)
+ if (READ_ONCE(mddev->sb_flags))
md_update_sb(mddev, 0);
/*
* Never start a new sync thread if MD_RECOVERY_RUNNING is
* still set.
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 29b58583e381..bd6808656edb 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2738,11 +2738,11 @@ static void raid1d(struct md_thread *thread)
handle_read_error(conf, r1_bio);
else
WARN_ON_ONCE(1);
cond_resched();
- if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING))
+ if (READ_ONCE(mddev->sb_flags) & ~(1 << MD_SB_CHANGE_PENDING))
md_check_recovery(mddev);
}
blk_finish_plug(&plug);
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index adaf9e432e25..3ffa5a19964d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3030,11 +3030,11 @@ static void raid10d(struct md_thread *thread)
handle_read_error(mddev, r10_bio);
else
WARN_ON_ONCE(1);
cond_resched();
- if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING))
+ if (READ_ONCE(mddev->sb_flags) & ~(1 << MD_SB_CHANGE_PENDING))
md_check_recovery(mddev);
}
blk_finish_plug(&plug);
}
@@ -4698,11 +4698,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
else
mddev->curr_resync_completed = conf->reshape_progress;
conf->reshape_checkpoint = jiffies;
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
md_wakeup_thread(mddev->thread);
- wait_event(mddev->sb_wait, mddev->sb_flags == 0 ||
+ wait_event(mddev->sb_wait, READ_ONCE(mddev->sb_flags) == 0 ||
test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
allow_barrier(conf);
return sectors_done;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ded6a69f7795..cb58b4353995 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1219,11 +1219,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
break;
if (bad < 0) {
set_bit(BlockedBadBlocks, &rdev->flags);
if (!conf->mddev->external &&
- conf->mddev->sb_flags) {
+ READ_ONCE(conf->mddev->sb_flags)) {
/* It is very unlikely, but we might
* still need to write out the
* bad block log - better give it
* a chance*/
md_check_recovery(conf->mddev);
@@ -6469,11 +6469,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
rdev->recovery_offset = sector_nr;
conf->reshape_checkpoint = jiffies;
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
md_wakeup_thread(mddev->thread);
- wait_event(mddev->sb_wait, mddev->sb_flags == 0 ||
+ wait_event(mddev->sb_wait, READ_ONCE(mddev->sb_flags) == 0 ||
test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
return 0;
spin_lock_irq(&conf->device_lock);
conf->reshape_safe = mddev->reshape_position;
@@ -6913,11 +6913,12 @@ static void raid5d(struct md_thread *thread)
conf->temp_inactive_list);
if (!batch_size && !released)
break;
handled += batch_size;
- if (mddev->sb_flags & ~(1 << MD_SB_CHANGE_PENDING)) {
+ if (READ_ONCE(mddev->sb_flags) &
+ ~(1 << MD_SB_CHANGE_PENDING)) {
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
}
}
--
2.54.0
next reply other threads:[~2026-06-23 11:16 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-23 11:16 Chen Cheng [this message]
2026-06-23 11:25 ` [PATCH v2] md: use READ_ONCE() for lockless reads of sb_flags sashiko-bot
2026-06-23 12:51 ` Chen Cheng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260623111617.2500313-1-chencheng@fnnas.com \
--to=chencheng@fnnas.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-raid@vger.kernel.org \
--cc=yukuai@fnnas.com \
--cc=yukuai@fygo.io \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox