From: Yu Kuai <yukuai1@huaweicloud.com>
To: hch@infradead.org, corbet@lwn.net, agk@redhat.com,
snitzer@kernel.org, mpatocka@redhat.com, song@kernel.org,
xni@redhat.com, hare@suse.de, linan122@huawei.com,
colyli@kernel.org
Cc: linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
dm-devel@lists.linux.dev, linux-raid@vger.kernel.org,
yukuai3@huawei.com, yukuai1@huaweicloud.com, yi.zhang@huawei.com,
yangerkun@huawei.com, johnny.chenyi@huawei.com
Subject: [PATCH v6 md-6.18 09/11] md: add a new recovery_flag MD_RECOVERY_LAZY_RECOVER
Date: Tue, 26 Aug 2025 16:52:03 +0800
Message-ID: <20250826085205.1061353-10-yukuai1@huaweicloud.com>
In-Reply-To: <20250826085205.1061353-1-yukuai1@huaweicloud.com>
From: Yu Kuai <yukuai3@huawei.com>
This flag will be used by llbitmap in later patches to skip the raid456
initial recovery and to delay building the initial xor data until the
first write.
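For illustration only (the helper below does not exist in the tree; its
name and call site are hypothetical), a bitmap implementation could
request a lazy initial recovery along these lines:

	/*
	 * Hypothetical sketch: set the new flag and kick the md thread so
	 * that md_check_recovery() schedules a recovery, while raid456
	 * defers building the initial xor data.
	 */
	static void example_request_lazy_recover(struct mddev *mddev)
	{
		set_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}

The hunks below then clear the new flag alongside the other recovery
flags in the paths where a recovery does not run or has completed.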
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/md.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-
drivers/md/md.h | 2 ++
drivers/md/raid5.c | 19 +++++++++++++++----
3 files changed, 63 insertions(+), 5 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7196e7f6b2a4..199843356449 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9199,6 +9199,39 @@ static sector_t md_sync_max_sectors(struct mddev *mddev,
}
}
+/*
+ * If lazy recovery is requested and all rdevs are in sync, select the rdev
+ * with the highest index to perform recovery on, in order to build the
+ * initial xor data; this is the same behaviour as the old bitmap.
+ */
+static bool mddev_select_lazy_recover_rdev(struct mddev *mddev)
+{
+ struct md_rdev *recover_rdev = NULL;
+ struct md_rdev *rdev;
+ bool ret = false;
+
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev) {
+ if (rdev->raid_disk < 0)
+ continue;
+
+ if (test_bit(Faulty, &rdev->flags) ||
+ !test_bit(In_sync, &rdev->flags))
+ break;
+
+ if (!recover_rdev || recover_rdev->raid_disk < rdev->raid_disk)
+ recover_rdev = rdev;
+ }
+
+ if (recover_rdev) {
+ clear_bit(In_sync, &recover_rdev->flags);
+ ret = true;
+ }
+
+ rcu_read_unlock();
+ return ret;
+}
+
static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
{
sector_t start = 0;
@@ -9230,6 +9263,14 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
start = rdev->recovery_offset;
rcu_read_unlock();
+ /*
+ * If there are no spares and raid456 lazy initial recovery is
+ * requested, start from sector 0 to build the initial xor data.
+ */
+ if (test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery) &&
+ start == MaxSector && mddev_select_lazy_recover_rdev(mddev))
+ start = 0;
+
/* If there is a bitmap, we need to make sure all
* writes that started before we added a spare
* complete before we start doing a recovery.
@@ -9791,6 +9832,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
return true;
}
@@ -9799,6 +9841,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
remove_spares(mddev, NULL);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
return true;
}
@@ -9808,7 +9851,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
* re-add.
*/
*spares = remove_and_add_spares(mddev, NULL);
- if (*spares) {
+ if (*spares || test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery)) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@ -10021,6 +10064,7 @@ void md_check_recovery(struct mddev *mddev)
}
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
@@ -10131,6 +10175,7 @@ void md_reap_sync_thread(struct mddev *mddev)
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
/*
* We call mddev->cluster_ops->update_size here because sync_size could
* be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4fa5a3e68a0c..7b6357879a84 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -667,6 +667,8 @@ enum recovery_flags {
MD_RECOVERY_RESHAPE,
/* remote node is running resync thread */
MD_RESYNCING_REMOTE,
+ /* raid456 lazy initial recovery */
+ MD_RECOVERY_LAZY_RECOVER,
};
enum md_ro_state {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 672ab226e43c..5112658ef5f6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4705,10 +4705,21 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
}
} else if (test_bit(In_sync, &rdev->flags))
set_bit(R5_Insync, &dev->flags);
- else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
- /* in sync if before recovery_offset */
- set_bit(R5_Insync, &dev->flags);
- else if (test_bit(R5_UPTODATE, &dev->flags) &&
+ else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <=
+ rdev->recovery_offset) {
+ /*
+ * In sync if this is:
+ * - normal IO, or
+ * - resync IO that is not lazy recovery
+ *
+ * For lazy recovery, the rdev without In_sync has to be
+ * treated as failed, so that the initial xor data gets built.
+ */
+ if (!test_bit(STRIPE_SYNCING, &sh->state) ||
+ !test_bit(MD_RECOVERY_LAZY_RECOVER,
+ &conf->mddev->recovery))
+ set_bit(R5_Insync, &dev->flags);
+ } else if (test_bit(R5_UPTODATE, &dev->flags) &&
test_bit(R5_Expanded, &dev->flags))
/* If we've reshaped into here, we assume it is Insync.
* We will shortly update recovery_offset to make
--
2.39.2