linux-doc.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yu Kuai <yukuai1@huaweicloud.com>
To: hch@infradead.org, corbet@lwn.net, agk@redhat.com,
	snitzer@kernel.org, mpatocka@redhat.com, song@kernel.org,
	xni@redhat.com, hare@suse.de, linan122@huawei.com,
	colyli@kernel.org
Cc: linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	dm-devel@lists.linux.dev, linux-raid@vger.kernel.org,
	yukuai3@huawei.com, yukuai1@huaweicloud.com, yi.zhang@huawei.com,
	yangerkun@huawei.com, johnny.chenyi@huawei.com
Subject: [PATCH v6 md-6.18 09/11] md: add a new recovery_flag MD_RECOVERY_LAZY_RECOVER
Date: Tue, 26 Aug 2025 16:52:03 +0800	[thread overview]
Message-ID: <20250826085205.1061353-10-yukuai1@huaweicloud.com> (raw)
In-Reply-To: <20250826085205.1061353-1-yukuai1@huaweicloud.com>

From: Yu Kuai <yukuai3@huawei.com>

This flag is used by llbitmap in later patches to skip the raid456 initial
recovery and delay building the initial xor data until the first write.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/md.c    | 47 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/md.h    |  2 ++
 drivers/md/raid5.c | 19 +++++++++++++++----
 3 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7196e7f6b2a4..199843356449 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9199,6 +9199,39 @@ static sector_t md_sync_max_sectors(struct mddev *mddev,
 	}
 }
 
+/*
+ * If lazy recovery is requested and all rdevs are in sync, select the rdev with
+ * the highest index to perform recovery to build initial xor data, this is the
+ * same as old bitmap.
+ */
+static bool mddev_select_lazy_recover_rdev(struct mddev *mddev)
+{
+	struct md_rdev *recover_rdev = NULL;
+	struct md_rdev *rdev;
+	bool ret = false;
+
+	rcu_read_lock();
+	rdev_for_each_rcu(rdev, mddev) {
+		if (rdev->raid_disk < 0)
+			continue;
+
+		if (test_bit(Faulty, &rdev->flags) ||
+		    !test_bit(In_sync, &rdev->flags))
+			break;
+
+		if (!recover_rdev || recover_rdev->raid_disk < rdev->raid_disk)
+			recover_rdev = rdev;
+	}
+
+	if (recover_rdev) {
+		clear_bit(In_sync, &recover_rdev->flags);
+		ret = true;
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
 static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
 {
 	sector_t start = 0;
@@ -9230,6 +9263,14 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
 				start = rdev->recovery_offset;
 		rcu_read_unlock();
 
+		/*
+		 * If there are no spares and raid456 lazy initial recovery is
+		 * requested, select an rdev to recover and start from sector 0.
+		 */
+		if (test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery) &&
+		    start == MaxSector && mddev_select_lazy_recover_rdev(mddev))
+			start = 0;
+
 		/* If there is a bitmap, we need to make sure all
 		 * writes that started before we added a spare
 		 * complete before we start doing a recovery.
@@ -9791,6 +9832,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
 
 		set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 		clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+		clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
 		return true;
 	}
 
@@ -9799,6 +9841,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
 		remove_spares(mddev, NULL);
 		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 		clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+		clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
 		return true;
 	}
 
@@ -9808,7 +9851,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
 	 * re-add.
 	 */
 	*spares = remove_and_add_spares(mddev, NULL);
-	if (*spares) {
+	if (*spares || test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery)) {
 		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 		clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 		clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@ -10021,6 +10064,7 @@ void md_check_recovery(struct mddev *mddev)
 			}
 
 			clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+			clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
 			clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 			clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
 
@@ -10131,6 +10175,7 @@ void md_reap_sync_thread(struct mddev *mddev)
 	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
 	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+	clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
 	/*
 	 * We call mddev->cluster_ops->update_size here because sync_size could
 	 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4fa5a3e68a0c..7b6357879a84 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -667,6 +667,8 @@ enum recovery_flags {
 	MD_RECOVERY_RESHAPE,
 	/* remote node is running resync thread */
 	MD_RESYNCING_REMOTE,
+	/* raid456 lazy initial recover */
+	MD_RECOVERY_LAZY_RECOVER,
 };
 
 enum md_ro_state {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 672ab226e43c..5112658ef5f6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4705,10 +4705,21 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 			}
 		} else if (test_bit(In_sync, &rdev->flags))
 			set_bit(R5_Insync, &dev->flags);
-		else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
-			/* in sync if before recovery_offset */
-			set_bit(R5_Insync, &dev->flags);
-		else if (test_bit(R5_UPTODATE, &dev->flags) &&
+		else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <=
+			 rdev->recovery_offset) {
+			/*
+			 * in sync if:
+			 *  - normal IO, or
+			 *  - resync IO that is not lazy recovery
+			 *
+			 * For lazy recovery, we have to mark the rdev without
+			 * In_sync as failed, to build initial xor data.
+			 */
+			if (!test_bit(STRIPE_SYNCING, &sh->state) ||
+			    !test_bit(MD_RECOVERY_LAZY_RECOVER,
+				      &conf->mddev->recovery))
+				set_bit(R5_Insync, &dev->flags);
+		} else if (test_bit(R5_UPTODATE, &dev->flags) &&
 			 test_bit(R5_Expanded, &dev->flags))
 			/* If we've reshaped into here, we assume it is Insync.
 			 * We will shortly update recovery_offset to make
-- 
2.39.2


  parent reply	other threads:[~2025-08-26  9:00 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-26  8:51 [PATCH v6 md-6.18 00/11] md/llbitmap: md/md-llbitmap: introduce a new lockless bitmap Yu Kuai
2025-08-26  8:51 ` [PATCH v6 md-6.18 01/11] md: add a new parameter 'offset' to md_super_write() Yu Kuai
2025-08-26  8:51 ` [PATCH v6 md-6.18 02/11] md: factor out a helper raid_is_456() Yu Kuai
2025-08-26  8:51 ` [PATCH v6 md-6.18 03/11] md/md-bitmap: support discard for bitmap ops Yu Kuai
2025-08-26  8:51 ` [PATCH v6 md-6.18 04/11] md: add a new mddev field 'bitmap_id' Yu Kuai
2025-08-26  8:51 ` [PATCH v6 md-6.18 05/11] md/md-bitmap: add a new sysfs api bitmap_type Yu Kuai
2025-08-26  8:52 ` [PATCH v6 md-6.18 06/11] md/md-bitmap: delay registration of bitmap_ops until creating bitmap Yu Kuai
2025-08-26  8:52 ` [PATCH v6 md-6.18 07/11] md/md-bitmap: add a new method skip_sync_blocks() in bitmap_operations Yu Kuai
2025-08-26  8:52 ` [PATCH v6 md-6.18 08/11] md/md-bitmap: add a new method blocks_synced() " Yu Kuai
2025-08-26  8:52 ` Yu Kuai [this message]
2025-08-26  8:52 ` [PATCH v6 md-6.18 10/11] md/md-bitmap: make method bitmap_ops->daemon_work optional Yu Kuai
2025-08-26  8:52 ` [PATCH v6 md-6.18 11/11] md/md-llbitmap: introduce new lockless bitmap Yu Kuai
2025-08-26  9:52   ` Paul Menzel
2025-08-27  3:44     ` Yu Kuai
2025-08-27  6:07       ` Paul Menzel
2025-08-28  7:10         ` Yu Kuai
2025-08-28  4:15   ` Randy Dunlap
2025-08-28 11:24   ` Li Nan
2025-08-29  1:03     ` Yu Kuai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250826085205.1061353-10-yukuai1@huaweicloud.com \
    --to=yukuai1@huaweicloud.com \
    --cc=agk@redhat.com \
    --cc=colyli@kernel.org \
    --cc=corbet@lwn.net \
    --cc=dm-devel@lists.linux.dev \
    --cc=hare@suse.de \
    --cc=hch@infradead.org \
    --cc=johnny.chenyi@huawei.com \
    --cc=linan122@huawei.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=mpatocka@redhat.com \
    --cc=snitzer@kernel.org \
    --cc=song@kernel.org \
    --cc=xni@redhat.com \
    --cc=yangerkun@huawei.com \
    --cc=yi.zhang@huawei.com \
    --cc=yukuai3@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).