public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Sun YangKai <sunk67188@gmail.com>
To: linux-btrfs@vger.kernel.org
Cc: Sun YangKai <sunk67188@gmail.com>, Boris Burkov <boris@bur.io>
Subject: [PATCH v2 1/7] btrfs: fix periodic reclaim condition
Date: Sat,  3 Jan 2026 20:19:48 +0800	[thread overview]
Message-ID: <20260103122504.10924-3-sunk67188@gmail.com> (raw)
In-Reply-To: <20260103122504.10924-2-sunk67188@gmail.com>

Problems with the current implementation:
1. reclaimable_bytes is signed while chunk_sz is unsigned, so the
   comparison converts a negative reclaimable_bytes to a huge unsigned
   value, which triggers reclaim unexpectedly.
2. The "space must be freed between scans" assumption breaks the
   two-scan requirement: the first scan marks block groups and the
   second scan reclaims them. Without the second scan, no reclamation
   occurs.

Instead, track actual reclaim progress: pause reclaim when block groups
will be reclaimed, and resume only when progress is made. This ensures
reclaim continues until no further progress can be made, then resumes when
space_info changes or new reclaimable groups appear.

CC: Boris Burkov <boris@bur.io>
Fixes: 813d4c6422516 ("btrfs: prevent pathological periodic reclaim loops")
Signed-off-by: Sun YangKai <sunk67188@gmail.com>
---
 fs/btrfs/block-group.c | 15 +++++++-------
 fs/btrfs/space-info.c  | 44 +++++++++++++++++++-----------------------
 fs/btrfs/space-info.h  | 28 ++++++++++++++++++---------
 3 files changed, 46 insertions(+), 41 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index e417aba4c4c7..94a4068cd42a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1871,6 +1871,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 	while (!list_empty(&fs_info->reclaim_bgs)) {
 		u64 used;
 		u64 reserved;
+		u64 old_total;
 		int ret = 0;
 
 		bg = list_first_entry(&fs_info->reclaim_bgs,
@@ -1936,6 +1937,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 		}
 
 		spin_unlock(&bg->lock);
+		old_total = space_info->total_bytes;
 		spin_unlock(&space_info->lock);
 
 		/*
@@ -1988,14 +1990,14 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 			reserved = 0;
 			spin_lock(&space_info->lock);
 			space_info->reclaim_errors++;
-			if (READ_ONCE(space_info->periodic_reclaim))
-				space_info->periodic_reclaim_ready = false;
 			spin_unlock(&space_info->lock);
 		}
 		spin_lock(&space_info->lock);
 		space_info->reclaim_count++;
 		space_info->reclaim_bytes += used;
 		space_info->reclaim_bytes += reserved;
+		if (space_info->total_bytes < old_total)
+			btrfs_resume_periodic_reclaim(space_info);
 		spin_unlock(&space_info->lock);
 
 next:
@@ -3730,8 +3732,6 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 		space_info->bytes_reserved -= num_bytes;
 		space_info->bytes_used += num_bytes;
 		space_info->disk_used += num_bytes * factor;
-		if (READ_ONCE(space_info->periodic_reclaim))
-			btrfs_space_info_update_reclaimable(space_info, -num_bytes);
 		spin_unlock(&cache->lock);
 		spin_unlock(&space_info->lock);
 	} else {
@@ -3741,12 +3741,11 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 		btrfs_space_info_update_bytes_pinned(space_info, num_bytes);
 		space_info->bytes_used -= num_bytes;
 		space_info->disk_used -= num_bytes * factor;
-		if (READ_ONCE(space_info->periodic_reclaim))
-			btrfs_space_info_update_reclaimable(space_info, num_bytes);
-		else
-			reclaim = should_reclaim_block_group(cache, num_bytes);
+		reclaim = should_reclaim_block_group(cache, num_bytes);
 
 		spin_unlock(&cache->lock);
+		if (reclaim)
+			btrfs_resume_periodic_reclaim(space_info);
 		spin_unlock(&space_info->lock);
 
 		btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 7b7b7255f7d8..de8bde1081be 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -2119,48 +2119,44 @@ static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
 	 * really need a block group, do take a fresh one.
 	 */
 	if (try_again && urgent) {
-		try_again = false;
+		urgent = false;
 		goto again;
 	}
 
 	up_read(&space_info->groups_sem);
-}
-
-void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
-{
-	u64 chunk_sz = calc_effective_data_chunk_size(space_info->fs_info);
-
-	lockdep_assert_held(&space_info->lock);
-	space_info->reclaimable_bytes += bytes;
 
-	if (space_info->reclaimable_bytes >= chunk_sz)
-		btrfs_set_periodic_reclaim_ready(space_info, true);
-}
-
-void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready)
-{
-	lockdep_assert_held(&space_info->lock);
-	if (!READ_ONCE(space_info->periodic_reclaim))
-		return;
-	if (ready != space_info->periodic_reclaim_ready) {
-		space_info->periodic_reclaim_ready = ready;
-		if (!ready)
-			space_info->reclaimable_bytes = 0;
+	/*
+	 * Temporarily pause periodic reclaim until reclaim makes some progress.
+	 * This prevents periodic reclaim from running repeatedly without progress.
+	 */
+	if (!try_again) {
+		spin_lock(&space_info->lock);
+		btrfs_pause_periodic_reclaim(space_info);
+		spin_unlock(&space_info->lock);
 	}
 }
 
 static bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
 {
 	bool ret;
+	u64 chunk_sz;
+	u64 unused;
 
 	if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		return false;
 	if (!READ_ONCE(space_info->periodic_reclaim))
 		return false;
+	if (!READ_ONCE(space_info->periodic_reclaim_paused))
+		return true;
+
+	chunk_sz = calc_effective_data_chunk_size(space_info->fs_info);
 
 	spin_lock(&space_info->lock);
-	ret = space_info->periodic_reclaim_ready;
-	btrfs_set_periodic_reclaim_ready(space_info, false);
+	unused = space_info->total_bytes - space_info->bytes_used;
+	ret = (unused >= space_info->last_reclaim_unused + chunk_sz ||
+	       btrfs_calc_reclaim_threshold(space_info) != space_info->last_reclaim_threshold);
+	if (ret)
+		btrfs_resume_periodic_reclaim(space_info);
 	spin_unlock(&space_info->lock);
 
 	return ret;
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 0703f24b23f7..a49a4c7b0a68 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -214,14 +214,11 @@ struct btrfs_space_info {
 
 	/*
 	 * Periodic reclaim should be a no-op if a space_info hasn't
-	 * freed any space since the last time we tried.
+	 * freed any space since the last time we made no progress.
 	 */
-	bool periodic_reclaim_ready;
-
-	/*
-	 * Net bytes freed or allocated since the last reclaim pass.
-	 */
-	s64 reclaimable_bytes;
+	bool periodic_reclaim_paused;
+	int last_reclaim_threshold;
+	u64 last_reclaim_unused;
 };
 
 static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info)
@@ -301,9 +298,22 @@ void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
 void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 
-void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes);
-void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
 int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info);
+static inline void btrfs_resume_periodic_reclaim(struct btrfs_space_info *space_info)
+{
+	lockdep_assert_held(&space_info->lock);
+	if (space_info->periodic_reclaim_paused)
+		space_info->periodic_reclaim_paused = false;
+}
+static inline void btrfs_pause_periodic_reclaim(struct btrfs_space_info *space_info)
+{
+	lockdep_assert_held(&space_info->lock);
+	if (!space_info->periodic_reclaim_paused) {
+		space_info->periodic_reclaim_paused = true;
+		space_info->last_reclaim_threshold = btrfs_calc_reclaim_threshold(space_info);
+		space_info->last_reclaim_unused = space_info->total_bytes - space_info->bytes_used;
+	}
+}
 void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info);
 void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len);
 
-- 
2.51.2


  reply	other threads:[~2026-01-03 13:06 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-03 12:19 [PATCH v2 0/7] btrfs: fix periodic reclaim condition with some cleanup Sun YangKai
2026-01-03 12:19 ` Sun YangKai [this message]
2026-01-04 19:40   ` [PATCH v2 1/7] btrfs: fix periodic reclaim condition Boris Burkov
2026-01-05 13:00     ` Sun Yangkai
2026-01-05 18:21       ` Boris Burkov
2026-01-07 14:09         ` Sun Yangkai
2026-01-07 17:57           ` Boris Burkov
2026-01-08 15:11             ` Sun Yangkai
2026-01-03 12:19 ` [PATCH v2 2/7] btrfs: use u8 for reclaim threshold type Sun YangKai
2026-01-03 12:19 ` [PATCH v2 3/7] btrfs: clarify reclaim sweep control flow Sun YangKai
2026-01-03 12:19 ` [PATCH v2 4/7] btrfs: change block group reclaim_mark to bool Sun YangKai
2026-01-03 12:19 ` [PATCH v2 5/7] btrfs: reorder btrfs_block_group members to reduce struct size Sun YangKai
2026-01-05 15:07   ` Filipe Manana
2026-01-05 15:26     ` Sun Yangkai
2026-01-03 12:19 ` [PATCH v2 6/7] btrfs: use proper types for btrfs_block_group fields Sun YangKai
2026-01-03 12:19 ` [PATCH v2 7/7] btrfs: consolidate reclaim readiness checks in btrfs_should_reclaim() Sun YangKai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260103122504.10924-3-sunk67188@gmail.com \
    --to=sunk67188@gmail.com \
    --cc=boris@bur.io \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox