public inbox for linux-btrfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Johannes Thumshirn <jth@kernel.org>
To: linux-btrfs@vger.kernel.org
Cc: Damien Le Moal <dlemoal@kernel.org>,
	Naohiro Aota <naohiro.aota@wdc.com>,
	David Sterba <dsterba@suse.com>,
	Josef Bacik <josef@toxicpanda.com>, Boris Burkov <boris@bur.io>,
	Filipe Manana <fdmanana@suse.com>,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>
Subject: [PATCH RFC 2/9] btrfs: zoned: get rid of relocation_bg_lock
Date: Fri, 27 Jun 2025 11:19:07 +0200	[thread overview]
Message-ID: <20250627091914.100715-3-jth@kernel.org> (raw)
In-Reply-To: <20250627091914.100715-1-jth@kernel.org>

From: Johannes Thumshirn <johannes.thumshirn@wdc.com>

Lockstat analysis of benchmark workloads shows a very high contention of
the relocation_bg_lock. But the relocation_bg_lock only protects a single
field in 'struct btrfs_fs_info', namely 'u64 data_reloc_bg'.

Use READ_ONCE()/WRITE_ONCE() to access 'btrfs_fs_info::data_reloc_bg'.

This is safe in the allocator path, as relocation I/O is only going to
block groups in the relocation sub-space_info and at the moment, there is
only one relocation block group in this space info.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/disk-io.c     |  1 -
 fs/btrfs/extent-tree.c | 28 +++++++++++-----------------
 fs/btrfs/fs.h          |  6 +-----
 fs/btrfs/zoned.c       | 11 +++++------
 4 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6ac5be02dce7..9a13f5b1ed43 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2791,7 +2791,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	spin_lock_init(&fs_info->treelog_bg_lock);
 	spin_lock_init(&fs_info->zone_active_bgs_lock);
-	spin_lock_init(&fs_info->relocation_bg_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	rwlock_init(&fs_info->global_root_lock);
 	mutex_init(&fs_info->unused_bg_unpin_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 10f50c725313..a9bda68a1883 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3865,14 +3865,10 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	 * Do not allow non-relocation blocks in the dedicated relocation block
 	 * group, and vice versa.
 	 */
-	spin_lock(&fs_info->relocation_bg_lock);
-	data_reloc_bytenr = fs_info->data_reloc_bg;
+	data_reloc_bytenr = READ_ONCE(fs_info->data_reloc_bg);
 	if (data_reloc_bytenr &&
 	    ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
 	     (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
-		skip = true;
-	spin_unlock(&fs_info->relocation_bg_lock);
-	if (skip)
 		return 1;
 
 	/* Check RO and no space case before trying to activate it */
@@ -3899,7 +3895,6 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	spin_lock(&space_info->lock);
 	spin_lock(&block_group->lock);
 	spin_lock(&fs_info->treelog_bg_lock);
-	spin_lock(&fs_info->relocation_bg_lock);
 
 	if (ret)
 		goto out;
@@ -3908,8 +3903,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	       block_group->start == fs_info->treelog_bg ||
 	       fs_info->treelog_bg == 0);
 	ASSERT(!ffe_ctl->for_data_reloc ||
-	       block_group->start == fs_info->data_reloc_bg ||
-	       fs_info->data_reloc_bg == 0);
+	       block_group->start == data_reloc_bytenr ||
+	       data_reloc_bytenr == 0);
 
 	if (block_group->ro ||
 	    (!ffe_ctl->for_data_reloc &&
@@ -3932,7 +3927,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	 * Do not allow currently used block group to be the data relocation
 	 * dedicated block group.
 	 */
-	if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
+	if (ffe_ctl->for_data_reloc && data_reloc_bytenr == 0 &&
 	    (block_group->used || block_group->reserved)) {
 		ret = 1;
 		goto out;
@@ -3957,8 +3952,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 		fs_info->treelog_bg = block_group->start;
 
 	if (ffe_ctl->for_data_reloc) {
-		if (!fs_info->data_reloc_bg)
-			fs_info->data_reloc_bg = block_group->start;
+		if (READ_ONCE(fs_info->data_reloc_bg) == 0)
+			WRITE_ONCE(fs_info->data_reloc_bg, block_group->start);
 		/*
 		 * Do not allow allocations from this block group, unless it is
 		 * for data relocation. Compared to increasing the ->ro, setting
@@ -3994,8 +3989,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	if (ret && ffe_ctl->for_treelog)
 		fs_info->treelog_bg = 0;
 	if (ret && ffe_ctl->for_data_reloc)
-		fs_info->data_reloc_bg = 0;
-	spin_unlock(&fs_info->relocation_bg_lock);
+		WRITE_ONCE(fs_info->data_reloc_bg, 0);
 	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
@@ -4304,10 +4298,10 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
 			ffe_ctl->hint_byte = fs_info->treelog_bg;
 		spin_unlock(&fs_info->treelog_bg_lock);
 	} else if (ffe_ctl->for_data_reloc) {
-		spin_lock(&fs_info->relocation_bg_lock);
-		if (fs_info->data_reloc_bg)
-			ffe_ctl->hint_byte = fs_info->data_reloc_bg;
-		spin_unlock(&fs_info->relocation_bg_lock);
+		u64 data_reloc_bg = READ_ONCE(fs_info->data_reloc_bg);
+
+		if (data_reloc_bg)
+			ffe_ctl->hint_byte = data_reloc_bg;
 	} else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
 		struct btrfs_block_group *block_group;
 
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index b239e4b8421c..570f4b85096c 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -849,11 +849,7 @@ struct btrfs_fs_info {
 	spinlock_t treelog_bg_lock;
 	u64 treelog_bg;
 
-	/*
-	 * Start of the dedicated data relocation block group, protected by
-	 * relocation_bg_lock.
-	 */
-	spinlock_t relocation_bg_lock;
+	/* Start of the dedicated data relocation block group */
 	u64 data_reloc_bg;
 	struct mutex zoned_data_reloc_io_lock;
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 0d5d6db72b62..388c277a84d3 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2495,11 +2495,10 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
 void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
 {
 	struct btrfs_fs_info *fs_info = bg->fs_info;
+	u64 data_reloc_bg = READ_ONCE(fs_info->data_reloc_bg);
 
-	spin_lock(&fs_info->relocation_bg_lock);
-	if (fs_info->data_reloc_bg == bg->start)
-		fs_info->data_reloc_bg = 0;
-	spin_unlock(&fs_info->relocation_bg_lock);
+	if (data_reloc_bg == bg->start)
+		WRITE_ONCE(fs_info->data_reloc_bg, 0);
 }
 
 void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
@@ -2518,7 +2517,7 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
 	if (!btrfs_is_zoned(fs_info))
 		return;
 
-	if (fs_info->data_reloc_bg)
+	if (READ_ONCE(fs_info->data_reloc_bg))
 		return;
 
 	if (sb_rdonly(fs_info->sb))
@@ -2539,7 +2538,7 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
 			continue;
 		}
 
-		fs_info->data_reloc_bg = bg->start;
+		WRITE_ONCE(fs_info->data_reloc_bg, bg->start);
 		set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
 		btrfs_zone_activate(bg);
 
-- 
2.49.0


  parent reply	other threads:[~2025-06-27  9:19 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-27  9:19 [PATCH RFC 0/9] btrfs: zoned: fixes for garbage collection under preassure Johannes Thumshirn
2025-06-27  9:19 ` [PATCH RFC 1/9] btrfs: zoned: do not select metadata BG as finish target Johannes Thumshirn
2025-06-27 11:34   ` Christoph Hellwig
2025-07-02 15:34     ` Naohiro Aota
2025-06-27  9:19 ` Johannes Thumshirn [this message]
2025-06-27  9:19 ` [PATCH RFC 3/9] btrfs: zoned: get rid of treelog_bg_lock Johannes Thumshirn
2025-06-27  9:19 ` [PATCH RFC 4/9] btrfs: zoned: don't hold space_info lock on zoned allocation Johannes Thumshirn
2025-06-27  9:19 ` [PATCH RFC 5/9] btrfs: remove delalloc_root_mutex Johannes Thumshirn
2025-06-27 12:42   ` Filipe Manana
2025-06-27  9:19 ` [PATCH RFC 6/9] btrfs: remove btrfs_root's delalloc_mutex Johannes Thumshirn
2025-06-27 12:30   ` Filipe Manana
2025-06-27  9:19 ` [PATCH RFC 7/9] btrfs: lower auto-reclaim message log level Johannes Thumshirn
2025-06-27 11:35   ` Christoph Hellwig
2025-06-27  9:19 ` [PATCH RFC 8/9] btrfs: lower log level of relocation messages Johannes Thumshirn
2025-06-27 11:36   ` Christoph Hellwig
2025-06-30 17:12   ` David Sterba
2025-07-01  5:09     ` Johannes Thumshirn
2025-07-01 14:43       ` David Sterba
2025-06-27  9:19 ` [PATCH RFC 9/9] btrfs: remove unused bgs on allocation failure Johannes Thumshirn
2025-06-27 11:38   ` Christoph Hellwig
2025-06-30 11:45     ` Johannes Thumshirn
2025-06-30 12:05       ` Filipe Manana
2025-06-27 12:14   ` Filipe Manana

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250627091914.100715-3-jth@kernel.org \
    --to=jth@kernel.org \
    --cc=boris@bur.io \
    --cc=dlemoal@kernel.org \
    --cc=dsterba@suse.com \
    --cc=fdmanana@suse.com \
    --cc=johannes.thumshirn@wdc.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=naohiro.aota@wdc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox