From: Johannes Thumshirn <jth@kernel.org>
To: linux-btrfs@vger.kernel.org
Cc: Damien Le Moal <dlemoal@kernel.org>,
Naohiro Aota <naohiro.aota@wdc.com>,
David Sterba <dsterba@suse.com>,
Josef Bacik <josef@toxicpanda.com>, Boris Burkov <boris@bur.io>,
Filipe Manana <fdmanana@suse.com>,
Johannes Thumshirn <johannes.thumshirn@wdc.com>
Subject: [PATCH RFC 2/9] btrfs: zoned: get rid of relocation_bg_lock
Date: Fri, 27 Jun 2025 11:19:07 +0200 [thread overview]
Message-ID: <20250627091914.100715-3-jth@kernel.org> (raw)
In-Reply-To: <20250627091914.100715-1-jth@kernel.org>
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Lockstat analysis of benchmark workloads shows very high contention on
the relocation_bg_lock, even though this lock only protects a single
field in 'struct btrfs_fs_info', namely 'u64 data_reloc_bg'.
Remove the lock and use READ_ONCE()/WRITE_ONCE() to access
'btrfs_fs_info::data_reloc_bg' instead.
This is safe in the allocator path, because relocation I/O only goes to
block groups in the relocation sub-space_info and, at the moment, there is
only one relocation block group in this space_info.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/disk-io.c | 1 -
fs/btrfs/extent-tree.c | 28 +++++++++++-----------------
fs/btrfs/fs.h | 6 +-----
fs/btrfs/zoned.c | 11 +++++------
4 files changed, 17 insertions(+), 29 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6ac5be02dce7..9a13f5b1ed43 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2791,7 +2791,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->unused_bgs_lock);
spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
- spin_lock_init(&fs_info->relocation_bg_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
rwlock_init(&fs_info->global_root_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 10f50c725313..a9bda68a1883 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3865,14 +3865,10 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
* Do not allow non-relocation blocks in the dedicated relocation block
* group, and vice versa.
*/
- spin_lock(&fs_info->relocation_bg_lock);
- data_reloc_bytenr = fs_info->data_reloc_bg;
+ data_reloc_bytenr = READ_ONCE(fs_info->data_reloc_bg);
if (data_reloc_bytenr &&
((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
(!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
- skip = true;
- spin_unlock(&fs_info->relocation_bg_lock);
- if (skip)
return 1;
/* Check RO and no space case before trying to activate it */
@@ -3899,7 +3895,6 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
spin_lock(&fs_info->treelog_bg_lock);
- spin_lock(&fs_info->relocation_bg_lock);
if (ret)
goto out;
@@ -3908,8 +3903,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
block_group->start == fs_info->treelog_bg ||
fs_info->treelog_bg == 0);
ASSERT(!ffe_ctl->for_data_reloc ||
- block_group->start == fs_info->data_reloc_bg ||
- fs_info->data_reloc_bg == 0);
+ block_group->start == data_reloc_bytenr ||
+ data_reloc_bytenr == 0);
if (block_group->ro ||
(!ffe_ctl->for_data_reloc &&
@@ -3932,7 +3927,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
* Do not allow currently used block group to be the data relocation
* dedicated block group.
*/
- if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
+ if (ffe_ctl->for_data_reloc && data_reloc_bytenr == 0 &&
(block_group->used || block_group->reserved)) {
ret = 1;
goto out;
@@ -3957,8 +3952,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
fs_info->treelog_bg = block_group->start;
if (ffe_ctl->for_data_reloc) {
- if (!fs_info->data_reloc_bg)
- fs_info->data_reloc_bg = block_group->start;
+ if (READ_ONCE(fs_info->data_reloc_bg) == 0)
+ WRITE_ONCE(fs_info->data_reloc_bg, block_group->start);
/*
* Do not allow allocations from this block group, unless it is
* for data relocation. Compared to increasing the ->ro, setting
@@ -3994,8 +3989,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
if (ret && ffe_ctl->for_treelog)
fs_info->treelog_bg = 0;
if (ret && ffe_ctl->for_data_reloc)
- fs_info->data_reloc_bg = 0;
- spin_unlock(&fs_info->relocation_bg_lock);
+ WRITE_ONCE(fs_info->data_reloc_bg, 0);
spin_unlock(&fs_info->treelog_bg_lock);
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
@@ -4304,10 +4298,10 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
ffe_ctl->hint_byte = fs_info->treelog_bg;
spin_unlock(&fs_info->treelog_bg_lock);
} else if (ffe_ctl->for_data_reloc) {
- spin_lock(&fs_info->relocation_bg_lock);
- if (fs_info->data_reloc_bg)
- ffe_ctl->hint_byte = fs_info->data_reloc_bg;
- spin_unlock(&fs_info->relocation_bg_lock);
+ u64 data_reloc_bg = READ_ONCE(fs_info->data_reloc_bg);
+
+ if (data_reloc_bg)
+ ffe_ctl->hint_byte = data_reloc_bg;
} else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
struct btrfs_block_group *block_group;
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index b239e4b8421c..570f4b85096c 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -849,11 +849,7 @@ struct btrfs_fs_info {
spinlock_t treelog_bg_lock;
u64 treelog_bg;
- /*
- * Start of the dedicated data relocation block group, protected by
- * relocation_bg_lock.
- */
- spinlock_t relocation_bg_lock;
+ /* Start of the dedicated data relocation block group */
u64 data_reloc_bg;
struct mutex zoned_data_reloc_io_lock;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 0d5d6db72b62..388c277a84d3 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2495,11 +2495,10 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 data_reloc_bg = READ_ONCE(fs_info->data_reloc_bg);
- spin_lock(&fs_info->relocation_bg_lock);
- if (fs_info->data_reloc_bg == bg->start)
- fs_info->data_reloc_bg = 0;
- spin_unlock(&fs_info->relocation_bg_lock);
+ if (data_reloc_bg == bg->start)
+ WRITE_ONCE(fs_info->data_reloc_bg, 0);
}
void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
@@ -2518,7 +2517,7 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
if (!btrfs_is_zoned(fs_info))
return;
- if (fs_info->data_reloc_bg)
+ if (READ_ONCE(fs_info->data_reloc_bg))
return;
if (sb_rdonly(fs_info->sb))
@@ -2539,7 +2538,7 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
continue;
}
- fs_info->data_reloc_bg = bg->start;
+ WRITE_ONCE(fs_info->data_reloc_bg, bg->start);
set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
btrfs_zone_activate(bg);
--
2.49.0
next prev parent reply other threads:[~2025-06-27 9:19 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-27 9:19 [PATCH RFC 0/9] btrfs: zoned: fixes for garbage collection under pressure Johannes Thumshirn
2025-06-27 9:19 ` [PATCH RFC 1/9] btrfs: zoned: do not select metadata BG as finish target Johannes Thumshirn
2025-06-27 11:34 ` Christoph Hellwig
2025-07-02 15:34 ` Naohiro Aota
2025-06-27 9:19 ` Johannes Thumshirn [this message]
2025-06-27 9:19 ` [PATCH RFC 3/9] btrfs: zoned: get rid of treelog_bg_lock Johannes Thumshirn
2025-06-27 9:19 ` [PATCH RFC 4/9] btrfs: zoned: don't hold space_info lock on zoned allocation Johannes Thumshirn
2025-06-27 9:19 ` [PATCH RFC 5/9] btrfs: remove delalloc_root_mutex Johannes Thumshirn
2025-06-27 12:42 ` Filipe Manana
2025-06-27 9:19 ` [PATCH RFC 6/9] btrfs: remove btrfs_root's delalloc_mutex Johannes Thumshirn
2025-06-27 12:30 ` Filipe Manana
2025-06-27 9:19 ` [PATCH RFC 7/9] btrfs: lower auto-reclaim message log level Johannes Thumshirn
2025-06-27 11:35 ` Christoph Hellwig
2025-06-27 23:24 ` kernel test robot
2025-06-27 9:19 ` [PATCH RFC 8/9] btrfs: lower log level of relocation messages Johannes Thumshirn
2025-06-27 11:36 ` Christoph Hellwig
2025-06-27 23:44 ` kernel test robot
2025-06-30 17:12 ` David Sterba
2025-07-01 5:09 ` Johannes Thumshirn
2025-07-01 14:43 ` David Sterba
2025-06-27 9:19 ` [PATCH RFC 9/9] btrfs: remove unused bgs on allocation failure Johannes Thumshirn
2025-06-27 11:38 ` Christoph Hellwig
2025-06-30 11:45 ` Johannes Thumshirn
2025-06-30 12:05 ` Filipe Manana
2025-06-27 12:14 ` Filipe Manana
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250627091914.100715-3-jth@kernel.org \
--to=jth@kernel.org \
--cc=boris@bur.io \
--cc=dlemoal@kernel.org \
--cc=dsterba@suse.com \
--cc=fdmanana@suse.com \
--cc=johannes.thumshirn@wdc.com \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=naohiro.aota@wdc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.