From: Johannes Thumshirn <jth@kernel.org>
To: linux-btrfs@vger.kernel.org
Cc: Damien Le Moal <dlemoal@kernel.org>,
Naohiro Aota <naohiro.aota@wdc.com>,
David Sterba <dsterba@suse.com>,
Josef Bacik <josef@toxicpanda.com>, Boris Burkov <boris@bur.io>,
Filipe Manana <fdmanana@suse.com>,
Johannes Thumshirn <johannes.thumshirn@wdc.com>
Subject: [PATCH RFC 3/9] btrfs: zoned: get rid of treelog_bg_lock
Date: Fri, 27 Jun 2025 11:19:08 +0200 [thread overview]
Message-ID: <20250627091914.100715-4-jth@kernel.org> (raw)
In-Reply-To: <20250627091914.100715-1-jth@kernel.org>
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Lockstat analysis of benchmark workloads shows very high contention on
the treelog_bg_lock. But the treelog_bg_lock only protects a single
field in 'struct btrfs_fs_info', namely 'u64 treelog_bg'.
Remove the lock and use READ_ONCE()/WRITE_ONCE() to access
'btrfs_fs_info::treelog_bg' instead.
This is safe in the allocator path, as treelog I/O only ever targets block
groups in the treelog sub-space_info and, at the moment, there is only one
treelog block group in this space info.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/disk-io.c | 1 -
fs/btrfs/extent-tree.c | 45 +++++++++++-------------------------------
fs/btrfs/fs.h | 1 -
fs/btrfs/zoned.c | 2 +-
fs/btrfs/zoned.h | 7 +++----
5 files changed, 15 insertions(+), 41 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9a13f5b1ed43..35cd38de7727 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2789,7 +2789,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
- spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
rwlock_init(&fs_info->global_root_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a9bda68a1883..46358a555f78 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3809,22 +3809,6 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
return find_free_extent_unclustered(block_group, ffe_ctl);
}
-/*
- * Tree-log block group locking
- * ============================
- *
- * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
- * indicates the starting address of a block group, which is reserved only
- * for tree-log metadata.
- *
- * Lock nesting
- * ============
- *
- * space_info::lock
- * block_group::lock
- * fs_info::treelog_bg_lock
- */
-
/*
* Simple allocator for sequential-only block group. It only allows sequential
* allocation. No need to play with trees. This function also reserves the
@@ -3844,7 +3828,6 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
u64 log_bytenr;
u64 data_reloc_bytenr;
int ret = 0;
- bool skip = false;
ASSERT(btrfs_is_zoned(block_group->fs_info));
@@ -3852,13 +3835,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
* Do not allow non-tree-log blocks in the dedicated tree-log block
* group, and vice versa.
*/
- spin_lock(&fs_info->treelog_bg_lock);
- log_bytenr = fs_info->treelog_bg;
+ log_bytenr = READ_ONCE(fs_info->treelog_bg);
if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
(!ffe_ctl->for_treelog && bytenr == log_bytenr)))
- skip = true;
- spin_unlock(&fs_info->treelog_bg_lock);
- if (skip)
return 1;
/*
@@ -3894,14 +3873,13 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- spin_lock(&fs_info->treelog_bg_lock);
if (ret)
goto out;
ASSERT(!ffe_ctl->for_treelog ||
- block_group->start == fs_info->treelog_bg ||
- fs_info->treelog_bg == 0);
+ block_group->start == log_bytenr ||
+ log_bytenr == 0);
ASSERT(!ffe_ctl->for_data_reloc ||
block_group->start == data_reloc_bytenr ||
data_reloc_bytenr == 0);
@@ -3917,7 +3895,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
* Do not allow currently using block group to be tree-log dedicated
* block group.
*/
- if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
+ if (ffe_ctl->for_treelog && log_bytenr == 0 &&
(block_group->used || block_group->reserved)) {
ret = 1;
goto out;
@@ -3948,8 +3926,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
goto out;
}
- if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
- fs_info->treelog_bg = block_group->start;
+ if (ffe_ctl->for_treelog && READ_ONCE(fs_info->treelog_bg) == 0)
+ WRITE_ONCE(fs_info->treelog_bg, block_group->start);
if (ffe_ctl->for_data_reloc) {
if (READ_ONCE(fs_info->data_reloc_bg) == 0)
@@ -3987,10 +3965,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
out:
if (ret && ffe_ctl->for_treelog)
- fs_info->treelog_bg = 0;
+ WRITE_ONCE(fs_info->treelog_bg, 0);
if (ret && ffe_ctl->for_data_reloc)
WRITE_ONCE(fs_info->data_reloc_bg, 0);
- spin_unlock(&fs_info->treelog_bg_lock);
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
return ret;
@@ -4293,10 +4270,10 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
if (ffe_ctl->for_treelog) {
- spin_lock(&fs_info->treelog_bg_lock);
- if (fs_info->treelog_bg)
- ffe_ctl->hint_byte = fs_info->treelog_bg;
- spin_unlock(&fs_info->treelog_bg_lock);
+ u64 treelog_bg = READ_ONCE(fs_info->treelog_bg);
+
+ if (treelog_bg)
+ ffe_ctl->hint_byte = treelog_bg;
} else if (ffe_ctl->for_data_reloc) {
u64 data_reloc_bg = READ_ONCE(fs_info->data_reloc_bg);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 570f4b85096c..a388af40a251 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -846,7 +846,6 @@ struct btrfs_fs_info {
u64 max_zone_append_size;
struct mutex zoned_meta_io_lock;
- spinlock_t treelog_bg_lock;
u64 treelog_bg;
/* Start of the dedicated data relocation block group */
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 388c277a84d3..c89f846af6dd 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1948,7 +1948,7 @@ static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
return true;
- if (fs_info->treelog_bg == block_group->start) {
+ if (READ_ONCE(fs_info->treelog_bg) == block_group->start) {
if (!btrfs_zone_activate(block_group)) {
int ret_fin = btrfs_zone_finish_one_bg(fs_info);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 6e11533b8e14..c1b3a5c3a799 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -383,14 +383,13 @@ static inline void btrfs_zoned_meta_io_unlock(struct btrfs_fs_info *fs_info)
static inline void btrfs_clear_treelog_bg(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 treelog_bg = READ_ONCE(fs_info->treelog_bg);
if (!btrfs_is_zoned(fs_info))
return;
- spin_lock(&fs_info->treelog_bg_lock);
- if (fs_info->treelog_bg == bg->start)
- fs_info->treelog_bg = 0;
- spin_unlock(&fs_info->treelog_bg_lock);
+ if (treelog_bg == bg->start)
+ WRITE_ONCE(fs_info->treelog_bg, 0);
}
static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
--
2.49.0
next prev parent reply other threads:[~2025-06-27 9:19 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-27 9:19 [PATCH RFC 0/9] btrfs: zoned: fixes for garbage collection under pressure Johannes Thumshirn
2025-06-27 9:19 ` [PATCH RFC 1/9] btrfs: zoned: do not select metadata BG as finish target Johannes Thumshirn
2025-06-27 11:34 ` Christoph Hellwig
2025-07-02 15:34 ` Naohiro Aota
2025-06-27 9:19 ` [PATCH RFC 2/9] btrfs: zoned: get rid of relocation_bg_lock Johannes Thumshirn
2025-06-27 9:19 ` Johannes Thumshirn [this message]
2025-06-27 9:19 ` [PATCH RFC 4/9] btrfs: zoned: don't hold space_info lock on zoned allocation Johannes Thumshirn
2025-06-27 9:19 ` [PATCH RFC 5/9] btrfs: remove delalloc_root_mutex Johannes Thumshirn
2025-06-27 12:42 ` Filipe Manana
2025-06-27 9:19 ` [PATCH RFC 6/9] btrfs: remove btrfs_root's delalloc_mutex Johannes Thumshirn
2025-06-27 12:30 ` Filipe Manana
2025-06-27 9:19 ` [PATCH RFC 7/9] btrfs: lower auto-reclaim message log level Johannes Thumshirn
2025-06-27 11:35 ` Christoph Hellwig
2025-06-27 23:24 ` kernel test robot
2025-06-27 9:19 ` [PATCH RFC 8/9] btrfs: lower log level of relocation messages Johannes Thumshirn
2025-06-27 11:36 ` Christoph Hellwig
2025-06-27 23:44 ` kernel test robot
2025-06-30 17:12 ` David Sterba
2025-07-01 5:09 ` Johannes Thumshirn
2025-07-01 14:43 ` David Sterba
2025-06-27 9:19 ` [PATCH RFC 9/9] btrfs: remove unused bgs on allocation failure Johannes Thumshirn
2025-06-27 11:38 ` Christoph Hellwig
2025-06-30 11:45 ` Johannes Thumshirn
2025-06-30 12:05 ` Filipe Manana
2025-06-27 12:14 ` Filipe Manana
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250627091914.100715-4-jth@kernel.org \
--to=jth@kernel.org \
--cc=boris@bur.io \
--cc=dlemoal@kernel.org \
--cc=dsterba@suse.com \
--cc=fdmanana@suse.com \
--cc=johannes.thumshirn@wdc.com \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=naohiro.aota@wdc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.