From: Johannes Thumshirn <jth@kernel.org>
To: linux-btrfs@vger.kernel.org
Cc: Damien Le Moal <dlemoal@kernel.org>,
Naohiro Aota <naohiro.aota@wdc.com>,
David Sterba <dsterba@suse.com>,
Josef Bacik <josef@toxicpanda.com>, Boris Burkov <boris@bur.io>,
Filipe Manana <fdmanana@suse.com>,
Johannes Thumshirn <johannes.thumshirn@wdc.com>
Subject: [PATCH RFC 3/9] btrfs: zoned: get rid of treelog_bg_lock
Date: Fri, 27 Jun 2025 11:19:08 +0200 [thread overview]
Message-ID: <20250627091914.100715-4-jth@kernel.org> (raw)
In-Reply-To: <20250627091914.100715-1-jth@kernel.org>
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Lockstat analysis of benchmark workloads shows very high contention on
the treelog_bg_lock. However, the treelog_bg_lock only protects a single
field in 'struct btrfs_fs_info', namely 'u64 treelog_bg'.
Remove the lock and use READ_ONCE()/WRITE_ONCE() to access
'btrfs_fs_info::treelog_bg' instead.
This is safe in the allocator path, as treelog I/O only goes to block
groups in the treelog sub-space_info and, at the moment, there is only one
treelog block group in this space_info.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
fs/btrfs/disk-io.c | 1 -
fs/btrfs/extent-tree.c | 45 +++++++++++-------------------------------
fs/btrfs/fs.h | 1 -
fs/btrfs/zoned.c | 2 +-
fs/btrfs/zoned.h | 7 +++----
5 files changed, 15 insertions(+), 41 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9a13f5b1ed43..35cd38de7727 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2789,7 +2789,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
- spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
rwlock_init(&fs_info->global_root_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a9bda68a1883..46358a555f78 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3809,22 +3809,6 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
return find_free_extent_unclustered(block_group, ffe_ctl);
}
-/*
- * Tree-log block group locking
- * ============================
- *
- * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
- * indicates the starting address of a block group, which is reserved only
- * for tree-log metadata.
- *
- * Lock nesting
- * ============
- *
- * space_info::lock
- * block_group::lock
- * fs_info::treelog_bg_lock
- */
-
/*
* Simple allocator for sequential-only block group. It only allows sequential
* allocation. No need to play with trees. This function also reserves the
@@ -3844,7 +3828,6 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
u64 log_bytenr;
u64 data_reloc_bytenr;
int ret = 0;
- bool skip = false;
ASSERT(btrfs_is_zoned(block_group->fs_info));
@@ -3852,13 +3835,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
* Do not allow non-tree-log blocks in the dedicated tree-log block
* group, and vice versa.
*/
- spin_lock(&fs_info->treelog_bg_lock);
- log_bytenr = fs_info->treelog_bg;
+ log_bytenr = READ_ONCE(fs_info->treelog_bg);
if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
(!ffe_ctl->for_treelog && bytenr == log_bytenr)))
- skip = true;
- spin_unlock(&fs_info->treelog_bg_lock);
- if (skip)
return 1;
/*
@@ -3894,14 +3873,13 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
- spin_lock(&fs_info->treelog_bg_lock);
if (ret)
goto out;
ASSERT(!ffe_ctl->for_treelog ||
- block_group->start == fs_info->treelog_bg ||
- fs_info->treelog_bg == 0);
+ block_group->start == log_bytenr ||
+ log_bytenr == 0);
ASSERT(!ffe_ctl->for_data_reloc ||
block_group->start == data_reloc_bytenr ||
data_reloc_bytenr == 0);
@@ -3917,7 +3895,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
* Do not allow currently using block group to be tree-log dedicated
* block group.
*/
- if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
+ if (ffe_ctl->for_treelog && log_bytenr == 0 &&
(block_group->used || block_group->reserved)) {
ret = 1;
goto out;
@@ -3948,8 +3926,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
goto out;
}
- if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
- fs_info->treelog_bg = block_group->start;
+ if (ffe_ctl->for_treelog && READ_ONCE(fs_info->treelog_bg) == 0)
+ WRITE_ONCE(fs_info->treelog_bg, block_group->start);
if (ffe_ctl->for_data_reloc) {
if (READ_ONCE(fs_info->data_reloc_bg) == 0)
@@ -3987,10 +3965,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
out:
if (ret && ffe_ctl->for_treelog)
- fs_info->treelog_bg = 0;
+ WRITE_ONCE(fs_info->treelog_bg, 0);
if (ret && ffe_ctl->for_data_reloc)
WRITE_ONCE(fs_info->data_reloc_bg, 0);
- spin_unlock(&fs_info->treelog_bg_lock);
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
return ret;
@@ -4293,10 +4270,10 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
if (ffe_ctl->for_treelog) {
- spin_lock(&fs_info->treelog_bg_lock);
- if (fs_info->treelog_bg)
- ffe_ctl->hint_byte = fs_info->treelog_bg;
- spin_unlock(&fs_info->treelog_bg_lock);
+ u64 treelog_bg = READ_ONCE(fs_info->treelog_bg);
+
+ if (treelog_bg)
+ ffe_ctl->hint_byte = treelog_bg;
} else if (ffe_ctl->for_data_reloc) {
u64 data_reloc_bg = READ_ONCE(fs_info->data_reloc_bg);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 570f4b85096c..a388af40a251 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -846,7 +846,6 @@ struct btrfs_fs_info {
u64 max_zone_append_size;
struct mutex zoned_meta_io_lock;
- spinlock_t treelog_bg_lock;
u64 treelog_bg;
/* Start of the dedicated data relocation block group */
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 388c277a84d3..c89f846af6dd 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1948,7 +1948,7 @@ static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
return true;
- if (fs_info->treelog_bg == block_group->start) {
+ if (READ_ONCE(fs_info->treelog_bg) == block_group->start) {
if (!btrfs_zone_activate(block_group)) {
int ret_fin = btrfs_zone_finish_one_bg(fs_info);
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 6e11533b8e14..c1b3a5c3a799 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -383,14 +383,13 @@ static inline void btrfs_zoned_meta_io_unlock(struct btrfs_fs_info *fs_info)
static inline void btrfs_clear_treelog_bg(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
+ u64 treelog_bg = READ_ONCE(fs_info->treelog_bg);
if (!btrfs_is_zoned(fs_info))
return;
- spin_lock(&fs_info->treelog_bg_lock);
- if (fs_info->treelog_bg == bg->start)
- fs_info->treelog_bg = 0;
- spin_unlock(&fs_info->treelog_bg_lock);
+ if (treelog_bg == bg->start)
+ WRITE_ONCE(fs_info->treelog_bg, 0);
}
static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
--
2.49.0
next prev parent reply other threads:[~2025-06-27 9:19 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-27 9:19 [PATCH RFC 0/9] btrfs: zoned: fixes for garbage collection under preassure Johannes Thumshirn
2025-06-27 9:19 ` [PATCH RFC 1/9] btrfs: zoned: do not select metadata BG as finish target Johannes Thumshirn
2025-06-27 11:34 ` Christoph Hellwig
2025-07-02 15:34 ` Naohiro Aota
2025-06-27 9:19 ` [PATCH RFC 2/9] btrfs: zoned: get rid of relocation_bg_lock Johannes Thumshirn
2025-06-27 9:19 ` Johannes Thumshirn [this message]
2025-06-27 9:19 ` [PATCH RFC 4/9] btrfs: zoned: don't hold space_info lock on zoned allocation Johannes Thumshirn
2025-06-27 9:19 ` [PATCH RFC 5/9] btrfs: remove delalloc_root_mutex Johannes Thumshirn
2025-06-27 12:42 ` Filipe Manana
2025-06-27 9:19 ` [PATCH RFC 6/9] btrfs: remove btrfs_root's delalloc_mutex Johannes Thumshirn
2025-06-27 12:30 ` Filipe Manana
2025-06-27 9:19 ` [PATCH RFC 7/9] btrfs: lower auto-reclaim message log level Johannes Thumshirn
2025-06-27 11:35 ` Christoph Hellwig
2025-06-27 9:19 ` [PATCH RFC 8/9] btrfs: lower log level of relocation messages Johannes Thumshirn
2025-06-27 11:36 ` Christoph Hellwig
2025-06-30 17:12 ` David Sterba
2025-07-01 5:09 ` Johannes Thumshirn
2025-07-01 14:43 ` David Sterba
2025-06-27 9:19 ` [PATCH RFC 9/9] btrfs: remove unused bgs on allocation failure Johannes Thumshirn
2025-06-27 11:38 ` Christoph Hellwig
2025-06-30 11:45 ` Johannes Thumshirn
2025-06-30 12:05 ` Filipe Manana
2025-06-27 12:14 ` Filipe Manana
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250627091914.100715-4-jth@kernel.org \
--to=jth@kernel.org \
--cc=boris@bur.io \
--cc=dlemoal@kernel.org \
--cc=dsterba@suse.com \
--cc=fdmanana@suse.com \
--cc=johannes.thumshirn@wdc.com \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=naohiro.aota@wdc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox