All of lore.kernel.org
 help / color / mirror / Atom feed
From: Johannes Thumshirn <jth@kernel.org>
To: linux-btrfs@vger.kernel.org
Cc: Damien Le Moal <dlemoal@kernel.org>,
	Naohiro Aota <naohiro.aota@wdc.com>,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>
Subject: [PATCH v2 2/3] btrfs: zoned: get rid of treelog_bg_lock
Date: Tue, 24 Jun 2025 11:37:09 +0200	[thread overview]
Message-ID: <20250624093710.18685-3-jth@kernel.org> (raw)
In-Reply-To: <20250624093710.18685-1-jth@kernel.org>

From: Johannes Thumshirn <johannes.thumshirn@wdc.com>

Lockstat analysis of benchmark workloads shows a very high contention on
treelog_bg_lock. But the lock only protects a single field in
'struct btrfs_fs_info', namely 'u64 treelog_bg'.

Use READ_ONCE()/WRITE_ONCE() to access 'btrfs_fs_info::treelog_bg'.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/disk-io.c     |  1 -
 fs/btrfs/extent-tree.c | 49 ++++++++++--------------------------------
 fs/btrfs/fs.h          |  1 -
 fs/btrfs/zoned.c       |  2 +-
 fs/btrfs/zoned.h       |  6 ++----
 5 files changed, 14 insertions(+), 45 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 24896322376d..a54218717cb4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2789,7 +2789,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	spin_lock_init(&fs_info->defrag_inodes_lock);
 	spin_lock_init(&fs_info->super_lock);
 	spin_lock_init(&fs_info->unused_bgs_lock);
-	spin_lock_init(&fs_info->treelog_bg_lock);
 	spin_lock_init(&fs_info->zone_active_bgs_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	rwlock_init(&fs_info->global_root_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 80ceca89a9ef..39d3984153a2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3809,22 +3809,6 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
 	return find_free_extent_unclustered(block_group, ffe_ctl);
 }
 
-/*
- * Tree-log block group locking
- * ============================
- *
- * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
- * indicates the starting address of a block group, which is reserved only
- * for tree-log metadata.
- *
- * Lock nesting
- * ============
- *
- * space_info::lock
- *   block_group::lock
- *     fs_info::treelog_bg_lock
- */
-
 /*
  * Simple allocator for sequential-only block group. It only allows sequential
  * allocation. No need to play with trees. This function also reserves the
@@ -3841,9 +3825,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	u64 num_bytes = ffe_ctl->num_bytes;
 	u64 avail;
 	u64 bytenr = block_group->start;
-	u64 log_bytenr;
 	int ret = 0;
-	bool skip = false;
 
 	ASSERT(btrfs_is_zoned(block_group->fs_info));
 
@@ -3851,13 +3833,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	 * Do not allow non-tree-log blocks in the dedicated tree-log block
 	 * group, and vice versa.
 	 */
-	spin_lock(&fs_info->treelog_bg_lock);
-	log_bytenr = fs_info->treelog_bg;
-	if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
-			   (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
-		skip = true;
-	spin_unlock(&fs_info->treelog_bg_lock);
-	if (skip)
+	if (READ_ONCE(fs_info->treelog_bg) &&
+	    ((ffe_ctl->for_treelog && bytenr != READ_ONCE(fs_info->treelog_bg)) ||
+	     (!ffe_ctl->for_treelog && bytenr == READ_ONCE(fs_info->treelog_bg))))
 		return 1;
 
 	/*
@@ -3892,14 +3870,13 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 
 	spin_lock(&space_info->lock);
 	spin_lock(&block_group->lock);
-	spin_lock(&fs_info->treelog_bg_lock);
 
 	if (ret)
 		goto out;
 
 	ASSERT(!ffe_ctl->for_treelog ||
-	       block_group->start == fs_info->treelog_bg ||
-	       fs_info->treelog_bg == 0);
+	       block_group->start == READ_ONCE(fs_info->treelog_bg) ||
+	       READ_ONCE(fs_info->treelog_bg) == 0);
 	ASSERT(!ffe_ctl->for_data_reloc ||
 	       block_group->start == READ_ONCE(fs_info->data_reloc_bg) ||
 	       READ_ONCE(fs_info->data_reloc_bg) == 0);
@@ -3915,7 +3892,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	 * Do not allow currently using block group to be tree-log dedicated
 	 * block group.
 	 */
-	if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
+	if (ffe_ctl->for_treelog && READ_ONCE(fs_info->treelog_bg) == 0 &&
 	    (block_group->used || block_group->reserved)) {
 		ret = 1;
 		goto out;
@@ -3946,8 +3923,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 		goto out;
 	}
 
-	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
-		fs_info->treelog_bg = block_group->start;
+	if (ffe_ctl->for_treelog && READ_ONCE(fs_info->treelog_bg) == 0)
+		WRITE_ONCE(fs_info->treelog_bg, block_group->start);
 
 	if (ffe_ctl->for_data_reloc) {
 		if (READ_ONCE(fs_info->data_reloc_bg) == 0)
@@ -3985,10 +3962,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 
 out:
 	if (ret && ffe_ctl->for_treelog)
-		fs_info->treelog_bg = 0;
+		WRITE_ONCE(fs_info->treelog_bg, 0);
 	if (ret && ffe_ctl->for_data_reloc)
 		WRITE_ONCE(fs_info->data_reloc_bg, 0);
-	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
 	return ret;
@@ -4290,11 +4266,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
 static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
 				    struct find_free_extent_ctl *ffe_ctl)
 {
-	if (ffe_ctl->for_treelog) {
-		spin_lock(&fs_info->treelog_bg_lock);
-		if (fs_info->treelog_bg)
-			ffe_ctl->hint_byte = fs_info->treelog_bg;
-		spin_unlock(&fs_info->treelog_bg_lock);
+	if (ffe_ctl->for_treelog && READ_ONCE(fs_info->treelog_bg)) {
+			ffe_ctl->hint_byte = READ_ONCE(fs_info->treelog_bg);
 	} else if (ffe_ctl->for_data_reloc &&
 		   READ_ONCE(fs_info->data_reloc_bg)) {
 			ffe_ctl->hint_byte = READ_ONCE(fs_info->data_reloc_bg);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 570f4b85096c..a388af40a251 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -846,7 +846,6 @@ struct btrfs_fs_info {
 	u64 max_zone_append_size;
 
 	struct mutex zoned_meta_io_lock;
-	spinlock_t treelog_bg_lock;
 	u64 treelog_bg;
 
 	/* Start of the dedicated data relocation block group */
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 421afdb6eb39..98f483def77d 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1948,7 +1948,7 @@ static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
 	if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
 		return true;
 
-	if (fs_info->treelog_bg == block_group->start) {
+	if (READ_ONCE(fs_info->treelog_bg) == block_group->start) {
 		if (!btrfs_zone_activate(block_group)) {
 			int ret_fin = btrfs_zone_finish_one_bg(fs_info);
 
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 6e11533b8e14..f71026b6c100 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -387,10 +387,8 @@ static inline void btrfs_clear_treelog_bg(struct btrfs_block_group *bg)
 	if (!btrfs_is_zoned(fs_info))
 		return;
 
-	spin_lock(&fs_info->treelog_bg_lock);
-	if (fs_info->treelog_bg == bg->start)
-		fs_info->treelog_bg = 0;
-	spin_unlock(&fs_info->treelog_bg_lock);
+	if (READ_ONCE(fs_info->treelog_bg) == bg->start)
+		WRITE_ONCE(fs_info->treelog_bg, 0);
 }
 
 static inline void btrfs_zoned_data_reloc_lock(struct btrfs_inode *inode)
-- 
2.49.0


  parent reply	other threads:[~2025-06-24  9:37 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-24  9:37 [PATCH v2 0/3] btrfs: zoned: reduce lock contention in zoned extent allocator Johannes Thumshirn
2025-06-24  9:37 ` [PATCH v2 1/3] btrfs: zoned: get rid of relocation_bg_lock Johannes Thumshirn
2025-06-24 12:32   ` Naohiro Aota
2025-06-25 10:28     ` Johannes Thumshirn
2025-06-24  9:37 ` Johannes Thumshirn [this message]
2025-06-24  9:37 ` [PATCH v2 3/3] btrfs: zoned: don't hold space_info lock on zoned allocation Johannes Thumshirn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250624093710.18685-3-jth@kernel.org \
    --to=jth@kernel.org \
    --cc=dlemoal@kernel.org \
    --cc=johannes.thumshirn@wdc.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=naohiro.aota@wdc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.