From: Naohiro Aota <Naohiro.Aota@wdc.com>
To: Johannes Thumshirn <jth@kernel.org>,
"linux-btrfs@vger.kernel.org" <linux-btrfs@vger.kernel.org>
Cc: Damien Le Moal <dlemoal@kernel.org>,
Naohiro Aota <Naohiro.Aota@wdc.com>,
Johannes Thumshirn <Johannes.Thumshirn@wdc.com>
Subject: Re: [PATCH v2 1/3] btrfs: zoned: get rid of relocation_bg_lock
Date: Tue, 24 Jun 2025 12:32:54 +0000 [thread overview]
Message-ID: <DAURLBJDUGQJ.1W5C4RDQ8GJHF@wdc.com> (raw)
In-Reply-To: <20250624093710.18685-2-jth@kernel.org>
On Tue Jun 24, 2025 at 6:37 PM JST, Johannes Thumshirn wrote:
> From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>
> Lockstat analysis of benchmark workloads shows a very high contention on
> relocation_bg_lock. But the lock only protects a single field in
> 'struct btrfs_fs_info', namely 'u64 data_reloc_bg'.
>
> Use READ_ONCE()/WRITE_ONCE() to access 'btrfs_fs_info::data_reloc_bg'.
>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
> fs/btrfs/disk-io.c | 1 -
> fs/btrfs/extent-tree.c | 34 ++++++++++++----------------------
> fs/btrfs/fs.h | 6 +-----
> fs/btrfs/zoned.c | 10 ++++------
> 4 files changed, 17 insertions(+), 34 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index ee3cdd7035cc..24896322376d 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2791,7 +2791,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
> spin_lock_init(&fs_info->unused_bgs_lock);
> spin_lock_init(&fs_info->treelog_bg_lock);
> spin_lock_init(&fs_info->zone_active_bgs_lock);
> - spin_lock_init(&fs_info->relocation_bg_lock);
> rwlock_init(&fs_info->tree_mod_log_lock);
> rwlock_init(&fs_info->global_root_lock);
> mutex_init(&fs_info->unused_bg_unpin_mutex);
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 10f50c725313..80ceca89a9ef 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -3842,7 +3842,6 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
> u64 avail;
> u64 bytenr = block_group->start;
> u64 log_bytenr;
> - u64 data_reloc_bytenr;
> int ret = 0;
> bool skip = false;
>
> @@ -3865,14 +3864,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
> * Do not allow non-relocation blocks in the dedicated relocation block
> * group, and vice versa.
> */
> - spin_lock(&fs_info->relocation_bg_lock);
> - data_reloc_bytenr = fs_info->data_reloc_bg;
> - if (data_reloc_bytenr &&
> - ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
> - (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
> - skip = true;
> - spin_unlock(&fs_info->relocation_bg_lock);
> - if (skip)
> + if (READ_ONCE(fs_info->data_reloc_bg) &&
> + ((ffe_ctl->for_data_reloc && bytenr != READ_ONCE(fs_info->data_reloc_bg)) ||
> + (!ffe_ctl->for_data_reloc && bytenr == READ_ONCE(fs_info->data_reloc_bg))))
> return 1;
>
> /* Check RO and no space case before trying to activate it */
> @@ -3899,7 +3893,6 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
> spin_lock(&space_info->lock);
> spin_lock(&block_group->lock);
> spin_lock(&fs_info->treelog_bg_lock);
> - spin_lock(&fs_info->relocation_bg_lock);
>
> if (ret)
> goto out;
> @@ -3908,8 +3901,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
> block_group->start == fs_info->treelog_bg ||
> fs_info->treelog_bg == 0);
> ASSERT(!ffe_ctl->for_data_reloc ||
> - block_group->start == fs_info->data_reloc_bg ||
> - fs_info->data_reloc_bg == 0);
> + block_group->start == READ_ONCE(fs_info->data_reloc_bg) ||
> + READ_ONCE(fs_info->data_reloc_bg) == 0);
>
> if (block_group->ro ||
> (!ffe_ctl->for_data_reloc &&
> @@ -3932,7 +3925,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
> * Do not allow currently used block group to be the data relocation
> * dedicated block group.
> */
> - if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
> + if (ffe_ctl->for_data_reloc && READ_ONCE(fs_info->data_reloc_bg) == 0 &&
> (block_group->used || block_group->reserved)) {
> ret = 1;
> goto out;
> @@ -3957,8 +3950,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
> fs_info->treelog_bg = block_group->start;
>
> if (ffe_ctl->for_data_reloc) {
> - if (!fs_info->data_reloc_bg)
> - fs_info->data_reloc_bg = block_group->start;
> + if (READ_ONCE(fs_info->data_reloc_bg) == 0)
> + WRITE_ONCE(fs_info->data_reloc_bg, block_group->start);
What happens two threads reach here at the same time and each has a
different block group locked? One of them will get an unexpected state.
Apparently, it does not happen because we have the space_info->lock
around, but patch 3 will remove it...
> /*
> * Do not allow allocations from this block group, unless it is
> * for data relocation. Compared to increasing the ->ro, setting
> @@ -3994,8 +3987,7 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
> if (ret && ffe_ctl->for_treelog)
> fs_info->treelog_bg = 0;
> if (ret && ffe_ctl->for_data_reloc)
> - fs_info->data_reloc_bg = 0;
> - spin_unlock(&fs_info->relocation_bg_lock);
> + WRITE_ONCE(fs_info->data_reloc_bg, 0);
> spin_unlock(&fs_info->treelog_bg_lock);
> spin_unlock(&block_group->lock);
> spin_unlock(&space_info->lock);
> @@ -4303,11 +4295,9 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
> if (fs_info->treelog_bg)
> ffe_ctl->hint_byte = fs_info->treelog_bg;
> spin_unlock(&fs_info->treelog_bg_lock);
> - } else if (ffe_ctl->for_data_reloc) {
> - spin_lock(&fs_info->relocation_bg_lock);
> - if (fs_info->data_reloc_bg)
> - ffe_ctl->hint_byte = fs_info->data_reloc_bg;
> - spin_unlock(&fs_info->relocation_bg_lock);
> + } else if (ffe_ctl->for_data_reloc &&
> + READ_ONCE(fs_info->data_reloc_bg)) {
> + ffe_ctl->hint_byte = READ_ONCE(fs_info->data_reloc_bg);
> } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
> struct btrfs_block_group *block_group;
>
> diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
> index b239e4b8421c..570f4b85096c 100644
> --- a/fs/btrfs/fs.h
> +++ b/fs/btrfs/fs.h
> @@ -849,11 +849,7 @@ struct btrfs_fs_info {
> spinlock_t treelog_bg_lock;
> u64 treelog_bg;
>
> - /*
> - * Start of the dedicated data relocation block group, protected by
> - * relocation_bg_lock.
> - */
> - spinlock_t relocation_bg_lock;
> + /* Start of the dedicated data relocation block group */
> u64 data_reloc_bg;
> struct mutex zoned_data_reloc_io_lock;
>
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index bd987c90a05c..421afdb6eb39 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -2496,10 +2496,8 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
> {
> struct btrfs_fs_info *fs_info = bg->fs_info;
>
> - spin_lock(&fs_info->relocation_bg_lock);
> - if (fs_info->data_reloc_bg == bg->start)
> - fs_info->data_reloc_bg = 0;
> - spin_unlock(&fs_info->relocation_bg_lock);
> + if (READ_ONCE(fs_info->data_reloc_bg) == bg->start)
> + WRITE_ONCE(fs_info->data_reloc_bg, 0);
> }
>
> void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
> @@ -2518,7 +2516,7 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
> if (!btrfs_is_zoned(fs_info))
> return;
>
> - if (fs_info->data_reloc_bg)
> + if (READ_ONCE(fs_info->data_reloc_bg))
> return;
>
> if (sb_rdonly(fs_info->sb))
> @@ -2539,7 +2537,7 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
> continue;
> }
>
> - fs_info->data_reloc_bg = bg->start;
> + WRITE_ONCE(fs_info->data_reloc_bg, bg->start);
> set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
> btrfs_zone_activate(bg);
>
next prev parent reply other threads:[~2025-06-24 12:33 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-24 9:37 [PATCH v2 0/3] btrfs: zoned: reduce lock contention in zoned extent allocator Johannes Thumshirn
2025-06-24 9:37 ` [PATCH v2 1/3] btrfs: zoned: get rid of relocation_bg_lock Johannes Thumshirn
2025-06-24 12:32 ` Naohiro Aota [this message]
2025-06-25 10:28 ` Johannes Thumshirn
2025-06-24 9:37 ` [PATCH v2 2/3] btrfs: zoned: get rid of treelog_bg_lock Johannes Thumshirn
2025-06-24 9:37 ` [PATCH v2 3/3] btrfs: zoned: don't hold space_info lock on zoned allocation Johannes Thumshirn
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DAURLBJDUGQJ.1W5C4RDQ8GJHF@wdc.com \
--to=naohiro.aota@wdc.com \
--cc=Johannes.Thumshirn@wdc.com \
--cc=dlemoal@kernel.org \
--cc=jth@kernel.org \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.