From: Boris Burkov <boris@bur.io>
To: Leo Martins <loemra.dev@gmail.com>
Cc: linux-btrfs@vger.kernel.org, kernel-team@fb.com, fstests@vger.kernel.org
Subject: Re: [PATCH v2 2/3] btrfs: add tracing for find_free_extent skip conditions
Date: Tue, 14 Oct 2025 20:28:50 -0700 [thread overview]
Message-ID: <20251015032817.GA1702774@zen.localdomain> (raw)
In-Reply-To: <e5bb7036ec9f54f4ff862327d3ff3ccc9128077d.1759532729.git.loemra.dev@gmail.com>
On Fri, Oct 03, 2025 at 04:41:58PM -0700, Leo Martins wrote:
> Add detailed tracing to the find_free_extent() function to improve
> observability of extent allocation behavior. This patch introduces:
> > - A new trace event btrfs_find_free_extent_skip_block_group() that
> captures
> allocation context and skip reasons
> - Comprehensive set of FFE_SKIP_BG_* constants defining specific
> reasons why block groups are skipped during allocation
> - Trace points at all major skip conditions in the allocator loop
>
> The trace event includes key allocation parameters (root, size, flags,
> loop iteration) and block group details (start, length, flags) along
> with the specific skip reason and associated error codes.
>
> These trace points will help diagnose allocation performance
> issues, understand allocation patterns, and debug extent allocator
> behavior.
>
One nit inline, but this looks great, thanks.
Reviewed-by: Boris Burkov <boris@bur.io>
> Signed-off-by: Leo Martins <loemra.dev@gmail.com>
> ---
> fs/btrfs/extent-tree.c | 38 +++++++++++++++++++--
> fs/btrfs/extent-tree.h | 17 ++++++++++
> include/trace/events/btrfs.h | 66 ++++++++++++++++++++++++++++++++++++
> 3 files changed, 118 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 28b442660014..3b6d57d39bd5 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4455,6 +4455,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
> * target because our list pointers are not
> * valid
> */
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_REMOVING, 0);
> btrfs_put_block_group(block_group);
> up_read(&space_info->groups_sem);
> } else {
> @@ -4466,6 +4468,15 @@ static noinline int find_free_extent(struct btrfs_root *root,
> goto have_block_group;
> }
> } else if (block_group) {
> + if (!block_group_bits(block_group, ffe_ctl->flags))
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_WRONG_FLAGS, block_group->flags & ~ffe_ctl->flags);
> + else if (block_group->space_info != space_info)
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_WRONG_SPACE_INFO, 0);
> + else if (block_group->cached == BTRFS_CACHE_NO)
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_NOT_CACHED, 0);
nit: I think this would look nicer with the code/data selected by the
ifs and then a single trace call.
> btrfs_put_block_group(block_group);
> }
> }
> @@ -4481,6 +4492,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
> ffe_ctl->hinted = false;
> /* If the block group is read-only, we can skip it entirely. */
> if (unlikely(block_group->ro)) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_READ_ONLY, 0);
> if (ffe_ctl->for_treelog)
> btrfs_clear_treelog_bg(block_group);
> if (ffe_ctl->for_data_reloc)
> @@ -4492,6 +4505,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
> ffe_ctl->search_start = block_group->start;
>
> if (!block_group_bits(block_group, ffe_ctl->flags)) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_WRONG_FLAGS, block_group->flags & ~ffe_ctl->flags);
> btrfs_release_block_group(block_group, ffe_ctl->delalloc);
> continue;
> }
> @@ -4511,6 +4526,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
> * error that caused problems, not ENOSPC.
> */
> if (ret < 0) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_CACHE_ERROR, ret);
> if (!cache_block_group_error)
> cache_block_group_error = ret;
> ret = 0;
> @@ -4520,18 +4537,26 @@ static noinline int find_free_extent(struct btrfs_root *root,
> }
>
> if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_CACHE_ERROR, -EIO);
> if (!cache_block_group_error)
> cache_block_group_error = -EIO;
> goto loop;
> }
>
> - if (!find_free_extent_check_size_class(ffe_ctl, block_group))
> + if (!find_free_extent_check_size_class(ffe_ctl, block_group)) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_SIZE_CLASS_MISMATCH, block_group->size_class);
> goto loop;
> + }
>
> bg_ret = NULL;
> ret = do_allocation(block_group, ffe_ctl, &bg_ret);
> - if (ret > 0)
> + if (ret > 0) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_DO_ALLOCATION_FAILED, ret);
> goto loop;
> + }
>
> if (bg_ret && bg_ret != block_group) {
> btrfs_release_block_group(block_group, ffe_ctl->delalloc);
> @@ -4545,22 +4570,29 @@ static noinline int find_free_extent(struct btrfs_root *root,
> /* move on to the next group */
> if (ffe_ctl->search_start + ffe_ctl->num_bytes >
> block_group->start + block_group->length) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_SEARCH_BOUNDS, block_group->start + block_group->length);
> btrfs_add_free_space_unused(block_group,
> ffe_ctl->found_offset,
> ffe_ctl->num_bytes);
> goto loop;
> }
>
> - if (ffe_ctl->found_offset < ffe_ctl->search_start)
> + if (ffe_ctl->found_offset < ffe_ctl->search_start) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_SEARCH_BOUNDS, ffe_ctl->found_offset);
> btrfs_add_free_space_unused(block_group,
> ffe_ctl->found_offset,
> ffe_ctl->search_start - ffe_ctl->found_offset);
> + }
>
> ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
> ffe_ctl->num_bytes,
> ffe_ctl->delalloc,
> ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
> if (ret == -EAGAIN) {
> + trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> + FFE_SKIP_BG_ADD_RESERVED_FAILED, -EAGAIN);
> btrfs_add_free_space_unused(block_group,
> ffe_ctl->found_offset,
> ffe_ctl->num_bytes);
> diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
> index e970ac42a871..4f1dc077b7c9 100644
> --- a/fs/btrfs/extent-tree.h
> +++ b/fs/btrfs/extent-tree.h
> @@ -23,6 +23,23 @@ enum btrfs_extent_allocation_policy {
> BTRFS_EXTENT_ALLOC_ZONED,
> };
>
> +/*
> + * Enum for find_free_extent skip reasons used in trace events.
> + * Each enum corresponds to a specific unhappy path in the allocator.
> + */
> +enum {
> + FFE_SKIP_BG_REMOVING,
> + FFE_SKIP_BG_READ_ONLY,
> + FFE_SKIP_BG_WRONG_SPACE_INFO,
> + FFE_SKIP_BG_WRONG_FLAGS,
> + FFE_SKIP_BG_NOT_CACHED,
> + FFE_SKIP_BG_CACHE_ERROR,
> + FFE_SKIP_BG_SIZE_CLASS_MISMATCH,
> + FFE_SKIP_BG_DO_ALLOCATION_FAILED,
> + FFE_SKIP_BG_SEARCH_BOUNDS,
> + FFE_SKIP_BG_ADD_RESERVED_FAILED,
> +};
> +
> struct find_free_extent_ctl {
> /* Basic allocation info */
> u64 ram_bytes;
> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
> index 7e418f065b94..72aa250983d4 100644
> --- a/include/trace/events/btrfs.h
> +++ b/include/trace/events/btrfs.h
> @@ -103,6 +103,19 @@ struct find_free_extent_ctl;
> EM( COMMIT_TRANS, "COMMIT_TRANS") \
> EMe(RESET_ZONES, "RESET_ZONES")
>
> +#define FIND_FREE_EXTENT_SKIP_REASONS \
> + EM( FFE_SKIP_BG_REMOVING, "BG_REMOVING") \
> + EM( FFE_SKIP_BG_READ_ONLY, "BG_READ_ONLY") \
> + EM( FFE_SKIP_BG_WRONG_SPACE_INFO, "BG_WRONG_SPACE_INFO") \
> + EM( FFE_SKIP_BG_WRONG_FLAGS, "BG_WRONG_FLAGS") \
> + EM( FFE_SKIP_BG_NOT_CACHED, "BG_NOT_CACHED") \
> + EM( FFE_SKIP_BG_CACHE_ERROR, "BG_CACHE_ERROR") \
> + EM( FFE_SKIP_BG_SIZE_CLASS_MISMATCH, "BG_SIZE_CLASS_MISMATCH") \
> + EM( FFE_SKIP_BG_DO_ALLOCATION_FAILED, "BG_DO_ALLOCATION_FAILED") \
> + EM( FFE_SKIP_BG_SEARCH_BOUNDS, "BG_SEARCH_BOUNDS") \
> + EMe(FFE_SKIP_BG_ADD_RESERVED_FAILED, "BG_ADD_RESERVED_FAILED")
> +
> +
> /*
> * First define the enums in the above macros to be exported to userspace via
> * TRACE_DEFINE_ENUM().
> @@ -118,6 +131,7 @@ FI_TYPES
> QGROUP_RSV_TYPES
> IO_TREE_OWNER
> FLUSH_STATES
> +FIND_FREE_EXTENT_SKIP_REASONS
>
> /*
> * Now redefine the EM and EMe macros to map the enums to the strings that will
> @@ -1388,6 +1402,58 @@ DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
> TP_ARGS(block_group, ffe_ctl)
> );
>
> +TRACE_EVENT(btrfs_find_free_extent_skip_block_group,
> +
> + TP_PROTO(const struct btrfs_root *root,
> + const struct find_free_extent_ctl *ffe_ctl,
> + const struct btrfs_block_group *block_group,
> + int reason,
> + s64 error_or_value),
> +
> + TP_ARGS(root, ffe_ctl, block_group, reason, error_or_value),
> +
> + TP_STRUCT__entry_btrfs(
> + __field( u64, root_objectid )
> + __field( u64, num_bytes )
> + __field( u64, empty_size )
> + __field( u64, flags )
> + __field( int, loop )
> + __field( bool, hinted )
> + __field( int, size_class )
> + __field( u64, bg_start )
> + __field( u64, bg_length )
> + __field( u64, bg_flags )
> + __field( int, reason )
> + __field( s64, error_or_value )
> + ),
> +
> + TP_fast_assign_btrfs(root->fs_info,
> + __entry->root_objectid = btrfs_root_id(root);
> + __entry->num_bytes = ffe_ctl->num_bytes;
> + __entry->empty_size = ffe_ctl->empty_size;
> + __entry->flags = ffe_ctl->flags;
> + __entry->loop = ffe_ctl->loop;
> + __entry->hinted = ffe_ctl->hinted;
> + __entry->size_class = ffe_ctl->size_class;
> + __entry->bg_start = block_group ? block_group->start : 0;
> + __entry->bg_length = block_group ? block_group->length : 0;
> + __entry->bg_flags = block_group ? block_group->flags : 0;
> + __entry->reason = reason;
> + __entry->error_or_value = error_or_value;
> + ),
> +
> + TP_printk_btrfs(
> +"root=%llu(%s) len=%llu empty_size=%llu flags=%llu(%s) loop=%d hinted=%d size_class=%d bg=[%llu+%llu] bg_flags=%llu(%s) reason=%s error_or_value=%lld",
> + show_root_type(__entry->root_objectid),
> + __entry->num_bytes, __entry->empty_size, __entry->flags,
> + __print_flags((unsigned long)__entry->flags, "|", BTRFS_GROUP_FLAGS),
> + __entry->loop, __entry->hinted, __entry->size_class,
> + __entry->bg_start, __entry->bg_length, __entry->bg_flags,
> + __print_flags((unsigned long)__entry->bg_flags, "|", BTRFS_GROUP_FLAGS),
> + __print_symbolic(__entry->reason, FIND_FREE_EXTENT_SKIP_REASONS),
> + __entry->error_or_value)
> +);
> +
> TRACE_EVENT(btrfs_find_cluster,
>
> TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
> --
> 2.47.3
>
next prev parent reply other threads:[~2025-10-15 3:29 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-03 23:41 [PATCH v2 0/3] btrfs: find_free_extent cleanups Leo Martins
2025-10-03 23:41 ` [PATCH v2 1/3] btrfs: remove ffe RAID loop Leo Martins
2025-10-15 3:29 ` Boris Burkov
2025-10-03 23:41 ` [PATCH v2 2/3] btrfs: add tracing for find_free_extent skip conditions Leo Martins
2025-10-15 3:28 ` Boris Burkov [this message]
2025-10-03 23:41 ` [PATCH v2 3/3] fstests: btrfs: test RAID conversions under stress Leo Martins
2025-10-04 1:54 ` Qu Wenruo
2025-10-06 17:37 ` Leo Martins
2025-10-06 18:16 ` David Sterba
2025-10-15 3:31 ` Boris Burkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251015032817.GA1702774@zen.localdomain \
--to=boris@bur.io \
--cc=fstests@vger.kernel.org \
--cc=kernel-team@fb.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=loemra.dev@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox