Linux Btrfs filesystem development
 help / color / mirror / Atom feed
From: Boris Burkov <boris@bur.io>
To: Leo Martins <loemra.dev@gmail.com>
Cc: linux-btrfs@vger.kernel.org, kernel-team@fb.com, fstests@vger.kernel.org
Subject: Re: [PATCH v2 2/3] btrfs: add tracing for find_free_extent skip conditions
Date: Tue, 14 Oct 2025 20:28:50 -0700	[thread overview]
Message-ID: <20251015032817.GA1702774@zen.localdomain> (raw)
In-Reply-To: <e5bb7036ec9f54f4ff862327d3ff3ccc9128077d.1759532729.git.loemra.dev@gmail.com>

On Fri, Oct 03, 2025 at 04:41:58PM -0700, Leo Martins wrote:
> Add detailed tracing to the find_free_extent() function to improve
> observability of extent allocation behavior. This patch introduces:
> > - A new trace event btrfs_find_free_extent_skip_block_group() that 
>   captures
>   allocation context and skip reasons
> - Comprehensive set of FFE_SKIP_BG_* constants defining specific
>   reasons why block groups are skipped during allocation
> - Trace points at all major skip conditions in the allocator loop
> 
> The trace event includes key allocation parameters (root, size, flags,
> loop iteration) and block group details (start, length, flags) along
> with the specific skip reason and associated error codes.
> 
> These trace points will help diagnose allocation performance
> issues, understand allocation patterns, and debug extent allocator
> behavior.
> 

One nit inline, but this looks great, thanks.
Reviewed-by: Boris Burkov <boris@bur.io>

> Signed-off-by: Leo Martins <loemra.dev@gmail.com>
> ---
>  fs/btrfs/extent-tree.c       | 38 +++++++++++++++++++--
>  fs/btrfs/extent-tree.h       | 17 ++++++++++
>  include/trace/events/btrfs.h | 66 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 118 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 28b442660014..3b6d57d39bd5 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4455,6 +4455,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
>  				 * target because our list pointers are not
>  				 * valid
>  				 */
> +				trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +								  FFE_SKIP_BG_REMOVING, 0);
>  				btrfs_put_block_group(block_group);
>  				up_read(&space_info->groups_sem);
>  			} else {
> @@ -4466,6 +4468,15 @@ static noinline int find_free_extent(struct btrfs_root *root,
>  				goto have_block_group;
>  			}
>  		} else if (block_group) {
> +			if (!block_group_bits(block_group, ffe_ctl->flags))
> +				trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +								  FFE_SKIP_BG_WRONG_FLAGS, block_group->flags & ~ffe_ctl->flags);
> +			else if (block_group->space_info != space_info)
> +				trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +								  FFE_SKIP_BG_WRONG_SPACE_INFO, 0);
> +			else if (block_group->cached == BTRFS_CACHE_NO)
> +				trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +								  FFE_SKIP_BG_NOT_CACHED, 0);

nit: I think this would look nicer with the code/data selected by the
ifs and then a single trace call.

>  			btrfs_put_block_group(block_group);
>  		}
>  	}
> @@ -4481,6 +4492,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
>  		ffe_ctl->hinted = false;
>  		/* If the block group is read-only, we can skip it entirely. */
>  		if (unlikely(block_group->ro)) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							  FFE_SKIP_BG_READ_ONLY, 0);
>  			if (ffe_ctl->for_treelog)
>  				btrfs_clear_treelog_bg(block_group);
>  			if (ffe_ctl->for_data_reloc)
> @@ -4492,6 +4505,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
>  		ffe_ctl->search_start = block_group->start;
>  
>  		if (!block_group_bits(block_group, ffe_ctl->flags)) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							  FFE_SKIP_BG_WRONG_FLAGS, block_group->flags & ~ffe_ctl->flags);
>  			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
>  			continue;
>  		}
> @@ -4511,6 +4526,8 @@ static noinline int find_free_extent(struct btrfs_root *root,
>  			 * error that caused problems, not ENOSPC.
>  			 */
>  			if (ret < 0) {
> +				trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +								  FFE_SKIP_BG_CACHE_ERROR, ret);
>  				if (!cache_block_group_error)
>  					cache_block_group_error = ret;
>  				ret = 0;
> @@ -4520,18 +4537,26 @@ static noinline int find_free_extent(struct btrfs_root *root,
>  		}
>  
>  		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							  FFE_SKIP_BG_CACHE_ERROR, -EIO);
>  			if (!cache_block_group_error)
>  				cache_block_group_error = -EIO;
>  			goto loop;
>  		}
>  
> -		if (!find_free_extent_check_size_class(ffe_ctl, block_group))
> +		if (!find_free_extent_check_size_class(ffe_ctl, block_group)) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							  FFE_SKIP_BG_SIZE_CLASS_MISMATCH, block_group->size_class);
>  			goto loop;
> +		}
>  
>  		bg_ret = NULL;
>  		ret = do_allocation(block_group, ffe_ctl, &bg_ret);
> -		if (ret > 0)
> +		if (ret > 0) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							  FFE_SKIP_BG_DO_ALLOCATION_FAILED, ret);
>  			goto loop;
> +		}
>  
>  		if (bg_ret && bg_ret != block_group) {
>  			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
> @@ -4545,22 +4570,29 @@ static noinline int find_free_extent(struct btrfs_root *root,
>  		/* move on to the next group */
>  		if (ffe_ctl->search_start + ffe_ctl->num_bytes >
>  		    block_group->start + block_group->length) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							  FFE_SKIP_BG_SEARCH_BOUNDS, block_group->start + block_group->length);
>  			btrfs_add_free_space_unused(block_group,
>  					    ffe_ctl->found_offset,
>  					    ffe_ctl->num_bytes);
>  			goto loop;
>  		}
>  
> -		if (ffe_ctl->found_offset < ffe_ctl->search_start)
> +		if (ffe_ctl->found_offset < ffe_ctl->search_start) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							FFE_SKIP_BG_SEARCH_BOUNDS, ffe_ctl->found_offset);
>  			btrfs_add_free_space_unused(block_group,
>  					ffe_ctl->found_offset,
>  					ffe_ctl->search_start - ffe_ctl->found_offset);
> +		}
>  
>  		ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
>  					       ffe_ctl->num_bytes,
>  					       ffe_ctl->delalloc,
>  					       ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
>  		if (ret == -EAGAIN) {
> +			trace_btrfs_find_free_extent_skip_block_group(root, ffe_ctl, block_group,
> +							  FFE_SKIP_BG_ADD_RESERVED_FAILED, -EAGAIN);
>  			btrfs_add_free_space_unused(block_group,
>  					ffe_ctl->found_offset,
>  					ffe_ctl->num_bytes);
> diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
> index e970ac42a871..4f1dc077b7c9 100644
> --- a/fs/btrfs/extent-tree.h
> +++ b/fs/btrfs/extent-tree.h
> @@ -23,6 +23,23 @@ enum btrfs_extent_allocation_policy {
>  	BTRFS_EXTENT_ALLOC_ZONED,
>  };
>  
> +/*
> + * Enum for find_free_extent skip reasons used in trace events.
> + * Each enum corresponds to a specific unhappy path in the allocator.
> + */
> +enum {
> +	FFE_SKIP_BG_REMOVING,
> +	FFE_SKIP_BG_READ_ONLY,
> +	FFE_SKIP_BG_WRONG_SPACE_INFO,
> +	FFE_SKIP_BG_WRONG_FLAGS,
> +	FFE_SKIP_BG_NOT_CACHED,
> +	FFE_SKIP_BG_CACHE_ERROR,
> +	FFE_SKIP_BG_SIZE_CLASS_MISMATCH,
> +	FFE_SKIP_BG_DO_ALLOCATION_FAILED,
> +	FFE_SKIP_BG_SEARCH_BOUNDS,
> +	FFE_SKIP_BG_ADD_RESERVED_FAILED,
> +};
> +
>  struct find_free_extent_ctl {
>  	/* Basic allocation info */
>  	u64 ram_bytes;
> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
> index 7e418f065b94..72aa250983d4 100644
> --- a/include/trace/events/btrfs.h
> +++ b/include/trace/events/btrfs.h
> @@ -103,6 +103,19 @@ struct find_free_extent_ctl;
>  	EM( COMMIT_TRANS,		"COMMIT_TRANS")			\
>  	EMe(RESET_ZONES,		"RESET_ZONES")
>  
> +#define FIND_FREE_EXTENT_SKIP_REASONS						\
> +	EM( FFE_SKIP_BG_REMOVING,		"BG_REMOVING")			\
> +	EM( FFE_SKIP_BG_READ_ONLY,		"BG_READ_ONLY")			\
> +	EM( FFE_SKIP_BG_WRONG_SPACE_INFO,	"BG_WRONG_SPACE_INFO")		\
> +	EM( FFE_SKIP_BG_WRONG_FLAGS,		"BG_WRONG_FLAGS")		\
> +	EM( FFE_SKIP_BG_NOT_CACHED,		"BG_NOT_CACHED")		\
> +	EM( FFE_SKIP_BG_CACHE_ERROR,		"BG_CACHE_ERROR")		\
> +	EM( FFE_SKIP_BG_SIZE_CLASS_MISMATCH,	"BG_SIZE_CLASS_MISMATCH")	\
> +	EM( FFE_SKIP_BG_DO_ALLOCATION_FAILED,	"BG_DO_ALLOCATION_FAILED")	\
> +	EM( FFE_SKIP_BG_SEARCH_BOUNDS,		"BG_SEARCH_BOUNDS")		\
> +	EMe(FFE_SKIP_BG_ADD_RESERVED_FAILED,	"BG_ADD_RESERVED_FAILED")
> +
> +
>  /*
>   * First define the enums in the above macros to be exported to userspace via
>   * TRACE_DEFINE_ENUM().
> @@ -118,6 +131,7 @@ FI_TYPES
>  QGROUP_RSV_TYPES
>  IO_TREE_OWNER
>  FLUSH_STATES
> +FIND_FREE_EXTENT_SKIP_REASONS
>  
>  /*
>   * Now redefine the EM and EMe macros to map the enums to the strings that will
> @@ -1388,6 +1402,58 @@ DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
>  	TP_ARGS(block_group, ffe_ctl)
>  );
>  
> +TRACE_EVENT(btrfs_find_free_extent_skip_block_group,
> +
> +	TP_PROTO(const struct btrfs_root *root,
> +		 const struct find_free_extent_ctl *ffe_ctl,
> +		 const struct btrfs_block_group *block_group,
> +		 int reason,
> +		 s64 error_or_value),
> +
> +	TP_ARGS(root, ffe_ctl, block_group, reason, error_or_value),
> +
> +	TP_STRUCT__entry_btrfs(
> +		__field(	u64,	root_objectid		)
> +		__field(	u64,	num_bytes		)
> +		__field(	u64,	empty_size		)
> +		__field(	u64,	flags			)
> +		__field(	int,	loop			)
> +		__field(	bool,	hinted			)
> +		__field(	int,	size_class		)
> +		__field(	u64,	bg_start		)
> +		__field(	u64,	bg_length		)
> +		__field(	u64,	bg_flags		)
> +		__field(	int,	reason			)
> +		__field(	s64,	error_or_value		)
> +	),
> +
> +	TP_fast_assign_btrfs(root->fs_info,
> +		__entry->root_objectid	= btrfs_root_id(root);
> +		__entry->num_bytes	= ffe_ctl->num_bytes;
> +		__entry->empty_size	= ffe_ctl->empty_size;
> +		__entry->flags		= ffe_ctl->flags;
> +		__entry->loop		= ffe_ctl->loop;
> +		__entry->hinted		= ffe_ctl->hinted;
> +		__entry->size_class	= ffe_ctl->size_class;
> +		__entry->bg_start	= block_group ? block_group->start : 0;
> +		__entry->bg_length	= block_group ? block_group->length : 0;
> +		__entry->bg_flags	= block_group ? block_group->flags : 0;
> +		__entry->reason		= reason;
> +		__entry->error_or_value	= error_or_value;
> +	),
> +
> +	TP_printk_btrfs(
> +"root=%llu(%s) len=%llu empty_size=%llu flags=%llu(%s) loop=%d hinted=%d size_class=%d bg=[%llu+%llu] bg_flags=%llu(%s) reason=%s error_or_value=%lld",
> +		  show_root_type(__entry->root_objectid),
> +		  __entry->num_bytes, __entry->empty_size, __entry->flags,
> +		  __print_flags((unsigned long)__entry->flags, "|", BTRFS_GROUP_FLAGS),
> +		  __entry->loop, __entry->hinted, __entry->size_class,
> +		  __entry->bg_start, __entry->bg_length, __entry->bg_flags,
> +		  __print_flags((unsigned long)__entry->bg_flags, "|", BTRFS_GROUP_FLAGS),
> +		  __print_symbolic(__entry->reason, FIND_FREE_EXTENT_SKIP_REASONS),
> +		  __entry->error_or_value)
> +);
> +
>  TRACE_EVENT(btrfs_find_cluster,
>  
>  	TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
> -- 
> 2.47.3
> 

  reply	other threads:[~2025-10-15  3:29 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-03 23:41 [PATCH v2 0/3] btrfs: find_free_extent cleanups Leo Martins
2025-10-03 23:41 ` [PATCH v2 1/3] btrfs: remove ffe RAID loop Leo Martins
2025-10-15  3:29   ` Boris Burkov
2025-10-03 23:41 ` [PATCH v2 2/3] btrfs: add tracing for find_free_extent skip conditions Leo Martins
2025-10-15  3:28   ` Boris Burkov [this message]
2025-10-03 23:41 ` [PATCH v2 3/3] fstests: btrfs: test RAID conversions under stress Leo Martins
2025-10-04  1:54   ` Qu Wenruo
2025-10-06 17:37     ` Leo Martins
2025-10-06 18:16       ` David Sterba
2025-10-15  3:31   ` Boris Burkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251015032817.GA1702774@zen.localdomain \
    --to=boris@bur.io \
    --cc=fstests@vger.kernel.org \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=loemra.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox