All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ihor Solodrai <ihor.solodrai@linux.dev>
To: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Eduard Zingerman <eddyz87@gmail.com>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Puranjay Mohan <puranjay@kernel.org>,
	Shakeel Butt <shakeel.butt@linux.dev>,
	bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
	kernel-team@meta.com
Subject: Re: [PATCH bpf v3 3/3] bpf: Cache build IDs in sleepable stackmap path
Date: Thu, 14 May 2026 10:15:45 -0700	[thread overview]
Message-ID: <f35f3133-9f1e-4118-8d58-eb29f69f8940@linux.dev> (raw)
In-Reply-To: <6b4e0918-79a5-49ae-a172-c7d6df558dec@gmail.com>

On 5/14/26 7:58 AM, Mykyta Yatsenko wrote:
> 
> 
> On 5/12/26 4:29 AM, Ihor Solodrai wrote:
>> Stack traces often contain adjacent IPs from the same VMA or from
>> different VMAs backed by the same ELF file. Cache the last successfully
>> parsed build ID together with the resolved VMA range and backing file
>> so the sleepable build-ID path can avoid repeated VMA locking and file
>> parsing in common cases.
>>
>> Suggested-by: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>
>> Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
>> ---
>>  kernel/bpf/stackmap.c | 51 ++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 48 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>> index c1e96df360c3..318ce9ed0dd5 100644
>> --- a/kernel/bpf/stackmap.c
>> +++ b/kernel/bpf/stackmap.c
>> @@ -226,13 +226,34 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
>>  		.vma = NULL,
>>  		.mm = mm,
>>  	};
>> -	unsigned long vm_pgoff, vm_start;
>> +	struct {
>> +		struct file *file;
>> +		const char *build_id;
>> +		unsigned long vm_start;
>> +		unsigned long vm_end;
>> +		unsigned long vm_pgoff;
>> +	} cache = {};
>> +	unsigned long vm_pgoff, vm_start, vm_end;
>>  	struct vm_area_struct *vma;
>>  	struct file *file;
>>  	u64 ip;
>>  
>>  	for (u32 i = 0; i < trace_nr; i++) {
>>  		ip = READ_ONCE(id_offs[i].ip);
>> +
>> +		/*
>> +		 * Range cache fast path: if ip falls within the previously
>> +		 * resolved VMA range, reuse the cached build_id without
>> +		 * re-acquiring the VMA lock.
>> +		 */
>> +		if (cache.build_id && ip >= cache.vm_start && ip < cache.vm_end) {
>> +			memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
>> +			vm_start = cache.vm_start;
>> +			vm_end = cache.vm_end;
>> +			vm_pgoff = cache.vm_pgoff;
>> +			goto build_id_valid;
>> +		}
>> +
>>  		vma = stack_map_lock_vma(&lock, ip);
>>  		if (!vma || !vma->vm_file) {
>>  			stack_map_build_id_set_ip(&id_offs[i]);
>> @@ -240,9 +261,22 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
>>  			continue;
>>  		}
>>  
>> -		file = get_file(vma->vm_file);
>> +		file = vma->vm_file;
>>  		vm_pgoff = vma->vm_pgoff;
>>  		vm_start = vma->vm_start;
>> +		vm_end = vma->vm_end;
>> +
>> +		if (file == cache.file) {
>> +			/*
>> +			 * Same backing file as previous (e.g. different VMAs
>> +			 * of the same ELF binary). Reuse the cached build_id.
>> +			 */
>> +			memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
>> +			stack_map_unlock_vma(&lock);
>> +			goto build_id_valid;
>> +		}
>> +
>> +		file = get_file(file);
>>  		stack_map_unlock_vma(&lock);
>>  
>>  		/* build_id_parse_file() may block on filesystem reads */
>> @@ -251,11 +285,22 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
>>  			fput(file);
>>  			continue;
>>  		}
>> -		fput(file);
>>  
>> +		if (cache.file)
>> +			fput(cache.file);
>> +		cache.file = file;
>> +		cache.build_id = id_offs[i].build_id;
>> +
>> +build_id_valid:
>> +		cache.vm_start = vm_start;
>> +		cache.vm_end = vm_end;
>> +		cache.vm_pgoff = vm_pgoff;
> 
> nit: Maybe we can also put 
> if (cache.build_id != id_offs[i].build_id)
> 	memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX); 
> here? So all writes to the id_offs[i] are at the same place for clearer structure?

Looks reasonable to me, will do this in v4.

> 
> Overall it looks correct. Did you try to run it to see if we hit this cache at all?

I vibe coded a bit of instrumentation to check. Works on a trivial example.
I think real-world deep stacks should have a high cache hit rate here.


diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 318ce9ed0dd5..be3c41b2c814 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -10,9 +10,19 @@
 #include <linux/btf_ids.h>
 #include <linux/buildid.h>
 #include <linux/mmap_lock.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 #include "percpu_freelist.h"
 #include "mmap_unlock_work.h"
 
+#define DEBUG_BUILD_ID_CACHE
+
+#ifdef DEBUG_BUILD_ID_CACHE
+static atomic64_t bid_cache_range_hits;
+static atomic64_t bid_cache_file_hits;
+static atomic64_t bid_cache_misses;
+#endif
+
 #define STACK_CREATE_FLAG_MASK					\
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |	\
 	 BPF_F_STACK_BUILD_ID)
@@ -247,6 +257,9 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
 		 * re-acquiring the VMA lock.
 		 */
 		if (cache.build_id && ip >= cache.vm_start && ip < cache.vm_end) {
+#ifdef DEBUG_BUILD_ID_CACHE
+			atomic64_inc(&bid_cache_range_hits);
+#endif
 			memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
 			vm_start = cache.vm_start;
 			vm_end = cache.vm_end;
@@ -271,6 +284,9 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
 			 * Same backing file as previous (e.g. different VMAs
 			 * of the same ELF binary). Reuse the cached build_id.
 			 */
+#ifdef DEBUG_BUILD_ID_CACHE
+			atomic64_inc(&bid_cache_file_hits);
+#endif
 			memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
 			stack_map_unlock_vma(&lock);
 			goto build_id_valid;
@@ -279,6 +295,9 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
 		file = get_file(file);
 		stack_map_unlock_vma(&lock);
 
+#ifdef DEBUG_BUILD_ID_CACHE
+		atomic64_inc(&bid_cache_misses);
+#endif
 		/* build_id_parse_file() may block on filesystem reads */
 		if (build_id_parse_file(file, id_offs[i].build_id, NULL)) {
 			stack_map_build_id_set_ip(&id_offs[i]);
@@ -943,3 +962,23 @@ const struct bpf_map_ops stack_trace_map_ops = {
 	.map_mem_usage = stack_map_mem_usage,
 	.map_btf_id = &stack_trace_map_btf_ids[0],
 };
+
+#ifdef DEBUG_BUILD_ID_CACHE
+static int bid_cache_stats_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "range_hits %lld\nfile_hits %lld\nmisses %lld\n",
+		   (long long)atomic64_read(&bid_cache_range_hits),
+		   (long long)atomic64_read(&bid_cache_file_hits),
+		   (long long)atomic64_read(&bid_cache_misses));
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(bid_cache_stats);
+
+static int __init bid_cache_debugfs_init(void)
+{
+	debugfs_create_file("bpf_stackmap_buildid_cache", 0444, NULL, NULL,
+			    &bid_cache_stats_fops);
+	return 0;
+}
+late_initcall(bid_cache_debugfs_init);
+#endif /* DEBUG_BUILD_ID_CACHE */
diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c
index 3e58a86b8e25..e475298da6d4 100644
--- a/tools/testing/selftests/bpf/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/uprobe_multi.c
@@ -95,6 +95,23 @@ static int usdt(void)
 extern char build_id_start[];
 extern char build_id_end[];
 
+/* Deep-recursion driver to give the sleepable build_id cache a stack with
+ * many adjacent frames in the same VMA. Used by uprobe-deep mode.
+ */
+static int __attribute__((noinline)) recurse(int depth)
+{
+	if (depth <= 0) {
+		(void)uprobe();
+		return 0;
+	}
+	return recurse(depth - 1) + 1;
+}
+
+static int trigger_uprobe_deep(int depth)
+{
+	return recurse(depth);
+}
+
 int __attribute__((weak)) trigger_uprobe(bool build_id_resident)
 {
 	int page_sz = sysconf(_SC_PAGESIZE);
@@ -142,6 +159,8 @@ int main(int argc, char **argv)
 		return trigger_uprobe(false /* page-out build ID */);
 	if (!strcmp("uprobe-paged-in", argv[1]))
 		return trigger_uprobe(true /* page-in build ID */);
+	if (!strcmp("uprobe-deep", argv[1]))
+		return trigger_uprobe_deep(32);
 
 error:
 	fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]);



> Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
> 
>>  		id_offs[i].offset = (vm_pgoff << PAGE_SHIFT) + ip - vm_start;
>>  		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
>>  	}
>> +
>> +	if (cache.file)
>> +		fput(cache.file);
>>  }
>>  
>>  /*
> 


      reply	other threads:[~2026-05-14 17:15 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-12  3:29 [PATCH bpf v3 0/3] bpf: Implement stack_map_get_build_id_offset_sleepable() Ihor Solodrai
2026-05-12  3:29 ` [PATCH bpf v3 1/3] bpf: Factor out stack_map_build_id_set_ip() in stackmap.c Ihor Solodrai
2026-05-12  3:29 ` [PATCH bpf v3 2/3] bpf: Avoid faultable build ID reads under mm locks Ihor Solodrai
2026-05-12  4:16   ` bot+bpf-ci
2026-05-12 17:09     ` Ihor Solodrai
2026-05-13  4:03   ` sashiko-bot
2026-05-13 17:19     ` Ihor Solodrai
2026-05-12  3:29 ` [PATCH bpf v3 3/3] bpf: Cache build IDs in sleepable stackmap path Ihor Solodrai
2026-05-13  4:31   ` sashiko-bot
2026-05-14 14:58   ` Mykyta Yatsenko
2026-05-14 17:15     ` Ihor Solodrai [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f35f3133-9f1e-4118-8d58-eb29f69f8940@linux.dev \
    --to=ihor.solodrai@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=memxor@gmail.com \
    --cc=mykyta.yatsenko5@gmail.com \
    --cc=puranjay@kernel.org \
    --cc=shakeel.butt@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.