From: Ihor Solodrai <ihor.solodrai@linux.dev>
To: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Eduard Zingerman <eddyz87@gmail.com>,
Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Puranjay Mohan <puranjay@kernel.org>,
Shakeel Butt <shakeel.butt@linux.dev>,
bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
kernel-team@meta.com
Subject: Re: [PATCH bpf v3 3/3] bpf: Cache build IDs in sleepable stackmap path
Date: Thu, 14 May 2026 10:15:45 -0700 [thread overview]
Message-ID: <f35f3133-9f1e-4118-8d58-eb29f69f8940@linux.dev> (raw)
In-Reply-To: <6b4e0918-79a5-49ae-a172-c7d6df558dec@gmail.com>
On 5/14/26 7:58 AM, Mykyta Yatsenko wrote:
>
>
> On 5/12/26 4:29 AM, Ihor Solodrai wrote:
>> Stack traces often contain adjacent IPs from the same VMA or from
>> different VMAs backed by the same ELF file. Cache the last successfully
>> parsed build ID together with the resolved VMA range and backing file
>> so the sleepable build-ID path can avoid repeated VMA locking and file
>> parsing in common cases.
>>
>> Suggested-by: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>
>> Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
>> ---
>> kernel/bpf/stackmap.c | 51 ++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 48 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>> index c1e96df360c3..318ce9ed0dd5 100644
>> --- a/kernel/bpf/stackmap.c
>> +++ b/kernel/bpf/stackmap.c
>> @@ -226,13 +226,34 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
>> .vma = NULL,
>> .mm = mm,
>> };
>> - unsigned long vm_pgoff, vm_start;
>> + struct {
>> + struct file *file;
>> + const char *build_id;
>> + unsigned long vm_start;
>> + unsigned long vm_end;
>> + unsigned long vm_pgoff;
>> + } cache = {};
>> + unsigned long vm_pgoff, vm_start, vm_end;
>> struct vm_area_struct *vma;
>> struct file *file;
>> u64 ip;
>>
>> for (u32 i = 0; i < trace_nr; i++) {
>> ip = READ_ONCE(id_offs[i].ip);
>> +
>> + /*
>> + * Range cache fast path: if ip falls within the previously
>> +		 * resolved VMA range, reuse the cached build_id without
>> + * re-acquiring the VMA lock.
>> + */
>> + if (cache.build_id && ip >= cache.vm_start && ip < cache.vm_end) {
>> + memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
>> + vm_start = cache.vm_start;
>> + vm_end = cache.vm_end;
>> + vm_pgoff = cache.vm_pgoff;
>> + goto build_id_valid;
>> + }
>> +
>> vma = stack_map_lock_vma(&lock, ip);
>> if (!vma || !vma->vm_file) {
>> stack_map_build_id_set_ip(&id_offs[i]);
>> @@ -240,9 +261,22 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
>> continue;
>> }
>>
>> - file = get_file(vma->vm_file);
>> + file = vma->vm_file;
>> vm_pgoff = vma->vm_pgoff;
>> vm_start = vma->vm_start;
>> + vm_end = vma->vm_end;
>> +
>> + if (file == cache.file) {
>> + /*
>> + * Same backing file as previous (e.g. different VMAs
>> +			 * of the same ELF binary). Reuse the cached build_id.
>> + */
>> + memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
>> + stack_map_unlock_vma(&lock);
>> + goto build_id_valid;
>> + }
>> +
>> + file = get_file(file);
>> stack_map_unlock_vma(&lock);
>>
>> /* build_id_parse_file() may block on filesystem reads */
>> @@ -251,11 +285,22 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
>> fput(file);
>> continue;
>> }
>> - fput(file);
>>
>> + if (cache.file)
>> + fput(cache.file);
>> + cache.file = file;
>> + cache.build_id = id_offs[i].build_id;
>> +
>> +build_id_valid:
>> + cache.vm_start = vm_start;
>> + cache.vm_end = vm_end;
>> + cache.vm_pgoff = vm_pgoff;
>
> nit: Maybe we can also put
> if (cache.build_id != id_offs[i].build_id)
> 	memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
> here? So all writes to the id_offs[i] are at the same place for clearer structure?
Looks reasonable to me, will do this in v4.
>
> Overall it looks correct. Did you try to run it to see if we hit this cache at all?
I vibe coded a bit of instrumentation to check. Works on a trivial example.
I think real-world deep stacks should have a high cache hit rate here.
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 318ce9ed0dd5..be3c41b2c814 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -10,9 +10,19 @@
#include <linux/btf_ids.h>
#include <linux/buildid.h>
#include <linux/mmap_lock.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include "percpu_freelist.h"
#include "mmap_unlock_work.h"
+#define DEBUG_BUILD_ID_CACHE
+
+#ifdef DEBUG_BUILD_ID_CACHE
+static atomic64_t bid_cache_range_hits;
+static atomic64_t bid_cache_file_hits;
+static atomic64_t bid_cache_misses;
+#endif
+
#define STACK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \
BPF_F_STACK_BUILD_ID)
@@ -247,6 +257,9 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
* re-acquiring the VMA lock.
*/
if (cache.build_id && ip >= cache.vm_start && ip < cache.vm_end) {
+#ifdef DEBUG_BUILD_ID_CACHE
+ atomic64_inc(&bid_cache_range_hits);
+#endif
memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
vm_start = cache.vm_start;
vm_end = cache.vm_end;
@@ -271,6 +284,9 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
* Same backing file as previous (e.g. different VMAs
* of the same ELF binary). Reuse the cached build_id.
*/
+#ifdef DEBUG_BUILD_ID_CACHE
+ atomic64_inc(&bid_cache_file_hits);
+#endif
memcpy(id_offs[i].build_id, cache.build_id, BUILD_ID_SIZE_MAX);
stack_map_unlock_vma(&lock);
goto build_id_valid;
@@ -279,6 +295,9 @@ static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *i
file = get_file(file);
stack_map_unlock_vma(&lock);
+#ifdef DEBUG_BUILD_ID_CACHE
+ atomic64_inc(&bid_cache_misses);
+#endif
/* build_id_parse_file() may block on filesystem reads */
if (build_id_parse_file(file, id_offs[i].build_id, NULL)) {
stack_map_build_id_set_ip(&id_offs[i]);
@@ -943,3 +962,23 @@ const struct bpf_map_ops stack_trace_map_ops = {
.map_mem_usage = stack_map_mem_usage,
.map_btf_id = &stack_trace_map_btf_ids[0],
};
+
+#ifdef DEBUG_BUILD_ID_CACHE
+static int bid_cache_stats_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "range_hits %lld\nfile_hits %lld\nmisses %lld\n",
+ (long long)atomic64_read(&bid_cache_range_hits),
+ (long long)atomic64_read(&bid_cache_file_hits),
+ (long long)atomic64_read(&bid_cache_misses));
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(bid_cache_stats);
+
+static int __init bid_cache_debugfs_init(void)
+{
+ debugfs_create_file("bpf_stackmap_buildid_cache", 0444, NULL, NULL,
+ &bid_cache_stats_fops);
+ return 0;
+}
+late_initcall(bid_cache_debugfs_init);
+#endif /* DEBUG_BUILD_ID_CACHE */
diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c
index 3e58a86b8e25..e475298da6d4 100644
--- a/tools/testing/selftests/bpf/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/uprobe_multi.c
@@ -95,6 +95,23 @@ static int usdt(void)
extern char build_id_start[];
extern char build_id_end[];
+/* Deep-recursion driver to give the sleepable build_id cache a stack with
+ * many adjacent frames in the same VMA. Used by uprobe-deep mode.
+ */
+static int __attribute__((noinline)) recurse(int depth)
+{
+ if (depth <= 0) {
+ (void)uprobe();
+ return 0;
+ }
+ return recurse(depth - 1) + 1;
+}
+
+static int trigger_uprobe_deep(int depth)
+{
+ return recurse(depth);
+}
+
int __attribute__((weak)) trigger_uprobe(bool build_id_resident)
{
int page_sz = sysconf(_SC_PAGESIZE);
@@ -142,6 +159,8 @@ int main(int argc, char **argv)
return trigger_uprobe(false /* page-out build ID */);
if (!strcmp("uprobe-paged-in", argv[1]))
return trigger_uprobe(true /* page-in build ID */);
+ if (!strcmp("uprobe-deep", argv[1]))
+ return trigger_uprobe_deep(32);
error:
fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]);
> Acked-by: Mykyta Yatsenko <yatsenko@meta.com>
>
>> id_offs[i].offset = (vm_pgoff << PAGE_SHIFT) + ip - vm_start;
>> id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
>> }
>> +
>> + if (cache.file)
>> + fput(cache.file);
>> }
>>
>> /*
>
prev parent reply other threads:[~2026-05-14 17:15 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-12 3:29 [PATCH bpf v3 0/3] bpf: Implement stack_map_get_build_id_offset_sleepable() Ihor Solodrai
2026-05-12 3:29 ` [PATCH bpf v3 1/3] bpf: Factor out stack_map_build_id_set_ip() in stackmap.c Ihor Solodrai
2026-05-12 3:29 ` [PATCH bpf v3 2/3] bpf: Avoid faultable build ID reads under mm locks Ihor Solodrai
2026-05-12 4:16 ` bot+bpf-ci
2026-05-12 17:09 ` Ihor Solodrai
2026-05-13 4:03 ` sashiko-bot
2026-05-13 17:19 ` Ihor Solodrai
2026-05-12 3:29 ` [PATCH bpf v3 3/3] bpf: Cache build IDs in sleepable stackmap path Ihor Solodrai
2026-05-13 4:31 ` sashiko-bot
2026-05-14 14:58 ` Mykyta Yatsenko
2026-05-14 17:15 ` Ihor Solodrai [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f35f3133-9f1e-4118-8d58-eb29f69f8940@linux.dev \
--to=ihor.solodrai@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=memxor@gmail.com \
--cc=mykyta.yatsenko5@gmail.com \
--cc=puranjay@kernel.org \
--cc=shakeel.butt@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox