From: Ihor Solodrai <ihor.solodrai@linux.dev>
To: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Eduard Zingerman <eddyz87@gmail.com>,
Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Puranjay Mohan <puranjay@kernel.org>,
Shakeel Butt <shakeel.butt@linux.dev>,
Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>,
bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
kernel-team@meta.com
Subject: [PATCH bpf-next v6 2/3] bpf: Avoid faultable build ID reads under mm locks
Date: Thu, 21 May 2026 15:50:21 -0700 [thread overview]
Message-ID: <20260521225022.2695755-3-ihor.solodrai@linux.dev> (raw)
In-Reply-To: <20260521225022.2695755-1-ihor.solodrai@linux.dev>
Sleepable build ID parsing can block in __kernel_read() [1], so the
stackmap sleepable path must not call it while holding mmap_lock or a
per-VMA read lock.
The issue and the fix are conceptually similar to a recent procfs
patch [2].
Resolve each covered VMA with a stable read-side reference, preferring
lock_vma_under_rcu() and falling back to mmap_read_trylock() only long
enough to acquire the VMA read lock. Take a reference to the backing
file, drop the VMA lock, and then parse the build ID through
(sleepable) build_id_parse_file().
We have to use mmap_read_trylock() (and give up on failure) in this
context because taking mmap_read_lock() is generally unsafe on code
paths reachable from BPF programs [3], and may lead to deadlocks.
[1] https://lore.kernel.org/all/20251218005818.614819-1-shakeel.butt@linux.dev/
[2] https://lore.kernel.org/all/20260128183232.2854138-1-andrii@kernel.org/
[3] https://lore.kernel.org/bpf/2895ecd8-df1e-4cc0-b9f9-aef893dc2360@linux.dev/
Fixes: d4dd9775ec24 ("bpf: wire up sleepable bpf_get_stack() and bpf_get_task_stack() helpers")
Suggested-by: Puranjay Mohan <puranjay@kernel.org>
Signed-off-by: Ihor Solodrai <ihor.solodrai@linux.dev>
---
kernel/bpf/stackmap.c | 107 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 107 insertions(+)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 4c753e02c415..95336c0e8b56 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -9,6 +9,7 @@
#include <linux/perf_event.h>
#include <linux/btf_ids.h>
#include <linux/buildid.h>
+#include <linux/mmap_lock.h>
#include "percpu_freelist.h"
#include "mmap_unlock_work.h"
@@ -174,6 +175,107 @@ static inline void stack_map_build_id_set_valid(struct bpf_stack_build_id *id,
memcpy(id->build_id, build_id, BUILD_ID_SIZE_MAX);
}
+struct stack_map_vma_lock {
+ bool vma_locked; /* true from a successful stack_map_lock_vma() until unlock */
+ struct vm_area_struct *vma; /* VMA handed to vma_end_read() on unlock */
+ struct mm_struct *mm; /* address space that stack_map_lock_vma() searches */
+};
+
+static struct vm_area_struct *stack_map_lock_vma(struct stack_map_vma_lock *lock, unsigned long ip)
+{
+ struct mm_struct *mm = lock->mm;
+ struct vm_area_struct *vma;
+
+ if (WARN_ON_ONCE(!mm)) /* a NULL lock->mm is treated as a caller bug */
+ return NULL;
+
+ vma = lock_vma_under_rcu(mm, ip); /* preferred path, tried before mmap_lock */
+ if (vma)
+ goto vma_locked;
+
+ /*
+ * Taking mmap_read_lock() is unsafe here, because the caller
+ * BPF program might already hold it, causing a deadlock.
+ */
+ if (!mmap_read_trylock(mm)) /* trylock failed: give up on this ip */
+ return NULL;
+
+ vma = vma_lookup(mm, ip);
+ if (!vma) { /* lookup found no VMA for ip */
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+#ifdef CONFIG_PER_VMA_LOCK
+ if (!vma_start_read_locked(vma)) { /* could not take the per-VMA read lock */
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+ mmap_read_unlock(mm); /* mmap_lock was only needed to take the VMA lock */
+#else
+ mmap_read_unlock(mm);
+ return NULL; /* without per-VMA locks this fallback never succeeds */
+#endif
+vma_locked:
+ lock->vma_locked = true; /* tells stack_map_unlock_vma() to call vma_end_read() */
+ lock->vma = vma;
+ return vma;
+}
+
+static void stack_map_unlock_vma(struct stack_map_vma_lock *lock)
+{
+ struct vm_area_struct *vma = lock->vma;
+
+ if (lock->vma_locked) { /* nothing to release unless a lock attempt succeeded */
+ if (WARN_ON_ONCE(!vma)) /* locked flag set without a VMA: skip vma_end_read() */
+ goto out;
+ vma_end_read(vma);
+ }
+out:
+ lock->vma_locked = false; /* reset so the struct can be reused for the next ip */
+ lock->vma = NULL;
+}
+
+static void stack_map_get_build_id_offset_sleepable(struct bpf_stack_build_id *id_offs,
+ u32 trace_nr)
+{
+ struct mm_struct *mm = current->mm; /* mm of the current task */
+ struct stack_map_vma_lock lock = {
+ .vma_locked = false,
+ .vma = NULL,
+ .mm = mm,
+ };
+ struct vm_area_struct *vma;
+ struct file *file;
+ u64 offset;
+ u64 ip;
+
+ for (u32 i = 0; i < trace_nr; i++) { /* each entry is locked and resolved on its own */
+ ip = READ_ONCE(id_offs[i].ip);
+
+ vma = stack_map_lock_vma(&lock, ip);
+ if (!vma || !vma->vm_file) { /* lock/lookup failed, or the VMA has no file */
+ stack_map_build_id_set_ip(&id_offs[i]); /* fall back to reporting the ip */
+ stack_map_unlock_vma(&lock); /* releases nothing if no VMA was locked */
+ continue;
+ }
+
+ file = get_file(vma->vm_file); /* own reference, used after the VMA lock is dropped */
+ offset = stack_map_build_id_offset(vma->vm_pgoff, vma->vm_start, ip);
+ stack_map_unlock_vma(&lock); /* vma is not touched after this point */
+
+ /* build_id_parse_file() may block on filesystem reads */
+ if (build_id_parse_file(file, id_offs[i].build_id, NULL)) { /* non-zero: fall back */
+ stack_map_build_id_set_ip(&id_offs[i]);
+ fput(file);
+ continue;
+ }
+ fput(file);
+
+ stack_map_build_id_set_valid(&id_offs[i], offset, id_offs[i].build_id);
+ }
+}
+
/*
* Expects all id_offs[i].ip values to be set to correct initial IPs.
* They will be subsequently:
@@ -194,6 +296,11 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
const unsigned char *prev_build_id;
int i;
+ if (may_fault && has_user_ctx) {
+ stack_map_get_build_id_offset_sleepable(id_offs, trace_nr);
+ return;
+ }
+
/* If the irq_work is in use, fall back to report ips. Same
* fallback is used for kernel stack (!user) on a stackmap with
* build_id.
--
2.54.0
next prev parent reply other threads:[~2026-05-21 22:51 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-21 22:50 [PATCH bpf-next v6 0/3] bpf: Implement stack_map_get_build_id_offset_sleepable() Ihor Solodrai
2026-05-21 22:50 ` [PATCH bpf-next v6 1/3] bpf: Factor out stack_map build ID helpers Ihor Solodrai
2026-05-21 23:16 ` sashiko-bot
2026-05-21 23:32 ` Ihor Solodrai
2026-05-22 17:17 ` Andrii Nakryiko
2026-05-22 17:16 ` Andrii Nakryiko
2026-05-22 17:33 ` Ihor Solodrai
2026-05-22 17:50 ` Andrii Nakryiko
2026-05-21 22:50 ` Ihor Solodrai [this message]
2026-05-21 23:33 ` [PATCH bpf-next v6 2/3] bpf: Avoid faultable build ID reads under mm locks bot+bpf-ci
2026-05-21 23:41 ` sashiko-bot
2026-05-22 17:42 ` Andrii Nakryiko
2026-05-22 18:04 ` Ihor Solodrai
2026-05-22 18:14 ` Andrii Nakryiko
2026-05-21 22:50 ` [PATCH bpf-next v6 3/3] bpf: Cache build IDs in sleepable stackmap path Ihor Solodrai
2026-05-21 23:33 ` bot+bpf-ci
2026-05-22 0:13 ` sashiko-bot
2026-05-22 17:46 ` Andrii Nakryiko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260521225022.2695755-3-ihor.solodrai@linux.dev \
--to=ihor.solodrai@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=memxor@gmail.com \
--cc=mykyta.yatsenko5@gmail.com \
--cc=puranjay@kernel.org \
--cc=shakeel.butt@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.