From: Puranjay Mohan <puranjay@kernel.org>
To: bpf@vger.kernel.org
Cc: Puranjay Mohan <puranjay@kernel.org>,
Puranjay Mohan <puranjay12@gmail.com>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <martin.lau@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
Kumar Kartikeya Dwivedi <memxor@gmail.com>,
Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>,
kernel-team@meta.com
Subject: [PATCH bpf v5 5/8] bpf: Add KF_FORBID_FAULT modifier for KF_ACQUIRE kfuncs
Date: Thu, 26 Feb 2026 08:14:54 -0800 [thread overview]
Message-ID: <20260226161500.775715-6-puranjay@kernel.org> (raw)
In-Reply-To: <20260226161500.775715-1-puranjay@kernel.org>
With KF_ACQUIRE support for iterators in place, we need a way to tell
the verifier that holding a particular acquired reference forbids
faulting. For example, task_vma's _next holds mmap_lock, so any
operation that might trigger a page fault between _next and _release
must be rejected to avoid deadlocking on mmap_lock re-acquisition.
Note that mmap_lock is a sleeping lock (rw_semaphore), so sleeping
itself is fine while holding it. The actual constraint is about
faulting, not sleeping. The flag is named KF_FORBID_FAULT to reflect
this. The current implementation conservatively blocks all sleepable
operations while the reference is held.
Add a KF_FORBID_FAULT flag (1 << 17) that can be combined with
KF_ACQUIRE. When acquire_reference() is called for such a kfunc, the
reference is tagged with forbid_fault=true and a per-state
forbid_fault_count counter is incremented. When the reference is
released through release_reference_state(), the counter is decremented.
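The counter is checked in in_sleepable_context(), where the verifier
decides whether sleeping is allowed, and
non_sleepable_context_description() reports it as a "nofault region".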
This is generic and works for both iterator and non-iterator kfuncs.
For iterators, the auto-release and explicit _release added earlier in
this series (patch 1/8, "bpf: Add KF_ACQUIRE and KF_RELEASE support for
iterators") handle the counter decrement automatically via
release_reference().
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
include/linux/bpf_verifier.h | 2 ++
include/linux/btf.h | 1 +
kernel/bpf/verifier.c | 36 +++++++++++++++++++++++++++++-------
3 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ef8e45a362d9..11102e1b53ef 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -261,6 +261,7 @@ struct bpf_reference_state {
* it matches on unlock.
*/
void *ptr;
+ bool forbid_fault; /* ref prevents faulting while held */
};
struct bpf_retval_range {
@@ -421,6 +422,7 @@ struct bpf_verifier_state {
u32 active_lock_id;
void *active_lock_ptr;
u32 active_rcu_locks;
+ u32 forbid_fault_count;
bool speculative;
bool in_sleepable;
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 48108471c5b1..fec8298692a2 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -79,6 +79,7 @@
#define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */
#define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */
#define KF_IMPLICIT_ARGS (1 << 16) /* kfunc has implicit arguments supplied by the verifier */
+#define KF_FORBID_FAULT (1 << 17) /* acquired reference forbids faulting while held */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 061f93d0c2c2..cb69761332ab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -202,7 +202,7 @@ struct bpf_verifier_stack_elem {
#define BPF_PRIV_STACK_MIN_SIZE 64
-static int acquire_reference(struct bpf_verifier_env *env, int insn_idx);
+static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, bool forbid_fault);
static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
@@ -807,7 +807,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
if (clone_ref_obj_id)
id = clone_ref_obj_id;
else
- id = acquire_reference(env, insn_idx);
+ id = acquire_reference(env, insn_idx, false);
if (id < 0)
return id;
@@ -1055,7 +1055,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
if (spi < 0)
return spi;
- id = acquire_reference(env, insn_idx);
+ id = acquire_reference(env, insn_idx, false);
if (id < 0)
return id;
@@ -1476,6 +1476,7 @@ static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf
dst->active_irq_id = src->active_irq_id;
dst->active_lock_id = src->active_lock_id;
dst->active_lock_ptr = src->active_lock_ptr;
+ dst->forbid_fault_count = src->forbid_fault_count;
return 0;
}
@@ -1549,7 +1550,7 @@ static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_e
return &state->refs[new_ofs];
}
-static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
+static int acquire_reference(struct bpf_verifier_env *env, int insn_idx, bool forbid_fault)
{
struct bpf_reference_state *s;
@@ -1558,6 +1559,9 @@ static int acquire_reference(struct bpf_verifier_env *env, int insn_idx)
return -ENOMEM;
s->type = REF_TYPE_PTR;
s->id = ++env->id_gen;
+ s->forbid_fault = forbid_fault;
+ if (forbid_fault)
+ env->cur_state->forbid_fault_count++;
return s->id;
}
@@ -1600,6 +1604,9 @@ static void release_reference_state(struct bpf_verifier_state *state, int idx)
int last_idx;
size_t rem;
+ if (state->refs[idx].forbid_fault)
+ state->forbid_fault_count--;
+
/* IRQ state requires the relative ordering of elements remaining the
* same, since it relies on the refs array to behave as a stack, so that
* it can detect out-of-order IRQ restore. Hence use memmove to shift
@@ -11589,6 +11596,7 @@ static inline bool in_sleepable_context(struct bpf_verifier_env *env)
!env->cur_state->active_preempt_locks &&
!env->cur_state->active_locks &&
!env->cur_state->active_irq_id &&
+ !env->cur_state->forbid_fault_count &&
in_sleepable(env);
}
@@ -11602,6 +11610,8 @@ static const char *non_sleepable_context_description(struct bpf_verifier_env *en
return "IRQ-disabled region";
if (env->cur_state->active_locks)
return "lock region";
+ if (env->cur_state->forbid_fault_count)
+ return "nofault region";
return "non-sleepable prog";
}
@@ -12047,7 +12057,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
} else if (is_acquire_function(func_id, meta.map.ptr)) {
- int id = acquire_reference(env, insn_idx);
+ int id = acquire_reference(env, insn_idx, false);
if (id < 0)
return id;
@@ -12152,6 +12162,11 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_RELEASE;
}
+static bool is_kfunc_forbid_fault(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_FORBID_FAULT;
+}
+
static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_SLEEPABLE;
@@ -14403,6 +14418,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
if (is_kfunc_acquire(&meta)) {
+ bool forbid_fault = is_kfunc_forbid_fault(&meta);
int id;
/*
@@ -14417,7 +14433,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return err;
}
- id = acquire_reference(env, insn_idx);
+ id = acquire_reference(env, insn_idx, forbid_fault);
if (id < 0)
return id;
@@ -20091,6 +20107,9 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *c
if (old->active_rcu_locks != cur->active_rcu_locks)
return false;
+ if (old->forbid_fault_count != cur->forbid_fault_count)
+ return false;
+
if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap))
return false;
@@ -20104,6 +20123,9 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *c
return false;
switch (old->refs[i].type) {
case REF_TYPE_PTR:
+ if (old->refs[i].forbid_fault != cur->refs[i].forbid_fault)
+ return false;
+ break;
case REF_TYPE_IRQ:
break;
case REF_TYPE_LOCK:
@@ -24629,7 +24651,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
for (i = 0; i < aux->ctx_arg_info_size; i++)
aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
- acquire_reference(env, 0) : 0;
+ acquire_reference(env, 0, false) : 0;
}
ret = do_check(env);
--
2.47.3
next prev parent reply other threads:[~2026-02-26 16:15 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-26 16:14 [PATCH bpf v5 0/8] Introduce KF_FORBID_FAULT modifier for acquire/release kfuncs Puranjay Mohan
2026-02-26 16:14 ` [PATCH bpf v5 1/8] bpf: Add KF_ACQUIRE and KF_RELEASE support for iterators Puranjay Mohan
2026-02-27 0:46 ` Alexei Starovoitov
2026-02-26 16:14 ` [PATCH bpf v5 2/8] bpf: consolidate sleepable checks in check_helper_call() Puranjay Mohan
2026-02-26 18:36 ` Eduard Zingerman
2026-02-26 16:14 ` [PATCH bpf v5 3/8] bpf: consolidate sleepable checks in check_kfunc_call() Puranjay Mohan
2026-02-26 16:14 ` [PATCH bpf v5 4/8] bpf: consolidate sleepable checks in check_func_call() Puranjay Mohan
2026-02-26 19:00 ` Eduard Zingerman
2026-02-26 16:14 ` Puranjay Mohan [this message]
2026-02-26 16:14 ` [PATCH bpf v5 6/8] bpf: Move locking to bpf_iter_task_vma_next() Puranjay Mohan
2026-02-26 16:14 ` [PATCH bpf v5 7/8] bpf: Add split iteration support to task_vma iterator Puranjay Mohan
2026-02-26 16:14 ` [PATCH bpf v5 8/8] selftests/bpf: Add tests for split " Puranjay Mohan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260226161500.775715-6-puranjay@kernel.org \
--to=puranjay@kernel.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=kernel-team@meta.com \
--cc=martin.lau@kernel.org \
--cc=memxor@gmail.com \
--cc=mykyta.yatsenko5@gmail.com \
--cc=puranjay12@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox