* [PATCH bpf-next v5 1/5] bpf: Add sleepable support for raw tracepoint programs
2026-03-16 21:46 [PATCH bpf-next v5 0/5] bpf: Add support for sleepable tracepoint programs Mykyta Yatsenko
@ 2026-03-16 21:46 ` Mykyta Yatsenko
2026-03-16 21:46 ` [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic " Mykyta Yatsenko
` (3 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: Mykyta Yatsenko @ 2026-03-16 21:46 UTC (permalink / raw)
To: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87; +Cc: Mykyta Yatsenko
From: Mykyta Yatsenko <yatsenko@meta.com>
Rework __bpf_trace_run() to support sleepable BPF programs by using
explicit RCU flavor selection, following the uprobe_prog_run() pattern.
For sleepable programs, use rcu_read_lock_tasks_trace() for lifetime
protection and add a might_fault() annotation. For non-sleepable
programs, use the regular rcu_read_lock(). Replace the combined
rcu_read_lock_dont_migrate() with separate rcu_read_lock()/
migrate_disable() calls, since sleepable programs need
rcu_read_lock_tasks_trace() instead of rcu_read_lock().
Remove the preempt_disable_notrace/preempt_enable_notrace pair from
the faultable tracepoint BPF probe wrapper in bpf_probe.h, since
migration protection and RCU locking are now handled per-program
inside __bpf_trace_run().
This prepares the runtime execution path for both BTF-based raw
tracepoints (tp_btf) and classic raw tracepoints (raw_tp) to support
sleepable BPF programs on faultable tracepoints (e.g. syscall
tracepoints). The verifier changes to allow loading sleepable
programs are in a subsequent patch.
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
include/trace/bpf_probe.h | 2 --
kernel/trace/bpf_trace.c | 21 ++++++++++++++++++---
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 9391d54d3f12..d1de8f9aa07f 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -58,9 +58,7 @@ static notrace void \
__bpf_trace_##call(void *__data, proto) \
{ \
might_fault(); \
- preempt_disable_notrace(); \
CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \
- preempt_enable_notrace(); \
}
#undef DECLARE_EVENT_SYSCALL_CLASS
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0b040a417442..35ed53807cfd 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2072,11 +2072,18 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
static __always_inline
void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
{
+ struct srcu_ctr __percpu *scp = NULL;
struct bpf_prog *prog = link->link.prog;
+ bool sleepable = prog->sleepable;
struct bpf_run_ctx *old_run_ctx;
struct bpf_trace_run_ctx run_ctx;
- rcu_read_lock_dont_migrate();
+ if (sleepable)
+ scp = rcu_read_lock_tasks_trace();
+ else
+ rcu_read_lock();
+
+ migrate_disable();
if (unlikely(!bpf_prog_get_recursion_context(prog))) {
bpf_prog_inc_misses_counter(prog);
goto out;
@@ -2085,12 +2092,20 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
run_ctx.bpf_cookie = link->cookie;
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
- (void) bpf_prog_run(prog, args);
+ if (sleepable)
+ might_fault();
+
+ (void)bpf_prog_run(prog, args);
bpf_reset_run_ctx(old_run_ctx);
out:
bpf_prog_put_recursion_context(prog);
- rcu_read_unlock_migrate();
+ migrate_enable();
+
+ if (sleepable)
+ rcu_read_unlock_tasks_trace(scp);
+ else
+ rcu_read_unlock();
}
#define UNPACK(...) __VA_ARGS__
--
2.52.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic tracepoint programs
2026-03-16 21:46 [PATCH bpf-next v5 0/5] bpf: Add support for sleepable tracepoint programs Mykyta Yatsenko
2026-03-16 21:46 ` [PATCH bpf-next v5 1/5] bpf: Add sleepable support for raw " Mykyta Yatsenko
@ 2026-03-16 21:46 ` Mykyta Yatsenko
2026-03-16 22:22 ` bot+bpf-ci
2026-03-23 20:38 ` Kumar Kartikeya Dwivedi
2026-03-16 21:46 ` [PATCH bpf-next v5 3/5] bpf: Verifier support for sleepable " Mykyta Yatsenko
` (2 subsequent siblings)
4 siblings, 2 replies; 12+ messages in thread
From: Mykyta Yatsenko @ 2026-03-16 21:46 UTC (permalink / raw)
To: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87; +Cc: Mykyta Yatsenko
From: Mykyta Yatsenko <yatsenko@meta.com>
Add trace_call_bpf_faultable(), a variant of trace_call_bpf() for
faultable tracepoints that supports sleepable BPF programs. It uses
rcu_tasks_trace for lifetime protection and bpf_prog_run_array_uprobe()
for per-program RCU flavor selection, following the uprobe_prog_run()
pattern. Uses preempt-safe this_cpu_inc_return/this_cpu_dec for the
bpf_prog_active recursion counter since preemption is enabled in this
context.
Restructure perf_syscall_enter() and perf_syscall_exit() to run BPF
filter before perf event processing. Previously, BPF ran after the
per-cpu perf trace buffer was allocated under preempt_disable,
requiring cleanup via perf_swevent_put_recursion_context() on filter.
Now BPF runs in faultable context before preempt_disable, reading
syscall arguments from local variables instead of the per-cpu trace
record, removing the dependency on buffer allocation. This allows
sleepable BPF programs to execute and avoids unnecessary buffer
allocation when BPF filters the event. The perf event submission
path (buffer allocation, fill, submit) remains under preempt_disable
as before.
Add an attach-time check in __perf_event_set_bpf_prog() to reject
sleepable BPF_PROG_TYPE_TRACEPOINT programs on non-syscall
tracepoints, since only syscall tracepoints run in faultable context.
This prepares the classic tracepoint runtime and attach paths for
sleepable programs. The verifier changes to allow loading sleepable
BPF_PROG_TYPE_TRACEPOINT programs are in a subsequent patch.
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
include/linux/trace_events.h | 6 +++
kernel/events/core.c | 9 ++++
kernel/trace/bpf_trace.c | 39 ++++++++++++++++
kernel/trace/trace_syscalls.c | 104 +++++++++++++++++++++++-------------------
4 files changed, 110 insertions(+), 48 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 37eb2f0f3dd8..5fbbeb9ec4b9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -767,6 +767,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
+unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
void perf_event_detach_bpf_prog(struct perf_event *event);
int perf_event_query_prog_array(struct perf_event *event, void __user *info);
@@ -789,6 +790,11 @@ static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *c
return 1;
}
+static inline unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
+{
+ return 1;
+}
+
static inline int
perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie)
{
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1f5699b339ec..46b733d3dd41 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11647,6 +11647,15 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
/* only uprobe programs are allowed to be sleepable */
return -EINVAL;
+ if (prog->type == BPF_PROG_TYPE_TRACEPOINT && prog->sleepable) {
+ /*
+ * Sleepable tracepoint programs can only attach to faultable
+ * tracepoints. Currently only syscall tracepoints are faultable.
+ */
+ if (!is_syscall_tp)
+ return -EINVAL;
+ }
+
/* Kprobe override only works for kprobes, not uprobes. */
if (prog->kprobe_override && !is_kprobe)
return -EINVAL;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 35ed53807cfd..69c9a5539e65 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -152,6 +152,45 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
return ret;
}
+/**
+ * trace_call_bpf_faultable - invoke BPF program in faultable context
+ * @call: tracepoint event
+ * @ctx: opaque context pointer
+ *
+ * Variant of trace_call_bpf() for faultable tracepoints (e.g. syscall
+ * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace
+ * for lifetime protection and per-program rcu_read_lock for non-sleepable
+ * programs, following the uprobe_prog_run() pattern.
+ *
+ * Must be called from a faultable/preemptible context.
+ */
+unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
+{
+ struct bpf_prog_array *prog_array;
+ unsigned int ret;
+
+ might_fault();
+
+ guard(rcu_tasks_trace)();
+ guard(migrate)();
+
+ if (unlikely(this_cpu_inc_return(bpf_prog_active) != 1)) {
+ scoped_guard(rcu) {
+ bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
+ }
+ this_cpu_dec(bpf_prog_active);
+ return 0;
+ }
+
+ prog_array = rcu_dereference_check(call->prog_array,
+ rcu_read_lock_trace_held());
+ ret = bpf_prog_run_array_uprobe(prog_array, ctx, bpf_prog_run);
+
+ this_cpu_dec(bpf_prog_active);
+
+ return ret;
+}
+
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 37317b81fcda..8a7677c15f93 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1372,26 +1372,27 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
-static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
+static int perf_call_bpf_enter(struct trace_event_call *call,
struct syscall_metadata *sys_data,
- struct syscall_trace_enter *rec)
+ int syscall_nr, unsigned long *args)
{
struct syscall_tp_t {
struct trace_entry ent;
int syscall_nr;
unsigned long args[SYSCALL_DEFINE_MAXARGS];
} __aligned(8) param;
+ struct pt_regs regs = {};
int i;
BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *));
- /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
- perf_fetch_caller_regs(regs);
- *(struct pt_regs **)&param = regs;
- param.syscall_nr = rec->nr;
+ /* bpf prog requires 'regs' to be the first member in the ctx */
+ perf_fetch_caller_regs(&regs);
+ *(struct pt_regs **)&param = &regs;
+ param.syscall_nr = syscall_nr;
for (i = 0; i < sys_data->nb_args; i++)
- param.args[i] = rec->args[i];
- return trace_call_bpf(call, &param);
+ param.args[i] = args[i];
+ return trace_call_bpf_faultable(call, &param);
}
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
@@ -1411,12 +1412,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
int size = 0;
int uargs = 0;
- /*
- * Syscall probe called with preemption enabled, but the ring
- * buffer and per-cpu data require preemption to be disabled.
- */
might_fault();
- guard(preempt_notrace)();
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
@@ -1430,6 +1426,26 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
syscall_get_arguments(current, regs, args);
+ /*
+ * Run BPF filter in faultable context before per-cpu buffer
+ * allocation, allowing sleepable BPF programs to execute.
+ */
+ valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
+ if (valid_prog_array &&
+ !perf_call_bpf_enter(sys_data->enter_event, sys_data,
+ syscall_nr, args))
+ return;
+
+ /*
+ * Per-cpu ring buffer and perf event list operations require
+ * preemption to be disabled.
+ */
+ guard(preempt_notrace)();
+
+ head = this_cpu_ptr(sys_data->enter_event->perf_events);
+ if (hlist_empty(head))
+ return;
+
/* Check if this syscall event faults in user space memory */
mayfault = sys_data->user_mask != 0;
@@ -1439,11 +1455,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
return;
}
- head = this_cpu_ptr(sys_data->enter_event->perf_events);
- valid_prog_array = bpf_prog_array_valid(sys_data->enter_event);
- if (!valid_prog_array && hlist_empty(head))
- return;
-
/* get the size after alignment with the u32 buffer size field */
size += sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
size = ALIGN(size + sizeof(u32), sizeof(u64));
@@ -1459,13 +1470,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
if (mayfault)
syscall_put_data(sys_data, rec, user_ptr, size, user_sizes, uargs);
- if ((valid_prog_array &&
- !perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||
- hlist_empty(head)) {
- perf_swevent_put_recursion_context(rctx);
- return;
- }
-
perf_trace_buf_submit(rec, size, rctx,
sys_data->enter_event->event.type, 1, regs,
head, NULL);
@@ -1515,21 +1519,22 @@ static void perf_sysenter_disable(struct trace_event_call *call)
syscall_fault_buffer_disable();
}
-static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
- struct syscall_trace_exit *rec)
+static int perf_call_bpf_exit(struct trace_event_call *call,
+ int syscall_nr, long ret_val)
{
struct syscall_tp_t {
struct trace_entry ent;
int syscall_nr;
unsigned long ret;
} __aligned(8) param;
-
- /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
- perf_fetch_caller_regs(regs);
- *(struct pt_regs **)&param = regs;
- param.syscall_nr = rec->nr;
- param.ret = rec->ret;
- return trace_call_bpf(call, &param);
+ struct pt_regs regs = {};
+
+ /* bpf prog requires 'regs' to be the first member in the ctx */
+ perf_fetch_caller_regs(&regs);
+ *(struct pt_regs **)&param = &regs;
+ param.syscall_nr = syscall_nr;
+ param.ret = ret_val;
+ return trace_call_bpf_faultable(call, &param);
}
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
@@ -1543,12 +1548,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
int rctx;
int size;
- /*
- * Syscall probe called with preemption enabled, but the ring
- * buffer and per-cpu data require preemption to be disabled.
- */
might_fault();
- guard(preempt_notrace)();
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
@@ -1560,9 +1560,24 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
if (!sys_data)
return;
- head = this_cpu_ptr(sys_data->exit_event->perf_events);
+ /*
+ * Run BPF filter in faultable context before per-cpu buffer
+ * allocation, allowing sleepable BPF programs to execute.
+ */
valid_prog_array = bpf_prog_array_valid(sys_data->exit_event);
- if (!valid_prog_array && hlist_empty(head))
+ if (valid_prog_array &&
+ !perf_call_bpf_exit(sys_data->exit_event, syscall_nr,
+ syscall_get_return_value(current, regs)))
+ return;
+
+ /*
+ * Per-cpu ring buffer and perf event list operations require
+ * preemption to be disabled.
+ */
+ guard(preempt_notrace)();
+
+ head = this_cpu_ptr(sys_data->exit_event->perf_events);
+ if (hlist_empty(head))
return;
/* We can probably do that at build time */
@@ -1576,13 +1591,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);
- if ((valid_prog_array &&
- !perf_call_bpf_exit(sys_data->exit_event, fake_regs, rec)) ||
- hlist_empty(head)) {
- perf_swevent_put_recursion_context(rctx);
- return;
- }
-
perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
1, regs, head, NULL);
}
--
2.52.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic tracepoint programs
2026-03-16 21:46 ` [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic " Mykyta Yatsenko
@ 2026-03-16 22:22 ` bot+bpf-ci
2026-03-23 20:38 ` Kumar Kartikeya Dwivedi
1 sibling, 0 replies; 12+ messages in thread
From: bot+bpf-ci @ 2026-03-16 22:22 UTC (permalink / raw)
To: mykyta.yatsenko5, bpf, ast, andrii, daniel, kafai, kernel-team,
eddyz87
Cc: yatsenko, ast, andrii, daniel, martin.lau, eddyz87, yonghong.song,
clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 1299 bytes --]
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index 37317b81fcda..8a7677c15f93 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
[ ... ]
> @@ -1411,12 +1412,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
> int size = 0;
> int uargs = 0;
perf_syscall_enter() still declares fake_regs:
struct pt_regs *fake_regs;
and passes it to perf_trace_buf_alloc():
rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
but the value is never read after that. Previously it was passed
to perf_call_bpf_enter() for perf_fetch_caller_regs(), but
perf_call_bpf_enter() now creates its own stack-local pt_regs.
The same applies to fake_regs in perf_syscall_exit().
Could these be cleaned up, perhaps by passing NULL to
perf_trace_buf_alloc() since it already handles that case?
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, ...)
{
...
if (regs)
*regs = this_cpu_ptr(&__perf_regs[rctx]);
...
}
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/23167661157
^ permalink raw reply [flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic tracepoint programs
2026-03-16 21:46 ` [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic " Mykyta Yatsenko
2026-03-16 22:22 ` bot+bpf-ci
@ 2026-03-23 20:38 ` Kumar Kartikeya Dwivedi
2026-03-23 20:57 ` Mykyta Yatsenko
1 sibling, 1 reply; 12+ messages in thread
From: Kumar Kartikeya Dwivedi @ 2026-03-23 20:38 UTC (permalink / raw)
To: Mykyta Yatsenko
Cc: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87,
Mykyta Yatsenko
On Mon, 16 Mar 2026 at 22:47, Mykyta Yatsenko
<mykyta.yatsenko5@gmail.com> wrote:
>
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Add trace_call_bpf_faultable(), a variant of trace_call_bpf() for
> faultable tracepoints that supports sleepable BPF programs. It uses
> rcu_tasks_trace for lifetime protection and bpf_prog_run_array_uprobe()
> for per-program RCU flavor selection, following the uprobe_prog_run()
> pattern. Uses preempt-safe this_cpu_inc_return/this_cpu_dec for the
> bpf_prog_active recursion counter since preemption is enabled in this
> context.
>
> Restructure perf_syscall_enter() and perf_syscall_exit() to run BPF
> filter before perf event processing. Previously, BPF ran after the
> per-cpu perf trace buffer was allocated under preempt_disable,
> requiring cleanup via perf_swevent_put_recursion_context() on filter.
> Now BPF runs in faultable context before preempt_disable, reading
> syscall arguments from local variables instead of the per-cpu trace
> record, removing the dependency on buffer allocation. This allows
> sleepable BPF programs to execute and avoids unnecessary buffer
> allocation when BPF filters the event. The perf event submission
> path (buffer allocation, fill, submit) remains under preempt_disable
> as before.
>
> Add an attach-time check in __perf_event_set_bpf_prog() to reject
> sleepable BPF_PROG_TYPE_TRACEPOINT programs on non-syscall
> tracepoints, since only syscall tracepoints run in faultable context.
>
> This prepares the classic tracepoint runtime and attach paths for
> sleepable programs. The verifier changes to allow loading sleepable
> BPF_PROG_TYPE_TRACEPOINT programs are in a subsequent patch.
>
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---
> include/linux/trace_events.h | 6 +++
> kernel/events/core.c | 9 ++++
> kernel/trace/bpf_trace.c | 39 ++++++++++++++++
> kernel/trace/trace_syscalls.c | 104 +++++++++++++++++++++++-------------------
> 4 files changed, 110 insertions(+), 48 deletions(-)
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 37eb2f0f3dd8..5fbbeb9ec4b9 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -767,6 +767,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
>
> #ifdef CONFIG_BPF_EVENTS
> unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
> +unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx);
> int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
> void perf_event_detach_bpf_prog(struct perf_event *event);
> int perf_event_query_prog_array(struct perf_event *event, void __user *info);
> @@ -789,6 +790,11 @@ static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *c
> return 1;
> }
>
> +static inline unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
> +{
> + return 1;
> +}
> +
> static inline int
> perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie)
> {
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 1f5699b339ec..46b733d3dd41 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -11647,6 +11647,15 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
> /* only uprobe programs are allowed to be sleepable */
> return -EINVAL;
>
> + if (prog->type == BPF_PROG_TYPE_TRACEPOINT && prog->sleepable) {
> + /*
> + * Sleepable tracepoint programs can only attach to faultable
> + * tracepoints. Currently only syscall tracepoints are faultable.
> + */
> + if (!is_syscall_tp)
> + return -EINVAL;
> + }
> +
> /* Kprobe override only works for kprobes, not uprobes. */
> if (prog->kprobe_override && !is_kprobe)
> return -EINVAL;
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 35ed53807cfd..69c9a5539e65 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -152,6 +152,45 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
> return ret;
> }
>
> +/**
> + * trace_call_bpf_faultable - invoke BPF program in faultable context
> + * @call: tracepoint event
> + * @ctx: opaque context pointer
> + *
> + * Variant of trace_call_bpf() for faultable tracepoints (e.g. syscall
> + * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace
> + * for lifetime protection and per-program rcu_read_lock for non-sleepable
> + * programs, following the uprobe_prog_run() pattern.
> + *
> + * Must be called from a faultable/preemptible context.
> + */
> +unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
> +{
> + struct bpf_prog_array *prog_array;
> + unsigned int ret;
> +
> + might_fault();
> +
> + guard(rcu_tasks_trace)();
> + guard(migrate)();
> +
> + if (unlikely(this_cpu_inc_return(bpf_prog_active) != 1)) {
This seems a bit heavy handed, why not
bpf_prog_get_recursion_context()? Esp. since we can potentially sleep
and other tasks can preempt us on the same CPU.
> + scoped_guard(rcu) {
> + bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
> + }
> + this_cpu_dec(bpf_prog_active);
> + return 0;
> + }
> +
> + prog_array = rcu_dereference_check(call->prog_array,
> + rcu_read_lock_trace_held());
> + ret = bpf_prog_run_array_uprobe(prog_array, ctx, bpf_prog_run);
This confused me a bit, do we need to generalize it a bit? E.g. it
would now set run_ctx.is_uprobe = 1 for this case too.
Actually, except that bit, the rest looks reusable, so maybe it should
be renamed to __ version and expose two wrappers, one that passes
is_uprobe = false and one passing true?
Then do bpf_prog_run_array_trace() here?
> +
> [...]
^ permalink raw reply [flat|nested] 12+ messages in thread

* Re: [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic tracepoint programs
2026-03-23 20:38 ` Kumar Kartikeya Dwivedi
@ 2026-03-23 20:57 ` Mykyta Yatsenko
0 siblings, 0 replies; 12+ messages in thread
From: Mykyta Yatsenko @ 2026-03-23 20:57 UTC (permalink / raw)
To: Kumar Kartikeya Dwivedi
Cc: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87,
Mykyta Yatsenko
Kumar Kartikeya Dwivedi <memxor@gmail.com> writes:
> On Mon, 16 Mar 2026 at 22:47, Mykyta Yatsenko
> <mykyta.yatsenko5@gmail.com> wrote:
>>
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Add trace_call_bpf_faultable(), a variant of trace_call_bpf() for
>> faultable tracepoints that supports sleepable BPF programs. It uses
>> rcu_tasks_trace for lifetime protection and bpf_prog_run_array_uprobe()
>> for per-program RCU flavor selection, following the uprobe_prog_run()
>> pattern. Uses preempt-safe this_cpu_inc_return/this_cpu_dec for the
>> bpf_prog_active recursion counter since preemption is enabled in this
>> context.
>>
>> Restructure perf_syscall_enter() and perf_syscall_exit() to run BPF
>> filter before perf event processing. Previously, BPF ran after the
>> per-cpu perf trace buffer was allocated under preempt_disable,
>> requiring cleanup via perf_swevent_put_recursion_context() on filter.
>> Now BPF runs in faultable context before preempt_disable, reading
>> syscall arguments from local variables instead of the per-cpu trace
>> record, removing the dependency on buffer allocation. This allows
>> sleepable BPF programs to execute and avoids unnecessary buffer
>> allocation when BPF filters the event. The perf event submission
>> path (buffer allocation, fill, submit) remains under preempt_disable
>> as before.
>>
>> Add an attach-time check in __perf_event_set_bpf_prog() to reject
>> sleepable BPF_PROG_TYPE_TRACEPOINT programs on non-syscall
>> tracepoints, since only syscall tracepoints run in faultable context.
>>
>> This prepares the classic tracepoint runtime and attach paths for
>> sleepable programs. The verifier changes to allow loading sleepable
>> BPF_PROG_TYPE_TRACEPOINT programs are in a subsequent patch.
>>
>> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
>> ---
>> include/linux/trace_events.h | 6 +++
>> kernel/events/core.c | 9 ++++
>> kernel/trace/bpf_trace.c | 39 ++++++++++++++++
>> kernel/trace/trace_syscalls.c | 104 +++++++++++++++++++++++-------------------
>> 4 files changed, 110 insertions(+), 48 deletions(-)
>>
>> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
>> index 37eb2f0f3dd8..5fbbeb9ec4b9 100644
>> --- a/include/linux/trace_events.h
>> +++ b/include/linux/trace_events.h
>> @@ -767,6 +767,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
>>
>> #ifdef CONFIG_BPF_EVENTS
>> unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
>> +unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx);
>> int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
>> void perf_event_detach_bpf_prog(struct perf_event *event);
>> int perf_event_query_prog_array(struct perf_event *event, void __user *info);
>> @@ -789,6 +790,11 @@ static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *c
>> return 1;
>> }
>>
>> +static inline unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
>> +{
>> + return 1;
>> +}
>> +
>> static inline int
>> perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie)
>> {
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 1f5699b339ec..46b733d3dd41 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -11647,6 +11647,15 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
>> /* only uprobe programs are allowed to be sleepable */
>> return -EINVAL;
>>
>> + if (prog->type == BPF_PROG_TYPE_TRACEPOINT && prog->sleepable) {
>> + /*
>> + * Sleepable tracepoint programs can only attach to faultable
>> + * tracepoints. Currently only syscall tracepoints are faultable.
>> + */
>> + if (!is_syscall_tp)
>> + return -EINVAL;
>> + }
>> +
>> /* Kprobe override only works for kprobes, not uprobes. */
>> if (prog->kprobe_override && !is_kprobe)
>> return -EINVAL;
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index 35ed53807cfd..69c9a5539e65 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -152,6 +152,45 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
>> return ret;
>> }
>>
>> +/**
>> + * trace_call_bpf_faultable - invoke BPF program in faultable context
>> + * @call: tracepoint event
>> + * @ctx: opaque context pointer
>> + *
>> + * Variant of trace_call_bpf() for faultable tracepoints (e.g. syscall
>> + * tracepoints). Supports sleepable BPF programs by using rcu_tasks_trace
>> + * for lifetime protection and per-program rcu_read_lock for non-sleepable
>> + * programs, following the uprobe_prog_run() pattern.
>> + *
>> + * Must be called from a faultable/preemptible context.
>> + */
>> +unsigned int trace_call_bpf_faultable(struct trace_event_call *call, void *ctx)
>> +{
>> + struct bpf_prog_array *prog_array;
>> + unsigned int ret;
>> +
>> + might_fault();
>> +
>> + guard(rcu_tasks_trace)();
>> + guard(migrate)();
>> +
>> + if (unlikely(this_cpu_inc_return(bpf_prog_active) != 1)) {
>
> This seems a bit heavy handed, why not
> bpf_prog_get_recursion_context()? Esp. since we can potentially sleep
> and other tasks can preempt us on the same CPU.
Agree, the same point was raised by sashiko review. Thanks, though, for
the hint to look into bpf_prog_get_recursion_context(), I was not
aware of it.
>
>> + scoped_guard(rcu) {
>> + bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
>> + }
>> + this_cpu_dec(bpf_prog_active);
>> + return 0;
>> + }
>> +
>> + prog_array = rcu_dereference_check(call->prog_array,
>> + rcu_read_lock_trace_held());
>> + ret = bpf_prog_run_array_uprobe(prog_array, ctx, bpf_prog_run);
>
> This confused me a bit, do we need to generalize it a bit? E.g. it
> would now set run_ctx.is_uprobe = 1 for this case too.
> Actually, except that bit, the rest looks reusable, so maybe it should
> be renamed to __ version and expose two wrappers, one that passes
> is_uprobe = false and one passing true?
> Then do bpf_prog_run_array_trace() here?
Thanks, I agree on your point.
>
>> +
>> [...]
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH bpf-next v5 3/5] bpf: Verifier support for sleepable tracepoint programs
2026-03-16 21:46 [PATCH bpf-next v5 0/5] bpf: Add support for sleepable tracepoint programs Mykyta Yatsenko
2026-03-16 21:46 ` [PATCH bpf-next v5 1/5] bpf: Add sleepable support for raw " Mykyta Yatsenko
2026-03-16 21:46 ` [PATCH bpf-next v5 2/5] bpf: Add sleepable support for classic " Mykyta Yatsenko
@ 2026-03-16 21:46 ` Mykyta Yatsenko
2026-03-16 21:46 ` [PATCH bpf-next v5 4/5] libbpf: Add section handlers for sleepable tracepoints Mykyta Yatsenko
2026-03-16 21:46 ` [PATCH bpf-next v5 5/5] selftests/bpf: Add tests for sleepable tracepoint programs Mykyta Yatsenko
4 siblings, 0 replies; 12+ messages in thread
From: Mykyta Yatsenko @ 2026-03-16 21:46 UTC (permalink / raw)
To: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87; +Cc: Mykyta Yatsenko
From: Mykyta Yatsenko <yatsenko@meta.com>
Allow BPF_PROG_TYPE_RAW_TRACEPOINT, BPF_PROG_TYPE_TRACEPOINT, and
BPF_TRACE_RAW_TP (tp_btf) programs to be sleepable by adding them
to can_be_sleepable().
For BTF-based raw tracepoints (tp_btf), add a load-time check in
bpf_check_attach_target() that rejects sleepable programs attaching
to non-faultable tracepoints with a descriptive error message.
For classic raw tracepoints (raw_tp), add an attach-time check in
bpf_raw_tp_link_attach() that rejects sleepable programs on
non-faultable tracepoints. The attach-time check is needed because
the tracepoint name is not known at load time for classic raw_tp.
The attach-time check for classic tracepoints (tp) in
__perf_event_set_bpf_prog() was added in the previous patch.
Replace the verbose error message that enumerates allowed program
types with a generic "Program of this type cannot be sleepable"
message, since the list of sleepable-capable types keeps growing.
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
kernel/bpf/syscall.c | 5 +++++
kernel/bpf/verifier.c | 13 +++++++++++--
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 274039e36465..bc19f6cdf752 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4261,6 +4261,11 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
if (!btp)
return -ENOENT;
+ if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) {
+ bpf_put_raw_tracepoint(btp);
+ return -EINVAL;
+ }
+
link = kzalloc_obj(*link, GFP_USER);
if (!link) {
err = -ENOMEM;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 01c18f4268de..a4836f564cb1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -25255,6 +25255,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
btp = bpf_get_raw_tracepoint(tname);
if (!btp)
return -EINVAL;
+ if (prog->sleepable && !tracepoint_is_faultable(btp->tp)) {
+ bpf_log(log, "Sleepable program cannot attach to non-faultable tracepoint %s\n",
+ tname);
+ bpf_put_raw_tracepoint(btp);
+ return -EINVAL;
+ }
fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
trace_symbol);
bpf_put_raw_tracepoint(btp);
@@ -25471,6 +25477,7 @@ static bool can_be_sleepable(struct bpf_prog *prog)
case BPF_MODIFY_RETURN:
case BPF_TRACE_ITER:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_RAW_TP:
return true;
default:
return false;
@@ -25478,7 +25485,9 @@ static bool can_be_sleepable(struct bpf_prog *prog)
}
return prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
- prog->type == BPF_PROG_TYPE_STRUCT_OPS;
+ prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+ prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT ||
+ prog->type == BPF_PROG_TYPE_TRACEPOINT;
}
static int check_attach_btf_id(struct bpf_verifier_env *env)
@@ -25500,7 +25509,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
if (prog->sleepable && !can_be_sleepable(prog)) {
- verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
+ verbose(env, "Program of this type cannot be sleepable\n");
return -EINVAL;
}
--
2.52.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH bpf-next v5 4/5] libbpf: Add section handlers for sleepable tracepoints
2026-03-16 21:46 [PATCH bpf-next v5 0/5] bpf: Add support for sleepable tracepoint programs Mykyta Yatsenko
` (2 preceding siblings ...)
2026-03-16 21:46 ` [PATCH bpf-next v5 3/5] bpf: Verifier support for sleepable " Mykyta Yatsenko
@ 2026-03-16 21:46 ` Mykyta Yatsenko
2026-03-16 21:46 ` [PATCH bpf-next v5 5/5] selftests/bpf: Add tests for sleepable tracepoint programs Mykyta Yatsenko
4 siblings, 0 replies; 12+ messages in thread
From: Mykyta Yatsenko @ 2026-03-16 21:46 UTC (permalink / raw)
To: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87; +Cc: Mykyta Yatsenko
From: Mykyta Yatsenko <yatsenko@meta.com>
Add SEC_DEF entries for sleepable tracepoint variants:
- "tp_btf.s+" for sleepable BTF-based raw tracepoints
- "raw_tp.s+" for sleepable classic raw tracepoints
- "raw_tracepoint.s+" (alias)
- "tp.s+" for sleepable classic tracepoints
- "tracepoint.s+" (alias)
Update attach_raw_tp() to recognize "raw_tp.s" and
"raw_tracepoint.s" prefixes when extracting the tracepoint name.
Rewrite attach_tp() to use a prefix array including "tp.s/" and
"tracepoint.s/" variants for proper section name parsing.
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
tools/lib/bpf/libbpf.c | 39 ++++++++++++++++++++++++++++++++-------
1 file changed, 32 insertions(+), 7 deletions(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0662d72bad20..625d49a21bcf 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -9858,11 +9858,16 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
+ SEC_DEF("tracepoint.s+", TRACEPOINT, 0, SEC_SLEEPABLE, attach_tp),
+ SEC_DEF("tp.s+", TRACEPOINT, 0, SEC_SLEEPABLE, attach_tp),
SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
+ SEC_DEF("raw_tracepoint.s+", RAW_TRACEPOINT, 0, SEC_SLEEPABLE, attach_raw_tp),
+ SEC_DEF("raw_tp.s+", RAW_TRACEPOINT, 0, SEC_SLEEPABLE, attach_raw_tp),
SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
+ SEC_DEF("tp_btf.s+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
@@ -12985,23 +12990,41 @@ struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
+ static const char *const prefixes[] = {
+ "tp.s/",
+ "tp/",
+ "tracepoint.s/",
+ "tracepoint/",
+ };
char *sec_name, *tp_cat, *tp_name;
+ size_t i;
*link = NULL;
- /* no auto-attach for SEC("tp") or SEC("tracepoint") */
- if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
+ /* no auto-attach for bare SEC("tp"), SEC("tracepoint") and .s variants */
+ if (strcmp(prog->sec_name, "tp") == 0 ||
+ strcmp(prog->sec_name, "tracepoint") == 0 ||
+ strcmp(prog->sec_name, "tp.s") == 0 ||
+ strcmp(prog->sec_name, "tracepoint.s") == 0)
return 0;
sec_name = strdup(prog->sec_name);
if (!sec_name)
return -ENOMEM;
- /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
- if (str_has_pfx(prog->sec_name, "tp/"))
- tp_cat = sec_name + sizeof("tp/") - 1;
- else
- tp_cat = sec_name + sizeof("tracepoint/") - 1;
+ /* extract "<prefix><category>/<name>" */
+ tp_cat = NULL;
+ for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
+ if (str_has_pfx(prog->sec_name, prefixes[i])) {
+ tp_cat = sec_name + strlen(prefixes[i]);
+ break;
+ }
+ }
+ if (!tp_cat) {
+ free(sec_name);
+ return -EINVAL;
+ }
+
tp_name = strchr(tp_cat, '/');
if (!tp_name) {
free(sec_name);
@@ -13065,6 +13088,8 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf
"raw_tracepoint",
"raw_tp.w",
"raw_tracepoint.w",
+ "raw_tp.s",
+ "raw_tracepoint.s",
};
size_t i;
const char *tp_name = NULL;
--
2.52.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH bpf-next v5 5/5] selftests/bpf: Add tests for sleepable tracepoint programs
2026-03-16 21:46 [PATCH bpf-next v5 0/5] bpf: Add support for sleepable tracepoint programs Mykyta Yatsenko
` (3 preceding siblings ...)
2026-03-16 21:46 ` [PATCH bpf-next v5 4/5] libbpf: Add section handlers for sleepable tracepoints Mykyta Yatsenko
@ 2026-03-16 21:46 ` Mykyta Yatsenko
4 siblings, 0 replies; 12+ messages in thread
From: Mykyta Yatsenko @ 2026-03-16 21:46 UTC (permalink / raw)
To: bpf, ast, andrii, daniel, kafai, kernel-team, eddyz87; +Cc: Mykyta Yatsenko
From: Mykyta Yatsenko <yatsenko@meta.com>
Add functional tests for sleepable tracepoint programs that attach to
the nanosleep syscall and use bpf_copy_from_user() to read user memory:
- tp_btf: BTF-based raw tracepoint using SEC("tp_btf.s/sys_enter")
with PT_REGS_PARM1_SYSCALL (non-CO-RE macro for BTF programs).
- classic: Classic raw tracepoint using SEC("raw_tp.s/sys_enter")
with PT_REGS_PARM1_CORE_SYSCALL (CO-RE macro needed for classic).
- tracepoint: Classic tracepoint using
SEC("tp.s/syscalls/sys_enter_nanosleep") receiving
struct syscall_trace_enter with direct access to args[].
Add a negative test (handle_sched_switch) that verifies sleepable
programs are rejected on non-faultable tracepoints (sched_switch).
Update verifier/sleepable.c tests:
- Add "sleepable raw tracepoint accept" test for sys_enter.
- Rename reject test and update error message to match the new
descriptive "Sleepable program cannot attach to non-faultable
tracepoint" message.
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
.../bpf/prog_tests/sleepable_tracepoints.c | 57 ++++++++++++++++
.../bpf/progs/test_sleepable_tracepoints.c | 75 ++++++++++++++++++++++
tools/testing/selftests/bpf/verifier/sleepable.c | 17 ++++-
3 files changed, 147 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/sleepable_tracepoints.c b/tools/testing/selftests/bpf/prog_tests/sleepable_tracepoints.c
new file mode 100644
index 000000000000..308d53c31632
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sleepable_tracepoints.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_sleepable_tracepoints.skel.h"
+
+static void run_test(struct bpf_program *prog,
+ struct test_sleepable_tracepoints *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "prog_attach"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->triggered = 0;
+ skel->bss->err = 0;
+ skel->bss->copied_tv_nsec = 0;
+
+ syscall(__NR_nanosleep, &(struct timespec){ .tv_nsec = 555 }, NULL);
+
+ ASSERT_EQ(skel->bss->triggered, 1, "triggered");
+ ASSERT_EQ(skel->bss->err, 0, "err");
+ ASSERT_EQ(skel->bss->copied_tv_nsec, 555, "copied_tv_nsec");
+
+ bpf_link__destroy(link);
+}
+
+void test_sleepable_tracepoints(void)
+{
+ struct test_sleepable_tracepoints *skel;
+
+ skel = test_sleepable_tracepoints__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.handle_sched_switch, false);
+
+ if (!ASSERT_OK(test_sleepable_tracepoints__load(skel), "skel_load"))
+ goto cleanup;
+
+ if (test__start_subtest("tp_btf"))
+ run_test(skel->progs.handle_sys_enter_tp_btf, skel);
+
+ if (test__start_subtest("classic"))
+ run_test(skel->progs.handle_sys_enter_raw_tp, skel);
+
+ if (test__start_subtest("tracepoint"))
+ run_test(skel->progs.handle_sys_enter_tp, skel);
+
+cleanup:
+ test_sleepable_tracepoints__destroy(skel);
+
+ RUN_TESTS(test_sleepable_tracepoints);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints.c b/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints.c
new file mode 100644
index 000000000000..37e14f5e9f14
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sleepable_tracepoints.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <asm/unistd.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int target_pid;
+int triggered;
+long err;
+long copied_tv_nsec;
+
+static int copy_nanosleep_arg(struct __kernel_timespec *ts)
+{
+ long tv_nsec;
+
+ err = bpf_copy_from_user(&tv_nsec, sizeof(tv_nsec), &ts->tv_nsec);
+ if (err)
+ return err;
+
+ copied_tv_nsec = tv_nsec;
+ triggered = 1;
+ return 0;
+}
+
+SEC("tp_btf.s/sys_enter")
+int BPF_PROG(handle_sys_enter_tp_btf, struct pt_regs *regs, long id)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ if (task->pid != target_pid)
+ return 0;
+
+ if (id != __NR_nanosleep)
+ return 0;
+
+ return copy_nanosleep_arg((void *)PT_REGS_PARM1_SYSCALL(regs));
+}
+
+SEC("raw_tp.s/sys_enter")
+int BPF_PROG(handle_sys_enter_raw_tp, struct pt_regs *regs, long id)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ if (task->pid != target_pid)
+ return 0;
+
+ if (id != __NR_nanosleep)
+ return 0;
+
+ return copy_nanosleep_arg((void *)PT_REGS_PARM1_CORE_SYSCALL(regs));
+}
+
+SEC("tp.s/syscalls/sys_enter_nanosleep")
+int handle_sys_enter_tp(struct syscall_trace_enter *args)
+{
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ return copy_nanosleep_arg((void *)args->args[0]);
+}
+
+/* Sleepable program on a non-faultable tracepoint should fail to load */
+SEC("tp_btf.s/sched_switch")
+__failure __msg("Sleepable program cannot attach to non-faultable tracepoint")
+int BPF_PROG(handle_sched_switch, bool preempt,
+ struct task_struct *prev, struct task_struct *next)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c
index 1f0d2bdc673f..6dabc5522945 100644
--- a/tools/testing/selftests/bpf/verifier/sleepable.c
+++ b/tools/testing/selftests/bpf/verifier/sleepable.c
@@ -76,7 +76,20 @@
.runs = -1,
},
{
- "sleepable raw tracepoint reject",
+ "sleepable raw tracepoint accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_RAW_TP,
+ .kfunc = "sys_enter",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable raw tracepoint reject non-faultable",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
@@ -85,7 +98,7 @@
.expected_attach_type = BPF_TRACE_RAW_TP,
.kfunc = "sched_switch",
.result = REJECT,
- .errstr = "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable",
+ .errstr = "Sleepable program cannot attach to non-faultable tracepoint",
.flags = BPF_F_SLEEPABLE,
.runs = -1,
},
--
2.52.0
^ permalink raw reply related [flat|nested] 12+ messages in thread