From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
To: Steven Rostedt <rostedt@goodmis.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Shuah Khan <shuah@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
linux-kselftest@vger.kernel.org, bpf@vger.kernel.org
Subject: [RFC PATCH 2/4] tracing/probes: Compile all fetchargs into a single BPF program per event
Date: Wed, 1 Jul 2026 22:45:41 +0900 [thread overview]
Message-ID: <178291354144.1566898.14374948740441958770.stgit@devnote2> (raw)
In-Reply-To: <178291352217.1566898.14481561093843379745.stgit@devnote2>
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Compile all fetch arguments of a trace probe event into a single BPF
program instead of separate programs per argument to reduce prologue
and dispatching overhead.
BPF-compatible arguments (such as register, immediate, dereference,
and raw-store operations) are compiled, including the argument-register
mapping for x86_64, arm64, and s390. If any argument requires a non-BPF
operation (such as dynamic strings), we fall back to the interpreter loop
for all arguments.
Also, correctly initialize prog->len to prevent invalid opcode execution in
the BPF interpreter.
Assisted-by: Antigravity:gemini-3.5-flash
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
kernel/trace/trace_probe.c | 249 ++++++++++++++++++++++++++++++++++++++-
kernel/trace/trace_probe.h | 15 ++
kernel/trace/trace_probe_tmpl.h | 13 ++
3 files changed, 273 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 18c212122344..0deb53c22ae3 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -2003,11 +2003,208 @@ static char *generate_probe_arg_name(const char *arg, int idx)
return name;
}
+#ifdef CONFIG_BPF_SYSCALL
+#include <linux/filter.h>
+#include <linux/uaccess.h>
+
+/*
+ * Translate the index of a function argument into the byte offset, inside
+ * struct pt_regs, of the register that carries it.
+ *
+ * Only the leading arguments are covered: six on x86_64, eight on arm64 and
+ * five on s390. Returns -1 for any other index, and for every index when
+ * none of those architectures is configured.
+ */
+static int regs_get_kernel_argument_offset(unsigned int n)
+{
+#if defined(CONFIG_X86_64)
+	static const int arg_reg_offs[] = {
+		offsetof(struct pt_regs, di),
+		offsetof(struct pt_regs, si),
+		offsetof(struct pt_regs, dx),
+		offsetof(struct pt_regs, cx),
+		offsetof(struct pt_regs, r8),
+		offsetof(struct pt_regs, r9),
+	};
+
+	if (n >= ARRAY_SIZE(arg_reg_offs))
+		return -1;
+	return arg_reg_offs[n];
+#elif defined(CONFIG_ARM64)
+	if (n >= 8)
+		return -1;
+	return offsetof(struct pt_regs, regs[n]);
+#elif defined(CONFIG_S390)
+	if (n >= 5)
+		return -1;
+	return offsetof(struct pt_regs, gprs[2 + n]);
+#else
+	return -1;
+#endif
+}
+
+/*
+ * Check whether every fetch instruction of every argument of @tp is one
+ * that trace_probe_compile_bpf() knows how to translate.
+ *
+ * Returns false when the probe has no arguments, when any instruction is
+ * outside the accepted set (REG, IMM, DEREF, ST_RAW, ST_MEM, ARG), or when
+ * a FETCH_OP_ARG index has no register offset on this architecture.
+ * Returns true otherwise.
+ */
+static bool trace_probe_can_compile_bpf(struct trace_probe *tp)
+{
+	struct fetch_insn *insn;
+	int argno;
+
+	if (!tp->nr_args)
+		return false;
+
+	for (argno = 0; argno < tp->nr_args; argno++) {
+		for (insn = tp->args[argno].code; insn->op != FETCH_OP_END; insn++) {
+			switch (insn->op) {
+			case FETCH_OP_ARG:
+				/* Only arguments with a known register slot. */
+				if (regs_get_kernel_argument_offset(insn->param) < 0)
+					return false;
+				continue;
+			case FETCH_OP_REG:
+			case FETCH_OP_IMM:
+			case FETCH_OP_DEREF:
+			case FETCH_OP_ST_RAW:
+			case FETCH_OP_ST_MEM:
+				continue;
+			default:
+				/* Anything else is left to the interpreter. */
+				return false;
+			}
+		}
+	}
+
+	return true;
+}
+
+/* Size of the scratch instruction buffer used while compiling. */
+#define FETCH_BPF_MAX_INSNS	512
+/*
+ * Stop emitting once this many instructions exist. A single fetch op emits
+ * at most nine instructions and the epilogue two more, so the gap up to
+ * FETCH_BPF_MAX_INSNS keeps every write inside the buffer.
+ */
+#define FETCH_BPF_INSN_LIMIT	500
+
+/*
+ * Compile all fetch instructions of @tp into a single BPF program and
+ * publish it in tp->prog.
+ *
+ * The program receives a struct fetch_bpf_ctx pointer in R1 and uses:
+ *   R6 = ctx, R7 = ctx->rec, R8 = ctx->data, R9 = final return value (0),
+ *   R0 = value currently being fetched, [R10 - 8] = scratch for DEREF.
+ * When a copy_from_kernel_nofault() call fails, the program exits at once
+ * with that negative result, skipping the remaining arguments.
+ *
+ * Compilation is best-effort and reports nothing to the caller: if the
+ * probe is not compilable, memory cannot be allocated, the program would be
+ * too long, or runtime selection fails, tp->prog is simply left unset.
+ */
+static void trace_probe_compile_bpf(struct trace_probe *tp)
+{
+	struct bpf_insn *insns;
+	struct bpf_prog *prog;
+	int err = 0;
+	int i = 0;
+	int idx;
+
+	if (!trace_probe_can_compile_bpf(tp))
+		return;
+
+	insns = kmalloc_array(FETCH_BPF_MAX_INSNS, sizeof(*insns), GFP_KERNEL);
+	if (!insns)
+		return;
+
+	/* Prologue: R6 = ctx */
+	insns[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+	/* R7 = ctx->rec */
+	insns[i++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6,
+				 offsetof(struct fetch_bpf_ctx, rec));
+	/* R8 = ctx->data */
+	insns[i++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_6,
+				 offsetof(struct fetch_bpf_ctx, data));
+	/* R9 = value returned by the epilogue (0) */
+	insns[i++] = BPF_MOV64_IMM(BPF_REG_9, 0);
+
+	for (idx = 0; idx < tp->nr_args; idx++) {
+		struct probe_arg *parg = &tp->args[idx];
+		struct fetch_insn *code = parg->code;
+
+		while (code->op != FETCH_OP_END && i < FETCH_BPF_INSN_LIMIT) {
+			switch (code->op) {
+			case FETCH_OP_REG:
+				/* R0 = *(unsigned long *)(R7 + code->param) */
+				insns[i++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
+							 BPF_REG_7, code->param);
+				break;
+			case FETCH_OP_ARG: {
+				int offset = regs_get_kernel_argument_offset(code->param);
+
+				/* R0 = *(unsigned long *)(R7 + offset) */
+				insns[i++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
+							 BPF_REG_7, offset);
+				break;
+			}
+			case FETCH_OP_IMM: {
+				/*
+				 * BPF_LD_IMM64() expands to two comma-separated
+				 * instructions. It has to initialize an array:
+				 * assigning it to a single slot would keep only
+				 * the first half and drop the second one.
+				 */
+				struct bpf_insn ld_imm64[] = {
+					BPF_LD_IMM64(BPF_REG_0, code->immediate),
+				};
+
+				insns[i++] = ld_imm64[0];
+				insns[i++] = ld_imm64[1];
+				break;
+			}
+			case FETCH_OP_DEREF:
+				/* R2 = src = R0 + code->offset */
+				insns[i++] = BPF_MOV64_REG(BPF_REG_2, BPF_REG_0);
+				if (code->offset)
+					insns[i++] = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+								   code->offset);
+				/* R1 = dst = R10 - 8 (stack scratch slot) */
+				insns[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
+				insns[i++] = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8);
+				/* R3 = size */
+				insns[i++] = BPF_MOV64_IMM(BPF_REG_3, sizeof(unsigned long));
+				/* Call copy_from_kernel_nofault(dst, src, size) */
+				insns[i++] = BPF_EMIT_CALL(copy_from_kernel_nofault);
+				/* if (R0 < 0) return R0; */
+				insns[i++] = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1);
+				insns[i++] = BPF_EXIT_INSN();
+				/* R0 = *(unsigned long *)(R10 - 8) */
+				insns[i++] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8);
+				break;
+			case FETCH_OP_ST_RAW:
+				/* *(size *)(R8 + parg->offset) = R0 */
+				switch (code->size) {
+				case 1:
+					insns[i++] = BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_0,
+								 parg->offset);
+					break;
+				case 2:
+					insns[i++] = BPF_STX_MEM(BPF_H, BPF_REG_8, BPF_REG_0,
+								 parg->offset);
+					break;
+				case 4:
+					insns[i++] = BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_0,
+								 parg->offset);
+					break;
+				case 8:
+					insns[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_8, BPF_REG_0,
+								 parg->offset);
+					break;
+				default:
+					/*
+					 * No BPF store of this width: give up
+					 * rather than silently skip the store.
+					 */
+					goto out;
+				}
+				break;
+			case FETCH_OP_ST_MEM:
+				/* R2 = src = R0 + code->offset */
+				insns[i++] = BPF_MOV64_REG(BPF_REG_2, BPF_REG_0);
+				if (code->offset)
+					insns[i++] = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+								   code->offset);
+				/* R1 = dst = R8 + parg->offset */
+				insns[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
+				if (parg->offset)
+					insns[i++] = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+								   parg->offset);
+				/* R3 = size */
+				insns[i++] = BPF_MOV64_IMM(BPF_REG_3, code->size);
+				/* Call copy_from_kernel_nofault(dst, src, size) */
+				insns[i++] = BPF_EMIT_CALL(copy_from_kernel_nofault);
+				/* if (R0 < 0) return R0; */
+				insns[i++] = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1);
+				insns[i++] = BPF_EXIT_INSN();
+				break;
+			default:
+				goto out;
+			}
+			code++;
+		}
+	}
+
+	/* The emit loop stopped early: the program does not fit. */
+	if (i >= FETCH_BPF_INSN_LIMIT)
+		goto out;
+
+	/* Epilogue: return R9 (0) */
+	insns[i++] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_9);
+	insns[i++] = BPF_EXIT_INSN();
+
+	prog = bpf_prog_alloc(bpf_prog_size(i), 0);
+	if (!prog)
+		goto out;
+
+	prog->len = i;
+	memcpy(prog->insnsi, insns, prog->len * sizeof(struct bpf_insn));
+	prog->type = BPF_PROG_TYPE_KPROBE;
+
+	/*
+	 * bpf_prog_select_runtime() reports failure through @err and still
+	 * returns the program, so test @err (not IS_ERR()) and release the
+	 * program instead of installing it.
+	 */
+	prog = bpf_prog_select_runtime(prog, &err);
+	if (err) {
+		bpf_prog_free(prog);
+		goto out;
+	}
+	tp->prog = prog;
+
+out:
+	kfree(insns);
+}
+#endif
+
+/* Parse an argument */
+/* The caller must pass a null-terminated argument string */
int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
struct traceprobe_parse_context *ctx)
{
struct probe_arg *parg = &tp->args[i];
const char *body;
+ int ret;
ctx->tp = tp;
body = strchr(arg, '=');
@@ -2038,7 +2235,11 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
}
ctx->offset = body - arg;
/* Parse fetch argument */
- return traceprobe_parse_probe_arg_body(body, &tp->size, parg, ctx);
+ ret = traceprobe_parse_probe_arg_body(body, &tp->size, parg, ctx);
+ if (ret)
+ return ret;
+
+ return 0;
}
void traceprobe_free_probe_arg(struct probe_arg *arg)
@@ -2443,6 +2644,13 @@ void trace_probe_cleanup(struct trace_probe *tp)
for (i = 0; i < tp->nr_args; i++)
traceprobe_free_probe_arg(&tp->args[i]);
+#ifdef CONFIG_BPF_SYSCALL
+ if (tp->prog) {
+ bpf_prog_put(tp->prog);
+ tp->prog = NULL;
+ }
+#endif
+
if (tp->entry_arg) {
kfree(tp->entry_arg);
tp->entry_arg = NULL;
@@ -2531,15 +2739,32 @@ int trace_probe_register_event_call(struct trace_probe *tp)
trace_probe_name(tp)))
return -EEXIST;
+#ifdef CONFIG_BPF_SYSCALL
+ trace_probe_compile_bpf(tp);
+#endif
+
ret = register_trace_event(&call->event);
- if (!ret)
- return -ENODEV;
+ if (!ret) {
+ ret = -ENODEV;
+ goto err_free_bpf;
+ }
ret = trace_add_event_call(call);
- if (ret)
+ if (ret) {
unregister_trace_event(&call->event);
+ goto err_free_bpf;
+ }
return ret;
+
+err_free_bpf:
+#ifdef CONFIG_BPF_SYSCALL
+ if (tp->prog) {
+ bpf_prog_put(tp->prog);
+ tp->prog = NULL;
+ }
+#endif
+ return ret;
}
int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file)
@@ -2768,5 +2993,21 @@ void trace_probe_dump_args(struct seq_file *m, struct trace_probe *tp)
for (i = 0; i < tp->nr_args; i++)
trace_probe_dump_arg(m, &tp->args[i]);
+
+#ifdef CONFIG_BPF_SYSCALL
+ if (tp->prog) {
+ seq_printf(m, "# [BPF%s]:", tp->prog->jited ? "-JIT" : "");
+ for (i = 0; i < tp->prog->len; i++) {
+ struct bpf_insn *insn = &tp->prog->insnsi[i];
+
+ seq_printf(m, " %02x %02x %04x %08x", insn->code,
+ insn->dst_reg | (insn->src_reg << 4),
+ insn->off, insn->imm);
+ if (i < tp->prog->len - 1)
+ seq_putc(m, ',');
+ }
+ seq_putc(m, '\n');
+ }
+#endif
}
#endif /* CONFIG_PROBE_EVENTS_DUMP_FETCHARG */
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index e6268a8dc378..10589414451c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -274,6 +274,9 @@ struct trace_probe {
ssize_t size; /* trace entry size */
unsigned int nr_args;
struct probe_entry_arg *entry_arg; /* This is only for return probe */
+#ifdef CONFIG_BPF_SYSCALL
+ struct bpf_prog *prog;
+#endif
struct probe_arg args[];
};
@@ -299,6 +302,7 @@ static inline void trace_probe_set_flag(struct trace_probe *tp,
smp_store_release(&tp->event->flags, tp->event->flags | flag);
}
+
static inline void trace_probe_clear_flag(struct trace_probe *tp,
unsigned int flag)
{
@@ -631,3 +635,14 @@ struct uprobe_dispatch_data {
struct trace_uprobe *tu;
unsigned long bp_addr;
};
+
+#ifdef CONFIG_BPF_SYSCALL
+#include <linux/filter.h>
+
+struct fetch_bpf_ctx { /* argument block store_trace_args() passes to tp->prog */
+ void *rec; /* loaded into R7; base address for FETCH_OP_REG/FETCH_OP_ARG loads */
+ void *edata; /* copy of store_trace_args()'s edata; not read by the program compiled in this patch */
+ void *data; /* loaded into R8; stores are written at this base plus parg->offset */
+ void *base; /* copy of store_trace_args()'s base; not read by the program compiled in this patch */
+};
+#endif
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 8db12f758fda..6ca2dfe59a0f 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -273,6 +273,19 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec, void *edata,
u32 *dl; /* Data location */
int ret, i;
+#ifdef CONFIG_BPF_SYSCALL
+ if (tp->prog) {
+ struct fetch_bpf_ctx ctx = {
+ .rec = rec,
+ .edata = edata,
+ .data = data,
+ .base = base,
+ };
+ bpf_prog_run(tp->prog, &ctx);
+ return;
+ }
+#endif
+
for (i = 0; i < tp->nr_args; i++) {
arg = tp->args + i;
dl = data + arg->offset;
next prev parent reply other threads:[~2026-07-01 13:45 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-07-01 13:45 [RFC PATCH 0/4] tracing/probes: Optimize fetcharg with BPF Masami Hiramatsu (Google)
2026-07-01 13:45 ` [RFC PATCH 1/4] tools/tracing: Add fetcharg performance micro-benchmark Masami Hiramatsu (Google)
2026-07-01 13:45 ` Masami Hiramatsu (Google) [this message]
2026-07-01 18:41 ` [RFC PATCH 2/4] tracing/probes: Compile all fetchargs into a single BPF program per event Alexei Starovoitov
2026-07-01 18:47 ` Steven Rostedt
2026-07-01 18:53 ` Alexei Starovoitov
2026-07-01 22:40 ` Masami Hiramatsu
2026-07-02 0:01 ` Alexei Starovoitov
2026-07-02 1:01 ` Masami Hiramatsu
2026-07-02 14:04 ` Steven Rostedt
2026-07-01 13:45 ` [RFC PATCH 3/4] tracing: Add disable_bpf trace option to ignore eBPF for fetchargs Masami Hiramatsu (Google)
2026-07-01 13:46 ` [RFC PATCH 4/4] selftests/ftrace: Add a test for eBPF compiled fetchargs Masami Hiramatsu (Google)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=178291354144.1566898.14374948740441958770.stgit@devnote2 \
--to=mhiramat@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=rostedt@goodmis.org \
--cc=shuah@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox