From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
"Jose E . Marchesi" <jose.marchesi@oracle.com>,
kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>
Subject: [PATCH bpf-next v4 15/18] bpf,x86: Implement JIT support for stack arguments
Date: Sat, 11 Apr 2026 22:00:26 -0700 [thread overview]
Message-ID: <20260412050033.267815-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20260412045826.254200-1-yonghong.song@linux.dev>
Add x86_64 JIT support for BPF functions and kfuncs with more than
5 arguments. The extra arguments are passed through a stack area
addressed by register r12 (BPF_REG_STACK_ARG_BASE) in BPF bytecode,
which the JIT translates to native code.
The JIT follows the x86-64 calling convention for both BPF-to-BPF
and kfunc calls:
- Arg 6 is passed in the R9 register
- Args 7+ are passed on the stack
Incoming arg 6 (BPF r12+8) is translated to a MOV from R9 rather
than a memory load. Incoming args 7+ (BPF r12+16, r12+24, ...) map
directly to [rbp + 16], [rbp + 24], ..., matching the x86-64 stack
layout after CALL + PUSH RBP, so no offset adjustment is needed.
The verifier guarantees that neither tail_call_reachable nor
priv_stack is set when outgoing stack args exist, so R9 is always
available. When BPF bytecode writes to the arg-6 stack slot
(the most negative outgoing offset), the JIT emits a MOV into R9
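instead of a memory store. Outgoing args 7+ are placed in a
pre-allocated area below the callee-saved registers, starting at [rsp].
Stores to that area are emitted rbp-relative, using:
native_off = outgoing_arg_base + bpf_off
where outgoing_arg_base = -(round_up(stack_depth, 8) + callee_saved_size).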
The native x86_64 stack layout:
high address
+-------------------------+
| incoming stack arg N | [rbp + 16 + (N-7)*8] (from caller)
| ... |
| incoming stack arg 7 | [rbp + 16]
+-------------------------+
| return address | [rbp + 8]
| saved rbp | [rbp]
+-------------------------+
| BPF program stack | (round_up(stack_depth, 8) bytes)
+-------------------------+
| callee-saved regs | (r12, rbx, r13, r14, r15 as needed)
+-------------------------+
| outgoing arg M | [rsp + (M-7)*8]
| ... |
| outgoing arg 7 | [rsp]
+-------------------------+ rsp
low address
(Arg 6 is in R9, not on the stack)
[1] Related LLVM pull request: https://github.com/llvm/llvm-project/pull/189060
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
arch/x86/net/bpf_jit_comp.c | 172 ++++++++++++++++++++++++++++++++++--
1 file changed, 164 insertions(+), 8 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 32864dbc2c4e..ec57b9a6b417 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -390,6 +390,34 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
*pprog = prog;
}
+/* Emit "add rsp, depth" (imm8 or imm32 encoding); emits nothing when depth is 0 */
+static void emit_add_rsp(u8 **pprog, u16 depth)
+{
+ u8 *prog = *pprog;
+
+ if (!depth)
+ return; /* no adjustment requested: *pprog is left untouched */
+ if (is_imm8(depth))
+ EMIT4(0x48, 0x83, 0xC4, depth); /* add rsp, imm8 */
+ else
+ EMIT3_off32(0x48, 0x81, 0xC4, depth); /* add rsp, imm32 */
+ *pprog = prog; /* store the local prog pointer back for the caller */
+}
+
+/* Emit "sub rsp, depth" (imm8 or imm32 encoding); emits nothing when depth is 0 */
+static void emit_sub_rsp(u8 **pprog, u16 depth)
+{
+ u8 *prog = *pprog;
+
+ if (!depth)
+ return; /* no adjustment requested: *pprog is left untouched */
+ if (is_imm8(depth))
+ EMIT4(0x48, 0x83, 0xEC, depth); /* sub rsp, imm8 */
+ else
+ EMIT3_off32(0x48, 0x81, 0xEC, depth); /* sub rsp, imm32 */
+ *pprog = prog; /* store the local prog pointer back for the caller */
+}
+
static void emit_nops(u8 **pprog, int len)
{
u8 *prog = *pprog;
@@ -725,8 +753,8 @@ static void emit_return(u8 **pprog, u8 *ip)
*/
static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
u8 **pprog, bool *callee_regs_used,
- u32 stack_depth, u8 *ip,
- struct jit_context *ctx)
+ u32 stack_depth, u16 outgoing_depth,
+ u8 *ip, struct jit_context *ctx)
{
int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
u8 *prog = *pprog, *start = *pprog;
@@ -775,6 +803,9 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
/* Inc tail_call_cnt if the slot is populated. */
EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */
+ /* Deallocate outgoing stack arg area. */
+ emit_add_rsp(&prog, outgoing_depth);
+
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
@@ -815,6 +846,7 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
struct bpf_jit_poke_descriptor *poke,
u8 **pprog, u8 *ip,
bool *callee_regs_used, u32 stack_depth,
+ u16 outgoing_depth,
struct jit_context *ctx)
{
int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
@@ -842,6 +874,9 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
/* Inc tail_call_cnt if the slot is populated. */
EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */
+ /* Deallocate outgoing stack arg area. */
+ emit_add_rsp(&prog, outgoing_depth);
+
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
@@ -1664,16 +1699,48 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
int i, excnt = 0;
int ilen, proglen = 0;
u8 *prog = temp;
+ u16 stack_arg_depth, incoming_stack_arg_depth, outgoing_stack_arg_depth;
+ u16 outgoing_rsp;
u32 stack_depth;
+ int callee_saved_size;
+ s32 outgoing_arg_base;
+ bool has_stack_args;
int err;
stack_depth = bpf_prog->aux->stack_depth;
+ stack_arg_depth = bpf_prog->aux->stack_arg_depth;
+ incoming_stack_arg_depth = bpf_prog->aux->incoming_stack_arg_depth;
+ outgoing_stack_arg_depth = stack_arg_depth - incoming_stack_arg_depth;
priv_stack_ptr = bpf_prog->aux->priv_stack_ptr;
if (priv_stack_ptr) {
priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ + round_up(stack_depth, 8);
stack_depth = 0;
}
+ /*
+ * Follow x86-64 calling convention for both BPF-to-BPF and
+ * kfunc calls:
+ * - Arg 6 is passed in R9 register
+ * - Args 7+ are passed on the stack at [rsp]
+ *
+ * Incoming arg 6 is read from R9 (BPF r12+8 → MOV from R9).
+ * Incoming args 7+ are read from [rbp + 16], [rbp + 24], ...
+ * (BPF r12+16, r12+24, ... map directly with no offset change).
+ *
+ * The verifier guarantees that neither tail_call_reachable nor
+ * priv_stack is set when outgoing stack args exist, so R9 is
+ * always available.
+ *
+ * Stack layout (high to low):
+ * [rbp + 16 + ...] incoming stack args 7+ (from caller)
+ * [rbp + 8] return address
+ * [rbp] saved rbp
+ * [rbp - prog_stack] program stack
+ * [below] callee-saved regs
+ * [below] outgoing args 7+ (= rsp)
+ */
+ has_stack_args = stack_arg_depth > 0;
+
arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
@@ -1700,6 +1767,41 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
push_r12(&prog);
push_callee_regs(&prog, callee_regs_used);
}
+
+ /* Compute callee-saved register area size. */
+ callee_saved_size = 0;
+ if (bpf_prog->aux->exception_boundary || arena_vm_start)
+ callee_saved_size += 8; /* r12 */
+ if (bpf_prog->aux->exception_boundary) {
+ callee_saved_size += 4 * 8; /* rbx, r13, r14, r15 */
+ } else {
+ int j;
+
+ for (j = 0; j < 4; j++)
+ if (callee_regs_used[j])
+ callee_saved_size += 8;
+ }
+ /*
+ * Base offset from rbp for translating BPF outgoing args 7+
+ * to native offsets:
+ * native_off = outgoing_arg_base + bpf_off
+ *
+ * BPF outgoing offsets are negative (r12 - N*8 for arg6,
+ * ..., r12 - 8 for last arg). Arg 6 goes to R9 directly,
+ * so only args 7+ occupy the outgoing stack area.
+ *
+ * Note that tail_call_reachable is guaranteed to be false when
+ * stack args exist, so tcc pushes need not be accounted for.
+ */
+ outgoing_arg_base = -(round_up(stack_depth, 8) + callee_saved_size);
+
+ /*
+ * Allocate outgoing stack arg area for args 7+ only.
+ * Arg 6 goes into r9 register, not on stack.
+ */
+ outgoing_rsp = outgoing_stack_arg_depth > 8 ? outgoing_stack_arg_depth - 8 : 0;
+ emit_sub_rsp(&prog, outgoing_rsp);
+
if (arena_vm_start)
emit_mov_imm64(&prog, X86_REG_R12,
arena_vm_start >> 32, (u32) arena_vm_start);
@@ -1715,13 +1817,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
prog = temp;
for (i = 1; i <= insn_cnt; i++, insn++) {
+ bool adjust_stack_arg_off = false;
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
u8 b2 = 0, b3 = 0;
u8 *start_of_ldx;
s64 jmp_offset;
- s16 insn_off;
+ s32 insn_off;
u8 jmp_cond;
u8 *func;
int nops;
@@ -1734,6 +1837,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
dst_reg = X86_REG_R9;
}
+ if (has_stack_args) {
+ u8 class = BPF_CLASS(insn->code);
+
+ if (class == BPF_LDX &&
+ src_reg == BPF_REG_STACK_ARG_BASE) {
+ src_reg = BPF_REG_FP;
+ adjust_stack_arg_off = true;
+ }
+ if ((class == BPF_STX || class == BPF_ST) &&
+ dst_reg == BPF_REG_STACK_ARG_BASE) {
+ dst_reg = BPF_REG_FP;
+ adjust_stack_arg_off = true;
+ }
+ }
+
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
@@ -2129,12 +2247,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
EMIT1(0xC7);
goto st;
case BPF_ST | BPF_MEM | BPF_DW:
+ if (adjust_stack_arg_off && insn->off == -outgoing_stack_arg_depth) {
+ /* Arg 6: store immediate in r9 register */
+ emit_mov_imm64(&prog, X86_REG_R9, imm32 >> 31, (u32)imm32);
+ break;
+ }
EMIT2(add_1mod(0x48, dst_reg), 0xC7);
-st: if (is_imm8(insn->off))
- EMIT2(add_1reg(0x40, dst_reg), insn->off);
+st: insn_off = insn->off;
+ if (adjust_stack_arg_off)
+ insn_off = outgoing_arg_base + insn_off;
+ if (is_imm8(insn_off))
+ EMIT2(add_1reg(0x40, dst_reg), insn_off);
else
- EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);
+ EMIT1_off32(add_1reg(0x80, dst_reg), insn_off);
EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
break;
@@ -2144,7 +2270,15 @@ st: if (is_imm8(insn->off))
case BPF_STX | BPF_MEM | BPF_H:
case BPF_STX | BPF_MEM | BPF_W:
case BPF_STX | BPF_MEM | BPF_DW:
- emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ if (adjust_stack_arg_off && insn->off == -outgoing_stack_arg_depth) {
+ /* Arg 6: store register value in r9 */
+ EMIT_mov(X86_REG_R9, src_reg);
+ break;
+ }
+ insn_off = insn->off;
+ if (adjust_stack_arg_off)
+ insn_off = outgoing_arg_base + insn_off;
+ emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off);
break;
case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
@@ -2243,6 +2377,18 @@ st: if (is_imm8(insn->off))
case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
insn_off = insn->off;
+ if (adjust_stack_arg_off) {
+ if (insn_off == 8) {
+ /* Incoming arg 6: read from r9 */
+ EMIT_mov(dst_reg, X86_REG_R9);
+ break;
+ }
+ /*
+ * Incoming args 7+: native_off == bpf_off
+ * (r12+16 → [rbp+16], r12+24 → [rbp+24], ...)
+ * No offset adjustment needed.
+ */
+ }
if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
@@ -2468,12 +2614,14 @@ st: if (is_imm8(insn->off))
&prog, image + addrs[i - 1],
callee_regs_used,
stack_depth,
+ outgoing_rsp,
ctx);
else
emit_bpf_tail_call_indirect(bpf_prog,
&prog,
callee_regs_used,
stack_depth,
+ outgoing_rsp,
image + addrs[i - 1],
ctx);
break;
@@ -2734,6 +2882,8 @@ st: if (is_imm8(insn->off))
if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
return -EINVAL;
}
+ /* Deallocate outgoing args 7+ area. */
+ emit_add_rsp(&prog, outgoing_rsp);
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
@@ -3757,7 +3907,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog->aux->jit_data = jit_data;
}
priv_stack_ptr = prog->aux->priv_stack_ptr;
- if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+ /*
+ * x86-64 uses R9 for both private stack frame pointer and
+ * outgoing arg 6, so disable private stack when outgoing
+ * stack args are present.
+ */
+ if (!priv_stack_ptr && prog->aux->jits_use_priv_stack &&
+ prog->aux->stack_arg_depth == prog->aux->incoming_stack_arg_depth) {
/* Allocate actual private stack size with verifier-calculated
* stack size plus two memory guards to protect overflow and
* underflow.
--
2.52.0
next prev parent reply other threads:[~2026-04-12 5:00 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-12 4:58 [PATCH bpf-next v4 00/18] bpf: Support stack arguments for BPF functions and kfuncs Yonghong Song
2026-04-12 4:58 ` [PATCH bpf-next v4 01/18] bpf: Remove unused parameter from check_map_kptr_access() Yonghong Song
2026-04-12 4:58 ` [PATCH bpf-next v4 02/18] bpf: Change from "arg #%d" to "arg#%d" in verifier log Yonghong Song
2026-04-12 4:58 ` [PATCH bpf-next v4 03/18] bpf: Refactor to avoid redundant calculation of bpf_reg_state Yonghong Song
2026-04-12 5:31 ` bot+bpf-ci
2026-04-12 4:58 ` [PATCH bpf-next v4 04/18] bpf: Refactor to handle memory and size together Yonghong Song
2026-04-12 5:31 ` bot+bpf-ci
2026-04-12 4:58 ` [PATCH bpf-next v4 05/18] bpf: Change some regno type from u32 to int type Yonghong Song
2026-04-12 4:58 ` [PATCH bpf-next v4 06/18] bpf: Use argument index instead of register index in kfunc verifier logs Yonghong Song
2026-04-12 5:43 ` bot+bpf-ci
2026-04-12 4:59 ` [PATCH bpf-next v4 07/18] bpf: Introduce bpf register BPF_REG_STACK_ARG_BASE Yonghong Song
2026-04-12 4:59 ` [PATCH bpf-next v4 08/18] bpf: Reuse MAX_BPF_FUNC_ARGS for maximum number of arguments Yonghong Song
2026-04-12 4:59 ` [PATCH bpf-next v4 09/18] bpf: Support stack arguments for bpf functions Yonghong Song
2026-04-12 5:43 ` bot+bpf-ci
2026-04-12 5:00 ` [PATCH bpf-next v4 10/18] bpf: Fix interaction between stack argument PTR_TO_STACK and dead slot poisoning Yonghong Song
2026-04-12 5:43 ` bot+bpf-ci
2026-04-12 5:00 ` [PATCH bpf-next v4 11/18] bpf: Reject stack arguments in non-JITed programs Yonghong Song
2026-04-12 5:00 ` [PATCH bpf-next v4 12/18] bpf: Reject stack arguments if tail call reachable Yonghong Song
2026-04-12 5:43 ` bot+bpf-ci
2026-04-12 5:00 ` [PATCH bpf-next v4 13/18] bpf: Support stack arguments for kfunc calls Yonghong Song
2026-04-12 5:43 ` bot+bpf-ci
2026-04-12 5:00 ` [PATCH bpf-next v4 14/18] bpf: Enable stack argument support for x86_64 Yonghong Song
2026-04-12 5:00 ` Yonghong Song [this message]
2026-04-12 5:43 ` [PATCH bpf-next v4 15/18] bpf,x86: Implement JIT support for stack arguments bot+bpf-ci
2026-04-12 5:00 ` [PATCH bpf-next v4 16/18] selftests/bpf: Add tests for BPF function " Yonghong Song
2026-04-12 5:00 ` [PATCH bpf-next v4 17/18] selftests/bpf: Add negative test for greater-than-8-byte kfunc stack argument Yonghong Song
2026-04-12 5:00 ` [PATCH bpf-next v4 18/18] selftests/bpf: Add verifier tests for stack argument validation Yonghong Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260412050033.267815-1-yonghong.song@linux.dev \
--to=yonghong.song@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=jose.marchesi@oracle.com \
--cc=kernel-team@fb.com \
--cc=martin.lau@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox