All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	"Jose E . Marchesi" <jose.marchesi@oracle.com>,
	kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>
Subject: [PATCH bpf-next v4 15/18] bpf,x86: Implement JIT support for stack arguments
Date: Sat, 11 Apr 2026 22:00:26 -0700	[thread overview]
Message-ID: <20260412050033.267815-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20260412045826.254200-1-yonghong.song@linux.dev>

Add x86_64 JIT support for BPF functions and kfuncs with more than
5 arguments. The extra arguments are passed through a stack area
addressed by register r12 (BPF_REG_STACK_ARG_BASE) in BPF bytecode,
which the JIT translates to native code.

The JIT follows the x86-64 calling convention for both BPF-to-BPF
and kfunc calls:
  - Arg 6 is passed in the R9 register
  - Args 7+ are passed on the stack

Incoming arg 6 (BPF r12+8) is translated to a MOV from R9 rather
than a memory load. Incoming args 7+ (BPF r12+16, r12+24, ...) map
directly to [rbp + 16], [rbp + 24], ..., matching the x86-64 stack
layout after CALL + PUSH RBP, so no offset adjustment is needed.

The verifier guarantees that neither tail_call_reachable nor
priv_stack is set when outgoing stack args exist, so R9 is always
available. When BPF bytecode writes to the arg-6 stack slot
(the most negative outgoing offset), the JIT emits a MOV into R9
instead of a memory store. Outgoing args 7+ are placed at [rsp]
in a pre-allocated area below callee-saved registers, using:
  native_off = outgoing_arg_base + bpf_off

The native x86_64 stack layout:

  high address
  +-------------------------+
  | incoming stack arg N    |  [rbp + 16 + (N-7)*8]  (from caller)
  | ...                     |
  | incoming stack arg 7    |  [rbp + 16]
  +-------------------------+
  | return address          |  [rbp + 8]
  | saved rbp               |  [rbp]
  +-------------------------+
  | BPF program stack       |  (round_up(stack_depth, 8) bytes)
  +-------------------------+
  | callee-saved regs       |  (r12, rbx, r13, r14, r15 as needed)
  +-------------------------+
  | outgoing arg M          |  [rsp + (M-7)*8]
  | ...                     |
  | outgoing arg 7          |  [rsp]
  +-------------------------+  rsp
  low address

  (Arg 6 is in R9, not on the stack)

  [1] https://github.com/llvm/llvm-project/pull/189060

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 arch/x86/net/bpf_jit_comp.c | 172 ++++++++++++++++++++++++++++++++++--
 1 file changed, 164 insertions(+), 8 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 32864dbc2c4e..ec57b9a6b417 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -390,6 +390,34 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
 	*pprog = prog;
 }
 
+/* add rsp, depth */
+static void emit_add_rsp(u8 **pprog, u16 depth)
+{
+	u8 *prog = *pprog;
+
+	if (!depth)
+		return;
+	if (is_imm8(depth))
+		EMIT4(0x48, 0x83, 0xC4, depth); /* add rsp, imm8 */
+	else
+		EMIT3_off32(0x48, 0x81, 0xC4, depth); /* add rsp, imm32 */
+	*pprog = prog;
+}
+
+/* sub rsp, depth */
+static void emit_sub_rsp(u8 **pprog, u16 depth)
+{
+	u8 *prog = *pprog;
+
+	if (!depth)
+		return;
+	if (is_imm8(depth))
+		EMIT4(0x48, 0x83, 0xEC, depth); /* sub rsp, imm8 */
+	else
+		EMIT3_off32(0x48, 0x81, 0xEC, depth); /* sub rsp, imm32 */
+	*pprog = prog;
+}
+
 static void emit_nops(u8 **pprog, int len)
 {
 	u8 *prog = *pprog;
@@ -725,8 +753,8 @@ static void emit_return(u8 **pprog, u8 *ip)
  */
 static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
 					u8 **pprog, bool *callee_regs_used,
-					u32 stack_depth, u8 *ip,
-					struct jit_context *ctx)
+					u32 stack_depth, u16 outgoing_depth,
+					u8 *ip, struct jit_context *ctx)
 {
 	int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
 	u8 *prog = *pprog, *start = *pprog;
@@ -775,6 +803,9 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
 	/* Inc tail_call_cnt if the slot is populated. */
 	EMIT4(0x48, 0x83, 0x00, 0x01);            /* add qword ptr [rax], 1 */
 
+	/* Deallocate outgoing stack arg area. */
+	emit_add_rsp(&prog, outgoing_depth);
+
 	if (bpf_prog->aux->exception_boundary) {
 		pop_callee_regs(&prog, all_callee_regs_used);
 		pop_r12(&prog);
@@ -815,6 +846,7 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
 				      struct bpf_jit_poke_descriptor *poke,
 				      u8 **pprog, u8 *ip,
 				      bool *callee_regs_used, u32 stack_depth,
+				      u16 outgoing_depth,
 				      struct jit_context *ctx)
 {
 	int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
@@ -842,6 +874,9 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
 	/* Inc tail_call_cnt if the slot is populated. */
 	EMIT4(0x48, 0x83, 0x00, 0x01);                /* add qword ptr [rax], 1 */
 
+	/* Deallocate outgoing stack arg area. */
+	emit_add_rsp(&prog, outgoing_depth);
+
 	if (bpf_prog->aux->exception_boundary) {
 		pop_callee_regs(&prog, all_callee_regs_used);
 		pop_r12(&prog);
@@ -1664,16 +1699,48 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	int i, excnt = 0;
 	int ilen, proglen = 0;
 	u8 *prog = temp;
+	u16 stack_arg_depth, incoming_stack_arg_depth, outgoing_stack_arg_depth;
+	u16 outgoing_rsp;
 	u32 stack_depth;
+	int callee_saved_size;
+	s32 outgoing_arg_base;
+	bool has_stack_args;
 	int err;
 
 	stack_depth = bpf_prog->aux->stack_depth;
+	stack_arg_depth = bpf_prog->aux->stack_arg_depth;
+	incoming_stack_arg_depth = bpf_prog->aux->incoming_stack_arg_depth;
+	outgoing_stack_arg_depth = stack_arg_depth - incoming_stack_arg_depth;
 	priv_stack_ptr = bpf_prog->aux->priv_stack_ptr;
 	if (priv_stack_ptr) {
 		priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ + round_up(stack_depth, 8);
 		stack_depth = 0;
 	}
 
+	/*
+	 * Follow x86-64 calling convention for both BPF-to-BPF and
+	 * kfunc calls:
+	 *   - Arg 6 is passed in R9 register
+	 *   - Args 7+ are passed on the stack at [rsp]
+	 *
+	 * Incoming arg 6 is read from R9 (BPF r12+8 → MOV from R9).
+	 * Incoming args 7+ are read from [rbp + 16], [rbp + 24], ...
+	 * (BPF r12+16, r12+24, ... map directly with no offset change).
+	 *
+	 * The verifier guarantees that neither tail_call_reachable nor
+	 * priv_stack is set when outgoing stack args exist, so R9 is
+	 * always available.
+	 *
+	 * Stack layout (high to low):
+	 *   [rbp + 16 + ...]    incoming stack args 7+ (from caller)
+	 *   [rbp + 8]           return address
+	 *   [rbp]               saved rbp
+	 *   [rbp - prog_stack]  program stack
+	 *   [below]             callee-saved regs
+	 *   [below]             outgoing args 7+ (= rsp)
+	 */
+	has_stack_args = stack_arg_depth > 0;
+
 	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
 	user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
 
@@ -1700,6 +1767,41 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 			push_r12(&prog);
 		push_callee_regs(&prog, callee_regs_used);
 	}
+
+	/* Compute callee-saved register area size. */
+	callee_saved_size = 0;
+	if (bpf_prog->aux->exception_boundary || arena_vm_start)
+		callee_saved_size += 8; /* r12 */
+	if (bpf_prog->aux->exception_boundary) {
+		callee_saved_size += 4 * 8; /* rbx, r13, r14, r15 */
+	} else {
+		int j;
+
+		for (j = 0; j < 4; j++)
+			if (callee_regs_used[j])
+				callee_saved_size += 8;
+	}
+	/*
+	 * Base offset from rbp for translating BPF outgoing args 7+
+	 * to native offsets:
+	 *   native_off = outgoing_arg_base + bpf_off
+	 *
+	 * BPF outgoing offsets are negative (r12 - N*8 for arg6,
+	 * ..., r12 - 8 for last arg). Arg 6 goes to R9 directly,
+	 * so only args 7+ occupy the outgoing stack area.
+	 *
+	 * Note that tail_call_reachable is guaranteed to be false when
+	 * stack args exist, so tcc pushes need not be accounted for.
+	 */
+	outgoing_arg_base = -(round_up(stack_depth, 8) + callee_saved_size);
+
+	/*
+	 * Allocate outgoing stack arg area for args 7+ only.
+	 * Arg 6 goes into r9 register, not on stack.
+	 */
+	outgoing_rsp = outgoing_stack_arg_depth > 8 ?  outgoing_stack_arg_depth - 8 : 0;
+	emit_sub_rsp(&prog, outgoing_rsp);
+
 	if (arena_vm_start)
 		emit_mov_imm64(&prog, X86_REG_R12,
 			       arena_vm_start >> 32, (u32) arena_vm_start);
@@ -1715,13 +1817,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	prog = temp;
 
 	for (i = 1; i <= insn_cnt; i++, insn++) {
+		bool adjust_stack_arg_off = false;
 		const s32 imm32 = insn->imm;
 		u32 dst_reg = insn->dst_reg;
 		u32 src_reg = insn->src_reg;
 		u8 b2 = 0, b3 = 0;
 		u8 *start_of_ldx;
 		s64 jmp_offset;
-		s16 insn_off;
+		s32 insn_off;
 		u8 jmp_cond;
 		u8 *func;
 		int nops;
@@ -1734,6 +1837,21 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 				dst_reg = X86_REG_R9;
 		}
 
+		if (has_stack_args) {
+			u8 class = BPF_CLASS(insn->code);
+
+			if (class == BPF_LDX &&
+			    src_reg == BPF_REG_STACK_ARG_BASE) {
+				src_reg = BPF_REG_FP;
+				adjust_stack_arg_off = true;
+			}
+			if ((class == BPF_STX || class == BPF_ST) &&
+			    dst_reg == BPF_REG_STACK_ARG_BASE) {
+				dst_reg = BPF_REG_FP;
+				adjust_stack_arg_off = true;
+			}
+		}
+
 		switch (insn->code) {
 			/* ALU */
 		case BPF_ALU | BPF_ADD | BPF_X:
@@ -2129,12 +2247,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 				EMIT1(0xC7);
 			goto st;
 		case BPF_ST | BPF_MEM | BPF_DW:
+			if (adjust_stack_arg_off && insn->off == -outgoing_stack_arg_depth) {
+				/* Arg 6: store immediate in r9 register */
+				emit_mov_imm64(&prog, X86_REG_R9, imm32 >> 31, (u32)imm32);
+				break;
+			}
 			EMIT2(add_1mod(0x48, dst_reg), 0xC7);
 
-st:			if (is_imm8(insn->off))
-				EMIT2(add_1reg(0x40, dst_reg), insn->off);
+st:			insn_off = insn->off;
+			if (adjust_stack_arg_off)
+				insn_off = outgoing_arg_base + insn_off;
+			if (is_imm8(insn_off))
+				EMIT2(add_1reg(0x40, dst_reg), insn_off);
 			else
-				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);
+				EMIT1_off32(add_1reg(0x80, dst_reg), insn_off);
 
 			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
 			break;
@@ -2144,7 +2270,15 @@ st:			if (is_imm8(insn->off))
 		case BPF_STX | BPF_MEM | BPF_H:
 		case BPF_STX | BPF_MEM | BPF_W:
 		case BPF_STX | BPF_MEM | BPF_DW:
-			emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+			if (adjust_stack_arg_off && insn->off == -outgoing_stack_arg_depth) {
+				/* Arg 6: store register value in r9 */
+				EMIT_mov(X86_REG_R9, src_reg);
+				break;
+			}
+			insn_off = insn->off;
+			if (adjust_stack_arg_off)
+				insn_off = outgoing_arg_base + insn_off;
+			emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off);
 			break;
 
 		case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
@@ -2243,6 +2377,18 @@ st:			if (is_imm8(insn->off))
 		case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
 		case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
 			insn_off = insn->off;
+			if (adjust_stack_arg_off) {
+				if (insn_off == 8) {
+					/* Incoming arg 6: read from r9 */
+					EMIT_mov(dst_reg, X86_REG_R9);
+					break;
+				}
+				/*
+				 * Incoming args 7+: native_off == bpf_off
+				 * (r12+16 → [rbp+16], r12+24 → [rbp+24], ...)
+				 * No offset adjustment needed.
+				 */
+			}
 
 			if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
 			    BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
@@ -2468,12 +2614,14 @@ st:			if (is_imm8(insn->off))
 							  &prog, image + addrs[i - 1],
 							  callee_regs_used,
 							  stack_depth,
+							  outgoing_rsp,
 							  ctx);
 			else
 				emit_bpf_tail_call_indirect(bpf_prog,
 							    &prog,
 							    callee_regs_used,
 							    stack_depth,
+							    outgoing_rsp,
 							    image + addrs[i - 1],
 							    ctx);
 			break;
@@ -2734,6 +2882,8 @@ st:			if (is_imm8(insn->off))
 				if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
 					return -EINVAL;
 			}
+			/* Deallocate outgoing args 7+ area. */
+			emit_add_rsp(&prog, outgoing_rsp);
 			if (bpf_prog->aux->exception_boundary) {
 				pop_callee_regs(&prog, all_callee_regs_used);
 				pop_r12(&prog);
@@ -3757,7 +3907,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		prog->aux->jit_data = jit_data;
 	}
 	priv_stack_ptr = prog->aux->priv_stack_ptr;
-	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+	/*
+	 * x86-64 uses R9 for both private stack frame pointer and
+	 * outgoing arg 6, so disable private stack when outgoing
+	 * stack args are present.
+	 */
+	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack &&
+	    prog->aux->stack_arg_depth == prog->aux->incoming_stack_arg_depth) {
 		/* Allocate actual private stack size with verifier-calculated
 		 * stack size plus two memory guards to protect overflow and
 		 * underflow.
-- 
2.52.0


  parent reply	other threads:[~2026-04-12  5:00 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-12  4:58 [PATCH bpf-next v4 00/18] bpf: Support stack arguments for BPF functions and kfuncs Yonghong Song
2026-04-12  4:58 ` [PATCH bpf-next v4 01/18] bpf: Remove unused parameter from check_map_kptr_access() Yonghong Song
2026-04-12  4:58 ` [PATCH bpf-next v4 02/18] bpf: Change from "arg #%d" to "arg#%d" in verifier log Yonghong Song
2026-04-12  4:58 ` [PATCH bpf-next v4 03/18] bpf: Refactor to avoid redundant calculation of bpf_reg_state Yonghong Song
2026-04-12  5:31   ` bot+bpf-ci
2026-04-13 14:25     ` Yonghong Song
2026-04-12  4:58 ` [PATCH bpf-next v4 04/18] bpf: Refactor to handle memory and size together Yonghong Song
2026-04-12  5:31   ` bot+bpf-ci
2026-04-13 14:27     ` Yonghong Song
2026-04-12  4:58 ` [PATCH bpf-next v4 05/18] bpf: Change some regno type from u32 to int type Yonghong Song
2026-04-12  4:58 ` [PATCH bpf-next v4 06/18] bpf: Use argument index instead of register index in kfunc verifier logs Yonghong Song
2026-04-12  5:43   ` bot+bpf-ci
2026-04-13 14:37     ` Yonghong Song
2026-04-12 22:01   ` Alexei Starovoitov
2026-04-13 14:45     ` Yonghong Song
2026-04-15 23:23     ` Amery Hung
2026-04-16 14:39       ` Yonghong Song
2026-04-12  4:59 ` [PATCH bpf-next v4 07/18] bpf: Introduce bpf register BPF_REG_STACK_ARG_BASE Yonghong Song
2026-04-12  4:59 ` [PATCH bpf-next v4 08/18] bpf: Reuse MAX_BPF_FUNC_ARGS for maximum number of arguments Yonghong Song
2026-04-12  4:59 ` [PATCH bpf-next v4 09/18] bpf: Support stack arguments for bpf functions Yonghong Song
2026-04-12  5:43   ` bot+bpf-ci
2026-04-13 15:22     ` Yonghong Song
2026-04-12 22:23   ` Alexei Starovoitov
2026-04-13 16:33     ` Yonghong Song
2026-04-12  5:00 ` [PATCH bpf-next v4 10/18] bpf: Fix interaction between stack argument PTR_TO_STACK and dead slot poisoning Yonghong Song
2026-04-12  5:43   ` bot+bpf-ci
2026-04-13 16:36     ` Yonghong Song
2026-04-15 22:32   ` Amery Hung
2026-04-16 14:21     ` Yonghong Song
2026-04-12  5:00 ` [PATCH bpf-next v4 11/18] bpf: Reject stack arguments in non-JITed programs Yonghong Song
2026-04-12  5:00 ` [PATCH bpf-next v4 12/18] bpf: Reject stack arguments if tail call reachable Yonghong Song
2026-04-12  5:43   ` bot+bpf-ci
2026-04-13 16:37     ` Yonghong Song
2026-04-12  5:00 ` [PATCH bpf-next v4 13/18] bpf: Support stack arguments for kfunc calls Yonghong Song
2026-04-12  5:43   ` bot+bpf-ci
2026-04-13 16:43     ` Yonghong Song
2026-04-12  5:00 ` [PATCH bpf-next v4 14/18] bpf: Enable stack argument support for x86_64 Yonghong Song
2026-04-12  5:00 ` Yonghong Song [this message]
2026-04-12  5:43   ` [PATCH bpf-next v4 15/18] bpf,x86: Implement JIT support for stack arguments bot+bpf-ci
2026-04-13 16:49     ` Yonghong Song
2026-04-12 22:36   ` Alexei Starovoitov
2026-04-13 17:26     ` Yonghong Song
2026-04-13 19:59       ` Alexei Starovoitov
2026-04-13 20:32         ` Yonghong Song
2026-04-13 20:38           ` Alexei Starovoitov
2026-04-13 21:10             ` Yonghong Song
2026-04-14 16:45       ` Yonghong Song
2026-04-14 17:51         ` Alexei Starovoitov
2026-04-12  5:00 ` [PATCH bpf-next v4 16/18] selftests/bpf: Add tests for BPF function " Yonghong Song
2026-04-12  5:00 ` [PATCH bpf-next v4 17/18] selftests/bpf: Add negative test for greater-than-8-byte kfunc stack argument Yonghong Song
2026-04-12  5:00 ` [PATCH bpf-next v4 18/18] selftests/bpf: Add verifier tests for stack argument validation Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260412050033.267815-1-yonghong.song@linux.dev \
    --to=yonghong.song@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=jose.marchesi@oracle.com \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.