From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>,
Tejun Heo <tj@kernel.org>
Subject: [PATCH bpf-next v4 05/10] bpf, x86: Add jit support for private stack
Date: Thu, 10 Oct 2024 10:56:18 -0700 [thread overview]
Message-ID: <20241010175618.1897998-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20241010175552.1895980-1-yonghong.song@linux.dev>
Add jit support for private stack. For a particular subtree, e.g.,
subtree_root <== stack depth 120
subprog1 <== stack depth 80
subprog2 <== stack depth 40
subprog3 <== stack depth 160
Let us say that priv_stack_ptr is the memory address allocated for
private stack. The frame pointer for each above is calculated like below:
subtree_root <== subtree_root_fp = priv_stack_ptr + 120
subprog1 <== subtree_subprog1_fp = subtree_root_fp + 80
subprog2 <== subtree_subprog2_fp = subtree_subprog1_fp + 40
subprog3 <== subtree_subprog3_fp = subtree_root_fp + 160
For any function call to helper/kfunc, push/pop prog frame pointer
is needed in order to preserve frame pointer value.
To deal with exception handling, push/pop frame pointer is also used
surrounding call to subsequent subprog. For example,
subtree_root
subprog1
...
insn: call bpf_throw
...
After jit, we will have
subtree_root
insn: push r9
subprog1
...
insn: push r9
insn: call bpf_throw
insn: pop r9
...
insn: pop r9
exception_handler
pop r9
...
where r9 represents the fp for each subprog.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
arch/x86/net/bpf_jit_comp.c | 88 ++++++++++++++++++++++++++++++++++++-
1 file changed, 86 insertions(+), 2 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f01fdabf786e..a6ba85cec49a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -325,6 +325,22 @@ struct jit_context {
/* Number of bytes that will be skipped on tailcall */
#define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE)
+static void push_r9(u8 **pprog) /* emit a "push r9" instruction at the JIT cursor *pprog */
+{
+ u8 *prog = *pprog;
+
+ EMIT2(0x41, 0x51); /* push r9: REX.B prefix (0x41) + "push r64" opcode 0x50+1 */
+ *pprog = prog; /* write the cursor back to the caller (presumably advanced by EMIT2) */
+}
+
+static void pop_r9(u8 **pprog) /* emit a "pop r9" instruction at the JIT cursor *pprog */
+{
+ u8 *prog = *pprog;
+
+ EMIT2(0x41, 0x59); /* pop r9: REX.B prefix (0x41) + "pop r64" opcode 0x58+1 */
+ *pprog = prog; /* write the cursor back to the caller (presumably advanced by EMIT2) */
+}
+
static void push_r12(u8 **pprog)
{
u8 *prog = *pprog;
@@ -484,13 +500,17 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
*pprog = prog;
}
+static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
+ enum bpf_priv_stack_mode priv_stack_mode);
+
/*
* Emit x86-64 prologue code for BPF program.
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
* while jumping to another program
*/
static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog,
- bool tail_call_reachable)
+ bool tail_call_reachable,
+ enum bpf_priv_stack_mode priv_stack_mode)
{
bool ebpf_from_cbpf = bpf_prog_was_classic(bpf_prog);
bool is_exception_cb = bpf_prog->aux->exception_cb;
@@ -520,6 +540,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog
* first restore those callee-saved regs from stack, before
* reusing the stack frame.
*/
+ if (priv_stack_mode != NO_PRIV_STACK)
+ pop_r9(&prog);
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
/* Reset the stack frame. */
@@ -532,6 +554,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog
/* X86_TAIL_CALL_OFFSET is here */
EMIT_ENDBR();
+ emit_priv_frame_ptr(&prog, bpf_prog, priv_stack_mode);
+
/* sub rsp, rounded_stack_depth */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
@@ -1451,6 +1475,42 @@ static void emit_alu_helper_1(u8 **pprog, u8 insn_code, u32 dst_reg, s32 imm32)
*pprog = prog;
}
+static void emit_root_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
+ u32 orig_stack_depth) /* emit code that loads the root prog's private-stack frame pointer into r9 */
+{
+ void __percpu *priv_frame_ptr;
+ u8 *prog = *pprog;
+
+ priv_frame_ptr = bpf_prog->aux->priv_stack_ptr + orig_stack_depth; /* fp = private stack base + this prog's stack depth */
+
+ /* movabs r9, priv_frame_ptr: the 64-bit value is passed as its upper and lower 32-bit halves */
+ emit_mov_imm64(&prog, X86_REG_R9, (long) priv_frame_ptr >> 32,
+ (u32) (long) priv_frame_ptr);
+#ifdef CONFIG_SMP
+ /* add <r9>, gs:[<off>]: only emitted on SMP builds; <off> is the address of this_cpu_off */
+ EMIT2(0x65, 0x4c); /* GS segment-override prefix, then REX.W + REX.R (64-bit operand, reg field selects r9) */
+ EMIT3(0x03, 0x0c, 0x25); /* ADD r64, r/m64; ModRM: mod=00 reg=001 rm=100 (SIB follows); SIB: no base/index, disp32 */
+ EMIT((u32)(unsigned long)&this_cpu_off, 4); /* disp32: low 32 bits of &this_cpu_off */
+#endif
+ *pprog = prog; /* write the cursor back to the caller */
+}
+
+static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
+ enum bpf_priv_stack_mode priv_stack_mode) /* emit the r9 (private frame pointer) setup selected by priv_stack_mode */
+{
+ u32 orig_stack_depth = round_up(bpf_prog->aux->stack_depth, 8); /* prog's stack depth rounded up to a multiple of 8 */
+ u8 *prog = *pprog;
+
+ if (priv_stack_mode == PRIV_STACK_ROOT_PROG)
+ emit_root_priv_frame_ptr(&prog, bpf_prog, orig_stack_depth); /* root prog: r9 is loaded from bpf_prog->aux->priv_stack_ptr + depth */
+ else if (priv_stack_mode == PRIV_STACK_SUB_PROG && orig_stack_depth)
+ /* r9 += orig_stack_depth: a subprog advances the r9 it inherited; skipped when the depth is 0 */
+ emit_alu_helper_1(&prog, BPF_ALU64 | BPF_ADD | BPF_K, X86_REG_R9,
+ orig_stack_depth);
+
+ *pprog = prog; /* any other mode emits nothing; the cursor is written back unchanged */
+}
+
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
#define __LOAD_TCC_PTR(off) \
@@ -1464,6 +1524,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
{
bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
struct bpf_insn *insn = bpf_prog->insnsi;
+ enum bpf_priv_stack_mode priv_stack_mode;
bool callee_regs_used[4] = {};
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
@@ -1476,13 +1537,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
int err;
stack_depth = bpf_prog->aux->stack_depth;
+ priv_stack_mode = bpf_prog->aux->priv_stack_mode;
+ if (priv_stack_mode != NO_PRIV_STACK)
+ stack_depth = 0;
arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
detect_reg_usage(insn, insn_cnt, callee_regs_used);
- emit_prologue(&prog, stack_depth, bpf_prog, tail_call_reachable);
+ emit_prologue(&prog, stack_depth, bpf_prog, tail_call_reachable,
+ priv_stack_mode);
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@@ -1521,6 +1586,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
u8 *func;
int nops;
+ if (priv_stack_mode != NO_PRIV_STACK) {
+ if (src_reg == BPF_REG_FP)
+ src_reg = X86_REG_R9;
+
+ if (dst_reg == BPF_REG_FP)
+ dst_reg = X86_REG_R9;
+ }
+
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
@@ -2146,9 +2219,15 @@ st: if (is_imm8(insn->off))
}
if (!imm32)
return -EINVAL;
+ if (priv_stack_mode != NO_PRIV_STACK) {
+ push_r9(&prog);
+ ip += 2;
+ }
ip += x86_call_depth_emit_accounting(&prog, func, ip);
if (emit_call(&prog, func, ip))
return -EINVAL;
+ if (priv_stack_mode != NO_PRIV_STACK)
+ pop_r9(&prog);
break;
}
@@ -3572,6 +3651,11 @@ bool bpf_jit_supports_exceptions(void)
return IS_ENABLED(CONFIG_UNWINDER_ORC);
}
+bool bpf_jit_supports_private_stack(void) /* reports whether this JIT supports the BPF private stack */
+{
+ return true; /* unconditional: no config or runtime check is made here */
+}
+
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
{
#if defined(CONFIG_UNWINDER_ORC)
--
2.43.5
next prev parent reply other threads:[~2024-10-10 17:56 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-10 17:55 [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Yonghong Song
2024-10-10 17:55 ` [PATCH bpf-next v4 01/10] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 02/10] bpf: Mark each subprog with proper private stack modes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 03/10] bpf, x86: Refactor func emit_prologue Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 04/10] bpf, x86: Create a helper for certain "reg <op>= imm" operations Yonghong Song
2024-10-10 17:56 ` Yonghong Song [this message]
2024-10-10 17:56 ` [PATCH bpf-next v4 06/10] selftests/bpf: Add private stack tests Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog Yonghong Song
2024-10-10 20:28 ` Alexei Starovoitov
2024-10-11 4:12 ` Yonghong Song
2024-10-15 21:18 ` Tejun Heo
2024-10-15 21:35 ` Alexei Starovoitov
2024-10-10 17:56 ` [PATCH bpf-next v4 08/10] bpf, x86: Create two helpers for some arith operations Yonghong Song
2024-10-10 20:21 ` Alexei Starovoitov
2024-10-11 4:16 ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 09/10] bpf, x86: Jit support for nested bpf_prog_call Yonghong Song
2024-10-10 20:53 ` Alexei Starovoitov
2024-10-11 4:20 ` Yonghong Song
2024-10-11 4:29 ` Alexei Starovoitov
2024-10-11 15:38 ` Yonghong Song
2024-10-11 15:40 ` Alexei Starovoitov
2024-10-11 16:14 ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 10/10] selftests/bpf: Add tests for bpf_prog_call() Yonghong Song
2024-10-15 21:28 ` [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Tejun Heo
2024-10-15 21:39 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241010175618.1897998-1-yonghong.song@linux.dev \
--to=yonghong.song@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@fb.com \
--cc=martin.lau@kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox