BPF List
 help / color / mirror / Atom feed
From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH bpf-next v4 09/10] bpf, x86: Jit support for nested bpf_prog_call
Date: Thu, 10 Oct 2024 10:56:38 -0700	[thread overview]
Message-ID: <20241010175638.1899406-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20241010175552.1895980-1-yonghong.song@linux.dev>

Two functions are added to the kernel
  - int notrace __bpf_prog_enter_recur_limited(struct bpf_prog *prog)
  - void notrace __bpf_prog_exit_recur_limited(struct bpf_prog *prog)
and they are called in bpf progs through jit.

Func __bpf_prog_enter_recur_limited() will return 0 if the maximum recursion
level has been reached, in which case the bpf prog will return to the caller
directly. Otherwise, it will return the current recursion level. The
recursion level will be used by the jit to calculate the proper frame pointer
for that recursion level.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 arch/x86/net/bpf_jit_comp.c | 94 +++++++++++++++++++++++++++++++++----
 include/linux/bpf.h         |  2 +
 kernel/bpf/trampoline.c     | 16 +++++++
 3 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 297dd64f4b6a..a763e018e87f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -501,7 +501,8 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
 }
 
 static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
-				enum bpf_priv_stack_mode priv_stack_mode);
+				enum bpf_priv_stack_mode priv_stack_mode,
+				bool is_subprog, u8 *image, u8 *temp);
 
 /*
  * Emit x86-64 prologue code for BPF program.
@@ -510,7 +511,8 @@ static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
  */
 static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog,
 			  bool tail_call_reachable,
-			  enum bpf_priv_stack_mode priv_stack_mode)
+			  enum bpf_priv_stack_mode priv_stack_mode, u8 *image,
+			  u8 *temp)
 {
 	bool ebpf_from_cbpf = bpf_prog_was_classic(bpf_prog);
 	bool is_exception_cb = bpf_prog->aux->exception_cb;
@@ -554,7 +556,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog
 	/* X86_TAIL_CALL_OFFSET is here */
 	EMIT_ENDBR();
 
-	emit_priv_frame_ptr(&prog, bpf_prog, priv_stack_mode);
+	emit_priv_frame_ptr(&prog, bpf_prog, priv_stack_mode, is_subprog, image, temp);
 
 	/* sub rsp, rounded_stack_depth */
 	if (stack_depth)
@@ -696,6 +698,15 @@ static void emit_return(u8 **pprog, u8 *ip)
 	*pprog = prog;
 }
 
+static int num_bytes_of_emit_return(void)
+{
+	if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+		return 5;
+	if (IS_ENABLED(CONFIG_MITIGATION_SLS))
+		return 2;
+	return 1;
+}
+
 #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack)	(-16 - round_up(stack, 8))
 
 /*
@@ -1527,17 +1538,67 @@ static void emit_root_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
 }
 
 static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
-				enum bpf_priv_stack_mode priv_stack_mode)
+				enum bpf_priv_stack_mode priv_stack_mode,
+				bool is_subprog, u8 *image, u8 *temp)
 {
 	u32 orig_stack_depth = round_up(bpf_prog->aux->stack_depth, 8);
 	u8 *prog = *pprog;
 
-	if (priv_stack_mode == PRIV_STACK_ROOT_PROG)
-		emit_root_priv_frame_ptr(&prog, bpf_prog, orig_stack_depth);
-	else if (priv_stack_mode == PRIV_STACK_SUB_PROG && orig_stack_depth)
+	if (priv_stack_mode == PRIV_STACK_ROOT_PROG) {
+		int offs;
+		u8 *func;
+
+		if (!bpf_prog->aux->has_prog_call) {
+			emit_root_priv_frame_ptr(&prog, bpf_prog, orig_stack_depth);
+		} else {
+			EMIT1(0x57);		/* push rdi */
+			if (is_subprog) {
+				/* subprog may have up to 5 arguments */
+				EMIT1(0x56);		/* push rsi */
+				EMIT1(0x52);		/* push rdx */
+				EMIT1(0x51);		/* push rcx */
+				EMIT2(0x41, 0x50);	/* push r8 */
+			}
+			emit_mov_imm64(&prog, BPF_REG_1, (long) bpf_prog >> 32,
+				       (u32) (long) bpf_prog);
+			func = (u8 *)__bpf_prog_enter_recur_limited;
+			offs = prog - temp;
+			offs += x86_call_depth_emit_accounting(&prog, func, image + offs);
+			emit_call(&prog, func, image + offs);
+			if (is_subprog) {
+				EMIT2(0x41, 0x58);	/* pop r8 */
+				EMIT1(0x59);		/* pop rcx */
+				EMIT1(0x5a);		/* pop rdx */
+				EMIT1(0x5e);		/* pop rsi */
+			}
+			EMIT1(0x5f);		/* pop rdi */
+
+			EMIT4(0x48, 0x83, 0xf8, 0x0);   /* cmp rax,0x0 */
+			EMIT2(X86_JNE, num_bytes_of_emit_return() + 1);
+
+			/* return if stack recursion has been reached */
+			EMIT1(0xC9);    /* leave */
+			emit_return(&prog, image + (prog - temp));
+
+			/* cnt -= 1 */
+			emit_alu_helper_1(&prog, BPF_ALU64 | BPF_SUB | BPF_K,
+					  BPF_REG_0, 1);
+
+			/* accum_stack_depth = cnt * subtree_stack_depth */
+			emit_alu_helper_3(&prog, BPF_ALU64 | BPF_MUL | BPF_K, BPF_REG_0,
+					  bpf_prog->aux->subtree_stack_depth);
+
+			emit_root_priv_frame_ptr(&prog, bpf_prog, orig_stack_depth);
+
+			/* r9 += accum_stack_depth */
+			emit_alu_helper_2(&prog, BPF_ALU64 | BPF_ADD | BPF_X, X86_REG_R9,
+					  BPF_REG_0);
+		}
+	} else if (priv_stack_mode == PRIV_STACK_SUB_PROG && orig_stack_depth) {
 		/* r9 += orig_stack_depth */
 		emit_alu_helper_1(&prog, BPF_ALU64 | BPF_ADD | BPF_K, X86_REG_R9,
 				  orig_stack_depth);
+	}
 
 	*pprog = prog;
 }
@@ -1578,7 +1639,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	detect_reg_usage(insn, insn_cnt, callee_regs_used);
 
 	emit_prologue(&prog, stack_depth, bpf_prog, tail_call_reachable,
-		      priv_stack_mode);
+		      priv_stack_mode, image, temp);
 	/* Exception callback will clobber callee regs for its own use, and
 	 * restore the original callee regs from main prog's stack frame.
 	 */
@@ -2519,6 +2580,23 @@ st:			if (is_imm8(insn->off))
 				if (arena_vm_start)
 					pop_r12(&prog);
 			}
+
+			if (bpf_prog->aux->has_prog_call) {
+				u8 *func, *ip;
+				int offs;
+
+				ip = image + addrs[i - 1];
+				/* save and restore the return value */
+				EMIT1(0x50);    /* push rax */
+				emit_mov_imm64(&prog, BPF_REG_1, (long) bpf_prog >> 32,
+					       (u32) (long) bpf_prog);
+				func = (u8 *)__bpf_prog_exit_recur_limited;
+				offs = prog - temp;
+				offs += x86_call_depth_emit_accounting(&prog, func, ip + offs);
+				emit_call(&prog, func, ip + offs);
+				EMIT1(0x58);    /* pop rax */
+			}
+
 			EMIT1(0xC9);         /* leave */
 			emit_return(&prog, image + addrs[i - 1] + (prog - temp));
 			break;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 952cb398eb30..605004cba9f7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1148,6 +1148,8 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 					     struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
 					     struct bpf_tramp_run_ctx *run_ctx);
+int notrace __bpf_prog_enter_recur_limited(struct bpf_prog *prog);
+void notrace __bpf_prog_exit_recur_limited(struct bpf_prog *prog);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog,
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f8302a5ca400..d9e7260e4b39 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -960,6 +960,22 @@ void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
 	rcu_read_unlock_trace();
 }
 
+int notrace __bpf_prog_enter_recur_limited(struct bpf_prog *prog)
+{
+	int cnt = this_cpu_inc_return(*(prog->active));
+
+	if (cnt > BPF_MAX_PRIV_STACK_NEST_LEVEL) {
+		bpf_prog_inc_misses_counter(prog);
+		return 0;
+	}
+	return cnt;
+}
+
+void notrace __bpf_prog_exit_recur_limited(struct bpf_prog *prog)
+{
+	this_cpu_dec(*(prog->active));
+}
+
 static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog,
 					      struct bpf_tramp_run_ctx *run_ctx)
 {
-- 
2.43.5


  parent reply	other threads:[~2024-10-10 17:59 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-10 17:55 [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Yonghong Song
2024-10-10 17:55 ` [PATCH bpf-next v4 01/10] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 02/10] bpf: Mark each subprog with proper private stack modes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 03/10] bpf, x86: Refactor func emit_prologue Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 04/10] bpf, x86: Create a helper for certain "reg <op>= imm" operations Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 05/10] bpf, x86: Add jit support for private stack Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 06/10] selftests/bpf: Add private stack tests Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog Yonghong Song
2024-10-10 20:28   ` Alexei Starovoitov
2024-10-11  4:12     ` Yonghong Song
2024-10-15 21:18       ` Tejun Heo
2024-10-15 21:35         ` Alexei Starovoitov
2024-10-10 17:56 ` [PATCH bpf-next v4 08/10] bpf, x86: Create two helpers for some arith operations Yonghong Song
2024-10-10 20:21   ` Alexei Starovoitov
2024-10-11  4:16     ` Yonghong Song
2024-10-10 17:56 ` Yonghong Song [this message]
2024-10-10 20:53   ` [PATCH bpf-next v4 09/10] bpf, x86: Jit support for nested bpf_prog_call Alexei Starovoitov
2024-10-11  4:20     ` Yonghong Song
2024-10-11  4:29       ` Alexei Starovoitov
2024-10-11 15:38         ` Yonghong Song
2024-10-11 15:40           ` Alexei Starovoitov
2024-10-11 16:14             ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 10/10] selftests/bpf: Add tests for bpf_prog_call() Yonghong Song
2024-10-15 21:28 ` [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Tejun Heo
2024-10-15 21:39   ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241010175638.1899406-1-yonghong.song@linux.dev \
    --to=yonghong.song@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox