BPF List
 help / color / mirror / Atom feed
From: George Guo <dongtai.guo@linux.dev>
To: Huacai Chen <chenhuacai@kernel.org>,
	Tiezhu Yang <yangtiezhu@loongson.cn>,
	Hengqi Chen <hengqi.chen@gmail.com>,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>
Cc: WANG Xuerui <kernel@xen0n.name>,
	Martin KaFai Lau <martin.lau@linux.dev>,
	Eduard Zingerman <eddyz87@gmail.com>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Song Liu <song@kernel.org>,
	Yonghong Song <yonghong.song@linux.dev>,
	Jiri Olsa <jolsa@kernel.org>, George Guo <guodongtai@kylinos.cn>,
	bpf@vger.kernel.org, loongarch@lists.linux.dev,
	linux-kernel@vger.kernel.org
Subject: [PATCH bpf-next v2 04/11] LoongArch: BPF: Add private stack support
Date: Thu,  2 Jul 2026 10:23:15 +0800	[thread overview]
Message-ID: <20260702022322.51033-5-dongtai.guo@linux.dev> (raw)
In-Reply-To: <20260702022322.51033-1-dongtai.guo@linux.dev>

From: George Guo <guodongtai@kylinos.cn>

Support per-program private stacks, advertised via
bpf_jit_supports_private_stack(). When the verifier marks a program with
jits_use_priv_stack (e.g. a sufficiently deep, potentially recursive
tracing program), its BPF stack is moved off the kernel stack into a
per-CPU allocation, reducing kernel stack pressure.

The private stack is sized as the verifier-computed stack depth (rounded
up to 16 bytes) plus two 16-byte guard regions for overflow/underflow
detection. The guards are initialised at allocation time and validated in
bpf_jit_free(). S5 (saved/restored but otherwise unused by the JIT) holds
the private stack pointer, computed in the prologue from the current
CPU's per-CPU offset ($r21).

Signed-off-by: George Guo <guodongtai@kylinos.cn>
---
 arch/loongarch/net/bpf_jit.c | 112 ++++++++++++++++++++++++++++++++++-
 arch/loongarch/net/bpf_jit.h |   1 +
 2 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index bb84b985cb45..3822e05a0779 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -25,6 +25,7 @@
 
 #define REG_TCC		LOONGARCH_GPR_A6
 #define REG_ARENA	LOONGARCH_GPR_S6 /* For storing arena_vm_start */
+#define REG_PRIV_SP	LOONGARCH_GPR_S5 /* For storing the private stack pointer */
 
 static int tail_call_cnt_ptr_stack_off(struct jit_ctx *ctx)
 {
@@ -43,6 +44,10 @@ static int tail_call_cnt_ptr_stack_off(struct jit_ctx *ctx)
 	return round_up(ctx->stack_size, 16) - offset;
 }
 
+/* Guard size and fill value used to detect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ	16
+#define PRIV_STACK_GUARD_VAL	0xEB9F12345678eb9fULL
+
 static const int regmap[] = {
 	/* return value from in-kernel function, and exit value for eBPF program */
 	[BPF_REG_0] = LOONGARCH_GPR_A5,
@@ -63,6 +68,15 @@ static const int regmap[] = {
 	[BPF_REG_AX] = LOONGARCH_GPR_T0,
 };
 
+static void emit_percpu_ptr(struct jit_ctx *ctx, u8 dst, void __percpu *ptr)
+{
+	move_imm(ctx, dst, (__force long)ptr, false);
+#ifdef CONFIG_SMP
+	/* dst += __my_cpu_offset, held in $r21 */
+	emit_insn(ctx, addd, dst, dst, LOONGARCH_GPR_U0);
+#endif
+}
+
 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)
 {
 	const struct bpf_prog *prog = ctx->prog;
@@ -164,7 +178,14 @@ static void build_prologue(struct jit_ctx *ctx)
 		stack_adjust += 8;
 
 	stack_adjust = round_up(stack_adjust, 16);
-	stack_adjust += bpf_stack_adjust;
+
+	/*
+	 * When a private stack is used the BPF stack lives in a per-CPU
+	 * allocation rather than on the kernel stack, so only the non-BPF
+	 * part is reserved here.
+	 */
+	if (!ctx->priv_sp_used)
+		stack_adjust += bpf_stack_adjust;
 
 	/*
 	 * Save the original return address to a temporary register to prevent
@@ -219,8 +240,16 @@ static void build_prologue(struct jit_ctx *ctx)
 
 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
 
-	if (bpf_stack_adjust)
+	if (ctx->priv_sp_used) {
+		/* Set up the private stack pointer and the BPF frame pointer */
+		void __percpu *priv_stack_ptr;
+
+		priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+		emit_percpu_ptr(ctx, REG_PRIV_SP, priv_stack_ptr);
+		emit_insn(ctx, addid, regmap[BPF_REG_FP], REG_PRIV_SP, bpf_stack_adjust);
+	} else if (bpf_stack_adjust) {
 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
+	}
 
 	ctx->stack_size = stack_adjust;
 
@@ -2225,6 +2254,39 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 	return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
 }
 
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[1] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
+	}
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+			       bpf_jit_get_prog_name(prog));
+			break;
+		}
+	}
+}
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
 {
 	bool extra_pass = false;
@@ -2233,7 +2295,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
 	struct jit_ctx ctx;
 	struct jit_data *jit_data;
 	struct bpf_binary_header *header;
-	struct bpf_binary_header *ro_header;
+	struct bpf_binary_header *ro_header = NULL;
+	void __percpu *priv_stack_ptr = NULL;
+	int priv_stack_alloc_sz;
 
 	/*
 	 * If BPF JIT was not enabled then we must fall back to
@@ -2249,6 +2313,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
 			return prog;
 		prog->aux->jit_data = jit_data;
 	}
+	priv_stack_ptr = prog->aux->priv_stack_ptr;
+	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+		/*
+		 * Allocate the actual private stack: the verifier-calculated
+		 * stack size plus two guard regions to detect overflow and
+		 * underflow.
+		 */
+		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+				      2 * PRIV_STACK_GUARD_SZ;
+		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+		if (!priv_stack_ptr)
+			goto out_priv_stack;
+
+		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+		prog->aux->priv_stack_ptr = priv_stack_ptr;
+	}
 	if (jit_data->ctx.offset) {
 		ctx = jit_data->ctx;
 		ro_header = jit_data->ro_header;
@@ -2264,6 +2344,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
 	ctx.prog = prog;
 	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
 	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
+	ctx.priv_sp_used = priv_stack_ptr ? true : false;
 
 	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
 	if (ctx.offset == NULL)
@@ -2357,7 +2438,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
 
 out_offset:
+		/*
+		 * A NULL ro_header here means the JIT failed, so release the
+		 * private stack that was allocated above; on success the
+		 * program keeps it until bpf_jit_free().
+		 */
+		if (!ro_header && priv_stack_ptr) {
+			free_percpu(priv_stack_ptr);
+			prog->aux->priv_stack_ptr = NULL;
+		}
 		kvfree(ctx.offset);
+out_priv_stack:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
@@ -2374,6 +2465,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
 	if (header) {
 		bpf_arch_text_copy(&ro_header->size, &header->size, sizeof(header->size));
 		bpf_jit_binary_pack_free(ro_header, header);
+		ro_header = NULL;
 	}
 	goto out_offset;
 }
@@ -2383,6 +2475,8 @@ void bpf_jit_free(struct bpf_prog *prog)
 	if (prog->jited) {
 		struct jit_data *jit_data = prog->aux->jit_data;
 		struct bpf_binary_header *hdr;
+		void __percpu *priv_stack_ptr;
+		int priv_stack_alloc_sz;
 
 		/*
 		 * If we fail the final pass of JIT (from jit_subprogs), the
@@ -2395,6 +2489,13 @@ void bpf_jit_free(struct bpf_prog *prog)
 		}
 		hdr = bpf_jit_binary_pack_hdr(prog);
 		bpf_jit_binary_pack_free(hdr, NULL);
+		priv_stack_ptr = prog->aux->priv_stack_ptr;
+		if (priv_stack_ptr) {
+			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+					      2 * PRIV_STACK_GUARD_SZ;
+			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+			free_percpu(prog->aux->priv_stack_ptr);
+		}
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
 	}
 
@@ -2431,6 +2532,11 @@ bool bpf_jit_supports_timed_may_goto(void)
 	return true;
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
 bool bpf_jit_supports_subprog_tailcalls(void)
 {
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
index a8e29be35fa8..01a7ea47e79b 100644
--- a/arch/loongarch/net/bpf_jit.h
+++ b/arch/loongarch/net/bpf_jit.h
@@ -22,6 +22,7 @@ struct jit_ctx {
 	u32 stack_size;
 	u64 arena_vm_start;
 	u64 user_vm_start;
+	bool priv_sp_used;
 };
 
 struct jit_data {
-- 
2.25.1


  parent reply	other threads:[~2026-07-02  2:24 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-07-02  2:23 [PATCH bpf-next v2 00/11] LoongArch: BPF: arena features, exceptions, private stack and may_goto George Guo
2026-07-02  2:23 ` [PATCH bpf-next v2 01/11] LoongArch: BPF: Fix tail call count pointer offset for arena programs George Guo
2026-07-02  2:35   ` sashiko-bot
2026-07-02  2:23 ` [PATCH bpf-next v2 02/11] LoongArch: BPF: Support internal-only MOV to resolve per-CPU addrs George Guo
2026-07-02  2:23 ` [PATCH bpf-next v2 03/11] LoongArch: BPF: Add timed may_goto support George Guo
2026-07-02  2:23 ` George Guo [this message]
2026-07-02  2:23 ` [PATCH bpf-next v2 05/11] LoongArch: BPF: Add exceptions (bpf_throw) support George Guo
2026-07-02  2:39   ` sashiko-bot
2026-07-02  2:23 ` [PATCH bpf-next v2 06/11] LoongArch: BPF: Support sign-extending loads from arena George Guo
2026-07-02  2:23 ` [PATCH bpf-next v2 07/11] LoongArch: BPF: Support atomics on arena pointers George Guo
2026-07-02  2:48   ` sashiko-bot
2026-07-02  2:23 ` [PATCH bpf-next v2 08/11] selftests/bpf: Enable struct_ops private stack test for LoongArch George Guo
2026-07-02  2:23 ` [PATCH bpf-next v2 09/11] selftests/bpf: Enable arena LDSX tests on LoongArch George Guo
2026-07-02  2:23 ` [PATCH bpf-next v2 10/11] selftests/bpf: Enable arena atomics " George Guo
2026-07-02  2:49   ` sashiko-bot
2026-07-02  2:23 ` [PATCH bpf-next v2 11/11] selftests/bpf: Add LoongArch deny list George Guo
2026-07-03 10:11 ` [PATCH bpf-next v2 00/11] LoongArch: BPF: arena features, exceptions, private stack and may_goto Huacai Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260702022322.51033-5-dongtai.guo@linux.dev \
    --to=dongtai.guo@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=chenhuacai@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=guodongtai@kylinos.cn \
    --cc=hengqi.chen@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kernel@xen0n.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=loongarch@lists.linux.dev \
    --cc=martin.lau@linux.dev \
    --cc=memxor@gmail.com \
    --cc=song@kernel.org \
    --cc=yangtiezhu@loongson.cn \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.