From: George Guo <dongtai.guo@linux.dev>
To: chenhuacai@kernel.org, yangtiezhu@loongson.cn, hengqi.chen@gmail.com
Cc: kernel@xen0n.name, ast@kernel.org, daniel@iogearbox.net,
andrii@kernel.org, martin.lau@linux.dev, eddyz87@gmail.com,
memxor@gmail.com, song@kernel.org, yonghong.song@linux.dev,
jolsa@kernel.org, shuah@kernel.org, loongarch@lists.linux.dev,
linux-kernel@vger.kernel.org, bpf@vger.kernel.org,
linux-kselftest@vger.kernel.org,
George Guo <guodongtai@kylinos.cn>
Subject: [PATCH 2/5] LoongArch: BPF: Add private stack support
Date: Thu, 18 Jun 2026 11:38:06 +0800 [thread overview]
Message-ID: <20260618033809.98253-3-dongtai.guo@linux.dev> (raw)
In-Reply-To: <20260618033809.98253-1-dongtai.guo@linux.dev>
From: George Guo <guodongtai@kylinos.cn>
Support per-program private stacks, advertised via
bpf_jit_supports_private_stack(). When the verifier marks a program with
jits_use_priv_stack (e.g. a sufficiently deep, potentially recursive
tracing program), its BPF stack is moved off the kernel stack into a
per-CPU allocation, reducing kernel stack pressure.
The private stack is allocated in bpf_int_jit_compile() as the
verifier-computed stack depth plus two 16-byte guard regions used to
detect overflow and underflow; the guards are initialised at allocation
time and validated in bpf_jit_free(). S5 (otherwise saved/restored but
unused by the JIT) is reused to hold the private stack pointer; the
prologue loads the per-CPU address of the stack (just past the first
guard region) into it and, on SMP, adds the current CPU's per-CPU offset
($r21). When a
private stack is in use the BPF frame pointer points into this per-CPU
region and the BPF stack is no longer reserved on the kernel stack.
Signed-off-by: George Guo <guodongtai@kylinos.cn>
---
arch/loongarch/net/bpf_jit.c | 111 ++++++++++++++++++++++++++++++++++-
arch/loongarch/net/bpf_jit.h | 1 +
2 files changed, 109 insertions(+), 3 deletions(-)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 3f9ffdde2491..c410b02e64be 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -18,8 +18,13 @@
#define REG_TCC LOONGARCH_GPR_A6
#define REG_ARENA LOONGARCH_GPR_S6 /* For storing arena_vm_start */
+#define REG_PRIV_SP LOONGARCH_GPR_S5 /* For storing the private stack pointer */
#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80)
+/* Size and fill pattern of the guard regions used to detect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ 16
+#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
+
static const int regmap[] = {
/* return value from in-kernel function, and exit value for eBPF program */
[BPF_REG_0] = LOONGARCH_GPR_A5,
@@ -40,6 +45,15 @@ static const int regmap[] = {
[BPF_REG_AX] = LOONGARCH_GPR_T0,
};
+/*
+ * Emit code that leaves in @dst the address of the per-CPU object @ptr: the
+ * constant @ptr is loaded first and, on CONFIG_SMP builds, $r21 is then added
+ * to it.
+ */
+static void emit_percpu_ptr(struct jit_ctx *ctx, u8 dst, void __percpu *ptr)
+{
+	const long percpu_addr = (__force long)ptr;
+
+	move_imm(ctx, dst, percpu_addr, false);
+#ifdef CONFIG_SMP
+	/* $r21 carries __my_cpu_offset: dst += this CPU's offset */
+	emit_insn(ctx, addd, dst, dst, LOONGARCH_GPR_U0);
+#endif
+}
+
static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)
{
const struct bpf_prog *prog = ctx->prog;
@@ -141,7 +155,14 @@ static void build_prologue(struct jit_ctx *ctx)
stack_adjust += 8;
stack_adjust = round_up(stack_adjust, 16);
- stack_adjust += bpf_stack_adjust;
+
+ /*
+ * When a private stack is used the BPF stack lives in a per-CPU
+ * allocation rather than on the kernel stack, so only the non-BPF
+ * part is reserved here.
+ */
+ if (!ctx->priv_sp_used)
+ stack_adjust += bpf_stack_adjust;
move_reg(ctx, LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
/* Reserve space for the move_imm + jirl instruction */
@@ -191,8 +212,16 @@ static void build_prologue(struct jit_ctx *ctx)
emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
- if (bpf_stack_adjust)
+ if (ctx->priv_sp_used) {
+ /* Set up the private stack pointer and the BPF frame pointer */
+ void __percpu *priv_stack_ptr;
+
+ priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+ emit_percpu_ptr(ctx, REG_PRIV_SP, priv_stack_ptr);
+ emit_insn(ctx, addid, regmap[BPF_REG_FP], REG_PRIV_SP, bpf_stack_adjust);
+ } else if (bpf_stack_adjust) {
emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
+ }
ctx->stack_size = stack_adjust;
@@ -2166,6 +2195,39 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
}
+/*
+ * Stamp PRIV_STACK_GUARD_VAL into both ends of every possible CPU's copy of
+ * the private stack: the first two u64 words and the two u64 words that end
+ * at @alloc_size bytes.
+ */
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	/* u64 index of the guard region that closes the allocation */
+	int tail = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		u64 *base = per_cpu_ptr(priv_stack_ptr, cpu);
+
+		base[0] = PRIV_STACK_GUARD_VAL;
+		base[1] = PRIV_STACK_GUARD_VAL;
+		base[tail] = PRIV_STACK_GUARD_VAL;
+		base[tail + 1] = PRIV_STACK_GUARD_VAL;
+	}
+}
+
+/*
+ * Check the guard words at both ends of every possible CPU's copy of the
+ * private stack and report corruption.
+ *
+ * @priv_stack_ptr: per-CPU base of the private stack allocation
+ * @alloc_size: size in bytes of each per-CPU copy, guard regions included
+ * @prog: program owning the stack; only used to name it in the log message
+ *
+ * The first two and the last two u64 words of each copy are compared against
+ * PRIV_STACK_GUARD_VAL. One error is logged for the first CPU whose guards
+ * do not match and the scan stops there; nothing is returned to the caller.
+ */
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+			       bpf_jit_get_prog_name(prog));
+			break;
+		}
+	}
+}
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
{
bool extra_pass = false;
@@ -2174,7 +2236,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
struct jit_ctx ctx;
struct jit_data *jit_data;
struct bpf_binary_header *header;
- struct bpf_binary_header *ro_header;
+ struct bpf_binary_header *ro_header = NULL;
+ void __percpu *priv_stack_ptr = NULL;
+ int priv_stack_alloc_sz;
/*
* If BPF JIT was not enabled then we must fall back to
@@ -2190,6 +2254,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
return prog;
prog->aux->jit_data = jit_data;
}
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+ /*
+ * Allocate the actual private stack: the verifier-calculated
+ * stack size plus two guard regions to detect overflow and
+ * underflow.
+ */
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+ if (!priv_stack_ptr)
+ goto out_priv_stack;
+
+ priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+ prog->aux->priv_stack_ptr = priv_stack_ptr;
+ }
if (jit_data->ctx.offset) {
ctx = jit_data->ctx;
ro_header = jit_data->ro_header;
@@ -2205,6 +2285,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
ctx.prog = prog;
ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
+ ctx.priv_sp_used = priv_stack_ptr ? true : false;
ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
if (ctx.offset == NULL)
@@ -2298,7 +2379,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_offset:
+ /*
+ * A NULL ro_header here means the JIT failed, so release the
+ * private stack that was allocated above; on success the
+ * program keeps it until bpf_jit_free().
+ */
+ if (!ro_header && priv_stack_ptr) {
+ free_percpu(priv_stack_ptr);
+ prog->aux->priv_stack_ptr = NULL;
+ }
kvfree(ctx.offset);
+out_priv_stack:
kfree(jit_data);
prog->aux->jit_data = NULL;
}
@@ -2324,6 +2415,8 @@ void bpf_jit_free(struct bpf_prog *prog)
if (prog->jited) {
struct jit_data *jit_data = prog->aux->jit_data;
struct bpf_binary_header *hdr;
+ void __percpu *priv_stack_ptr;
+ int priv_stack_alloc_sz;
/*
* If we fail the final pass of JIT (from jit_subprogs), the
@@ -2336,6 +2429,13 @@ void bpf_jit_free(struct bpf_prog *prog)
}
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (priv_stack_ptr) {
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+ free_percpu(prog->aux->priv_stack_ptr);
+ }
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
}
@@ -2382,6 +2482,11 @@ bool bpf_jit_supports_fsession(void)
return true;
}
+/* Indicate the JIT backend supports per-program private stacks. */
+bool bpf_jit_supports_private_stack(void)
+{
+ return true;
+}
+
/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
index a8e29be35fa8..01a7ea47e79b 100644
--- a/arch/loongarch/net/bpf_jit.h
+++ b/arch/loongarch/net/bpf_jit.h
@@ -22,6 +22,7 @@ struct jit_ctx {
u32 stack_size;
u64 arena_vm_start;
u64 user_vm_start;
+ bool priv_sp_used;
};
struct jit_data {
--
2.25.1
next prev parent reply other threads:[~2026-06-18 3:39 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-18 3:38 [PATCH 0/5] LoongArch: BPF: arena instruction gating, private stack and exceptions George Guo
2026-06-18 3:38 ` [PATCH 1/5] LoongArch: BPF: Gate unsupported arena instructions via bpf_jit_supports_insn() George Guo
2026-06-18 3:53 ` sashiko-bot
2026-06-18 4:19 ` bot+bpf-ci
2026-06-18 3:38 ` George Guo [this message]
2026-06-18 3:55 ` [PATCH 2/5] LoongArch: BPF: Add private stack support sashiko-bot
2026-06-18 3:38 ` [PATCH 3/5] LoongArch: BPF: Add exceptions (bpf_throw) support George Guo
2026-06-18 3:55 ` sashiko-bot
2026-06-18 3:38 ` [PATCH 4/5] selftests/bpf: Add LoongArch deny list George Guo
2026-06-18 3:52 ` sashiko-bot
2026-06-18 3:38 ` [PATCH 5/5] selftests/bpf: Enable struct_ops private stack test for LoongArch George Guo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260618033809.98253-3-dongtai.guo@linux.dev \
--to=dongtai.guo@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=chenhuacai@kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=guodongtai@kylinos.cn \
--cc=hengqi.chen@gmail.com \
--cc=jolsa@kernel.org \
--cc=kernel@xen0n.name \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=loongarch@lists.linux.dev \
--cc=martin.lau@linux.dev \
--cc=memxor@gmail.com \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=yangtiezhu@loongson.cn \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox