* [PATCH bpf-next] bpf, arm64: JIT support for private stack
@ 2025-07-22 17:32 Puranjay Mohan
2025-07-23 15:52 ` kernel test robot
2025-07-23 22:01 ` Yonghong Song
0 siblings, 2 replies; 3+ messages in thread
From: Puranjay Mohan @ 2025-07-22 17:32 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Puranjay Mohan, Xu Kuohai, Catalin Marinas, Will Deacon,
Mykola Lysenko, bpf
The private stack is allocated in bpf_int_jit_compile() with 16-byte
alignment. It includes additional guard regions to detect stack
overflows and underflows; the guards are checked in bpf_jit_free().
Memory layout:
              +------------------------------------------------------+
              |                                                      |
              |  16 bytes padding (overflow guard - stack top)       |
              |  [ detects writes beyond top of stack ]              |
     BPF FP ->+------------------------------------------------------+
              |                                                      |
              |  BPF private stack (sized by verifier)               |
              |  [ 16-byte aligned ]                                 |
              |                                                      |
BPF PRIV SP ->+------------------------------------------------------+
              |                                                      |
              |  16 bytes padding (underflow guard - stack bottom)   |
              |  [ detects accesses before start of stack ]          |
              |                                                      |
              +------------------------------------------------------+
On detection of an overflow or underflow, the kernel emits messages
like:
BPF private stack overflow/underflow detected for prog <prog_name>
After commit bd737fcb6485 ("bpf, arm64: Get rid of fpb"), Jited BPF
programs use the stack in two ways:
1. Via the BPF frame pointer (top of stack), using negative offsets.
2. Via the stack pointer (bottom of stack), using positive offsets in
LDR/STR instructions.
When a private stack is used, ARM64 callee-saved register x27 replaces
the stack pointer as the base register for these positive-offset
accesses. The BPF frame pointer usage remains unchanged, but it now
points to the top of the private stack.
Relevant tests:
#415/1 struct_ops_private_stack/private_stack:OK
#415/2 struct_ops_private_stack/private_stack_fail:OK
#415/3 struct_ops_private_stack/private_stack_recur:OK
#415 struct_ops_private_stack:OK
#549/1 verifier_private_stack/Private stack, single prog:OK
#549/2 verifier_private_stack/Private stack, subtree > MAX_BPF_STACK:OK
#549/3 verifier_private_stack/No private stack:OK
#549/4 verifier_private_stack/Private stack, callback:OK
#549/5 verifier_private_stack/Private stack, exception in main prog:OK
#549/6 verifier_private_stack/Private stack, exception in subprog:OK
#549/7 verifier_private_stack/Private stack, async callback, not nested:OK
#549/8 verifier_private_stack/Private stack, async callback, potential nesting:OK
#549 verifier_private_stack:OK
Summary: 2/11 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
---
Note: This needs the fix in [1] to work properly.
[1] https://lore.kernel.org/all/20250722133410.54161-2-puranjay@kernel.org/
---
arch/arm64/net/bpf_jit_comp.c | 131 ++++++++++++++++--
arch/x86/net/bpf_jit_comp.c | 9 +-
include/linux/filter.h | 2 +
kernel/bpf/core.c | 7 +
.../bpf/progs/struct_ops_private_stack.c | 2 +-
.../bpf/progs/struct_ops_private_stack_fail.c | 2 +-
.../progs/struct_ops_private_stack_recur.c | 2 +-
.../bpf/progs/verifier_private_stack.c | 89 +++++++++++-
8 files changed, 221 insertions(+), 23 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 89b1b8c248c62..5a0170536c8d4 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -30,6 +30,7 @@
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
+#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
#define check_imm(bits, imm) do { \
@@ -68,6 +69,8 @@ static const int bpf2a64[] = {
[TCCNT_PTR] = A64_R(26),
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
+ /* callee saved register for private stack pointer */
+ [PRIVATE_SP] = A64_R(27),
/* callee saved register for kern_vm_start address */
[ARENA_VM_START] = A64_R(28),
};
@@ -86,6 +89,7 @@ struct jit_ctx {
u64 user_vm_start;
u64 arena_vm_start;
bool fp_used;
+ bool priv_sp_used;
bool write;
};
@@ -98,6 +102,10 @@ struct bpf_plt {
#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ 16
+#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
+
static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
if (ctx->image != NULL && ctx->write)
@@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
if (reg_used & 8)
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
- if (reg_used & 16)
+ if (reg_used & 16) {
ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
+ if (ctx->priv_sp_used)
+ ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
+ }
if (ctx->arena_vm_start)
ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
@@ -461,6 +472,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
}
}
+static void emit_percpu_ptr(const u8 dst_reg, void __percpu *ptr,
+ struct jit_ctx *ctx) /* emits code leaving (ptr + TPIDR_ELx value) in dst_reg */
+{
+ const u8 tmp = bpf2a64[TMP_REG_1]; /* scratch register, overwritten by the emitted code */
+
+ emit_a64_mov_i64(dst_reg, (__force const u64)ptr, ctx); /* dst_reg = ptr as a 64-bit immediate */
+ if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) /* evaluated while JITing; selects which MRS is emitted */
+ emit(A64_MRS_TPIDR_EL2(tmp), ctx); /* tmp = TPIDR_EL2 */
+ else
+ emit(A64_MRS_TPIDR_EL1(tmp), ctx); /* tmp = TPIDR_EL1 */
+ emit(A64_ADD(1, dst_reg, dst_reg, tmp), ctx); /* dst_reg += tmp (presumably this CPU's per-CPU offset - confirm) */
+}
+
#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
@@ -476,6 +500,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
const bool is_main_prog = !bpf_is_subprog(prog);
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 priv_sp = bpf2a64[PRIVATE_SP];
+ void __percpu *priv_stack_ptr;
const int idx0 = ctx->idx;
int cur_offset;
@@ -551,15 +577,23 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
}
- if (ctx->fp_used)
- /* Set up BPF prog stack base register */
- emit(A64_MOV(1, fp, A64_SP), ctx);
-
/* Stack must be multiples of 16B */
ctx->stack_size = round_up(prog->aux->stack_depth, 16);
+ if (ctx->fp_used) {
+ if (ctx->priv_sp_used) {
+ /* Set up private stack pointer */
+ priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
+ emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
+ emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
+ } else {
+ /* Set up BPF prog stack base register */
+ emit(A64_MOV(1, fp, A64_SP), ctx);
+ }
+ }
+
/* Set up function call stack */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
if (ctx->arena_vm_start)
@@ -623,7 +657,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_STR64I(tcc, ptr, 0), ctx);
/* restore SP */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
pop_callee_regs(ctx);
@@ -991,7 +1025,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
const u8 ptr = bpf2a64[TCCNT_PTR];
/* We're done with BPF stack */
- if (ctx->stack_size)
+ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
pop_callee_regs(ctx);
@@ -1120,6 +1154,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 priv_sp = bpf2a64[PRIVATE_SP];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
@@ -1564,7 +1599,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
src = tmp2;
}
if (src == fp) {
- src_adj = A64_SP;
+ src_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
src_adj = src;
@@ -1654,7 +1689,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
dst = tmp2;
}
if (dst == fp) {
- dst_adj = A64_SP;
+ dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
@@ -1716,7 +1751,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
dst = tmp2;
}
if (dst == fp) {
- dst_adj = A64_SP;
+ dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP;
off_adj = off + ctx->stack_size;
} else {
dst_adj = dst;
@@ -1859,6 +1894,39 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end);
}
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	/* u64 index of the first word of the trailing PRIV_STACK_GUARD_SZ bytes */
+	const int tail = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		u64 *base = per_cpu_ptr(priv_stack_ptr, cpu);
+
+		base[0] = base[1] = PRIV_STACK_GUARD_VAL;		/* leading guard */
+		base[tail] = base[tail + 1] = PRIV_STACK_GUARD_VAL;	/* trailing guard */
+	}
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3; /* u64 index of the trailing guard */
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) { /* inspect every possible CPU's copy of the allocation */
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL || /* leading guard words */
+		    stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL || /* trailing guard words */
+		    stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+			       bpf_jit_get_prog_name(prog));
+			break; /* stop at the first corrupted copy: a single message is logged */
+		}
+	}
+}
+
struct arm64_jit_data {
struct bpf_binary_header *header;
u8 *ro_image;
@@ -1873,7 +1941,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct bpf_binary_header *header;
struct bpf_binary_header *ro_header;
struct arm64_jit_data *jit_data;
+ void __percpu *priv_stack_ptr = NULL;
bool was_classic = bpf_prog_was_classic(prog);
+ int priv_stack_alloc_sz;
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
@@ -1905,6 +1975,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
prog->aux->jit_data = jit_data;
}
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+ /* Allocate actual private stack size with verifier-calculated
+ * stack size plus two memory guards to protect overflow and
+ * underflow.
+ */
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
+ if (!priv_stack_ptr) {
+ prog = orig_prog;
+ goto out_priv_stack;
+ }
+
+ priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+ prog->aux->priv_stack_ptr = priv_stack_ptr;
+ }
if (jit_data->ctx.offset) {
ctx = jit_data->ctx;
ro_image_ptr = jit_data->ro_image;
@@ -1928,6 +2015,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
+ if (priv_stack_ptr)
+ ctx.priv_sp_used = true;
+
/* Pass 1: Estimate the maximum image size.
*
* BPF line info needs ctx->offset[i] to be the offset of
@@ -2067,7 +2157,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.offset[i] *= AARCH64_INSN_SIZE;
bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
out_off:
+ if (!ro_header && priv_stack_ptr) {
+ free_percpu(priv_stack_ptr);
+ prog->aux->priv_stack_ptr = NULL;
+ }
kvfree(ctx.offset);
+out_priv_stack:
kfree(jit_data);
prog->aux->jit_data = NULL;
}
@@ -2086,6 +2181,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
+bool bpf_jit_supports_private_stack(void) /* unconditionally reports private-stack support */
+{
+ return true;
+}
+
bool bpf_jit_supports_kfunc_call(void)
{
return true;
@@ -2931,6 +3031,8 @@ void bpf_jit_free(struct bpf_prog *prog)
if (prog->jited) {
struct arm64_jit_data *jit_data = prog->aux->jit_data;
struct bpf_binary_header *hdr;
+ void __percpu *priv_stack_ptr;
+ int priv_stack_alloc_sz;
/*
* If we fail the final pass of JIT (from jit_subprogs),
@@ -2944,6 +3046,13 @@ void bpf_jit_free(struct bpf_prog *prog)
}
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
+ priv_stack_ptr = prog->aux->priv_stack_ptr;
+ if (priv_stack_ptr) {
+ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
+ 2 * PRIV_STACK_GUARD_SZ;
+ priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+ free_percpu(prog->aux->priv_stack_ptr);
+ }
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 40e1b3b9634fe..7e3fca1646203 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3501,13 +3501,6 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
}
-static const char *bpf_get_prog_name(struct bpf_prog *prog)
-{
- if (prog->aux->ksym.prog)
- return prog->aux->ksym.name;
- return prog->aux->name;
-}
-
static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
{
int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
@@ -3531,7 +3524,7 @@ static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size
if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
- bpf_get_prog_name(prog));
+ bpf_jit_get_prog_name(prog));
break;
}
}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index eca229752cbef..5cc7a82ec8322 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1278,6 +1278,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
const struct bpf_insn *insn, bool extra_pass,
u64 *func_addr, bool *func_addr_fixed);
+const char *bpf_jit_get_prog_name(struct bpf_prog *prog);
+
struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 61613785bdd0f..29c0225c14aa9 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1297,6 +1297,13 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
return 0;
}
+const char *bpf_jit_get_prog_name(struct bpf_prog *prog)
+{
+	/* Prefer the ksym name when ksym.prog is set; otherwise use the aux name. */
+	return prog->aux->ksym.prog ? prog->aux->ksym.name :
+				      prog->aux->name;
+}
+
static int bpf_jit_blind_insn(const struct bpf_insn *from,
const struct bpf_insn *aux,
struct bpf_insn *to_buff,
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
index 0e4d2ff63ab81..dbe646013811a 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
index 58d5d8dc22352..3d89ad7cbe2a9 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
index 31e58389bb8b0..b1f6d7e5a8e50 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
index fc91b414364e0..1ecd34ebde196 100644
--- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -8,7 +8,7 @@
/* From include/linux/filter.h */
#define MAX_BPF_STACK 512
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
struct elem {
struct bpf_timer t;
@@ -30,6 +30,18 @@ __jited(" movabsq $0x{{.*}}, %r9")
__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x100(%r9)")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_single_prog(void)
{
asm volatile (" \
@@ -45,6 +57,9 @@ __description("No private stack")
__success
__arch_x86_64
__jited(" subq $0x8, %rsp")
+__arch_arm64
+__jited(" mov x25, sp")
+__jited(" sub sp, sp, #0x10")
__naked void no_private_stack_nested(void)
{
asm volatile (" \
@@ -81,6 +96,19 @@ __jited(" pushq %r9")
__jited(" callq 0x{{.*}}")
__jited(" popq %r9")
__jited(" xorl %eax, %eax")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl {{.*}}")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_nested_1(void)
{
asm volatile (" \
@@ -131,6 +159,24 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited("func #1")
+__jited("...")
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x7, #0x0")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_callback(void)
{
asm volatile (" \
@@ -154,6 +200,28 @@ __arch_x86_64
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x29, x30, [sp, #-0x10]!")
+__jited(" mov x29, sp")
+__jited(" stp xzr, x26, [sp, #-0x10]!")
+__jited(" mov x26, sp")
+__jited(" stp x19, x20, [sp, #-0x10]!")
+__jited(" stp x21, x22, [sp, #-0x10]!")
+__jited(" stp x23, x24, [sp, #-0x10]!")
+__jited(" stp x25, x26, [sp, #-0x10]!")
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" mov x0, #0x0")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_main_prog(void)
{
asm volatile (" \
@@ -179,6 +247,19 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_sub_prog(void)
{
asm volatile (" \
@@ -220,6 +301,10 @@ __description("Private stack, async callback, not nested")
__success __retval(0)
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
+__arch_arm64
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
int private_stack_async_callback_1(void)
{
struct bpf_timer *arr_timer;
@@ -241,6 +326,8 @@ __description("Private stack, async callback, potential nesting")
__success __retval(0)
__arch_x86_64
__jited(" subq $0x100, %rsp")
+__arch_arm64
+__jited(" sub sp, sp, #0x100")
int private_stack_async_callback_2(void)
{
struct bpf_timer *arr_timer;
--
2.47.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH bpf-next] bpf, arm64: JIT support for private stack
2025-07-22 17:32 [PATCH bpf-next] bpf, arm64: JIT support for private stack Puranjay Mohan
@ 2025-07-23 15:52 ` kernel test robot
2025-07-23 22:01 ` Yonghong Song
1 sibling, 0 replies; 3+ messages in thread
From: kernel test robot @ 2025-07-23 15:52 UTC (permalink / raw)
To: Puranjay Mohan, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
Hao Luo, Jiri Olsa, Xu Kuohai, Catalin Marinas, Will Deacon,
Mykola Lysenko, bpf
Cc: llvm, oe-kbuild-all
Hi Puranjay,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Puranjay-Mohan/bpf-arm64-JIT-support-for-private-stack/20250723-013449
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/20250722173254.3879-1-puranjay%40kernel.org
patch subject: [PATCH bpf-next] bpf, arm64: JIT support for private stack
config: arm64-randconfig-001-20250723 (https://download.01.org/0day-ci/archive/20250723/202507232327.S1FR5cNc-lkp@intel.com/config)
compiler: clang version 22.0.0git (https://github.com/llvm/llvm-project 853c343b45b3e83cc5eeef5a52fc8cc9d8a09252)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250723/202507232327.S1FR5cNc-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507232327.S1FR5cNc-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> arch/arm64/net/bpf_jit_comp.c:2031:6: warning: variable 'ro_header' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
2031 | if (build_body(&ctx, extra_pass)) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/arm64/net/bpf_jit_comp.c:2160:8: note: uninitialized use occurs here
2160 | if (!ro_header && priv_stack_ptr) {
| ^~~~~~~~~
arch/arm64/net/bpf_jit_comp.c:2031:2: note: remove the 'if' if its condition is always false
2031 | if (build_body(&ctx, extra_pass)) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2032 | prog = orig_prog;
| ~~~~~~~~~~~~~~~~~
2033 | goto out_off;
| ~~~~~~~~~~~~~
2034 | }
| ~
arch/arm64/net/bpf_jit_comp.c:2026:6: warning: variable 'ro_header' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
2026 | if (build_prologue(&ctx, was_classic)) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/arm64/net/bpf_jit_comp.c:2160:8: note: uninitialized use occurs here
2160 | if (!ro_header && priv_stack_ptr) {
| ^~~~~~~~~
arch/arm64/net/bpf_jit_comp.c:2026:2: note: remove the 'if' if its condition is always false
2026 | if (build_prologue(&ctx, was_classic)) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2027 | prog = orig_prog;
| ~~~~~~~~~~~~~~~~~
2028 | goto out_off;
| ~~~~~~~~~~~~~
2029 | }
| ~
arch/arm64/net/bpf_jit_comp.c:2010:6: warning: variable 'ro_header' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
2010 | if (ctx.offset == NULL) {
| ^~~~~~~~~~~~~~~~~~
arch/arm64/net/bpf_jit_comp.c:2160:8: note: uninitialized use occurs here
2160 | if (!ro_header && priv_stack_ptr) {
| ^~~~~~~~~
arch/arm64/net/bpf_jit_comp.c:2010:2: note: remove the 'if' if its condition is always false
2010 | if (ctx.offset == NULL) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~
2011 | prog = orig_prog;
| ~~~~~~~~~~~~~~~~~
2012 | goto out_off;
| ~~~~~~~~~~~~~
2013 | }
| ~
arch/arm64/net/bpf_jit_comp.c:1942:37: note: initialize the variable 'ro_header' to silence this warning
1942 | struct bpf_binary_header *ro_header;
| ^
| = NULL
3 warnings generated.
vim +2031 arch/arm64/net/bpf_jit_comp.c
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1936
d1c55ab5e41fcd Daniel Borkmann 2016-05-13 1937 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
e54bcde3d69d40 Zi Shen Lim 2014-08-26 1938 {
b2ad54e1533e91 Xu Kuohai 2022-07-11 1939 int image_size, prog_size, extable_size, extable_align, extable_offset;
26eb042ee4c784 Daniel Borkmann 2016-05-13 1940 struct bpf_prog *tmp, *orig_prog = prog;
b569c1c622c5e6 Daniel Borkmann 2014-09-16 1941 struct bpf_binary_header *header;
1dad391daef129 Puranjay Mohan 2024-02-28 1942 struct bpf_binary_header *ro_header;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1943 struct arm64_jit_data *jit_data;
291f131eb536b5 Puranjay Mohan 2025-07-22 1944 void __percpu *priv_stack_ptr = NULL;
56ea6a8b4949c6 Daniel Borkmann 2018-05-14 1945 bool was_classic = bpf_prog_was_classic(prog);
291f131eb536b5 Puranjay Mohan 2025-07-22 1946 int priv_stack_alloc_sz;
26eb042ee4c784 Daniel Borkmann 2016-05-13 1947 bool tmp_blinded = false;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1948 bool extra_pass = false;
e54bcde3d69d40 Zi Shen Lim 2014-08-26 1949 struct jit_ctx ctx;
b569c1c622c5e6 Daniel Borkmann 2014-09-16 1950 u8 *image_ptr;
1dad391daef129 Puranjay Mohan 2024-02-28 1951 u8 *ro_image_ptr;
ddbe9ec55039dd Xu Kuohai 2024-09-03 1952 int body_idx;
ddbe9ec55039dd Xu Kuohai 2024-09-03 1953 int exentry_idx;
e54bcde3d69d40 Zi Shen Lim 2014-08-26 1954
60b58afc96c9df Alexei Starovoitov 2017-12-14 1955 if (!prog->jit_requested)
26eb042ee4c784 Daniel Borkmann 2016-05-13 1956 return orig_prog;
26eb042ee4c784 Daniel Borkmann 2016-05-13 1957
26eb042ee4c784 Daniel Borkmann 2016-05-13 1958 tmp = bpf_jit_blind_constants(prog);
26eb042ee4c784 Daniel Borkmann 2016-05-13 1959 /* If blinding was requested and we failed during blinding,
26eb042ee4c784 Daniel Borkmann 2016-05-13 1960 * we must fall back to the interpreter.
26eb042ee4c784 Daniel Borkmann 2016-05-13 1961 */
26eb042ee4c784 Daniel Borkmann 2016-05-13 1962 if (IS_ERR(tmp))
26eb042ee4c784 Daniel Borkmann 2016-05-13 1963 return orig_prog;
26eb042ee4c784 Daniel Borkmann 2016-05-13 1964 if (tmp != prog) {
26eb042ee4c784 Daniel Borkmann 2016-05-13 1965 tmp_blinded = true;
26eb042ee4c784 Daniel Borkmann 2016-05-13 1966 prog = tmp;
26eb042ee4c784 Daniel Borkmann 2016-05-13 1967 }
e54bcde3d69d40 Zi Shen Lim 2014-08-26 1968
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1969 jit_data = prog->aux->jit_data;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1970 if (!jit_data) {
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1971 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1972 if (!jit_data) {
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1973 prog = orig_prog;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1974 goto out;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1975 }
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1976 prog->aux->jit_data = jit_data;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1977 }
291f131eb536b5 Puranjay Mohan 2025-07-22 1978 priv_stack_ptr = prog->aux->priv_stack_ptr;
291f131eb536b5 Puranjay Mohan 2025-07-22 1979 if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
291f131eb536b5 Puranjay Mohan 2025-07-22 1980 /* Allocate actual private stack size with verifier-calculated
291f131eb536b5 Puranjay Mohan 2025-07-22 1981 * stack size plus two memory guards to protect overflow and
291f131eb536b5 Puranjay Mohan 2025-07-22 1982 * underflow.
291f131eb536b5 Puranjay Mohan 2025-07-22 1983 */
291f131eb536b5 Puranjay Mohan 2025-07-22 1984 priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
291f131eb536b5 Puranjay Mohan 2025-07-22 1985 2 * PRIV_STACK_GUARD_SZ;
291f131eb536b5 Puranjay Mohan 2025-07-22 1986 priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL);
291f131eb536b5 Puranjay Mohan 2025-07-22 1987 if (!priv_stack_ptr) {
291f131eb536b5 Puranjay Mohan 2025-07-22 1988 prog = orig_prog;
291f131eb536b5 Puranjay Mohan 2025-07-22 1989 goto out_priv_stack;
291f131eb536b5 Puranjay Mohan 2025-07-22 1990 }
291f131eb536b5 Puranjay Mohan 2025-07-22 1991
291f131eb536b5 Puranjay Mohan 2025-07-22 1992 priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
291f131eb536b5 Puranjay Mohan 2025-07-22 1993 prog->aux->priv_stack_ptr = priv_stack_ptr;
291f131eb536b5 Puranjay Mohan 2025-07-22 1994 }
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1995 if (jit_data->ctx.offset) {
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1996 ctx = jit_data->ctx;
1dad391daef129 Puranjay Mohan 2024-02-28 1997 ro_image_ptr = jit_data->ro_image;
1dad391daef129 Puranjay Mohan 2024-02-28 1998 ro_header = jit_data->ro_header;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 1999 header = jit_data->header;
1dad391daef129 Puranjay Mohan 2024-02-28 2000 image_ptr = (void *)header + ((void *)ro_image_ptr
1dad391daef129 Puranjay Mohan 2024-02-28 2001 - (void *)ro_header);
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2002 extra_pass = true;
800834285361dc Jean-Philippe Brucker 2020-07-28 2003 prog_size = sizeof(u32) * ctx.idx;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2004 goto skip_init_ctx;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2005 }
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2006 memset(&ctx, 0, sizeof(ctx));
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2007 ctx.prog = prog;
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2008
19f68ed6dc90c9 Aijun Sun 2022-08-04 2009 ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
26eb042ee4c784 Daniel Borkmann 2016-05-13 2010 if (ctx.offset == NULL) {
26eb042ee4c784 Daniel Borkmann 2016-05-13 2011 prog = orig_prog;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2012 goto out_off;
26eb042ee4c784 Daniel Borkmann 2016-05-13 2013 }
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2014
4dd31243e30843 Puranjay Mohan 2024-03-25 2015 ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
5d4fa9ec5643a5 Xu Kuohai 2024-08-26 2016 ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
5b3d19b9bd4080 Xu Kuohai 2022-03-21 2017
291f131eb536b5 Puranjay Mohan 2025-07-22 2018 if (priv_stack_ptr)
291f131eb536b5 Puranjay Mohan 2025-07-22 2019 ctx.priv_sp_used = true;
291f131eb536b5 Puranjay Mohan 2025-07-22 2020
ddbe9ec55039dd Xu Kuohai 2024-09-03 2021 /* Pass 1: Estimate the maximum image size.
68e4f238b0e9d3 Hou Tao 2022-02-26 2022 *
68e4f238b0e9d3 Hou Tao 2022-02-26 2023 * BPF line info needs ctx->offset[i] to be the offset of
68e4f238b0e9d3 Hou Tao 2022-02-26 2024 * instruction[i] in jited image, so build prologue first.
68e4f238b0e9d3 Hou Tao 2022-02-26 2025 */
5d4fa9ec5643a5 Xu Kuohai 2024-08-26 2026 if (build_prologue(&ctx, was_classic)) {
26eb042ee4c784 Daniel Borkmann 2016-05-13 2027 prog = orig_prog;
26eb042ee4c784 Daniel Borkmann 2016-05-13 2028 goto out_off;
26eb042ee4c784 Daniel Borkmann 2016-05-13 2029 }
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2030
68e4f238b0e9d3 Hou Tao 2022-02-26 @2031 if (build_body(&ctx, extra_pass)) {
ddb55992b04d97 Zi Shen Lim 2016-06-08 2032 prog = orig_prog;
ddb55992b04d97 Zi Shen Lim 2016-06-08 2033 goto out_off;
ddb55992b04d97 Zi Shen Lim 2016-06-08 2034 }
51c9fbb1b146f3 Zi Shen Lim 2014-12-03 2035
51c9fbb1b146f3 Zi Shen Lim 2014-12-03 2036 ctx.epilogue_offset = ctx.idx;
0dfefc2ea2f29c James Morse 2021-12-09 2037 build_epilogue(&ctx, was_classic);
b2ad54e1533e91 Xu Kuohai 2022-07-11 2038 build_plt(&ctx);
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2039
b2ad54e1533e91 Xu Kuohai 2022-07-11 2040 extable_align = __alignof__(struct exception_table_entry);
800834285361dc Jean-Philippe Brucker 2020-07-28 2041 extable_size = prog->aux->num_exentries *
800834285361dc Jean-Philippe Brucker 2020-07-28 2042 sizeof(struct exception_table_entry);
800834285361dc Jean-Philippe Brucker 2020-07-28 2043
ddbe9ec55039dd Xu Kuohai 2024-09-03 2044 /* Now we know the maximum image size. */
800834285361dc Jean-Philippe Brucker 2020-07-28 2045 prog_size = sizeof(u32) * ctx.idx;
b2ad54e1533e91 Xu Kuohai 2022-07-11 2046 /* also allocate space for plt target */
b2ad54e1533e91 Xu Kuohai 2022-07-11 2047 extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
b2ad54e1533e91 Xu Kuohai 2022-07-11 2048 image_size = extable_offset + extable_size;
1dad391daef129 Puranjay Mohan 2024-02-28 2049 ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
1dad391daef129 Puranjay Mohan 2024-02-28 2050 sizeof(u32), &header, &image_ptr,
1dad391daef129 Puranjay Mohan 2024-02-28 2051 jit_fill_hole);
1dad391daef129 Puranjay Mohan 2024-02-28 2052 if (!ro_header) {
26eb042ee4c784 Daniel Borkmann 2016-05-13 2053 prog = orig_prog;
26eb042ee4c784 Daniel Borkmann 2016-05-13 2054 goto out_off;
26eb042ee4c784 Daniel Borkmann 2016-05-13 2055 }
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2056
ddbe9ec55039dd Xu Kuohai 2024-09-03 2057 /* Pass 2: Determine jited position and result for each instruction */
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2058
1dad391daef129 Puranjay Mohan 2024-02-28 2059 /*
1dad391daef129 Puranjay Mohan 2024-02-28 2060 * Use the image(RW) for writing the JITed instructions. But also save
1dad391daef129 Puranjay Mohan 2024-02-28 2061 * the ro_image(RX) for calculating the offsets in the image. The RW
1dad391daef129 Puranjay Mohan 2024-02-28 2062 * image will be later copied to the RX image from where the program
1dad391daef129 Puranjay Mohan 2024-02-28 2063 * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
1dad391daef129 Puranjay Mohan 2024-02-28 2064 * final step.
1dad391daef129 Puranjay Mohan 2024-02-28 2065 */
425e1ed73e6574 Luc Van Oostenryck 2017-06-28 2066 ctx.image = (__le32 *)image_ptr;
1dad391daef129 Puranjay Mohan 2024-02-28 2067 ctx.ro_image = (__le32 *)ro_image_ptr;
800834285361dc Jean-Philippe Brucker 2020-07-28 2068 if (extable_size)
1dad391daef129 Puranjay Mohan 2024-02-28 2069 prog->aux->extable = (void *)ro_image_ptr + extable_offset;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2070 skip_init_ctx:
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2071 ctx.idx = 0;
800834285361dc Jean-Philippe Brucker 2020-07-28 2072 ctx.exentry_idx = 0;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2073 ctx.write = true;
b569c1c622c5e6 Daniel Borkmann 2014-09-16 2074
5d4fa9ec5643a5 Xu Kuohai 2024-08-26 2075 build_prologue(&ctx, was_classic);
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2076
ddbe9ec55039dd Xu Kuohai 2024-09-03 2077 /* Record exentry_idx and body_idx before first build_body */
ddbe9ec55039dd Xu Kuohai 2024-09-03 2078 exentry_idx = ctx.exentry_idx;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2079 body_idx = ctx.idx;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2080 /* Dont write body instructions to memory for now */
ddbe9ec55039dd Xu Kuohai 2024-09-03 2081 ctx.write = false;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2082
8c11ea5ce13da0 Daniel Borkmann 2018-11-26 2083 if (build_body(&ctx, extra_pass)) {
26eb042ee4c784 Daniel Borkmann 2016-05-13 2084 prog = orig_prog;
1dad391daef129 Puranjay Mohan 2024-02-28 2085 goto out_free_hdr;
60ef0494f197d4 Daniel Borkmann 2014-09-11 2086 }
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2087
ddbe9ec55039dd Xu Kuohai 2024-09-03 2088 ctx.epilogue_offset = ctx.idx;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2089 ctx.exentry_idx = exentry_idx;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2090 ctx.idx = body_idx;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2091 ctx.write = true;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2092
ddbe9ec55039dd Xu Kuohai 2024-09-03 2093 /* Pass 3: Adjust jump offset and write final image */
ddbe9ec55039dd Xu Kuohai 2024-09-03 2094 if (build_body(&ctx, extra_pass) ||
ddbe9ec55039dd Xu Kuohai 2024-09-03 2095 WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) {
ddbe9ec55039dd Xu Kuohai 2024-09-03 2096 prog = orig_prog;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2097 goto out_free_hdr;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2098 }
ddbe9ec55039dd Xu Kuohai 2024-09-03 2099
0dfefc2ea2f29c James Morse 2021-12-09 2100 build_epilogue(&ctx, was_classic);
b2ad54e1533e91 Xu Kuohai 2022-07-11 2101 build_plt(&ctx);
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2102
ddbe9ec55039dd Xu Kuohai 2024-09-03 2103 /* Extra pass to validate JITed code. */
efc9909fdce00a Xu Kuohai 2022-07-11 2104 if (validate_ctx(&ctx)) {
26eb042ee4c784 Daniel Borkmann 2016-05-13 2105 prog = orig_prog;
1dad391daef129 Puranjay Mohan 2024-02-28 2106 goto out_free_hdr;
42ff712bc0c3d7 Zi Shen Lim 2016-01-13 2107 }
42ff712bc0c3d7 Zi Shen Lim 2016-01-13 2108
ddbe9ec55039dd Xu Kuohai 2024-09-03 2109 /* update the real prog size */
ddbe9ec55039dd Xu Kuohai 2024-09-03 2110 prog_size = sizeof(u32) * ctx.idx;
ddbe9ec55039dd Xu Kuohai 2024-09-03 2111
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2112 /* And we're done. */
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2113 if (bpf_jit_enable > 1)
800834285361dc Jean-Philippe Brucker 2020-07-28 2114 bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2115
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2116 if (!prog->is_func || extra_pass) {
ddbe9ec55039dd Xu Kuohai 2024-09-03 2117 /* The jited image may shrink since the jited result for
ddbe9ec55039dd Xu Kuohai 2024-09-03 2118 * BPF_CALL to subprog may be changed from indirect call
ddbe9ec55039dd Xu Kuohai 2024-09-03 2119 * to direct call.
ddbe9ec55039dd Xu Kuohai 2024-09-03 2120 */
ddbe9ec55039dd Xu Kuohai 2024-09-03 2121 if (extra_pass && ctx.idx > jit_data->ctx.idx) {
ddbe9ec55039dd Xu Kuohai 2024-09-03 2122 pr_err_once("multi-func JIT bug %d > %d\n",
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2123 ctx.idx, jit_data->ctx.idx);
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2124 prog->bpf_func = NULL;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2125 prog->jited = 0;
10f3b29c65bb2f Eric Dumazet 2022-05-31 2126 prog->jited_len = 0;
1dad391daef129 Puranjay Mohan 2024-02-28 2127 goto out_free_hdr;
1dad391daef129 Puranjay Mohan 2024-02-28 2128 }
9919c5c98cb25d Rafael Passos 2024-06-14 2129 if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
1dad391daef129 Puranjay Mohan 2024-02-28 2130 /* ro_header has been freed */
1dad391daef129 Puranjay Mohan 2024-02-28 2131 ro_header = NULL;
1dad391daef129 Puranjay Mohan 2024-02-28 2132 prog = orig_prog;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2133 goto out_off;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2134 }
1dad391daef129 Puranjay Mohan 2024-02-28 2135 /*
1dad391daef129 Puranjay Mohan 2024-02-28 2136 * The instructions have now been copied to the ROX region from
1dad391daef129 Puranjay Mohan 2024-02-28 2137 * where they will execute. Now the data cache has to be cleaned to
1dad391daef129 Puranjay Mohan 2024-02-28 2138 * the PoU and the I-cache has to be invalidated for the VAs.
1dad391daef129 Puranjay Mohan 2024-02-28 2139 */
1dad391daef129 Puranjay Mohan 2024-02-28 2140 bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2141 } else {
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2142 jit_data->ctx = ctx;
1dad391daef129 Puranjay Mohan 2024-02-28 2143 jit_data->ro_image = ro_image_ptr;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2144 jit_data->header = header;
1dad391daef129 Puranjay Mohan 2024-02-28 2145 jit_data->ro_header = ro_header;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2146 }
1dad391daef129 Puranjay Mohan 2024-02-28 2147
1dad391daef129 Puranjay Mohan 2024-02-28 2148 prog->bpf_func = (void *)ctx.ro_image;
a91263d520246b Daniel Borkmann 2015-09-30 2149 prog->jited = 1;
800834285361dc Jean-Philippe Brucker 2020-07-28 2150 prog->jited_len = prog_size;
26eb042ee4c784 Daniel Borkmann 2016-05-13 2151
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2152 if (!prog->is_func || extra_pass) {
dda7596c109fc3 Hou Tao 2022-02-26 2153 int i;
dda7596c109fc3 Hou Tao 2022-02-26 2154
dda7596c109fc3 Hou Tao 2022-02-26 2155 /* offset[prog->len] is the size of program */
dda7596c109fc3 Hou Tao 2022-02-26 2156 for (i = 0; i <= prog->len; i++)
dda7596c109fc3 Hou Tao 2022-02-26 2157 ctx.offset[i] *= AARCH64_INSN_SIZE;
32f6865c7aa3c4 Ilias Apalodimas 2020-09-17 2158 bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
26eb042ee4c784 Daniel Borkmann 2016-05-13 2159 out_off:
291f131eb536b5 Puranjay Mohan 2025-07-22 2160 if (!ro_header && priv_stack_ptr) {
291f131eb536b5 Puranjay Mohan 2025-07-22 2161 free_percpu(priv_stack_ptr);
291f131eb536b5 Puranjay Mohan 2025-07-22 2162 prog->aux->priv_stack_ptr = NULL;
291f131eb536b5 Puranjay Mohan 2025-07-22 2163 }
19f68ed6dc90c9 Aijun Sun 2022-08-04 2164 kvfree(ctx.offset);
291f131eb536b5 Puranjay Mohan 2025-07-22 2165 out_priv_stack:
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2166 kfree(jit_data);
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2167 prog->aux->jit_data = NULL;
db496944fdaaf2 Alexei Starovoitov 2017-12-14 2168 }
26eb042ee4c784 Daniel Borkmann 2016-05-13 2169 out:
26eb042ee4c784 Daniel Borkmann 2016-05-13 2170 if (tmp_blinded)
26eb042ee4c784 Daniel Borkmann 2016-05-13 2171 bpf_jit_prog_release_other(prog, prog == orig_prog ?
26eb042ee4c784 Daniel Borkmann 2016-05-13 2172 tmp : orig_prog);
d1c55ab5e41fcd Daniel Borkmann 2016-05-13 2173 return prog;
1dad391daef129 Puranjay Mohan 2024-02-28 2174
1dad391daef129 Puranjay Mohan 2024-02-28 2175 out_free_hdr:
1dad391daef129 Puranjay Mohan 2024-02-28 2176 if (header) {
1dad391daef129 Puranjay Mohan 2024-02-28 2177 bpf_arch_text_copy(&ro_header->size, &header->size,
1dad391daef129 Puranjay Mohan 2024-02-28 2178 sizeof(header->size));
1dad391daef129 Puranjay Mohan 2024-02-28 2179 bpf_jit_binary_pack_free(ro_header, header);
1dad391daef129 Puranjay Mohan 2024-02-28 2180 }
1dad391daef129 Puranjay Mohan 2024-02-28 2181 goto out_off;
e54bcde3d69d40 Zi Shen Lim 2014-08-26 2182 }
91fc957c9b1d6c Ard Biesheuvel 2018-11-23 2183
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH bpf-next] bpf, arm64: JIT support for private stack
2025-07-22 17:32 [PATCH bpf-next] bpf, arm64: JIT support for private stack Puranjay Mohan
2025-07-23 15:52 ` kernel test robot
@ 2025-07-23 22:01 ` Yonghong Song
1 sibling, 0 replies; 3+ messages in thread
From: Yonghong Song @ 2025-07-23 22:01 UTC (permalink / raw)
To: Puranjay Mohan, Alexei Starovoitov, Daniel Borkmann,
Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
Xu Kuohai, Catalin Marinas, Will Deacon, Mykola Lysenko, bpf
On 7/22/25 10:32 AM, Puranjay Mohan wrote:
> The private stack is allocated in bpf_int_jit_compile() with 16-byte
> alignment. It includes additional guard regions to detect stack
> overflows and underflows at runtime.
>
> Memory layout:
>
> +------------------------------------------------------+
> | |
> | 16 bytes padding (overflow guard - stack top) |
> | [ detects writes beyond top of stack ] |
> BPF FP ->+------------------------------------------------------+
> | |
> | BPF private stack (sized by verifier) |
> | [ 16-byte aligned ] |
> | |
> BPF PRIV SP ->+------------------------------------------------------+
> | |
> | 16 bytes padding (underflow guard - stack bottom) |
> | [ detects accesses before start of stack ] |
> | |
> +------------------------------------------------------+
>
> On detection of an overflow or underflow, the kernel emits messages
> like:
> BPF private stack overflow/underflow detected for prog <prog_name>
>
> After commit bd737fcb6485 ("bpf, arm64: Get rid of fpb"), Jited BPF
> programs use the stack in two ways:
> 1. Via the BPF frame pointer (top of stack), using negative offsets.
> 2. Via the stack pointer (bottom of stack), using positive offsets in
> LDR/STR instructions.
>
> When a private stack is used, ARM64 callee-saved register x27 replaces
> the stack pointer. The BPF frame pointer usage remains unchanged; but it
> now points to the top of the private stack.
>
> Relevant tests:
>
> #415/1 struct_ops_private_stack/private_stack:OK
> #415/2 struct_ops_private_stack/private_stack_fail:OK
> #415/3 struct_ops_private_stack/private_stack_recur:OK
> #415 struct_ops_private_stack:OK
> #549/1 verifier_private_stack/Private stack, single prog:OK
> #549/2 verifier_private_stack/Private stack, subtree > MAX_BPF_STACK:OK
> #549/3 verifier_private_stack/No private stack:OK
> #549/4 verifier_private_stack/Private stack, callback:OK
> #549/5 verifier_private_stack/Private stack, exception in main prog:OK
> #549/6 verifier_private_stack/Private stack, exception in subprog:OK
> #549/7 verifier_private_stack/Private stack, async callback, not nested:OK
> #549/8 verifier_private_stack/Private stack, async callback, potential nesting:OK
> #549 verifier_private_stack:OK
> Summary: 2/11 PASSED, 0 SKIPPED, 0 FAILED
>
> Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
> ---
> Note: This needs the fix in [1] to work properly.
> [1] https://lore.kernel.org/all/20250722133410.54161-2-puranjay@kernel.org/
> ---
> arch/arm64/net/bpf_jit_comp.c | 131 ++++++++++++++++--
> arch/x86/net/bpf_jit_comp.c | 9 +-
> include/linux/filter.h | 2 +
> kernel/bpf/core.c | 7 +
> .../bpf/progs/struct_ops_private_stack.c | 2 +-
> .../bpf/progs/struct_ops_private_stack_fail.c | 2 +-
> .../progs/struct_ops_private_stack_recur.c | 2 +-
> .../bpf/progs/verifier_private_stack.c | 89 +++++++++++-
> 8 files changed, 221 insertions(+), 23 deletions(-)
>
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index 89b1b8c248c62..5a0170536c8d4 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -30,6 +30,7 @@
> #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
> #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
> #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
> +#define PRIVATE_SP (MAX_BPF_JIT_REG + 4)
> #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
>
> #define check_imm(bits, imm) do { \
> @@ -68,6 +69,8 @@ static const int bpf2a64[] = {
> [TCCNT_PTR] = A64_R(26),
> /* temporary register for blinding constants */
> [BPF_REG_AX] = A64_R(9),
> + /* callee saved register for private stack pointer */
> + [PRIVATE_SP] = A64_R(27),
> /* callee saved register for kern_vm_start address */
> [ARENA_VM_START] = A64_R(28),
> };
> @@ -86,6 +89,7 @@ struct jit_ctx {
> u64 user_vm_start;
> u64 arena_vm_start;
> bool fp_used;
> + bool priv_sp_used;
> bool write;
> };
>
> @@ -98,6 +102,10 @@ struct bpf_plt {
> #define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
> #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
>
> +/* Memory size/value to protect private stack overflow/underflow */
> +#define PRIV_STACK_GUARD_SZ 16
> +#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
> +
> static inline void emit(const u32 insn, struct jit_ctx *ctx)
> {
> if (ctx->image != NULL && ctx->write)
> @@ -387,8 +395,11 @@ static void find_used_callee_regs(struct jit_ctx *ctx)
> if (reg_used & 8)
> ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
>
> - if (reg_used & 16)
> + if (reg_used & 16) {
> ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
> + if (ctx->priv_sp_used)
> + ctx->used_callee_reg[i++] = bpf2a64[PRIVATE_SP];
> + }
>
> if (ctx->arena_vm_start)
> ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
> @@ -461,6 +472,19 @@ static void pop_callee_regs(struct jit_ctx *ctx)
> }
> }
>
[...]
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 40e1b3b9634fe..7e3fca1646203 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -3501,13 +3501,6 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
> return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
> }
>
> -static const char *bpf_get_prog_name(struct bpf_prog *prog)
> -{
> - if (prog->aux->ksym.prog)
> - return prog->aux->ksym.name;
> - return prog->aux->name;
> -}
> -
> static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
> {
> int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
> @@ -3531,7 +3524,7 @@ static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size
> if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
> stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
> pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
> - bpf_get_prog_name(prog));
> + bpf_jit_get_prog_name(prog));
> break;
> }
> }
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index eca229752cbef..5cc7a82ec8322 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1278,6 +1278,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
> const struct bpf_insn *insn, bool extra_pass,
> u64 *func_addr, bool *func_addr_fixed);
>
> +const char *bpf_jit_get_prog_name(struct bpf_prog *prog);
> +
> struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
> void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
>
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 61613785bdd0f..29c0225c14aa9 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1297,6 +1297,13 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
> return 0;
> }
>
> +const char *bpf_jit_get_prog_name(struct bpf_prog *prog)
> +{
> + if (prog->aux->ksym.prog)
> + return prog->aux->ksym.name;
> + return prog->aux->name;
> +}
This is a refactoring and should be a separate patch.
> +
> static int bpf_jit_blind_insn(const struct bpf_insn *from,
> const struct bpf_insn *aux,
> struct bpf_insn *to_buff,
> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
> index 0e4d2ff63ab81..dbe646013811a 100644
> --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
> +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
The selftest changes should be in another separate patch.
> @@ -7,7 +7,7 @@
>
> char _license[] SEC("license") = "GPL";
>
> -#if defined(__TARGET_ARCH_x86)
> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
> bool skip __attribute((__section__(".data"))) = false;
> #else
> bool skip = true;
> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
> index 58d5d8dc22352..3d89ad7cbe2a9 100644
> --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
> +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
> @@ -7,7 +7,7 @@
>
> char _license[] SEC("license") = "GPL";
>
> -#if defined(__TARGET_ARCH_x86)
> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
> bool skip __attribute((__section__(".data"))) = false;
> #else
> bool skip = true;
[...]
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-07-23 22:01 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-07-22 17:32 [PATCH bpf-next] bpf, arm64: JIT support for private stack Puranjay Mohan
2025-07-23 15:52 ` kernel test robot
2025-07-23 22:01 ` Yonghong Song
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.