* [PATCH v5 1/6] powerpc/bpf: fix alignment of long branch trampoline address
2026-05-19 23:38 [PATCH v5 0/6] powerpc/bpf: Add support for verifier selftest adubey
@ 2026-05-19 23:38 ` adubey
2026-05-19 23:38 ` [PATCH v5 2/6] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub adubey
` (4 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: adubey @ 2026-05-19 23:38 UTC (permalink / raw)
To: bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
Ensure the dummy trampoline address field present between the OOL stub
and the long branch stub is 8-byte aligned, for compatibility when its
content is loaded from memory into a register.
Reported-by: Hari Bathini <hbathini@linux.ibm.com>
Fixes: d243b62b7bd3 ("powerpc64/bpf: Add support for bpf trampolines")
Cc: stable@vger.kernel.org
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
arch/powerpc/net/bpf_jit.h | 4 ++--
arch/powerpc/net/bpf_jit_comp.c | 38 +++++++++++++++++++++++++++----
arch/powerpc/net/bpf_jit_comp32.c | 4 ++--
arch/powerpc/net/bpf_jit_comp64.c | 4 ++--
4 files changed, 39 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index f32de8704d4d..71e6e7d01057 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -214,8 +214,8 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
u32 *addrs, int pass, bool extra_pass);
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
-void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
-void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, u32 *fimage, struct codegen_context *ctx);
+void bpf_jit_build_fentry_stubs(u32 *image, u32 *fimage, struct codegen_context *ctx);
void bpf_jit_realloc_regs(struct codegen_context *ctx);
int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
void prepare_for_fsession_fentry(u32 *image, struct codegen_context *ctx, int cookie_cnt,
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 53ab97ad6074..00b86ed97cb5 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -49,11 +49,38 @@ asm (
" .popsection ;"
);
-void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
+void bpf_jit_build_fentry_stubs(u32 *image, u32 *fimage, struct codegen_context *ctx)
{
int ool_stub_idx, long_branch_stub_idx;
+ int ool_instrs;
/*
+ * In the final pass, align the mis-aligned dummy_tramp_addr field
+ * in the fimage. The alignment NOP must appear before OOL stub,
+ * to make ool_stub_idx & long_branch_stub_idx constant from end.
+ *
+ * dummy_tramp_addr must be 8-byte aligned for load-register
+ * compatibility. Since fimage is guaranteed >= 8-byte aligned
+ * by the allocator, alignment depends only on the instruction
+ * count offset. The OOL stub has 4 instructions (with
+ * CONFIG_PPC_FTRACE_OUT_OF_LINE) or 3 instructions (without)
+ * before dummy_tramp_addr.
+ *
+ * Emit a NOP here if (ctx->idx + ool_instrs) is odd, so that
+ * dummy_tramp_addr lands at an even instruction offset (== 8-byte
+ * aligned from an 8-byte aligned base).
+ *
+ * In pass=0 when image==NULL, conservatively account for space
+ * required to accommodate alignment NOP. In case final pass skips
+ * emitting alignment NOP, the image buffer has 4 spare bytes and
+ * jited_len signifies correct program size.
+ */
+
+ ool_instrs = IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) ? 4 : 3;
+ if (!image || ((ctx->idx + ool_instrs) & 1))
+ EMIT(PPC_RAW_NOP());
+
+ /* nop // optional, for alignment of dummy_tramp_addr
* Out-of-line stub:
* mflr r0
* [b|bl] tramp
@@ -70,7 +97,7 @@ void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
/*
* Long branch stub:
- * .long <dummy_tramp_addr>
+ * .long <dummy_tramp_addr> // 8-byte aligned
* mflr r11
* bcl 20,31,$+4
* mflr r12
@@ -81,6 +108,7 @@ void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
*/
if (image)
*((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
+
ctx->idx += SZL / 4;
long_branch_stub_idx = ctx->idx;
EMIT(PPC_RAW_MFLR(_R11));
@@ -107,7 +135,7 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
PPC_JMP(ctx->alt_exit_addr);
} else {
ctx->alt_exit_addr = ctx->idx * 4;
- bpf_jit_build_epilogue(image, ctx);
+ bpf_jit_build_epilogue(image, NULL, ctx);
}
return 0;
@@ -286,7 +314,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
*/
bpf_jit_build_prologue(NULL, &cgctx);
addrs[fp->len] = cgctx.idx * 4;
- bpf_jit_build_epilogue(NULL, &cgctx);
+ bpf_jit_build_epilogue(NULL, NULL, &cgctx);
fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
@@ -318,7 +346,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_pr
bpf_jit_binary_pack_free(fhdr, hdr);
goto out_err;
}
- bpf_jit_build_epilogue(code_base, &cgctx);
+ bpf_jit_build_epilogue(code_base, fcode_base, &cgctx);
if (bpf_jit_enable > 1)
pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index bfdc50740da8..95bda0dee925 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -229,7 +229,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
}
-void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_build_epilogue(u32 *image, u32 *fimage, struct codegen_context *ctx)
{
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
@@ -237,7 +237,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
EMIT(PPC_RAW_BLR());
- bpf_jit_build_fentry_stubs(image, ctx);
+ bpf_jit_build_fentry_stubs(image, fimage, ctx);
}
/* Relative offset needs to be calculated based on final image location */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index db364d9083e7..885dc8cf55a2 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -398,7 +398,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
}
}
-void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+void bpf_jit_build_epilogue(u32 *image, u32 *fimage, struct codegen_context *ctx)
{
bpf_jit_emit_common_epilogue(image, ctx);
@@ -407,7 +407,7 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
EMIT(PPC_RAW_BLR());
- bpf_jit_build_fentry_stubs(image, ctx);
+ bpf_jit_build_fentry_stubs(image, fimage, ctx);
}
/*
--
2.52.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v5 2/6] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub
2026-05-19 23:38 [PATCH v5 0/6] powerpc/bpf: Add support for verifier selftest adubey
2026-05-19 23:38 ` [PATCH v5 1/6] powerpc/bpf: fix alignment of long branch trampoline address adubey
@ 2026-05-19 23:38 ` adubey
2026-05-19 20:26 ` bot+bpf-ci
2026-05-19 23:38 ` [PATCH v5 3/6] selftest/bpf: Fixing powerpc JIT disassembly failure adubey
` (3 subsequent siblings)
5 siblings, 1 reply; 9+ messages in thread
From: adubey @ 2026-05-19 23:38 UTC (permalink / raw)
To: bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
Move the long branch address space to the bottom of the long
branch stub. This allows uninterrupted disassembly until the
last 8 bytes. Exclude these last bytes from the overall
program length to prevent disassembly failures.
Also, align the dummy_tramp_addr field to an 8-byte boundary.
Following is disassembler output for test program with moved down
dummy_tramp_addr field:
.....
.....
pc:68 left:44 a6 03 08 7c : mtlr 0
pc:72 left:40 bc ff ff 4b : b .-68
pc:76 left:36 a6 02 68 7d : mflr 11
pc:80 left:32 05 00 9f 42 : bcl 20, 31, .+4
pc:84 left:28 a6 02 88 7d : mflr 12
pc:88 left:24 14 00 8c e9 : ld 12, 20(12)
pc:92 left:20 a6 03 89 7d : mtctr 12
pc:96 left:16 a6 03 68 7d : mtlr 11
pc:100 left:12 20 04 80 4e : bctr
pc:104 left:8 c0 34 1d 00 :
Failure log:
Can't disasm instruction at offset 104: c0 34 1d 00 00 00 00 c0
Disassembly logic can truncate at 104, ignoring last 8 bytes.
Update the dummy_tramp_addr field offset calculation from the end
of the program to reflect its new location, for bpf_arch_text_poke()
to update the actual trampoline's address in this field.
All BPF trampoline selftests continue to pass with this patch applied.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
arch/powerpc/net/bpf_jit_comp.c | 41 ++++++++++++++++++---------------
1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 00b86ed97cb5..56a923d3908e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -52,9 +52,10 @@ asm (
void bpf_jit_build_fentry_stubs(u32 *image, u32 *fimage, struct codegen_context *ctx)
{
int ool_stub_idx, long_branch_stub_idx;
- int ool_instrs;
+ int stubs_instrs;
/*
+ * The dummy_tramp_addr field is placed at bottom of Long branch stub.
* In the final pass, align the mis-aligned dummy_tramp_addr field
* in the fimage. The alignment NOP must appear before OOL stub,
* to make ool_stub_idx & long_branch_stub_idx constant from end.
@@ -62,11 +63,11 @@ void bpf_jit_build_fentry_stubs(u32 *image, u32 *fimage, struct codegen_context
* dummy_tramp_addr must be 8-byte aligned for load-register
* compatibility. Since fimage is guaranteed >= 8-byte aligned
* by the allocator, alignment depends only on the instruction
- * count offset. The OOL stub has 4 instructions (with
- * CONFIG_PPC_FTRACE_OUT_OF_LINE) or 3 instructions (without)
+ * count offset. The stubs block has 11 instructions (with
+ * CONFIG_PPC_FTRACE_OUT_OF_LINE) or 10 instructions (without)
* before dummy_tramp_addr.
*
- * Emit a NOP here if (ctx->idx + ool_instrs) is odd, so that
+ * Emit a NOP here if (ctx->idx + stubs_instrs) is odd, so that
* dummy_tramp_addr lands at an even instruction offset (== 8-byte
* aligned from an 8-byte aligned base).
*
@@ -76,8 +77,8 @@ void bpf_jit_build_fentry_stubs(u32 *image, u32 *fimage, struct codegen_context
* jited_len signifies correct program size.
*/
- ool_instrs = IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) ? 4 : 3;
- if (!image || ((ctx->idx + ool_instrs) & 1))
+ stubs_instrs = IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) ? 11 : 10;
+ if (!image || ((ctx->idx + stubs_instrs) & 1))
EMIT(PPC_RAW_NOP());
/* nop // optional, for alignment of dummy_tramp_addr
@@ -97,28 +98,29 @@ void bpf_jit_build_fentry_stubs(u32 *image, u32 *fimage, struct codegen_context
/*
* Long branch stub:
- * .long <dummy_tramp_addr> // 8-byte aligned
* mflr r11
* bcl 20,31,$+4
- * mflr r12
- * ld r12, -8-SZL(r12)
+ * mflr r12 // lr/r12 stores pc of current(this) inst.
+ * ld r12, 20(r12) // offset(dummy_tramp_addr) from prev inst. is 20
* mtctr r12
- * mtlr r11 // needed to retain ftrace ABI
+ * mtlr r11 // needed to retain ftrace ABI
* bctr
+ * .long <dummy_tramp_addr> // 8-byte aligned
*/
- if (image)
- *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
-
- ctx->idx += SZL / 4;
long_branch_stub_idx = ctx->idx;
EMIT(PPC_RAW_MFLR(_R11));
EMIT(PPC_RAW_BCL4());
EMIT(PPC_RAW_MFLR(_R12));
- EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL));
+ EMIT(PPC_RAW_LL(_R12, _R12, 20));
EMIT(PPC_RAW_MTCTR(_R12));
EMIT(PPC_RAW_MTLR(_R11));
EMIT(PPC_RAW_BCTR());
+ if (image)
+ *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
+
+ ctx->idx += SZL / 4;
+
if (!bpf_jit_ool_stub) {
bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4;
bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4;
@@ -1288,6 +1290,7 @@ static void do_isync(void *info __maybe_unused)
* bpf_func:
* [nop|b] ool_stub
* 2. Out-of-line stub:
+ * nop // optional nop for alignment
* ool_stub:
* mflr r0
* [b|bl] <bpf_prog>/<long_branch_stub>
@@ -1295,14 +1298,14 @@ static void do_isync(void *info __maybe_unused)
* b bpf_func + 4
* 3. Long branch stub:
* long_branch_stub:
- * .long <branch_addr>/<dummy_tramp>
* mflr r11
* bcl 20,31,$+4
* mflr r12
- * ld r12, -16(r12)
+ * ld r12, 20(r12)
* mtctr r12
* mtlr r11 // needed to retain ftrace ABI
* bctr
+ * .long <branch_addr>/<dummy_tramp>
*
* dummy_tramp is used to reduce synchronization requirements.
*
@@ -1404,10 +1407,12 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
* 1. Update the address in the long branch stub:
* If new_addr is out of range, we will have to use the long branch stub, so patch new_addr
* here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here.
+ *
+ * dummy_tramp_addr moved to bottom of long branch stub.
*/
if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) ||
(old_addr && !is_offset_in_branch_range(old_addr - ip)))
- ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL),
+ ret = patch_ulong((void *)(bpf_func_end - SZL), /* SZL: dummy_tramp_addr offset */
(new_addr && !is_offset_in_branch_range(new_addr - ip)) ?
(unsigned long)new_addr : (unsigned long)dummy_tramp);
if (ret)
--
2.52.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH v5 2/6] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub
2026-05-19 23:38 ` [PATCH v5 2/6] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub adubey
@ 2026-05-19 20:26 ` bot+bpf-ci
0 siblings, 0 replies; 9+ messages in thread
From: bot+bpf-ci @ 2026-05-19 20:26 UTC (permalink / raw)
To: adubey, bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, adubey, ast, andrii, daniel, martin.lau,
eddyz87, yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2261 bytes --]
> powerpc/bpf: Move out dummy_tramp_addr after Long branch stub
>
> Move the long branch address space to the bottom of the long
> branch stub. This allows uninterrupted disassembly until the
> last 8 bytes. Exclude these last bytes from the overall
> program length to prevent failure in assembly generation.
> Also, align dummy_tramp_addr field with 8-byte boundary.
>
> Following is disassembler output for test program with moved down
> dummy_tramp_addr field:
> .....
> .....
> pc:68 left:44 a6 03 08 7c : mtlr 0
> pc:72 left:40 bc ff ff 4b : b .-68
> pc:76 left:36 a6 02 68 7d : mflr 11
> pc:80 left:32 05 00 9f 42 : bcl 20, 31, .+4
> pc:84 left:28 a6 02 88 7d : mflr 12
> pc:88 left:24 14 00 8c e9 : ld 12, 20(12)
> pc:92 left:20 a6 03 89 7d : mtctr 12
> pc:96 left:16 a6 03 68 7d : mtlr 11
> pc:100 left:12 20 04 80 4e : bctr
> pc:104 left:8 c0 34 1d 00 :
>
> Failure log:
> Can't disasm instruction at offset 104: c0 34 1d 00 00 00 00 c0
> Disassembly logic can truncate at 104, ignoring last 8 bytes.
>
> Update the dummy_tramp_addr field offset calculation from the end
> of the program to reflect its new location, for bpf_arch_text_poke()
> to update the actual trampoline's address in this field.
>
> All BPF trampoline selftests continue to pass with this patch applied.
>
> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
This looks like a bug fix for the bpf trampoline implementation on
powerpc64. Should this include a Fixes tag?
The bug was introduced in commit d243b62b7bd3 which added support for bpf
trampolines on powerpc64. That commit placed the dummy_tramp_addr field at
the beginning of the long branch stub, causing the disassembly failures and
incorrect offset calculations that this patch fixes.
Suggested:
Fixes: d243b62b7bd3 ("powerpc64/bpf: Add support for bpf trampolines")
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26121409898
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v5 3/6] selftest/bpf: Fixing powerpc JIT disassembly failure
2026-05-19 23:38 [PATCH v5 0/6] powerpc/bpf: Add support for verifier selftest adubey
2026-05-19 23:38 ` [PATCH v5 1/6] powerpc/bpf: fix alignment of long branch trampoline address adubey
2026-05-19 23:38 ` [PATCH v5 2/6] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub adubey
@ 2026-05-19 23:38 ` adubey
2026-05-19 20:14 ` bot+bpf-ci
2026-05-19 23:38 ` [PATCH v5 4/6] selftest/bpf: Enable verifier selftest for powerpc64 adubey
` (2 subsequent siblings)
5 siblings, 1 reply; 9+ messages in thread
From: adubey @ 2026-05-19 23:38 UTC (permalink / raw)
To: bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
Ensure that the trampoline stubs JITed at the tail of the
epilogue do not expose the dummy trampoline address stored
in the last 8 bytes (for both 64-bit and 32-bit PowerPC)
to the disassembly flow. Prevent the disassembler from
ingesting this memory address, as it may occasionally decode
into a seemingly valid but incorrect instruction. Fix this
issue by truncating the last 8 bytes from JITed buffers
before supplying them for disassembly.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tools/testing/selftests/bpf/jit_disasm_helpers.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c
index 364c557c5115..9e6613479145 100644
--- a/tools/testing/selftests/bpf/jit_disasm_helpers.c
+++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c
@@ -170,9 +170,11 @@ int get_jited_program_text(int fd, char *text, size_t text_sz)
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
__u32 jited_funcs, len, pc;
+ __u32 trunc_len = 0;
__u32 *func_lens = NULL;
FILE *text_out = NULL;
uint8_t *image = NULL;
+ char *triple = NULL;
int i, err = 0;
if (!llvm_initialized) {
@@ -216,9 +218,21 @@ int get_jited_program_text(int fd, char *text, size_t text_sz)
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd #2"))
goto out;
+ /*
+ * The last 8 bytes contain the dummy_trampoline address in the
+ * JIT output for 64-bit and 32-bit powerpc, which can't be
+ * disassembled to a valid instruction.
+ */
+ triple = LLVMGetDefaultTargetTriple();
+ if (triple) {
+ if (strstr(triple, "powerpc"))
+ trunc_len = 8;
+ LLVMDisposeMessage(triple);
+ }
+
for (pc = 0, i = 0; i < jited_funcs; ++i) {
fprintf(text_out, "func #%d:\n", i);
- disasm_one_func(text_out, image + pc, func_lens[i]);
+ disasm_one_func(text_out, image + pc, func_lens[i] - trunc_len);
fprintf(text_out, "\n");
pc += func_lens[i];
}
--
2.52.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH v5 3/6] selftest/bpf: Fixing powerpc JIT disassembly failure
2026-05-19 23:38 ` [PATCH v5 3/6] selftest/bpf: Fixing powerpc JIT disassembly failure adubey
@ 2026-05-19 20:14 ` bot+bpf-ci
0 siblings, 0 replies; 9+ messages in thread
From: bot+bpf-ci @ 2026-05-19 20:14 UTC (permalink / raw)
To: adubey, bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, adubey, ast, andrii, daniel, martin.lau,
eddyz87, yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2503 bytes --]
> diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c
> index 364c557c5115..9e6613479145 100644
> --- a/tools/testing/selftests/bpf/jit_disasm_helpers.c
> +++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c
> @@ -170,9 +170,11 @@ int get_jited_program_text(int fd, char *text, size_t text_sz)
> struct bpf_prog_info info = {};
> __u32 info_len = sizeof(info);
> __u32 jited_funcs, len, pc;
> + __u32 trunc_len = 0;
> __u32 *func_lens = NULL;
> FILE *text_out = NULL;
> uint8_t *image = NULL;
> + char *triple = NULL;
> int i, err = 0;
>
> if (!llvm_initialized) {
> @@ -216,9 +218,21 @@ int get_jited_program_text(int fd, char *text, size_t text_sz)
> if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd #2"))
> goto out;
>
> + /*
> + * last 8 bytes contains dummy_trampoline address in JIT
> + * output for 64-bit and 32-bit powerpc, which can't
> + * disassemble a to valid instruction.
> + */
This isn't a bug, but there's a grammar error here: "disassemble a to
valid instruction" should be "disassemble to a valid instruction".
> + triple = LLVMGetDefaultTargetTriple();
> + if (triple) {
> + if (strstr(triple, "powerpc"))
> + trunc_len = 8;
Will this remove a valid instruction on 32-bit PowerPC targets?
The dummy trampoline address is stored as an unsigned long, which
occupies 8 bytes on 64-bit PowerPC, but only 4 bytes on 32-bit
PowerPC.
Because the long branch stub ends with a 4-byte bctr instruction
followed by the dummy address, unconditionally truncating 8 bytes on
32-bit PowerPC would remove both the 4-byte dummy address and the
valid 4-byte bctr instruction from the disassembly.
Does the truncation length need to be determined dynamically based on
whether the target is 32-bit or 64-bit?
(Note: This concern was raised by sashiko-bot@kernel.org in v4:
https://lore.kernel.org/bpf/20260517183846.CD7C2C2BCB0@smtp.kernel.org)
> + LLVMDisposeMessage(triple);
> + }
> +
> for (pc = 0, i = 0; i < jited_funcs; ++i) {
> fprintf(text_out, "func #%d:\n", i);
> - disasm_one_func(text_out, image + pc, func_lens[i]);
> + disasm_one_func(text_out, image + pc, func_lens[i] - trunc_len);
> fprintf(text_out, "\n");
> pc += func_lens[i];
> }
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/26121409898
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v5 4/6] selftest/bpf: Enable verifier selftest for powerpc64
2026-05-19 23:38 [PATCH v5 0/6] powerpc/bpf: Add support for verifier selftest adubey
` (2 preceding siblings ...)
2026-05-19 23:38 ` [PATCH v5 3/6] selftest/bpf: Fixing powerpc JIT disassembly failure adubey
@ 2026-05-19 23:38 ` adubey
2026-05-19 23:38 ` [PATCH v5 5/6] powerpc64/bpf: fix compare instruction emitted for tailcall adubey
2026-05-19 23:38 ` [PATCH v5 6/6] selftest/bpf: Add tailcall verifier selftest for powerpc64 adubey
5 siblings, 0 replies; 9+ messages in thread
From: adubey @ 2026-05-19 23:38 UTC (permalink / raw)
To: bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
This patch enables the arch specifier "__arch_powerpc64" in the
verifier selftests for ppc64. 32-bit PowerPC would require separate
handling. Changes tested for 64-bit only.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tools/testing/selftests/bpf/progs/bpf_misc.h | 1 +
tools/testing/selftests/bpf/test_loader.c | 5 +++++
2 files changed, 6 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 9eeb5b0b63d6..cdc2a3de3054 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -158,6 +158,7 @@
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
#define __arch_s390x __arch("s390x")
+#define __arch_powerpc64 __arch("POWERPC64")
#define __caps_unpriv(caps) __test_tag("test_caps_unpriv=" EXPAND_QUOTE(caps))
#define __load_if_JITed() __test_tag("load_mode=jited")
#define __load_if_no_JITed() __test_tag("load_mode=no_jited")
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index abdb9e6e3713..d5589355ed9e 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -377,6 +377,7 @@ enum arch {
ARCH_ARM64 = 0x4,
ARCH_RISCV64 = 0x8,
ARCH_S390X = 0x10,
+ ARCH_POWERPC64 = 0x20,
};
static int get_current_arch(void)
@@ -389,6 +390,8 @@ static int get_current_arch(void)
return ARCH_RISCV64;
#elif defined(__s390x__)
return ARCH_S390X;
+#elif defined(__powerpc64__)
+ return ARCH_POWERPC64;
#endif
return ARCH_UNKNOWN;
}
@@ -580,6 +583,8 @@ static int parse_test_spec(struct test_loader *tester,
arch = ARCH_RISCV64;
} else if (strcmp(val, "s390x") == 0) {
arch = ARCH_S390X;
+ } else if (strcmp(val, "POWERPC64") == 0) {
+ arch = ARCH_POWERPC64;
} else {
PRINT_FAIL("bad arch spec: '%s'\n", val);
err = -EINVAL;
--
2.52.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v5 5/6] powerpc64/bpf: fix compare instruction emitted for tailcall
2026-05-19 23:38 [PATCH v5 0/6] powerpc/bpf: Add support for verifier selftest adubey
` (3 preceding siblings ...)
2026-05-19 23:38 ` [PATCH v5 4/6] selftest/bpf: Enable verifier selftest for powerpc64 adubey
@ 2026-05-19 23:38 ` adubey
2026-05-19 23:38 ` [PATCH v5 6/6] selftest/bpf: Add tailcall verifier selftest for powerpc64 adubey
5 siblings, 0 replies; 9+ messages in thread
From: adubey @ 2026-05-19 23:38 UTC (permalink / raw)
To: bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, Abhishek Dubey, sashiko-bot
From: Abhishek Dubey <adubey@linux.ibm.com>
The tail_call_info field can contain either a scalar counter
value or a 64-bit pointer to the counter. Using a 32-bit
compare (cmplwi) only checks the lower 32 bits, which can lead
to incorrect comparisons when the location of the counter is near
a 4GB boundary. Use the cmpldi instruction for an accurate
comparison in all cases.
Reported-by: sashiko-bot@kernel.org
Closes: https://lore.kernel.org/bpf/20260517191450.85AE6C2BCB8@smtp.kernel.org/
Fixes: 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with subprogs")
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
arch/powerpc/net/bpf_jit_comp.c | 2 +-
arch/powerpc/net/bpf_jit_comp64.c | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 56a923d3908e..eb476c582bc5 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -764,7 +764,7 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte
* Setting the tail_call_info in trampoline's frame
* depending on if previous frame had value or reference.
*/
- EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
+ EMIT(PPC_RAW_CMPLDI(_R3, MAX_TAIL_CALL_CNT));
PPC_BCC_CONST_SHORT(COND_GT, 8);
EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL));
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 885dc8cf55a2..74fce3cf6c5e 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -276,7 +276,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
*/
EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, 0));
EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), -(BPF_PPC_TAILCALL)));
- EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+ EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
PPC_BCC_CONST_SHORT(COND_GT, 8);
EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2),
-(BPF_PPC_TAILCALL)));
@@ -651,7 +651,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
PPC_BCC_SHORT(COND_GE, out);
EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallinfo_offset(ctx)));
- EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+ EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
PPC_BCC_CONST_SHORT(COND_LE, 8);
/* dereference TMP_REG_1 */
@@ -661,7 +661,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
* if (tail_call_info == MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+ EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
PPC_BCC_SHORT(COND_EQ, out);
/*
@@ -696,7 +696,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
* tail_call_info.
*/
EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, bpf_jit_stack_tailcallinfo_offset(ctx)));
- EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_2), MAX_TAIL_CALL_CNT));
+ EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_2), MAX_TAIL_CALL_CNT));
PPC_BCC_CONST_SHORT(COND_GT, 8);
/* First get address of tail_call_info */
--
2.52.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH v5 6/6] selftest/bpf: Add tailcall verifier selftest for powerpc64
2026-05-19 23:38 [PATCH v5 0/6] powerpc/bpf: Add support for verifier selftest adubey
` (4 preceding siblings ...)
2026-05-19 23:38 ` [PATCH v5 5/6] powerpc64/bpf: fix compare instruction emitted for tailcall adubey
@ 2026-05-19 23:38 ` adubey
5 siblings, 0 replies; 9+ messages in thread
From: adubey @ 2026-05-19 23:38 UTC (permalink / raw)
To: bpf
Cc: hbathini, linuxppc-dev, maddy, ast, andrii, daniel, shuah,
linux-kselftest, stable, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
Verifier testcase result for tailcalls:
# ./test_progs -t verifier_tailcall
#618/1 verifier_tailcall/invalid map type for tail call:OK
#618/2 verifier_tailcall/invalid map type for tail call @unpriv:OK
#618 verifier_tailcall:OK
#619/1 verifier_tailcall_jit/main:OK
#619 verifier_tailcall_jit:OK
Summary: 2/3 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
.../bpf/progs/verifier_tailcall_jit.c | 69 +++++++++++++++++++
1 file changed, 69 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
index 8d60c634a114..b754b76420c9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
@@ -90,6 +90,75 @@ __jited(" popq %rax")
__jited(" jmp {{.*}}") /* jump to tail call tgt */
__jited("L0: leave")
__jited(" {{(retq|jmp 0x)}}") /* return or jump to rethunk */
+__arch_powerpc64
+/* program entry for main(), regular function prologue */
+__jited(" nop")
+__jited("...") /* ld 2, 16(13) absent with CONFIG_PPC_KERNEL_PCREL */
+__jited(" li 9, 0")
+__jited(" std 9, -8(1)")
+__jited(" mflr 0")
+__jited(" std 0, 16(1)")
+__jited(" stdu 1, {{.*}}(1)")
+/* load address and call sub() via count register */
+__jited(" lis 12, {{.*}}")
+__jited(" sldi 12, 12, 32")
+__jited(" oris 12, 12, {{.*}}")
+__jited(" ori 12, 12, {{.*}}")
+__jited(" mtctr 12")
+__jited(" bctrl")
+__jited(" mr 8, 3")
+__jited(" li 8, 0")
+__jited(" addi 1, 1, {{.*}}")
+__jited(" ld 0, 16(1)")
+__jited(" mtlr 0")
+__jited(" mr 3, 8")
+__jited(" blr")
+__jited("...")
+__jited("func #1")
+/* subprogram entry for sub() */
+__jited(" nop")
+__jited("...") /* ld 2, 16(13) absent with CONFIG_PPC_KERNEL_PCREL */
+/* tail call prologue for subprogram */
+__jited(" ld 10, 0(1)")
+__jited(" ld 9, -8(10)")
+__jited(" cmpldi 9, 33")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" addi 9, 10, -8")
+__jited(" std 9, -8(1)")
+__jited(" lis {{.*}}, {{.*}}")
+__jited(" sldi {{.*}}, {{.*}}, 32")
+__jited(" oris {{.*}}, {{.*}}, {{.*}}")
+__jited(" ori {{.*}}, {{.*}}, {{.*}}")
+__jited(" li {{.*}}, 0")
+__jited(" lwz 9, {{.*}}({{.*}})")
+__jited(" slwi {{.*}}, {{.*}}, 0")
+__jited(" cmplw {{.*}}, 9")
+__jited(" bf 0, {{.*}}")
+/* bpf_tail_call implementation */
+__jited(" ld 9, -8(1)")
+__jited(" cmpldi 9, 33")
+__jited(" bf {{.*}}, {{.*}}")
+__jited(" ld 9, 0(9)")
+__jited(" cmpldi 9, 33")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" addi 9, 9, 1")
+__jited(" mulli 10, {{.*}}, 8")
+__jited(" add 10, 10, {{.*}}")
+__jited(" ld 10, {{.*}}(10)")
+__jited(" cmpldi 10, 0")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" ld 10, {{.*}}(10)")
+__jited(" addi 10, 10, {{.*}}") /* offset depends on CONFIG_PPC_KERNEL_PCREL */
+__jited(" mtctr 10")
+__jited(" ld 10, -8(1)")
+__jited(" cmpldi 10, 33")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" addi 10, 1, -8")
+__jited(" std 9, 0(10)")
+__jited(" bctr")
+__jited(" mr 3, 8")
+__jited(" blr")
+
SEC("tc")
__naked int main(void)
{
--
2.52.0
^ permalink raw reply related [flat|nested] 9+ messages in thread