* [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub
2026-02-25 1:36 [PATCH 0/4] powerpc/bpf: Add support for verifier selftest adubey
@ 2026-02-25 1:36 ` adubey
2026-02-24 21:29 ` bot+bpf-ci
2026-03-15 18:06 ` Hari Bathini
2026-02-25 1:36 ` [PATCH 2/4] selftest/bpf: Fixing powerpc JIT disassembly failure adubey
` (2 subsequent siblings)
3 siblings, 2 replies; 8+ messages in thread
From: adubey @ 2026-02-25 1:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: bpf, hbathini, ast, daniel, andrii, maddy, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
Move the long branch address space to the bottom of the long
branch stub. This allows uninterrupted disassembly until the
last 8 bytes. Exclude these last bytes from the overall
program length to prevent failure in assembly generation.
Also, align dummy_tramp_addr field with 8-byte boundary.
Following is disassembler output for test program with moved down
dummy_tramp_addr field:
.....
.....
pc:68 left:44 a6 03 08 7c : mtlr 0
pc:72 left:40 bc ff ff 4b : b .-68
pc:76 left:36 a6 02 68 7d : mflr 11
pc:80 left:32 05 00 9f 42 : bcl 20, 31, .+4
pc:84 left:28 a6 02 88 7d : mflr 12
pc:88 left:24 14 00 8c e9 : ld 12, 20(12)
pc:92 left:20 a6 03 89 7d : mtctr 12
pc:96 left:16 a6 03 68 7d : mtlr 11
pc:100 left:12 20 04 80 4e : bctr
pc:104 left:8 c0 34 1d 00 :
Failure log:
Can't disasm instruction at offset 104: c0 34 1d 00 00 00 00 c0
Disassembly logic can truncate at 104, ignoring last 8 bytes.
Update the dummy_tramp_addr field offset calculation from the end
of the program to reflect its new location, for bpf_arch_text_poke()
to update the actual trampoline's address in this field.
All BPF trampoline selftests continue to pass with this patch applied.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
arch/powerpc/net/bpf_jit_comp.c | 45 +++++++++++++++++++++++++--------
1 file changed, 34 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 7a78e03d482f..f8f6305b0d9f 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -51,7 +51,9 @@ asm (
void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
{
- int ool_stub_idx, long_branch_stub_idx;
+ int ool_stub_idx, long_branch_stub_idx, tramp_load_offset;
+ bool tramp_needs_align;
+ u32 tramp_idx;
/*
* Out-of-line stub:
@@ -70,27 +72,45 @@ void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
/*
* Long branch stub:
- * .long <dummy_tramp_addr>
* mflr r11
* bcl 20,31,$+4
- * mflr r12
- * ld r12, -8-SZL(r12)
+ * mflr r12 // lr/r12 stores current pc
+ * ld r12, 20(r12) // offset(dummy_tramp_addr) from prev inst. is 20
* mtctr r12
- * mtlr r11 // needed to retain ftrace ABI
+ * mtlr r11 // needed to retain ftrace ABI
* bctr
+ * nop // Optional, for mem alignment of dummy_tramp_addr
+ * .long <dummy_tramp_addr>
*/
- if (image)
- *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
- ctx->idx += SZL / 4;
long_branch_stub_idx = ctx->idx;
EMIT(PPC_RAW_MFLR(_R11));
EMIT(PPC_RAW_BCL4());
EMIT(PPC_RAW_MFLR(_R12));
- EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL));
+
+ /* Relative offset of dummy_tramp_addr wrt start of long branch stub */
+ tramp_idx = long_branch_stub_idx + 7;
+ /*
+ * Image layout need not be considered 8-byte aligned.
+ * Lower 3 bits must be clear for 8-bytes alignment.
+ * Adjust offset for padding NOP before dummy_tramp_addr
+ */
+ tramp_needs_align = (((unsigned long)&image[tramp_idx]) & 7) != 0;
+ tramp_load_offset = tramp_needs_align ? 24 : 20;
+
+ EMIT(PPC_RAW_LL(_R12, _R12, tramp_load_offset));
EMIT(PPC_RAW_MTCTR(_R12));
EMIT(PPC_RAW_MTLR(_R11));
EMIT(PPC_RAW_BCTR());
+ /* align dummy_tramp_addr to 8 bytes */
+ if (tramp_needs_align)
+ EMIT(PPC_RAW_NOP());
+
+ if (image)
+ *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
+
+ ctx->idx += SZL / 4;
+
if (!bpf_jit_ool_stub) {
bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4;
bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4;
@@ -1206,7 +1226,6 @@ static void do_isync(void *info __maybe_unused)
* b bpf_func + 4
* 3. Long branch stub:
* long_branch_stub:
- * .long <branch_addr>/<dummy_tramp>
* mflr r11
* bcl 20,31,$+4
* mflr r12
@@ -1214,6 +1233,8 @@ static void do_isync(void *info __maybe_unused)
* mtctr r12
* mtlr r11 // needed to retain ftrace ABI
* bctr
+ * nop // optinonal, mem align dummy_tramp field
+ * .long <branch_addr>/<dummy_tramp>
*
* dummy_tramp is used to reduce synchronization requirements.
*
@@ -1315,10 +1336,12 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
* 1. Update the address in the long branch stub:
* If new_addr is out of range, we will have to use the long branch stub, so patch new_addr
* here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here.
+ *
+ * dummy_tramp_addr moved to bottom of long branch stub.
*/
if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) ||
(old_addr && !is_offset_in_branch_range(old_addr - ip)))
- ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL),
+ ret = patch_ulong((void *)(bpf_func_end - SZL), /* SZL: dummy_tramp_addr offset */
(new_addr && !is_offset_in_branch_range(new_addr - ip)) ?
(unsigned long)new_addr : (unsigned long)dummy_tramp);
if (ret)
--
2.52.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub
2026-02-25 1:36 ` [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub adubey
@ 2026-02-24 21:29 ` bot+bpf-ci
2026-03-15 18:06 ` Hari Bathini
1 sibling, 0 replies; 8+ messages in thread
From: bot+bpf-ci @ 2026-02-24 21:29 UTC (permalink / raw)
To: adubey, linuxppc-dev
Cc: bpf, hbathini, ast, daniel, andrii, maddy, adubey, ast, andrii,
daniel, martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 4637 bytes --]
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
[ ... ]
> + /* Relative offset of dummy_tramp_addr wrt start of long branch stub */
> + tramp_idx = long_branch_stub_idx + 7;
> + /*
> + * Image layout need not be considered 8-byte aligned.
> + * Lower 3 bits must be clear for 8-bytes alignment.
> + * Adjust offset for padding NOP before dummy_tramp_addr
> + */
> + tramp_needs_align = (((unsigned long)&image[tramp_idx]) & 7) != 0;
> + tramp_load_offset = tramp_needs_align ? 24 : 20;
Can the alignment check produce different results between the sizing
pass and the code generation pass?
During the sizing pass, bpf_int_jit_compile() calls
bpf_jit_build_epilogue(NULL, &cgctx), so image is NULL here.
&image[tramp_idx] evaluates to (tramp_idx * 4), and the alignment
decision depends only on the parity of tramp_idx.
During code generation, image is code_base, which may or may not be
8-byte aligned. bpf_jit_binary_pack_alloc() uses alignment=4 with a
random start offset, so roughly half the time code_base will have
bit 2 set. When it does, the alignment check flips:
sizing pass (image=NULL, tramp_idx odd):
((tramp_idx * 4) & 7) = 4, tramp_needs_align = true
code gen pass (code_base bit 2 set, tramp_idx odd):
(((unsigned long)code_base + tramp_idx * 4) & 7) = 0,
tramp_needs_align = false
This means the sizing pass may emit a NOP that the code generation
pass omits (or vice versa), causing the stub to be a different size
than what was computed during sizing.
> + EMIT(PPC_RAW_LL(_R12, _R12, tramp_load_offset));
> + EMIT(PPC_RAW_MTCTR(_R12));
> + EMIT(PPC_RAW_MTLR(_R11));
> + EMIT(PPC_RAW_BCTR());
> +
> + /* align dummy_tramp_addr to 8 bytes */
> + if (tramp_needs_align)
> + EMIT(PPC_RAW_NOP());
> +
> + if (image)
> + *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
> +
> + ctx->idx += SZL / 4;
> +
> if (!bpf_jit_ool_stub) {
> bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4;
> bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4;
> }
Because the conditional NOP makes the stub size variable,
bpf_jit_long_branch_stub now depends on whether tramp_needs_align
was true when it was first set. This global is set once during the
first program's sizing pass and reused for all subsequent programs.
Since each BPF program has a different body size (different N for
long_branch_stub_idx), and each allocation may have different
code_base alignment, the stub size can vary per-program. But
bpf_arch_text_poke() uses the fixed global to compute the branch
target:
create_branch(&old_inst, ip,
bpf_func_end - bpf_jit_long_branch_stub,
branch_flags);
If this program's actual stub size differs from the global by 4
bytes, the branch lands one instruction before or after the intended
mflr r11 at the stub entry.
In the old code, the stub had a fixed layout with no conditional
instructions, so bpf_jit_long_branch_stub was the same for all
programs.
[ ... ]
> @@ -1161,6 +1180,8 @@ static void do_isync(void *info __maybe_unused)
> * mtctr r12
> * mtlr r11 // needed to retain ftrace ABI
> * bctr
> + * nop // optinonal, mem align dummy_tramp field
> + * .long <branch_addr>/<dummy_tramp>
Typo: "optinonal" should be "optional".
Also, the comment block above this still shows the old load offset:
* ld r12, -16(r12)
This should reflect the new forward offset (20 or 24).
[ ... ]
> /*
> * Long branch stub:
> * mflr r11
> * bcl 20,31,$+4
> - * mflr r12
> - * ld r12, -8-SZL(r12)
> + * mflr r12 // lr/r12 stores current pc
> + * ld r12, 20(r12) // offset(dummy_tramp_addr) from prev inst. is 20
The hardcoded 20 in this comment is only correct when no alignment
NOP is needed. When tramp_needs_align is true, the offset is 24.
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/22369534987
AI-authorship-score: low
AI-authorship-explanation: Domain-specific code with detailed hardware debugging output in the cover letter suggests manual engineering work.
issues-found: 3
issue-severity-score: high
issue-severity-explanation: Alignment check uses NULL image pointer during sizing pass, causing variable stub sizes that break the bpf_jit_long_branch_stub global used by bpf_arch_text_poke to compute branch targets, potentially causing incorrect JIT code execution.
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub
2026-02-25 1:36 ` [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub adubey
2026-02-24 21:29 ` bot+bpf-ci
@ 2026-03-15 18:06 ` Hari Bathini
2026-03-15 18:42 ` Hari Bathini
1 sibling, 1 reply; 8+ messages in thread
From: Hari Bathini @ 2026-03-15 18:06 UTC (permalink / raw)
To: adubey, linuxppc-dev; +Cc: bpf, ast, daniel, andrii, maddy
On 25/02/26 7:06 am, adubey@linux.ibm.com wrote:
> From: Abhishek Dubey <adubey@linux.ibm.com>
>
> Move the long branch address space to the bottom of the long
> branch stub. This allows uninterrupted disassembly until the
> last 8 bytes. Exclude these last bytes from the overall
> program length to prevent failure in assembly generation.
> Also, align dummy_tramp_addr field with 8-byte boundary.
>
> Following is disassembler output for test program with moved down
> dummy_tramp_addr field:
> .....
> .....
> pc:68 left:44 a6 03 08 7c : mtlr 0
> pc:72 left:40 bc ff ff 4b : b .-68
> pc:76 left:36 a6 02 68 7d : mflr 11
> pc:80 left:32 05 00 9f 42 : bcl 20, 31, .+4
> pc:84 left:28 a6 02 88 7d : mflr 12
> pc:88 left:24 14 00 8c e9 : ld 12, 20(12)
> pc:92 left:20 a6 03 89 7d : mtctr 12
> pc:96 left:16 a6 03 68 7d : mtlr 11
> pc:100 left:12 20 04 80 4e : bctr
> pc:104 left:8 c0 34 1d 00 :
>
> Failure log:
> Can't disasm instruction at offset 104: c0 34 1d 00 00 00 00 c0
> Disassembly logic can truncate at 104, ignoring last 8 bytes.
>
> Update the dummy_tramp_addr field offset calculation from the end
> of the program to reflect its new location, for bpf_arch_text_poke()
> to update the actual trampoline's address in this field.
>
> All BPF trampoline selftests continue to pass with this patch applied.
>
> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
> ---
> arch/powerpc/net/bpf_jit_comp.c | 45 +++++++++++++++++++++++++--------
> 1 file changed, 34 insertions(+), 11 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 7a78e03d482f..f8f6305b0d9f 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -51,7 +51,9 @@ asm (
>
> void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
> {
> - int ool_stub_idx, long_branch_stub_idx;
> + int ool_stub_idx, long_branch_stub_idx, tramp_load_offset;
> + bool tramp_needs_align;
> + u32 tramp_idx;
>
> /*
> * Out-of-line stub:
> @@ -70,27 +72,45 @@ void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
>
> /*
> * Long branch stub:
> - * .long <dummy_tramp_addr>
> * mflr r11
> * bcl 20,31,$+4
> - * mflr r12
> - * ld r12, -8-SZL(r12)
> + * mflr r12 // lr/r12 stores current pc
> + * ld r12, 20(r12) // offset(dummy_tramp_addr) from prev inst. is 20
> * mtctr r12
> - * mtlr r11 // needed to retain ftrace ABI
> + * mtlr r11 // needed to retain ftrace ABI
> * bctr
> + * nop // Optional, for mem alignment of dummy_tramp_addr
> + * .long <dummy_tramp_addr>
> */
> - if (image)
> - *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
> - ctx->idx += SZL / 4;
> long_branch_stub_idx = ctx->idx;
> EMIT(PPC_RAW_MFLR(_R11));
> EMIT(PPC_RAW_BCL4());
> EMIT(PPC_RAW_MFLR(_R12));
> - EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL));
> +
> + /* Relative offset of dummy_tramp_addr wrt start of long branch stub */
> + tramp_idx = long_branch_stub_idx + 7;
> + /*
> + * Image layout need not be considered 8-byte aligned.
> + * Lower 3 bits must be clear for 8-bytes alignment.
> + * Adjust offset for padding NOP before dummy_tramp_addr
> + */
> + tramp_needs_align = (((unsigned long)&image[tramp_idx]) & 7) != 0;
I would rather check:
is_8byte_aligned = (((unsigned long)&image[tramp_idx]) & 0x7) == 0x4;
and handle alignment when !is_8byte_aligned for better readability.
This alignment handling needs to go under CONFIG_PPC64.
Also, this alignment handling fix has nothing to do with the moving
around of dummy_tramp_addr. Have the alignment handled in a separate
patch with a fixes tag for stable releases to pick it...
- Hari
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub
2026-03-15 18:06 ` Hari Bathini
@ 2026-03-15 18:42 ` Hari Bathini
0 siblings, 0 replies; 8+ messages in thread
From: Hari Bathini @ 2026-03-15 18:42 UTC (permalink / raw)
To: adubey, linuxppc-dev; +Cc: bpf, ast, daniel, andrii, maddy
On 15/03/26 11:36 pm, Hari Bathini wrote:
>
>
> On 25/02/26 7:06 am, adubey@linux.ibm.com wrote:
>> From: Abhishek Dubey <adubey@linux.ibm.com>
>>
>> Move the long branch address space to the bottom of the long
>> branch stub. This allows uninterrupted disassembly until the
>> last 8 bytes. Exclude these last bytes from the overall
>> program length to prevent failure in assembly generation.
>> Also, align dummy_tramp_addr field with 8-byte boundary.
>>
>> Following is disassembler output for test program with moved down
>> dummy_tramp_addr field:
>> .....
>> .....
>> pc:68 left:44 a6 03 08 7c : mtlr 0
>> pc:72 left:40 bc ff ff 4b : b .-68
>> pc:76 left:36 a6 02 68 7d : mflr 11
>> pc:80 left:32 05 00 9f 42 : bcl 20, 31, .+4
>> pc:84 left:28 a6 02 88 7d : mflr 12
>> pc:88 left:24 14 00 8c e9 : ld 12, 20(12)
>> pc:92 left:20 a6 03 89 7d : mtctr 12
>> pc:96 left:16 a6 03 68 7d : mtlr 11
>> pc:100 left:12 20 04 80 4e : bctr
>> pc:104 left:8 c0 34 1d 00 :
>>
>> Failure log:
>> Can't disasm instruction at offset 104: c0 34 1d 00 00 00 00 c0
>> Disassembly logic can truncate at 104, ignoring last 8 bytes.
>>
>> Update the dummy_tramp_addr field offset calculation from the end
>> of the program to reflect its new location, for bpf_arch_text_poke()
>> to update the actual trampoline's address in this field.
>>
>> All BPF trampoline selftests continue to pass with this patch applied.
>>
>> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
>> ---
>> arch/powerpc/net/bpf_jit_comp.c | 45 +++++++++++++++++++++++++--------
>> 1 file changed, 34 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/
>> bpf_jit_comp.c
>> index 7a78e03d482f..f8f6305b0d9f 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -51,7 +51,9 @@ asm (
>> void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context
>> *ctx)
>> {
>> - int ool_stub_idx, long_branch_stub_idx;
>> + int ool_stub_idx, long_branch_stub_idx, tramp_load_offset;
>> + bool tramp_needs_align;
>> + u32 tramp_idx;
>> /*
>> * Out-of-line stub:
>> @@ -70,27 +72,45 @@ void bpf_jit_build_fentry_stubs(u32 *image, struct
>> codegen_context *ctx)
>> /*
>> * Long branch stub:
>> - * .long <dummy_tramp_addr>
>> * mflr r11
>> * bcl 20,31,$+4
>> - * mflr r12
>> - * ld r12, -8-SZL(r12)
>> + * mflr r12 // lr/r12 stores current pc
>> + * ld r12, 20(r12) // offset(dummy_tramp_addr) from prev
>> inst. is 20
>> * mtctr r12
>> - * mtlr r11 // needed to retain ftrace ABI
>> + * mtlr r11 // needed to retain ftrace ABI
>> * bctr
>> + * nop // Optional, for mem alignment of
>> dummy_tramp_addr
>> + * .long <dummy_tramp_addr>
>> */
>> - if (image)
>> - *((unsigned long *)&image[ctx->idx]) = (unsigned
>> long)dummy_tramp;
>> - ctx->idx += SZL / 4;
>> long_branch_stub_idx = ctx->idx;
>> EMIT(PPC_RAW_MFLR(_R11));
>> EMIT(PPC_RAW_BCL4());
>> EMIT(PPC_RAW_MFLR(_R12));
>> - EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL));
>> +
>> + /* Relative offset of dummy_tramp_addr wrt start of long branch
>> stub */
>> + tramp_idx = long_branch_stub_idx + 7;
>> + /*
>> + * Image layout need not be considered 8-byte aligned.
>> + * Lower 3 bits must be clear for 8-bytes alignment.
>> + * Adjust offset for padding NOP before dummy_tramp_addr
>> + */
>
>> + tramp_needs_align = (((unsigned long)&image[tramp_idx]) & 7) != 0;
>
> I would rather check:
>
> is_8byte_aligned = (((unsigned long)&image[tramp_idx]) & 0x7) == 0x4;
Oh wait, I meant:
is_8byte_aligned = ((((unsigned long)&image[tramp_idx]) & 0x7) == 0);
- Hari
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/4] selftest/bpf: Fixing powerpc JIT disassembly failure
2026-02-25 1:36 [PATCH 0/4] powerpc/bpf: Add support for verifier selftest adubey
2026-02-25 1:36 ` [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub adubey
@ 2026-02-25 1:36 ` adubey
2026-02-25 1:36 ` [PATCH 3/4] selftest/bpf: Enable verifier selftest for powerpc64 adubey
2026-02-25 1:36 ` [PATCH 4/4] selftest/bpf: Add tailcall " adubey
3 siblings, 0 replies; 8+ messages in thread
From: adubey @ 2026-02-25 1:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: bpf, hbathini, ast, daniel, andrii, maddy, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
Ensure that the trampoline stubs JITed at the tail of the
epilogue do not expose the dummy trampoline address stored
in the last 8 bytes (for both 64-bit and 32-bit PowerPC)
to the disassembly flow. Prevent the disassembler from
ingesting this memory address, as it may occasionally decode
into a seemingly valid but incorrect instruction. Fix this
issue by truncating the last 8 bytes from JITed buffers
before supplying them for disassembly.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tools/testing/selftests/bpf/jit_disasm_helpers.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/jit_disasm_helpers.c b/tools/testing/selftests/bpf/jit_disasm_helpers.c
index febd6b12e372..cadddeaa6ed7 100644
--- a/tools/testing/selftests/bpf/jit_disasm_helpers.c
+++ b/tools/testing/selftests/bpf/jit_disasm_helpers.c
@@ -170,9 +170,11 @@ int get_jited_program_text(int fd, char *text, size_t text_sz)
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
__u32 jited_funcs, len, pc;
+ __u32 trunc_len = 0;
__u32 *func_lens = NULL;
FILE *text_out = NULL;
uint8_t *image = NULL;
+ char *triple = NULL;
int i, err = 0;
if (!llvm_initialized) {
@@ -216,9 +218,18 @@ int get_jited_program_text(int fd, char *text, size_t text_sz)
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd #2"))
goto out;
+ /*
+ * last 8 bytes contain the dummy_trampoline address in the
+ * JIT output for 64-bit and 32-bit powerpc, and cannot be
+ * disassembled to a valid instruction.
+ */
+ triple = LLVMGetDefaultTargetTriple();
+ if (strstr(triple, "powerpc"))
+ trunc_len = 8;
+
for (pc = 0, i = 0; i < jited_funcs; ++i) {
fprintf(text_out, "func #%d:\n", i);
- disasm_one_func(text_out, image + pc, func_lens[i]);
+ disasm_one_func(text_out, image + pc, func_lens[i] - trunc_len);
fprintf(text_out, "\n");
pc += func_lens[i];
}
--
2.52.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 3/4] selftest/bpf: Enable verifier selftest for powerpc64
2026-02-25 1:36 [PATCH 0/4] powerpc/bpf: Add support for verifier selftest adubey
2026-02-25 1:36 ` [PATCH 1/4] powerpc/bpf: Move out dummy_tramp_addr after Long branch stub adubey
2026-02-25 1:36 ` [PATCH 2/4] selftest/bpf: Fixing powerpc JIT disassembly failure adubey
@ 2026-02-25 1:36 ` adubey
2026-02-25 1:36 ` [PATCH 4/4] selftest/bpf: Add tailcall " adubey
3 siblings, 0 replies; 8+ messages in thread
From: adubey @ 2026-02-25 1:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: bpf, hbathini, ast, daniel, andrii, maddy, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
This patch enables arch specifier "__powerpc64" in verifier
selftest for ppc64. Power 32-bit would require separate
handling. Changes tested for ppc64 only.
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tools/testing/selftests/bpf/progs/bpf_misc.h | 1 +
tools/testing/selftests/bpf/test_loader.c | 5 +++++
2 files changed, 6 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index c9bfbe1bafc1..dee284c3ddba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -155,6 +155,7 @@
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
#define __arch_s390x __arch("s390x")
+#define __arch_powerpc64 __arch("POWERPC64")
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 338c035c3688..fc8b95316379 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -378,6 +378,7 @@ enum arch {
ARCH_ARM64 = 0x4,
ARCH_RISCV64 = 0x8,
ARCH_S390X = 0x10,
+ ARCH_POWERPC64 = 0x20,
};
static int get_current_arch(void)
@@ -390,6 +391,8 @@ static int get_current_arch(void)
return ARCH_RISCV64;
#elif defined(__s390x__)
return ARCH_S390X;
+#elif defined(__powerpc64__)
+ return ARCH_POWERPC64;
#endif
return ARCH_UNKNOWN;
}
@@ -587,6 +590,8 @@ static int parse_test_spec(struct test_loader *tester,
arch = ARCH_RISCV64;
} else if (strcmp(val, "s390x") == 0) {
arch = ARCH_S390X;
+ } else if (strcmp(val, "POWERPC64") == 0) {
+ arch = ARCH_POWERPC64;
} else {
PRINT_FAIL("bad arch spec: '%s'\n", val);
err = -EINVAL;
--
2.52.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 4/4] selftest/bpf: Add tailcall verifier selftest for powerpc64
2026-02-25 1:36 [PATCH 0/4] powerpc/bpf: Add support for verifier selftest adubey
` (2 preceding siblings ...)
2026-02-25 1:36 ` [PATCH 3/4] selftest/bpf: Enable verifier selftest for powerpc64 adubey
@ 2026-02-25 1:36 ` adubey
3 siblings, 0 replies; 8+ messages in thread
From: adubey @ 2026-02-25 1:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: bpf, hbathini, ast, daniel, andrii, maddy, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
Verifier testcase result for tailcalls:
# ./test_progs -t verifier_tailcall
#617/1 verifier_tailcall/invalid map type for tail call:OK
#617/2 verifier_tailcall/invalid map type for tail call @unpriv:OK
#617 verifier_tailcall:OK
#618/1 verifier_tailcall_jit/main:OK
#618 verifier_tailcall_jit:OK
Summary: 2/3 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
.../bpf/progs/verifier_tailcall_jit.c | 69 +++++++++++++++++++
1 file changed, 69 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
index 8d60c634a114..17475ecb3207 100644
--- a/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall_jit.c
@@ -90,6 +90,75 @@ __jited(" popq %rax")
__jited(" jmp {{.*}}") /* jump to tail call tgt */
__jited("L0: leave")
__jited(" {{(retq|jmp 0x)}}") /* return or jump to rethunk */
+__arch_powerpc64
+/* program entry for main(), regular function prologue */
+__jited(" nop")
+__jited(" ld 2, 16(13)")
+__jited(" li 9, 0")
+__jited(" std 9, -8(1)")
+__jited(" mflr 0")
+__jited(" std 0, 16(1)")
+__jited(" stdu 1, {{.*}}(1)")
+/* load address and call sub() via count register */
+__jited(" lis 12, {{.*}}")
+__jited(" sldi 12, 12, 32")
+__jited(" oris 12, 12, {{.*}}")
+__jited(" ori 12, 12, {{.*}}")
+__jited(" mtctr 12")
+__jited(" bctrl")
+__jited(" mr 8, 3")
+__jited(" li 8, 0")
+__jited(" addi 1, 1, {{.*}}")
+__jited(" ld 0, 16(1)")
+__jited(" mtlr 0")
+__jited(" mr 3, 8")
+__jited(" blr")
+__jited("...")
+__jited("func #1")
+/* subprogram entry for sub() */
+__jited(" nop")
+__jited(" ld 2, 16(13)")
+/* tail call prologue for subprogram */
+__jited(" ld 10, 0(1)")
+__jited(" ld 9, -8(10)")
+__jited(" cmplwi 9, 33")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" addi 9, 10, -8")
+__jited(" std 9, -8(1)")
+__jited(" lis {{.*}}, {{.*}}")
+__jited(" sldi {{.*}}, {{.*}}, 32")
+__jited(" oris {{.*}}, {{.*}}, {{.*}}")
+__jited(" ori {{.*}}, {{.*}}, {{.*}}")
+__jited(" li {{.*}}, 0")
+__jited(" lwz 9, {{.*}}({{.*}})")
+__jited(" slwi {{.*}}, {{.*}}, 0")
+__jited(" cmplw {{.*}}, 9")
+__jited(" bf 0, {{.*}}")
+/* bpf_tail_call implementation */
+__jited(" ld 9, -8(1)")
+__jited(" cmplwi 9, 33")
+__jited(" bf {{.*}}, {{.*}}")
+__jited(" ld 9, 0(9)")
+__jited(" cmplwi 9, 33")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" addi 9, 9, 1")
+__jited(" mulli 10, {{.*}}, 8")
+__jited(" add 10, 10, {{.*}}")
+__jited(" ld 10, {{.*}}(10)")
+__jited(" cmpldi 10, 0")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" ld 10, {{.*}}(10)")
+__jited(" addi 10, 10, 16")
+__jited(" mtctr 10")
+__jited(" ld 10, -8(1)")
+__jited(" cmplwi 10, 33")
+__jited(" bt {{.*}}, {{.*}}")
+__jited(" addi 10, 1, -8")
+__jited(" std 9, 0(10)")
+__jited(" bctr")
+__jited(" mr 3, 8")
+__jited(" blr")
+
SEC("tc")
__naked int main(void)
{
--
2.52.0
^ permalink raw reply related [flat|nested] 8+ messages in thread