From: adubey@linux.ibm.com
To: bpf@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
linux-kselftest@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: hbathini@linux.ibm.com, sachinpb@linux.ibm.com,
venkat88@linux.ibm.com, andrii@kernel.org, eddyz87@gmail.com,
mykolal@fb.com, ast@kernel.org, daniel@iogearbox.net,
martin.lau@linux.dev, song@kernel.org, yonghong.song@linux.dev,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
haoluo@google.com, jolsa@kernel.org, christophe.leroy@csgroup.eu,
naveen@kernel.org, maddy@linux.ibm.com, mpe@ellerman.id.au,
npiggin@gmail.com, memxor@gmail.com, iii@linux.ibm.com,
shuah@kernel.org, Abhishek Dubey <adubey@linux.ibm.com>
Subject: [PATCH 1/6] powerpc64/bpf: Support tailcalls with subprogs
Date: Mon, 5 Jan 2026 16:22:07 +0530 [thread overview]
Message-ID: <20260105105212.136645-2-adubey@linux.ibm.com> (raw)
In-Reply-To: <20260105105212.136645-1-adubey@linux.ibm.com>
From: Abhishek Dubey <adubey@linux.ibm.com>
Enabling tailcalls with subprog combinations by referencing
method. The actual tailcall count is always maintained in the
tail_call_info variable present in the frame of main function
(also called entry function). The tail_call_info variables in
the frames of all other subprog contains reference to the
tail_call_info present in frame of main function.
Dynamic resolution interprets the tail_call_info either as
value or reference depending on the context of active frame
while tailcall is invoked.
Following is selftest run:
#./test_progs -t tailcalls
#425/1 tailcalls/tailcall_1:OK
#425/2 tailcalls/tailcall_2:OK
#425/3 tailcalls/tailcall_3:OK
#425/4 tailcalls/tailcall_4:OK
#425/5 tailcalls/tailcall_5:OK
#425/6 tailcalls/tailcall_6:OK
#425/7 tailcalls/tailcall_bpf2bpf_1:OK
#425/8 tailcalls/tailcall_bpf2bpf_2:OK
#425/9 tailcalls/tailcall_bpf2bpf_3:OK
#425/10 tailcalls/tailcall_bpf2bpf_4:OK
#425/11 tailcalls/tailcall_bpf2bpf_5:OK
#425/12 tailcalls/tailcall_bpf2bpf_6:OK
#425/13 tailcalls/tailcall_bpf2bpf_fentry:OK
#425/14 tailcalls/tailcall_bpf2bpf_fexit:OK
#425/15 tailcalls/tailcall_bpf2bpf_fentry_fexit:OK
#425/16 tailcalls/tailcall_bpf2bpf_fentry_entry:OK
#425/17 tailcalls/tailcall_poke:OK
#425/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
#425/19 tailcalls/tailcall_bpf2bpf_hierarchy_fentry:OK
#425/20 tailcalls/tailcall_bpf2bpf_hierarchy_fexit:OK
#425/21 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_fexit:OK
#425/22 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_entry:OK
#425/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
#425/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
#425/25 tailcalls/tailcall_freplace:OK
#425/26 tailcalls/tailcall_bpf2bpf_freplace:OK
#425/27 tailcalls/tailcall_failure:OK
#425/28 tailcalls/reject_tail_call_spin_lock:OK
#425/29 tailcalls/reject_tail_call_rcu_lock:OK
#425/30 tailcalls/reject_tail_call_preempt_lock:OK
#425/31 tailcalls/reject_tail_call_ref:OK
#425 tailcalls:OK
Summary: 1/31 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
arch/powerpc/net/bpf_jit.h | 14 ++++++-
arch/powerpc/net/bpf_jit_comp.c | 10 ++++-
arch/powerpc/net/bpf_jit_comp64.c | 67 +++++++++++++++++++++++--------
3 files changed, 71 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 8334cd667bba..98e8b1f9c2f9 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,6 +51,12 @@
EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
} while (0)
+/* Same as PPC_BCC_SHORT, except valid dest is known prior to call. */
+#define PPC_COND_BRANCH(cond, dest) \
+ do { \
+ long offset = (long)(dest) - CTX_NIA(ctx); \
+ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
+ } while (0)
/*
* Sign-extended 32-bit immediate load
*
@@ -72,6 +78,10 @@
} } while (0)
#ifdef CONFIG_PPC64
+
+/* for gpr non volatile registers BPG_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE (6*8)
+
/* If dummy pass (!image), account for maximum possible instructions */
#define PPC_LI64(d, i) do { \
if (!image) \
@@ -166,6 +176,7 @@ struct codegen_context {
unsigned int alt_exit_addr;
u64 arena_vm_start;
u64 user_vm_start;
+ bool is_subprog;
};
#define bpf_to_ppc(r) (ctx->b2p[r])
@@ -200,11 +211,10 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx);
void bpf_jit_realloc_regs(struct codegen_context *ctx);
int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
-
int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
struct codegen_context *ctx, int insn_idx,
int jmp_off, int dst_reg, u32 code);
-
+int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx);
#endif
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 5e976730b2f5..069a8822c30d 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -206,6 +206,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena);
+ cgctx.is_subprog = bpf_is_subprog(fp);
/* Scouting faux-generate pass 0 */
if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
@@ -435,6 +436,11 @@ void bpf_jit_free(struct bpf_prog *fp)
bpf_prog_unlock_free(fp);
}
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return IS_ENABLED(CONFIG_PPC64);
+}
+
bool bpf_jit_supports_kfunc_call(void)
{
return true;
@@ -604,7 +610,7 @@ static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_contex
int func_frame_offset, int r4_off)
{
if (IS_ENABLED(CONFIG_PPC64)) {
- /* See bpf_jit_stack_tailcallcnt() */
+ /* See bpf_jit_stack_tailcallinfo_offset() */
int tailcallcnt_offset = 7 * 8;
EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
@@ -619,7 +625,7 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont
int func_frame_offset, int r4_off)
{
if (IS_ENABLED(CONFIG_PPC64)) {
- /* See bpf_jit_stack_tailcallcnt() */
+ /* See bpf_jit_stack_tailcallinfo_offset() */
int tailcallcnt_offset = 7 * 8;
EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 1fe37128c876..37c547b49da8 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -24,17 +24,19 @@
* Ensure the top half (upto local_tmp_var) stays consistent
* with our redzone usage.
*
+ * tail_call_info - stores tailcall count value in main program's
+ * frame, stores reference to tail_call_info of
+ * main's frame in sub-prog's frame.
+ *
* [ prev sp ] <-------------
* [ nv gpr save area ] 6*8 |
- * [ tail_call_cnt ] 8 |
+ * [ tail_call_info ] 8 |
* [ local_tmp_var ] 24 |
* fp (r31) --> [ ebpf stack space ] upto 512 |
* [ frame header ] 32/112 |
* sp (r1) ---> [ stack pointer ] --------------
*/
-/* for gpr non volatile registers BPG_REG_6 to 10 */
-#define BPF_PPC_STACK_SAVE (6*8)
/* for bpf JIT code internal usage */
#define BPF_PPC_STACK_LOCALS 32
/* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -93,7 +95,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
* [ ... ] |
* sp (r1) ---> [ stack pointer ] --------------
* [ nv gpr save area ] 6*8
- * [ tail_call_cnt ] 8
+ * [ tail_call_info ] 8
* [ local_tmp_var ] 24
* [ unused red zone ] 224
*/
@@ -105,7 +107,7 @@ static int bpf_jit_stack_local(struct codegen_context *ctx)
return -(BPF_PPC_STACK_SAVE + 32);
}
-static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
+int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx)
{
return bpf_jit_stack_local(ctx) + 24;
}
@@ -138,17 +140,31 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
#endif
/*
- * Initialize tail_call_cnt if we do tail calls.
- * Otherwise, put in NOPs so that it can be skipped when we are
- * invoked through a tail call.
+ * Tail call count(tcc) is saved & updated only in main
+ * program's frame and the address of tcc in main program's
+ * frame (tcc_ptr) is saved in subprogs frame.
+ *
+ * Offset of tail_call_info on any frame will be interpreted
+ * as either tcc_ptr or tcc value depending on whether it is
+ * greater than MAX_TAIL_CALL_CNT or not.
*/
- if (ctx->seen & SEEN_TAILCALL) {
+ if (!ctx->is_subprog) {
EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0));
/* this goes in the redzone */
EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8)));
} else {
- EMIT(PPC_RAW_NOP());
- EMIT(PPC_RAW_NOP());
+ /*
+ * if tail_call_info < MAX_TAIL_CALL_CNT
+ * main prog calling first subprog -> copy reference
+ * else
+ * subsequent subprog calling another subprog -> directly copy content
+ */
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, 0));
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), -(BPF_PPC_STACK_SAVE+8)));
+ EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+ PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), -(BPF_PPC_STACK_SAVE + 8)));
+ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8)));
}
if (bpf_has_stack_frame(ctx)) {
@@ -343,19 +359,38 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1)));
PPC_BCC_SHORT(COND_GE, out);
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallinfo_offset(ctx)));
+ EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+ PPC_COND_BRANCH(COND_LE, CTX_NIA(ctx) + 8);
+
+ /* dereference TMP_REG_1 */
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 0));
+
/*
- * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+ * if (tail_call_info == MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx)));
EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
- PPC_BCC_SHORT(COND_GE, out);
+ PPC_COND_BRANCH(COND_EQ, out);
/*
- * tail_call_cnt++;
+ * tail_call_info++; <- Actual value of tcc here
*/
EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1));
- EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx)));
+
+ /*
+ * Before writing updated tail_call_info, distinguish if current frame
+ * is storing a reference to tail_call_info or actual tcc value in
+ * tail_call_info.
+ */
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, bpf_jit_stack_tailcallinfo_offset(ctx)));
+ EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_2), MAX_TAIL_CALL_CNT));
+ PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
+
+ /* First get address of tail_call_info */
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), _R1, bpf_jit_stack_tailcallinfo_offset(ctx)));
+ /* Writeback updated value to tail_call_info */
+ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0));
/* prog = array->ptrs[index]; */
EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8));
--
2.48.1
next prev parent reply other threads:[~2026-01-05 10:53 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-05 10:52 [PATCH 0/6] powerpc64/bpf: Support tailcalls with subprogs & BPF exceptions adubey
2026-01-05 10:52 ` adubey [this message]
2026-01-05 10:52 ` [PATCH 2/6] powerpc64/bpf: Tailcall handling with trampolines adubey
2026-01-05 11:15 ` bot+bpf-ci
2026-01-11 18:43 ` Hari Bathini
2026-01-17 10:33 ` Hari Bathini
2026-01-05 10:52 ` [PATCH 3/6] powerpc/bpf: use BPF_PPC_STACK_SAVE to spill trampoline NVRs adubey
2026-01-05 10:52 ` [PATCH 4/6] powerpc64/bpf: Add arch_bpf_stack_walk() for BPF JIT adubey
2026-01-05 10:52 ` [PATCH 5/6] powerpc64/bpf: Support exceptions adubey
2026-01-12 5:51 ` Saket Kumar Bhaskar
[not found] ` <9102a4504413501f382cf3e22118e88f@imap.linux.ibm.com>
2026-01-12 7:19 ` Christophe Leroy (CS GROUP)
2026-01-05 10:52 ` [PATCH 6/6] powerpc64/bpf: Additional NVR handling for bpf_throw adubey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260105105212.136645-2-adubey@linux.ibm.com \
--to=adubey@linux.ibm.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=christophe.leroy@csgroup.eu \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=haoluo@google.com \
--cc=hbathini@linux.ibm.com \
--cc=iii@linux.ibm.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=maddy@linux.ibm.com \
--cc=martin.lau@linux.dev \
--cc=memxor@gmail.com \
--cc=mpe@ellerman.id.au \
--cc=mykolal@fb.com \
--cc=naveen@kernel.org \
--cc=npiggin@gmail.com \
--cc=sachinpb@linux.ibm.com \
--cc=sdf@fomichev.me \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=venkat88@linux.ibm.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.