bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next v2] s390/bpf: Use direct calls and jumps where possible
@ 2025-08-19 10:20 Ilya Leoshkevich
  2025-08-20 13:00 ` patchwork-bot+netdevbpf
  0 siblings, 1 reply; 2+ messages in thread
From: Ilya Leoshkevich @ 2025-08-19 10:20 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: bpf, Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
	Ilya Leoshkevich

After the V!=R rework (commit c98d2ecae08f ("s390/mm: Uncouple physical
vs virtual address spaces")), all kernel code and related data are
allocated within a 4G region, making it possible to use relative
addressing in BPF code more extensively.

Convert as many indirect calls and jumps to direct calls as possible,
namely:

* BPF_CALL
* __bpf_tramp_enter()
* __bpf_tramp_exit()
* __bpf_prog_enter()
* __bpf_prog_exit()
* fentry
* fmod_ret
* fexit
* BPF_TRAMP_F_CALL_ORIG without BPF_TRAMP_F_ORIG_STACK
* Trampoline returns without BPF_TRAMP_F_SKIP_FRAME and
  BPF_TRAMP_F_ORIG_STACK

The following indirect calls and jumps remain:

* Prog returns
* Trampoline returns with BPF_TRAMP_F_SKIP_FRAME or
  BPF_TRAMP_F_ORIG_STACK
* BPF_TAIL_CALL
* BPF_TRAMP_F_CALL_ORIG with BPF_TRAMP_F_ORIG_STACK

As a result, only one usage of call_r1() remains, so inline it.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---

v1: https://lore.kernel.org/bpf/20250814124314.185516-1-iii@linux.ibm.com/
v1 -> v2: Rebased.

 arch/s390/net/bpf_jit_comp.c | 80 +++++++++++++++---------------------
 1 file changed, 32 insertions(+), 48 deletions(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index b2b8eb62b82e..fd45f03a213c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -674,20 +674,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
 		_EMIT2(0x07f0 | reg);					\
 } while (0)
 
-/*
- * Call r1 either directly or via __s390_indirect_jump_r1 thunk
- */
-static void call_r1(struct bpf_jit *jit)
-{
-	if (nospec_uses_trampoline())
-		/* brasl %r14,__s390_indirect_jump_r1 */
-		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
-				     __s390_indirect_jump_r1);
-	else
-		/* basr %r14,%r1 */
-		EMIT2(0x0d00, REG_14, REG_1);
-}
-
 /*
  * Function epilogue
  */
@@ -1820,10 +1806,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 			}
 		}
 
-		/* lgrl %w1,func */
-		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
-		/* %r1() */
-		call_r1(jit);
+		/* brasl %r14,func */
+		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, (void *)func);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
 
@@ -2534,14 +2518,12 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
 	 *         goto skip;
 	 */
 
-	/* %r1 = __bpf_prog_enter */
-	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
 	/* %r2 = p */
 	load_imm64(jit, REG_2, (u64)p);
 	/* la %r3,run_ctx_off(%r15) */
 	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
-	/* %r1() */
-	call_r1(jit);
+	/* brasl %r14,__bpf_prog_enter */
+	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_enter(p));
 	/* ltgr %r7,%r2 */
 	EMIT4(0xb9020000, REG_7, REG_2);
 	/* brcl 8,skip */
@@ -2552,15 +2534,13 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
 	 * retval = bpf_func(args, p->insnsi);
 	 */
 
-	/* %r1 = p->bpf_func */
-	load_imm64(jit, REG_1, (u64)p->bpf_func);
 	/* la %r2,bpf_args_off(%r15) */
 	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
 	/* %r3 = p->insnsi */
 	if (!p->jited)
 		load_imm64(jit, REG_3, (u64)p->insnsi);
-	/* %r1() */
-	call_r1(jit);
+	/* brasl %r14,p->bpf_func */
+	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, p->bpf_func);
 	/* stg %r2,retval_off(%r15) */
 	if (save_ret) {
 		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
@@ -2577,16 +2557,14 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
 	 * __bpf_prog_exit(p, start, &run_ctx);
 	 */
 
-	/* %r1 = __bpf_prog_exit */
-	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
 	/* %r2 = p */
 	load_imm64(jit, REG_2, (u64)p);
 	/* lgr %r3,%r7 */
 	EMIT4(0xb9040000, REG_3, REG_7);
 	/* la %r4,run_ctx_off(%r15) */
 	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
-	/* %r1() */
-	call_r1(jit);
+	/* brasl %r14,__bpf_prog_exit */
+	EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_exit(p));
 
 	return 0;
 }
@@ -2746,9 +2724,6 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 
 		/* lgr %r8,%r0 */
 		EMIT4(0xb9040000, REG_8, REG_0);
-	} else {
-		/* %r8 = func_addr + S390X_PATCH_SIZE */
-		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
 	}
 
 	/*
@@ -2774,12 +2749,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 		 * __bpf_tramp_enter(im);
 		 */
 
-		/* %r1 = __bpf_tramp_enter */
-		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
 		/* %r2 = im */
 		load_imm64(jit, REG_2, (u64)im);
-		/* %r1() */
-		call_r1(jit);
+		/* brasl %r14,__bpf_tramp_enter */
+		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_enter);
 	}
 
 	for (i = 0; i < fentry->nr_links; i++)
@@ -2832,10 +2805,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 		/* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
 		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
 		       0xf000 | tjit->tccnt_off);
-		/* lgr %r1,%r8 */
-		EMIT4(0xb9040000, REG_1, REG_8);
-		/* %r1() */
-		call_r1(jit);
+		if (flags & BPF_TRAMP_F_ORIG_STACK) {
+			if (nospec_uses_trampoline())
+				/* brasl %r14,__s390_indirect_jump_r8 */
+				EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+						     __s390_indirect_jump_r8);
+			else
+				/* basr %r14,%r8 */
+				EMIT2(0x0d00, REG_14, REG_8);
+		} else {
+			/* brasl %r14,func_addr+S390X_PATCH_SIZE */
+			EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+					     func_addr + S390X_PATCH_SIZE);
+		}
 		/* stg %r2,retval_off(%r15) */
 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
 			      tjit->retval_off);
@@ -2866,12 +2848,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 		 * __bpf_tramp_exit(im);
 		 */
 
-		/* %r1 = __bpf_tramp_exit */
-		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
 		/* %r2 = im */
 		load_imm64(jit, REG_2, (u64)im);
-		/* %r1() */
-		call_r1(jit);
+		/* brasl %r14,__bpf_tramp_exit */
+		EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_exit);
 	}
 
 	/* lmg %r2,%rN,reg_args_off(%r15) */
@@ -2880,7 +2860,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 			      REG_2 + (nr_reg_args - 1), REG_15,
 			      tjit->reg_args_off);
 	/* lgr %r1,%r8 */
-	if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+	if (!(flags & BPF_TRAMP_F_SKIP_FRAME) &&
+	    (flags & BPF_TRAMP_F_ORIG_STACK))
 		EMIT4(0xb9040000, REG_1, REG_8);
 	/* lmg %r7,%r8,r7_r8_off(%r15) */
 	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
@@ -2899,9 +2880,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
 	if (flags & BPF_TRAMP_F_SKIP_FRAME)
 		EMIT_JUMP_REG(14);
-	else
+	else if (flags & BPF_TRAMP_F_ORIG_STACK)
 		EMIT_JUMP_REG(1);
-
+	else
+		/* brcl 0xf,func_addr+S390X_PATCH_SIZE */
+		EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+				     func_addr + S390X_PATCH_SIZE);
 	return 0;
 }
 
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH bpf-next v2] s390/bpf: Use direct calls and jumps where possible
  2025-08-19 10:20 [PATCH bpf-next v2] s390/bpf: Use direct calls and jumps where possible Ilya Leoshkevich
@ 2025-08-20 13:00 ` patchwork-bot+netdevbpf
  0 siblings, 0 replies; 2+ messages in thread
From: patchwork-bot+netdevbpf @ 2025-08-20 13:00 UTC (permalink / raw)
  To: Ilya Leoshkevich; +Cc: ast, daniel, andrii, bpf, hca, gor, agordeev

Hello:

This patch was applied to bpf/bpf-next.git (master)
by Daniel Borkmann <daniel@iogearbox.net>:

On Tue, 19 Aug 2025 12:20:38 +0200 you wrote:
> After the V!=R rework (commit c98d2ecae08f ("s390/mm: Uncouple physical
> vs virtual address spaces")), all kernel code and related data are
> allocated within a 4G region, making it possible to use relative
> addressing in BPF code more extensively.
> 
> Convert as many indirect calls and jumps to direct calls as possible,
> namely:
> 
> [...]

Here is the summary with links:
  - [bpf-next,v2] s390/bpf: Use direct calls and jumps where possible
    https://git.kernel.org/bpf/bpf-next/c/b5bbbb70e5f5

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-08-20 12:59 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-19 10:20 [PATCH bpf-next v2] s390/bpf: Use direct calls and jumps where possible Ilya Leoshkevich
2025-08-20 13:00 ` patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).