All of lore.kernel.org
 help / color / mirror / Atom feed
From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Martin KaFai Lau <martin.lau@linux.dev>,
	Eduard Zingerman <eddyz87@gmail.com>, Song Liu <song@kernel.org>,
	Yonghong Song <yonghong.song@linux.dev>,
	John Fastabend <john.fastabend@gmail.com>,
	KP Singh <kpsingh@kernel.org>,
	Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Puranjay Mohan <puranjay@kernel.org>,
	Xu Kuohai <xukuohai@huaweicloud.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	"David S . Miller" <davem@davemloft.net>,
	David Ahern <dsahern@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	kernel-patches-bot@fb.com, Leon Hwang <leon.hwang@linux.dev>
Subject: [PATCH bpf-next 3/4] bpf, arm64: tailcall: Eliminate max_entries and bpf_func access at runtime
Date: Fri,  2 Jan 2026 23:00:31 +0800	[thread overview]
Message-ID: <20260102150032.53106-4-leon.hwang@linux.dev> (raw)
In-Reply-To: <20260102150032.53106-1-leon.hwang@linux.dev>

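Apply the same tail call optimization to arm64 as done for x86_64.

build_insn() decodes the used_maps index (imm bits 0-7) and the dyn_array
flag (imm bits 8-15) from the tail call instruction and passes both to
emit_bpf_tail_call().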

When the prog array map is known at verification time (dyn_array=false):
  - Embed max_entries as an immediate value instead of loading from memory
  - Use the precomputed target from array->ptrs[max_entries + index]
  - Jump directly to the cached target without dereferencing prog->bpf_func

When the map is dynamically determined (dyn_array=true):
  - Load max_entries from the array at runtime
  - Look up prog from array->ptrs[index] and compute the target address

Implement bpf_arch_tail_call_prologue_offset(), which returns
"PROLOGUE_OFFSET * 4": PROLOGUE_OFFSET is counted in 4-byte instructions,
so it is multiplied by 4 to give the prologue offset in bytes.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 arch/arm64/net/bpf_jit_comp.c | 71 +++++++++++++++++++++++++----------
 1 file changed, 51 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 0c4d44bcfbf4..bcd890bff36a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -620,8 +620,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 	return 0;
 }
 
-static int emit_bpf_tail_call(struct jit_ctx *ctx)
+static int emit_bpf_tail_call(struct jit_ctx *ctx, u32 map_index, bool dyn_array)
 {
+	struct bpf_map *map = ctx->prog->aux->used_maps[map_index];
+
 	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
 	const u8 r2 = bpf2a64[BPF_REG_2];
 	const u8 r3 = bpf2a64[BPF_REG_3];
@@ -638,9 +640,13 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	/* if (index >= array->map.max_entries)
 	 *     goto out;
 	 */
-	off = offsetof(struct bpf_array, map.max_entries);
-	emit_a64_mov_i64(tmp, off, ctx);
-	emit(A64_LDR32(tmp, r2, tmp), ctx);
+	if (dyn_array) {
+		off = offsetof(struct bpf_array, map.max_entries);
+		emit_a64_mov_i64(tmp, off, ctx);
+		emit(A64_LDR32(tmp, r2, tmp), ctx);
+	} else {
+		emit_a64_mov_i64(tmp, map->max_entries, ctx);
+	}
 	emit(A64_MOV(0, r3, r3), ctx);
 	emit(A64_CMP(0, r3, tmp), ctx);
 	branch1 = ctx->image + ctx->idx;
@@ -659,15 +665,26 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	/* (*tail_call_cnt_ptr)++; */
 	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
 
-	/* prog = array->ptrs[index];
-	 * if (prog == NULL)
-	 *     goto out;
-	 */
-	off = offsetof(struct bpf_array, ptrs);
-	emit_a64_mov_i64(tmp, off, ctx);
-	emit(A64_ADD(1, tmp, r2, tmp), ctx);
-	emit(A64_LSL(1, prg, r3, 3), ctx);
-	emit(A64_LDR64(prg, tmp, prg), ctx);
+	if (dyn_array) {
+		/* prog = array->ptrs[index];
+		 * if (prog == NULL)
+		 *     goto out;
+		 */
+		off = offsetof(struct bpf_array, ptrs);
+		emit_a64_mov_i64(tmp, off, ctx);
+		emit(A64_ADD(1, tmp, r2, tmp), ctx);
+		emit(A64_LSL(1, prg, r3, 3), ctx);
+		emit(A64_LDR64(prg, tmp, prg), ctx);
+	} else {
+		/* tgt = array->ptrs[max_entries + index];
+		 * if (tgt == 0)
+		 *     goto out;
+		 */
+		emit(A64_LSL(1, prg, r3, 3), ctx);
+		off = offsetof(struct bpf_array, ptrs) + map->max_entries * sizeof(void *);
+		emit_a64_add_i(1, prg, prg, tmp, off, ctx);
+		emit(A64_LDR64(prg, r2, prg), ctx);
+	}
 	branch3 = ctx->image + ctx->idx;
 	emit(A64_NOP, ctx);
 
@@ -680,12 +697,17 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 
 	pop_callee_regs(ctx);
 
-	/* goto *(prog->bpf_func + prologue_offset); */
-	off = offsetof(struct bpf_prog, bpf_func);
-	emit_a64_mov_i64(tmp, off, ctx);
-	emit(A64_LDR64(tmp, prg, tmp), ctx);
-	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
-	emit(A64_BR(tmp), ctx);
+	if (dyn_array) {
+		/* goto *(prog->bpf_func + prologue_offset); */
+		off = offsetof(struct bpf_prog, bpf_func);
+		emit_a64_mov_i64(tmp, off, ctx);
+		emit(A64_LDR64(tmp, prg, tmp), ctx);
+		emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
+		emit(A64_BR(tmp), ctx);
+	} else {
+		/* goto *tgt; */
+		emit(A64_BR(prg), ctx);
+	}
 
 	if (ctx->image) {
 		off = &ctx->image[ctx->idx] - branch1;
@@ -701,6 +723,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	return 0;
 }
 
+int bpf_arch_tail_call_prologue_offset(void)
+{
+	/* PROLOGUE_OFFSET is in 4-byte (u32) instructions; return bytes */
+	return PROLOGUE_OFFSET * 4;
+}
+
 static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
 {
 	const s32 imm = insn->imm;
@@ -1617,7 +1645,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	}
 	/* tail call */
 	case BPF_JMP | BPF_TAIL_CALL:
-		if (emit_bpf_tail_call(ctx))
+		bool dynamic_array = (insn->imm >> 8) & 0xFF;
+		u32 map_index = insn->imm & 0xFF;
+
+		if (emit_bpf_tail_call(ctx, map_index, dynamic_array))
 			return -EFAULT;
 		break;
 	/* function return */
-- 
2.52.0


  parent reply	other threads:[~2026-01-02 15:02 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-02 15:00 [PATCH bpf-next 0/4] bpf: tailcall: Eliminate max_entries and bpf_func access at runtime Leon Hwang
2026-01-02 15:00 ` [PATCH bpf-next 1/4] bpf: tailcall: Introduce bpf_arch_tail_call_prologue_offset Leon Hwang
2026-01-02 15:21   ` bot+bpf-ci
2026-01-02 15:38     ` Leon Hwang
2026-01-02 15:00 ` [PATCH bpf-next 2/4] bpf, x64: tailcall: Eliminate max_entries and bpf_func access at runtime Leon Hwang
2026-01-02 15:00 ` Leon Hwang [this message]
2026-01-02 15:00 ` [PATCH bpf-next 4/4] bpf, lib/test_bpf: Fix broken tailcall tests Leon Hwang
2026-01-03  0:10 ` [PATCH bpf-next 0/4] bpf: tailcall: Eliminate max_entries and bpf_func access at runtime Alexei Starovoitov
2026-01-14 11:28   ` Jiri Olsa
2026-01-14 16:04     ` Alexei Starovoitov
2026-01-14 21:00       ` Jiri Olsa
2026-01-14 21:56         ` Alexei Starovoitov
2026-01-15 18:00           ` Jiri Olsa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260102150032.53106-4-leon.hwang@linux.dev \
    --to=leon.hwang@linux.dev \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bp@alien8.de \
    --cc=bpf@vger.kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=daniel@iogearbox.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=eddyz87@gmail.com \
    --cc=haoluo@google.com \
    --cc=hpa@zytor.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kernel-patches-bot@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=puranjay@kernel.org \
    --cc=sdf@fomichev.me \
    --cc=song@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=xukuohai@huaweicloud.com \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.