From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <martin.lau@linux.dev>,
Eduard Zingerman <eddyz87@gmail.com>, Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
John Fastabend <john.fastabend@gmail.com>,
KP Singh <kpsingh@kernel.org>,
Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
Jiri Olsa <jolsa@kernel.org>,
Puranjay Mohan <puranjay@kernel.org>,
Xu Kuohai <xukuohai@huaweicloud.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>,
"David S . Miller" <davem@davemloft.net>,
David Ahern <dsahern@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
Andrew Morton <akpm@linux-foundation.org>,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
kernel-patches-bot@fb.com, Leon Hwang <leon.hwang@linux.dev>
Subject: [PATCH bpf-next 2/4] bpf, x64: tailcall: Eliminate max_entries and bpf_func access at runtime
Date: Fri, 2 Jan 2026 23:00:30 +0800
Message-ID: <20260102150032.53106-3-leon.hwang@linux.dev>
In-Reply-To: <20260102150032.53106-1-leon.hwang@linux.dev>

Optimize BPF tail calls on x86_64 by eliminating runtime memory accesses
for max_entries and prog->bpf_func when the prog array map is known at
verification time.
The verifier now encodes three fields in the tail call instruction's imm
(a decoding sketch follows the list):
- bits 0-7: map index in used_maps[] (max 63)
- bits 8-15: dynamic array flag (1 if map pointer is poisoned)
- bits 16-31: poke table index + 1 for direct tail calls (max 1023)
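The JIT decodes these fields with plain shifts and masks. A minimal
sketch of that decoding (the helper names are illustrative and not part
of the patch, which open-codes the shifts in the x86 hunk below):

  /* Illustrative decoding helpers; the patch open-codes these. */
  static inline u32 tail_call_imm_map_index(s32 imm)
  {
          return imm & 0xFF;          /* bits 0-7: index into used_maps[] */
  }

  static inline bool tail_call_imm_dyn_array(s32 imm)
  {
          return (imm >> 8) & 0xFF;   /* bits 8-15: map pointer poisoned */
  }

  static inline int tail_call_imm_poke_index(s32 imm)
  {
          return imm >> 16;           /* bits 16-31: poke index + 1; 0 means indirect */
  }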
For static tail calls (map known at verification time):
- max_entries is embedded as an immediate in the comparison instruction
- The cached target from array->ptrs[max_entries + index] is used
directly, avoiding the prog->bpf_func dereference
For dynamic tail calls (map pointer poisoned):
- Fall back to runtime lookup of max_entries and prog->bpf_func (both
  paths are sketched below)
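In emitted-code terms, a simplified sketch of the two sequences from the
x86_64 hunks below (rsi holds the bpf_array pointer, edx the index;
symbolic names stand in for the offsetof() values):

  ; dynamic: runtime loads, as before
  cmp edx, dword ptr [rsi + 16]   ; index vs map.max_entries
  jae out
  mov rcx, [rsi + rdx*8 + ptrs]   ; prog = array->ptrs[index]
  ...
  mov rcx, [rcx + 32]             ; prog->bpf_func
  add rcx, X86_TAIL_CALL_OFFSET

  ; static: immediate compare, cached prologue-skipped target
  cmp edx, <max_entries imm32>    ; no memory load
  jae out
  mov rcx, [rsi + rdx*8 + ptrs + max_entries*8]
  ...                             ; rcx is already the jump target

The static path reads a pre-computed target from the second half of
array->ptrs[], so neither map.max_entries nor prog->bpf_func is
dereferenced at run time.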
This reduces cache misses and improves tail call performance for the
common case where the prog array is statically known.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
arch/x86/net/bpf_jit_comp.c | 51 +++++++++++++++++++++++++++----------
kernel/bpf/verifier.c | 30 ++++++++++++++++++++--
2 files changed, 66 insertions(+), 15 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e3b1c4b1d550..9fd707612da5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -733,11 +733,13 @@ static void emit_return(u8 **pprog, u8 *ip)
* out:
*/
static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
+ u32 map_index, bool dyn_array,
u8 **pprog, bool *callee_regs_used,
u32 stack_depth, u8 *ip,
struct jit_context *ctx)
{
int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
+ struct bpf_map *map = bpf_prog->aux->used_maps[map_index];
u8 *prog = *pprog, *start = *pprog;
int offset;
@@ -752,11 +754,14 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
* goto out;
*/
EMIT2(0x89, 0xD2); /* mov edx, edx */
- EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
- offsetof(struct bpf_array, map.max_entries));
+ if (dyn_array)
+ EMIT3(0x3B, 0x56, /* cmp edx, dword ptr [rsi + 16] */
+ offsetof(struct bpf_array, map.max_entries));
+ else
+ EMIT2_off32(0x81, 0xFA, map->max_entries); /* cmp edx, imm32 (map->max_entries) */
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
- EMIT2(X86_JBE, offset); /* jbe out */
+ EMIT2(X86_JAE, offset); /* jae out */
/*
* if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
@@ -768,9 +773,15 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JAE, offset); /* jae out */
- /* prog = array->ptrs[index]; */
- EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */
- offsetof(struct bpf_array, ptrs));
+ /*
+ * if (dyn_array)
+ * prog = array->ptrs[index];
+ * else
+ * tgt = array->ptrs[max_entries + index];
+ */
+ offset = offsetof(struct bpf_array, ptrs);
+ offset += dyn_array ? 0 : map->max_entries * sizeof(void *);
+ EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, offset); /* mov rcx, [rsi + rdx * 8 + offset] */
/*
* if (prog == NULL)
@@ -803,11 +814,14 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
round_up(stack_depth, 8));
- /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
- EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */
- offsetof(struct bpf_prog, bpf_func));
- EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */
- X86_TAIL_CALL_OFFSET);
+ if (dyn_array) {
+ /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
+ EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */
+ offsetof(struct bpf_prog, bpf_func));
+ EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */
+ X86_TAIL_CALL_OFFSET);
+ }
+
/*
* Now we're ready to jump into next BPF program
* rdi == ctx (1st arg)
@@ -2461,15 +2475,21 @@ st: if (is_imm8(insn->off))
}
case BPF_JMP | BPF_TAIL_CALL:
- if (imm32)
+ bool dynamic_array = (imm32 >> 8) & 0xFF;
+ u32 map_index = imm32 & 0xFF;
+ s32 imm16 = imm32 >> 16;
+
+ if (imm16)
emit_bpf_tail_call_direct(bpf_prog,
- &bpf_prog->aux->poke_tab[imm32 - 1],
+ &bpf_prog->aux->poke_tab[imm16 - 1],
&prog, image + addrs[i - 1],
callee_regs_used,
stack_depth,
ctx);
else
emit_bpf_tail_call_indirect(bpf_prog,
+ map_index,
+ dynamic_array,
&prog,
callee_regs_used,
stack_depth,
@@ -4047,6 +4067,11 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
}
}
+int bpf_arch_tail_call_prologue_offset(void)
+{
+ return X86_TAIL_CALL_OFFSET;
+}
+
bool bpf_jit_supports_arena(void)
{
return true;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3d44c5d06623..ab9c84e76a62 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -22602,6 +22602,18 @@ static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *pat
return 0;
}
+static int tail_call_find_map_index(struct bpf_verifier_env *env, struct bpf_map *map)
+{
+ int i;
+
+ for (i = 0; i < env->used_map_cnt; i++) {
+ if (env->used_maps[i] == map)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
/* Do various post-verification rewrites in a single program pass.
* These rewrites simplify JIT and interpreter implementations.
*/
@@ -22993,10 +23005,24 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
* call and to prevent accidental JITing by JIT compiler
* that doesn't support bpf_tail_call yet
*/
- insn->imm = 0;
insn->code = BPF_JMP | BPF_TAIL_CALL;
+ /*
+ * insn->imm contains 3 fields:
+ * map index(8 bits): 6 bits are enough, 63 max
+ * poisoned(8 bits): 1 bit is enough
+ * poke index(16 bits): 1023 max
+ */
+
aux = &env->insn_aux_data[i + delta];
+ insn->imm = tail_call_find_map_index(env, aux->map_ptr_state.map_ptr);
+ if (insn->imm < 0) {
+ verifier_bug(env, "index not found for prog array map\n");
+ return -EINVAL;
+ }
+
+ insn->imm |= bpf_map_ptr_poisoned(aux) << 8;
+
if (env->bpf_capable && !prog->blinding_requested &&
prog->jit_requested &&
!bpf_map_key_poisoned(aux) &&
@@ -23015,7 +23041,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
return ret;
}
- insn->imm = ret + 1;
+ insn->imm |= (ret + 1) << 16;
goto next_insn;
}
--
2.52.0