From: Leon Hwang <leon.hwang@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
Martin KaFai Lau <martin.lau@linux.dev>,
Eduard Zingerman <eddyz87@gmail.com>, Song Liu <song@kernel.org>,
Yonghong Song <yonghong.song@linux.dev>,
John Fastabend <john.fastabend@gmail.com>,
KP Singh <kpsingh@kernel.org>,
Stanislav Fomichev <sdf@fomichev.me>, Hao Luo <haoluo@google.com>,
Jiri Olsa <jolsa@kernel.org>,
Puranjay Mohan <puranjay@kernel.org>,
Xu Kuohai <xukuohai@huaweicloud.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>, Thomas Gleixner <tglx@kernel.org>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
Shuah Khan <shuah@kernel.org>, Leon Hwang <leon.hwang@linux.dev>,
Peilin Ye <yepeilin@google.com>,
Luis Gerhorst <luis.gerhorst@fau.de>,
Viktor Malik <vmalik@redhat.com>,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
linux-kselftest@vger.kernel.org, kernel-patches-bot@fb.com
Subject: [PATCH bpf-next v2 2/6] bpf, x86: Add 64-bit bitops kfuncs support for x86_64
Date: Thu, 19 Feb 2026 22:29:24 +0800 [thread overview]
Message-ID: <20260219142933.13904-3-leon.hwang@linux.dev> (raw)
In-Reply-To: <20260219142933.13904-1-leon.hwang@linux.dev>
Implement JIT inlining of the 64-bit bitops kfuncs on x86_64.
bpf_rol64() and bpf_ror64() are always supported via ROL/ROR.
bpf_ctz64() and bpf_ffs64() are supported when the CPU has
X86_FEATURE_BMI1 (TZCNT).
bpf_clz64() and bpf_fls64() are supported when the CPU has
X86_FEATURE_ABM (LZCNT).
bpf_popcnt64() is supported when the CPU has X86_FEATURE_POPCNT.
bpf_bitrev64() is not inlined as x86_64 has no native bit-reverse
instruction, so it falls back to a regular function call.
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
arch/x86/net/bpf_jit_comp.c | 141 ++++++++++++++++++++++++++++++++++++
1 file changed, 141 insertions(+)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 070ba80e39d7..193e1e2d7aa8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -19,6 +19,7 @@
#include <asm/text-patching.h>
#include <asm/unwind.h>
#include <asm/cfi.h>
+#include <asm/cpufeatures.h>
static bool all_callee_regs_used[4] = {true, true, true, true};
@@ -1604,6 +1605,127 @@ static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
*pprog = prog;
}
+/*
+ * Try to JIT-inline the body of a known 64-bit bitops kfunc at its call
+ * site: input in rdi (shift amount in rsi for the rotates), result in
+ * rax, matching the calling convention of the call being replaced.
+ * Returns true when code was emitted, false to fall back to a real call.
+ */
+static bool bpf_inlines_func_call(u8 **pprog, void *func)
+{
+ bool has_popcnt = boot_cpu_has(X86_FEATURE_POPCNT);
+ bool has_bmi1 = boot_cpu_has(X86_FEATURE_BMI1);
+ bool has_abm = boot_cpu_has(X86_FEATURE_ABM);
+ bool inlined = true;
+ u8 *prog = *pprog;
+
+ /*
+ * x86 Bit manipulation instruction set
+ * https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set
+ */
+
+ if (func == bpf_clz64 && has_abm) {
+ /*
+ * Intel® 64 and IA-32 Architectures Software Developer's Manual (June 2023)
+ *
+ * LZCNT - Count the Number of Leading Zero Bits
+ *
+ * Opcode/Instruction
+ * F3 REX.W 0F BD /r
+ * LZCNT r64, r/m64
+ *
+ * Op/En
+ * RM
+ *
+ * 64/32-bit Mode
+ * V/N.E.
+ *
+ * CPUID Feature Flag
+ * LZCNT
+ *
+ * Description
+ * Count the number of leading zero bits in r/m64, return
+ * result in r64.
+ */
+ /* emit: x ? 64 - fls64(x) : 64; LZCNT yields 64 for a zero input */
+ /* lzcnt rax, rdi */
+ EMIT5(0xF3, 0x48, 0x0F, 0xBD, 0xC7);
+ } else if (func == bpf_ctz64 && has_bmi1) {
+ /*
+ * Intel® 64 and IA-32 Architectures Software Developer's Manual (June 2023)
+ *
+ * TZCNT - Count the Number of Trailing Zero Bits
+ *
+ * Opcode/Instruction
+ * F3 REX.W 0F BC /r
+ * TZCNT r64, r/m64
+ *
+ * Op/En
+ * RM
+ *
+ * 64/32-bit Mode
+ * V/N.E.
+ *
+ * CPUID Feature Flag
+ * BMI1
+ *
+ * Description
+ * Count the number of trailing zero bits in r/m64, return
+ * result in r64.
+ */
+ /* emit: x ? __ffs64(x) : 64; TZCNT yields 64 for a zero input */
+ /* tzcnt rax, rdi */
+ EMIT5(0xF3, 0x48, 0x0F, 0xBC, 0xC7);
+ } else if (func == bpf_ffs64 && has_bmi1) {
+ /* emit: __ffs64(x); x == 0 has been handled in verifier */
+ /* tzcnt rax, rdi */
+ EMIT5(0xF3, 0x48, 0x0F, 0xBC, 0xC7);
+ } else if (func == bpf_fls64 && has_abm) {
+ /* emit: fls64(x), i.e. 64 - lzcnt(x); lzcnt(0) = 64 gives fls64(0) = 0 */
+ /* lzcnt rax, rdi */
+ EMIT5(0xF3, 0x48, 0x0F, 0xBD, 0xC7);
+ EMIT3(0x48, 0xF7, 0xD8); /* neg rax */
+ EMIT4(0x48, 0x83, 0xC0, 0x40); /* add rax, 64 */
+ } else if (func == bpf_popcnt64 && has_popcnt) {
+ /*
+ * Intel® 64 and IA-32 Architectures Software Developer's Manual (June 2023)
+ *
+ * POPCNT - Return the Count of Number of Bits Set to 1
+ *
+ * Opcode/Instruction
+ * F3 REX.W 0F B8 /r
+ * POPCNT r64, r/m64
+ *
+ * Op/En
+ * RM
+ *
+ * 64/32-bit Mode
+ * V/N.E.
+ *
+ * CPUID Feature Flag
+ * POPCNT
+ *
+ * Description
+ * POPCNT on r/m64
+ */
+ /* popcnt rax, rdi */
+ EMIT5(0xF3, 0x48, 0x0F, 0xB8, 0xC7);
+ } else if (func == bpf_rol64) {
+ EMIT1(0x51); /* push rcx */
+ /* emit: rol64(x, s); variable rotate must take its count in cl */
+ EMIT3(0x48, 0x89, 0xF1); /* mov rcx, rsi */
+ EMIT3(0x48, 0x89, 0xF8); /* mov rax, rdi */
+ EMIT3(0x48, 0xD3, 0xC0); /* rol rax, cl */
+ EMIT1(0x59); /* pop rcx */
+ } else if (func == bpf_ror64) {
+ EMIT1(0x51); /* push rcx */
+ /* emit: ror64(x, s); variable rotate must take its count in cl */
+ EMIT3(0x48, 0x89, 0xF1); /* mov rcx, rsi */
+ EMIT3(0x48, 0x89, 0xF8); /* mov rax, rdi */
+ EMIT3(0x48, 0xD3, 0xC8); /* ror rax, cl */
+ EMIT1(0x59); /* pop rcx */
+ } else {
+ /* Unknown kfunc (e.g. bpf_bitrev64): emit nothing, caller does a real call */
+ inlined = false;
+ }
+
+ *pprog = prog;
+ return inlined;
+}
+
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
#define __LOAD_TCC_PTR(off) \
@@ -2452,6 +2574,8 @@ st: if (is_imm8(insn->off))
u8 *ip = image + addrs[i - 1];
func = (u8 *) __bpf_call_base + imm32;
+ if (bpf_inlines_func_call(&prog, func))
+ break;
if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
LOAD_TAIL_CALL_CNT_PTR(stack_depth);
ip += 7;
@@ -4117,3 +4241,20 @@ bool bpf_jit_supports_fsession(void)
{
return true;
}
+
+/*
+ * Arch hook: report whether the x86-64 JIT will inline a call to
+ * @func_addr. The conditions here must stay in sync with the emission
+ * paths in bpf_inlines_func_call(): TZCNT-based kfuncs need BMI1,
+ * LZCNT-based kfuncs need ABM, bpf_popcnt64() needs POPCNT, and the
+ * rotate kfuncs are always inlinable via ROL/ROR.
+ */
+bool bpf_jit_inlines_kfunc_call(void *func_addr)
+{
+ if (func_addr == bpf_ctz64 || func_addr == bpf_ffs64)
+ return boot_cpu_has(X86_FEATURE_BMI1);
+
+ if (func_addr == bpf_clz64 || func_addr == bpf_fls64)
+ return boot_cpu_has(X86_FEATURE_ABM);
+
+ if (func_addr == bpf_popcnt64)
+ return boot_cpu_has(X86_FEATURE_POPCNT);
+
+ if (func_addr == bpf_rol64 || func_addr == bpf_ror64)
+ return true;
+
+ return false;
+}
--
2.52.0
next prev parent reply other threads:[~2026-02-19 14:30 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-19 14:29 [PATCH bpf-next v2 0/6] bpf: Introduce 64-bit bitops kfuncs Leon Hwang
2026-02-19 14:29 ` [PATCH bpf-next v2 1/6] " Leon Hwang
2026-02-19 17:50 ` Alexei Starovoitov
2026-02-20 15:34 ` Leon Hwang
2026-02-19 14:29 ` Leon Hwang [this message]
2026-02-19 17:47 ` [PATCH bpf-next v2 2/6] bpf, x86: Add 64-bit bitops kfuncs support for x86_64 Alexei Starovoitov
2026-02-20 15:54 ` Leon Hwang
2026-02-20 17:50 ` Alexei Starovoitov
2026-02-21 12:45 ` Leon Hwang
2026-02-21 16:51 ` Alexei Starovoitov
2026-02-23 16:35 ` Leon Hwang
2026-02-19 22:05 ` kernel test robot
2026-02-20 14:12 ` Leon Hwang
2026-02-20 11:59 ` kernel test robot
2026-02-19 14:29 ` [PATCH bpf-next v2 3/6] bpf, arm64: Add 64-bit bitops kfuncs support Leon Hwang
2026-02-19 15:10 ` Puranjay Mohan
2026-02-19 15:20 ` Puranjay Mohan
2026-02-19 15:25 ` Puranjay Mohan
2026-02-19 15:36 ` Leon Hwang
2026-02-19 14:29 ` [PATCH bpf-next v2 4/6] selftests/bpf: Add tests for 64-bit bitops kfuncs Leon Hwang
2026-02-19 14:29 ` [PATCH bpf-next v2 5/6] selftests/bpf: Add __cpu_feature annotation for CPU-feature-gated tests Leon Hwang
2026-02-19 14:29 ` [PATCH bpf-next v2 6/6] selftests/bpf: Add JIT disassembly tests for 64-bit bitops kfuncs Leon Hwang
-- strict thread matches above, loose matches on Subject: below --
2026-02-20 18:57 [PATCH bpf-next v2 1/6] bpf: Introduce " kernel test robot
2026-02-21 9:58 ` Dan Carpenter
2026-02-21 12:50 ` Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260219142933.13904-3-leon.hwang@linux.dev \
--to=leon.hwang@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bp@alien8.de \
--cc=bpf@vger.kernel.org \
--cc=catalin.marinas@arm.com \
--cc=daniel@iogearbox.net \
--cc=dave.hansen@linux.intel.com \
--cc=eddyz87@gmail.com \
--cc=haoluo@google.com \
--cc=hpa@zytor.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kernel-patches-bot@fb.com \
--cc=kpsingh@kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=luis.gerhorst@fau.de \
--cc=martin.lau@linux.dev \
--cc=mingo@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=puranjay@kernel.org \
--cc=sdf@fomichev.me \
--cc=shuah@kernel.org \
--cc=song@kernel.org \
--cc=tglx@kernel.org \
--cc=vmalik@redhat.com \
--cc=will@kernel.org \
--cc=x86@kernel.org \
--cc=xukuohai@huaweicloud.com \
--cc=yepeilin@google.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.