From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org,
Ard Biesheuvel <ardb@kernel.org>,
Demian Shulhan <demyansh@gmail.com>,
Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 5/5] lib/crc: arm: Enable arm64's NEON intrinsics implementation of crc64
Date: Mon, 30 Mar 2026 16:46:36 +0200 [thread overview]
Message-ID: <20260330144630.33026-12-ardb@kernel.org> (raw)
In-Reply-To: <20260330144630.33026-7-ardb@kernel.org>
Tweak the NEON intrinsics crc64 code written for arm64 so it can be
built for 32-bit ARM as well. The only workaround needed is to provide
alternatives for vmull_p64() and vmull_high_p64() on Clang, which only
defines those when building for the AArch64 or arm64ec ISA.
KUnit benchmark results (Cortex-A53 @ 1 GHz):
Before:
# crc64_nvme_benchmark: len=1: 35 MB/s
# crc64_nvme_benchmark: len=16: 78 MB/s
# crc64_nvme_benchmark: len=64: 87 MB/s
# crc64_nvme_benchmark: len=127: 88 MB/s
# crc64_nvme_benchmark: len=128: 88 MB/s
# crc64_nvme_benchmark: len=200: 89 MB/s
# crc64_nvme_benchmark: len=256: 89 MB/s
# crc64_nvme_benchmark: len=511: 89 MB/s
# crc64_nvme_benchmark: len=512: 89 MB/s
# crc64_nvme_benchmark: len=1024: 90 MB/s
# crc64_nvme_benchmark: len=3173: 90 MB/s
# crc64_nvme_benchmark: len=4096: 90 MB/s
# crc64_nvme_benchmark: len=16384: 90 MB/s
After:
# crc64_nvme_benchmark: len=1: 32 MB/s
# crc64_nvme_benchmark: len=16: 76 MB/s
# crc64_nvme_benchmark: len=64: 71 MB/s
# crc64_nvme_benchmark: len=127: 88 MB/s
# crc64_nvme_benchmark: len=128: 618 MB/s
# crc64_nvme_benchmark: len=200: 542 MB/s
# crc64_nvme_benchmark: len=256: 920 MB/s
# crc64_nvme_benchmark: len=511: 836 MB/s
# crc64_nvme_benchmark: len=512: 1261 MB/s
# crc64_nvme_benchmark: len=1024: 1531 MB/s
# crc64_nvme_benchmark: len=3173: 1731 MB/s
# crc64_nvme_benchmark: len=4096: 1851 MB/s
# crc64_nvme_benchmark: len=16384: 1858 MB/s
Enable big-endian support only when building with GCC - the big-endian
code generated by Clang is horribly broken.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
lib/crc/Kconfig | 1 +
lib/crc/Makefile | 5 ++-
lib/crc/arm/crc64.h | 36 ++++++++++++++++++++
lib/crc/arm64/crc64-neon-inner.c | 35 +++++++++++++++++++
4 files changed, 76 insertions(+), 1 deletion(-)
diff --git a/lib/crc/Kconfig b/lib/crc/Kconfig
index 31038c8d111a..2f93d4c4d52d 100644
--- a/lib/crc/Kconfig
+++ b/lib/crc/Kconfig
@@ -82,6 +82,7 @@ config CRC64
config CRC64_ARCH
bool
depends on CRC64 && CRC_OPTIMIZATIONS
+ default y if ARM && KERNEL_MODE_NEON && !(CPU_BIG_ENDIAN && CC_IS_CLANG)
default y if ARM64
default y if RISCV && RISCV_ISA_ZBC && 64BIT
default y if X86_64
diff --git a/lib/crc/Makefile b/lib/crc/Makefile
index ff213590e4e3..b6c381cc66bb 100644
--- a/lib/crc/Makefile
+++ b/lib/crc/Makefile
@@ -39,8 +39,11 @@ crc64-y := crc64-main.o
ifeq ($(CONFIG_CRC64_ARCH),y)
CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH)
+crc64-cflags-$(CONFIG_ARM) += -march=armv8-a -mfpu=crypto-neon-fp-armv8
+crc64-cflags-$(CONFIG_ARM64) += -march=armv8-a+crypto
CFLAGS_REMOVE_arm64/crc64-neon-inner.o += $(CC_FLAGS_NO_FPU)
-CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) -march=armv8-a+crypto
+CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) $(crc64-cflags-y)
+crc64-$(CONFIG_ARM) += arm64/crc64-neon-inner.o
crc64-$(CONFIG_ARM64) += arm64/crc64-neon-inner.o
crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o
diff --git a/lib/crc/arm/crc64.h b/lib/crc/arm/crc64.h
new file mode 100644
index 000000000000..7c8d54f38e5c
--- /dev/null
+++ b/lib/crc/arm/crc64.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * CRC64 using ARM PMULL instructions
+ */
+
+#include <asm/simd.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
+
+#define crc64_be_arch crc64_be_generic
+
+static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
+{
+ if (len >= 128 && static_branch_likely(&have_pmull) &&
+ likely(may_use_simd())) {
+ do {
+ size_t chunk = min_t(size_t, len & ~15, SZ_4K);
+
+ scoped_ksimd()
+ crc = crc64_nvme_arm64_c(crc, p, chunk);
+
+ p += chunk;
+ len -= chunk;
+ } while (len >= 128);
+ }
+ return crc64_nvme_generic(crc, p, len);
+}
+
+#define crc64_mod_init_arch crc64_mod_init_arch
+static void crc64_mod_init_arch(void)
+{
+ if (elf_hwcap2 & HWCAP2_PMULL)
+ static_branch_enable(&have_pmull);
+}
diff --git a/lib/crc/arm64/crc64-neon-inner.c b/lib/crc/arm64/crc64-neon-inner.c
index 28527e544ff6..99607dbb7bfd 100644
--- a/lib/crc/arm64/crc64-neon-inner.c
+++ b/lib/crc/arm64/crc64-neon-inner.c
@@ -15,6 +15,40 @@ static const u64 fold_consts_val[2] = { 0xeadc41fd2ba3d420ULL,
static const u64 bconsts_val[2] = { 0x27ecfa329aef9f77ULL,
0x34d926535897936aULL };
+#if defined(CONFIG_ARM) && defined(CONFIG_CC_IS_CLANG)
+static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
+{
+ uint64_t l = vgetq_lane_u64(a, 0);
+ uint64_t m = vgetq_lane_u64(b, 0);
+ uint64x2_t result;
+
+ asm("vmull.p64 %q0, %1, %2" : "=w"(result) : "w"(l), "w"(m));
+
+ return result;
+}
+
+static inline uint64x2_t pmull64_high(uint64x2_t a, uint64x2_t b)
+{
+ uint64_t l = vgetq_lane_u64(a, 1);
+ uint64_t m = vgetq_lane_u64(b, 1);
+ uint64x2_t result;
+
+ asm("vmull.p64 %q0, %1, %2" : "=w"(result) : "w"(l), "w"(m));
+
+ return result;
+}
+
+static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
+{
+ uint64_t l = vgetq_lane_u64(a, 1);
+ uint64_t m = vgetq_lane_u64(b, 0);
+ uint64x2_t result;
+
+ asm("vmull.p64 %q0, %1, %2" : "=w"(result) : "w"(l), "w"(m));
+
+ return result;
+}
+#else
static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
{
return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 0),
@@ -34,6 +68,7 @@ static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 1),
vgetq_lane_u64(b, 0)));
}
+#endif
u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len)
{
--
2.53.0.1018.g2bb0e51243-goog
next prev parent reply other threads:[~2026-03-30 14:47 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-30 14:46 [PATCH 0/5] crc64: Tweak intrinsics code and enable it for ARM Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 1/5] lib/crc: arm64: Drop unnecessary chunking logic from crc64 Ard Biesheuvel
2026-03-31 22:33 ` Eric Biggers
2026-04-01 0:09 ` Eric Biggers
2026-04-01 6:57 ` Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 2/5] lib/crc: arm64: Use existing macros for kernel-mode FPU cflags Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 3/5] ARM: Add a neon-intrinsics.h header like on arm64 Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 4/5] lib/crc: arm64: Simplify intrinsics implementation Ard Biesheuvel
2026-03-30 14:46 ` Ard Biesheuvel [this message]
2026-03-31 6:47 ` [PATCH 5/5] lib/crc: arm: Enable arm64's NEON intrinsics implementation of crc64 Christoph Hellwig
2026-03-31 8:20 ` Ard Biesheuvel
2026-03-31 22:41 ` Eric Biggers
2026-04-01 16:48 ` Ard Biesheuvel
2026-04-01 19:59 ` [PATCH 0/5] crc64: Tweak intrinsics code and enable it for ARM Eric Biggers
2026-04-02 8:52 ` Ard Biesheuvel
2026-04-02 23:40 ` Eric Biggers
2026-04-03 6:49 ` Ard Biesheuvel
2026-04-03 19:59 ` Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260330144630.33026-12-ardb@kernel.org \
--to=ardb@kernel.org \
--cc=demyansh@gmail.com \
--cc=ebiggers@kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox