All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org,
	Ard Biesheuvel <ardb@kernel.org>,
	Demian Shulhan <demyansh@gmail.com>,
	Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 5/5] lib/crc: arm: Enable arm64's NEON intrinsics implementation of crc64
Date: Mon, 30 Mar 2026 16:46:36 +0200	[thread overview]
Message-ID: <20260330144630.33026-12-ardb@kernel.org> (raw)
In-Reply-To: <20260330144630.33026-7-ardb@kernel.org>

Tweak the NEON intrinsics crc64 code written for arm64 so it can be
built for 32-bit ARM as well. The only workaround needed is to provide
alternatives for vmull_p64() and vmull_high_p64() on Clang, which only
defines those when building for the AArch64 or arm64ec ISA.

KUnit benchmark results (Cortex-A53 @ 1 Ghz)

Before:

   # crc64_nvme_benchmark: len=1: 35 MB/s
   # crc64_nvme_benchmark: len=16: 78 MB/s
   # crc64_nvme_benchmark: len=64: 87 MB/s
   # crc64_nvme_benchmark: len=127: 88 MB/s
   # crc64_nvme_benchmark: len=128: 88 MB/s
   # crc64_nvme_benchmark: len=200: 89 MB/s
   # crc64_nvme_benchmark: len=256: 89 MB/s
   # crc64_nvme_benchmark: len=511: 89 MB/s
   # crc64_nvme_benchmark: len=512: 89 MB/s
   # crc64_nvme_benchmark: len=1024: 90 MB/s
   # crc64_nvme_benchmark: len=3173: 90 MB/s
   # crc64_nvme_benchmark: len=4096: 90 MB/s
   # crc64_nvme_benchmark: len=16384: 90 MB/s

After:

   # crc64_nvme_benchmark: len=1: 32 MB/s
   # crc64_nvme_benchmark: len=16: 76 MB/s
   # crc64_nvme_benchmark: len=64: 71 MB/s
   # crc64_nvme_benchmark: len=127: 88 MB/s
   # crc64_nvme_benchmark: len=128: 618 MB/s
   # crc64_nvme_benchmark: len=200: 542 MB/s
   # crc64_nvme_benchmark: len=256: 920 MB/s
   # crc64_nvme_benchmark: len=511: 836 MB/s
   # crc64_nvme_benchmark: len=512: 1261 MB/s
   # crc64_nvme_benchmark: len=1024: 1531 MB/s
   # crc64_nvme_benchmark: len=3173: 1731 MB/s
   # crc64_nvme_benchmark: len=4096: 1851 MB/s
   # crc64_nvme_benchmark: len=16384: 1858 MB/s

Enable big-endian support only on GCC - the code generated by Clang is
horribly broken.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 lib/crc/Kconfig                  |  1 +
 lib/crc/Makefile                 |  5 ++-
 lib/crc/arm/crc64.h              | 36 ++++++++++++++++++++
 lib/crc/arm64/crc64-neon-inner.c | 35 +++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/lib/crc/Kconfig b/lib/crc/Kconfig
index 31038c8d111a..2f93d4c4d52d 100644
--- a/lib/crc/Kconfig
+++ b/lib/crc/Kconfig
@@ -82,6 +82,7 @@ config CRC64
 config CRC64_ARCH
 	bool
 	depends on CRC64 && CRC_OPTIMIZATIONS
+	default y if ARM && KERNEL_MODE_NEON && !(CPU_BIG_ENDIAN && CC_IS_CLANG)
 	default y if ARM64
 	default y if RISCV && RISCV_ISA_ZBC && 64BIT
 	default y if X86_64
diff --git a/lib/crc/Makefile b/lib/crc/Makefile
index ff213590e4e3..b6c381cc66bb 100644
--- a/lib/crc/Makefile
+++ b/lib/crc/Makefile
@@ -39,8 +39,11 @@ crc64-y := crc64-main.o
 ifeq ($(CONFIG_CRC64_ARCH),y)
 CFLAGS_crc64-main.o += -I$(src)/$(SRCARCH)
 
+crc64-cflags-$(CONFIG_ARM) += -march=armv8-a -mfpu=crypto-neon-fp-armv8
+crc64-cflags-$(CONFIG_ARM64) += -march=armv8-a+crypto
 CFLAGS_REMOVE_arm64/crc64-neon-inner.o += $(CC_FLAGS_NO_FPU)
-CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) -march=armv8-a+crypto
+CFLAGS_arm64/crc64-neon-inner.o += $(CC_FLAGS_FPU) $(crc64-cflags-y)
+crc64-$(CONFIG_ARM) += arm64/crc64-neon-inner.o
 crc64-$(CONFIG_ARM64) += arm64/crc64-neon-inner.o
 
 crc64-$(CONFIG_RISCV) += riscv/crc64_lsb.o riscv/crc64_msb.o
diff --git a/lib/crc/arm/crc64.h b/lib/crc/arm/crc64.h
new file mode 100644
index 000000000000..7c8d54f38e5c
--- /dev/null
+++ b/lib/crc/arm/crc64.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * CRC64 using ARM PMULL instructions
+ */
+
+#include <asm/simd.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);
+
+u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len);
+
+#define crc64_be_arch crc64_be_generic
+
+static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
+{
+	if (len >= 128 && static_branch_likely(&have_pmull) &&
+	    likely(may_use_simd())) {
+		do {
+			size_t chunk = min_t(size_t, len & ~15, SZ_4K);
+
+			scoped_ksimd()
+				crc = crc64_nvme_arm64_c(crc, p, chunk);
+
+			p += chunk;
+			len -= chunk;
+		} while (len >= 128);
+	}
+	return crc64_nvme_generic(crc, p, len);
+}
+
+#define crc64_mod_init_arch crc64_mod_init_arch
+static void crc64_mod_init_arch(void)
+{
+	if (elf_hwcap2 & HWCAP2_PMULL)
+		static_branch_enable(&have_pmull);
+}
diff --git a/lib/crc/arm64/crc64-neon-inner.c b/lib/crc/arm64/crc64-neon-inner.c
index 28527e544ff6..99607dbb7bfd 100644
--- a/lib/crc/arm64/crc64-neon-inner.c
+++ b/lib/crc/arm64/crc64-neon-inner.c
@@ -15,6 +15,40 @@ static const u64 fold_consts_val[2] = { 0xeadc41fd2ba3d420ULL,
 static const u64 bconsts_val[2] = { 0x27ecfa329aef9f77ULL,
 				    0x34d926535897936aULL };
 
+#if defined(CONFIG_ARM) && defined(CONFIG_CC_IS_CLANG)
+static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
+{
+	uint64_t l = vgetq_lane_u64(a, 0);
+	uint64_t m = vgetq_lane_u64(b, 0);
+	uint64x2_t result;
+
+	asm("vmull.p64	%q0, %1, %2" : "=w"(result) : "w"(l), "w"(m));
+
+	return result;
+}
+
+static inline uint64x2_t pmull64_high(uint64x2_t a, uint64x2_t b)
+{
+	uint64_t l = vgetq_lane_u64(a, 1);
+	uint64_t m = vgetq_lane_u64(b, 1);
+	uint64x2_t result;
+
+	asm("vmull.p64	%q0, %1, %2" : "=w"(result) : "w"(l), "w"(m));
+
+	return result;
+}
+
+static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
+{
+	uint64_t l = vgetq_lane_u64(a, 1);
+	uint64_t m = vgetq_lane_u64(b, 0);
+	uint64x2_t result;
+
+	asm("vmull.p64	%q0, %1, %2" : "=w"(result) : "w"(l), "w"(m));
+
+	return result;
+}
+#else
 static inline uint64x2_t pmull64(uint64x2_t a, uint64x2_t b)
 {
 	return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 0),
@@ -34,6 +68,7 @@ static inline uint64x2_t pmull64_hi_lo(uint64x2_t a, uint64x2_t b)
 	return vreinterpretq_u64_p128(vmull_p64(vgetq_lane_u64(a, 1),
 						vgetq_lane_u64(b, 0)));
 }
+#endif
 
 u64 crc64_nvme_arm64_c(u64 crc, const u8 *p, size_t len)
 {
-- 
2.53.0.1018.g2bb0e51243-goog



  parent reply	other threads:[~2026-03-30 14:47 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-30 14:46 [PATCH 0/5] crc64: Tweak intrinsics code and enable it for ARM Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 1/5] lib/crc: arm64: Drop unnecessary chunking logic from crc64 Ard Biesheuvel
2026-03-31 22:33   ` Eric Biggers
2026-04-01  0:09     ` Eric Biggers
2026-04-01  6:57     ` Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 2/5] lib/crc: arm64: Use existing macros for kernel-mode FPU cflags Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 3/5] ARM: Add a neon-intrinsics.h header like on arm64 Ard Biesheuvel
2026-03-30 14:46 ` [PATCH 4/5] lib/crc: arm64: Simplify intrinsics implementation Ard Biesheuvel
2026-03-30 14:46 ` Ard Biesheuvel [this message]
2026-03-31  6:47   ` [PATCH 5/5] lib/crc: arm: Enable arm64's NEON intrinsics implementation of crc64 Christoph Hellwig
2026-03-31  8:20     ` Ard Biesheuvel
2026-03-31 22:41   ` Eric Biggers
2026-04-01 16:48     ` Ard Biesheuvel
2026-04-01 19:59 ` [PATCH 0/5] crc64: Tweak intrinsics code and enable it for ARM Eric Biggers
2026-04-02  8:52   ` Ard Biesheuvel
2026-04-02 23:40     ` Eric Biggers
2026-04-03  6:49       ` Ard Biesheuvel
2026-04-03 19:59         ` Eric Biggers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260330144630.33026-12-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=demyansh@gmail.com \
    --cc=ebiggers@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-crypto@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.