From: Ard Biesheuvel <ardb+git@google.com>
To: linux-arm-kernel@lists.infradead.org
Cc: linux-crypto@vger.kernel.org, linux-raid@vger.kernel.org,
Ard Biesheuvel <ardb@kernel.org>, Christoph Hellwig <hch@lst.de>,
Russell King <linux@armlinux.org.uk>,
Arnd Bergmann <arnd@arndb.de>,
Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 3/8] xor/arm64: Use shared NEON intrinsics implementation from 32-bit ARM
Date: Wed, 22 Apr 2026 19:16:59 +0200
Message-ID: <20260422171655.3437334-13-ardb+git@google.com>
In-Reply-To: <20260422171655.3437334-10-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
Tweak the arm64 code so that the pure NEON intrinsics implementation of
XOR is shared between arm64 and 32-bit ARM. While at it, rename the
arm64-specific piece to xor-eor3.c, to reflect that only the version
based on the EOR3 instruction is kept there.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
lib/raid/xor/Makefile | 7 +-
lib/raid/xor/arm64/xor-eor3.c | 146 +++++++++
lib/raid/xor/arm64/xor-neon.c | 312 --------------------
lib/raid/xor/xor-neon.c | 4 +
4 files changed, 154 insertions(+), 315 deletions(-)
diff --git a/lib/raid/xor/Makefile b/lib/raid/xor/Makefile
index d78400f2427a..e8ecec3c09f9 100644
--- a/lib/raid/xor/Makefile
+++ b/lib/raid/xor/Makefile
@@ -19,7 +19,8 @@ xor-$(CONFIG_ARM) += arm/xor.o
ifeq ($(CONFIG_ARM),y)
xor-$(CONFIG_KERNEL_MODE_NEON) += xor-neon.o arm/xor-neon-glue.o
endif
-xor-$(CONFIG_ARM64) += arm64/xor-neon.o arm64/xor-neon-glue.o
+xor-$(CONFIG_ARM64) += xor-neon.o arm64/xor-eor3.o \
+ arm64/xor-neon-glue.o
xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd.o
xor-$(CONFIG_CPU_HAS_LSX) += loongarch/xor_simd_glue.o
xor-$(CONFIG_ALTIVEC) += powerpc/xor_vmx.o powerpc/xor_vmx_glue.o
@@ -34,8 +35,8 @@ obj-y += tests/
CFLAGS_xor-neon.o += $(CC_FLAGS_FPU) -I$(src)/$(SRCARCH)
CFLAGS_REMOVE_xor-neon.o += $(CC_FLAGS_NO_FPU)
-CFLAGS_arm64/xor-neon.o += $(CC_FLAGS_FPU)
-CFLAGS_REMOVE_arm64/xor-neon.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_arm64/xor-eor3.o += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_arm64/xor-eor3.o += $(CC_FLAGS_NO_FPU)
CFLAGS_powerpc/xor_vmx.o += -mhard-float -maltivec \
$(call cc-option,-mabi=altivec) \
diff --git a/lib/raid/xor/arm64/xor-eor3.c b/lib/raid/xor/arm64/xor-eor3.c
new file mode 100644
index 000000000000..e44016c363f1
--- /dev/null
+++ b/lib/raid/xor/arm64/xor-eor3.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/cache.h>
+#include <asm/neon-intrinsics.h>
+#include "xor_impl.h"
+#include "xor_arch.h"
+#include "xor-neon.h"
+
+extern void __xor_eor3_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+
+static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
+{
+ uint64x2_t res;
+
+ asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n"
+ "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
+ : "=w"(res) : "w"(p), "w"(q), "w"(r));
+ return res;
+}
+
+static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
+{
+ uint64_t *dp1 = (uint64_t *)p1;
+ uint64_t *dp2 = (uint64_t *)p2;
+ uint64_t *dp3 = (uint64_t *)p3;
+
+ register uint64x2_t v0, v1, v2, v3;
+ long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+ do {
+ /* p1 ^= p2 ^ p3 */
+ v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
+ vld1q_u64(dp3 + 0));
+ v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
+ vld1q_u64(dp3 + 2));
+ v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
+ vld1q_u64(dp3 + 4));
+ v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
+ vld1q_u64(dp3 + 6));
+
+ /* store */
+ vst1q_u64(dp1 + 0, v0);
+ vst1q_u64(dp1 + 2, v1);
+ vst1q_u64(dp1 + 4, v2);
+ vst1q_u64(dp1 + 6, v3);
+
+ dp1 += 8;
+ dp2 += 8;
+ dp3 += 8;
+ } while (--lines > 0);
+}
+
+static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
+{
+ uint64_t *dp1 = (uint64_t *)p1;
+ uint64_t *dp2 = (uint64_t *)p2;
+ uint64_t *dp3 = (uint64_t *)p3;
+ uint64_t *dp4 = (uint64_t *)p4;
+
+ register uint64x2_t v0, v1, v2, v3;
+ long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+ do {
+ /* p1 ^= p2 ^ p3 */
+ v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
+ vld1q_u64(dp3 + 0));
+ v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
+ vld1q_u64(dp3 + 2));
+ v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
+ vld1q_u64(dp3 + 4));
+ v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
+ vld1q_u64(dp3 + 6));
+
+ /* p1 ^= p4 */
+ v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
+ v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
+ v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
+ v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
+
+ /* store */
+ vst1q_u64(dp1 + 0, v0);
+ vst1q_u64(dp1 + 2, v1);
+ vst1q_u64(dp1 + 4, v2);
+ vst1q_u64(dp1 + 6, v3);
+
+ dp1 += 8;
+ dp2 += 8;
+ dp3 += 8;
+ dp4 += 8;
+ } while (--lines > 0);
+}
+
+static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
+{
+ uint64_t *dp1 = (uint64_t *)p1;
+ uint64_t *dp2 = (uint64_t *)p2;
+ uint64_t *dp3 = (uint64_t *)p3;
+ uint64_t *dp4 = (uint64_t *)p4;
+ uint64_t *dp5 = (uint64_t *)p5;
+
+ register uint64x2_t v0, v1, v2, v3;
+ long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+ do {
+ /* p1 ^= p2 ^ p3 */
+ v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
+ vld1q_u64(dp3 + 0));
+ v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
+ vld1q_u64(dp3 + 2));
+ v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
+ vld1q_u64(dp3 + 4));
+ v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
+ vld1q_u64(dp3 + 6));
+
+ /* p1 ^= p4 ^ p5 */
+ v0 = eor3(v0, vld1q_u64(dp4 + 0), vld1q_u64(dp5 + 0));
+ v1 = eor3(v1, vld1q_u64(dp4 + 2), vld1q_u64(dp5 + 2));
+ v2 = eor3(v2, vld1q_u64(dp4 + 4), vld1q_u64(dp5 + 4));
+ v3 = eor3(v3, vld1q_u64(dp4 + 6), vld1q_u64(dp5 + 6));
+
+ /* store */
+ vst1q_u64(dp1 + 0, v0);
+ vst1q_u64(dp1 + 2, v1);
+ vst1q_u64(dp1 + 4, v2);
+ vst1q_u64(dp1 + 6, v3);
+
+ dp1 += 8;
+ dp2 += 8;
+ dp3 += 8;
+ dp4 += 8;
+ dp5 += 8;
+ } while (--lines > 0);
+}
+
+__DO_XOR_BLOCKS(eor3_inner, __xor_eor3_2, __xor_eor3_3, __xor_eor3_4,
+ __xor_eor3_5);
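For reference, the eor3() helper above is a pure three-way XOR:
eor3(p, q, r) equals veorq_u64(veorq_u64(p, q), r) lane by lane, and
the inline asm with ".arch_extension sha3" only lets the assembler
accept the SHA-3 instruction without compiling the whole file for an
architecture level that includes it. Below is a minimal user-space
sketch of that equivalence, using a scalar stand-in for the 64-bit
vector lanes (names illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Scalar model of one 64-bit lane: EOR3 computes p ^ q ^ r. */
static uint64_t eor3_ref(uint64_t p, uint64_t q, uint64_t r)
{
	return p ^ q ^ r;
}

int main(void)
{
	uint64_t p = 0x0123456789abcdefULL;
	uint64_t q = 0xfedcba9876543210ULL;
	uint64_t r = 0x00ff00ff00ff00ffULL;

	/* Same result as two chained XORs, in either order. */
	assert(eor3_ref(p, q, r) == ((p ^ q) ^ r));
	assert(eor3_ref(p, q, r) == (r ^ (q ^ p)));
	return 0;
}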
diff --git a/lib/raid/xor/arm64/xor-neon.c b/lib/raid/xor/arm64/xor-neon.c
deleted file mode 100644
index 97ef3cb92496..000000000000
--- a/lib/raid/xor/arm64/xor-neon.c
+++ /dev/null
@@ -1,312 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Authors: Jackie Liu <liuyun01@kylinos.cn>
- * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
- */
-
-#include <linux/cache.h>
-#include <asm/neon-intrinsics.h>
-#include "xor_impl.h"
-#include "xor_arch.h"
-#include "xor-neon.h"
-
-static void __xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2)
-{
- uint64_t *dp1 = (uint64_t *)p1;
- uint64_t *dp2 = (uint64_t *)p2;
-
- register uint64x2_t v0, v1, v2, v3;
- long lines = bytes / (sizeof(uint64x2_t) * 4);
-
- do {
- /* p1 ^= p2 */
- v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
- v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
- v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
- v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
-
- /* store */
- vst1q_u64(dp1 + 0, v0);
- vst1q_u64(dp1 + 2, v1);
- vst1q_u64(dp1 + 4, v2);
- vst1q_u64(dp1 + 6, v3);
-
- dp1 += 8;
- dp2 += 8;
- } while (--lines > 0);
-}
-
-static void __xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- uint64_t *dp1 = (uint64_t *)p1;
- uint64_t *dp2 = (uint64_t *)p2;
- uint64_t *dp3 = (uint64_t *)p3;
-
- register uint64x2_t v0, v1, v2, v3;
- long lines = bytes / (sizeof(uint64x2_t) * 4);
-
- do {
- /* p1 ^= p2 */
- v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
- v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
- v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
- v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
-
- /* p1 ^= p3 */
- v0 = veorq_u64(v0, vld1q_u64(dp3 + 0));
- v1 = veorq_u64(v1, vld1q_u64(dp3 + 2));
- v2 = veorq_u64(v2, vld1q_u64(dp3 + 4));
- v3 = veorq_u64(v3, vld1q_u64(dp3 + 6));
-
- /* store */
- vst1q_u64(dp1 + 0, v0);
- vst1q_u64(dp1 + 2, v1);
- vst1q_u64(dp1 + 4, v2);
- vst1q_u64(dp1 + 6, v3);
-
- dp1 += 8;
- dp2 += 8;
- dp3 += 8;
- } while (--lines > 0);
-}
-
-static void __xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- uint64_t *dp1 = (uint64_t *)p1;
- uint64_t *dp2 = (uint64_t *)p2;
- uint64_t *dp3 = (uint64_t *)p3;
- uint64_t *dp4 = (uint64_t *)p4;
-
- register uint64x2_t v0, v1, v2, v3;
- long lines = bytes / (sizeof(uint64x2_t) * 4);
-
- do {
- /* p1 ^= p2 */
- v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
- v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
- v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
- v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
-
- /* p1 ^= p3 */
- v0 = veorq_u64(v0, vld1q_u64(dp3 + 0));
- v1 = veorq_u64(v1, vld1q_u64(dp3 + 2));
- v2 = veorq_u64(v2, vld1q_u64(dp3 + 4));
- v3 = veorq_u64(v3, vld1q_u64(dp3 + 6));
-
- /* p1 ^= p4 */
- v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
- v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
- v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
- v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
-
- /* store */
- vst1q_u64(dp1 + 0, v0);
- vst1q_u64(dp1 + 2, v1);
- vst1q_u64(dp1 + 4, v2);
- vst1q_u64(dp1 + 6, v3);
-
- dp1 += 8;
- dp2 += 8;
- dp3 += 8;
- dp4 += 8;
- } while (--lines > 0);
-}
-
-static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
-{
- uint64_t *dp1 = (uint64_t *)p1;
- uint64_t *dp2 = (uint64_t *)p2;
- uint64_t *dp3 = (uint64_t *)p3;
- uint64_t *dp4 = (uint64_t *)p4;
- uint64_t *dp5 = (uint64_t *)p5;
-
- register uint64x2_t v0, v1, v2, v3;
- long lines = bytes / (sizeof(uint64x2_t) * 4);
-
- do {
- /* p1 ^= p2 */
- v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
- v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
- v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
- v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
-
- /* p1 ^= p3 */
- v0 = veorq_u64(v0, vld1q_u64(dp3 + 0));
- v1 = veorq_u64(v1, vld1q_u64(dp3 + 2));
- v2 = veorq_u64(v2, vld1q_u64(dp3 + 4));
- v3 = veorq_u64(v3, vld1q_u64(dp3 + 6));
-
- /* p1 ^= p4 */
- v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
- v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
- v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
- v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
-
- /* p1 ^= p5 */
- v0 = veorq_u64(v0, vld1q_u64(dp5 + 0));
- v1 = veorq_u64(v1, vld1q_u64(dp5 + 2));
- v2 = veorq_u64(v2, vld1q_u64(dp5 + 4));
- v3 = veorq_u64(v3, vld1q_u64(dp5 + 6));
-
- /* store */
- vst1q_u64(dp1 + 0, v0);
- vst1q_u64(dp1 + 2, v1);
- vst1q_u64(dp1 + 4, v2);
- vst1q_u64(dp1 + 6, v3);
-
- dp1 += 8;
- dp2 += 8;
- dp3 += 8;
- dp4 += 8;
- dp5 += 8;
- } while (--lines > 0);
-}
-
-__DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4,
- __xor_neon_5);
-
-static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
-{
- uint64x2_t res;
-
- asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n"
- "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
- : "=w"(res) : "w"(p), "w"(q), "w"(r));
- return res;
-}
-
-static void __xor_eor3_3(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3)
-{
- uint64_t *dp1 = (uint64_t *)p1;
- uint64_t *dp2 = (uint64_t *)p2;
- uint64_t *dp3 = (uint64_t *)p3;
-
- register uint64x2_t v0, v1, v2, v3;
- long lines = bytes / (sizeof(uint64x2_t) * 4);
-
- do {
- /* p1 ^= p2 ^ p3 */
- v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
- vld1q_u64(dp3 + 0));
- v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
- vld1q_u64(dp3 + 2));
- v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
- vld1q_u64(dp3 + 4));
- v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
- vld1q_u64(dp3 + 6));
-
- /* store */
- vst1q_u64(dp1 + 0, v0);
- vst1q_u64(dp1 + 2, v1);
- vst1q_u64(dp1 + 4, v2);
- vst1q_u64(dp1 + 6, v3);
-
- dp1 += 8;
- dp2 += 8;
- dp3 += 8;
- } while (--lines > 0);
-}
-
-static void __xor_eor3_4(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4)
-{
- uint64_t *dp1 = (uint64_t *)p1;
- uint64_t *dp2 = (uint64_t *)p2;
- uint64_t *dp3 = (uint64_t *)p3;
- uint64_t *dp4 = (uint64_t *)p4;
-
- register uint64x2_t v0, v1, v2, v3;
- long lines = bytes / (sizeof(uint64x2_t) * 4);
-
- do {
- /* p1 ^= p2 ^ p3 */
- v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
- vld1q_u64(dp3 + 0));
- v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
- vld1q_u64(dp3 + 2));
- v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
- vld1q_u64(dp3 + 4));
- v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
- vld1q_u64(dp3 + 6));
-
- /* p1 ^= p4 */
- v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
- v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
- v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
- v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
-
- /* store */
- vst1q_u64(dp1 + 0, v0);
- vst1q_u64(dp1 + 2, v1);
- vst1q_u64(dp1 + 4, v2);
- vst1q_u64(dp1 + 6, v3);
-
- dp1 += 8;
- dp2 += 8;
- dp3 += 8;
- dp4 += 8;
- } while (--lines > 0);
-}
-
-static void __xor_eor3_5(unsigned long bytes, unsigned long * __restrict p1,
- const unsigned long * __restrict p2,
- const unsigned long * __restrict p3,
- const unsigned long * __restrict p4,
- const unsigned long * __restrict p5)
-{
- uint64_t *dp1 = (uint64_t *)p1;
- uint64_t *dp2 = (uint64_t *)p2;
- uint64_t *dp3 = (uint64_t *)p3;
- uint64_t *dp4 = (uint64_t *)p4;
- uint64_t *dp5 = (uint64_t *)p5;
-
- register uint64x2_t v0, v1, v2, v3;
- long lines = bytes / (sizeof(uint64x2_t) * 4);
-
- do {
- /* p1 ^= p2 ^ p3 */
- v0 = eor3(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0),
- vld1q_u64(dp3 + 0));
- v1 = eor3(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2),
- vld1q_u64(dp3 + 2));
- v2 = eor3(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4),
- vld1q_u64(dp3 + 4));
- v3 = eor3(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6),
- vld1q_u64(dp3 + 6));
-
- /* p1 ^= p4 ^ p5 */
- v0 = eor3(v0, vld1q_u64(dp4 + 0), vld1q_u64(dp5 + 0));
- v1 = eor3(v1, vld1q_u64(dp4 + 2), vld1q_u64(dp5 + 2));
- v2 = eor3(v2, vld1q_u64(dp4 + 4), vld1q_u64(dp5 + 4));
- v3 = eor3(v3, vld1q_u64(dp4 + 6), vld1q_u64(dp5 + 6));
-
- /* store */
- vst1q_u64(dp1 + 0, v0);
- vst1q_u64(dp1 + 2, v1);
- vst1q_u64(dp1 + 4, v2);
- vst1q_u64(dp1 + 6, v3);
-
- dp1 += 8;
- dp2 += 8;
- dp3 += 8;
- dp4 += 8;
- dp5 += 8;
- } while (--lines > 0);
-}
-
-__DO_XOR_BLOCKS(eor3_inner, __xor_neon_2, __xor_eor3_3, __xor_eor3_4,
- __xor_eor3_5);
diff --git a/lib/raid/xor/xor-neon.c b/lib/raid/xor/xor-neon.c
index a3e2b4af8d36..c7c3cf634e23 100644
--- a/lib/raid/xor/xor-neon.c
+++ b/lib/raid/xor/xor-neon.c
@@ -173,3 +173,7 @@ static void __xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
__DO_XOR_BLOCKS(neon_inner, __xor_neon_2, __xor_neon_3, __xor_neon_4,
__xor_neon_5);
+
+#ifdef CONFIG_ARM64
+extern typeof(__xor_neon_2) __xor_eor3_2 __alias(__xor_neon_2);
+#endif
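The __alias() used above makes __xor_eor3_2 a second name for the
generic two-source NEON routine: a three-input XOR instruction buys
nothing when only two blocks are combined, so the EOR3 dispatch table
can point straight at __xor_neon_2. A minimal stand-alone sketch of
the GNU alias mechanism that the kernel's __alias() macro wraps
(function names here are illustrative):

#include <stdio.h>

void real_impl(int x)
{
	printf("real_impl(%d)\n", x);
}

/*
 * alias_name becomes a second linker symbol for real_impl; no wrapper
 * function and no extra call are generated.
 */
extern __typeof__(real_impl) alias_name
	__attribute__((__alias__("real_impl")));

int main(void)
{
	alias_name(2);	/* resolves directly to real_impl */
	return 0;
}

This is what backs the extern declaration of __xor_eor3_2 at the top
of arm64/xor-eor3.c; the alias is only emitted when the shared file is
built for arm64.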
--
2.54.0.rc1.555.g9c883467ad-goog
Thread overview: 19+ messages
2026-04-22 17:16 [PATCH 0/8] ARM crc64 and XOR using NEON intrinsics Ard Biesheuvel
2026-04-22 17:16 ` [PATCH 1/8] ARM: Add a neon-intrinsics.h header like on arm64 Ard Biesheuvel
2026-04-22 17:16 ` [PATCH 2/8] xor/arm: Replace vectorized implementation with arm64's intrinsics Ard Biesheuvel
2026-04-22 18:07 ` Josh Law
2026-04-23 7:44 ` Christoph Hellwig
2026-04-22 17:16 ` Ard Biesheuvel [this message]
2026-04-22 18:11 ` [PATCH 3/8] xor/arm64: Use shared NEON intrinsics implementation from 32-bit ARM Josh Law
2026-04-23 7:46 ` Christoph Hellwig
2026-04-23 7:48 ` Ard Biesheuvel
2026-04-22 17:17 ` [PATCH 4/8] lib/crc: Turn NEON intrinsics crc64 implementation into common code Ard Biesheuvel
2026-04-22 18:13 ` Josh Law
2026-04-22 17:17 ` [PATCH 5/8] lib/crc: arm: Enable arm64's NEON intrinsics implementation of crc64 Ard Biesheuvel
2026-04-22 18:16 ` Josh Law
2026-04-22 17:17 ` [PATCH 6/8] crypto: aegis128 - Use neon-intrinsics.h on ARM too Ard Biesheuvel
2026-04-22 18:19 ` Josh Law
2026-04-22 17:17 ` [PATCH 7/8] lib/raid6: Include asm/neon-intrinsics.h rather than arm_neon.h Ard Biesheuvel
2026-04-22 18:20 ` Josh Law
2026-04-23 7:47 ` Christoph Hellwig
2026-04-22 17:17 ` [PATCH 8/8] ARM: Remove hacked-up asm/types.h header Ard Biesheuvel