From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
"Jason A . Donenfeld" <Jason@zx2c4.com>,
Herbert Xu <herbert@gondor.apana.org.au>,
linux-arm-kernel@lists.infradead.org,
linuxppc-dev@lists.ozlabs.org, linux-riscv@lists.infradead.org,
linux-s390@vger.kernel.org, x86@kernel.org,
Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 08/19] crypto: arm64/ghash - Move NEON GHASH assembly into its own file
Date: Wed, 18 Mar 2026 23:17:09 -0700 [thread overview]
Message-ID: <20260319061723.1140720-9-ebiggers@kernel.org> (raw)
In-Reply-To: <20260319061723.1140720-1-ebiggers@kernel.org>
arch/arm64/crypto/ghash-ce-core.S implements pmull_ghash_update_p8(),
which is used only by a crypto_shash implementation of GHASH. It also
implements other functions, including pmull_ghash_update_p64() and
others, which are used only by a crypto_aead implementation of AES-GCM.
While some code is shared between pmull_ghash_update_p8() and
pmull_ghash_update_p64(), it's not very much. Since
pmull_ghash_update_p8() will also need to be migrated into lib/crypto/
to achieve parity in the standalone GHASH support, let's move it into a
separate file ghash-neon-core.S.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
arch/arm64/crypto/Makefile | 2 +-
arch/arm64/crypto/ghash-ce-core.S | 207 ++-----------------------
arch/arm64/crypto/ghash-neon-core.S | 226 ++++++++++++++++++++++++++++
3 files changed, 239 insertions(+), 196 deletions(-)
create mode 100644 arch/arm64/crypto/ghash-neon-core.S
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 8a8e3e551ed3..b7ba43ce8584 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -25,11 +25,11 @@ sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
-ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o ghash-neon-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 23ee9a5eaf27..4344fe213d14 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,8 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ * Accelerated AES-GCM implementation with ARMv8 Crypto Extensions.
*
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
@@ -17,35 +17,10 @@
XM .req v5
XL .req v6
XH .req v7
IN1 .req v7
- k00_16 .req v8
- k32_48 .req v9
-
- t3 .req v10
- t4 .req v11
- t5 .req v12
- t6 .req v13
- t7 .req v14
- t8 .req v15
- t9 .req v16
-
- perm1 .req v17
- perm2 .req v18
- perm3 .req v19
-
- sh1 .req v20
- sh2 .req v21
- sh3 .req v22
- sh4 .req v23
-
- ss1 .req v24
- ss2 .req v25
- ss3 .req v26
- ss4 .req v27
-
XL2 .req v8
XM2 .req v9
XH2 .req v10
XL3 .req v11
XM3 .req v12
@@ -58,94 +33,10 @@
HH34 .req v19
.text
.arch armv8-a+crypto
- .macro __pmull_p64, rd, rn, rm
- pmull \rd\().1q, \rn\().1d, \rm\().1d
- .endm
-
- .macro __pmull2_p64, rd, rn, rm
- pmull2 \rd\().1q, \rn\().2d, \rm\().2d
- .endm
-
- .macro __pmull_p8, rq, ad, bd
- ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1
- ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2
- ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3
-
- __pmull_p8_\bd \rq, \ad
- .endm
-
- .macro __pmull2_p8, rq, ad, bd
- tbl t3.16b, {\ad\().16b}, perm1.16b // A1
- tbl t5.16b, {\ad\().16b}, perm2.16b // A2
- tbl t7.16b, {\ad\().16b}, perm3.16b // A3
-
- __pmull2_p8_\bd \rq, \ad
- .endm
-
- .macro __pmull_p8_SHASH, rq, ad
- __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
- .endm
-
- .macro __pmull_p8_SHASH2, rq, ad
- __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
- .endm
-
- .macro __pmull2_p8_SHASH, rq, ad
- __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
- .endm
-
- .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
- pmull\t t3.8h, t3.\nb, \bd // F = A1*B
- pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1
- pmull\t t5.8h, t5.\nb, \bd // H = A2*B
- pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2
- pmull\t t7.8h, t7.\nb, \bd // J = A3*B
- pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3
- pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4
- pmull\t \rq\().8h, \ad, \bd // D = A*B
-
- eor t3.16b, t3.16b, t4.16b // L = E + F
- eor t5.16b, t5.16b, t6.16b // M = G + H
- eor t7.16b, t7.16b, t8.16b // N = I + J
-
- uzp1 t4.2d, t3.2d, t5.2d
- uzp2 t3.2d, t3.2d, t5.2d
- uzp1 t6.2d, t7.2d, t9.2d
- uzp2 t7.2d, t7.2d, t9.2d
-
- // t3 = (L) (P0 + P1) << 8
- // t5 = (M) (P2 + P3) << 16
- eor t4.16b, t4.16b, t3.16b
- and t3.16b, t3.16b, k32_48.16b
-
- // t7 = (N) (P4 + P5) << 24
- // t9 = (K) (P6 + P7) << 32
- eor t6.16b, t6.16b, t7.16b
- and t7.16b, t7.16b, k00_16.16b
-
- eor t4.16b, t4.16b, t3.16b
- eor t6.16b, t6.16b, t7.16b
-
- zip2 t5.2d, t4.2d, t3.2d
- zip1 t3.2d, t4.2d, t3.2d
- zip2 t9.2d, t6.2d, t7.2d
- zip1 t7.2d, t6.2d, t7.2d
-
- ext t3.16b, t3.16b, t3.16b, #15
- ext t5.16b, t5.16b, t5.16b, #14
- ext t7.16b, t7.16b, t7.16b, #13
- ext t9.16b, t9.16b, t9.16b, #12
-
- eor t3.16b, t3.16b, t5.16b
- eor t7.16b, t7.16b, t9.16b
- eor \rq\().16b, \rq\().16b, t3.16b
- eor \rq\().16b, \rq\().16b, t7.16b
- .endm
-
.macro __pmull_pre_p64
add x8, x3, #16
ld1 {HH.2d-HH4.2d}, [x8]
trn1 SHASH2.2d, SHASH.2d, HH.2d
@@ -158,47 +49,10 @@
movi MASK.16b, #0xe1
shl MASK.2d, MASK.2d, #57
.endm
- .macro __pmull_pre_p8
- ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
- eor SHASH2.16b, SHASH2.16b, SHASH.16b
-
- // k00_16 := 0x0000000000000000_000000000000ffff
- // k32_48 := 0x00000000ffffffff_0000ffffffffffff
- movi k32_48.2d, #0xffffffff
- mov k32_48.h[2], k32_48.h[0]
- ushr k00_16.2d, k32_48.2d, #32
-
- // prepare the permutation vectors
- mov_q x5, 0x080f0e0d0c0b0a09
- movi T1.8b, #8
- dup perm1.2d, x5
- eor perm1.16b, perm1.16b, T1.16b
- ushr perm2.2d, perm1.2d, #8
- ushr perm3.2d, perm1.2d, #16
- ushr T1.2d, perm1.2d, #24
- sli perm2.2d, perm1.2d, #56
- sli perm3.2d, perm1.2d, #48
- sli T1.2d, perm1.2d, #40
-
- // precompute loop invariants
- tbl sh1.16b, {SHASH.16b}, perm1.16b
- tbl sh2.16b, {SHASH.16b}, perm2.16b
- tbl sh3.16b, {SHASH.16b}, perm3.16b
- tbl sh4.16b, {SHASH.16b}, T1.16b
- ext ss1.8b, SHASH2.8b, SHASH2.8b, #1
- ext ss2.8b, SHASH2.8b, SHASH2.8b, #2
- ext ss3.8b, SHASH2.8b, SHASH2.8b, #3
- ext ss4.8b, SHASH2.8b, SHASH2.8b, #4
- .endm
-
- //
- // PMULL (64x64->128) based reduction for CPUs that can do
- // it in a single instruction.
- //
.macro __pmull_reduce_p64
pmull T2.1q, XL.1d, MASK.1d
eor XM.16b, XM.16b, T1.16b
mov XH.d[0], XM.d[1]
@@ -207,51 +61,27 @@
eor XL.16b, XM.16b, T2.16b
ext T2.16b, XL.16b, XL.16b, #8
pmull XL.1q, XL.1d, MASK.1d
.endm
- //
- // Alternative reduction for CPUs that lack support for the
- // 64x64->128 PMULL instruction
- //
- .macro __pmull_reduce_p8
- eor XM.16b, XM.16b, T1.16b
-
- mov XL.d[1], XM.d[0]
- mov XH.d[0], XM.d[1]
-
- shl T1.2d, XL.2d, #57
- shl T2.2d, XL.2d, #62
- eor T2.16b, T2.16b, T1.16b
- shl T1.2d, XL.2d, #63
- eor T2.16b, T2.16b, T1.16b
- ext T1.16b, XL.16b, XH.16b, #8
- eor T2.16b, T2.16b, T1.16b
-
- mov XL.d[1], T2.d[0]
- mov XH.d[0], T2.d[1]
-
- ushr T2.2d, XL.2d, #1
- eor XH.16b, XH.16b, XL.16b
- eor XL.16b, XL.16b, T2.16b
- ushr T2.2d, T2.2d, #6
- ushr XL.2d, XL.2d, #1
- .endm
-
- .macro __pmull_ghash, pn
+ /*
+ * void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+ * u64 const h[][2], const char *head)
+ */
+SYM_TYPED_FUNC_START(pmull_ghash_update_p64)
ld1 {SHASH.2d}, [x3]
ld1 {XL.2d}, [x1]
- __pmull_pre_\pn
+ __pmull_pre_p64
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {T1.2d}, [x4]
mov x4, xzr
b 3f
-0: .ifc \pn, p64
+0:
tbnz w0, #0, 2f // skip until #blocks is a
tbnz w0, #1, 2f // round multiple of 4
1: ld1 {XM3.16b-TT4.16b}, [x2], #64
@@ -312,11 +142,10 @@
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbz w0, 5f
b 1b
- .endif
2: ld1 {T1.2d}, [x2], #16
sub w0, w0, #1
3: /* multiply XL by SHASH in GF(2^128) */
@@ -325,42 +154,30 @@ CPU_LE( rev64 T1.16b, T1.16b )
ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8
eor T1.16b, T1.16b, T2.16b
eor XL.16b, XL.16b, IN1.16b
- __pmull2_\pn XH, XL, SHASH // a1 * b1
+ pmull2 XH.1q, XL.2d, SHASH.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b
- __pmull_\pn XL, XL, SHASH // a0 * b0
- __pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0)
+ pmull XL.1q, XL.1d, SHASH.1d // a0 * b0
+ pmull XM.1q, T1.1d, SHASH2.1d // (a1 + a0)(b1 + b0)
4: eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor XM.16b, XM.16b, T2.16b
- __pmull_reduce_\pn
+ __pmull_reduce_p64
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbnz w0, 0b
5: st1 {XL.2d}, [x1]
ret
- .endm
-
- /*
- * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
- * struct ghash_key const *k, const char *head)
- */
-SYM_TYPED_FUNC_START(pmull_ghash_update_p64)
- __pmull_ghash p64
SYM_FUNC_END(pmull_ghash_update_p64)
-SYM_TYPED_FUNC_START(pmull_ghash_update_p8)
- __pmull_ghash p8
-SYM_FUNC_END(pmull_ghash_update_p8)
-
KS0 .req v8
KS1 .req v9
KS2 .req v10
KS3 .req v11
diff --git a/arch/arm64/crypto/ghash-neon-core.S b/arch/arm64/crypto/ghash-neon-core.S
new file mode 100644
index 000000000000..6157135ad566
--- /dev/null
+++ b/arch/arm64/crypto/ghash-neon-core.S
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Accelerated GHASH implementation with ARMv8 ASIMD instructions.
+ *
+ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <linux/cfi_types.h>
+#include <asm/assembler.h>
+
+ SHASH .req v0 // hash key, loaded from [x3]
+ SHASH2 .req v1 // SHASH.hi ^ SHASH.lo, set up by __pmull_pre_p8
+ T1 .req v2 // scratch; also holds the current input block
+ T2 .req v3 // scratch
+ XM .req v5 // middle partial product
+ XL .req v6 // low partial product; also carries the running digest
+ XH .req v7 // high partial product
+ IN1 .req v7 // shares v7 with XH
+
+ k00_16 .req v8 // mask constant built in __pmull_pre_p8; NOTE(review): v8-v15 are callee-saved in AAPCS64 and are not saved here - confirm callers tolerate this
+ k32_48 .req v9 // mask constant built in __pmull_pre_p8
+
+ t3 .req v10 // t3-t9: temporaries of __pmull_p8_tail
+ t4 .req v11
+ t5 .req v12
+ t6 .req v13
+ t7 .req v14
+ t8 .req v15
+ t9 .req v16
+
+ perm1 .req v17 // perm1-perm3: tbl index vectors (byte rotations by 1, 2, 3)
+ perm2 .req v18
+ perm3 .req v19
+
+ sh1 .req v20 // sh1-sh4: SHASH with each 64-bit half byte-rotated by 1-4
+ sh2 .req v21
+ sh3 .req v22
+ sh4 .req v23
+
+ ss1 .req v24 // ss1-ss4: low half of SHASH2 byte-rotated by 1-4
+ ss2 .req v25
+ ss3 .req v26
+ ss4 .req v27
+
+ .text
+
+ .macro __pmull_p8, rq, ad, bd // rq = product of the low 64-bit halves of ad and bd, built from 8-bit pmull
+ ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1 = low half of A byte-rotated by 1
+ ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2 = low half of A byte-rotated by 2
+ ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3 = low half of A byte-rotated by 3
+
+ __pmull_p8_\bd \rq, \ad // dispatch on bd: __pmull_p8_SHASH or __pmull_p8_SHASH2
+ .endm
+
+ .macro __pmull2_p8, rq, ad, bd // like __pmull_p8 but multiplies the upper 64-bit halves (pmull2)
+ tbl t3.16b, {\ad\().16b}, perm1.16b // A1 = A with each 64-bit half byte-rotated by 1
+ tbl t5.16b, {\ad\().16b}, perm2.16b // A2 = A with each 64-bit half byte-rotated by 2
+ tbl t7.16b, {\ad\().16b}, perm3.16b // A3 = A with each 64-bit half byte-rotated by 3
+
+ __pmull2_p8_\bd \rq, \ad // dispatch on bd: only __pmull2_p8_SHASH is defined in this file
+ .endm
+
+ .macro __pmull_p8_SHASH, rq, ad // low halves, B = SHASH, B1-B4 = sh1-sh4
+ __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4 // empty t argument selects plain pmull
+ .endm
+
+ .macro __pmull_p8_SHASH2, rq, ad // low halves, B = SHASH2, B1-B4 = ss1-ss4
+ __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4 // empty t argument selects plain pmull
+ .endm
+
+ .macro __pmull2_p8_SHASH, rq, ad // upper halves, B = SHASH, B1-B4 = sh1-sh4
+ __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4 // t = 2 selects pmull2
+ .endm
+
+ .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4 // t: pmull suffix (empty or 2); nb: source arrangement (8b or 16b); expects A1-A3 in t3, t5, t7
+ pmull\t t3.8h, t3.\nb, \bd // F = A1*B
+ pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1
+ pmull\t t5.8h, t5.\nb, \bd // H = A2*B
+ pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2
+ pmull\t t7.8h, t7.\nb, \bd // J = A3*B
+ pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3
+ pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4
+ pmull\t \rq\().8h, \ad, \bd // D = A*B
+
+ eor t3.16b, t3.16b, t4.16b // L = E + F
+ eor t5.16b, t5.16b, t6.16b // M = G + H
+ eor t7.16b, t7.16b, t8.16b // N = I + J
+
+ uzp1 t4.2d, t3.2d, t5.2d // t4 = {L.lo, M.lo}
+ uzp2 t3.2d, t3.2d, t5.2d // t3 = {L.hi, M.hi}
+ uzp1 t6.2d, t7.2d, t9.2d // t6 = {N.lo, K.lo}
+ uzp2 t7.2d, t7.2d, t9.2d // t7 = {N.hi, K.hi}
+
+ // t3 = (L) (P0 + P1) << 8
+ // t5 = (M) (P2 + P3) << 16
+ eor t4.16b, t4.16b, t3.16b // t4 = lo ^ hi
+ and t3.16b, t3.16b, k32_48.16b // keep low 48 bits of L.hi and low 32 bits of M.hi
+
+ // t7 = (N) (P4 + P5) << 24
+ // t9 = (K) (P6 + P7) << 32
+ eor t6.16b, t6.16b, t7.16b // t6 = lo ^ hi
+ and t7.16b, t7.16b, k00_16.16b // keep low 16 bits of N.hi, clear K.hi
+
+ eor t4.16b, t4.16b, t3.16b // t4 ^= masked hi
+ eor t6.16b, t6.16b, t7.16b // t6 ^= masked hi
+
+ zip2 t5.2d, t4.2d, t3.2d // t5 = {t4.d[1], t3.d[1]}: M regrouped
+ zip1 t3.2d, t4.2d, t3.2d // t3 = {t4.d[0], t3.d[0]}: L regrouped
+ zip2 t9.2d, t6.2d, t7.2d // t9 = {t6.d[1], t7.d[1]}: K regrouped
+ zip1 t7.2d, t6.2d, t7.2d // t7 = {t6.d[0], t7.d[0]}: N regrouped
+
+ ext t3.16b, t3.16b, t3.16b, #15 // byte-rotate left by 1 (the << 8 above)
+ ext t5.16b, t5.16b, t5.16b, #14 // byte-rotate left by 2 (the << 16 above)
+ ext t7.16b, t7.16b, t7.16b, #13 // byte-rotate left by 3 (the << 24 above)
+ ext t9.16b, t9.16b, t9.16b, #12 // byte-rotate left by 4 (the << 32 above)
+
+ eor t3.16b, t3.16b, t5.16b // fold the four shifted terms ...
+ eor t7.16b, t7.16b, t9.16b
+ eor \rq\().16b, \rq\().16b, t3.16b // ... into D to form the result
+ eor \rq\().16b, \rq\().16b, t7.16b
+ .endm
+
+ .macro __pmull_pre_p8 // one-time setup from SHASH: SHASH2, masks, tbl vectors, rotated key copies; clobbers x5 and T1
+ ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 // SHASH2 = SHASH with 64-bit halves swapped
+ eor SHASH2.16b, SHASH2.16b, SHASH.16b // both halves now hold SHASH.hi ^ SHASH.lo
+
+ // k00_16 := 0x0000000000000000_000000000000ffff
+ // k32_48 := 0x00000000ffffffff_0000ffffffffffff
+ movi k32_48.2d, #0xffffffff // both lanes = 0x00000000ffffffff
+ mov k32_48.h[2], k32_48.h[0] // widen the low lane to 48 set bits
+ ushr k00_16.2d, k32_48.2d, #32 // low lane -> 0xffff, high lane -> 0
+
+ // prepare the permutation vectors
+ mov_q x5, 0x080f0e0d0c0b0a09 // indices for the upper half: bytes 9..15, then 8
+ movi T1.8b, #8 // 0x08 in each low-half byte, upper half zero
+ dup perm1.2d, x5
+ eor perm1.16b, perm1.16b, T1.16b // low half becomes indices 1..7, then 0
+ ushr perm2.2d, perm1.2d, #8
+ ushr perm3.2d, perm1.2d, #16
+ ushr T1.2d, perm1.2d, #24
+ sli perm2.2d, perm1.2d, #56 // ushr + sli: perm1 rotated by one more byte per lane
+ sli perm3.2d, perm1.2d, #48 // perm1 rotated by two more bytes per lane
+ sli T1.2d, perm1.2d, #40 // perm1 rotated by three more bytes; only needed for sh4
+
+ // precompute loop invariants
+ tbl sh1.16b, {SHASH.16b}, perm1.16b // each half of SHASH byte-rotated by 1
+ tbl sh2.16b, {SHASH.16b}, perm2.16b // ... by 2
+ tbl sh3.16b, {SHASH.16b}, perm3.16b // ... by 3
+ tbl sh4.16b, {SHASH.16b}, T1.16b // ... by 4
+ ext ss1.8b, SHASH2.8b, SHASH2.8b, #1 // low half of SHASH2 byte-rotated by 1
+ ext ss2.8b, SHASH2.8b, SHASH2.8b, #2 // ... by 2
+ ext ss3.8b, SHASH2.8b, SHASH2.8b, #3 // ... by 3
+ ext ss4.8b, SHASH2.8b, SHASH2.8b, #4 // ... by 4
+ .endm
+
+ .macro __pmull_reduce_p8 // reduction using only shifts and EORs, for CPUs without the 64x64->128 pmull; clobbers T1, T2
+ eor XM.16b, XM.16b, T1.16b // caller passes T1 = XL.hi : XH.lo
+
+ mov XL.d[1], XM.d[0] // low half of the middle term goes to XL.hi
+ mov XH.d[0], XM.d[1] // high half of the middle term goes to XH.lo
+
+ shl T1.2d, XL.2d, #57
+ shl T2.2d, XL.2d, #62
+ eor T2.16b, T2.16b, T1.16b
+ shl T1.2d, XL.2d, #63
+ eor T2.16b, T2.16b, T1.16b // T2 = (XL << 57) ^ (XL << 62) ^ (XL << 63), per 64-bit lane
+ ext T1.16b, XL.16b, XH.16b, #8 // T1 = XL.hi : XH.lo
+ eor T2.16b, T2.16b, T1.16b
+
+ mov XL.d[1], T2.d[0]
+ mov XH.d[0], T2.d[1]
+
+ ushr T2.2d, XL.2d, #1
+ eor XH.16b, XH.16b, XL.16b
+ eor XL.16b, XL.16b, T2.16b
+ ushr T2.2d, T2.2d, #6
+ ushr XL.2d, XL.2d, #1 // caller finishes with XL ^= T2 ^ XH
+ .endm
+
+ /*
+ * void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+ * u64 const h[][2], const char *head)
+ */
+SYM_TYPED_FUNC_START(pmull_ghash_update_p8)
+ ld1 {SHASH.2d}, [x3] // x3 = h: load 16 bytes of key material
+ ld1 {XL.2d}, [x1] // x1 = dg: load the current digest
+
+ __pmull_pre_p8
+
+ /* do the head block first, if supplied */
+ cbz x4, 0f // x4 = head; zero means no head block
+ ld1 {T1.2d}, [x4]
+ mov x4, xzr // head consumed
+ b 3f
+
+0: ld1 {T1.2d}, [x2], #16 // x2 = src: load next 16-byte block, post-increment
+ sub w0, w0, #1 // w0 = blocks remaining
+
+3: /* multiply XL by SHASH in GF(2^128) */
+CPU_LE( rev64 T1.16b, T1.16b )
+
+ ext T2.16b, XL.16b, XL.16b, #8 // T2 = digest with 64-bit halves swapped
+ ext IN1.16b, T1.16b, T1.16b, #8 // IN1 = input block with 64-bit halves swapped
+ eor T1.16b, T1.16b, T2.16b
+ eor XL.16b, XL.16b, IN1.16b // XL = digest ^ half-swapped input block
+
+ __pmull2_p8 XH, XL, SHASH // a1 * b1
+ eor T1.16b, T1.16b, XL.16b // T1 = a1 + a0 in both halves
+ __pmull_p8 XL, XL, SHASH // a0 * b0
+ __pmull_p8 XM, T1, SHASH2 // (a1 + a0)(b1 + b0)
+
+ eor T2.16b, XL.16b, XH.16b // T2 = a0*b0 + a1*b1
+ ext T1.16b, XL.16b, XH.16b, #8 // T1 = XL.hi : XH.lo, consumed by the reduction
+ eor XM.16b, XM.16b, T2.16b // Karatsuba middle term
+
+ __pmull_reduce_p8
+
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b // XL = updated digest
+
+ cbnz w0, 0b // loop while blocks remain
+
+ st1 {XL.2d}, [x1] // write the digest back to dg
+ ret
+SYM_FUNC_END(pmull_ghash_update_p8)
--
2.53.0
WARNING: multiple messages have this Message-ID (diff)
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
"Jason A . Donenfeld" <Jason@zx2c4.com>,
Herbert Xu <herbert@gondor.apana.org.au>,
linux-arm-kernel@lists.infradead.org,
linuxppc-dev@lists.ozlabs.org, linux-riscv@lists.infradead.org,
linux-s390@vger.kernel.org, x86@kernel.org,
Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 08/19] crypto: arm64/ghash - Move NEON GHASH assembly into its own file
Date: Wed, 18 Mar 2026 23:17:09 -0700 [thread overview]
Message-ID: <20260319061723.1140720-9-ebiggers@kernel.org> (raw)
In-Reply-To: <20260319061723.1140720-1-ebiggers@kernel.org>
arch/arm64/crypto/ghash-ce-core.S implements pmull_ghash_update_p8(),
which is used only by a crypto_shash implementation of GHASH. It also
implements other functions, including pmull_ghash_update_p64() and
others, which are used only by a crypto_aead implementation of AES-GCM.
While some code is shared between pmull_ghash_update_p8() and
pmull_ghash_update_p64(), it's not very much. Since
pmull_ghash_update_p8() will also need to be migrated into lib/crypto/
to achieve parity in the standalone GHASH support, let's move it into a
separate file ghash-neon-core.S.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
arch/arm64/crypto/Makefile | 2 +-
arch/arm64/crypto/ghash-ce-core.S | 207 ++-----------------------
arch/arm64/crypto/ghash-neon-core.S | 226 ++++++++++++++++++++++++++++
3 files changed, 239 insertions(+), 196 deletions(-)
create mode 100644 arch/arm64/crypto/ghash-neon-core.S
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 8a8e3e551ed3..b7ba43ce8584 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -25,11 +25,11 @@ sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
-ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o ghash-neon-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 23ee9a5eaf27..4344fe213d14 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -1,8 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ * Accelerated AES-GCM implementation with ARMv8 Crypto Extensions.
*
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
#include <linux/linkage.h>
@@ -17,35 +17,10 @@
XM .req v5
XL .req v6
XH .req v7
IN1 .req v7
- k00_16 .req v8
- k32_48 .req v9
-
- t3 .req v10
- t4 .req v11
- t5 .req v12
- t6 .req v13
- t7 .req v14
- t8 .req v15
- t9 .req v16
-
- perm1 .req v17
- perm2 .req v18
- perm3 .req v19
-
- sh1 .req v20
- sh2 .req v21
- sh3 .req v22
- sh4 .req v23
-
- ss1 .req v24
- ss2 .req v25
- ss3 .req v26
- ss4 .req v27
-
XL2 .req v8
XM2 .req v9
XH2 .req v10
XL3 .req v11
XM3 .req v12
@@ -58,94 +33,10 @@
HH34 .req v19
.text
.arch armv8-a+crypto
- .macro __pmull_p64, rd, rn, rm
- pmull \rd\().1q, \rn\().1d, \rm\().1d
- .endm
-
- .macro __pmull2_p64, rd, rn, rm
- pmull2 \rd\().1q, \rn\().2d, \rm\().2d
- .endm
-
- .macro __pmull_p8, rq, ad, bd
- ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1
- ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2
- ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3
-
- __pmull_p8_\bd \rq, \ad
- .endm
-
- .macro __pmull2_p8, rq, ad, bd
- tbl t3.16b, {\ad\().16b}, perm1.16b // A1
- tbl t5.16b, {\ad\().16b}, perm2.16b // A2
- tbl t7.16b, {\ad\().16b}, perm3.16b // A3
-
- __pmull2_p8_\bd \rq, \ad
- .endm
-
- .macro __pmull_p8_SHASH, rq, ad
- __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
- .endm
-
- .macro __pmull_p8_SHASH2, rq, ad
- __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
- .endm
-
- .macro __pmull2_p8_SHASH, rq, ad
- __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
- .endm
-
- .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
- pmull\t t3.8h, t3.\nb, \bd // F = A1*B
- pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1
- pmull\t t5.8h, t5.\nb, \bd // H = A2*B
- pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2
- pmull\t t7.8h, t7.\nb, \bd // J = A3*B
- pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3
- pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4
- pmull\t \rq\().8h, \ad, \bd // D = A*B
-
- eor t3.16b, t3.16b, t4.16b // L = E + F
- eor t5.16b, t5.16b, t6.16b // M = G + H
- eor t7.16b, t7.16b, t8.16b // N = I + J
-
- uzp1 t4.2d, t3.2d, t5.2d
- uzp2 t3.2d, t3.2d, t5.2d
- uzp1 t6.2d, t7.2d, t9.2d
- uzp2 t7.2d, t7.2d, t9.2d
-
- // t3 = (L) (P0 + P1) << 8
- // t5 = (M) (P2 + P3) << 16
- eor t4.16b, t4.16b, t3.16b
- and t3.16b, t3.16b, k32_48.16b
-
- // t7 = (N) (P4 + P5) << 24
- // t9 = (K) (P6 + P7) << 32
- eor t6.16b, t6.16b, t7.16b
- and t7.16b, t7.16b, k00_16.16b
-
- eor t4.16b, t4.16b, t3.16b
- eor t6.16b, t6.16b, t7.16b
-
- zip2 t5.2d, t4.2d, t3.2d
- zip1 t3.2d, t4.2d, t3.2d
- zip2 t9.2d, t6.2d, t7.2d
- zip1 t7.2d, t6.2d, t7.2d
-
- ext t3.16b, t3.16b, t3.16b, #15
- ext t5.16b, t5.16b, t5.16b, #14
- ext t7.16b, t7.16b, t7.16b, #13
- ext t9.16b, t9.16b, t9.16b, #12
-
- eor t3.16b, t3.16b, t5.16b
- eor t7.16b, t7.16b, t9.16b
- eor \rq\().16b, \rq\().16b, t3.16b
- eor \rq\().16b, \rq\().16b, t7.16b
- .endm
-
.macro __pmull_pre_p64
add x8, x3, #16
ld1 {HH.2d-HH4.2d}, [x8]
trn1 SHASH2.2d, SHASH.2d, HH.2d
@@ -158,47 +49,10 @@
movi MASK.16b, #0xe1
shl MASK.2d, MASK.2d, #57
.endm
- .macro __pmull_pre_p8
- ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
- eor SHASH2.16b, SHASH2.16b, SHASH.16b
-
- // k00_16 := 0x0000000000000000_000000000000ffff
- // k32_48 := 0x00000000ffffffff_0000ffffffffffff
- movi k32_48.2d, #0xffffffff
- mov k32_48.h[2], k32_48.h[0]
- ushr k00_16.2d, k32_48.2d, #32
-
- // prepare the permutation vectors
- mov_q x5, 0x080f0e0d0c0b0a09
- movi T1.8b, #8
- dup perm1.2d, x5
- eor perm1.16b, perm1.16b, T1.16b
- ushr perm2.2d, perm1.2d, #8
- ushr perm3.2d, perm1.2d, #16
- ushr T1.2d, perm1.2d, #24
- sli perm2.2d, perm1.2d, #56
- sli perm3.2d, perm1.2d, #48
- sli T1.2d, perm1.2d, #40
-
- // precompute loop invariants
- tbl sh1.16b, {SHASH.16b}, perm1.16b
- tbl sh2.16b, {SHASH.16b}, perm2.16b
- tbl sh3.16b, {SHASH.16b}, perm3.16b
- tbl sh4.16b, {SHASH.16b}, T1.16b
- ext ss1.8b, SHASH2.8b, SHASH2.8b, #1
- ext ss2.8b, SHASH2.8b, SHASH2.8b, #2
- ext ss3.8b, SHASH2.8b, SHASH2.8b, #3
- ext ss4.8b, SHASH2.8b, SHASH2.8b, #4
- .endm
-
- //
- // PMULL (64x64->128) based reduction for CPUs that can do
- // it in a single instruction.
- //
.macro __pmull_reduce_p64
pmull T2.1q, XL.1d, MASK.1d
eor XM.16b, XM.16b, T1.16b
mov XH.d[0], XM.d[1]
@@ -207,51 +61,27 @@
eor XL.16b, XM.16b, T2.16b
ext T2.16b, XL.16b, XL.16b, #8
pmull XL.1q, XL.1d, MASK.1d
.endm
- //
- // Alternative reduction for CPUs that lack support for the
- // 64x64->128 PMULL instruction
- //
- .macro __pmull_reduce_p8
- eor XM.16b, XM.16b, T1.16b
-
- mov XL.d[1], XM.d[0]
- mov XH.d[0], XM.d[1]
-
- shl T1.2d, XL.2d, #57
- shl T2.2d, XL.2d, #62
- eor T2.16b, T2.16b, T1.16b
- shl T1.2d, XL.2d, #63
- eor T2.16b, T2.16b, T1.16b
- ext T1.16b, XL.16b, XH.16b, #8
- eor T2.16b, T2.16b, T1.16b
-
- mov XL.d[1], T2.d[0]
- mov XH.d[0], T2.d[1]
-
- ushr T2.2d, XL.2d, #1
- eor XH.16b, XH.16b, XL.16b
- eor XL.16b, XL.16b, T2.16b
- ushr T2.2d, T2.2d, #6
- ushr XL.2d, XL.2d, #1
- .endm
-
- .macro __pmull_ghash, pn
+ /*
+ * void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+ * u64 const h[][2], const char *head)
+ */
+SYM_TYPED_FUNC_START(pmull_ghash_update_p64)
ld1 {SHASH.2d}, [x3]
ld1 {XL.2d}, [x1]
- __pmull_pre_\pn
+ __pmull_pre_p64
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {T1.2d}, [x4]
mov x4, xzr
b 3f
-0: .ifc \pn, p64
+0:
tbnz w0, #0, 2f // skip until #blocks is a
tbnz w0, #1, 2f // round multiple of 4
1: ld1 {XM3.16b-TT4.16b}, [x2], #64
@@ -312,11 +142,10 @@
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbz w0, 5f
b 1b
- .endif
2: ld1 {T1.2d}, [x2], #16
sub w0, w0, #1
3: /* multiply XL by SHASH in GF(2^128) */
@@ -325,42 +154,30 @@ CPU_LE( rev64 T1.16b, T1.16b )
ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8
eor T1.16b, T1.16b, T2.16b
eor XL.16b, XL.16b, IN1.16b
- __pmull2_\pn XH, XL, SHASH // a1 * b1
+ pmull2 XH.1q, XL.2d, SHASH.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b
- __pmull_\pn XL, XL, SHASH // a0 * b0
- __pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0)
+ pmull XL.1q, XL.1d, SHASH.1d // a0 * b0
+ pmull XM.1q, T1.1d, SHASH2.1d // (a1 + a0)(b1 + b0)
4: eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor XM.16b, XM.16b, T2.16b
- __pmull_reduce_\pn
+ __pmull_reduce_p64
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbnz w0, 0b
5: st1 {XL.2d}, [x1]
ret
- .endm
-
- /*
- * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
- * struct ghash_key const *k, const char *head)
- */
-SYM_TYPED_FUNC_START(pmull_ghash_update_p64)
- __pmull_ghash p64
SYM_FUNC_END(pmull_ghash_update_p64)
-SYM_TYPED_FUNC_START(pmull_ghash_update_p8)
- __pmull_ghash p8
-SYM_FUNC_END(pmull_ghash_update_p8)
-
KS0 .req v8
KS1 .req v9
KS2 .req v10
KS3 .req v11
diff --git a/arch/arm64/crypto/ghash-neon-core.S b/arch/arm64/crypto/ghash-neon-core.S
new file mode 100644
index 000000000000..6157135ad566
--- /dev/null
+++ b/arch/arm64/crypto/ghash-neon-core.S
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Accelerated GHASH implementation with ARMv8 ASIMD instructions.
+ *
+ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <linux/cfi_types.h>
+#include <asm/assembler.h>
+
+ SHASH .req v0 // hash key, loaded from [x3]
+ SHASH2 .req v1 // SHASH.hi ^ SHASH.lo, set up by __pmull_pre_p8
+ T1 .req v2 // scratch; also holds the current input block
+ T2 .req v3 // scratch
+ XM .req v5 // middle partial product
+ XL .req v6 // low partial product; also carries the running digest
+ XH .req v7 // high partial product
+ IN1 .req v7 // shares v7 with XH
+
+ k00_16 .req v8 // mask constant built in __pmull_pre_p8; NOTE(review): v8-v15 are callee-saved in AAPCS64 and are not saved here - confirm callers tolerate this
+ k32_48 .req v9 // mask constant built in __pmull_pre_p8
+
+ t3 .req v10 // t3-t9: temporaries of __pmull_p8_tail
+ t4 .req v11
+ t5 .req v12
+ t6 .req v13
+ t7 .req v14
+ t8 .req v15
+ t9 .req v16
+
+ perm1 .req v17 // perm1-perm3: tbl index vectors (byte rotations by 1, 2, 3)
+ perm2 .req v18
+ perm3 .req v19
+
+ sh1 .req v20 // sh1-sh4: SHASH with each 64-bit half byte-rotated by 1-4
+ sh2 .req v21
+ sh3 .req v22
+ sh4 .req v23
+
+ ss1 .req v24 // ss1-ss4: low half of SHASH2 byte-rotated by 1-4
+ ss2 .req v25
+ ss3 .req v26
+ ss4 .req v27
+
+ .text
+
+ .macro __pmull_p8, rq, ad, bd // rq = product of the low 64-bit halves of ad and bd, built from 8-bit pmull
+ ext t3.8b, \ad\().8b, \ad\().8b, #1 // A1 = low half of A byte-rotated by 1
+ ext t5.8b, \ad\().8b, \ad\().8b, #2 // A2 = low half of A byte-rotated by 2
+ ext t7.8b, \ad\().8b, \ad\().8b, #3 // A3 = low half of A byte-rotated by 3
+
+ __pmull_p8_\bd \rq, \ad // dispatch on bd: __pmull_p8_SHASH or __pmull_p8_SHASH2
+ .endm
+
+ .macro __pmull2_p8, rq, ad, bd // like __pmull_p8 but multiplies the upper 64-bit halves (pmull2)
+ tbl t3.16b, {\ad\().16b}, perm1.16b // A1 = A with each 64-bit half byte-rotated by 1
+ tbl t5.16b, {\ad\().16b}, perm2.16b // A2 = A with each 64-bit half byte-rotated by 2
+ tbl t7.16b, {\ad\().16b}, perm3.16b // A3 = A with each 64-bit half byte-rotated by 3
+
+ __pmull2_p8_\bd \rq, \ad // dispatch on bd: only __pmull2_p8_SHASH is defined in this file
+ .endm
+
+ .macro __pmull_p8_SHASH, rq, ad // low halves, B = SHASH, B1-B4 = sh1-sh4
+ __pmull_p8_tail \rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4 // empty t argument selects plain pmull
+ .endm
+
+ .macro __pmull_p8_SHASH2, rq, ad // low halves, B = SHASH2, B1-B4 = ss1-ss4
+ __pmull_p8_tail \rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4 // empty t argument selects plain pmull
+ .endm
+
+ .macro __pmull2_p8_SHASH, rq, ad // upper halves, B = SHASH, B1-B4 = sh1-sh4
+ __pmull_p8_tail \rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4 // t = 2 selects pmull2
+ .endm
+
+ .macro __pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4 // t: pmull suffix (empty or 2); nb: source arrangement (8b or 16b); expects A1-A3 in t3, t5, t7
+ pmull\t t3.8h, t3.\nb, \bd // F = A1*B
+ pmull\t t4.8h, \ad, \b1\().\nb // E = A*B1
+ pmull\t t5.8h, t5.\nb, \bd // H = A2*B
+ pmull\t t6.8h, \ad, \b2\().\nb // G = A*B2
+ pmull\t t7.8h, t7.\nb, \bd // J = A3*B
+ pmull\t t8.8h, \ad, \b3\().\nb // I = A*B3
+ pmull\t t9.8h, \ad, \b4\().\nb // K = A*B4
+ pmull\t \rq\().8h, \ad, \bd // D = A*B
+
+ eor t3.16b, t3.16b, t4.16b // L = E + F
+ eor t5.16b, t5.16b, t6.16b // M = G + H
+ eor t7.16b, t7.16b, t8.16b // N = I + J
+
+ uzp1 t4.2d, t3.2d, t5.2d // t4 = {L.lo, M.lo}
+ uzp2 t3.2d, t3.2d, t5.2d // t3 = {L.hi, M.hi}
+ uzp1 t6.2d, t7.2d, t9.2d // t6 = {N.lo, K.lo}
+ uzp2 t7.2d, t7.2d, t9.2d // t7 = {N.hi, K.hi}
+
+ // t3 = (L) (P0 + P1) << 8
+ // t5 = (M) (P2 + P3) << 16
+ eor t4.16b, t4.16b, t3.16b // t4 = lo ^ hi
+ and t3.16b, t3.16b, k32_48.16b // keep low 48 bits of L.hi and low 32 bits of M.hi
+
+ // t7 = (N) (P4 + P5) << 24
+ // t9 = (K) (P6 + P7) << 32
+ eor t6.16b, t6.16b, t7.16b // t6 = lo ^ hi
+ and t7.16b, t7.16b, k00_16.16b // keep low 16 bits of N.hi, clear K.hi
+
+ eor t4.16b, t4.16b, t3.16b // t4 ^= masked hi
+ eor t6.16b, t6.16b, t7.16b // t6 ^= masked hi
+
+ zip2 t5.2d, t4.2d, t3.2d // t5 = {t4.d[1], t3.d[1]}: M regrouped
+ zip1 t3.2d, t4.2d, t3.2d // t3 = {t4.d[0], t3.d[0]}: L regrouped
+ zip2 t9.2d, t6.2d, t7.2d // t9 = {t6.d[1], t7.d[1]}: K regrouped
+ zip1 t7.2d, t6.2d, t7.2d // t7 = {t6.d[0], t7.d[0]}: N regrouped
+
+ ext t3.16b, t3.16b, t3.16b, #15 // byte-rotate left by 1 (the << 8 above)
+ ext t5.16b, t5.16b, t5.16b, #14 // byte-rotate left by 2 (the << 16 above)
+ ext t7.16b, t7.16b, t7.16b, #13 // byte-rotate left by 3 (the << 24 above)
+ ext t9.16b, t9.16b, t9.16b, #12 // byte-rotate left by 4 (the << 32 above)
+
+ eor t3.16b, t3.16b, t5.16b // fold the four shifted terms ...
+ eor t7.16b, t7.16b, t9.16b
+ eor \rq\().16b, \rq\().16b, t3.16b // ... into D to form the result
+ eor \rq\().16b, \rq\().16b, t7.16b
+ .endm
+
+ .macro __pmull_pre_p8 // one-time setup from SHASH: SHASH2, masks, tbl vectors, rotated key copies; clobbers x5 and T1
+ ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 // SHASH2 = SHASH with 64-bit halves swapped
+ eor SHASH2.16b, SHASH2.16b, SHASH.16b // both halves now hold SHASH.hi ^ SHASH.lo
+
+ // k00_16 := 0x0000000000000000_000000000000ffff
+ // k32_48 := 0x00000000ffffffff_0000ffffffffffff
+ movi k32_48.2d, #0xffffffff // both lanes = 0x00000000ffffffff
+ mov k32_48.h[2], k32_48.h[0] // widen the low lane to 48 set bits
+ ushr k00_16.2d, k32_48.2d, #32 // low lane -> 0xffff, high lane -> 0
+
+ // prepare the permutation vectors
+ mov_q x5, 0x080f0e0d0c0b0a09 // indices for the upper half: bytes 9..15, then 8
+ movi T1.8b, #8 // 0x08 in each low-half byte, upper half zero
+ dup perm1.2d, x5
+ eor perm1.16b, perm1.16b, T1.16b // low half becomes indices 1..7, then 0
+ ushr perm2.2d, perm1.2d, #8
+ ushr perm3.2d, perm1.2d, #16
+ ushr T1.2d, perm1.2d, #24
+ sli perm2.2d, perm1.2d, #56 // ushr + sli: perm1 rotated by one more byte per lane
+ sli perm3.2d, perm1.2d, #48 // perm1 rotated by two more bytes per lane
+ sli T1.2d, perm1.2d, #40 // perm1 rotated by three more bytes; only needed for sh4
+
+ // precompute loop invariants
+ tbl sh1.16b, {SHASH.16b}, perm1.16b // each half of SHASH byte-rotated by 1
+ tbl sh2.16b, {SHASH.16b}, perm2.16b // ... by 2
+ tbl sh3.16b, {SHASH.16b}, perm3.16b // ... by 3
+ tbl sh4.16b, {SHASH.16b}, T1.16b // ... by 4
+ ext ss1.8b, SHASH2.8b, SHASH2.8b, #1 // low half of SHASH2 byte-rotated by 1
+ ext ss2.8b, SHASH2.8b, SHASH2.8b, #2 // ... by 2
+ ext ss3.8b, SHASH2.8b, SHASH2.8b, #3 // ... by 3
+ ext ss4.8b, SHASH2.8b, SHASH2.8b, #4 // ... by 4
+ .endm
+
+ .macro __pmull_reduce_p8 // reduction using only shifts and EORs, for CPUs without the 64x64->128 pmull; clobbers T1, T2
+ eor XM.16b, XM.16b, T1.16b // caller passes T1 = XL.hi : XH.lo
+
+ mov XL.d[1], XM.d[0] // low half of the middle term goes to XL.hi
+ mov XH.d[0], XM.d[1] // high half of the middle term goes to XH.lo
+
+ shl T1.2d, XL.2d, #57
+ shl T2.2d, XL.2d, #62
+ eor T2.16b, T2.16b, T1.16b
+ shl T1.2d, XL.2d, #63
+ eor T2.16b, T2.16b, T1.16b // T2 = (XL << 57) ^ (XL << 62) ^ (XL << 63), per 64-bit lane
+ ext T1.16b, XL.16b, XH.16b, #8 // T1 = XL.hi : XH.lo
+ eor T2.16b, T2.16b, T1.16b
+
+ mov XL.d[1], T2.d[0]
+ mov XH.d[0], T2.d[1]
+
+ ushr T2.2d, XL.2d, #1
+ eor XH.16b, XH.16b, XL.16b
+ eor XL.16b, XL.16b, T2.16b
+ ushr T2.2d, T2.2d, #6
+ ushr XL.2d, XL.2d, #1 // caller finishes with XL ^= T2 ^ XH
+ .endm
+
+ /*
+ * void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+ * u64 const h[][2], const char *head)
+ */
+SYM_TYPED_FUNC_START(pmull_ghash_update_p8)
+ ld1 {SHASH.2d}, [x3] // x3 = h: load 16 bytes of key material
+ ld1 {XL.2d}, [x1] // x1 = dg: load the current digest
+
+ __pmull_pre_p8
+
+ /* do the head block first, if supplied */
+ cbz x4, 0f // x4 = head; zero means no head block
+ ld1 {T1.2d}, [x4]
+ mov x4, xzr // head consumed
+ b 3f
+
+0: ld1 {T1.2d}, [x2], #16 // x2 = src: load next 16-byte block, post-increment
+ sub w0, w0, #1 // w0 = blocks remaining
+
+3: /* multiply XL by SHASH in GF(2^128) */
+CPU_LE( rev64 T1.16b, T1.16b )
+
+ ext T2.16b, XL.16b, XL.16b, #8 // T2 = digest with 64-bit halves swapped
+ ext IN1.16b, T1.16b, T1.16b, #8 // IN1 = input block with 64-bit halves swapped
+ eor T1.16b, T1.16b, T2.16b
+ eor XL.16b, XL.16b, IN1.16b // XL = digest ^ half-swapped input block
+
+ __pmull2_p8 XH, XL, SHASH // a1 * b1
+ eor T1.16b, T1.16b, XL.16b // T1 = a1 + a0 in both halves
+ __pmull_p8 XL, XL, SHASH // a0 * b0
+ __pmull_p8 XM, T1, SHASH2 // (a1 + a0)(b1 + b0)
+
+ eor T2.16b, XL.16b, XH.16b // T2 = a0*b0 + a1*b1
+ ext T1.16b, XL.16b, XH.16b, #8 // T1 = XL.hi : XH.lo, consumed by the reduction
+ eor XM.16b, XM.16b, T2.16b // Karatsuba middle term
+
+ __pmull_reduce_p8
+
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b // XL = updated digest
+
+ cbnz w0, 0b // loop while blocks remain
+
+ st1 {XL.2d}, [x1] // write the digest back to dg
+ ret
+SYM_FUNC_END(pmull_ghash_update_p8)
--
2.53.0
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
next prev parent reply other threads:[~2026-03-19 6:19 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-19 6:17 [PATCH 00/19] GHASH library Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 01/19] lib/crypto: gf128hash: Rename polyval module to gf128hash Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 02/19] lib/crypto: gf128hash: Support GF128HASH_ARCH without all POLYVAL functions Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 03/19] lib/crypto: gf128hash: Add GHASH support Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 04/19] lib/crypto: tests: Add KUnit tests for GHASH Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 05/19] crypto: arm/ghash - Make the "ghash" crypto_shash NEON-only Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 06/19] crypto: arm/ghash - Move NEON GHASH assembly into its own file Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 07/19] lib/crypto: arm/ghash: Migrate optimized code into library Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` Eric Biggers [this message]
2026-03-19 6:17 ` [PATCH 08/19] crypto: arm64/ghash - Move NEON GHASH assembly into its own file Eric Biggers
2026-03-19 6:17 ` [PATCH 09/19] lib/crypto: arm64/ghash: Migrate optimized code into library Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 10/19] crypto: arm64/aes-gcm - Rename struct ghash_key and make fixed-sized Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 11/19] lib/crypto: powerpc/ghash: Migrate optimized code into library Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 12/19] lib/crypto: riscv/ghash: " Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 13/19] lib/crypto: s390/ghash: " Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 14/19] lib/crypto: x86/ghash: " Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 15/19] crypto: gcm - Use GHASH library instead of crypto_ahash Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 16/19] crypto: ghash - Remove ghash from crypto_shash API Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 17/19] lib/crypto: gf128mul: Remove unused 4k_lle functions Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 18/19] lib/crypto: gf128hash: Remove unused content from ghash.h Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-19 6:17 ` [PATCH 19/19] lib/crypto: aesgcm: Use GHASH library API Eric Biggers
2026-03-19 6:17 ` Eric Biggers
2026-03-23 14:14 ` [PATCH 00/19] GHASH library Ard Biesheuvel
2026-03-23 14:14 ` Ard Biesheuvel
2026-03-24 0:50 ` Eric Biggers
2026-03-24 0:50 ` Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260319061723.1140720-9-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=Jason@zx2c4.com \
--cc=ardb@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=linux-s390@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.