From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
"Jason A . Donenfeld" <Jason@zx2c4.com>,
Herbert Xu <herbert@gondor.apana.org.au>,
linux-arm-kernel@lists.infradead.org, x86@kernel.org,
Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 05/12] lib/crypto: x86/nh: Migrate optimized code into library
Date: Wed, 10 Dec 2025 17:18:37 -0800 [thread overview]
Message-ID: <20251211011846.8179-6-ebiggers@kernel.org> (raw)
In-Reply-To: <20251211011846.8179-1-ebiggers@kernel.org>
Migrate the x86_64 implementations of NH into lib/crypto/. As a result,
the nh() function is now optimized on x86_64 kernels.
Note: until the adiantum template is converted, it temporarily no longer
uses the x86_64 optimized NH code. This is resolved in a later commit that
converts the adiantum template to use nh() instead of "nhpoly1305".
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
arch/x86/crypto/Kconfig | 20 -----
arch/x86/crypto/Makefile | 5 --
arch/x86/crypto/nhpoly1305-avx2-glue.c | 81 -------------------
arch/x86/crypto/nhpoly1305-sse2-glue.c | 80 ------------------
lib/crypto/Kconfig | 1 +
lib/crypto/Makefile | 1 +
.../crypto/x86/nh-avx2.S | 3 +-
.../crypto/x86/nh-sse2.S | 3 +-
lib/crypto/x86/nh.h | 45 +++++++++++
9 files changed, 49 insertions(+), 190 deletions(-)
delete mode 100644 arch/x86/crypto/nhpoly1305-avx2-glue.c
delete mode 100644 arch/x86/crypto/nhpoly1305-sse2-glue.c
rename arch/x86/crypto/nh-avx2-x86_64.S => lib/crypto/x86/nh-avx2.S (98%)
rename arch/x86/crypto/nh-sse2-x86_64.S => lib/crypto/x86/nh-sse2.S (97%)
create mode 100644 lib/crypto/x86/nh.h
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 3fd2423d3cf8..ebb0838eaf30 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -331,30 +331,10 @@ config CRYPTO_AEGIS128_AESNI_SSE2
Architecture: x86_64 using:
- AES-NI (AES New Instructions)
- SSE4.1 (Streaming SIMD Extensions 4.1)
-config CRYPTO_NHPOLY1305_SSE2
- tristate "Hash functions: NHPoly1305 (SSE2)"
- depends on 64BIT
- select CRYPTO_NHPOLY1305
- help
- NHPoly1305 hash function for Adiantum
-
- Architecture: x86_64 using:
- - SSE2 (Streaming SIMD Extensions 2)
-
-config CRYPTO_NHPOLY1305_AVX2
- tristate "Hash functions: NHPoly1305 (AVX2)"
- depends on 64BIT
- select CRYPTO_NHPOLY1305
- help
- NHPoly1305 hash function for Adiantum
-
- Architecture: x86_64 using:
- - AVX2 (Advanced Vector Extensions 2)
-
config CRYPTO_SM3_AVX_X86_64
tristate "Hash functions: SM3 (AVX)"
depends on 64BIT
select CRYPTO_HASH
select CRYPTO_LIB_SM3
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5f2fb4f148fe..b21ad0978c52 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -51,15 +51,10 @@ aesni-intel-$(CONFIG_64BIT) += aes-ctr-avx-x86_64.o \
aes-xts-avx-x86_64.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
-nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
-obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
-nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
-
obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o
sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
deleted file mode 100644
index c3a872f4d6a7..000000000000
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ /dev/null
@@ -1,81 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
- * (AVX2 accelerated version)
- *
- * Copyright 2018 Google LLC
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/nhpoly1305.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/simd.h>
-
-asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
- __le64 hash[NH_NUM_PASSES]);
-
-static int nhpoly1305_avx2_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- if (srclen < 64 || !crypto_simd_usable())
- return crypto_nhpoly1305_update(desc, src, srclen);
-
- do {
- unsigned int n = min_t(unsigned int, srclen, SZ_4K);
-
- kernel_fpu_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, nh_avx2);
- kernel_fpu_end();
- src += n;
- srclen -= n;
- } while (srclen);
- return 0;
-}
-
-static int nhpoly1305_avx2_digest(struct shash_desc *desc,
- const u8 *src, unsigned int srclen, u8 *out)
-{
- return crypto_nhpoly1305_init(desc) ?:
- nhpoly1305_avx2_update(desc, src, srclen) ?:
- crypto_nhpoly1305_final(desc, out);
-}
-
-static struct shash_alg nhpoly1305_alg = {
- .base.cra_name = "nhpoly1305",
- .base.cra_driver_name = "nhpoly1305-avx2",
- .base.cra_priority = 300,
- .base.cra_ctxsize = sizeof(struct nhpoly1305_key),
- .base.cra_module = THIS_MODULE,
- .digestsize = POLY1305_DIGEST_SIZE,
- .init = crypto_nhpoly1305_init,
- .update = nhpoly1305_avx2_update,
- .final = crypto_nhpoly1305_final,
- .digest = nhpoly1305_avx2_digest,
- .setkey = crypto_nhpoly1305_setkey,
- .descsize = sizeof(struct nhpoly1305_state),
-};
-
-static int __init nhpoly1305_mod_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE))
- return -ENODEV;
-
- return crypto_register_shash(&nhpoly1305_alg);
-}
-
-static void __exit nhpoly1305_mod_exit(void)
-{
- crypto_unregister_shash(&nhpoly1305_alg);
-}
-
-module_init(nhpoly1305_mod_init);
-module_exit(nhpoly1305_mod_exit);
-
-MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (AVX2-accelerated)");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("nhpoly1305");
-MODULE_ALIAS_CRYPTO("nhpoly1305-avx2");
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
deleted file mode 100644
index a268a8439a5c..000000000000
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
- * (SSE2 accelerated version)
- *
- * Copyright 2018 Google LLC
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/nhpoly1305.h>
-#include <linux/module.h>
-#include <linux/sizes.h>
-#include <asm/simd.h>
-
-asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
- __le64 hash[NH_NUM_PASSES]);
-
-static int nhpoly1305_sse2_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- if (srclen < 64 || !crypto_simd_usable())
- return crypto_nhpoly1305_update(desc, src, srclen);
-
- do {
- unsigned int n = min_t(unsigned int, srclen, SZ_4K);
-
- kernel_fpu_begin();
- crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2);
- kernel_fpu_end();
- src += n;
- srclen -= n;
- } while (srclen);
- return 0;
-}
-
-static int nhpoly1305_sse2_digest(struct shash_desc *desc,
- const u8 *src, unsigned int srclen, u8 *out)
-{
- return crypto_nhpoly1305_init(desc) ?:
- nhpoly1305_sse2_update(desc, src, srclen) ?:
- crypto_nhpoly1305_final(desc, out);
-}
-
-static struct shash_alg nhpoly1305_alg = {
- .base.cra_name = "nhpoly1305",
- .base.cra_driver_name = "nhpoly1305-sse2",
- .base.cra_priority = 200,
- .base.cra_ctxsize = sizeof(struct nhpoly1305_key),
- .base.cra_module = THIS_MODULE,
- .digestsize = POLY1305_DIGEST_SIZE,
- .init = crypto_nhpoly1305_init,
- .update = nhpoly1305_sse2_update,
- .final = crypto_nhpoly1305_final,
- .digest = nhpoly1305_sse2_digest,
- .setkey = crypto_nhpoly1305_setkey,
- .descsize = sizeof(struct nhpoly1305_state),
-};
-
-static int __init nhpoly1305_mod_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_XMM2))
- return -ENODEV;
-
- return crypto_register_shash(&nhpoly1305_alg);
-}
-
-static void __exit nhpoly1305_mod_exit(void)
-{
- crypto_unregister_shash(&nhpoly1305_alg);
-}
-
-module_init(nhpoly1305_mod_init);
-module_exit(nhpoly1305_mod_exit);
-
-MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (SSE2-accelerated)");
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("nhpoly1305");
-MODULE_ALIAS_CRYPTO("nhpoly1305-sse2");
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index aa3f850ece24..33cf46bbadc8 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -117,10 +117,11 @@ config CRYPTO_LIB_NH
config CRYPTO_LIB_NH_ARCH
bool
depends on CRYPTO_LIB_NH && !UML
default y if ARM && KERNEL_MODE_NEON
default y if ARM64 && KERNEL_MODE_NEON
+ default y if X86_64
config CRYPTO_LIB_POLY1305
tristate
help
The Poly1305 library functions. Select this if your module uses any
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index e3a13952bc2a..45128eccedef 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -135,10 +135,11 @@ obj-$(CONFIG_CRYPTO_LIB_NH) += libnh.o
libnh-y := nh.o
ifeq ($(CONFIG_CRYPTO_LIB_NH_ARCH),y)
CFLAGS_nh.o += -I$(src)/$(SRCARCH)
libnh-$(CONFIG_ARM) += arm/nh-neon-core.o
libnh-$(CONFIG_ARM64) += arm64/nh-neon-core.o
+libnh-$(CONFIG_X86) += x86/nh-sse2.o x86/nh-avx2.o
endif
################################################################################
obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o
diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/lib/crypto/x86/nh-avx2.S
similarity index 98%
rename from arch/x86/crypto/nh-avx2-x86_64.S
rename to lib/crypto/x86/nh-avx2.S
index 791386d9a83a..9c085a31b137 100644
--- a/arch/x86/crypto/nh-avx2-x86_64.S
+++ b/lib/crypto/x86/nh-avx2.S
@@ -6,11 +6,10 @@
*
* Author: Eric Biggers <ebiggers@google.com>
*/
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
#define PASS0_SUMS %ymm0
#define PASS1_SUMS %ymm1
#define PASS2_SUMS %ymm2
#define PASS3_SUMS %ymm3
@@ -68,11 +67,11 @@
* void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
-SYM_TYPED_FUNC_START(nh_avx2)
+SYM_FUNC_START(nh_avx2)
vmovdqu 0x00(KEY), K0
vmovdqu 0x10(KEY), K1
add $0x20, KEY
vpxor PASS0_SUMS, PASS0_SUMS, PASS0_SUMS
diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/lib/crypto/x86/nh-sse2.S
similarity index 97%
rename from arch/x86/crypto/nh-sse2-x86_64.S
rename to lib/crypto/x86/nh-sse2.S
index 75fb994b6d17..d36c0e6d5556 100644
--- a/arch/x86/crypto/nh-sse2-x86_64.S
+++ b/lib/crypto/x86/nh-sse2.S
@@ -6,11 +6,10 @@
*
* Author: Eric Biggers <ebiggers@google.com>
*/
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
#define PASS0_SUMS %xmm0
#define PASS1_SUMS %xmm1
#define PASS2_SUMS %xmm2
#define PASS3_SUMS %xmm3
@@ -70,11 +69,11 @@
* void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
* __le64 hash[NH_NUM_PASSES])
*
* It's guaranteed that message_len % 16 == 0.
*/
-SYM_TYPED_FUNC_START(nh_sse2)
+SYM_FUNC_START(nh_sse2)
movdqu 0x00(KEY), K0
movdqu 0x10(KEY), K1
movdqu 0x20(KEY), K2
add $0x30, KEY
diff --git a/lib/crypto/x86/nh.h b/lib/crypto/x86/nh.h
new file mode 100644
index 000000000000..83361c2e9783
--- /dev/null
+++ b/lib/crypto/x86/nh.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86_64 accelerated implementation of NH
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <asm/fpu/api.h>
+#include <linux/static_call.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sse2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_avx2);
+
+asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES]);
+asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES]);
+
+static bool nh_arch(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES])
+{
+ if (message_len >= 64 && static_branch_likely(&have_sse2) &&
+ irq_fpu_usable()) {
+ kernel_fpu_begin();
+ if (static_branch_likely(&have_avx2))
+ nh_avx2(key, message, message_len, hash);
+ else
+ nh_sse2(key, message, message_len, hash);
+ kernel_fpu_end();
+ return true;
+ }
+ return false;
+}
+
+#define nh_mod_init_arch nh_mod_init_arch
+static void nh_mod_init_arch(void)
+{
+ if (boot_cpu_has(X86_FEATURE_XMM2)) {
+ static_branch_enable(&have_sse2);
+ if (boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+ NULL))
+ static_branch_enable(&have_avx2);
+ }
+}
--
2.52.0
next prev parent reply other threads:[~2025-12-11 1:21 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-11 1:18 [PATCH 00/12] NH library and Adiantum cleanup Eric Biggers
2025-12-11 1:18 ` [PATCH 01/12] lib/crypto: nh: Add NH library Eric Biggers
2025-12-11 1:18 ` [PATCH 02/12] lib/crypto: tests: Add KUnit tests for NH Eric Biggers
2026-02-26 13:12 ` Geert Uytterhoeven
2026-02-26 18:11 ` Eric Biggers
2025-12-11 1:18 ` [PATCH 03/12] lib/crypto: arm/nh: Migrate optimized code into library Eric Biggers
2025-12-11 1:18 ` [PATCH 04/12] lib/crypto: arm64/nh: " Eric Biggers
2025-12-11 1:18 ` Eric Biggers [this message]
2025-12-11 1:18 ` [PATCH 06/12] crypto: adiantum - Convert to use NH library Eric Biggers
2025-12-11 1:18 ` [PATCH 07/12] crypto: adiantum - Use scatter_walk API instead of sg_miter Eric Biggers
2025-12-11 1:18 ` [PATCH 08/12] crypto: adiantum - Use memcpy_{to,from}_sglist() Eric Biggers
2025-12-11 3:02 ` Herbert Xu
2025-12-11 1:18 ` [PATCH 09/12] crypto: adiantum - Drop support for asynchronous xchacha ciphers Eric Biggers
2025-12-11 1:18 ` [PATCH 10/12] crypto: nhpoly1305 - Remove crypto_shash support Eric Biggers
2025-12-11 3:02 ` Herbert Xu
2025-12-11 1:18 ` [PATCH 11/12] crypto: testmgr - Remove nhpoly1305 tests Eric Biggers
2025-12-11 3:03 ` Herbert Xu
2025-12-11 1:18 ` [PATCH 12/12] fscrypt: Drop obsolete recommendation to enable optimized NHPoly1305 Eric Biggers
2025-12-18 19:25 ` [PATCH 00/12] NH library and Adiantum cleanup Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251211011846.8179-6-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=Jason@zx2c4.com \
--cc=ardb@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.