public inbox for linux-crypto@vger.kernel.org
* [PATCH 00/12] crypto: sha256 - Use partial block API
@ 2025-04-30  6:06 Herbert Xu
  2025-04-30  6:06 ` [PATCH 01/12] crypto: lib/sha256 - Restore lib_sha256 finup code Herbert Xu
                   ` (12 more replies)
  0 siblings, 13 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

This is based on

	https://patchwork.kernel.org/project/linux-crypto/list/?series=957785

Rather than going through the lib/sha256 partial block handling,
use the native shash partial block API.  Add two extra shash
algorithms to provide testing coverage for lib/sha256.
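
For reference, a block-only shash update hashes only whole blocks and
returns the leftover byte count for the shash core to buffer.  A
minimal sketch of that contract (mirroring crypto_sha256_update_generic
in patch 2; the function name here is illustrative):

static int sha256_update_sketch(struct shash_desc *desc,
				const u8 *data, unsigned int len)
{
	struct crypto_sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int remain = len % SHA256_BLOCK_SIZE;

	/* hash only the whole blocks ... */
	sctx->count += len - remain;
	sha256_blocks_generic(sctx->state, data, len / SHA256_BLOCK_SIZE);
	/* ... and hand the partial tail back to the shash core */
	return remain;
}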

Herbert Xu (12):
  crypto: lib/sha256 - Restore lib_sha256 finup code
  crypto: sha256 - Use the partial block API for generic
  crypto: arm/sha256 - Add simd block function
  crypto: arm64/sha256 - Add simd block function
  crypto: mips/sha256 - Export block functions as GPL only
  crypto: powerpc/sha256 - Export block functions as GPL only
  crypto: riscv/sha256 - Add simd block function
  crypto: s390/sha256 - Export block functions as GPL only
  crypto: sparc/sha256 - Export block functions as GPL only
  crypto: x86/sha256 - Add simd block function
  crypto: lib/sha256 - Use generic block helper
  crypto: sha256 - Use the partial block API

 arch/arm/lib/crypto/Kconfig                   |   1 +
 arch/arm/lib/crypto/sha256-armv4.pl           |  20 +--
 arch/arm/lib/crypto/sha256.c                  |  16 +--
 arch/arm64/crypto/sha512-glue.c               |   6 +-
 arch/arm64/lib/crypto/Kconfig                 |   1 +
 arch/arm64/lib/crypto/sha2-armv8.pl           |   2 +-
 arch/arm64/lib/crypto/sha256.c                |  16 +--
 .../mips/cavium-octeon/crypto/octeon-sha256.c |   4 +-
 arch/powerpc/lib/crypto/sha256.c              |   4 +-
 arch/riscv/lib/crypto/Kconfig                 |   1 +
 arch/riscv/lib/crypto/sha256.c                |  17 ++-
 arch/s390/lib/crypto/sha256.c                 |   4 +-
 arch/sparc/lib/crypto/sha256.c                |   4 +-
 arch/x86/lib/crypto/Kconfig                   |   1 +
 arch/x86/lib/crypto/sha256.c                  |  16 ++-
 crypto/sha256.c                               | 134 +++++++++++-------
 include/crypto/internal/sha2.h                |  46 ++++++
 include/crypto/sha2.h                         |  14 +-
 lib/crypto/Kconfig                            |   8 ++
 lib/crypto/sha256.c                           | 100 +++----------
 20 files changed, 232 insertions(+), 183 deletions(-)

-- 
2.39.5


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 01/12] crypto: lib/sha256 - Restore lib_sha256 finup code
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 02/12] crypto: sha256 - Use the partial block API for generic Herbert Xu
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

The previous lib_sha256 finup code could process two blocks if
needed.  Restore it and move it into the sha256_finup helper so that
it can be reused by the Crypto API.

Also add sha256_choose_blocks and CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
so that the Crypto API can use the SIMD block function unconditionally.
The Crypto API must not be used in hardirq context, so there is no
reason to have a fallback path for hardirqs.
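
The intended call sites then look roughly like this (a sketch; the
force_simd=true case matches how the Crypto API glue uses it later
in this series):

	/* lib/sha256: let the dispatcher check SIMD usability */
	sha256_choose_blocks(state, data, nblocks, false, false);

	/* Crypto API: never called in hardirq context, so SIMD
	 * may be forced unconditionally */
	sha256_choose_blocks(state, data, nblocks, false, true);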

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/sha2.h | 46 ++++++++++++++++++++++++++++++++++
 lib/crypto/Kconfig             |  8 ++++++
 lib/crypto/sha256.c            | 35 ++++++++------------------
 3 files changed, 65 insertions(+), 24 deletions(-)

diff --git a/include/crypto/internal/sha2.h b/include/crypto/internal/sha2.h
index d641c67abcbc..07e41efc6cc6 100644
--- a/include/crypto/internal/sha2.h
+++ b/include/crypto/internal/sha2.h
@@ -3,7 +3,11 @@
 #ifndef _CRYPTO_INTERNAL_SHA2_H
 #define _CRYPTO_INTERNAL_SHA2_H
 
+#include <crypto/internal/simd.h>
 #include <crypto/sha2.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/unaligned.h>
 
 void sha256_update_generic(struct sha256_state *sctx,
 			   const u8 *data, size_t len);
@@ -24,5 +28,47 @@ void sha256_blocks_generic(u32 state[SHA256_STATE_WORDS],
 			   const u8 *data, size_t nblocks);
 void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 			const u8 *data, size_t nblocks);
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
+			const u8 *data, size_t nblocks);
+
+static inline void sha256_choose_blocks(
+	u32 state[SHA256_STATE_WORDS], const u8 *data, size_t nblocks,
+	bool force_generic, bool force_simd)
+{
+	if (!IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) || force_generic)
+		sha256_blocks_generic(state, data, nblocks);
+	else if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD) &&
+		 (force_simd || crypto_simd_usable()))
+		sha256_blocks_simd(state, data, nblocks);
+	else
+		sha256_blocks_arch(state, data, nblocks);
+}
+
+static __always_inline void sha256_finup(
+	struct crypto_sha256_state *sctx, const u8 *src, unsigned int len,
+	u8 out[SHA256_DIGEST_SIZE], size_t digest_size, bool force_generic,
+	bool force_simd)
+{
+	unsigned int bit_offset = SHA256_BLOCK_SIZE / 8 - 1;
+	union {
+		__be64 b64[SHA256_BLOCK_SIZE / 4];
+		u8 u8[SHA256_BLOCK_SIZE * 2];
+	} block = {};
+	int blocks, i;
+
+	sctx->count += len;
+	if (len >= bit_offset * 8)
+		bit_offset += SHA256_BLOCK_SIZE / 8;
+	blocks = (bit_offset + 1) * 8 / SHA256_BLOCK_SIZE;
+	memcpy(&block, src, len);
+	block.u8[len] = 0x80;
+	block.b64[bit_offset] = cpu_to_be64(sctx->count << 3);
+	sha256_choose_blocks(sctx->state, block.u8, blocks, force_generic,
+			     force_simd);
+	memzero_explicit(&block, sizeof(block));
+
+	for (i = 0; i < digest_size; i += 4)
+		put_unaligned_be32(sctx->state[i / 4], out + i);
+}
 
 #endif /* _CRYPTO_INTERNAL_SHA2_H */
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 6319358b38c2..1ec1466108cc 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -150,6 +150,14 @@ config CRYPTO_ARCH_HAVE_LIB_SHA256
 	  Declares whether the architecture provides an arch-specific
 	  accelerated implementation of the SHA-256 library interface.
 
+config CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
+	bool
+	help
+	  Declares whether the architecture provides an arch-specific
+	  accelerated implementation of the SHA-256 library interface
+	  that is SIMD-based and therefore not usable in hardirq
+	  context.
+
 config CRYPTO_LIB_SHA256_GENERIC
 	tristate
 	default CRYPTO_LIB_SHA256 if !CRYPTO_ARCH_HAVE_LIB_SHA256
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index 563f09c9f381..b5ffff032718 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/string.h>
-#include <linux/unaligned.h>
 
 /*
  * If __DISABLE_EXPORTS is defined, then this file is being compiled for a
@@ -26,14 +25,16 @@
 #include "sha256-generic.c"
 #endif
 
+static inline bool sha256_purgatory(void)
+{
+	return __is_defined(__DISABLE_EXPORTS);
+}
+
 static inline void sha256_blocks(u32 state[SHA256_STATE_WORDS], const u8 *data,
 				 size_t nblocks, bool force_generic)
 {
-#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_SHA256) && !defined(__DISABLE_EXPORTS)
-	if (!force_generic)
-		return sha256_blocks_arch(state, data, nblocks);
-#endif
-	sha256_blocks_generic(state, data, nblocks);
+	sha256_choose_blocks(state, data, nblocks,
+			     force_generic || sha256_purgatory(), false);
 }
 
 static inline void __sha256_update(struct sha256_state *sctx, const u8 *data,
@@ -79,25 +80,11 @@ EXPORT_SYMBOL(sha256_update);
 static inline void __sha256_final(struct sha256_state *sctx, u8 *out,
 				  size_t digest_size, bool force_generic)
 {
-	const size_t bit_offset = SHA256_BLOCK_SIZE - sizeof(__be64);
-	__be64 *bits = (__be64 *)&sctx->buf[bit_offset];
-	size_t partial = sctx->count % SHA256_BLOCK_SIZE;
-	size_t i;
-
-	sctx->buf[partial++] = 0x80;
-	if (partial > bit_offset) {
-		memset(&sctx->buf[partial], 0, SHA256_BLOCK_SIZE - partial);
-		sha256_blocks(sctx->state, sctx->buf, 1, force_generic);
-		partial = 0;
-	}
-
-	memset(&sctx->buf[partial], 0, bit_offset - partial);
-	*bits = cpu_to_be64(sctx->count << 3);
-	sha256_blocks(sctx->state, sctx->buf, 1, force_generic);
-
-	for (i = 0; i < digest_size; i += 4)
-		put_unaligned_be32(sctx->state[i / 4], out + i);
+	unsigned int len = sctx->count % SHA256_BLOCK_SIZE;
 
+	sctx->count -= len;
+	sha256_finup(&sctx->ctx, sctx->buf, len, out, digest_size,
+		     force_generic || sha256_purgatory(), false);
 	memzero_explicit(sctx, sizeof(*sctx));
 }
 
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 02/12] crypto: sha256 - Use the partial block API for generic
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
  2025-04-30  6:06 ` [PATCH 01/12] crypto: lib/sha256 - Restore lib_sha256 finup code Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 03/12] crypto: arm/sha256 - Add simd block function Herbert Xu
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

The shash interface already handles partial blocks; use it for
sha224-generic and sha256-generic instead of going through the
lib/sha256 interface.
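
This also lets descsize shrink from sizeof(struct sha256_state) to
sizeof(struct crypto_sha256_state), as the shash core now owns the
partial-block buffer.  Roughly, the two layouts this series assumes
(a sketch, not the verbatim definitions):

	struct crypto_sha256_state {	/* all the shash needs now */
		u32 state[SHA256_STATE_WORDS];
		u64 count;
	};

	struct sha256_state {		/* legacy lib/sha256 state */
		struct crypto_sha256_state ctx;
		u8 buf[SHA256_BLOCK_SIZE];
	};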

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha256.c       | 60 +++++++++++++++++--------------------------
 include/crypto/sha2.h | 14 ++++++++--
 2 files changed, 35 insertions(+), 39 deletions(-)

diff --git a/crypto/sha256.c b/crypto/sha256.c
index c2588d08ee3e..9463c06ea39c 100644
--- a/crypto/sha256.c
+++ b/crypto/sha256.c
@@ -30,15 +30,19 @@ EXPORT_SYMBOL_GPL(sha256_zero_message_hash);
 
 static int crypto_sha256_init(struct shash_desc *desc)
 {
-	sha256_init(shash_desc_ctx(desc));
+	sha256_block_init(shash_desc_ctx(desc));
 	return 0;
 }
 
 static int crypto_sha256_update_generic(struct shash_desc *desc, const u8 *data,
 					unsigned int len)
 {
-	sha256_update_generic(shash_desc_ctx(desc), data, len);
-	return 0;
+	struct crypto_sha256_state *sctx = shash_desc_ctx(desc);
+	int remain = len - round_down(len, SHA256_BLOCK_SIZE);
+
+	sctx->count += len - remain;
+	sha256_blocks_generic(sctx->state, data, len / SHA256_BLOCK_SIZE);
+	return remain;
 }
 
 static int crypto_sha256_update_arch(struct shash_desc *desc, const u8 *data,
@@ -48,12 +52,6 @@ static int crypto_sha256_update_arch(struct shash_desc *desc, const u8 *data,
 	return 0;
 }
 
-static int crypto_sha256_final_generic(struct shash_desc *desc, u8 *out)
-{
-	sha256_final_generic(shash_desc_ctx(desc), out);
-	return 0;
-}
-
 static int crypto_sha256_final_arch(struct shash_desc *desc, u8 *out)
 {
 	sha256_final(shash_desc_ctx(desc), out);
@@ -63,10 +61,13 @@ static int crypto_sha256_final_arch(struct shash_desc *desc, u8 *out)
 static int crypto_sha256_finup_generic(struct shash_desc *desc, const u8 *data,
 				       unsigned int len, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct crypto_sha256_state *sctx = shash_desc_ctx(desc);
+	int remain = len;
 
-	sha256_update_generic(sctx, data, len);
-	sha256_final_generic(sctx, out);
+	if (remain >= SHA256_BLOCK_SIZE)
+		remain = crypto_sha256_update_generic(desc, data, remain);
+	sha256_finup(sctx, data + len - remain, remain, out,
+		     crypto_shash_digestsize(desc->tfm), true, false);
 	return 0;
 }
 
@@ -83,12 +84,8 @@ static int crypto_sha256_finup_arch(struct shash_desc *desc, const u8 *data,
 static int crypto_sha256_digest_generic(struct shash_desc *desc, const u8 *data,
 					unsigned int len, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
-
-	sha256_init(sctx);
-	sha256_update_generic(sctx, data, len);
-	sha256_final_generic(sctx, out);
-	return 0;
+	crypto_sha256_init(desc);
+	return crypto_sha256_finup_generic(desc, data, len, out);
 }
 
 static int crypto_sha256_digest_arch(struct shash_desc *desc, const u8 *data,
@@ -100,13 +97,7 @@ static int crypto_sha256_digest_arch(struct shash_desc *desc, const u8 *data,
 
 static int crypto_sha224_init(struct shash_desc *desc)
 {
-	sha224_init(shash_desc_ctx(desc));
-	return 0;
-}
-
-static int crypto_sha224_final_generic(struct shash_desc *desc, u8 *out)
-{
-	sha224_final_generic(shash_desc_ctx(desc), out);
+	sha224_block_init(shash_desc_ctx(desc));
 	return 0;
 }
 
@@ -147,35 +138,30 @@ static struct shash_alg algs[] = {
 		.base.cra_name		= "sha256",
 		.base.cra_driver_name	= "sha256-generic",
 		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY |
+					  CRYPTO_AHASH_ALG_FINUP_MAX,
 		.base.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.base.cra_module	= THIS_MODULE,
 		.digestsize		= SHA256_DIGEST_SIZE,
 		.init			= crypto_sha256_init,
 		.update			= crypto_sha256_update_generic,
-		.final			= crypto_sha256_final_generic,
 		.finup			= crypto_sha256_finup_generic,
 		.digest			= crypto_sha256_digest_generic,
-		.descsize		= sizeof(struct sha256_state),
-		.statesize		= sizeof(struct crypto_sha256_state) +
-					  SHA256_BLOCK_SIZE + 1,
-		.import			= crypto_sha256_import_lib,
-		.export			= crypto_sha256_export_lib,
+		.descsize		= sizeof(struct crypto_sha256_state),
 	},
 	{
 		.base.cra_name		= "sha224",
 		.base.cra_driver_name	= "sha224-generic",
 		.base.cra_priority	= 100,
+		.base.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY |
+					  CRYPTO_AHASH_ALG_FINUP_MAX,
 		.base.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.base.cra_module	= THIS_MODULE,
 		.digestsize		= SHA224_DIGEST_SIZE,
 		.init			= crypto_sha224_init,
 		.update			= crypto_sha256_update_generic,
-		.final			= crypto_sha224_final_generic,
-		.descsize		= sizeof(struct sha256_state),
-		.statesize		= sizeof(struct crypto_sha256_state) +
-					  SHA256_BLOCK_SIZE + 1,
-		.import			= crypto_sha256_import_lib,
-		.export			= crypto_sha256_export_lib,
+		.finup			= crypto_sha256_finup_generic,
+		.descsize		= sizeof(struct crypto_sha256_state),
 	},
 	{
 		.base.cra_name		= "sha256",
diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h
index 9853cd2d1291..4912572578dc 100644
--- a/include/crypto/sha2.h
+++ b/include/crypto/sha2.h
@@ -88,7 +88,7 @@ struct sha512_state {
 	u8 buf[SHA512_BLOCK_SIZE];
 };
 
-static inline void sha256_init(struct sha256_state *sctx)
+static inline void sha256_block_init(struct crypto_sha256_state *sctx)
 {
 	sctx->state[0] = SHA256_H0;
 	sctx->state[1] = SHA256_H1;
@@ -100,11 +100,16 @@ static inline void sha256_init(struct sha256_state *sctx)
 	sctx->state[7] = SHA256_H7;
 	sctx->count = 0;
 }
+
+static inline void sha256_init(struct sha256_state *sctx)
+{
+	sha256_block_init(&sctx->ctx);
+}
 void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len);
 void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE]);
 void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]);
 
-static inline void sha224_init(struct sha256_state *sctx)
+static inline void sha224_block_init(struct crypto_sha256_state *sctx)
 {
 	sctx->state[0] = SHA224_H0;
 	sctx->state[1] = SHA224_H1;
@@ -116,6 +121,11 @@ static inline void sha224_init(struct sha256_state *sctx)
 	sctx->state[7] = SHA224_H7;
 	sctx->count = 0;
 }
+
+static inline void sha224_init(struct sha256_state *sctx)
+{
+	sha224_block_init(&sctx->ctx);
+}
 /* Simply use sha256_update as it is equivalent to sha224_update. */
 void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE]);
 
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 03/12] crypto: arm/sha256 - Add simd block function
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
  2025-04-30  6:06 ` [PATCH 01/12] crypto: lib/sha256 - Restore lib_sha256 finup code Herbert Xu
  2025-04-30  6:06 ` [PATCH 02/12] crypto: sha256 - Use the partial block API for generic Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 04/12] crypto: arm64/sha256 " Herbert Xu
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Add CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD and a SIMD block function
so that the caller can decide whether to use SIMD.

Also export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.
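
The resulting contract between the two exported entry points is
roughly (a sketch; a dispatcher such as sha256_choose_blocks from
patch 1 is expected to pick between them):

	/* scalar asm entry point; safe in any context */
	sha256_blocks_arch(state, data, nblocks);

	/* NEON/CE path; the caller must guarantee SIMD is usable,
	 * via crypto_simd_usable() or the Crypto API's no-hardirq
	 * rule */
	sha256_blocks_simd(state, data, nblocks);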

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/lib/crypto/Kconfig         |  1 +
 arch/arm/lib/crypto/sha256-armv4.pl | 20 ++++++++++----------
 arch/arm/lib/crypto/sha256.c        | 16 ++++++++--------
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/arm/lib/crypto/Kconfig b/arch/arm/lib/crypto/Kconfig
index 9f3ff30f4032..d1ad664f0c67 100644
--- a/arch/arm/lib/crypto/Kconfig
+++ b/arch/arm/lib/crypto/Kconfig
@@ -28,3 +28,4 @@ config CRYPTO_SHA256_ARM
 	depends on !CPU_V7M
 	default CRYPTO_LIB_SHA256
 	select CRYPTO_ARCH_HAVE_LIB_SHA256
+	select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
diff --git a/arch/arm/lib/crypto/sha256-armv4.pl b/arch/arm/lib/crypto/sha256-armv4.pl
index f3a2b54efd4e..8122db7fd599 100644
--- a/arch/arm/lib/crypto/sha256-armv4.pl
+++ b/arch/arm/lib/crypto/sha256-armv4.pl
@@ -204,18 +204,18 @@ K256:
 .word	0				@ terminator
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-sha256_block_data_order
+.word	OPENSSL_armcap_P-sha256_blocks_arch
 #endif
 .align	5
 
-.global	sha256_block_data_order
-.type	sha256_block_data_order,%function
-sha256_block_data_order:
-.Lsha256_block_data_order:
+.global	sha256_blocks_arch
+.type	sha256_blocks_arch,%function
+sha256_blocks_arch:
+.Lsha256_blocks_arch:
 #if __ARM_ARCH__<7
-	sub	r3,pc,#8		@ sha256_block_data_order
+	sub	r3,pc,#8		@ sha256_blocks_arch
 #else
-	adr	r3,.Lsha256_block_data_order
+	adr	r3,.Lsha256_blocks_arch
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
@@ -282,7 +282,7 @@ $code.=<<___;
 	moveq	pc,lr			@ be binary compatible with V4, yet
 	bx	lr			@ interoperable with Thumb ISA:-)
 #endif
-.size	sha256_block_data_order,.-sha256_block_data_order
+.size	sha256_blocks_arch,.-sha256_blocks_arch
 ___
 ######################################################################
 # NEON stuff
@@ -470,8 +470,8 @@ sha256_block_data_order_neon:
 	stmdb	sp!,{r4-r12,lr}
 
 	sub	$H,sp,#16*4+16
-	adr	$Ktbl,.Lsha256_block_data_order
-	sub	$Ktbl,$Ktbl,#.Lsha256_block_data_order-K256
+	adr	$Ktbl,.Lsha256_blocks_arch
+	sub	$Ktbl,$Ktbl,#.Lsha256_blocks_arch-K256
 	bic	$H,$H,#15		@ align for 128-bit stores
 	mov	$t2,sp
 	mov	sp,$H			@ alloca
diff --git a/arch/arm/lib/crypto/sha256.c b/arch/arm/lib/crypto/sha256.c
index 3a8dfc304807..1dd71b8fd611 100644
--- a/arch/arm/lib/crypto/sha256.c
+++ b/arch/arm/lib/crypto/sha256.c
@@ -6,12 +6,12 @@
  */
 #include <asm/neon.h>
 #include <crypto/internal/sha2.h>
-#include <crypto/internal/simd.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-asmlinkage void sha256_block_data_order(u32 state[SHA256_STATE_WORDS],
-					const u8 *data, size_t nblocks);
+asmlinkage void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+				   const u8 *data, size_t nblocks);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 asmlinkage void sha256_block_data_order_neon(u32 state[SHA256_STATE_WORDS],
 					     const u8 *data, size_t nblocks);
 asmlinkage void sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
@@ -20,11 +20,11 @@ asmlinkage void sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
 
-void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
 			const u8 *data, size_t nblocks)
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
-	    static_branch_likely(&have_neon) && crypto_simd_usable()) {
+	    static_branch_likely(&have_neon)) {
 		kernel_neon_begin();
 		if (static_branch_likely(&have_ce))
 			sha256_ce_transform(state, data, nblocks);
@@ -32,17 +32,17 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 			sha256_block_data_order_neon(state, data, nblocks);
 		kernel_neon_end();
 	} else {
-		sha256_block_data_order(state, data, nblocks);
+		sha256_blocks_arch(state, data, nblocks);
 	}
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
 
 bool sha256_is_arch_optimized(void)
 {
 	/* We always can use at least the ARM scalar implementation. */
 	return true;
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 static int __init sha256_arm_mod_init(void)
 {
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 04/12] crypto: arm64/sha256 - Add simd block function
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (2 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 03/12] crypto: arm/sha256 - Add simd block function Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 05/12] crypto: mips/sha256 - Export block functions as GPL only Herbert Xu
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Add CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD and a SIMD block function
so that the caller can decide whether to use SIMD.

Also export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sha512-glue.c     |  6 +++---
 arch/arm64/lib/crypto/Kconfig       |  1 +
 arch/arm64/lib/crypto/sha2-armv8.pl |  2 +-
 arch/arm64/lib/crypto/sha256.c      | 16 ++++++++--------
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
index ab2e1c13dfad..15aa9d8b7b2c 100644
--- a/arch/arm64/crypto/sha512-glue.c
+++ b/arch/arm64/crypto/sha512-glue.c
@@ -18,13 +18,13 @@ MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("sha384");
 MODULE_ALIAS_CRYPTO("sha512");
 
-asmlinkage void sha512_block_data_order(u64 *digest, const void *data,
-					unsigned int num_blks);
+asmlinkage void sha512_blocks_arch(u64 *digest, const void *data,
+				   unsigned int num_blks);
 
 static void sha512_arm64_transform(struct sha512_state *sst, u8 const *src,
 				   int blocks)
 {
-	sha512_block_data_order(sst->state, src, blocks);
+	sha512_blocks_arch(sst->state, src, blocks);
 }
 
 static int sha512_update(struct shash_desc *desc, const u8 *data,
diff --git a/arch/arm64/lib/crypto/Kconfig b/arch/arm64/lib/crypto/Kconfig
index 49e57bfdb5b5..129a7685cb4c 100644
--- a/arch/arm64/lib/crypto/Kconfig
+++ b/arch/arm64/lib/crypto/Kconfig
@@ -17,3 +17,4 @@ config CRYPTO_SHA256_ARM64
 	tristate
 	default CRYPTO_LIB_SHA256
 	select CRYPTO_ARCH_HAVE_LIB_SHA256
+	select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
diff --git a/arch/arm64/lib/crypto/sha2-armv8.pl b/arch/arm64/lib/crypto/sha2-armv8.pl
index 35ec9ae99fe1..4aebd20c498b 100644
--- a/arch/arm64/lib/crypto/sha2-armv8.pl
+++ b/arch/arm64/lib/crypto/sha2-armv8.pl
@@ -95,7 +95,7 @@ if ($output =~ /512/) {
 	$reg_t="w";
 }
 
-$func="sha${BITS}_block_data_order";
+$func="sha${BITS}_blocks_arch";
 
 ($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
 
diff --git a/arch/arm64/lib/crypto/sha256.c b/arch/arm64/lib/crypto/sha256.c
index 2bd413c586d2..fdceb2d0899c 100644
--- a/arch/arm64/lib/crypto/sha256.c
+++ b/arch/arm64/lib/crypto/sha256.c
@@ -6,12 +6,12 @@
  */
 #include <asm/neon.h>
 #include <crypto/internal/sha2.h>
-#include <crypto/internal/simd.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-asmlinkage void sha256_block_data_order(u32 state[SHA256_STATE_WORDS],
-					const u8 *data, size_t nblocks);
+asmlinkage void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+				   const u8 *data, size_t nblocks);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 asmlinkage void sha256_block_neon(u32 state[SHA256_STATE_WORDS],
 				  const u8 *data, size_t nblocks);
 asmlinkage size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
@@ -20,11 +20,11 @@ asmlinkage size_t __sha256_ce_transform(u32 state[SHA256_STATE_WORDS],
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
 
-void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
 			const u8 *data, size_t nblocks)
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
-	    static_branch_likely(&have_neon) && crypto_simd_usable()) {
+	    static_branch_likely(&have_neon)) {
 		if (static_branch_likely(&have_ce)) {
 			do {
 				size_t rem;
@@ -42,17 +42,17 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 			kernel_neon_end();
 		}
 	} else {
-		sha256_block_data_order(state, data, nblocks);
+		sha256_blocks_arch(state, data, nblocks);
 	}
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
 
 bool sha256_is_arch_optimized(void)
 {
 	/* We always can use at least the ARM64 scalar implementation. */
 	return true;
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 static int __init sha256_arm64_mod_init(void)
 {
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 05/12] crypto: mips/sha256 - Export block functions as GPL only
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (3 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 04/12] crypto: arm64/sha256 " Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 06/12] crypto: powerpc/sha256 " Herbert Xu
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/mips/cavium-octeon/crypto/octeon-sha256.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
index f169054852bc..f93faaf1f4af 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
@@ -60,13 +60,13 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 	state64[3] = read_octeon_64bit_hash_dword(3);
 	octeon_crypto_disable(&cop2_state, flags);
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 
 bool sha256_is_arch_optimized(void)
 {
 	return octeon_has_crypto();
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA-256 Secure Hash Algorithm (OCTEON)");
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 06/12] crypto: powerpc/sha256 - Export block functions as GPL only
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (4 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 05/12] crypto: mips/sha256 - Export block functions as GPL only Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 07/12] crypto: riscv/sha256 - Add simd block function Herbert Xu
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/lib/crypto/sha256.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/crypto/sha256.c b/arch/powerpc/lib/crypto/sha256.c
index c05023c5acdd..6b0f079587eb 100644
--- a/arch/powerpc/lib/crypto/sha256.c
+++ b/arch/powerpc/lib/crypto/sha256.c
@@ -58,13 +58,13 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 		nblocks -= unit;
 	} while (nblocks);
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 
 bool sha256_is_arch_optimized(void)
 {
 	return true;
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("SHA-256 Secure Hash Algorithm, SPE optimized");
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 07/12] crypto: riscv/sha256 - Add simd block function
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (5 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 06/12] crypto: powerpc/sha256 " Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 08/12] crypto: s390/sha256 - Export block functions as GPL only Herbert Xu
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Add CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD and a SIMD block function
so that the caller can decide whether to use SIMD.

Also export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/riscv/lib/crypto/Kconfig  |  1 +
 arch/riscv/lib/crypto/sha256.c | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/riscv/lib/crypto/Kconfig b/arch/riscv/lib/crypto/Kconfig
index c100571feb7e..47c99ea97ce2 100644
--- a/arch/riscv/lib/crypto/Kconfig
+++ b/arch/riscv/lib/crypto/Kconfig
@@ -12,4 +12,5 @@ config CRYPTO_SHA256_RISCV64
 	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
 	default CRYPTO_LIB_SHA256
 	select CRYPTO_ARCH_HAVE_LIB_SHA256
+	select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
 	select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/riscv/lib/crypto/sha256.c b/arch/riscv/lib/crypto/sha256.c
index 18b84030f0b3..c1358eafc2ad 100644
--- a/arch/riscv/lib/crypto/sha256.c
+++ b/arch/riscv/lib/crypto/sha256.c
@@ -9,10 +9,8 @@
  * Author: Jerry Shih <jerry.shih@sifive.com>
  */
 
-#include <asm/simd.h>
 #include <asm/vector.h>
 #include <crypto/internal/sha2.h>
-#include <crypto/internal/simd.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
@@ -21,10 +19,10 @@ asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions);
 
-void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
 			const u8 *data, size_t nblocks)
 {
-	if (static_branch_likely(&have_extensions) && crypto_simd_usable()) {
+	if (static_branch_likely(&have_extensions)) {
 		kernel_vector_begin();
 		sha256_transform_zvknha_or_zvknhb_zvkb(state, data, nblocks);
 		kernel_vector_end();
@@ -32,13 +30,20 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 		sha256_blocks_generic(state, data, nblocks);
 	}
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+			const u8 *data, size_t nblocks)
+{
+	sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 
 bool sha256_is_arch_optimized(void)
 {
 	return static_key_enabled(&have_extensions);
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 static int __init riscv64_sha256_mod_init(void)
 {
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 08/12] crypto: s390/sha256 - Export block functions as GPL only
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (6 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 07/12] crypto: riscv/sha256 - Add simd block function Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 09/12] crypto: sparc/sha256 " Herbert Xu
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/lib/crypto/sha256.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/lib/crypto/sha256.c b/arch/s390/lib/crypto/sha256.c
index 50c592ce7a5d..fcfa2706a7f9 100644
--- a/arch/s390/lib/crypto/sha256.c
+++ b/arch/s390/lib/crypto/sha256.c
@@ -21,13 +21,13 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 	else
 		sha256_blocks_generic(state, data, nblocks);
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 
 bool sha256_is_arch_optimized(void)
 {
 	return static_key_enabled(&have_cpacf_sha256);
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 static int __init sha256_s390_mod_init(void)
 {
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 09/12] crypto: sparc/sha256 - Export block functions as GPL only
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (7 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 08/12] crypto: s390/sha256 - Export block functions as GPL only Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 10/12] crypto: x86/sha256 - Add simd block function Herbert Xu
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/sparc/lib/crypto/sha256.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/lib/crypto/sha256.c b/arch/sparc/lib/crypto/sha256.c
index 6f118a23d210..b4fc475dcc40 100644
--- a/arch/sparc/lib/crypto/sha256.c
+++ b/arch/sparc/lib/crypto/sha256.c
@@ -30,13 +30,13 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 	else
 		sha256_blocks_generic(state, data, nblocks);
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 
 bool sha256_is_arch_optimized(void)
 {
 	return static_key_enabled(&have_sha256_opcodes);
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 static int __init sha256_sparc64_mod_init(void)
 {
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 10/12] crypto: x86/sha256 - Add simd block function
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (8 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 09/12] crypto: sparc/sha256 " Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 11/12] crypto: lib/sha256 - Use generic block helper Herbert Xu
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Add CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD and a SIMD block function
so that the caller can decide whether to use SIMD.

Also export the block functions as GPL only; there is no reason
to let arbitrary modules use these internal functions.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/lib/crypto/Kconfig  |  1 +
 arch/x86/lib/crypto/sha256.c | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/lib/crypto/Kconfig b/arch/x86/lib/crypto/Kconfig
index e344579db3d8..5e94cdee492c 100644
--- a/arch/x86/lib/crypto/Kconfig
+++ b/arch/x86/lib/crypto/Kconfig
@@ -30,4 +30,5 @@ config CRYPTO_SHA256_X86_64
 	depends on 64BIT
 	default CRYPTO_LIB_SHA256
 	select CRYPTO_ARCH_HAVE_LIB_SHA256
+	select CRYPTO_ARCH_HAVE_LIB_SHA256_SIMD
 	select CRYPTO_LIB_SHA256_GENERIC
diff --git a/arch/x86/lib/crypto/sha256.c b/arch/x86/lib/crypto/sha256.c
index 47865b5cd94b..cdd88497eedf 100644
--- a/arch/x86/lib/crypto/sha256.c
+++ b/arch/x86/lib/crypto/sha256.c
@@ -6,7 +6,6 @@
  */
 #include <asm/fpu/api.h>
 #include <crypto/internal/sha2.h>
-#include <crypto/internal/simd.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/static_call.h>
@@ -24,10 +23,10 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86);
 
 DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3);
 
-void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+void sha256_blocks_simd(u32 state[SHA256_STATE_WORDS],
 			const u8 *data, size_t nblocks)
 {
-	if (static_branch_likely(&have_sha256_x86) && crypto_simd_usable()) {
+	if (static_branch_likely(&have_sha256_x86)) {
 		kernel_fpu_begin();
 		static_call(sha256_blocks_x86)(state, data, nblocks);
 		kernel_fpu_end();
@@ -35,13 +34,20 @@ void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
 		sha256_blocks_generic(state, data, nblocks);
 	}
 }
-EXPORT_SYMBOL(sha256_blocks_arch);
+EXPORT_SYMBOL_GPL(sha256_blocks_simd);
+
+void sha256_blocks_arch(u32 state[SHA256_STATE_WORDS],
+			const u8 *data, size_t nblocks)
+{
+	sha256_blocks_generic(state, data, nblocks);
+}
+EXPORT_SYMBOL_GPL(sha256_blocks_arch);
 
 bool sha256_is_arch_optimized(void)
 {
 	return static_key_enabled(&have_sha256_x86);
 }
-EXPORT_SYMBOL(sha256_is_arch_optimized);
+EXPORT_SYMBOL_GPL(sha256_is_arch_optimized);
 
 static int __init sha256_x86_mod_init(void)
 {
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 11/12] crypto: lib/sha256 - Use generic block helper
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (9 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 10/12] crypto: x86/sha256 - Add simd block function Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30  6:06 ` [PATCH 12/12] crypto: sha256 - Use the partial block API Herbert Xu
  2025-04-30 17:45 ` [PATCH 00/12] crypto: sha256 - Use " Eric Biggers
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Use the BLOCK_HASH_UPDATE_BLOCKS helper instead of duplicating
partial block handling.

Also remove the unused lib/sha256 force-generic interface.
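
For reference, the helper encapsulates the usual buffer-then-stream
pattern that the removed code below open-codes; approximately (a
sketch, not the macro's actual definition):

	/* BLOCK_HASH_UPDATE_BLOCKS(block_fn, state, data, len,
	 * bsize, buf, partial) -- approximate expansion */
	if (partial) {
		size_t fill = min(len, bsize - partial);

		/* top up the carried partial block first */
		memcpy(buf + partial, data, fill);
		data += fill;
		len -= fill;
		partial += fill;
		if (partial == bsize) {
			block_fn(state, buf, 1);
			partial = 0;
		}
	}
	if (len >= bsize) {
		/* stream full blocks straight from the input */
		block_fn(state, data, len / bsize);
		data += len - len % bsize;
		len %= bsize;
	}
	memcpy(buf + partial, data, len);	/* stash the new tail */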

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/crypto/sha256.c | 75 ++++++++-------------------------------------
 1 file changed, 12 insertions(+), 63 deletions(-)

diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index b5ffff032718..35019d3b2874 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -11,6 +11,7 @@
  * Copyright (c) 2014 Red Hat Inc.
  */
 
+#include <crypto/internal/blockhash.h>
 #include <crypto/internal/sha2.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -31,72 +32,41 @@ static inline bool sha256_purgatory(void)
 }
 
 static inline void sha256_blocks(u32 state[SHA256_STATE_WORDS], const u8 *data,
-				 size_t nblocks, bool force_generic)
+				 size_t nblocks)
 {
-	sha256_choose_blocks(state, data, nblocks,
-			     force_generic || sha256_purgatory(), false);
-}
-
-static inline void __sha256_update(struct sha256_state *sctx, const u8 *data,
-				   size_t len, bool force_generic)
-{
-	size_t partial = sctx->count % SHA256_BLOCK_SIZE;
-
-	sctx->count += len;
-
-	if (partial + len >= SHA256_BLOCK_SIZE) {
-		size_t nblocks;
-
-		if (partial) {
-			size_t l = SHA256_BLOCK_SIZE - partial;
-
-			memcpy(&sctx->buf[partial], data, l);
-			data += l;
-			len -= l;
-
-			sha256_blocks(sctx->state, sctx->buf, 1, force_generic);
-		}
-
-		nblocks = len / SHA256_BLOCK_SIZE;
-		len %= SHA256_BLOCK_SIZE;
-
-		if (nblocks) {
-			sha256_blocks(sctx->state, data, nblocks,
-				      force_generic);
-			data += nblocks * SHA256_BLOCK_SIZE;
-		}
-		partial = 0;
-	}
-	if (len)
-		memcpy(&sctx->buf[partial], data, len);
+	sha256_choose_blocks(state, data, nblocks, sha256_purgatory(), false);
 }
 
 void sha256_update(struct sha256_state *sctx, const u8 *data, size_t len)
 {
-	__sha256_update(sctx, data, len, false);
+	size_t partial = sctx->count % SHA256_BLOCK_SIZE;
+
+	sctx->count += len;
+	BLOCK_HASH_UPDATE_BLOCKS(sha256_blocks, sctx->ctx.state, data, len,
+				 SHA256_BLOCK_SIZE, sctx->buf, partial);
 }
 EXPORT_SYMBOL(sha256_update);
 
 static inline void __sha256_final(struct sha256_state *sctx, u8 *out,
-				  size_t digest_size, bool force_generic)
+				  size_t digest_size)
 {
 	unsigned int len = sctx->count % SHA256_BLOCK_SIZE;
 
 	sctx->count -= len;
 	sha256_finup(&sctx->ctx, sctx->buf, len, out, digest_size,
-		     force_generic || sha256_purgatory(), false);
+		     sha256_purgatory(), false);
 	memzero_explicit(sctx, sizeof(*sctx));
 }
 
 void sha256_final(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE])
 {
-	__sha256_final(sctx, out, SHA256_DIGEST_SIZE, false);
+	__sha256_final(sctx, out, SHA256_DIGEST_SIZE);
 }
 EXPORT_SYMBOL(sha256_final);
 
 void sha224_final(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE])
 {
-	__sha256_final(sctx, out, SHA224_DIGEST_SIZE, false);
+	__sha256_final(sctx, out, SHA224_DIGEST_SIZE);
 }
 EXPORT_SYMBOL(sha224_final);
 
@@ -110,26 +80,5 @@ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE])
 }
 EXPORT_SYMBOL(sha256);
 
-#if IS_ENABLED(CONFIG_CRYPTO_SHA256) && !defined(__DISABLE_EXPORTS)
-void sha256_update_generic(struct sha256_state *sctx,
-			   const u8 *data, size_t len)
-{
-	__sha256_update(sctx, data, len, true);
-}
-EXPORT_SYMBOL(sha256_update_generic);
-
-void sha256_final_generic(struct sha256_state *sctx, u8 out[SHA256_DIGEST_SIZE])
-{
-	__sha256_final(sctx, out, SHA256_DIGEST_SIZE, true);
-}
-EXPORT_SYMBOL(sha256_final_generic);
-
-void sha224_final_generic(struct sha256_state *sctx, u8 out[SHA224_DIGEST_SIZE])
-{
-	__sha256_final(sctx, out, SHA224_DIGEST_SIZE, true);
-}
-EXPORT_SYMBOL(sha224_final_generic);
-#endif
-
 MODULE_DESCRIPTION("SHA-256 Algorithm");
 MODULE_LICENSE("GPL");
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 12/12] crypto: sha256 - Use the partial block API
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (10 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 11/12] crypto: lib/sha256 - Use generic block helper Herbert Xu
@ 2025-04-30  6:06 ` Herbert Xu
  2025-04-30 17:45 ` [PATCH 00/12] crypto: sha256 - Use " Eric Biggers
  12 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-04-30  6:06 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Use the shash partial block API by default.  Add a separate set
of lib shash algorithms to preserve testing coverage until lib/sha256
has its own tests.
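
After this patch the module registers three flavours per digest
(reconstructed from the diff below):

	sha256-generic / sha224-generic   block API, priority 100
	sha256-lib     / sha224-lib       lib/sha256 wrappers, no
					  explicit priority, kept only
					  for test coverage
	sha256-<ARCH>  / sha224-<ARCH>    block API with forced SIMD,
					  priority 300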

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha256.c | 90 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 69 insertions(+), 21 deletions(-)

diff --git a/crypto/sha256.c b/crypto/sha256.c
index 9463c06ea39c..1068c206247f 100644
--- a/crypto/sha256.c
+++ b/crypto/sha256.c
@@ -45,14 +45,26 @@ static int crypto_sha256_update_generic(struct shash_desc *desc, const u8 *data,
 	return remain;
 }
 
-static int crypto_sha256_update_arch(struct shash_desc *desc, const u8 *data,
-				     unsigned int len)
+static int crypto_sha256_update_lib(struct shash_desc *desc, const u8 *data,
+				    unsigned int len)
 {
 	sha256_update(shash_desc_ctx(desc), data, len);
 	return 0;
 }
 
-static int crypto_sha256_final_arch(struct shash_desc *desc, u8 *out)
+static int crypto_sha256_update_arch(struct shash_desc *desc, const u8 *data,
+				     unsigned int len)
+{
+	struct crypto_sha256_state *sctx = shash_desc_ctx(desc);
+	int remain = len - round_down(len, SHA256_BLOCK_SIZE);
+
+	sctx->count += len - remain;
+	sha256_choose_blocks(sctx->state, data, len / SHA256_BLOCK_SIZE,
+			     false, true);
+	return remain;
+}
+
+static int crypto_sha256_final_lib(struct shash_desc *desc, u8 *out)
 {
 	sha256_final(shash_desc_ctx(desc), out);
 	return 0;
@@ -74,10 +86,13 @@ static int crypto_sha256_finup_generic(struct shash_desc *desc, const u8 *data,
 static int crypto_sha256_finup_arch(struct shash_desc *desc, const u8 *data,
 				    unsigned int len, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct crypto_sha256_state *sctx = shash_desc_ctx(desc);
+	int remain = len;
 
-	sha256_update(sctx, data, len);
-	sha256_final(sctx, out);
+	if (remain >= SHA256_BLOCK_SIZE)
+		remain = crypto_sha256_update_arch(desc, data, remain);
+	sha256_finup(sctx, data + len - remain, remain, out,
+		     crypto_shash_digestsize(desc->tfm), false, true);
 	return 0;
 }
 
@@ -88,20 +103,27 @@ static int crypto_sha256_digest_generic(struct shash_desc *desc, const u8 *data,
 	return crypto_sha256_finup_generic(desc, data, len, out);
 }
 
-static int crypto_sha256_digest_arch(struct shash_desc *desc, const u8 *data,
-				     unsigned int len, u8 *out)
+static int crypto_sha256_digest_lib(struct shash_desc *desc, const u8 *data,
+				    unsigned int len, u8 *out)
 {
 	sha256(data, len, out);
 	return 0;
 }
 
+static int crypto_sha256_digest_arch(struct shash_desc *desc, const u8 *data,
+				     unsigned int len, u8 *out)
+{
+	crypto_sha256_init(desc);
+	return crypto_sha256_finup_arch(desc, data, len, out);
+}
+
 static int crypto_sha224_init(struct shash_desc *desc)
 {
 	sha224_block_init(shash_desc_ctx(desc));
 	return 0;
 }
 
-static int crypto_sha224_final_arch(struct shash_desc *desc, u8 *out)
+static int crypto_sha224_final_lib(struct shash_desc *desc, u8 *out)
 {
 	sha224_final(shash_desc_ctx(desc), out);
 	return 0;
@@ -165,16 +187,14 @@ static struct shash_alg algs[] = {
 	},
 	{
 		.base.cra_name		= "sha256",
-		.base.cra_driver_name	= "sha256-" __stringify(ARCH),
-		.base.cra_priority	= 300,
+		.base.cra_driver_name	= "sha256-lib",
 		.base.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.base.cra_module	= THIS_MODULE,
 		.digestsize		= SHA256_DIGEST_SIZE,
 		.init			= crypto_sha256_init,
-		.update			= crypto_sha256_update_arch,
-		.final			= crypto_sha256_final_arch,
-		.finup			= crypto_sha256_finup_arch,
-		.digest			= crypto_sha256_digest_arch,
+		.update			= crypto_sha256_update_lib,
+		.final			= crypto_sha256_final_lib,
+		.digest			= crypto_sha256_digest_lib,
 		.descsize		= sizeof(struct sha256_state),
 		.statesize		= sizeof(struct crypto_sha256_state) +
 					  SHA256_BLOCK_SIZE + 1,
@@ -183,20 +203,48 @@ static struct shash_alg algs[] = {
 	},
 	{
 		.base.cra_name		= "sha224",
-		.base.cra_driver_name	= "sha224-" __stringify(ARCH),
-		.base.cra_priority	= 300,
+		.base.cra_driver_name	= "sha224-lib",
 		.base.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.base.cra_module	= THIS_MODULE,
 		.digestsize		= SHA224_DIGEST_SIZE,
 		.init			= crypto_sha224_init,
-		.update			= crypto_sha256_update_arch,
-		.final			= crypto_sha224_final_arch,
+		.update			= crypto_sha256_update_lib,
+		.final			= crypto_sha224_final_lib,
 		.descsize		= sizeof(struct sha256_state),
 		.statesize		= sizeof(struct crypto_sha256_state) +
 					  SHA256_BLOCK_SIZE + 1,
 		.import			= crypto_sha256_import_lib,
 		.export			= crypto_sha256_export_lib,
 	},
+	{
+		.base.cra_name		= "sha256",
+		.base.cra_driver_name	= "sha256-" __stringify(ARCH),
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY |
+					  CRYPTO_AHASH_ALG_FINUP_MAX,
+		.base.cra_blocksize	= SHA256_BLOCK_SIZE,
+		.base.cra_module	= THIS_MODULE,
+		.digestsize		= SHA256_DIGEST_SIZE,
+		.init			= crypto_sha256_init,
+		.update			= crypto_sha256_update_arch,
+		.finup			= crypto_sha256_finup_arch,
+		.digest			= crypto_sha256_digest_arch,
+		.descsize		= sizeof(struct crypto_sha256_state),
+	},
+	{
+		.base.cra_name		= "sha224",
+		.base.cra_driver_name	= "sha224-" __stringify(ARCH),
+		.base.cra_priority	= 300,
+		.base.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY |
+					  CRYPTO_AHASH_ALG_FINUP_MAX,
+		.base.cra_blocksize	= SHA224_BLOCK_SIZE,
+		.base.cra_module	= THIS_MODULE,
+		.digestsize		= SHA224_DIGEST_SIZE,
+		.init			= crypto_sha224_init,
+		.update			= crypto_sha256_update_arch,
+		.finup			= crypto_sha256_finup_arch,
+		.descsize		= sizeof(struct crypto_sha256_state),
+	},
 };
 
 static unsigned int num_algs;
@@ -205,9 +253,9 @@ static int __init crypto_sha256_mod_init(void)
 {
 	/* register the arch flavours only if they differ from generic */
 	num_algs = ARRAY_SIZE(algs);
-	BUILD_BUG_ON(ARRAY_SIZE(algs) % 2 != 0);
+	BUILD_BUG_ON(ARRAY_SIZE(algs) <= 2);
 	if (!sha256_is_arch_optimized())
-		num_algs /= 2;
+		num_algs -= 2;
 	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
 }
 subsys_initcall(crypto_sha256_mod_init);
-- 
2.39.5


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 00/12] crypto: sha256 - Use partial block API
  2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
                   ` (11 preceding siblings ...)
  2025-04-30  6:06 ` [PATCH 12/12] crypto: sha256 - Use the partial block API Herbert Xu
@ 2025-04-30 17:45 ` Eric Biggers
  2025-05-01  1:21   ` Herbert Xu
  12 siblings, 1 reply; 17+ messages in thread
From: Eric Biggers @ 2025-04-30 17:45 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Linux Crypto Mailing List, linux-kernel, linux-arch,
	linux-arm-kernel, linux-mips, linuxppc-dev, linux-riscv,
	sparclinux, linux-s390, x86, Ard Biesheuvel, Jason A . Donenfeld,
	Linus Torvalds

[Added back Cc's that were dropped]

On Wed, Apr 30, 2025 at 02:06:15PM +0800, Herbert Xu wrote:
> This is based on
> 
> 	https://patchwork.kernel.org/project/linux-crypto/list/?series=957785

I'm assuming that you mean that with your diff
https://lore.kernel.org/r/aBGdiv17ztQnhAps@gondor.apana.org.au folded into my
first patch, since otherwise your patch series doesn't apply.  But even with
that done, your patch series doesn't build:

    In file included from ./include/crypto/hash_info.h:12,
                     from crypto/hash_info.c:9:
    ./include/crypto/sha2.h: In function ‘sha256_init’:
    ./include/crypto/sha2.h:101:32: error: ‘struct sha256_state’ has no member named ‘ctx’
      101 |         sha256_block_init(&sctx->ctx);
          |                                ^~

> Rather than going through the lib/sha256 partial block handling,
> use the native shash partial block API.  Add two extra shash
> algorithms to provide testing coverage for lib/sha256.
> 
> Herbert Xu (12):
>   crypto: lib/sha256 - Restore lib_sha256 finup code
>   crypto: sha256 - Use the partial block API for generic
>   crypto: arm/sha256 - Add simd block function
>   crypto: arm64/sha256 - Add simd block function
>   crypto: mips/sha256 - Export block functions as GPL only
>   crypto: powerpc/sha256 - Export block functions as GPL only
>   crypto: riscv/sha256 - Add simd block function
>   crypto: s390/sha256 - Export block functions as GPL only
>   crypto: sparc/sha256 - Export block functions as GPL only
>   crypto: x86/sha256 - Add simd block function
>   crypto: lib/sha256 - Use generic block helper
>   crypto: sha256 - Use the partial block API
>
>  arch/arm/lib/crypto/Kconfig                   |   1 +
>  arch/arm/lib/crypto/sha256-armv4.pl           |  20 +--
>  arch/arm/lib/crypto/sha256.c                  |  16 +--
>  arch/arm64/crypto/sha512-glue.c               |   6 +-
>  arch/arm64/lib/crypto/Kconfig                 |   1 +
>  arch/arm64/lib/crypto/sha2-armv8.pl           |   2 +-
>  arch/arm64/lib/crypto/sha256.c                |  16 +--
>  .../mips/cavium-octeon/crypto/octeon-sha256.c |   4 +-
>  arch/powerpc/lib/crypto/sha256.c              |   4 +-
>  arch/riscv/lib/crypto/Kconfig                 |   1 +
>  arch/riscv/lib/crypto/sha256.c                |  17 ++-
>  arch/s390/lib/crypto/sha256.c                 |   4 +-
>  arch/sparc/lib/crypto/sha256.c                |   4 +-
>  arch/x86/lib/crypto/Kconfig                   |   1 +
>  arch/x86/lib/crypto/sha256.c                  |  16 ++-
>  crypto/sha256.c                               | 134 +++++++++++-------
>  include/crypto/internal/sha2.h                |  46 ++++++
>  include/crypto/sha2.h                         |  14 +-
>  lib/crypto/Kconfig                            |   8 ++
>  lib/crypto/sha256.c                           | 100 +++----------
>  20 files changed, 232 insertions(+), 183 deletions(-)

The EXPORT_SYMBOL => EXPORT_SYMBOL_GPL changes are fine and should just be one
patch.  I was just trying to be consistent with lib/crypto/sha256.c, which uses
EXPORT_SYMBOL, but EXPORT_SYMBOL_GPL is fine too.

Everything else in this series is harmful, IMO.

I already covered why crypto_shash should simply use the library and not do
anything special.

As for your sha256_finup "optimization", it's an interesting idea, but
unfortunately it slightly slows down the common case which is count % 64 < 56,
due to the unnecessary copy to the stack and the following zeroization.  In the
uncommon case where count % 64 >= 56 you do get to pass nblocks=2 to
sha256_blocks_*(), but ultimately SHA-256 is serialized block-by-block anyway,
so it ends up being only slightly faster in that case, which again is the
uncommon case.  So while it's an interesting idea, it doesn't seem to actually
be better.  And the fact that that patch is also being used to submit unrelated,
more dubious changes isn't very helpful, of course.
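
For reference, the 56-byte boundary comes from the standard SHA-256
padding layout; a sketch of the arithmetic:

	partial = count % 64;
	/* padding = 0x80 byte + zero fill + 8-byte bit length */
	if (partial < 56)		/* 64 - 8: all of it fits */
		final_blocks = 1;	/* the common case */
	else				/* the length field spills */
		final_blocks = 2;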

- Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 00/12] crypto: sha256 - Use partial block API
  2025-04-30 17:45 ` [PATCH 00/12] crypto: sha256 - Use " Eric Biggers
@ 2025-05-01  1:21   ` Herbert Xu
  2025-05-01  2:26     ` Eric Biggers
  0 siblings, 1 reply; 17+ messages in thread
From: Herbert Xu @ 2025-05-01  1:21 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Linux Crypto Mailing List, linux-kernel, linux-arch,
	linux-arm-kernel, linux-mips, linuxppc-dev, linux-riscv,
	sparclinux, linux-s390, x86, Ard Biesheuvel, Jason A . Donenfeld,
	Linus Torvalds

On Wed, Apr 30, 2025 at 10:45:43AM -0700, Eric Biggers wrote:
>
> As for your sha256_finup "optimization", it's an interesting idea, but
> unfortunately it slightly slows down the common case which is count % 64 < 56,
> due to the unnecessary copy to the stack and the following zeroization.  In the
> uncommon case where count % 64 >= 56 you do get to pass nblocks=2 to
> sha256_blocks_*(), but ultimately SHA-256 is serialized block-by-block anyway,
> so it ends up being only slightly faster in that case, which again is the
> uncommon case.  So while it's an interesting idea, it doesn't seem to actually
> be better.  And the fact that that patch is also being used to submit unrelated,
> more dubious changes isn't very helpful, of course.

I'm more than willing to change sha256_finup if you can prove
with real numbers that it is worse than the single-block version.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 00/12] crypto: sha256 - Use partial block API
  2025-05-01  1:21   ` Herbert Xu
@ 2025-05-01  2:26     ` Eric Biggers
  2025-05-01  5:19       ` Herbert Xu
  0 siblings, 1 reply; 17+ messages in thread
From: Eric Biggers @ 2025-05-01  2:26 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Linux Crypto Mailing List, linux-kernel, linux-arch,
	linux-arm-kernel, linux-mips, linuxppc-dev, linux-riscv,
	sparclinux, linux-s390, x86, Ard Biesheuvel, Jason A . Donenfeld,
	Linus Torvalds

On Thu, May 01, 2025 at 09:21:15AM +0800, Herbert Xu wrote:
> On Wed, Apr 30, 2025 at 10:45:43AM -0700, Eric Biggers wrote:
> >
> > As for your sha256_finup "optimization", it's an interesting idea, but
> > unfortunately it slightly slows down the common case which is count % 64 < 56,
> > due to the unnecessary copy to the stack and the following zeroization.  In the
> > uncommon case where count % 64 >= 56 you do get to pass nblocks=2 to
> > sha256_blocks_*(), but ultimately SHA-256 is serialized block-by-block anyway,
> > so it ends up being only slightly faster in that case, which again is the
> > uncommon case.  So while it's an interesting idea, it doesn't seem to actually
> > be better.  And the fact that that patch is also being used to submit unrelated,
> > more dubious changes isn't very helpful, of course.
> 
> I'm more than willing to change sha256_finup if you can prove with real
> numbers that it is worse than the single-block version.

Interesting approach -- pushing out misguided optimizations without data, then
demanding data before they can be reverted.  It's obviously worse for
len % 64 < 56 for the reason I gave, so this is a waste of time IMO.  But since
you're insisting on data anyway, here are some quick benchmarks on AMD Zen 5
(not going to bother formatting into a table):

Before your finup "optimization":

 sha256(len=0): 145 cycles
 sha256(len=1): 146 cycles
 sha256(len=2): 146 cycles
 sha256(len=3): 146 cycles
 sha256(len=4): 146 cycles
 sha256(len=5): 146 cycles
 sha256(len=6): 146 cycles
 sha256(len=7): 146 cycles
 sha256(len=8): 151 cycles
 sha256(len=9): 148 cycles
 sha256(len=10): 148 cycles
 sha256(len=11): 148 cycles
 sha256(len=12): 148 cycles
 sha256(len=13): 148 cycles
 sha256(len=14): 148 cycles
 sha256(len=15): 149 cycles
 sha256(len=16): 149 cycles
 sha256(len=17): 148 cycles
 sha256(len=18): 148 cycles
 sha256(len=19): 148 cycles
 sha256(len=20): 148 cycles
 sha256(len=21): 148 cycles
 sha256(len=22): 148 cycles
 sha256(len=23): 148 cycles
 sha256(len=24): 148 cycles
 sha256(len=25): 148 cycles
 sha256(len=26): 148 cycles
 sha256(len=27): 148 cycles
 sha256(len=28): 148 cycles
 sha256(len=29): 148 cycles
 sha256(len=30): 148 cycles
 sha256(len=31): 148 cycles
 sha256(len=32): 151 cycles
 sha256(len=33): 148 cycles
 sha256(len=34): 148 cycles
 sha256(len=35): 148 cycles
 sha256(len=36): 148 cycles
 sha256(len=37): 148 cycles
 sha256(len=38): 148 cycles
 sha256(len=39): 148 cycles
 sha256(len=40): 148 cycles
 sha256(len=41): 148 cycles
 sha256(len=42): 148 cycles
 sha256(len=43): 148 cycles
 sha256(len=44): 148 cycles
 sha256(len=45): 148 cycles
 sha256(len=46): 150 cycles
 sha256(len=47): 149 cycles
 sha256(len=48): 147 cycles
 sha256(len=49): 147 cycles
 sha256(len=50): 147 cycles
 sha256(len=51): 147 cycles
 sha256(len=52): 147 cycles
 sha256(len=53): 147 cycles
 sha256(len=54): 147 cycles
 sha256(len=55): 148 cycles
 sha256(len=56): 278 cycles
 sha256(len=57): 278 cycles
 sha256(len=58): 278 cycles
 sha256(len=59): 278 cycles
 sha256(len=60): 277 cycles
 sha256(len=61): 277 cycles
 sha256(len=62): 277 cycles
 sha256(len=63): 276 cycles
 sha256(len=64): 276 cycles

After your finup "optimization":

 sha256(len=0): 188 cycles
 sha256(len=1): 190 cycles
 sha256(len=2): 190 cycles
 sha256(len=3): 190 cycles
 sha256(len=4): 189 cycles
 sha256(len=5): 189 cycles
 sha256(len=6): 189 cycles
 sha256(len=7): 190 cycles
 sha256(len=8): 187 cycles
 sha256(len=9): 188 cycles
 sha256(len=10): 188 cycles
 sha256(len=11): 188 cycles
 sha256(len=12): 189 cycles
 sha256(len=13): 189 cycles
 sha256(len=14): 188 cycles
 sha256(len=15): 189 cycles
 sha256(len=16): 189 cycles
 sha256(len=17): 190 cycles
 sha256(len=18): 190 cycles
 sha256(len=19): 190 cycles
 sha256(len=20): 190 cycles
 sha256(len=21): 190 cycles
 sha256(len=22): 190 cycles
 sha256(len=23): 190 cycles
 sha256(len=24): 191 cycles
 sha256(len=25): 191 cycles
 sha256(len=26): 191 cycles
 sha256(len=27): 191 cycles
 sha256(len=28): 191 cycles
 sha256(len=29): 192 cycles
 sha256(len=30): 191 cycles
 sha256(len=31): 191 cycles
 sha256(len=32): 191 cycles
 sha256(len=33): 191 cycles
 sha256(len=34): 191 cycles
 sha256(len=35): 191 cycles
 sha256(len=36): 192 cycles
 sha256(len=37): 192 cycles
 sha256(len=38): 192 cycles
 sha256(len=39): 191 cycles
 sha256(len=40): 191 cycles
 sha256(len=41): 194 cycles
 sha256(len=42): 193 cycles
 sha256(len=43): 193 cycles
 sha256(len=44): 193 cycles
 sha256(len=45): 193 cycles
 sha256(len=46): 194 cycles
 sha256(len=47): 194 cycles
 sha256(len=48): 193 cycles
 sha256(len=49): 195 cycles
 sha256(len=50): 195 cycles
 sha256(len=51): 196 cycles
 sha256(len=52): 196 cycles
 sha256(len=53): 195 cycles
 sha256(len=54): 195 cycles
 sha256(len=55): 195 cycles
 sha256(len=56): 297 cycles
 sha256(len=57): 297 cycles
 sha256(len=58): 297 cycles
 sha256(len=59): 297 cycles
 sha256(len=60): 297 cycles
 sha256(len=61): 297 cycles
 sha256(len=62): 297 cycles
 sha256(len=63): 297 cycles
 sha256(len=64): 292 cycles

So your "optimization" made it ~43 cycles slower for len % 64 < 56, or ~19
cycles slower for len % 64 >= 56.

As I said, the slowdown comes from the overhead of unnecessarily copying the
data onto the stack and then having to zeroize it at the end.
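
For reference, a minimal sketch of how per-length cycle counts like the
ones above could be reproduced from userspace.  The methodology here is
illustrative, not a description of the actual harness used: OpenSSL's
one-shot SHA256() stands in for the kernel's sha256() library call, and
best-of-N __rdtsc() timing is only a rough proxy for careful kernel-side
measurement.

/* Build: cc -O2 bench.c -lcrypto */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <openssl/sha.h>
#include <x86intrin.h>		/* __rdtsc() */

int main(void)
{
	uint8_t data[64] = {0}, out[SHA256_DIGEST_LENGTH];

	for (size_t len = 0; len <= 64; len++) {
		uint64_t best = UINT64_MAX;

		/* Best of many runs filters out interrupts and noise. */
		for (int i = 0; i < 10000; i++) {
			uint64_t t0 = __rdtsc();
			SHA256(data, len, out);
			uint64_t t1 = __rdtsc();
			if (t1 - t0 < best)
				best = t1 - t0;
		}
		printf("sha256(len=%zu): %" PRIu64 " cycles\n", len, best);
	}
	return 0;
}

A serious harness would additionally pin the task to one CPU, run at a
fixed frequency, and serialize around the timestamp reads (rdtscp or
lfence), since plain rdtsc counts reference cycles and can reorder.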

- Eric

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 00/12] crypto: sha256 - Use partial block API
  2025-05-01  2:26     ` Eric Biggers
@ 2025-05-01  5:19       ` Herbert Xu
  0 siblings, 0 replies; 17+ messages in thread
From: Herbert Xu @ 2025-05-01  5:19 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Linux Crypto Mailing List, linux-kernel, linux-arch,
	linux-arm-kernel, linux-mips, linuxppc-dev, linux-riscv,
	sparclinux, linux-s390, x86, Ard Biesheuvel, Jason A . Donenfeld,
	Linus Torvalds

On Wed, Apr 30, 2025 at 07:26:17PM -0700, Eric Biggers wrote:
>
> Interesting approach -- pushing out misguided optimizations without data, then
> demanding data before they can be reverted.  It's obviously worse for
> len % 64 < 56 for the reason I gave, so this is a waste of time IMO.  But since
> you're insisting on data anyway, here are some quick benchmarks on AMD Zen 5
> (not going to bother formatting into a table):
> 
> Before your finup "optimization":

Thanks, I'll revert to the single-block version.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2025-05-01  5:19 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-30  6:06 [PATCH 00/12] crypto: sha256 - Use partial block API Herbert Xu
2025-04-30  6:06 ` [PATCH 01/12] crypto: lib/sha256 - Restore lib_sha256 finup code Herbert Xu
2025-04-30  6:06 ` [PATCH 02/12] crypto: sha256 - Use the partial block API for generic Herbert Xu
2025-04-30  6:06 ` [PATCH 03/12] crypto: arm/sha256 - Add simd block function Herbert Xu
2025-04-30  6:06 ` [PATCH 04/12] crypto: arm64/sha256 " Herbert Xu
2025-04-30  6:06 ` [PATCH 05/12] crypto: mips/sha256 - Export block functions as GPL only Herbert Xu
2025-04-30  6:06 ` [PATCH 06/12] crypto: powerpc/sha256 " Herbert Xu
2025-04-30  6:06 ` [PATCH 07/12] crypto: riscv/sha256 - Add simd block function Herbert Xu
2025-04-30  6:06 ` [PATCH 08/12] crypto: s390/sha256 - Export block functions as GPL only Herbert Xu
2025-04-30  6:06 ` [PATCH 09/12] crypto: sparc/sha256 " Herbert Xu
2025-04-30  6:06 ` [PATCH 10/12] crypto: x86/sha256 - Add simd block function Herbert Xu
2025-04-30  6:06 ` [PATCH 11/12] crypto: lib/sha256 - Use generic block helper Herbert Xu
2025-04-30  6:06 ` [PATCH 12/12] crypto: sha256 - Use the partial block API Herbert Xu
2025-04-30 17:45 ` [PATCH 00/12] crypto: sha256 - Use " Eric Biggers
2025-05-01  1:21   ` Herbert Xu
2025-05-01  2:26     ` Eric Biggers
2025-05-01  5:19       ` Herbert Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox