All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	Ard Biesheuvel <ardb@kernel.org>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>,
	Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 1/2] lib/crypto: x86/sha256: Move static_call above kernel-mode FPU section
Date: Thu,  3 Jul 2025 19:39:57 -0700	[thread overview]
Message-ID: <20250704023958.73274-2-ebiggers@kernel.org> (raw)
In-Reply-To: <20250704023958.73274-1-ebiggers@kernel.org>

As I did for sha512_blocks(), reorganize x86's sha256_blocks() to be
just a static_call.  To achieve that, for each assembly function add a C
wrapper function that handles the kernel-mode FPU section and the
fallback to the generic code.  While this increases total code size
slightly, the amount of code actually executed on a given system does
not increase, and the new arrangement is slightly more efficient since
it eliminates the extra static_key.  The assembly functions are also now
called with standard direct calls instead of static calls, which
eliminates the need for ANNOTATE_NOENDBR.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 lib/crypto/x86/sha256-avx-asm.S   |  3 --
 lib/crypto/x86/sha256-avx2-asm.S  |  3 --
 lib/crypto/x86/sha256-ni-asm.S    |  2 --
 lib/crypto/x86/sha256-ssse3-asm.S |  2 --
 lib/crypto/x86/sha256.h           | 48 ++++++++++++++++---------------
 5 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/lib/crypto/x86/sha256-avx-asm.S b/lib/crypto/x86/sha256-avx-asm.S
index 73bcff2b548f4..798a7f07fa013 100644
--- a/lib/crypto/x86/sha256-avx-asm.S
+++ b/lib/crypto/x86/sha256-avx-asm.S
@@ -46,11 +46,10 @@
 ########################################################################
 # This code schedules 1 block at a time, with 4 lanes per block
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define    VMOVDQ vmovdqu
 
 ################################ Define Macros
@@ -344,12 +343,10 @@ a = TMP_
 ## void sha256_transform_avx(struct sha256_block_state *state,
 ##			     const u8 *data, size_t nblocks);
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_avx)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
-
 	pushq   %rbx
 	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
diff --git a/lib/crypto/x86/sha256-avx2-asm.S b/lib/crypto/x86/sha256-avx2-asm.S
index 45787570387f2..62a46993359e6 100644
--- a/lib/crypto/x86/sha256-avx2-asm.S
+++ b/lib/crypto/x86/sha256-avx2-asm.S
@@ -47,11 +47,10 @@
 ########################################################################
 # This code schedules 2 blocks at a time, with 4 lanes per block
 ########################################################################
 
 #include <linux/linkage.h>
-#include <linux/objtool.h>
 
 ## assume buffers not aligned
 #define	VMOVDQ vmovdqu
 
 ################################ Define Macros
@@ -521,12 +520,10 @@ STACK_SIZE	= _CTX      + _CTX_SIZE
 ## void sha256_transform_rorx(struct sha256_block_state *state,
 ##			      const u8 *data, size_t nblocks);
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_rorx)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
-
 	pushq	%rbx
 	pushq	%r12
 	pushq	%r13
 	pushq	%r14
 	pushq	%r15
diff --git a/lib/crypto/x86/sha256-ni-asm.S b/lib/crypto/x86/sha256-ni-asm.S
index 4af7d22e29e47..9ebbacbb9c13b 100644
--- a/lib/crypto/x86/sha256-ni-asm.S
+++ b/lib/crypto/x86/sha256-ni-asm.S
@@ -52,11 +52,10 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 
 #include <linux/linkage.h>
-#include <linux/objtool.h>
 
 #define STATE_PTR	%rdi	/* 1st arg */
 #define DATA_PTR	%rsi	/* 2nd arg */
 #define NUM_BLKS	%rdx	/* 3rd arg */
 
@@ -109,11 +108,10 @@
  * void sha256_ni_transform(struct sha256_block_state *state,
  *			    const u8 *data, size_t nblocks);
  */
 .text
 SYM_FUNC_START(sha256_ni_transform)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
 
 	shl		$6, NUM_BLKS		/*  convert to bytes */
 	jz		.Ldone_hash
 	add		DATA_PTR, NUM_BLKS	/* pointer to end of data */
 
diff --git a/lib/crypto/x86/sha256-ssse3-asm.S b/lib/crypto/x86/sha256-ssse3-asm.S
index 407b30adcd37f..3b602b7d43fad 100644
--- a/lib/crypto/x86/sha256-ssse3-asm.S
+++ b/lib/crypto/x86/sha256-ssse3-asm.S
@@ -351,12 +351,10 @@ a = TMP_
 ## void sha256_transform_ssse3(struct sha256_block_state *state,
 ##			       const u8 *data, size_t nblocks);
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_ssse3)
-	ANNOTATE_NOENDBR	# since this is called only via static_call
-
 	pushq   %rbx
 	pushq   %r12
 	pushq   %r13
 	pushq   %r14
 	pushq   %r15
diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h
index 3b5456c222ba6..669bc06538b67 100644
--- a/lib/crypto/x86/sha256.h
+++ b/lib/crypto/x86/sha256.h
@@ -6,50 +6,52 @@
  */
 #include <asm/fpu/api.h>
 #include <crypto/internal/simd.h>
 #include <linux/static_call.h>
 
-asmlinkage void sha256_transform_ssse3(struct sha256_block_state *state,
-				       const u8 *data, size_t nblocks);
-asmlinkage void sha256_transform_avx(struct sha256_block_state *state,
-				     const u8 *data, size_t nblocks);
-asmlinkage void sha256_transform_rorx(struct sha256_block_state *state,
-				      const u8 *data, size_t nblocks);
-asmlinkage void sha256_ni_transform(struct sha256_block_state *state,
-				    const u8 *data, size_t nblocks);
+DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic);
 
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86);
+#define DEFINE_X86_SHA256_FN(c_fn, asm_fn)                                 \
+	asmlinkage void asm_fn(struct sha256_block_state *state,           \
+			       const u8 *data, size_t nblocks);            \
+	static void c_fn(struct sha256_block_state *state, const u8 *data, \
+			 size_t nblocks)                                   \
+	{                                                                  \
+		if (likely(crypto_simd_usable())) {                        \
+			kernel_fpu_begin();                                \
+			asm_fn(state, data, nblocks);                      \
+			kernel_fpu_end();                                  \
+		} else {                                                   \
+			sha256_blocks_generic(state, data, nblocks);       \
+		}                                                          \
+	}
 
-DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3);
+DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3);
+DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx);
+DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx);
+DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform);
 
 static void sha256_blocks(struct sha256_block_state *state,
 			  const u8 *data, size_t nblocks)
 {
-	if (static_branch_likely(&have_sha256_x86) && crypto_simd_usable()) {
-		kernel_fpu_begin();
-		static_call(sha256_blocks_x86)(state, data, nblocks);
-		kernel_fpu_end();
-	} else {
-		sha256_blocks_generic(state, data, nblocks);
-	}
+	static_call(sha256_blocks_x86)(state, data, nblocks);
 }
 
 #define sha256_mod_init_arch sha256_mod_init_arch
 static inline void sha256_mod_init_arch(void)
 {
 	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
-		static_call_update(sha256_blocks_x86, sha256_ni_transform);
+		static_call_update(sha256_blocks_x86, sha256_blocks_ni);
 	} else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
 				     NULL) &&
 		   boot_cpu_has(X86_FEATURE_AVX)) {
 		if (boot_cpu_has(X86_FEATURE_AVX2) &&
 		    boot_cpu_has(X86_FEATURE_BMI2))
 			static_call_update(sha256_blocks_x86,
-					   sha256_transform_rorx);
+					   sha256_blocks_avx2);
 		else
 			static_call_update(sha256_blocks_x86,
-					   sha256_transform_avx);
-	} else if (!boot_cpu_has(X86_FEATURE_SSSE3)) {
-		return;
+					   sha256_blocks_avx);
+	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
+		static_call_update(sha256_blocks_x86, sha256_blocks_ssse3);
 	}
-	static_branch_enable(&have_sha256_x86);
 }
-- 
2.50.0


  reply	other threads:[~2025-07-04  2:42 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-04  2:39 [PATCH 0/2] Small cleanups for x86 SHA-256 Eric Biggers
2025-07-04  2:39 ` Eric Biggers [this message]
2025-07-04  2:39 ` [PATCH 2/2] lib/crypto: x86/sha256: Remove unnecessary checks for nblocks==0 Eric Biggers
2025-07-04 13:26 ` [PATCH 0/2] Small cleanups for x86 SHA-256 Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250704023958.73274-2-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=Jason@zx2c4.com \
    --cc=ardb@kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.