[PATCH 2/2] lib/crypto: x86/sha1-ni: Convert to use rounds macros

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	Ard Biesheuvel <ardb@kernel.org>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>,
	Eric Biggers <ebiggers@kernel.org>
Subject: [PATCH 2/2] lib/crypto: x86/sha1-ni: Convert to use rounds macros
Date: Fri, 18 Jul 2025 12:19:00 -0700	[thread overview]
Message-ID: <20250718191900.42877-3-ebiggers@kernel.org> (raw)
In-Reply-To: <20250718191900.42877-1-ebiggers@kernel.org>

The assembly code that does all 80 rounds of SHA-1 is highly repetitive.
Replace it with 20 expansions of a macro that does 4 rounds, using the
macro arguments and .if directives to handle the slight variations
between rounds.  This reduces the length of sha1-ni-asm.S by 129 lines
while still producing the exact same object file.  This mirrors
sha256-ni-asm.S which uses this same strategy.

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
 lib/crypto/x86/sha1-ni-asm.S | 187 ++++++-----------------------------
 1 file changed, 29 insertions(+), 158 deletions(-)

diff --git a/lib/crypto/x86/sha1-ni-asm.S b/lib/crypto/x86/sha1-ni-asm.S
index 1d08b2f364ce7..428f9b9605943 100644
--- a/lib/crypto/x86/sha1-ni-asm.S
+++ b/lib/crypto/x86/sha1-ni-asm.S
@@ -68,23 +68,43 @@
 #define MSG3		%xmm6
 #define SHUF_MASK	%xmm7
 #define ABCD_SAVED	%xmm8
 #define E0_SAVED	%xmm9
 
+.macro do_4rounds	i, m0, m1, m2, m3, e0, e1	/* 4 SHA-1 rounds; i = index of the first one (0, 4, ..., 76) */
+.if \i < 16	/* rounds 0-15 take their message words directly from the input block */
+	movdqu		\i*4(DATA_PTR), \m0	/* unaligned 16-byte load at byte offset 4*i (0, 16, 32, 48) */
+	pshufb		SHUF_MASK, \m0	/* reorder bytes per SHUF_MASK (mask is set up outside this hunk; presumably the big-endian swap) */
+.endif
+.if \i == 0	/* rounds 0-3 are the only group that adds the message words with a plain paddd */
+	paddd		\m0, \e0	/* e0 += m0 */
+.else
+	sha1nexte	\m0, \e0	/* derive the E input from the value saved in e0 by the previous group, and add m0 */
+.endif
+	movdqa		ABCD, \e1	/* snapshot ABCD before the rounds; the next group passes this register as its e0 */
+.if \i >= 12 && \i < 76	/* same window as the sha1msg2 instructions in the unrolled code (rounds 12-75) */
+	sha1msg2	\m0, \m1	/* message schedule: sha1msg2 step, updates m1 */
+.endif
+	sha1rnds4	$\i / 20, \e0, ABCD	/* do the 4 rounds; immediate 0-3 selects the variant for rounds 0-19/20-39/40-59/60-79 */
+.if \i >= 4 && \i < 68	/* same window as the sha1msg1 instructions in the unrolled code (rounds 4-67) */
+	sha1msg1	\m0, \m3	/* message schedule: sha1msg1 step, updates m3 */
+.endif
+.if \i >= 8 && \i < 72	/* same window as the pxor instructions in the unrolled code (rounds 8-71) */
+	pxor		\m0, \m2	/* message schedule: m2 ^= m0 */
+.endif
+.endm
+
 /*
  * Intel SHA Extensions optimized implementation of a SHA-1 block function
  *
  * This function takes a pointer to the current SHA-1 state, a pointer to the
  * input data, and the number of 64-byte blocks to process.  The number of
  * blocks to process is assumed to be nonzero.  Once all blocks have been
  * processed, the state is updated with the new state.  This function only
  * processes complete blocks.  State initialization, buffering of partial
  * blocks, and digest finalization are expected to be handled elsewhere.
  *
- * The indented lines in the loop are instructions related to rounds processing.
- * The non-indented lines are instructions related to the message schedule.
- *
  * void sha1_ni_transform(struct sha1_block_state *state,
  *			  const u8 *data, size_t nblocks)
  */
 .text
 SYM_FUNC_START(sha1_ni_transform)
@@ -100,165 +120,16 @@ SYM_FUNC_START(sha1_ni_transform)
 .Lnext_block:
 	/* Save the state for addition after the rounds. */
 	movdqa		E0, E0_SAVED
 	movdqa		ABCD, ABCD_SAVED
 
-	/* Rounds 0-3 */
-	movdqu		0*16(DATA_PTR), MSG0
-	pshufb		SHUF_MASK, MSG0
-		paddd		MSG0, E0
-		movdqa		ABCD, E1
-		sha1rnds4	$0, E0, ABCD
-
-	/* Rounds 4-7 */
-	movdqu		1*16(DATA_PTR), MSG1
-	pshufb		SHUF_MASK, MSG1
-		sha1nexte	MSG1, E1
-		movdqa		ABCD, E0
-		sha1rnds4	$0, E1, ABCD
-	sha1msg1	MSG1, MSG0
-
-	/* Rounds 8-11 */
-	movdqu		2*16(DATA_PTR), MSG2
-	pshufb		SHUF_MASK, MSG2
-		sha1nexte	MSG2, E0
-		movdqa		ABCD, E1
-		sha1rnds4	$0, E0, ABCD
-	sha1msg1	MSG2, MSG1
-	pxor		MSG2, MSG0
-
-	/* Rounds 12-15 */
-	movdqu		3*16(DATA_PTR), MSG3
-	pshufb		SHUF_MASK, MSG3
-		sha1nexte	MSG3, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG3, MSG0
-		sha1rnds4	$0, E1, ABCD
-	sha1msg1	MSG3, MSG2
-	pxor		MSG3, MSG1
-
-	/* Rounds 16-19 */
-		sha1nexte	MSG0, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG0, MSG1
-		sha1rnds4	$0, E0, ABCD
-	sha1msg1	MSG0, MSG3
-	pxor		MSG0, MSG2
-
-	/* Rounds 20-23 */
-		sha1nexte	MSG1, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG1, MSG2
-		sha1rnds4	$1, E1, ABCD
-	sha1msg1	MSG1, MSG0
-	pxor		MSG1, MSG3
-
-	/* Rounds 24-27 */
-		sha1nexte	MSG2, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG2, MSG3
-		sha1rnds4	$1, E0, ABCD
-	sha1msg1	MSG2, MSG1
-	pxor		MSG2, MSG0
-
-	/* Rounds 28-31 */
-		sha1nexte	MSG3, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG3, MSG0
-		sha1rnds4	$1, E1, ABCD
-	sha1msg1	MSG3, MSG2
-	pxor		MSG3, MSG1
-
-	/* Rounds 32-35 */
-		sha1nexte	MSG0, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG0, MSG1
-		sha1rnds4	$1, E0, ABCD
-	sha1msg1	MSG0, MSG3
-	pxor		MSG0, MSG2
-
-	/* Rounds 36-39 */
-		sha1nexte	MSG1, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG1, MSG2
-		sha1rnds4	$1, E1, ABCD
-	sha1msg1	MSG1, MSG0
-	pxor		MSG1, MSG3
-
-	/* Rounds 40-43 */
-		sha1nexte	MSG2, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG2, MSG3
-		sha1rnds4	$2, E0, ABCD
-	sha1msg1	MSG2, MSG1
-	pxor		MSG2, MSG0
-
-	/* Rounds 44-47 */
-		sha1nexte	MSG3, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG3, MSG0
-		sha1rnds4	$2, E1, ABCD
-	sha1msg1	MSG3, MSG2
-	pxor		MSG3, MSG1
-
-	/* Rounds 48-51 */
-		sha1nexte	MSG0, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG0, MSG1
-		sha1rnds4	$2, E0, ABCD
-	sha1msg1	MSG0, MSG3
-	pxor		MSG0, MSG2
-
-	/* Rounds 52-55 */
-		sha1nexte	MSG1, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG1, MSG2
-		sha1rnds4	$2, E1, ABCD
-	sha1msg1	MSG1, MSG0
-	pxor		MSG1, MSG3
-
-	/* Rounds 56-59 */
-		sha1nexte	MSG2, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG2, MSG3
-		sha1rnds4	$2, E0, ABCD
-	sha1msg1	MSG2, MSG1
-	pxor		MSG2, MSG0
-
-	/* Rounds 60-63 */
-		sha1nexte	MSG3, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG3, MSG0
-		sha1rnds4	$3, E1, ABCD
-	sha1msg1	MSG3, MSG2
-	pxor		MSG3, MSG1
-
-	/* Rounds 64-67 */
-		sha1nexte	MSG0, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG0, MSG1
-		sha1rnds4	$3, E0, ABCD
-	sha1msg1	MSG0, MSG3
-	pxor		MSG0, MSG2
-
-	/* Rounds 68-71 */
-		sha1nexte	MSG1, E1
-		movdqa		ABCD, E0
-	sha1msg2	MSG1, MSG2
-		sha1rnds4	$3, E1, ABCD
-	pxor		MSG1, MSG3
-
-	/* Rounds 72-75 */
-		sha1nexte	MSG2, E0
-		movdqa		ABCD, E1
-	sha1msg2	MSG2, MSG3
-		sha1rnds4	$3, E0, ABCD
-
-	/* Rounds 76-79 */
-		sha1nexte	MSG3, E1
-		movdqa		ABCD, E0
-		sha1rnds4	$3, E1, ABCD
+.irp i, 0, 16, 32, 48, 64	/* 5 iterations x 16 rounds = all 80 rounds, fully unrolled at assembly time */
+	do_4rounds	(\i + 0),  MSG0, MSG1, MSG2, MSG3, E0, E1	/* rounds i+0 .. i+3 */
+	do_4rounds	(\i + 4),  MSG1, MSG2, MSG3, MSG0, E1, E0	/* rounds i+4 .. i+7; MSG registers rotate by one, E0/E1 swap */
+	do_4rounds	(\i + 8),  MSG2, MSG3, MSG0, MSG1, E0, E1	/* rounds i+8 .. i+11 */
+	do_4rounds	(\i + 12), MSG3, MSG0, MSG1, MSG2, E1, E0	/* rounds i+12 .. i+15; leaves the saved ABCD copy in E0 */
+.endr
 
 	/* Add the previous state (before the rounds) to the current state. */
 	sha1nexte	E0_SAVED, E0
 	paddd		ABCD_SAVED, ABCD
 
-- 
2.50.1

next prev parent reply	other threads:[~2025-07-18 19:20 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-18 19:18 [PATCH 0/2] sha1-ni-asm.S cleanups Eric Biggers
2025-07-18 19:18 ` [PATCH 1/2] lib/crypto: x86/sha1-ni: Minor optimizations and cleanup Eric Biggers
2025-07-18 19:19 ` Eric Biggers [this message]
2025-07-21  4:30 ` [PATCH 0/2] sha1-ni-asm.S cleanups Ard Biesheuvel

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:1d08b2f364ce dfblob:428f9b960594 )
 OR (
bs:"[PATCH 2/2] lib/crypto: x86/sha1-ni: Convert to use rounds macros" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250718191900.42877-3-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=Jason@zx2c4.com \
    --cc=ardb@kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.