public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org, Ondrej Mosnacek <omosnace@redhat.com>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2 08/10] crypto: x86/aegis128 - take advantage of block-aligned len
Date: Wed, 16 Oct 2024 17:00:49 -0700	[thread overview]
Message-ID: <20241017000051.228294-9-ebiggers@kernel.org> (raw)
In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

Update a caller of aegis128_aesni_ad() to round down the length to a
block boundary.  After that, aegis128_aesni_ad(), aegis128_aesni_enc(),
and aegis128_aesni_dec() are only passed whole blocks.  Update the
assembly code to take advantage of that, which eliminates some unneeded
instructions.  For aegis128_aesni_enc() and aegis128_aesni_dec(), the
length is also always nonzero, so stop checking for zero length.

Reviewed-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aegis128-aesni-asm.S  | 37 +++++++++++----------------
 arch/x86/crypto/aegis128-aesni-glue.c |  4 +--
 2 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index e650330ef6951..345b1eafe45af 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -188,19 +188,21 @@ SYM_FUNC_START(aegis128_aesni_init)
 SYM_FUNC_END(aegis128_aesni_init)
 
 /*
  * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data,
  *			  unsigned int len);
+ *
+ * len must be a multiple of 16.
  */
 SYM_FUNC_START(aegis128_aesni_ad)
 	.set STATEP, %rdi
 	.set SRC, %rsi
 	.set LEN, %edx
 	FRAME_BEGIN
 
-	cmp $0x10, LEN
-	jb .Lad_out
+	test LEN, LEN
+	jz .Lad_out
 
 	/* load the state: */
 	movdqu 0x00(STATEP), STATE0
 	movdqu 0x10(STATEP), STATE1
 	movdqu 0x20(STATEP), STATE2
@@ -211,40 +213,35 @@ SYM_FUNC_START(aegis128_aesni_ad)
 .Lad_loop:
 	movdqu 0x00(SRC), MSG
 	aegis128_update
 	pxor MSG, STATE4
 	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lad_out_1
+	jz .Lad_out_1
 
 	movdqu 0x10(SRC), MSG
 	aegis128_update
 	pxor MSG, STATE3
 	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lad_out_2
+	jz .Lad_out_2
 
 	movdqu 0x20(SRC), MSG
 	aegis128_update
 	pxor MSG, STATE2
 	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lad_out_3
+	jz .Lad_out_3
 
 	movdqu 0x30(SRC), MSG
 	aegis128_update
 	pxor MSG, STATE1
 	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lad_out_4
+	jz .Lad_out_4
 
 	movdqu 0x40(SRC), MSG
 	aegis128_update
 	pxor MSG, STATE0
 	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lad_out_0
+	jz .Lad_out_0
 
 	add $0x50, SRC
 	jmp .Lad_loop
 
 	/* store the state: */
@@ -310,28 +307,26 @@ SYM_FUNC_END(aegis128_aesni_ad)
 
 	aegis128_update
 	pxor MSG, \s4
 
 	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lenc_out_\i
+	jz .Lenc_out_\i
 .endm
 
 /*
  * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *dst,
  *			   unsigned int len);
+ *
+ * len must be nonzero and a multiple of 16.
  */
 SYM_FUNC_START(aegis128_aesni_enc)
 	.set STATEP, %rdi
 	.set SRC, %rsi
 	.set DST, %rdx
 	.set LEN, %ecx
 	FRAME_BEGIN
 
-	cmp $0x10, LEN
-	jb .Lenc_out
-
 	/* load the state: */
 	movdqu 0x00(STATEP), STATE0
 	movdqu 0x10(STATEP), STATE1
 	movdqu 0x20(STATEP), STATE2
 	movdqu 0x30(STATEP), STATE3
@@ -457,28 +452,26 @@ SYM_FUNC_END(aegis128_aesni_enc_tail)
 
 	aegis128_update
 	pxor MSG, \s4
 
 	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Ldec_out_\i
+	jz .Ldec_out_\i
 .endm
 
 /*
  * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *dst,
  *			   unsigned int len);
+ *
+ * len must be nonzero and a multiple of 16.
  */
 SYM_FUNC_START(aegis128_aesni_dec)
 	.set STATEP, %rdi
 	.set SRC, %rsi
 	.set DST, %rdx
 	.set LEN, %ecx
 	FRAME_BEGIN
 
-	cmp $0x10, LEN
-	jb .Ldec_out
-
 	/* load the state: */
 	movdqu 0x00(STATEP), STATE0
 	movdqu 0x10(STATEP), STATE1
 	movdqu 0x20(STATEP), STATE2
 	movdqu 0x30(STATEP), STATE3
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 9555958e4089d..c19d8e3d96a35 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -85,12 +85,12 @@ static void crypto_aegis128_aesni_process_ad(
 				pos = 0;
 				left -= fill;
 				src += fill;
 			}
 
-			aegis128_aesni_ad(state, src, left);
-
+			aegis128_aesni_ad(state, src,
+					  left & ~(AEGIS128_BLOCK_SIZE - 1));
 			src += left & ~(AEGIS128_BLOCK_SIZE - 1);
 			left &= AEGIS128_BLOCK_SIZE - 1;
 		}
 
 		memcpy(buf.bytes + pos, src, left);
-- 
2.47.0


  parent reply	other threads:[~2024-10-17  0:02 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-17  0:00 [PATCH v2 00/10] AEGIS x86 assembly tuning Eric Biggers
2024-10-17  0:00 ` [PATCH v2 01/10] crypto: x86/aegis128 - access 32-bit arguments as 32-bit Eric Biggers
2024-10-17  0:00 ` [PATCH v2 02/10] crypto: x86/aegis128 - remove no-op init and exit functions Eric Biggers
2024-10-17  0:00 ` [PATCH v2 03/10] crypto: x86/aegis128 - eliminate some indirect calls Eric Biggers
2024-10-17  0:00 ` [PATCH v2 04/10] crypto: x86/aegis128 - don't bother with special code for aligned data Eric Biggers
2024-10-17  0:00 ` [PATCH v2 05/10] crypto: x86/aegis128 - optimize length block preparation using SSE4.1 Eric Biggers
2024-10-17  0:00 ` [PATCH v2 06/10] crypto: x86/aegis128 - improve assembly function prototypes Eric Biggers
2024-10-17  0:00 ` [PATCH v2 07/10] crypto: x86/aegis128 - optimize partial block handling using SSE4.1 Eric Biggers
2024-10-17  0:00 ` Eric Biggers [this message]
2024-10-17  0:00 ` [PATCH v2 09/10] crypto: x86/aegis128 - remove unneeded FRAME_BEGIN and FRAME_END Eric Biggers
2024-10-17  0:00 ` [PATCH v2 10/10] crypto: x86/aegis128 - remove unneeded RETs Eric Biggers
2024-10-26  6:59 ` [PATCH v2 00/10] AEGIS x86 assembly tuning Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241017000051.228294-9-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=omosnace@redhat.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox