All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org
Subject: [PATCH 7/7] crypto: x86/aes-xts - more code size optimizations
Date: Tue, 10 Dec 2024 15:58:34 -0800	[thread overview]
Message-ID: <20241210235834.40862-8-ebiggers@kernel.org> (raw)
In-Reply-To: <20241210235834.40862-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

Prefer immediates of -128 to 128, since the former fits in a signed
byte, saving 3 bytes per instruction.  Also prefer VEX-coded
instructions to EVEX where this is easy to do.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aes-xts-avx-x86_64.S | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index bb3d0dfeca575..a49637f0a729d 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -186,10 +186,11 @@
 .endif
 	// V30-V31 are currently unused.
 .endm
 
 // Move a vector between memory and a register.
+// The register operand must be in the first 16 vector registers.
 .macro	_vmovdqu	src, dst
 .if VL < 64
 	vmovdqu		\src, \dst
 .else
 	vmovdqu8	\src, \dst
@@ -206,15 +207,16 @@
 	vbroadcasti32x4	\src, \dst
 .endif
 .endm
 
 // XOR two vectors together.
+// Any register operands must be in the first 16 vector registers.
 .macro	_vpxor	src1, src2, dst
-.if USE_AVX10
-	vpxord		\src1, \src2, \dst
-.else
+.if VL < 64
 	vpxor		\src1, \src2, \dst
+.else
+	vpxord		\src1, \src2, \dst
 .endif
 .endm
 
 // XOR three vectors together.
 .macro	_xor3	src1, src2, src3_and_dst
@@ -559,22 +561,22 @@
 	_setup_round_keys	\enc
 
 	// Compute the first set of tweaks TWEAK[0-3].
 	_compute_first_set_of_tweaks
 
-	sub		$4*VL, LEN
+	add		$-4*VL, LEN  // shorter than 'sub 4*VL' when VL=32
 	jl		.Lhandle_remainder\@
 
 .Lmain_loop\@:
 	// This is the main loop, en/decrypting 4*VL bytes per iteration.
 
 	// XOR each source block with its tweak and the zero-th round key.
 .if USE_AVX10
-	vmovdqu8	0*VL(SRC), V0
-	vmovdqu8	1*VL(SRC), V1
-	vmovdqu8	2*VL(SRC), V2
-	vmovdqu8	3*VL(SRC), V3
+	_vmovdqu	0*VL(SRC), V0
+	_vmovdqu	1*VL(SRC), V1
+	_vmovdqu	2*VL(SRC), V2
+	_vmovdqu	3*VL(SRC), V3
 	vpternlogd	$0x96, TWEAK0, KEY0, V0
 	vpternlogd	$0x96, TWEAK1, KEY0, V1
 	vpternlogd	$0x96, TWEAK2, KEY0, V2
 	vpternlogd	$0x96, TWEAK3, KEY0, V3
 .else
@@ -616,13 +618,13 @@
 	_vmovdqu	V3, 3*VL(DST)
 
 	// Finish computing the next set of tweaks.
 	_tweak_step	1000
 
-	add		$4*VL, SRC
-	add		$4*VL, DST
-	sub		$4*VL, LEN
+	sub		$-4*VL, SRC  // shorter than 'add 4*VL' when VL=32
+	sub		$-4*VL, DST
+	add		$-4*VL, LEN
 	jge		.Lmain_loop\@
 
 	// Check for the uncommon case where the data length isn't a multiple of
 	// 4*VL.  Handle it out-of-line in order to optimize for the common
 	// case.  In the common case, just fall through to the ret.
-- 
2.47.1


      parent reply	other threads:[~2024-12-10 23:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-10 23:58 [PATCH 0/7] crypto: x86 - minor optimizations and cleanup to VAES code Eric Biggers
2024-12-10 23:58 ` [PATCH 1/7] crypto: x86/aes-gcm - code size optimization Eric Biggers
2024-12-10 23:58 ` [PATCH 2/7] crypto: x86/aes-gcm - tune better for AMD CPUs Eric Biggers
2024-12-10 23:58 ` [PATCH 3/7] crypto: x86/aes-xts - use .irp when useful Eric Biggers
2024-12-10 23:58 ` [PATCH 4/7] crypto: x86/aes-xts - make the register aliases per-function Eric Biggers
2024-12-10 23:58 ` [PATCH 5/7] crypto: x86/aes-xts - improve some comments Eric Biggers
2024-12-10 23:58 ` [PATCH 6/7] crypto: x86/aes-xts - change len parameter to int Eric Biggers
2024-12-10 23:58 ` Eric Biggers [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241210235834.40862-8-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.