public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets
@ 2024-04-09  0:01 Eric Biggers
  2024-04-09  9:12 ` Ard Biesheuvel
  2024-04-19 10:59 ` Herbert Xu
  0 siblings, 2 replies; 5+ messages in thread
From: Eric Biggers @ 2024-04-09  0:01 UTC (permalink / raw)
  To: linux-crypto, x86; +Cc: linux-kernel, Chang S . Bae, Stefan Kanthak

From: Eric Biggers <ebiggers@google.com>

Access the AES round keys using offsets -7*16 through 7*16, instead of
0*16 through 14*16.  This allows VEX-encoded instructions to address all
round keys using 1-byte offsets, whereas before some needed 4-byte
offsets.  This decreases the code size of aes-xts-avx-x86_64.o by 4.2%.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aes-xts-avx-x86_64.S | 81 +++++++++++++++-------------
 1 file changed, 44 insertions(+), 37 deletions(-)

diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index fcaf64a2f8c6..95e412e7601d 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -80,11 +80,11 @@
 	.byte	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
 .text
 
 // Function parameters
 .set	KEY,		%rdi	// Initially points to crypto_aes_ctx, then is
-				// advanced to point directly to the round keys
+				// advanced to point directly to 7th round key
 .set	SRC,		%rsi	// Pointer to next source data
 .set	DST,		%rdx	// Pointer to next destination data
 .set	LEN,		%rcx	// Remaining length in bytes
 .set	TWEAK,		%r8	// Pointer to next tweak
 
@@ -406,28 +406,28 @@
 .endif
 .endm
 
 // Load the round keys: just the first one if !USE_AVX10, otherwise all of them.
 .macro	_load_round_keys
-	_vbroadcast128	0*16(KEY), KEY0
+	_vbroadcast128	-7*16(KEY), KEY0
 .if USE_AVX10
-	_vbroadcast128	1*16(KEY), KEY1
-	_vbroadcast128	2*16(KEY), KEY2
-	_vbroadcast128	3*16(KEY), KEY3
-	_vbroadcast128	4*16(KEY), KEY4
-	_vbroadcast128	5*16(KEY), KEY5
-	_vbroadcast128	6*16(KEY), KEY6
-	_vbroadcast128	7*16(KEY), KEY7
-	_vbroadcast128	8*16(KEY), KEY8
-	_vbroadcast128	9*16(KEY), KEY9
-	_vbroadcast128	10*16(KEY), KEY10
+	_vbroadcast128	-6*16(KEY), KEY1
+	_vbroadcast128	-5*16(KEY), KEY2
+	_vbroadcast128	-4*16(KEY), KEY3
+	_vbroadcast128	-3*16(KEY), KEY4
+	_vbroadcast128	-2*16(KEY), KEY5
+	_vbroadcast128	-1*16(KEY), KEY6
+	_vbroadcast128	0*16(KEY), KEY7
+	_vbroadcast128	1*16(KEY), KEY8
+	_vbroadcast128	2*16(KEY), KEY9
+	_vbroadcast128	3*16(KEY), KEY10
 	// Note: if it's AES-128 or AES-192, the last several round keys won't
 	// be used.  We do the loads anyway to save a conditional jump.
-	_vbroadcast128	11*16(KEY), KEY11
-	_vbroadcast128	12*16(KEY), KEY12
-	_vbroadcast128	13*16(KEY), KEY13
-	_vbroadcast128	14*16(KEY), KEY14
+	_vbroadcast128	4*16(KEY), KEY11
+	_vbroadcast128	5*16(KEY), KEY12
+	_vbroadcast128	6*16(KEY), KEY13
+	_vbroadcast128	7*16(KEY), KEY14
 .endif
 .endm
 
 // Do a single round of AES encryption (if \enc==1) or decryption (if \enc==0)
 // on the block(s) in \data using the round key(s) in \key.  The register length
@@ -454,13 +454,13 @@
 .macro _vaes_1x		enc, last, i, xmm_suffix, data
 .if USE_AVX10
 	_vaes		\enc, \last, KEY\i\xmm_suffix, \data
 .else
 .ifnb \xmm_suffix
-	_vaes		\enc, \last, \i*16(KEY), \data
+	_vaes		\enc, \last, (\i-7)*16(KEY), \data
 .else
-	_vbroadcast128	\i*16(KEY), V4
+	_vbroadcast128	(\i-7)*16(KEY), V4
 	_vaes		\enc, \last, V4, \data
 .endif
 .endif
 .endm
 
@@ -475,11 +475,11 @@
 	_vaes		\enc, \last, KEY\i, V1
 	_tweak_step	(2*(\i-1) + 1)
 	_vaes		\enc, \last, KEY\i, V2
 	_vaes		\enc, \last, KEY\i, V3
 .else
-	_vbroadcast128	\i*16(KEY), V4
+	_vbroadcast128	(\i-7)*16(KEY), V4
 	_tweak_step	(2*(\i-1))
 	_vaes		\enc, \last, V4, V0
 	_vaes		\enc, \last, V4, V1
 	_tweak_step	(2*(\i-1) + 1)
 	_vaes		\enc, \last, V4, V2
@@ -526,13 +526,19 @@
 	_define_aliases
 
 	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
 	movl		480(KEY), KEYLEN
 
-	// If decrypting, advance KEY to the decryption round keys.
-.if !\enc
-	add		$240, KEY
+	// Advance KEY to point to the 7th encryption round key (if encrypting)
+	// or the 7th decryption round key (if decrypting).  This makes the
+	// offset to any round key be in the range [-112, 112], fitting in a
+	// signed byte.  This shortens VEX-encoded instructions that access the
+	// 8th and later round keys which otherwise would need 4-byte offsets.
+.if \enc
+	add		$7*16, KEY
+.else
+	add		$(15+7)*16, KEY
 .endif
 
 	// Check whether the data length is a multiple of the AES block length.
 	test		$15, LEN
 	jnz		.Lneed_cts\@
@@ -751,40 +757,41 @@
 
 // void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
 //			   u8 iv[AES_BLOCK_SIZE]);
 SYM_TYPED_FUNC_START(aes_xts_encrypt_iv)
 	vmovdqu		(%rsi), %xmm0
-	vpxor		0*16(%rdi), %xmm0, %xmm0
+	add		$7*16, %rdi
+	vpxor		-7*16(%rdi), %xmm0, %xmm0
+	vaesenc		-6*16(%rdi), %xmm0, %xmm0
+	vaesenc		-5*16(%rdi), %xmm0, %xmm0
+	vaesenc		-4*16(%rdi), %xmm0, %xmm0
+	vaesenc		-3*16(%rdi), %xmm0, %xmm0
+	vaesenc		-2*16(%rdi), %xmm0, %xmm0
+	vaesenc		-1*16(%rdi), %xmm0, %xmm0
+	vaesenc		0*16(%rdi), %xmm0, %xmm0
 	vaesenc		1*16(%rdi), %xmm0, %xmm0
 	vaesenc		2*16(%rdi), %xmm0, %xmm0
+	cmpl		$24, 480-(7*16)(%rdi)
+	jle		.Lencrypt_iv_aes_128_or_192
 	vaesenc		3*16(%rdi), %xmm0, %xmm0
 	vaesenc		4*16(%rdi), %xmm0, %xmm0
 	vaesenc		5*16(%rdi), %xmm0, %xmm0
 	vaesenc		6*16(%rdi), %xmm0, %xmm0
-	vaesenc		7*16(%rdi), %xmm0, %xmm0
-	vaesenc		8*16(%rdi), %xmm0, %xmm0
-	vaesenc		9*16(%rdi), %xmm0, %xmm0
-	cmpl		$24, 480(%rdi)
-	jle		.Lencrypt_iv_aes_128_or_192
-	vaesenc		10*16(%rdi), %xmm0, %xmm0
-	vaesenc		11*16(%rdi), %xmm0, %xmm0
-	vaesenc		12*16(%rdi), %xmm0, %xmm0
-	vaesenc		13*16(%rdi), %xmm0, %xmm0
-	vaesenclast	14*16(%rdi), %xmm0, %xmm0
+	vaesenclast	7*16(%rdi), %xmm0, %xmm0
 .Lencrypt_iv_done:
 	vmovdqu		%xmm0, (%rsi)
 	RET
 
 	// Out-of-line handling of AES-128 and AES-192
 .Lencrypt_iv_aes_128_or_192:
 	jz		.Lencrypt_iv_aes_192
-	vaesenclast	10*16(%rdi), %xmm0, %xmm0
+	vaesenclast	3*16(%rdi), %xmm0, %xmm0
 	jmp		.Lencrypt_iv_done
 .Lencrypt_iv_aes_192:
-	vaesenc		10*16(%rdi), %xmm0, %xmm0
-	vaesenc		11*16(%rdi), %xmm0, %xmm0
-	vaesenclast	12*16(%rdi), %xmm0, %xmm0
+	vaesenc		3*16(%rdi), %xmm0, %xmm0
+	vaesenc		4*16(%rdi), %xmm0, %xmm0
+	vaesenclast	5*16(%rdi), %xmm0, %xmm0
 	jmp		.Lencrypt_iv_done
 SYM_FUNC_END(aes_xts_encrypt_iv)
 
 // Below are the actual AES-XTS encryption and decryption functions,
 // instantiated from the above macro.  They all have the following prototype:

base-commit: 4ad27a8be9dbefd4820da0f60da879d512b2f659
prerequisite-patch-id: 8d09ed747039f5e718ac7267e2a15e22504aa7f3
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets
  2024-04-09  0:01 [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets Eric Biggers
@ 2024-04-09  9:12 ` Ard Biesheuvel
  2024-04-09 12:11   ` Eric Biggers
  2024-04-19 10:59 ` Herbert Xu
  1 sibling, 1 reply; 5+ messages in thread
From: Ard Biesheuvel @ 2024-04-09  9:12 UTC (permalink / raw)
  To: Eric Biggers
  Cc: linux-crypto, x86, linux-kernel, Chang S . Bae, Stefan Kanthak

On Tue, 9 Apr 2024 at 02:02, Eric Biggers <ebiggers@kernel.org> wrote:
>
> From: Eric Biggers <ebiggers@google.com>
>
> Access the AES round keys using offsets -7*16 through 7*16, instead of
> 0*16 through 14*16.  This allows VEX-encoded instructions to address all
> round keys using 1-byte offsets, whereas before some needed 4-byte
> offsets.  This decreases the code size of aes-xts-avx-x86_64.o by 4.2%.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>

Nice optimization!

Do you think we might be able to macrofy this a bit so we can use zero
based indexing for the round keys, and hide the arithmetic?


> ---
>  arch/x86/crypto/aes-xts-avx-x86_64.S | 81 +++++++++++++++-------------
>  1 file changed, 44 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
> index fcaf64a2f8c6..95e412e7601d 100644
> --- a/arch/x86/crypto/aes-xts-avx-x86_64.S
> +++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
> @@ -80,11 +80,11 @@
>         .byte   0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
>  .text
>
>  // Function parameters
>  .set   KEY,            %rdi    // Initially points to crypto_aes_ctx, then is
> -                               // advanced to point directly to the round keys
> +                               // advanced to point directly to 7th round key
>  .set   SRC,            %rsi    // Pointer to next source data
>  .set   DST,            %rdx    // Pointer to next destination data
>  .set   LEN,            %rcx    // Remaining length in bytes
>  .set   TWEAK,          %r8     // Pointer to next tweak
>
> @@ -406,28 +406,28 @@
>  .endif
>  .endm
>
>  // Load the round keys: just the first one if !USE_AVX10, otherwise all of them.
>  .macro _load_round_keys
> -       _vbroadcast128  0*16(KEY), KEY0
> +       _vbroadcast128  -7*16(KEY), KEY0
>  .if USE_AVX10
> -       _vbroadcast128  1*16(KEY), KEY1
> -       _vbroadcast128  2*16(KEY), KEY2
> -       _vbroadcast128  3*16(KEY), KEY3
> -       _vbroadcast128  4*16(KEY), KEY4
> -       _vbroadcast128  5*16(KEY), KEY5
> -       _vbroadcast128  6*16(KEY), KEY6
> -       _vbroadcast128  7*16(KEY), KEY7
> -       _vbroadcast128  8*16(KEY), KEY8
> -       _vbroadcast128  9*16(KEY), KEY9
> -       _vbroadcast128  10*16(KEY), KEY10
> +       _vbroadcast128  -6*16(KEY), KEY1
> +       _vbroadcast128  -5*16(KEY), KEY2
> +       _vbroadcast128  -4*16(KEY), KEY3
> +       _vbroadcast128  -3*16(KEY), KEY4
> +       _vbroadcast128  -2*16(KEY), KEY5
> +       _vbroadcast128  -1*16(KEY), KEY6
> +       _vbroadcast128  0*16(KEY), KEY7
> +       _vbroadcast128  1*16(KEY), KEY8
> +       _vbroadcast128  2*16(KEY), KEY9
> +       _vbroadcast128  3*16(KEY), KEY10
>         // Note: if it's AES-128 or AES-192, the last several round keys won't
>         // be used.  We do the loads anyway to save a conditional jump.
> -       _vbroadcast128  11*16(KEY), KEY11
> -       _vbroadcast128  12*16(KEY), KEY12
> -       _vbroadcast128  13*16(KEY), KEY13
> -       _vbroadcast128  14*16(KEY), KEY14
> +       _vbroadcast128  4*16(KEY), KEY11
> +       _vbroadcast128  5*16(KEY), KEY12
> +       _vbroadcast128  6*16(KEY), KEY13
> +       _vbroadcast128  7*16(KEY), KEY14
>  .endif
>  .endm
>
>  // Do a single round of AES encryption (if \enc==1) or decryption (if \enc==0)
>  // on the block(s) in \data using the round key(s) in \key.  The register length
> @@ -454,13 +454,13 @@
>  .macro _vaes_1x                enc, last, i, xmm_suffix, data
>  .if USE_AVX10
>         _vaes           \enc, \last, KEY\i\xmm_suffix, \data
>  .else
>  .ifnb \xmm_suffix
> -       _vaes           \enc, \last, \i*16(KEY), \data
> +       _vaes           \enc, \last, (\i-7)*16(KEY), \data
>  .else
> -       _vbroadcast128  \i*16(KEY), V4
> +       _vbroadcast128  (\i-7)*16(KEY), V4
>         _vaes           \enc, \last, V4, \data
>  .endif
>  .endif
>  .endm
>
> @@ -475,11 +475,11 @@
>         _vaes           \enc, \last, KEY\i, V1
>         _tweak_step     (2*(\i-1) + 1)
>         _vaes           \enc, \last, KEY\i, V2
>         _vaes           \enc, \last, KEY\i, V3
>  .else
> -       _vbroadcast128  \i*16(KEY), V4
> +       _vbroadcast128  (\i-7)*16(KEY), V4
>         _tweak_step     (2*(\i-1))
>         _vaes           \enc, \last, V4, V0
>         _vaes           \enc, \last, V4, V1
>         _tweak_step     (2*(\i-1) + 1)
>         _vaes           \enc, \last, V4, V2
> @@ -526,13 +526,19 @@
>         _define_aliases
>
>         // Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
>         movl            480(KEY), KEYLEN
>
> -       // If decrypting, advance KEY to the decryption round keys.
> -.if !\enc
> -       add             $240, KEY
> +       // Advance KEY to point to the 7th encryption round key (if encrypting)
> +       // or the 7th decryption round key (if decrypting).  This makes the
> +       // offset to any round key be in the range [-112, 112], fitting in a
> +       // signed byte.  This shortens VEX-encoded instructions that access the
> +       // 8th and later round keys which otherwise would need 4-byte offsets.
> +.if \enc
> +       add             $7*16, KEY
> +.else
> +       add             $(15+7)*16, KEY
>  .endif
>
>         // Check whether the data length is a multiple of the AES block length.
>         test            $15, LEN
>         jnz             .Lneed_cts\@
> @@ -751,40 +757,41 @@
>
>  // void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
>  //                        u8 iv[AES_BLOCK_SIZE]);
>  SYM_TYPED_FUNC_START(aes_xts_encrypt_iv)
>         vmovdqu         (%rsi), %xmm0
> -       vpxor           0*16(%rdi), %xmm0, %xmm0
> +       add             $7*16, %rdi
> +       vpxor           -7*16(%rdi), %xmm0, %xmm0
> +       vaesenc         -6*16(%rdi), %xmm0, %xmm0
> +       vaesenc         -5*16(%rdi), %xmm0, %xmm0
> +       vaesenc         -4*16(%rdi), %xmm0, %xmm0
> +       vaesenc         -3*16(%rdi), %xmm0, %xmm0
> +       vaesenc         -2*16(%rdi), %xmm0, %xmm0
> +       vaesenc         -1*16(%rdi), %xmm0, %xmm0
> +       vaesenc         0*16(%rdi), %xmm0, %xmm0
>         vaesenc         1*16(%rdi), %xmm0, %xmm0
>         vaesenc         2*16(%rdi), %xmm0, %xmm0
> +       cmpl            $24, 480-(7*16)(%rdi)
> +       jle             .Lencrypt_iv_aes_128_or_192
>         vaesenc         3*16(%rdi), %xmm0, %xmm0
>         vaesenc         4*16(%rdi), %xmm0, %xmm0
>         vaesenc         5*16(%rdi), %xmm0, %xmm0
>         vaesenc         6*16(%rdi), %xmm0, %xmm0
> -       vaesenc         7*16(%rdi), %xmm0, %xmm0
> -       vaesenc         8*16(%rdi), %xmm0, %xmm0
> -       vaesenc         9*16(%rdi), %xmm0, %xmm0
> -       cmpl            $24, 480(%rdi)
> -       jle             .Lencrypt_iv_aes_128_or_192
> -       vaesenc         10*16(%rdi), %xmm0, %xmm0
> -       vaesenc         11*16(%rdi), %xmm0, %xmm0
> -       vaesenc         12*16(%rdi), %xmm0, %xmm0
> -       vaesenc         13*16(%rdi), %xmm0, %xmm0
> -       vaesenclast     14*16(%rdi), %xmm0, %xmm0
> +       vaesenclast     7*16(%rdi), %xmm0, %xmm0
>  .Lencrypt_iv_done:
>         vmovdqu         %xmm0, (%rsi)
>         RET
>
>         // Out-of-line handling of AES-128 and AES-192
>  .Lencrypt_iv_aes_128_or_192:
>         jz              .Lencrypt_iv_aes_192
> -       vaesenclast     10*16(%rdi), %xmm0, %xmm0
> +       vaesenclast     3*16(%rdi), %xmm0, %xmm0
>         jmp             .Lencrypt_iv_done
>  .Lencrypt_iv_aes_192:
> -       vaesenc         10*16(%rdi), %xmm0, %xmm0
> -       vaesenc         11*16(%rdi), %xmm0, %xmm0
> -       vaesenclast     12*16(%rdi), %xmm0, %xmm0
> +       vaesenc         3*16(%rdi), %xmm0, %xmm0
> +       vaesenc         4*16(%rdi), %xmm0, %xmm0
> +       vaesenclast     5*16(%rdi), %xmm0, %xmm0
>         jmp             .Lencrypt_iv_done
>  SYM_FUNC_END(aes_xts_encrypt_iv)
>
>  // Below are the actual AES-XTS encryption and decryption functions,
>  // instantiated from the above macro.  They all have the following prototype:
>
> base-commit: 4ad27a8be9dbefd4820da0f60da879d512b2f659
> prerequisite-patch-id: 8d09ed747039f5e718ac7267e2a15e22504aa7f3
> --
> 2.44.0
>
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets
  2024-04-09  9:12 ` Ard Biesheuvel
@ 2024-04-09 12:11   ` Eric Biggers
  2024-04-09 12:44     ` Ard Biesheuvel
  0 siblings, 1 reply; 5+ messages in thread
From: Eric Biggers @ 2024-04-09 12:11 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: linux-crypto, x86, linux-kernel, Chang S . Bae, Stefan Kanthak

On Tue, Apr 09, 2024 at 11:12:11AM +0200, Ard Biesheuvel wrote:
> On Tue, 9 Apr 2024 at 02:02, Eric Biggers <ebiggers@kernel.org> wrote:
> >
> > From: Eric Biggers <ebiggers@google.com>
> >
> > Access the AES round keys using offsets -7*16 through 7*16, instead of
> > 0*16 through 14*16.  This allows VEX-encoded instructions to address all
> > round keys using 1-byte offsets, whereas before some needed 4-byte
> > offsets.  This decreases the code size of aes-xts-avx-x86_64.o by 4.2%.
> >
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
> 
> Nice optimization!
> 
> Do you think we might be able to macrofy this a bit so we can use zero
> based indexing for the round keys, and hide the arithmetic?
> 
> 

There are two alternatives I considered: defining variables KEYOFF0 through
KEYOFF14 and writing the offsets as KEYOFF\i(KEY), or defining one variable
KEYOFF and writing the offsets as \i*16-KEYOFF(KEY).  I think I slightly prefer
the current patch where it's less abstracted out, though.  It makes it clear the
offsets really are single-byte, and also index 7 is the exact mid-point so going
from -7 to 7 still feels fairly natural.  If we wanted to do something more
complex like use different offsets for AVX vs. AVX512, then we'd need the
abstraction to handle that, but it doesn't seem useful to do that.

- Eric

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets
  2024-04-09 12:11   ` Eric Biggers
@ 2024-04-09 12:44     ` Ard Biesheuvel
  0 siblings, 0 replies; 5+ messages in thread
From: Ard Biesheuvel @ 2024-04-09 12:44 UTC (permalink / raw)
  To: Eric Biggers
  Cc: linux-crypto, x86, linux-kernel, Chang S . Bae, Stefan Kanthak

On Tue, 9 Apr 2024 at 14:11, Eric Biggers <ebiggers@kernel.org> wrote:
>
> On Tue, Apr 09, 2024 at 11:12:11AM +0200, Ard Biesheuvel wrote:
> > On Tue, 9 Apr 2024 at 02:02, Eric Biggers <ebiggers@kernel.org> wrote:
> > >
> > > From: Eric Biggers <ebiggers@google.com>
> > >
> > > Access the AES round keys using offsets -7*16 through 7*16, instead of
> > > 0*16 through 14*16.  This allows VEX-encoded instructions to address all
> > > round keys using 1-byte offsets, whereas before some needed 4-byte
> > > offsets.  This decreases the code size of aes-xts-avx-x86_64.o by 4.2%.
> > >
> > > Signed-off-by: Eric Biggers <ebiggers@google.com>
> >
> > Nice optimization!
> >
> > Do you think we might be able to macrofy this a bit so we can use zero
> > based indexing for the round keys, and hide the arithmetic?
> >
> >
>
> There are two alternatives I considered: defining variables KEYOFF0 through
> KEYOFF14 and writing the offsets as KEYOFF\i(KEY), or defining one variable
> KEYOFF and writing the offsets as \i*16-KEYOFF(KEY).  I think I slightly prefer
> the current patch where it's less abstracted out, though.  It makes it clear the
> offsets really are single-byte, and also index 7 is the exact mid-point so going
> from -7 to 7 still feels fairly natural.  If we wanted to do something more
> complex like use different offsets for AVX vs. AVX512, then we'd need the
> abstraction to handle that, but it doesn't seem useful to do that.
>

Fair enough.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets
  2024-04-09  0:01 [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets Eric Biggers
  2024-04-09  9:12 ` Ard Biesheuvel
@ 2024-04-19 10:59 ` Herbert Xu
  1 sibling, 0 replies; 5+ messages in thread
From: Herbert Xu @ 2024-04-19 10:59 UTC (permalink / raw)
  To: Eric Biggers
  Cc: linux-crypto, x86, linux-kernel, chang.seok.bae, stefan.kanthak

Eric Biggers <ebiggers@kernel.org> wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Access the AES round keys using offsets -7*16 through 7*16, instead of
> 0*16 through 14*16.  This allows VEX-encoded instructions to address all
> round keys using 1-byte offsets, whereas before some needed 4-byte
> offsets.  This decreases the code size of aes-xts-avx-x86_64.o by 4.2%.
> 
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
> arch/x86/crypto/aes-xts-avx-x86_64.S | 81 +++++++++++++++-------------
> 1 file changed, 44 insertions(+), 37 deletions(-)

Patch applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-04-19 10:59 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-09  0:01 [PATCH] crypto: x86/aes-xts - access round keys using single-byte offsets Eric Biggers
2024-04-09  9:12 ` Ard Biesheuvel
2024-04-09 12:11   ` Eric Biggers
2024-04-09 12:44     ` Ard Biesheuvel
2024-04-19 10:59 ` Herbert Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox