Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code

linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	Ard Biesheuvel <ardb@kernel.org>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>
Subject: Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code
Date: Fri, 17 Oct 2025 18:34:26 +0000	[thread overview]
Message-ID: <20251017183426.GA315411@google.com> (raw)
In-Reply-To: <20251002023117.37504-2-ebiggers@kernel.org>

On Wed, Oct 01, 2025 at 07:31:10PM -0700, Eric Biggers wrote:
> Add an implementation of AES-GCM that uses 256-bit vectors and the
> following CPU features: Vector AES (VAES), Vector Carryless
> Multiplication (VPCLMULQDQ), and AVX2.

Here are a few non-functional cleanups that I applied after reading over the
assembly file again (they weren't worth resending the whole patchset for):

diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S
index e628dbb33c0e..f58096a37342 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx2.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S
@@ -231,11 +231,10 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
 	.set	TMP2,		%ymm2
 	.set	TMP2_XMM,	%xmm2
 	.set	H_CUR,		%ymm3
 	.set	H_CUR_XMM,	%xmm3
 	.set	H_CUR2,		%ymm4
-	.set	H_CUR2_XMM,	%xmm4
 	.set	H_INC,		%ymm5
 	.set	H_INC_XMM,	%xmm5
 	.set	GFPOLY,		%ymm6
 	.set	GFPOLY_XMM,	%xmm6
 
@@ -576,11 +575,10 @@ SYM_FUNC_START(aes_gcm_aad_update_vaes_avx2)
 
 	jz		.Laad_done
 	cmp		$16, AADLEN
 	jle		.Laad_lastblock
 
-.Laad_last2blocks:
 	// Update GHASH with the remaining 17 <= AADLEN <= 31 bytes of AAD.
 	mov		AADLEN, AADLEN	// Zero-extend AADLEN to AADLEN64.
 	vmovdqu		(AAD), TMP0_XMM
 	vmovdqu		-16(AAD, AADLEN64), TMP1_XMM
 	vpshufb		BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM
@@ -632,11 +630,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	vpxor		RNDKEY0, AESDATA\i, AESDATA\i
 .endr
 .endm
 
 // Generate and encrypt counter blocks in the given AESDATA vectors, excluding
-// the last AES round.  Clobbers TMP0.
+// the last AES round.  Clobbers %rax and TMP0.
 .macro	_aesenc_loop	vecs:vararg
 	_ctr_begin	\vecs
 	lea		16(KEY), %rax
 .Laesenc_loop\@:
 	vbroadcasti128	(%rax), TMP0
@@ -687,11 +685,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	.set	KEY,		%rdi
 	.set	LE_CTR_PTR,	%rsi
 	.set	LE_CTR_PTR32,	%esi
 	.set	GHASH_ACC_PTR,	%rdx
 	.set	SRC,		%rcx	// Assumed to be %rcx.
-					// See .Ltail_xor_and_ghash_partial_vec
+					// See .Ltail_xor_and_ghash_1to16bytes
 	.set	DST,		%r8
 	.set	DATALEN,	%r9d
 	.set	DATALEN64,	%r9	// Zero-extend DATALEN before using!
 
 	// Additional local variables
@@ -734,11 +732,10 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	// H_POW[2-1]_XORED contain cached values from KEY->h_powers_xored.  The
 	// descending numbering reflects the order of the key powers.
 	.set	H_POW2_XORED,	%ymm7
 	.set	H_POW2_XORED_XMM, %xmm7
 	.set	H_POW1_XORED,	%ymm8
-	.set	H_POW1_XORED_XMM, %xmm8
 
 	// RNDKEY0 caches the zero-th round key, and RNDKEYLAST the last one.
 	.set	RNDKEY0,	%ymm9
 	.set	RNDKEYLAST,	%ymm10
 
@@ -749,13 +746,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	.set	AESDATA0,	%ymm12
 	.set	AESDATA0_XMM,	%xmm12
 	.set	AESDATA1,	%ymm13
 	.set	AESDATA1_XMM,	%xmm13
 	.set	AESDATA2,	%ymm14
-	.set	AESDATA2_XMM,	%xmm14
 	.set	AESDATA3,	%ymm15
-	.set	AESDATA3_XMM,	%xmm15
 
 .if \enc
 	.set	GHASHDATA_PTR,	DST
 .else
 	.set	GHASHDATA_PTR,	SRC

next prev parent reply	other threads:[~2025-10-17 18:34 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-02  2:31 [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Eric Biggers
2025-10-02  2:31 ` [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code Eric Biggers
2025-10-17 18:34   ` Eric Biggers [this message]
2025-10-02  2:31 ` [PATCH 2/8] crypto: x86/aes-gcm - remove VAES+AVX10/256 " Eric Biggers
2025-10-02  2:31 ` [PATCH 3/8] crypto: x86/aes-gcm - rename avx10 and avx10_512 to avx512 Eric Biggers
2025-10-02  2:31 ` [PATCH 4/8] crypto: x86/aes-gcm - clean up AVX512 code to assume 512-bit vectors Eric Biggers
2025-10-02  2:31 ` [PATCH 5/8] crypto: x86/aes-gcm - reorder AVX512 precompute and aad_update functions Eric Biggers
2025-10-02  2:31 ` [PATCH 6/8] crypto: x86/aes-gcm - revise some comments in AVX512 code Eric Biggers
2025-10-02  2:31 ` [PATCH 7/8] crypto: x86/aes-gcm - optimize AVX512 precomputation of H^2 from H^1 Eric Biggers
2025-10-02  2:31 ` [PATCH 8/8] crypto: x86/aes-gcm - optimize long AAD processing with AVX512 Eric Biggers
2025-10-10 18:21 ` [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Ard Biesheuvel
2025-10-14  0:31 ` Eric Biggers
2025-10-17  8:25   ` Herbert Xu
2025-10-17  8:44     ` Ard Biesheuvel
2025-10-17 16:04       ` Eric Biggers
2025-10-17 20:50         ` Eric Biggers
2025-10-20  4:13         ` Herbert Xu
2025-10-20 16:57           ` Eric Biggers
2025-10-21  3:00             ` Herbert Xu
2025-10-17  8:24 ` Herbert Xu

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e628dbb33c0 dfblob:f58096a3734 )
 OR (
bs:"Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251017183426.GA315411@google.com \
    --to=ebiggers@kernel.org \
    --cc=Jason@zx2c4.com \
    --cc=ardb@kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).