Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	Ard Biesheuvel <ardb@kernel.org>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>
Subject: Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code
Date: Fri, 17 Oct 2025 18:34:26 +0000	[thread overview]
Message-ID: <20251017183426.GA315411@google.com> (raw)
In-Reply-To: <20251002023117.37504-2-ebiggers@kernel.org>

On Wed, Oct 01, 2025 at 07:31:10PM -0700, Eric Biggers wrote:
> Add an implementation of AES-GCM that uses 256-bit vectors and the
> following CPU features: Vector AES (VAES), Vector Carryless
> Multiplication (VPCLMULQDQ), and AVX2.

Here are a few non-functional cleanups that I applied after reading over the
assembly file again (they weren't worth resending the whole patchset for):

diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S
index e628dbb33c0e..f58096a37342 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx2.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S
@@ -231,11 +231,10 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
 	.set	TMP2,		%ymm2
 	.set	TMP2_XMM,	%xmm2
 	.set	H_CUR,		%ymm3
 	.set	H_CUR_XMM,	%xmm3
 	.set	H_CUR2,		%ymm4
-	.set	H_CUR2_XMM,	%xmm4
 	.set	H_INC,		%ymm5
 	.set	H_INC_XMM,	%xmm5
 	.set	GFPOLY,		%ymm6
 	.set	GFPOLY_XMM,	%xmm6
 
@@ -576,11 +575,10 @@ SYM_FUNC_START(aes_gcm_aad_update_vaes_avx2)
 
 	jz		.Laad_done
 	cmp		$16, AADLEN
 	jle		.Laad_lastblock
 
-.Laad_last2blocks:
 	// Update GHASH with the remaining 17 <= AADLEN <= 31 bytes of AAD.
 	mov		AADLEN, AADLEN	// Zero-extend AADLEN to AADLEN64.
 	vmovdqu		(AAD), TMP0_XMM
 	vmovdqu		-16(AAD, AADLEN64), TMP1_XMM
 	vpshufb		BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM
@@ -632,11 +630,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	vpxor		RNDKEY0, AESDATA\i, AESDATA\i
 .endr
 .endm
 
 // Generate and encrypt counter blocks in the given AESDATA vectors, excluding
-// the last AES round.  Clobbers TMP0.
+// the last AES round.  Clobbers %rax and TMP0.
 .macro	_aesenc_loop	vecs:vararg
 	_ctr_begin	\vecs
 	lea		16(KEY), %rax
 .Laesenc_loop\@:
 	vbroadcasti128	(%rax), TMP0
@@ -687,11 +685,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	.set	KEY,		%rdi
 	.set	LE_CTR_PTR,	%rsi
 	.set	LE_CTR_PTR32,	%esi
 	.set	GHASH_ACC_PTR,	%rdx
 	.set	SRC,		%rcx	// Assumed to be %rcx.
-					// See .Ltail_xor_and_ghash_partial_vec
+					// See .Ltail_xor_and_ghash_1to16bytes
 	.set	DST,		%r8
 	.set	DATALEN,	%r9d
 	.set	DATALEN64,	%r9	// Zero-extend DATALEN before using!
 
 	// Additional local variables
@@ -734,11 +732,10 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	// H_POW[2-1]_XORED contain cached values from KEY->h_powers_xored.  The
 	// descending numbering reflects the order of the key powers.
 	.set	H_POW2_XORED,	%ymm7
 	.set	H_POW2_XORED_XMM, %xmm7
 	.set	H_POW1_XORED,	%ymm8
-	.set	H_POW1_XORED_XMM, %xmm8
 
 	// RNDKEY0 caches the zero-th round key, and RNDKEYLAST the last one.
 	.set	RNDKEY0,	%ymm9
 	.set	RNDKEYLAST,	%ymm10
 
@@ -749,13 +746,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 	.set	AESDATA0,	%ymm12
 	.set	AESDATA0_XMM,	%xmm12
 	.set	AESDATA1,	%ymm13
 	.set	AESDATA1_XMM,	%xmm13
 	.set	AESDATA2,	%ymm14
-	.set	AESDATA2_XMM,	%xmm14
 	.set	AESDATA3,	%ymm15
-	.set	AESDATA3_XMM,	%xmm15
 
 .if \enc
 	.set	GHASHDATA_PTR,	DST
 .else
 	.set	GHASHDATA_PTR,	SRC

next prev parent reply	other threads:[~2025-10-17 18:34 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-02  2:31 [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Eric Biggers
2025-10-02  2:31 ` [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code Eric Biggers
2025-10-17 18:34   ` Eric Biggers [this message]
2025-10-02  2:31 ` [PATCH 2/8] crypto: x86/aes-gcm - remove VAES+AVX10/256 " Eric Biggers
2025-10-02  2:31 ` [PATCH 3/8] crypto: x86/aes-gcm - rename avx10 and avx10_512 to avx512 Eric Biggers
2025-10-02  2:31 ` [PATCH 4/8] crypto: x86/aes-gcm - clean up AVX512 code to assume 512-bit vectors Eric Biggers
2025-10-02  2:31 ` [PATCH 5/8] crypto: x86/aes-gcm - reorder AVX512 precompute and aad_update functions Eric Biggers
2025-10-02  2:31 ` [PATCH 6/8] crypto: x86/aes-gcm - revise some comments in AVX512 code Eric Biggers
2025-10-02  2:31 ` [PATCH 7/8] crypto: x86/aes-gcm - optimize AVX512 precomputation of H^2 from H^1 Eric Biggers
2025-10-02  2:31 ` [PATCH 8/8] crypto: x86/aes-gcm - optimize long AAD processing with AVX512 Eric Biggers
2025-10-10 18:21 ` [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Ard Biesheuvel
2025-10-14  0:31 ` Eric Biggers
2025-10-17  8:25   ` Herbert Xu
2025-10-17  8:44     ` Ard Biesheuvel
2025-10-17 16:04       ` Eric Biggers
2025-10-17 20:50         ` Eric Biggers
2025-10-20  4:13         ` Herbert Xu
2025-10-20 16:57           ` Eric Biggers
2025-10-21  3:00             ` Herbert Xu
2025-10-17  8:24 ` Herbert Xu

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e628dbb33c0 dfblob:f58096a3734 )
 OR (
bs:"Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251017183426.GA315411@google.com \
    --to=ebiggers@kernel.org \
    --cc=Jason@zx2c4.com \
    --cc=ardb@kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.