From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
Ard Biesheuvel <ardb@kernel.org>,
"Jason A . Donenfeld" <Jason@zx2c4.com>
Subject: Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code
Date: Fri, 17 Oct 2025 18:34:26 +0000 [thread overview]
Message-ID: <20251017183426.GA315411@google.com> (raw)
In-Reply-To: <20251002023117.37504-2-ebiggers@kernel.org>
On Wed, Oct 01, 2025 at 07:31:10PM -0700, Eric Biggers wrote:
> Add an implementation of AES-GCM that uses 256-bit vectors and the
> following CPU features: Vector AES (VAES), Vector Carryless
> Multiplication (VPCLMULQDQ), and AVX2.
A few non-functional cleanups that I applied after reading over the assembly
file again (it wasn't worth resending the whole patchset for these):
diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S
index e628dbb33c0e..f58096a37342 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx2.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S
@@ -231,11 +231,10 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
.set TMP2, %ymm2
.set TMP2_XMM, %xmm2
.set H_CUR, %ymm3
.set H_CUR_XMM, %xmm3
.set H_CUR2, %ymm4
- .set H_CUR2_XMM, %xmm4
.set H_INC, %ymm5
.set H_INC_XMM, %xmm5
.set GFPOLY, %ymm6
.set GFPOLY_XMM, %xmm6
@@ -576,11 +575,10 @@ SYM_FUNC_START(aes_gcm_aad_update_vaes_avx2)
jz .Laad_done
cmp $16, AADLEN
jle .Laad_lastblock
-.Laad_last2blocks:
// Update GHASH with the remaining 17 <= AADLEN <= 31 bytes of AAD.
mov AADLEN, AADLEN // Zero-extend AADLEN to AADLEN64.
vmovdqu (AAD), TMP0_XMM
vmovdqu -16(AAD, AADLEN64), TMP1_XMM
vpshufb BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM
@@ -632,11 +630,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
vpxor RNDKEY0, AESDATA\i, AESDATA\i
.endr
.endm
// Generate and encrypt counter blocks in the given AESDATA vectors, excluding
-// the last AES round. Clobbers TMP0.
+// the last AES round. Clobbers %rax and TMP0.
.macro _aesenc_loop vecs:vararg
_ctr_begin \vecs
lea 16(KEY), %rax
.Laesenc_loop\@:
vbroadcasti128 (%rax), TMP0
@@ -687,11 +685,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
.set KEY, %rdi
.set LE_CTR_PTR, %rsi
.set LE_CTR_PTR32, %esi
.set GHASH_ACC_PTR, %rdx
.set SRC, %rcx // Assumed to be %rcx.
- // See .Ltail_xor_and_ghash_partial_vec
+ // See .Ltail_xor_and_ghash_1to16bytes
.set DST, %r8
.set DATALEN, %r9d
.set DATALEN64, %r9 // Zero-extend DATALEN before using!
// Additional local variables
@@ -734,11 +732,10 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
// H_POW[2-1]_XORED contain cached values from KEY->h_powers_xored. The
// descending numbering reflects the order of the key powers.
.set H_POW2_XORED, %ymm7
.set H_POW2_XORED_XMM, %xmm7
.set H_POW1_XORED, %ymm8
- .set H_POW1_XORED_XMM, %xmm8
// RNDKEY0 caches the zero-th round key, and RNDKEYLAST the last one.
.set RNDKEY0, %ymm9
.set RNDKEYLAST, %ymm10
@@ -749,13 +746,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
.set AESDATA0, %ymm12
.set AESDATA0_XMM, %xmm12
.set AESDATA1, %ymm13
.set AESDATA1_XMM, %xmm13
.set AESDATA2, %ymm14
- .set AESDATA2_XMM, %xmm14
.set AESDATA3, %ymm15
- .set AESDATA3_XMM, %xmm15
.if \enc
.set GHASHDATA_PTR, DST
.else
.set GHASHDATA_PTR, SRC
next prev parent reply other threads:[~2025-10-17 18:34 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-02 2:31 [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Eric Biggers
2025-10-02 2:31 ` [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code Eric Biggers
2025-10-17 18:34 ` Eric Biggers [this message]
2025-10-02 2:31 ` [PATCH 2/8] crypto: x86/aes-gcm - remove VAES+AVX10/256 " Eric Biggers
2025-10-02 2:31 ` [PATCH 3/8] crypto: x86/aes-gcm - rename avx10 and avx10_512 to avx512 Eric Biggers
2025-10-02 2:31 ` [PATCH 4/8] crypto: x86/aes-gcm - clean up AVX512 code to assume 512-bit vectors Eric Biggers
2025-10-02 2:31 ` [PATCH 5/8] crypto: x86/aes-gcm - reorder AVX512 precompute and aad_update functions Eric Biggers
2025-10-02 2:31 ` [PATCH 6/8] crypto: x86/aes-gcm - revise some comments in AVX512 code Eric Biggers
2025-10-02 2:31 ` [PATCH 7/8] crypto: x86/aes-gcm - optimize AVX512 precomputation of H^2 from H^1 Eric Biggers
2025-10-02 2:31 ` [PATCH 8/8] crypto: x86/aes-gcm - optimize long AAD processing with AVX512 Eric Biggers
2025-10-10 18:21 ` [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Ard Biesheuvel
2025-10-14 0:31 ` Eric Biggers
2025-10-17 8:25 ` Herbert Xu
2025-10-17 8:44 ` Ard Biesheuvel
2025-10-17 16:04 ` Eric Biggers
2025-10-17 20:50 ` Eric Biggers
2025-10-20 4:13 ` Herbert Xu
2025-10-20 16:57 ` Eric Biggers
2025-10-21 3:00 ` Herbert Xu
2025-10-17 8:24 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251017183426.GA315411@google.com \
--to=ebiggers@kernel.org \
--cc=Jason@zx2c4.com \
--cc=ardb@kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).