From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
Ard Biesheuvel <ardb@kernel.org>,
"Jason A . Donenfeld" <Jason@zx2c4.com>
Subject: Re: [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code
Date: Fri, 17 Oct 2025 18:34:26 +0000 [thread overview]
Message-ID: <20251017183426.GA315411@google.com> (raw)
In-Reply-To: <20251002023117.37504-2-ebiggers@kernel.org>
On Wed, Oct 01, 2025 at 07:31:10PM -0700, Eric Biggers wrote:
> Add an implementation of AES-GCM that uses 256-bit vectors and the
> following CPU features: Vector AES (VAES), Vector Carryless
> Multiplication (VPCLMULQDQ), and AVX2.
A few non-functional cleanups that I applied after reading over the assembly
file again (it wasn't worth resending the whole patchset for these):
diff --git a/arch/x86/crypto/aes-gcm-vaes-avx2.S b/arch/x86/crypto/aes-gcm-vaes-avx2.S
index e628dbb33c0e..f58096a37342 100644
--- a/arch/x86/crypto/aes-gcm-vaes-avx2.S
+++ b/arch/x86/crypto/aes-gcm-vaes-avx2.S
@@ -231,11 +231,10 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
.set TMP2, %ymm2
.set TMP2_XMM, %xmm2
.set H_CUR, %ymm3
.set H_CUR_XMM, %xmm3
.set H_CUR2, %ymm4
- .set H_CUR2_XMM, %xmm4
.set H_INC, %ymm5
.set H_INC_XMM, %xmm5
.set GFPOLY, %ymm6
.set GFPOLY_XMM, %xmm6
@@ -576,11 +575,10 @@ SYM_FUNC_START(aes_gcm_aad_update_vaes_avx2)
jz .Laad_done
cmp $16, AADLEN
jle .Laad_lastblock
-.Laad_last2blocks:
// Update GHASH with the remaining 17 <= AADLEN <= 31 bytes of AAD.
mov AADLEN, AADLEN // Zero-extend AADLEN to AADLEN64.
vmovdqu (AAD), TMP0_XMM
vmovdqu -16(AAD, AADLEN64), TMP1_XMM
vpshufb BSWAP_MASK_XMM, TMP0_XMM, TMP0_XMM
@@ -632,11 +630,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
vpxor RNDKEY0, AESDATA\i, AESDATA\i
.endr
.endm
// Generate and encrypt counter blocks in the given AESDATA vectors, excluding
-// the last AES round. Clobbers TMP0.
+// the last AES round. Clobbers %rax and TMP0.
.macro _aesenc_loop vecs:vararg
_ctr_begin \vecs
lea 16(KEY), %rax
.Laesenc_loop\@:
vbroadcasti128 (%rax), TMP0
@@ -687,11 +685,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
.set KEY, %rdi
.set LE_CTR_PTR, %rsi
.set LE_CTR_PTR32, %esi
.set GHASH_ACC_PTR, %rdx
.set SRC, %rcx // Assumed to be %rcx.
- // See .Ltail_xor_and_ghash_partial_vec
+ // See .Ltail_xor_and_ghash_1to16bytes
.set DST, %r8
.set DATALEN, %r9d
.set DATALEN64, %r9 // Zero-extend DATALEN before using!
// Additional local variables
@@ -734,11 +732,10 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
// H_POW[2-1]_XORED contain cached values from KEY->h_powers_xored. The
// descending numbering reflects the order of the key powers.
.set H_POW2_XORED, %ymm7
.set H_POW2_XORED_XMM, %xmm7
.set H_POW1_XORED, %ymm8
- .set H_POW1_XORED_XMM, %xmm8
// RNDKEY0 caches the zero-th round key, and RNDKEYLAST the last one.
.set RNDKEY0, %ymm9
.set RNDKEYLAST, %ymm10
@@ -749,13 +746,11 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
.set AESDATA0, %ymm12
.set AESDATA0_XMM, %xmm12
.set AESDATA1, %ymm13
.set AESDATA1_XMM, %xmm13
.set AESDATA2, %ymm14
- .set AESDATA2_XMM, %xmm14
.set AESDATA3, %ymm15
- .set AESDATA3_XMM, %xmm15
.if \enc
.set GHASHDATA_PTR, DST
.else
.set GHASHDATA_PTR, SRC
next prev parent reply other threads:[~2025-10-17 18:34 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-02 2:31 [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Eric Biggers
2025-10-02 2:31 ` [PATCH 1/8] crypto: x86/aes-gcm - add VAES+AVX2 optimized code Eric Biggers
2025-10-17 18:34 ` Eric Biggers [this message]
2025-10-02 2:31 ` [PATCH 2/8] crypto: x86/aes-gcm - remove VAES+AVX10/256 " Eric Biggers
2025-10-02 2:31 ` [PATCH 3/8] crypto: x86/aes-gcm - rename avx10 and avx10_512 to avx512 Eric Biggers
2025-10-02 2:31 ` [PATCH 4/8] crypto: x86/aes-gcm - clean up AVX512 code to assume 512-bit vectors Eric Biggers
2025-10-02 2:31 ` [PATCH 5/8] crypto: x86/aes-gcm - reorder AVX512 precompute and aad_update functions Eric Biggers
2025-10-02 2:31 ` [PATCH 6/8] crypto: x86/aes-gcm - revise some comments in AVX512 code Eric Biggers
2025-10-02 2:31 ` [PATCH 7/8] crypto: x86/aes-gcm - optimize AVX512 precomputation of H^2 from H^1 Eric Biggers
2025-10-02 2:31 ` [PATCH 8/8] crypto: x86/aes-gcm - optimize long AAD processing with AVX512 Eric Biggers
2025-10-10 18:21 ` [PATCH 0/8] VAES+AVX2 optimized implementation of AES-GCM Ard Biesheuvel
2025-10-14 0:31 ` Eric Biggers
2025-10-17 8:25 ` Herbert Xu
2025-10-17 8:44 ` Ard Biesheuvel
2025-10-17 16:04 ` Eric Biggers
2025-10-17 20:50 ` Eric Biggers
2025-10-20 4:13 ` Herbert Xu
2025-10-20 16:57 ` Eric Biggers
2025-10-21 3:00 ` Herbert Xu
2025-10-17 8:24 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251017183426.GA315411@google.com \
--to=ebiggers@kernel.org \
--cc=Jason@zx2c4.com \
--cc=ardb@kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.