From: Sasha Levin <sashal@kernel.org>
To: stable@vger.kernel.org
Cc: Eric Biggers <ebiggers@google.com>,
Ondrej Mosnacek <omosnace@redhat.com>,
Herbert Xu <herbert@gondor.apana.org.au>,
Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.12.y 2/4] crypto: x86/aegis128 - optimize length block preparation using SSE4.1
Date: Thu, 21 Aug 2025 23:06:15 -0400
Message-ID: <20250822030617.1053172-2-sashal@kernel.org>
In-Reply-To: <20250822030617.1053172-1-sashal@kernel.org>
From: Eric Biggers <ebiggers@google.com>
[ Upstream commit af2aff7caf8afb7abbe219a838d61b4c17d88a47 ]
Start using SSE4.1 instructions in the AES-NI AEGIS code, with the first
use case being preparing the length block in fewer instructions.
In practice this does not reduce the set of CPUs on which the code can
run, because all Intel and AMD CPUs with AES-NI also have SSE4.1.
Upgrade the existing SSE2 feature check to SSE4.1, though it seems this
check is not strictly necessary; the aesni-intel module has been getting
away with using SSE4.1 despite checking for AES-NI only.
Reviewed-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Stable-dep-of: c7f49dadfcdf ("crypto: x86/aegis - Fix sleeping when disallowed on PREEMPT_RT")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
arch/x86/crypto/Kconfig | 4 ++--
arch/x86/crypto/aegis128-aesni-asm.S | 6 ++----
arch/x86/crypto/aegis128-aesni-glue.c | 6 +++---
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 46b53ab06165..0cf89264db08 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -366,7 +366,7 @@ config CRYPTO_CHACHA20_X86_64
- AVX-512VL (Advanced Vector Extensions-512VL)
config CRYPTO_AEGIS128_AESNI_SSE2
- tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)"
+ tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE4.1)"
depends on X86 && 64BIT
select CRYPTO_AEAD
select CRYPTO_SIMD
@@ -375,7 +375,7 @@ config CRYPTO_AEGIS128_AESNI_SSE2
Architecture: x86_64 using:
- AES-NI (AES New Instructions)
- - SSE2 (Streaming SIMD Extensions 2)
+ - SSE4.1 (Streaming SIMD Extensions 4.1)
config CRYPTO_NHPOLY1305_SSE2
tristate "Hash functions: NHPoly1305 (SSE2)"
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 1b57558548c7..639ba6f31a90 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * AES-NI + SSE2 implementation of AEGIS-128
+ * AES-NI + SSE4.1 implementation of AEGIS-128
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
@@ -716,9 +716,7 @@ SYM_FUNC_START(crypto_aegis128_aesni_final)
/* prepare length block: */
movd %edx, MSG
- movd %ecx, T0
- pslldq $8, T0
- pxor T0, MSG
+ pinsrd $2, %ecx, MSG
psllq $3, MSG /* multiply by 8 (to get bit count) */
pxor STATE3, MSG
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 6c4c2cda2c2d..9b52451f6fee 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The AEGIS-128 Authenticated-Encryption Algorithm
- * Glue for AES-NI + SSE2 implementation
+ * Glue for AES-NI + SSE4.1 implementation
*
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
@@ -265,7 +265,7 @@ static struct simd_aead_alg *simd_alg;
static int __init crypto_aegis128_aesni_module_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ if (!boot_cpu_has(X86_FEATURE_XMM4_1) ||
!boot_cpu_has(X86_FEATURE_AES) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
@@ -284,6 +284,6 @@ module_exit(crypto_aegis128_aesni_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE4.1 implementation");
MODULE_ALIAS_CRYPTO("aegis128");
MODULE_ALIAS_CRYPTO("aegis128-aesni");
--
2.50.1
next prev parent reply other threads:[~2025-08-22 3:06 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-21 12:55 FAILED: patch "[PATCH] crypto: x86/aegis - Fix sleeping when disallowed on" failed to apply to 6.12-stable tree gregkh
2025-08-22 3:06 ` [PATCH 6.12.y 1/4] crypto: x86/aegis128 - eliminate some indirect calls Sasha Levin
2025-08-22 3:06 ` Sasha Levin [this message]
2025-08-22 3:06 ` [PATCH 6.12.y 3/4] crypto: x86/aegis128 - improve assembly function prototypes Sasha Levin
2025-08-22 3:06 ` [PATCH 6.12.y 4/4] crypto: x86/aegis - Fix sleeping when disallowed on PREEMPT_RT Sasha Levin
2025-08-22 3:23 ` Eric Biggers
2025-08-22 3:28 ` Sasha Levin
2025-08-22 3:39 ` Eric Biggers
2025-08-22 3:48 ` Eric Biggers
2025-08-22 3:56 ` Sasha Levin
2025-08-22 4:10 ` Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250822030617.1053172-2-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=ebiggers@google.com \
--cc=herbert@gondor.apana.org.au \
--cc=omosnace@redhat.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.