* [PATCH 1/9] lib/crypto: arm64/aes: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 2/9] lib/crypto: arm64/chacha: " Eric Biggers
` (9 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the AES-CBC-MAC code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
arch/arm64/crypto/aes-ce-ccm-glue.c | 13 ++++-------
include/crypto/aes.h | 6 ++---
lib/crypto/arm64/aes-modes.S | 8 +++----
lib/crypto/arm64/aes.h | 35 +++++++++++------------------
4 files changed, 23 insertions(+), 39 deletions(-)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 45aed0073283..a304375ce724 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -99,20 +99,15 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
do {
u32 blocks = abytes / AES_BLOCK_SIZE;
if (macp == AES_BLOCK_SIZE || (!macp && blocks > 0)) {
- u32 rem = ce_aes_mac_update(in, rk, rounds, blocks, mac,
- macp, enc_after);
- u32 adv = (blocks - rem) * AES_BLOCK_SIZE;
-
+ ce_aes_mac_update(in, rk, rounds, blocks, mac, macp,
+ enc_after);
macp = enc_after ? 0 : AES_BLOCK_SIZE;
- in += adv;
- abytes -= adv;
-
- if (unlikely(rem))
- macp = 0;
+ in += blocks * AES_BLOCK_SIZE;
+ abytes -= blocks * AES_BLOCK_SIZE;
} else {
u32 l = min(AES_BLOCK_SIZE - macp, abytes);
crypto_xor(&mac[macp], in, l);
in += l;
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 3feb4105c2a2..16fbfd93e2bd 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -228,13 +228,13 @@ asmlinkage void ce_aes_essiv_cbc_encrypt(u8 out[], u8 const in[],
u32 const rk1[], int rounds,
int blocks, u8 iv[], u32 const rk2[]);
asmlinkage void ce_aes_essiv_cbc_decrypt(u8 out[], u8 const in[],
u32 const rk1[], int rounds,
int blocks, u8 iv[], u32 const rk2[]);
-asmlinkage size_t ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
- size_t blocks, u8 dg[], int enc_before,
- int enc_after);
+asmlinkage void ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ size_t blocks, u8 dg[], int enc_before,
+ int enc_after);
#elif defined(CONFIG_PPC)
void ppc_expand_key_128(u32 *key_enc, const u8 *key);
void ppc_expand_key_192(u32 *key_enc, const u8 *key);
void ppc_expand_key_256(u32 *key_enc, const u8 *key);
void ppc_generate_decrypt_key(u32 *key_dec, u32 *key_enc, unsigned int key_len);
diff --git a/lib/crypto/arm64/aes-modes.S b/lib/crypto/arm64/aes-modes.S
index fc89cd02b642..10e537317eaf 100644
--- a/lib/crypto/arm64/aes-modes.S
+++ b/lib/crypto/arm64/aes-modes.S
@@ -815,13 +815,13 @@ AES_FUNC_START(aes_xts_decrypt)
b .Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)
#if IS_ENABLED(CONFIG_CRYPTO_LIB_AES_CBC_MACS)
/*
- * size_t aes_mac_update(u8 const in[], u32 const rk[], int rounds,
- * size_t blocks, u8 dg[], int enc_before,
- * int enc_after);
+ * void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ * size_t blocks, u8 dg[], int enc_before,
+ * int enc_after);
*/
AES_FUNC_START(aes_mac_update)
ld1 {v0.16b}, [x4] /* get dg */
enc_prepare w2, x1, x7
cbz w5, .Lmacloop4x
@@ -842,11 +842,10 @@ AES_FUNC_START(aes_mac_update)
cmp x3, xzr
csinv w5, w6, wzr, eq
cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8
st1 {v0.16b}, [x4] /* return dg */
- cond_yield .Lmacout, x7, x8
b .Lmacloop4x
.Lmac1x:
add x3, x3, #4
.Lmacloop:
cbz x3, .Lmacout
@@ -861,9 +860,8 @@ AES_FUNC_START(aes_mac_update)
encrypt_block v0, w2, x1, x7, w8
b .Lmacloop
.Lmacout:
st1 {v0.16b}, [x4] /* return dg */
- mov x0, x3
ret
AES_FUNC_END(aes_mac_update)
#endif /* CONFIG_CRYPTO_LIB_AES_CBC_MACS */
diff --git a/lib/crypto/arm64/aes.h b/lib/crypto/arm64/aes.h
index 135d3324a30a..9e9e45a6f787 100644
--- a/lib/crypto/arm64/aes.h
+++ b/lib/crypto/arm64/aes.h
@@ -27,13 +27,13 @@ asmlinkage void __aes_ce_encrypt(const u32 rk[], u8 out[AES_BLOCK_SIZE],
asmlinkage void __aes_ce_decrypt(const u32 inv_rk[], u8 out[AES_BLOCK_SIZE],
const u8 in[AES_BLOCK_SIZE], int rounds);
asmlinkage u32 __aes_ce_sub(u32 l);
asmlinkage void __aes_ce_invert(struct aes_block *out,
const struct aes_block *in);
-asmlinkage size_t neon_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
- size_t blocks, u8 dg[], int enc_before,
- int enc_after);
+asmlinkage void neon_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ size_t blocks, u8 dg[], int enc_before,
+ int enc_after);
/*
* Expand an AES key using the crypto extensions if supported and usable or
* generic code otherwise. The expanded key format is compatible between the
* two cases. The outputs are @rndkeys (required) and @inv_rndkeys (optional).
@@ -190,29 +190,20 @@ static bool aes_cbcmac_blocks_arch(u8 h[AES_BLOCK_SIZE],
const struct aes_enckey *key, const u8 *data,
size_t nblocks, bool enc_before,
bool enc_after)
{
if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
- do {
- size_t rem;
-
- scoped_ksimd() {
- if (static_branch_likely(&have_aes))
- rem = ce_aes_mac_update(
- data, key->k.rndkeys,
- key->nrounds, nblocks, h,
- enc_before, enc_after);
- else
- rem = neon_aes_mac_update(
- data, key->k.rndkeys,
- key->nrounds, nblocks, h,
- enc_before, enc_after);
- }
- data += (nblocks - rem) * AES_BLOCK_SIZE;
- nblocks = rem;
- enc_before = false;
- } while (nblocks);
+ scoped_ksimd() {
+ if (static_branch_likely(&have_aes))
+ ce_aes_mac_update(data, key->k.rndkeys,
+ key->nrounds, nblocks, h,
+ enc_before, enc_after);
+ else
+ neon_aes_mac_update(data, key->k.rndkeys,
+ key->nrounds, nblocks, h,
+ enc_before, enc_after);
+ }
return true;
}
return false;
}
#endif /* CONFIG_CRYPTO_LIB_AES_CBC_MACS */
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 2/9] lib/crypto: arm64/chacha: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
2026-04-01 0:05 ` [PATCH 1/9] lib/crypto: arm64/aes: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 3/9] lib/crypto: arm64/gf128hash: " Eric Biggers
` (8 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the ChaCha code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/arm64/chacha.h | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/lib/crypto/arm64/chacha.h b/lib/crypto/arm64/chacha.h
index ca8c6a8b0578..c6f8ddf98e2d 100644
--- a/lib/crypto/arm64/chacha.h
+++ b/lib/crypto/arm64/chacha.h
@@ -34,13 +34,13 @@ asmlinkage void hchacha_block_neon(const struct chacha_state *state,
u32 out[HCHACHA_OUT_WORDS], int nrounds);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
- int bytes, int nrounds)
+ unsigned int bytes, int nrounds)
{
- while (bytes > 0) {
+ while (bytes) {
int l = min(bytes, CHACHA_BLOCK_SIZE * 5);
if (l <= CHACHA_BLOCK_SIZE) {
u8 buf[CHACHA_BLOCK_SIZE];
@@ -74,20 +74,12 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
{
if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
!crypto_simd_usable())
return chacha_crypt_generic(state, dst, src, bytes, nrounds);
- do {
- unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
-
- scoped_ksimd()
- chacha_doneon(state, dst, src, todo, nrounds);
-
- bytes -= todo;
- src += todo;
- dst += todo;
- } while (bytes);
+ scoped_ksimd()
+ chacha_doneon(state, dst, src, bytes, nrounds);
}
#define chacha_mod_init_arch chacha_mod_init_arch
static void chacha_mod_init_arch(void)
{
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 3/9] lib/crypto: arm64/gf128hash: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
2026-04-01 0:05 ` [PATCH 1/9] lib/crypto: arm64/aes: " Eric Biggers
2026-04-01 0:05 ` [PATCH 2/9] lib/crypto: arm64/chacha: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 4/9] lib/crypto: arm64/poly1305: " Eric Biggers
` (7 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the GHASH and POLYVAL code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/arm64/gf128hash.h | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)
diff --git a/lib/crypto/arm64/gf128hash.h b/lib/crypto/arm64/gf128hash.h
index b2c85585b758..1d1179f87f8d 100644
--- a/lib/crypto/arm64/gf128hash.h
+++ b/lib/crypto/arm64/gf128hash.h
@@ -87,20 +87,12 @@ static void polyval_mul_arch(struct polyval_elem *acc,
static void ghash_blocks_arch(struct polyval_elem *acc,
const struct ghash_key *key,
const u8 *data, size_t nblocks)
{
if (static_branch_likely(&have_asimd) && may_use_simd()) {
- do {
- /* Allow rescheduling every 4 KiB. */
- size_t n = min_t(size_t, nblocks,
- 4096 / GHASH_BLOCK_SIZE);
-
- scoped_ksimd()
- pmull_ghash_update_p8(n, acc, data, &key->h);
- data += n * GHASH_BLOCK_SIZE;
- nblocks -= n;
- } while (nblocks);
+ scoped_ksimd()
+ pmull_ghash_update_p8(nblocks, acc, data, &key->h);
} else {
ghash_blocks_generic(acc, &key->h, data, nblocks);
}
}
@@ -108,20 +100,12 @@ static void ghash_blocks_arch(struct polyval_elem *acc,
static void polyval_blocks_arch(struct polyval_elem *acc,
const struct polyval_key *key,
const u8 *data, size_t nblocks)
{
if (static_branch_likely(&have_pmull) && may_use_simd()) {
- do {
- /* Allow rescheduling every 4 KiB. */
- size_t n = min_t(size_t, nblocks,
- 4096 / POLYVAL_BLOCK_SIZE);
-
- scoped_ksimd()
- polyval_blocks_pmull(acc, key, data, n);
- data += n * POLYVAL_BLOCK_SIZE;
- nblocks -= n;
- } while (nblocks);
+ scoped_ksimd()
+ polyval_blocks_pmull(acc, key, data, nblocks);
} else {
polyval_blocks_generic(acc, &key->h_powers[NUM_H_POWERS - 1],
data, nblocks);
}
}
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 4/9] lib/crypto: arm64/poly1305: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (2 preceding siblings ...)
2026-04-01 0:05 ` [PATCH 3/9] lib/crypto: arm64/gf128hash: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 5/9] lib/crypto: arm64/sha1: " Eric Biggers
` (6 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the Poly1305 code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/arm64/poly1305.h | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/lib/crypto/arm64/poly1305.h b/lib/crypto/arm64/poly1305.h
index b77669767cd6..3d4bde857699 100644
--- a/lib/crypto/arm64/poly1305.h
+++ b/lib/crypto/arm64/poly1305.h
@@ -25,21 +25,15 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
- do {
- unsigned int todo = min_t(unsigned int, len, SZ_4K);
-
- scoped_ksimd()
- poly1305_blocks_neon(state, src, todo, padbit);
-
- len -= todo;
- src += todo;
- } while (len);
- } else
+ scoped_ksimd()
+ poly1305_blocks_neon(state, src, len, padbit);
+ } else {
poly1305_blocks_arm64(state, src, len, padbit);
+ }
}
#define poly1305_mod_init_arch poly1305_mod_init_arch
static void poly1305_mod_init_arch(void)
{
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 5/9] lib/crypto: arm64/sha1: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (3 preceding siblings ...)
2026-04-01 0:05 ` [PATCH 4/9] lib/crypto: arm64/poly1305: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 6/9] lib/crypto: arm64/sha256: " Eric Biggers
` (5 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the SHA-1 code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/arm64/sha1-ce-core.S | 14 +++++---------
lib/crypto/arm64/sha1.h | 15 ++++-----------
2 files changed, 9 insertions(+), 20 deletions(-)
diff --git a/lib/crypto/arm64/sha1-ce-core.S b/lib/crypto/arm64/sha1-ce-core.S
index 8fbd4767f0f0..59d27fda0714 100644
--- a/lib/crypto/arm64/sha1-ce-core.S
+++ b/lib/crypto/arm64/sha1-ce-core.S
@@ -60,14 +60,14 @@
movk \tmp, :abs_g1:\val
dup \k, \tmp
.endm
/*
- * size_t __sha1_ce_transform(struct sha1_block_state *state,
- * const u8 *data, size_t nblocks);
+ * void sha1_ce_transform(struct sha1_block_state *state,
+ * const u8 *data, size_t nblocks);
*/
-SYM_FUNC_START(__sha1_ce_transform)
+SYM_FUNC_START(sha1_ce_transform)
/* load round constants */
loadrc k0.4s, 0x5a827999, w6
loadrc k1.4s, 0x6ed9eba1, w6
loadrc k2.4s, 0x8f1bbcdc, w6
loadrc k3.4s, 0xca62c1d6, w6
@@ -114,17 +114,13 @@ CPU_LE( rev32 v11.16b, v11.16b )
/* update state */
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
- /* return early if voluntary preemption is needed */
- cond_yield 1f, x5, x6
-
/* handled all input blocks? */
cbnz x2, 0b
/* store new state */
-1: st1 {dgav.4s}, [x0]
+ st1 {dgav.4s}, [x0]
str dgb, [x0, #16]
- mov x0, x2
ret
-SYM_FUNC_END(__sha1_ce_transform)
+SYM_FUNC_END(sha1_ce_transform)
diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h
index bc7071f1be09..112c5d443c56 100644
--- a/lib/crypto/arm64/sha1.h
+++ b/lib/crypto/arm64/sha1.h
@@ -7,26 +7,19 @@
#include <asm/simd.h>
#include <linux/cpufeature.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
-asmlinkage size_t __sha1_ce_transform(struct sha1_block_state *state,
- const u8 *data, size_t nblocks);
+asmlinkage void sha1_ce_transform(struct sha1_block_state *state,
+ const u8 *data, size_t nblocks);
static void sha1_blocks(struct sha1_block_state *state,
const u8 *data, size_t nblocks)
{
if (static_branch_likely(&have_ce) && likely(may_use_simd())) {
- do {
- size_t rem;
-
- scoped_ksimd()
- rem = __sha1_ce_transform(state, data, nblocks);
-
- data += (nblocks - rem) * SHA1_BLOCK_SIZE;
- nblocks = rem;
- } while (nblocks);
+ scoped_ksimd()
+ sha1_ce_transform(state, data, nblocks);
} else {
sha1_blocks_generic(state, data, nblocks);
}
}
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 6/9] lib/crypto: arm64/sha256: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (4 preceding siblings ...)
2026-04-01 0:05 ` [PATCH 5/9] lib/crypto: arm64/sha1: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 7/9] lib/crypto: arm64/sha512: " Eric Biggers
` (4 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the SHA-256 code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/arm64/sha256-ce.S | 14 +++++---------
lib/crypto/arm64/sha256.h | 29 ++++++++---------------------
2 files changed, 13 insertions(+), 30 deletions(-)
diff --git a/lib/crypto/arm64/sha256-ce.S b/lib/crypto/arm64/sha256-ce.S
index e4bfe42a61a9..b54ad977afa3 100644
--- a/lib/crypto/arm64/sha256-ce.S
+++ b/lib/crypto/arm64/sha256-ce.S
@@ -77,15 +77,15 @@
ld1 { v8.4s-v11.4s}, [\tmp], #64
ld1 {v12.4s-v15.4s}, [\tmp]
.endm
/*
- * size_t __sha256_ce_transform(struct sha256_block_state *state,
- * const u8 *data, size_t nblocks);
+ * void sha256_ce_transform(struct sha256_block_state *state,
+ * const u8 *data, size_t nblocks);
*/
.text
-SYM_FUNC_START(__sha256_ce_transform)
+SYM_FUNC_START(sha256_ce_transform)
load_round_constants x8
/* load state */
ld1 {dgav.4s, dgbv.4s}, [x0]
@@ -125,21 +125,17 @@ CPU_LE( rev32 v19.16b, v19.16b )
/* update state */
add dgav.4s, dgav.4s, dg0v.4s
add dgbv.4s, dgbv.4s, dg1v.4s
- /* return early if voluntary preemption is needed */
- cond_yield 1f, x5, x6
-
/* handled all input blocks? */
cbnz x2, 0b
/* store new state */
-1: st1 {dgav.4s, dgbv.4s}, [x0]
- mov x0, x2
+ st1 {dgav.4s, dgbv.4s}, [x0]
ret
-SYM_FUNC_END(__sha256_ce_transform)
+SYM_FUNC_END(sha256_ce_transform)
.unreq dga
.unreq dgav
.unreq dgb
.unreq dgbv
diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h
index 1fad3d7baa9a..b4353d3c4dd0 100644
--- a/lib/crypto/arm64/sha256.h
+++ b/lib/crypto/arm64/sha256.h
@@ -12,30 +12,21 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce);
asmlinkage void sha256_block_data_order(struct sha256_block_state *state,
const u8 *data, size_t nblocks);
asmlinkage void sha256_block_neon(struct sha256_block_state *state,
const u8 *data, size_t nblocks);
-asmlinkage size_t __sha256_ce_transform(struct sha256_block_state *state,
- const u8 *data, size_t nblocks);
+asmlinkage void sha256_ce_transform(struct sha256_block_state *state,
+ const u8 *data, size_t nblocks);
static void sha256_blocks(struct sha256_block_state *state,
const u8 *data, size_t nblocks)
{
if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
- if (static_branch_likely(&have_ce)) {
- do {
- size_t rem;
-
- scoped_ksimd()
- rem = __sha256_ce_transform(state, data,
- nblocks);
-
- data += (nblocks - rem) * SHA256_BLOCK_SIZE;
- nblocks = rem;
- } while (nblocks);
- } else {
- scoped_ksimd()
+ scoped_ksimd() {
+ if (static_branch_likely(&have_ce))
+ sha256_ce_transform(state, data, nblocks);
+ else
sha256_block_neon(state, data, nblocks);
}
} else {
sha256_block_data_order(state, data, nblocks);
}
@@ -53,17 +44,13 @@ asmlinkage void sha256_ce_finup2x(const struct __sha256_ctx *ctx,
static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
const u8 *data1, const u8 *data2, size_t len,
u8 out1[SHA256_DIGEST_SIZE],
u8 out2[SHA256_DIGEST_SIZE])
{
- /*
- * The assembly requires len >= SHA256_BLOCK_SIZE && len <= INT_MAX.
- * Further limit len to 65536 to avoid spending too long with preemption
- * disabled. (Of course, in practice len is nearly always 4096 anyway.)
- */
+ /* The assembly requires len >= SHA256_BLOCK_SIZE && len <= INT_MAX. */
if (static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE &&
- len <= 65536 && likely(may_use_simd())) {
+ len <= INT_MAX && likely(may_use_simd())) {
scoped_ksimd()
sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
return true;
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 7/9] lib/crypto: arm64/sha512: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (5 preceding siblings ...)
2026-04-01 0:05 ` [PATCH 6/9] lib/crypto: arm64/sha256: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 8/9] lib/crypto: arm64/sha3: " Eric Biggers
` (3 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the SHA-512 code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/arm64/sha512-ce-core.S | 12 +++++-------
lib/crypto/arm64/sha512.h | 15 ++++-----------
2 files changed, 9 insertions(+), 18 deletions(-)
diff --git a/lib/crypto/arm64/sha512-ce-core.S b/lib/crypto/arm64/sha512-ce-core.S
index ffd51acfd1ee..26834921e8d6 100644
--- a/lib/crypto/arm64/sha512-ce-core.S
+++ b/lib/crypto/arm64/sha512-ce-core.S
@@ -91,15 +91,15 @@
add v\i4\().2d, v\i1\().2d, v\i3\().2d
sha512h2 q\i3, q\i1, v\i0\().2d
.endm
/*
- * size_t __sha512_ce_transform(struct sha512_block_state *state,
- * const u8 *data, size_t nblocks);
+ * void sha512_ce_transform(struct sha512_block_state *state,
+ * const u8 *data, size_t nblocks);
*/
.text
-SYM_FUNC_START(__sha512_ce_transform)
+SYM_FUNC_START(sha512_ce_transform)
/* load state */
ld1 {v8.2d-v11.2d}, [x0]
/* load first 4 round constants */
adr_l x3, .Lsha512_rcon
@@ -184,14 +184,12 @@ CPU_LE( rev64 v19.16b, v19.16b )
add v8.2d, v8.2d, v0.2d
add v9.2d, v9.2d, v1.2d
add v10.2d, v10.2d, v2.2d
add v11.2d, v11.2d, v3.2d
- cond_yield 3f, x4, x5
/* handled all input blocks? */
cbnz x2, 0b
/* store new state */
-3: st1 {v8.2d-v11.2d}, [x0]
- mov x0, x2
+ st1 {v8.2d-v11.2d}, [x0]
ret
-SYM_FUNC_END(__sha512_ce_transform)
+SYM_FUNC_END(sha512_ce_transform)
diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h
index d978c4d07e90..5da27e6e23ea 100644
--- a/lib/crypto/arm64/sha512.h
+++ b/lib/crypto/arm64/sha512.h
@@ -10,27 +10,20 @@
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha512_insns);
asmlinkage void sha512_block_data_order(struct sha512_block_state *state,
const u8 *data, size_t nblocks);
-asmlinkage size_t __sha512_ce_transform(struct sha512_block_state *state,
- const u8 *data, size_t nblocks);
+asmlinkage void sha512_ce_transform(struct sha512_block_state *state,
+ const u8 *data, size_t nblocks);
static void sha512_blocks(struct sha512_block_state *state,
const u8 *data, size_t nblocks)
{
if (static_branch_likely(&have_sha512_insns) &&
likely(may_use_simd())) {
- do {
- size_t rem;
-
- scoped_ksimd()
- rem = __sha512_ce_transform(state, data, nblocks);
-
- data += (nblocks - rem) * SHA512_BLOCK_SIZE;
- nblocks = rem;
- } while (nblocks);
+ scoped_ksimd()
+ sha512_ce_transform(state, data, nblocks);
} else {
sha512_block_data_order(state, data, nblocks);
}
}
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 8/9] lib/crypto: arm64/sha3: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (6 preceding siblings ...)
2026-04-01 0:05 ` [PATCH 7/9] lib/crypto: arm64/sha512: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 0:05 ` [PATCH 9/9] arm64: fpsimd: Remove obsolete cond_yield macro Eric Biggers
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
NEON at context switch"), kernel-mode NEON sections have been
preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
restrict the preemption modes"), voluntary preemption is no longer
supported on arm64 either. Therefore, there's no longer any need to
limit the length of kernel-mode NEON sections on arm64.
Simplify the SHA-3 code accordingly.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/arm64/sha3-ce-core.S | 8 +++-----
lib/crypto/arm64/sha3.h | 15 ++++-----------
2 files changed, 7 insertions(+), 16 deletions(-)
diff --git a/lib/crypto/arm64/sha3-ce-core.S b/lib/crypto/arm64/sha3-ce-core.S
index ace90b506490..b8ab01987ae0 100644
--- a/lib/crypto/arm64/sha3-ce-core.S
+++ b/lib/crypto/arm64/sha3-ce-core.S
@@ -35,12 +35,12 @@
.macro xar, rd, rn, rm, imm6
.inst 0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
.endm
/*
- * size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
- * size_t nblocks, size_t block_size)
+ * void sha3_ce_transform(struct sha3_state *state, const u8 *data,
+ * size_t nblocks, size_t block_size)
*
* block_size is assumed to be one of 72 (SHA3-512), 104 (SHA3-384), 136
* (SHA3-256 and SHAKE256), 144 (SHA3-224), or 168 (SHAKE128).
*/
.text
@@ -183,22 +183,20 @@ SYM_FUNC_START(sha3_ce_transform)
bcax v2.16b, v2.16b, v28.16b, v27.16b
eor v0.16b, v0.16b, v31.16b
cbnz w8, 3b
- cond_yield 4f, x8, x9
cbnz x2, 0b
/* save state */
-4: st1 { v0.1d- v3.1d}, [x0], #32
+ st1 { v0.1d- v3.1d}, [x0], #32
st1 { v4.1d- v7.1d}, [x0], #32
st1 { v8.1d-v11.1d}, [x0], #32
st1 {v12.1d-v15.1d}, [x0], #32
st1 {v16.1d-v19.1d}, [x0], #32
st1 {v20.1d-v23.1d}, [x0], #32
st1 {v24.1d}, [x0]
- mov x0, x2
ret
SYM_FUNC_END(sha3_ce_transform)
.section ".rodata", "a"
.align 8
diff --git a/lib/crypto/arm64/sha3.h b/lib/crypto/arm64/sha3.h
index b602f1b3b282..eaaba3224acc 100644
--- a/lib/crypto/arm64/sha3.h
+++ b/lib/crypto/arm64/sha3.h
@@ -10,26 +10,19 @@
#include <asm/simd.h>
#include <linux/cpufeature.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha3);
-asmlinkage size_t sha3_ce_transform(struct sha3_state *state, const u8 *data,
- size_t nblocks, size_t block_size);
+asmlinkage void sha3_ce_transform(struct sha3_state *state, const u8 *data,
+ size_t nblocks, size_t block_size);
static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data,
size_t nblocks, size_t block_size)
{
if (static_branch_likely(&have_sha3) && likely(may_use_simd())) {
- do {
- size_t rem;
-
- scoped_ksimd()
- rem = sha3_ce_transform(state, data, nblocks,
- block_size);
- data += (nblocks - rem) * block_size;
- nblocks = rem;
- } while (nblocks);
+ scoped_ksimd()
+ sha3_ce_transform(state, data, nblocks, block_size);
} else {
sha3_absorb_blocks_generic(state, data, nblocks, block_size);
}
}
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* [PATCH 9/9] arm64: fpsimd: Remove obsolete cond_yield macro
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (7 preceding siblings ...)
2026-04-01 0:05 ` [PATCH 8/9] lib/crypto: arm64/sha3: " Eric Biggers
@ 2026-04-01 0:05 ` Eric Biggers
2026-04-01 7:00 ` [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Ard Biesheuvel
2026-04-02 23:12 ` Eric Biggers
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-01 0:05 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel, Eric Biggers
All invocations of the cond_yield macro have been removed, so remove the
macro definition as well.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
arch/arm64/include/asm/assembler.h | 22 ----------------------
1 file changed, 22 deletions(-)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index d3d46e5f7188..9d7c9ae5ac96 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -746,32 +746,10 @@ alternative_else_nop_endif
.macro set_sctlr_el2, reg
set_sctlr sctlr_el2, \reg
.endm
- /*
- * Check whether asm code should yield as soon as it is able. This is
- * the case if we are currently running in task context, and the
- * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag
- * is stored negated in the top word of the thread_info::preempt_count
- * field)
- */
- .macro cond_yield, lbl:req, tmp:req, tmp2
-#ifdef CONFIG_PREEMPT_VOLUNTARY
- get_current_task \tmp
- ldr \tmp, [\tmp, #TSK_TI_PREEMPT]
- /*
- * If we are serving a softirq, there is no point in yielding: the
- * softirq will not be preempted no matter what we do, so we should
- * run to completion as quickly as we can. The preempt_count field will
- * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will
- * catch this case too.
- */
- cbz \tmp, \lbl
-#endif
- .endm
-
/*
* Branch Target Identifier (BTI)
*/
.macro bti, targets
.equ .L__bti_targets_c, 34
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread

* Re: [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (8 preceding siblings ...)
2026-04-01 0:05 ` [PATCH 9/9] arm64: fpsimd: Remove obsolete cond_yield macro Eric Biggers
@ 2026-04-01 7:00 ` Ard Biesheuvel
2026-04-02 23:12 ` Eric Biggers
10 siblings, 0 replies; 12+ messages in thread
From: Ard Biesheuvel @ 2026-04-01 7:00 UTC (permalink / raw)
To: Eric Biggers, linux-crypto
Cc: linux-kernel, Jason A . Donenfeld, Herbert Xu, linux-arm-kernel
On Wed, 1 Apr 2026, at 02:05, Eric Biggers wrote:
> Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
> NEON at context switch"), kernel-mode NEON sections have been
> preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
> restrict the preemption modes"), voluntary preemption is no longer
> supported on arm64 either. Therefore, there's no longer any need to
> limit the length of kernel-mode NEON sections on arm64.
>
> This series simplifies the code in lib/crypto/arm64/ accordingly by
> using longer kernel-mode NEON sections instead of multiple shorter ones.
>
> This series is targeting libcrypto-next.
>
> Eric Biggers (9):
> lib/crypto: arm64/aes: Remove obsolete chunking logic
> lib/crypto: arm64/chacha: Remove obsolete chunking logic
> lib/crypto: arm64/gf128hash: Remove obsolete chunking logic
> lib/crypto: arm64/poly1305: Remove obsolete chunking logic
> lib/crypto: arm64/sha1: Remove obsolete chunking logic
> lib/crypto: arm64/sha256: Remove obsolete chunking logic
> lib/crypto: arm64/sha512: Remove obsolete chunking logic
> lib/crypto: arm64/sha3: Remove obsolete chunking logic
> arm64: fpsimd: Remove obsolete cond_yield macro
>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic
2026-04-01 0:05 [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Eric Biggers
` (9 preceding siblings ...)
2026-04-01 7:00 ` [PATCH 0/9] lib/crypto: arm64: Remove obsolete chunking logic Ard Biesheuvel
@ 2026-04-02 23:12 ` Eric Biggers
10 siblings, 0 replies; 12+ messages in thread
From: Eric Biggers @ 2026-04-02 23:12 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, Ard Biesheuvel, Jason A . Donenfeld, Herbert Xu,
linux-arm-kernel
On Tue, Mar 31, 2026 at 05:05:39PM -0700, Eric Biggers wrote:
> Since commit aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode
> NEON at context switch"), kernel-mode NEON sections have been
> preemptible on arm64. And since commit 7dadeaa6e851 ("sched: Further
> restrict the preemption modes"), voluntary preemption is no longer
> supported on arm64 either. Therefore, there's no longer any need to
> limit the length of kernel-mode NEON sections on arm64.
>
> This series simplifies the code in lib/crypto/arm64/ accordingly by
> using longer kernel-mode NEON sections instead of multiple shorter ones.
>
> This series is targeting libcrypto-next.
Applied to https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/log/?h=libcrypto-next
- Eric
^ permalink raw reply [flat|nested] 12+ messages in thread