From: Ard Biesheuvel <ardb+git@google.com>
To: linux-crypto@vger.kernel.org
Cc: ebiggers@kernel.org, herbert@gondor.apana.org.au,
Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v2 4/8] crypto: arm64/aes-ccm - Replace bytewise tail handling with NEON permute
Date: Thu, 18 Jan 2024 18:06:33 +0100 [thread overview]
Message-ID: <20240118170628.3049797-14-ardb+git@google.com> (raw)
In-Reply-To: <20240118170628.3049797-10-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
Implement the CCM tail handling using a single sequence that uses
permute vectors and overlapping loads and stores, rather than looping over
the tail byte by byte with scalar operations. This is
more efficient, even though the measured speedup is only around 1-2% on
the CPUs I have tried.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/crypto/aes-ce-ccm-core.S | 59 +++++++++++++-------
1 file changed, 38 insertions(+), 21 deletions(-)
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index b03f7f71f893..b21a9b759ab2 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -1,8 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
*
- * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd.
+ * Copyright (C) 2024 Google LLC
+ *
+ * Author: Ard Biesheuvel <ardb@kernel.org>
*/
#include <linux/linkage.h>
@@ -168,13 +171,13 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
ld1 {v2.16b}, [x1], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
- eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
+ eor v6.16b, v1.16b, v2.16b /* xor with crypted ctr */
.else
eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
- eor v1.16b, v2.16b, v5.16b /* final round enc */
+ eor v6.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
- st1 {v1.16b}, [x0], #16 /* write output block */
+ st1 {v6.16b}, [x0], #16 /* write output block */
bne 0b
CPU_LE( rev x8, x8 )
st1 {v0.16b}, [x5] /* store mac */
@@ -183,25 +186,31 @@ CPU_LE( rev x8, x8 )
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
- st1 {v0.16b}, [x5] /* store mac */
- add w2, w2, #16 /* process partial tail block */
-7: ldrb w9, [x1], #1 /* get 1 byte of input */
- umov w6, v1.b[0] /* get top crypted ctr byte */
- umov w7, v0.b[0] /* get top mac byte */
+
+ add x1, x1, w2, sxtw /* rewind the input pointer (w2 < 0) */
+ add x0, x0, w2, sxtw /* rewind the output pointer */
+
+ adr_l x8, .Lpermute /* load permute vectors */
+ add x9, x8, w2, sxtw
+ sub x8, x8, w2, sxtw
+ ld1 {v7.16b-v8.16b}, [x9]
+ ld1 {v9.16b}, [x8]
+
+ ld1 {v2.16b}, [x1] /* load a full block of input */
+ tbl v1.16b, {v1.16b}, v7.16b /* move keystream to end of register */
.if \enc == 1
- eor w7, w7, w9
- eor w9, w9, w6
+ tbl v7.16b, {v2.16b}, v9.16b /* copy plaintext to start of v7 */
+ eor v2.16b, v2.16b, v1.16b /* encrypt partial input block */
.else
- eor w9, w9, w6
- eor w7, w7, w9
+ eor v2.16b, v2.16b, v1.16b /* decrypt partial input block */
+ tbl v7.16b, {v2.16b}, v9.16b /* copy plaintext to start of v7 */
.endif
- strb w9, [x0], #1 /* store out byte */
- strb w7, [x5], #1 /* store mac byte */
- subs w2, w2, #1
- beq 5b
- ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
- ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
- b 7b
+ eor v0.16b, v0.16b, v7.16b /* fold plaintext into mac */
+ tbx v2.16b, {v6.16b}, v8.16b /* insert output from previous iteration */
+
+ st1 {v0.16b}, [x5] /* store mac */
+ st1 {v2.16b}, [x0] /* store output block */
+ ret
.endm
/*
@@ -219,3 +228,11 @@ SYM_FUNC_END(ce_aes_ccm_encrypt)
SYM_FUNC_START(ce_aes_ccm_decrypt)
aes_ccm_do_crypt 0
SYM_FUNC_END(ce_aes_ccm_decrypt)
+
+ .section ".rodata", "a" /* "a" = allocatable; no write or exec flags */
+ .align 6 /* power-of-two form on arm64: 2^6 = 64-byte alignment */
+ .fill 15, 1, 0xff /* 15 out-of-range indices preceding the identity map */
+.Lpermute: /* tail code loads index vectors from .Lpermute + w2 and .Lpermute - w2 (w2 < 0) */
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 /* identity permutation, lanes 0-7 */
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf /* identity permutation, lanes 8-15 */
+ .fill 15, 1, 0xff /* 15 out-of-range indices following: tbl writes 0, tbx keeps the dest byte */
--
2.43.0.381.gb435a96ce8-goog
next prev parent reply other threads:[~2024-01-18 17:07 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-18 17:06 [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 1/8] crypto: arm64/aes-ccm - Revert "Rewrite skcipher walker loop" Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 2/8] crypto: arm64/aes-ccm - Keep NEON enabled during skcipher walk Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 3/8] crypto: arm64/aes-ccm - Pass short inputs via stack buffer Ard Biesheuvel
2024-01-18 17:06 ` Ard Biesheuvel [this message]
2024-01-18 17:06 ` [PATCH v2 5/8] crypto: arm64/aes-ccm - Reuse existing MAC update for AAD input Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 6/8] crypto: arm64/aes-ccm - Cache round keys and unroll AES loops Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 7/8] crypto: arm64/aes-ccm - Merge encrypt and decrypt tail handling Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 8/8] crypto: arm64/aes-ccm - Merge finalization into en/decrypt asm helpers Ard Biesheuvel
2024-01-26 9:05 ` [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240118170628.3049797-14-ardb+git@google.com \
--to=ardb+git@google.com \
--cc=ardb@kernel.org \
--cc=ebiggers@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).