From: Ard Biesheuvel <ardb+git@google.com>
To: linux-crypto@vger.kernel.org
Cc: ebiggers@kernel.org, herbert@gondor.apana.org.au,
Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v2 4/8] crypto: arm64/aes-ccm - Replace bytewise tail handling with NEON permute
Date: Thu, 18 Jan 2024 18:06:33 +0100 [thread overview]
Message-ID: <20240118170628.3049797-14-ardb+git@google.com> (raw)
In-Reply-To: <20240118170628.3049797-10-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
Implement the CCM tail handling using a single sequence that uses
permute vectors and overlapping loads and stores, rather than going over
the tail byte by byte in a loop, and using scalar operations. This is
more efficient, even though the measured speedup is only around 1-2% on
the CPUs I have tried.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/crypto/aes-ce-ccm-core.S | 59 +++++++++++++-------
1 file changed, 38 insertions(+), 21 deletions(-)
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index b03f7f71f893..b21a9b759ab2 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -1,8 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
*
- * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd.
+ * Copyright (C) 2024 Google LLC
+ *
+ * Author: Ard Biesheuvel <ardb@kernel.org>
*/
#include <linux/linkage.h>
@@ -168,13 +171,13 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
ld1 {v2.16b}, [x1], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
- eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
+ eor v6.16b, v1.16b, v2.16b /* xor with crypted ctr */
.else
eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
- eor v1.16b, v2.16b, v5.16b /* final round enc */
+ eor v6.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
- st1 {v1.16b}, [x0], #16 /* write output block */
+ st1 {v6.16b}, [x0], #16 /* write output block */
bne 0b
CPU_LE( rev x8, x8 )
st1 {v0.16b}, [x5] /* store mac */
@@ -183,25 +186,31 @@ CPU_LE( rev x8, x8 )
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
- st1 {v0.16b}, [x5] /* store mac */
- add w2, w2, #16 /* process partial tail block */
-7: ldrb w9, [x1], #1 /* get 1 byte of input */
- umov w6, v1.b[0] /* get top crypted ctr byte */
- umov w7, v0.b[0] /* get top mac byte */
+
+ add x1, x1, w2, sxtw /* rewind the input pointer (w2 < 0) */
+ add x0, x0, w2, sxtw /* rewind the output pointer */
+
+ adr_l x8, .Lpermute /* load permute vectors */
+ add x9, x8, w2, sxtw
+ sub x8, x8, w2, sxtw
+ ld1 {v7.16b-v8.16b}, [x9]
+ ld1 {v9.16b}, [x8]
+
+ ld1 {v2.16b}, [x1] /* load a full block of input */
+ tbl v1.16b, {v1.16b}, v7.16b /* move keystream to end of register */
.if \enc == 1
- eor w7, w7, w9
- eor w9, w9, w6
+ tbl v7.16b, {v2.16b}, v9.16b /* copy plaintext to start of v7 */
+ eor v2.16b, v2.16b, v1.16b /* encrypt partial input block */
.else
- eor w9, w9, w6
- eor w7, w7, w9
+ eor v2.16b, v2.16b, v1.16b /* decrypt partial input block */
+ tbl v7.16b, {v2.16b}, v9.16b /* copy plaintext to start of v7 */
.endif
- strb w9, [x0], #1 /* store out byte */
- strb w7, [x5], #1 /* store mac byte */
- subs w2, w2, #1
- beq 5b
- ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
- ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
- b 7b
+ eor v0.16b, v0.16b, v7.16b /* fold plaintext into mac */
+ tbx v2.16b, {v6.16b}, v8.16b /* insert output from previous iteration */
+
+ st1 {v0.16b}, [x5] /* store mac */
+ st1 {v2.16b}, [x0] /* store output block */
+ ret
.endm
/*
@@ -219,3 +228,11 @@ SYM_FUNC_END(ce_aes_ccm_encrypt)
SYM_FUNC_START(ce_aes_ccm_decrypt)
aes_ccm_do_crypt 0
SYM_FUNC_END(ce_aes_ccm_decrypt)
+
+ .section ".rodata", "a"
+ .align 6
+ .fill 15, 1, 0xff
+.Lpermute:
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .fill 15, 1, 0xff
--
2.43.0.381.gb435a96ce8-goog
next prev parent reply other threads:[~2024-01-18 17:07 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-18 17:06 [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 1/8] crypto: arm64/aes-ccm - Revert "Rewrite skcipher walker loop" Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 2/8] crypto: arm64/aes-ccm - Keep NEON enabled during skcipher walk Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 3/8] crypto: arm64/aes-ccm - Pass short inputs via stack buffer Ard Biesheuvel
2024-01-18 17:06 ` Ard Biesheuvel [this message]
2024-01-18 17:06 ` [PATCH v2 5/8] crypto: arm64/aes-ccm - Reuse existing MAC update for AAD input Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 6/8] crypto: arm64/aes-ccm - Cache round keys and unroll AES loops Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 7/8] crypto: arm64/aes-ccm - Merge encrypt and decrypt tail handling Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 8/8] crypto: arm64/aes-ccm - Merge finalization into en/decrypt asm helpers Ard Biesheuvel
2024-01-26 9:05 ` [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240118170628.3049797-14-ardb+git@google.com \
--to=ardb+git@google.com \
--cc=ardb@kernel.org \
--cc=ebiggers@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.