All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb+git@google.com>
To: linux-crypto@vger.kernel.org
Cc: ebiggers@kernel.org, herbert@gondor.apana.org.au,
	 Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v2 4/8] crypto: arm64/aes-ccm - Replace bytewise tail handling with NEON permute
Date: Thu, 18 Jan 2024 18:06:33 +0100	[thread overview]
Message-ID: <20240118170628.3049797-14-ardb+git@google.com> (raw)
In-Reply-To: <20240118170628.3049797-10-ardb+git@google.com>

From: Ard Biesheuvel <ardb@kernel.org>

Implement the CCM tail handling using a single sequence that uses
permute vectors and overlapping loads and stores, rather than looping
over the tail byte by byte with scalar operations. This is more
efficient, even though the measured speedup is only around 1-2% on the
CPUs I have tried.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/aes-ce-ccm-core.S | 59 +++++++++++++-------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index b03f7f71f893..b21a9b759ab2 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -1,8 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd.
+ * Copyright (C) 2024 Google LLC
+ *
+ * Author: Ard Biesheuvel <ardb@kernel.org>
  */
 
 #include <linux/linkage.h>
@@ -168,13 +171,13 @@ CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 	ld1	{v2.16b}, [x1], #16		/* load next input block */
 	.if	\enc == 1
 	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
-	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
+	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
 	.else
 	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
-	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
+	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
 	.endif
 	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
-	st1	{v1.16b}, [x0], #16		/* write output block */
+	st1	{v6.16b}, [x0], #16		/* write output block */
 	bne	0b
 CPU_LE(	rev	x8, x8			)
 	st1	{v0.16b}, [x5]			/* store mac */
@@ -183,25 +186,31 @@ CPU_LE(	rev	x8, x8			)
 
 6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
-	st1	{v0.16b}, [x5]			/* store mac */
-	add	w2, w2, #16			/* process partial tail block */
-7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
-	umov	w6, v1.b[0]			/* get top crypted ctr byte */
-	umov	w7, v0.b[0]			/* get top mac byte */
+
+	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
+	add	x0, x0, w2, sxtw		/* rewind the output pointer */
+
+	adr_l	x8, .Lpermute			/* load permute vectors */
+	add	x9, x8, w2, sxtw
+	sub	x8, x8, w2, sxtw
+	ld1	{v7.16b-v8.16b}, [x9]
+	ld1	{v9.16b}, [x8]
+
+	ld1	{v2.16b}, [x1]			/* load a full block of input */
+	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
 	.if	\enc == 1
-	eor	w7, w7, w9
-	eor	w9, w9, w6
+	tbl	v7.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v7 */
+	eor	v2.16b, v2.16b, v1.16b		/* encrypt partial input block */
 	.else
-	eor	w9, w9, w6
-	eor	w7, w7, w9
+	eor	v2.16b, v2.16b, v1.16b		/* decrypt partial input block */
+	tbl	v7.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v7 */
 	.endif
-	strb	w9, [x0], #1			/* store out byte */
-	strb	w7, [x5], #1			/* store mac byte */
-	subs	w2, w2, #1
-	beq	5b
-	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
-	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
-	b	7b
+	eor	v0.16b, v0.16b, v7.16b		/* fold plaintext into mac */
+	tbx	v2.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
+
+	st1	{v0.16b}, [x5]			/* store mac */
+	st1	{v2.16b}, [x0]			/* store output block */
+	ret
 	.endm
 
 	/*
@@ -219,3 +228,11 @@ SYM_FUNC_END(ce_aes_ccm_encrypt)
 SYM_FUNC_START(ce_aes_ccm_decrypt)
 	aes_ccm_do_crypt	0
 SYM_FUNC_END(ce_aes_ccm_decrypt)
+
+	.section ".rodata", "a"
+	.align	6
+	.fill	15, 1, 0xff
+.Lpermute:
+	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+	.fill	15, 1, 0xff
-- 
2.43.0.381.gb435a96ce8-goog


  parent reply	other threads:[~2024-01-18 17:07 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-18 17:06 [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 1/8] crypto: arm64/aes-ccm - Revert "Rewrite skcipher walker loop" Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 2/8] crypto: arm64/aes-ccm - Keep NEON enabled during skcipher walk Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 3/8] crypto: arm64/aes-ccm - Pass short inputs via stack buffer Ard Biesheuvel
2024-01-18 17:06 ` Ard Biesheuvel [this message]
2024-01-18 17:06 ` [PATCH v2 5/8] crypto: arm64/aes-ccm - Reuse existing MAC update for AAD input Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 6/8] crypto: arm64/aes-ccm - Cache round keys and unroll AES loops Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 7/8] crypto: arm64/aes-ccm - Merge encrypt and decrypt tail handling Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 8/8] crypto: arm64/aes-ccm - Merge finalization into en/decrypt asm helpers Ard Biesheuvel
2024-01-26  9:05 ` [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240118170628.3049797-14-ardb+git@google.com \
    --to=ardb+git@google.com \
    --cc=ardb@kernel.org \
    --cc=ebiggers@kernel.org \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-crypto@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.