From: Ard Biesheuvel <ardb+git@google.com>
To: linux-crypto@vger.kernel.org
Cc: ebiggers@kernel.org, herbert@gondor.apana.org.au,
	 Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v2 7/8] crypto: arm64/aes-ccm - Merge encrypt and decrypt tail handling
Date: Thu, 18 Jan 2024 18:06:36 +0100
Message-ID: <20240118170628.3049797-17-ardb+git@google.com>
In-Reply-To: <20240118170628.3049797-10-ardb+git@google.com>

From: Ard Biesheuvel <ardb@kernel.org>

The encryption and decryption code paths are mostly identical, except
for a small difference where the plaintext input into the MAC is taken
from either the input or the output block.

We can factor this in quite easily using a vector bit select and a few
additional XORs, without the need for branches. This way, we can use the
same tail handling logic on the encrypt and decrypt code paths, allowing
further consolidation of the asm helpers in a subsequent patch.

(In the main loop, adding just a handful of ALU instructions results in
a noticeable performance hit [around 5% on Apple M2], so those routines
are kept separate.)
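
For reference, the net effect of the merged tail handling can be modelled
in scalar C roughly as follows. This is only a sketch under stated
assumptions: the helper name, parameters and byte-wise loop are invented
for illustration, and the tbl/tbx permutes that shift the partial block
into place are omitted; the actual code operates on whole 16-byte NEON
registers.

#include <stdint.h>

/*
 * Hypothetical scalar model of the merged CCM tail handling.
 * 'sel' plays the role of v22, which the entry points preload with
 * all-ones (encrypt) or all-zeroes (decrypt) using movi.
 */
static void ccm_tail_model(uint8_t *out, uint8_t *mac, const uint8_t *in,
			   const uint8_t *keystream, uint8_t sel, int len)
{
	for (int i = 0; i < len; i++) {
		/* eor v7, v2, v1: XOR the input block with the keystream */
		uint8_t xored = in[i] ^ keystream[i];
		/* bif v2, v7, v22: take 'in' where sel is set, 'xored' elsewhere */
		uint8_t pt = (in[i] & sel) | (xored & (uint8_t)~sel);

		mac[i] ^= pt;	/* eor v0, v0, v2: fold plaintext into mac */
		out[i] = xored;	/* st1 {v7.16b}: store output block */
	}
}

With sel == 0xff the input block (the plaintext when encrypting) is folded
into the MAC, and with sel == 0 the XORed block (the plaintext when
decrypting) is used instead, matching the movi #255 / movi #0 added to the
two entry points.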

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/aes-ce-ccm-core.S | 26 ++++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 0ec59fc4ef3e..bf3a888a5615 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -77,7 +77,7 @@ CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 	aes_encrypt	v0, v1, w4
 
 	subs	w2, w2, #16
-	bmi	6f				/* partial block? */
+	bmi	ce_aes_ccm_crypt_tail
 	ld1	{v2.16b}, [x1], #16		/* load next input block */
 	.if	\enc == 1
 	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
@@ -93,8 +93,10 @@ CPU_LE(	rev	x8, x8			)
 	st1	{v0.16b}, [x5]			/* store mac */
 	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
 5:	ret
+	.endm
 
-6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
+SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
+	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
 
 	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
@@ -108,20 +110,16 @@ CPU_LE(	rev	x8, x8			)
 
 	ld1	{v2.16b}, [x1]			/* load a full block of input */
 	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
-	.if	\enc == 1
-	tbl	v7.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v7 */
-	eor	v2.16b, v2.16b, v1.16b		/* encrypt partial input block */
-	.else
-	eor	v2.16b, v2.16b, v1.16b		/* decrypt partial input block */
-	tbl	v7.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v7 */
-	.endif
-	eor	v0.16b, v0.16b, v7.16b		/* fold plaintext into mac */
-	tbx	v2.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
+	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
+	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
+	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
+	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
+	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */
 
 	st1	{v0.16b}, [x5]			/* store mac */
-	st1	{v2.16b}, [x0]			/* store output block */
+	st1	{v7.16b}, [x0]			/* store output block */
 	ret
-	.endm
+SYM_FUNC_END(ce_aes_ccm_crypt_tail)
 
 	/*
 	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
@@ -132,10 +130,12 @@ CPU_LE(	rev	x8, x8			)
 	 * 			   u8 ctr[]);
 	 */
 SYM_FUNC_START(ce_aes_ccm_encrypt)
+	movi	v22.16b, #255
 	aes_ccm_do_crypt	1
 SYM_FUNC_END(ce_aes_ccm_encrypt)
 
 SYM_FUNC_START(ce_aes_ccm_decrypt)
+	movi	v22.16b, #0
 	aes_ccm_do_crypt	0
 SYM_FUNC_END(ce_aes_ccm_decrypt)
 
-- 
2.43.0.381.gb435a96ce8-goog

