From: Ard Biesheuvel <ardb+git@google.com>
To: linux-crypto@vger.kernel.org
Cc: ebiggers@kernel.org, herbert@gondor.apana.org.au,
	 Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v2 5/8] crypto: arm64/aes-ccm - Reuse existing MAC update for AAD input
Date: Thu, 18 Jan 2024 18:06:34 +0100
Message-ID: <20240118170628.3049797-15-ardb+git@google.com>
In-Reply-To: <20240118170628.3049797-10-ardb+git@google.com>

From: Ard Biesheuvel <ardb@kernel.org>

CCM combines the counter (CTR) encryption mode with a CBC-MAC based on the
same block cipher. This MAC construction is a bit clunky: it invokes the
block cipher in a way that cannot be parallelized (each call depends on the
previous call's output), resulting in poor CPU pipeline efficiency.
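
For reference, the serial dependency in that CBC-MAC looks roughly like the
sketch below (simplified C for illustration only, not part of this patch;
crypto_xor() and aes_encrypt() are the generic kernel helpers):

	/*
	 * Illustration: each iteration feeds the previous MAC value into
	 * the next block cipher call, so the AES invocations form one
	 * long dependency chain and cannot overlap in the pipeline.
	 */
	static void cbc_mac_sketch(const struct crypto_aes_ctx *ctx, u8 mac[],
				   const u8 *in, int blocks)
	{
		while (blocks--) {
			crypto_xor(mac, in, AES_BLOCK_SIZE);	/* mac ^= P[i] */
			aes_encrypt(ctx, mac, mac);		/* mac = E_K(mac) */
			in += AES_BLOCK_SIZE;
		}
	}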

The arm64 CCM code mitigates this by interleaving the encryption and MAC
at the AES round level, resulting in a substantial speedup. But this
approach does not apply to the additional authenticated data (AAD), which
is not encrypted.
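
For the encrypted portion, the CTR keystream block and the MAC block of a
given iteration are independent of each other, so their AES rounds can be
issued back to back, roughly along these lines (ARMv8 Crypto Extensions
intrinsics shown for illustration only; the XOR of the data into the MAC and
the counter handling are omitted, and the actual kernel code is hand-written
asm):

	#include <arm_neon.h>	/* requires -march=armv8-a+crypto */

	/* two independent AESE/AESMC dependency chains per loop iteration */
	static void ccm_2x_rounds_sketch(uint8x16_t *ctr, uint8x16_t *mac,
					 const uint8x16_t rk[], int rounds)
	{
		int i;

		for (i = 0; i < rounds - 1; i++) {
			*ctr = vaesmcq_u8(vaeseq_u8(*ctr, rk[i]));
			*mac = vaesmcq_u8(vaeseq_u8(*mac, rk[i]));
		}
		*ctr = veorq_u8(vaeseq_u8(*ctr, rk[rounds - 1]), rk[rounds]);
		*mac = veorq_u8(vaeseq_u8(*mac, rk[rounds - 1]), rk[rounds]);
	}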

This means the special asm routine dealing with the AAD is no better than
the MAC update routine used by the arm64 AES block encryption driver, so
let's reuse the latter and drop the special AES-CCM version.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/Kconfig           |  1 +
 arch/arm64/crypto/aes-ce-ccm-core.S | 71 --------------------
 arch/arm64/crypto/aes-ce-ccm-glue.c | 49 +++++++++++---
 arch/arm64/crypto/aes-glue.c        |  1 +
 4 files changed, 43 insertions(+), 79 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index eb7b423ba463..e7d9bd8e4709 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -268,6 +268,7 @@ config CRYPTO_AES_ARM64_CE_CCM
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64_CE_BLK
 	select CRYPTO_AEAD
 	select CRYPTO_LIB_AES
 	help
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index b21a9b759ab2..0132872bd780 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -14,77 +14,6 @@
 	.text
 	.arch	armv8-a+crypto
 
-	/*
-	 * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
-	 *			    u32 macp, u8 const rk[], u32 rounds);
-	 */
-SYM_FUNC_START(ce_aes_ccm_auth_data)
-	ld1	{v0.16b}, [x0]			/* load mac */
-	cbz	w3, 1f
-	sub	w3, w3, #16
-	eor	v1.16b, v1.16b, v1.16b
-0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
-	subs	w2, w2, #1
-	add	w3, w3, #1
-	ins	v1.b[0], w7
-	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
-	beq	8f				/* out of input? */
-	cbnz	w3, 0b
-	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.4s}, [x4]			/* load first round key */
-	prfm	pldl1strm, [x1]
-	cmp	w5, #12				/* which key size? */
-	add	x6, x4, #16
-	sub	w7, w5, #2			/* modified # of rounds */
-	bmi	2f
-	bne	5f
-	mov	v5.16b, v3.16b
-	b	4f
-2:	mov	v4.16b, v3.16b
-	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
-3:	aese	v0.16b, v4.16b
-	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
-	aese	v0.16b, v5.16b
-	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
-	subs	w7, w7, #3
-	aese	v0.16b, v3.16b
-	aesmc	v0.16b, v0.16b
-	ld1	{v5.4s}, [x6], #16		/* load next round key */
-	bpl	3b
-	aese	v0.16b, v4.16b
-	subs	w2, w2, #16			/* last data? */
-	eor	v0.16b, v0.16b, v5.16b		/* final round */
-	bmi	6f
-	ld1	{v1.16b}, [x1], #16		/* load next input block */
-	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
-	bne	1b
-6:	st1	{v0.16b}, [x0]			/* store mac */
-	beq	10f
-	adds	w2, w2, #16
-	beq	10f
-	mov	w3, w2
-7:	ldrb	w7, [x1], #1
-	umov	w6, v0.b[0]
-	eor	w6, w6, w7
-	strb	w6, [x0], #1
-	subs	w2, w2, #1
-	beq	10f
-	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
-	b	7b
-8:	cbz	w3, 91f
-	mov	w7, w3
-	add	w3, w3, #16
-9:	ext	v1.16b, v1.16b, v1.16b, #1
-	adds	w7, w7, #1
-	bne	9b
-91:	eor	v0.16b, v0.16b, v1.16b
-	st1	{v0.16b}, [x0]
-10:	mov	w0, w3
-	ret
-SYM_FUNC_END(ce_aes_ccm_auth_data)
-
 	/*
 	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
 	 * 			 u32 rounds);
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 4710e59075f5..ed3d79e05112 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -18,6 +18,8 @@
 
 #include "aes-ce-setkey.h"
 
+MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
 	/*
@@ -30,8 +32,9 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
 	return 6 + ctx->key_length / 4;
 }
 
-asmlinkage u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
-				    u32 macp, u32 const rk[], u32 rounds);
+asmlinkage u32 ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+				 int blocks, u8 dg[], int enc_before,
+				 int enc_after);
 
 asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 				   u32 const rk[], u32 rounds, u8 mac[],
@@ -97,6 +100,41 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 	return 0;
 }
 
+static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+				u32 macp, u32 const rk[], u32 rounds)
+{
+	int enc_after = (macp + abytes) % AES_BLOCK_SIZE;
+
+	do {
+		u32 blocks = abytes / AES_BLOCK_SIZE;
+
+		if (macp == AES_BLOCK_SIZE || (!macp && blocks > 0)) {
+			u32 rem = ce_aes_mac_update(in, rk, rounds, blocks, mac,
+						    macp, enc_after);
+			u32 adv = (blocks - rem) * AES_BLOCK_SIZE;
+
+			macp = enc_after ? 0 : AES_BLOCK_SIZE;
+			in += adv;
+			abytes -= adv;
+
+			if (unlikely(rem)) {
+				kernel_neon_end();
+				kernel_neon_begin();
+				macp = 0;
+			}
+		} else {
+			u32 l = min(AES_BLOCK_SIZE - macp, abytes);
+
+			crypto_xor(&mac[macp], in, l);
+			in += l;
+			macp += l;
+			abytes -= l;
+		}
+	} while (abytes > 0);
+
+	return macp;
+}
+
 static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -104,7 +142,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 	struct __packed { __be16 l; __be32 h; u16 len; } ltag;
 	struct scatter_walk walk;
 	u32 len = req->assoclen;
-	u32 macp = 0;
+	u32 macp = AES_BLOCK_SIZE;
 
 	/* prepend the AAD with a length tag */
 	if (len < 0xff00) {
@@ -128,16 +166,11 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 			scatterwalk_start(&walk, sg_next(walk.sg));
 			n = scatterwalk_clamp(&walk, len);
 		}
-		n = min_t(u32, n, SZ_4K); /* yield NEON at least every 4k */
 		p = scatterwalk_map(&walk);
 
 		macp = ce_aes_ccm_auth_data(mac, p, n, macp, ctx->key_enc,
 					    num_rounds(ctx));
 
-		if (len / SZ_4K > (len - n) / SZ_4K) {
-			kernel_neon_end();
-			kernel_neon_begin();
-		}
 		len -= n;
 
 		scatterwalk_unmap(p);
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 162787c7aa86..a147e847a5a1 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -1048,6 +1048,7 @@ static int __init aes_init(void)
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 module_cpu_feature_match(AES, aes_init);
+EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL);
 #else
 module_init(aes_init);
 EXPORT_SYMBOL(neon_aes_ecb_encrypt);
-- 
2.43.0.381.gb435a96ce8-goog


Thread overview (10+ messages):
2024-01-18 17:06 [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 1/8] crypto: arm64/aes-ccm - Revert "Rewrite skcipher walker loop" Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 2/8] crypto: arm64/aes-ccm - Keep NEON enabled during skcipher walk Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 3/8] crypto: arm64/aes-ccm - Pass short inputs via stack buffer Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 4/8] crypto: arm64/aes-ccm - Replace bytewise tail handling with NEON permute Ard Biesheuvel
2024-01-18 17:06 ` Ard Biesheuvel [this message]
2024-01-18 17:06 ` [PATCH v2 6/8] crypto: arm64/aes-ccm - Cache round keys and unroll AES loops Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 7/8] crypto: arm64/aes-ccm - Merge encrypt and decrypt tail handling Ard Biesheuvel
2024-01-18 17:06 ` [PATCH v2 8/8] crypto: arm64/aes-ccm - Merge finalization into en/decrypt asm helpers Ard Biesheuvel
2024-01-26  9:05 ` [PATCH v2 0/8] crypto: Clean up arm64 AES-CCM code Herbert Xu
