All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: Paul Crowley <paulcrowley@google.com>,
	Ard Biesheuvel <ard.biesheuvel@linaro.org>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v2 3/4] crypto: arm64/chacha20 - refactor to allow varying number of rounds
Date: Mon,  3 Dec 2018 19:52:51 -0800	[thread overview]
Message-ID: <20181204035252.14853-4-ebiggers@kernel.org> (raw)
In-Reply-To: <20181204035252.14853-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

In preparation for adding XChaCha12 support, rename/refactor the ARM64
NEON implementation of ChaCha20 to support different numbers of rounds.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/arm64/crypto/Makefile                    |  4 +-
 ...hacha20-neon-core.S => chacha-neon-core.S} | 45 ++++++++-------
 ...hacha20-neon-glue.c => chacha-neon-glue.c} | 57 ++++++++++---------
 3 files changed, 57 insertions(+), 49 deletions(-)
 rename arch/arm64/crypto/{chacha20-neon-core.S => chacha-neon-core.S} (94%)
 rename arch/arm64/crypto/{chacha20-neon-glue.c => chacha-neon-glue.c} (71%)

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 125dbb10a93e..a4ffd9fe3265 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -50,8 +50,8 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
 sha512-arm64-y := sha512-glue.o sha512-core.o
 
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
 
 obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
similarity index 94%
rename from arch/arm64/crypto/chacha20-neon-core.S
rename to arch/arm64/crypto/chacha-neon-core.S
index 0571e45a1a0a..3d3a12db5204 100644
--- a/arch/arm64/crypto/chacha20-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -1,5 +1,5 @@
 /*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ * ChaCha/XChaCha NEON helper functions
  *
  * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
@@ -24,17 +24,18 @@
 	.align		6
 
 /*
- * chacha20_permute - permute one block
+ * chacha_permute - permute one block
  *
  * Permute one 64-byte block where the state matrix is stored in the four NEON
  * registers v0-v3.  It performs matrix operations on four words in parallel,
  * but requires shuffling to rearrange the words after each round.
  *
- * Clobbers: x3, x10, v4, v12
+ * The round count is given in w3.
+ *
+ * Clobbers: w3, x10, v4, v12
  */
-chacha20_permute:
+chacha_permute:
 
-	mov		x3, #10
 	adr		x10, ROT8
 	ld1		{v12.4s}, [x10]
 
@@ -97,16 +98,17 @@ chacha20_permute:
 	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
 	ext		v3.16b, v3.16b, v3.16b, #4
 
-	subs		x3, x3, #1
+	subs		w3, w3, #2
 	b.ne		.Ldoubleround
 
 	ret
-ENDPROC(chacha20_permute)
+ENDPROC(chacha_permute)
 
-ENTRY(chacha20_block_xor_neon)
+ENTRY(chacha_block_xor_neon)
 	// x0: Input state matrix, s
 	// x1: 1 data block output, o
 	// x2: 1 data block input, i
+	// w3: nrounds
 
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
@@ -115,7 +117,7 @@ ENTRY(chacha20_block_xor_neon)
 	ld1		{v0.4s-v3.4s}, [x0]
 	ld1		{v8.4s-v11.4s}, [x0]
 
-	bl		chacha20_permute
+	bl		chacha_permute
 
 	ld1		{v4.16b-v7.16b}, [x2]
 
@@ -139,42 +141,45 @@ ENTRY(chacha20_block_xor_neon)
 
 	ldp		x29, x30, [sp], #16
 	ret
-ENDPROC(chacha20_block_xor_neon)
+ENDPROC(chacha_block_xor_neon)
 
-ENTRY(hchacha20_block_neon)
+ENTRY(hchacha_block_neon)
 	// x0: Input state matrix, s
 	// x1: output (8 32-bit words)
+	// w2: nrounds
 
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
 	ld1		{v0.4s-v3.4s}, [x0]
 
-	bl		chacha20_permute
+	mov		w3, w2
+	bl		chacha_permute
 
 	st1		{v0.16b}, [x1], #16
 	st1		{v3.16b}, [x1]
 
 	ldp		x29, x30, [sp], #16
 	ret
-ENDPROC(hchacha20_block_neon)
+ENDPROC(hchacha_block_neon)
 
 	.align		6
-ENTRY(chacha20_4block_xor_neon)
+ENTRY(chacha_4block_xor_neon)
 	// x0: Input state matrix, s
 	// x1: 4 data blocks output, o
 	// x2: 4 data blocks input, i
+	// w3: nrounds
 
 	//
-	// This function encrypts four consecutive ChaCha20 blocks by loading
+	// This function encrypts four consecutive ChaCha blocks by loading
 	// the state matrix in NEON registers four times. The algorithm performs
 	// each operation on the corresponding word of each state matrix, hence
 	// requires no word shuffling. For final XORing step we transpose the
 	// matrix by interleaving 32- and then 64-bit words, which allows us to
 	// do XOR in NEON registers.
 	//
-	adr		x3, CTRINC		// ... and ROT8
-	ld1		{v30.4s-v31.4s}, [x3]
+	adr		x9, CTRINC		// ... and ROT8
+	ld1		{v30.4s-v31.4s}, [x9]
 
 	// x0..15[0-3] = s0..3[0..3]
 	mov		x4, x0
@@ -186,8 +191,6 @@ ENTRY(chacha20_4block_xor_neon)
 	// x12 += counter values 0-3
 	add		v12.4s, v12.4s, v30.4s
 
-	mov		x3, #10
-
 .Ldoubleround4:
 	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
@@ -361,7 +364,7 @@ ENTRY(chacha20_4block_xor_neon)
 	sri		v7.4s, v18.4s, #25
 	sri		v4.4s, v19.4s, #25
 
-	subs		x3, x3, #1
+	subs		w3, w3, #2
 	b.ne		.Ldoubleround4
 
 	ld4r		{v16.4s-v19.4s}, [x0], #16
@@ -475,7 +478,7 @@ ENTRY(chacha20_4block_xor_neon)
 	st1		{v28.16b-v31.16b}, [x1]
 
 	ret
-ENDPROC(chacha20_4block_xor_neon)
+ENDPROC(chacha_4block_xor_neon)
 
 CTRINC:	.word		0, 1, 2, 3
 ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
similarity index 71%
rename from arch/arm64/crypto/chacha20-neon-glue.c
rename to arch/arm64/crypto/chacha-neon-glue.c
index a5b9cbc0c4de..4d992029b912 100644
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,6 @@
 /*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
  *
  * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
@@ -28,18 +29,20 @@
 #include <asm/neon.h>
 #include <asm/simd.h>
 
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out);
+asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
 
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
-			    unsigned int bytes)
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
 {
 	u8 buf[CHACHA_BLOCK_SIZE];
 
 	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
 		kernel_neon_begin();
-		chacha20_4block_xor_neon(state, dst, src);
+		chacha_4block_xor_neon(state, dst, src, nrounds);
 		kernel_neon_end();
 		bytes -= CHACHA_BLOCK_SIZE * 4;
 		src += CHACHA_BLOCK_SIZE * 4;
@@ -52,7 +55,7 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 
 	kernel_neon_begin();
 	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha20_block_xor_neon(state, dst, src);
+		chacha_block_xor_neon(state, dst, src, nrounds);
 		bytes -= CHACHA_BLOCK_SIZE;
 		src += CHACHA_BLOCK_SIZE;
 		dst += CHACHA_BLOCK_SIZE;
@@ -60,14 +63,14 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 	}
 	if (bytes) {
 		memcpy(buf, src, bytes);
-		chacha20_block_xor_neon(state, buf, buf);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
 		memcpy(dst, buf, bytes);
 	}
 	kernel_neon_end();
 }
 
-static int chacha20_neon_stream_xor(struct skcipher_request *req,
-				    struct chacha_ctx *ctx, u8 *iv)
+static int chacha_neon_stream_xor(struct skcipher_request *req,
+				  struct chacha_ctx *ctx, u8 *iv)
 {
 	struct skcipher_walk walk;
 	u32 state[16];
@@ -83,15 +86,15 @@ static int chacha20_neon_stream_xor(struct skcipher_request *req,
 		if (nbytes < walk.total)
 			nbytes = round_down(nbytes, walk.stride);
 
-		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-				nbytes);
+		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes, ctx->nrounds);
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static int chacha20_neon(struct skcipher_request *req)
+static int chacha_neon(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -99,10 +102,10 @@ static int chacha20_neon(struct skcipher_request *req)
 	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
 		return crypto_chacha_crypt(req);
 
-	return chacha20_neon_stream_xor(req, ctx, req->iv);
+	return chacha_neon_stream_xor(req, ctx, req->iv);
 }
 
-static int xchacha20_neon(struct skcipher_request *req)
+static int xchacha_neon(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -116,12 +119,13 @@ static int xchacha20_neon(struct skcipher_request *req)
 	crypto_chacha_init(state, ctx, req->iv);
 
 	kernel_neon_begin();
-	hchacha20_block_neon(state, subctx.key);
+	hchacha_block_neon(state, subctx.key, ctx->nrounds);
 	kernel_neon_end();
+	subctx.nrounds = ctx->nrounds;
 
 	memcpy(&real_iv[0], req->iv + 24, 8);
 	memcpy(&real_iv[8], req->iv + 16, 8);
-	return chacha20_neon_stream_xor(req, &subctx, real_iv);
+	return chacha_neon_stream_xor(req, &subctx, real_iv);
 }
 
 static struct skcipher_alg algs[] = {
@@ -139,8 +143,8 @@ static struct skcipher_alg algs[] = {
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
 		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= chacha20_neon,
-		.decrypt		= chacha20_neon,
+		.encrypt		= chacha_neon,
+		.decrypt		= chacha_neon,
 	}, {
 		.base.cra_name		= "xchacha20",
 		.base.cra_driver_name	= "xchacha20-neon",
@@ -155,12 +159,12 @@ static struct skcipher_alg algs[] = {
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
 		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= xchacha20_neon,
-		.decrypt		= xchacha20_neon,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
 	}
 };
 
-static int __init chacha20_simd_mod_init(void)
+static int __init chacha_simd_mod_init(void)
 {
 	if (!(elf_hwcap & HWCAP_ASIMD))
 		return -ENODEV;
@@ -168,14 +172,15 @@ static int __init chacha20_simd_mod_init(void)
 	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
 }
 
-static void __exit chacha20_simd_mod_fini(void)
+static void __exit chacha_simd_mod_fini(void)
 {
 	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
 }
 
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
 
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("chacha20");
-- 
2.19.2

WARNING: multiple messages have this Message-ID (diff)
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: "Jason A . Donenfeld" <Jason@zx2c4.com>,
	Ard Biesheuvel <ard.biesheuvel@linaro.org>,
	linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	Paul Crowley <paulcrowley@google.com>
Subject: [PATCH v2 3/4] crypto: arm64/chacha20 - refactor to allow varying number of rounds
Date: Mon,  3 Dec 2018 19:52:51 -0800	[thread overview]
Message-ID: <20181204035252.14853-4-ebiggers@kernel.org> (raw)
In-Reply-To: <20181204035252.14853-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

In preparation for adding XChaCha12 support, rename/refactor the ARM64
NEON implementation of ChaCha20 to support different numbers of rounds.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/arm64/crypto/Makefile                    |  4 +-
 ...hacha20-neon-core.S => chacha-neon-core.S} | 45 ++++++++-------
 ...hacha20-neon-glue.c => chacha-neon-glue.c} | 57 ++++++++++---------
 3 files changed, 57 insertions(+), 49 deletions(-)
 rename arch/arm64/crypto/{chacha20-neon-core.S => chacha-neon-core.S} (94%)
 rename arch/arm64/crypto/{chacha20-neon-glue.c => chacha-neon-glue.c} (71%)

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 125dbb10a93e..a4ffd9fe3265 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -50,8 +50,8 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
 sha512-arm64-y := sha512-glue.o sha512-core.o
 
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
 
 obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
similarity index 94%
rename from arch/arm64/crypto/chacha20-neon-core.S
rename to arch/arm64/crypto/chacha-neon-core.S
index 0571e45a1a0a..3d3a12db5204 100644
--- a/arch/arm64/crypto/chacha20-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -1,5 +1,5 @@
 /*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ * ChaCha/XChaCha NEON helper functions
  *
  * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
@@ -24,17 +24,18 @@
 	.align		6
 
 /*
- * chacha20_permute - permute one block
+ * chacha_permute - permute one block
  *
  * Permute one 64-byte block where the state matrix is stored in the four NEON
  * registers v0-v3.  It performs matrix operations on four words in parallel,
  * but requires shuffling to rearrange the words after each round.
  *
- * Clobbers: x3, x10, v4, v12
+ * The round count is given in w3.
+ *
+ * Clobbers: w3, x10, v4, v12
  */
-chacha20_permute:
+chacha_permute:
 
-	mov		x3, #10
 	adr		x10, ROT8
 	ld1		{v12.4s}, [x10]
 
@@ -97,16 +98,17 @@ chacha20_permute:
 	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
 	ext		v3.16b, v3.16b, v3.16b, #4
 
-	subs		x3, x3, #1
+	subs		w3, w3, #2
 	b.ne		.Ldoubleround
 
 	ret
-ENDPROC(chacha20_permute)
+ENDPROC(chacha_permute)
 
-ENTRY(chacha20_block_xor_neon)
+ENTRY(chacha_block_xor_neon)
 	// x0: Input state matrix, s
 	// x1: 1 data block output, o
 	// x2: 1 data block input, i
+	// w3: nrounds
 
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
@@ -115,7 +117,7 @@ ENTRY(chacha20_block_xor_neon)
 	ld1		{v0.4s-v3.4s}, [x0]
 	ld1		{v8.4s-v11.4s}, [x0]
 
-	bl		chacha20_permute
+	bl		chacha_permute
 
 	ld1		{v4.16b-v7.16b}, [x2]
 
@@ -139,42 +141,45 @@ ENTRY(chacha20_block_xor_neon)
 
 	ldp		x29, x30, [sp], #16
 	ret
-ENDPROC(chacha20_block_xor_neon)
+ENDPROC(chacha_block_xor_neon)
 
-ENTRY(hchacha20_block_neon)
+ENTRY(hchacha_block_neon)
 	// x0: Input state matrix, s
 	// x1: output (8 32-bit words)
+	// w2: nrounds
 
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
 	ld1		{v0.4s-v3.4s}, [x0]
 
-	bl		chacha20_permute
+	mov		w3, w2
+	bl		chacha_permute
 
 	st1		{v0.16b}, [x1], #16
 	st1		{v3.16b}, [x1]
 
 	ldp		x29, x30, [sp], #16
 	ret
-ENDPROC(hchacha20_block_neon)
+ENDPROC(hchacha_block_neon)
 
 	.align		6
-ENTRY(chacha20_4block_xor_neon)
+ENTRY(chacha_4block_xor_neon)
 	// x0: Input state matrix, s
 	// x1: 4 data blocks output, o
 	// x2: 4 data blocks input, i
+	// w3: nrounds
 
 	//
-	// This function encrypts four consecutive ChaCha20 blocks by loading
+	// This function encrypts four consecutive ChaCha blocks by loading
 	// the state matrix in NEON registers four times. The algorithm performs
 	// each operation on the corresponding word of each state matrix, hence
 	// requires no word shuffling. For final XORing step we transpose the
 	// matrix by interleaving 32- and then 64-bit words, which allows us to
 	// do XOR in NEON registers.
 	//
-	adr		x3, CTRINC		// ... and ROT8
-	ld1		{v30.4s-v31.4s}, [x3]
+	adr		x9, CTRINC		// ... and ROT8
+	ld1		{v30.4s-v31.4s}, [x9]
 
 	// x0..15[0-3] = s0..3[0..3]
 	mov		x4, x0
@@ -186,8 +191,6 @@ ENTRY(chacha20_4block_xor_neon)
 	// x12 += counter values 0-3
 	add		v12.4s, v12.4s, v30.4s
 
-	mov		x3, #10
-
 .Ldoubleround4:
 	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
@@ -361,7 +364,7 @@ ENTRY(chacha20_4block_xor_neon)
 	sri		v7.4s, v18.4s, #25
 	sri		v4.4s, v19.4s, #25
 
-	subs		x3, x3, #1
+	subs		w3, w3, #2
 	b.ne		.Ldoubleround4
 
 	ld4r		{v16.4s-v19.4s}, [x0], #16
@@ -475,7 +478,7 @@ ENTRY(chacha20_4block_xor_neon)
 	st1		{v28.16b-v31.16b}, [x1]
 
 	ret
-ENDPROC(chacha20_4block_xor_neon)
+ENDPROC(chacha_4block_xor_neon)
 
 CTRINC:	.word		0, 1, 2, 3
 ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
similarity index 71%
rename from arch/arm64/crypto/chacha20-neon-glue.c
rename to arch/arm64/crypto/chacha-neon-glue.c
index a5b9cbc0c4de..4d992029b912 100644
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,6 @@
 /*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
  *
  * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
@@ -28,18 +29,20 @@
 #include <asm/neon.h>
 #include <asm/simd.h>
 
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void hchacha20_block_neon(const u32 *state, u32 *out);
+asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
+				       int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
 
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
-			    unsigned int bytes)
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
 {
 	u8 buf[CHACHA_BLOCK_SIZE];
 
 	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
 		kernel_neon_begin();
-		chacha20_4block_xor_neon(state, dst, src);
+		chacha_4block_xor_neon(state, dst, src, nrounds);
 		kernel_neon_end();
 		bytes -= CHACHA_BLOCK_SIZE * 4;
 		src += CHACHA_BLOCK_SIZE * 4;
@@ -52,7 +55,7 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 
 	kernel_neon_begin();
 	while (bytes >= CHACHA_BLOCK_SIZE) {
-		chacha20_block_xor_neon(state, dst, src);
+		chacha_block_xor_neon(state, dst, src, nrounds);
 		bytes -= CHACHA_BLOCK_SIZE;
 		src += CHACHA_BLOCK_SIZE;
 		dst += CHACHA_BLOCK_SIZE;
@@ -60,14 +63,14 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 	}
 	if (bytes) {
 		memcpy(buf, src, bytes);
-		chacha20_block_xor_neon(state, buf, buf);
+		chacha_block_xor_neon(state, buf, buf, nrounds);
 		memcpy(dst, buf, bytes);
 	}
 	kernel_neon_end();
 }
 
-static int chacha20_neon_stream_xor(struct skcipher_request *req,
-				    struct chacha_ctx *ctx, u8 *iv)
+static int chacha_neon_stream_xor(struct skcipher_request *req,
+				  struct chacha_ctx *ctx, u8 *iv)
 {
 	struct skcipher_walk walk;
 	u32 state[16];
@@ -83,15 +86,15 @@ static int chacha20_neon_stream_xor(struct skcipher_request *req,
 		if (nbytes < walk.total)
 			nbytes = round_down(nbytes, walk.stride);
 
-		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
-				nbytes);
+		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes, ctx->nrounds);
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static int chacha20_neon(struct skcipher_request *req)
+static int chacha_neon(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -99,10 +102,10 @@ static int chacha20_neon(struct skcipher_request *req)
 	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
 		return crypto_chacha_crypt(req);
 
-	return chacha20_neon_stream_xor(req, ctx, req->iv);
+	return chacha_neon_stream_xor(req, ctx, req->iv);
 }
 
-static int xchacha20_neon(struct skcipher_request *req)
+static int xchacha_neon(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -116,12 +119,13 @@ static int xchacha20_neon(struct skcipher_request *req)
 	crypto_chacha_init(state, ctx, req->iv);
 
 	kernel_neon_begin();
-	hchacha20_block_neon(state, subctx.key);
+	hchacha_block_neon(state, subctx.key, ctx->nrounds);
 	kernel_neon_end();
+	subctx.nrounds = ctx->nrounds;
 
 	memcpy(&real_iv[0], req->iv + 24, 8);
 	memcpy(&real_iv[8], req->iv + 16, 8);
-	return chacha20_neon_stream_xor(req, &subctx, real_iv);
+	return chacha_neon_stream_xor(req, &subctx, real_iv);
 }
 
 static struct skcipher_alg algs[] = {
@@ -139,8 +143,8 @@ static struct skcipher_alg algs[] = {
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
 		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= chacha20_neon,
-		.decrypt		= chacha20_neon,
+		.encrypt		= chacha_neon,
+		.decrypt		= chacha_neon,
 	}, {
 		.base.cra_name		= "xchacha20",
 		.base.cra_driver_name	= "xchacha20-neon",
@@ -155,12 +159,12 @@ static struct skcipher_alg algs[] = {
 		.chunksize		= CHACHA_BLOCK_SIZE,
 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
 		.setkey			= crypto_chacha20_setkey,
-		.encrypt		= xchacha20_neon,
-		.decrypt		= xchacha20_neon,
+		.encrypt		= xchacha_neon,
+		.decrypt		= xchacha_neon,
 	}
 };
 
-static int __init chacha20_simd_mod_init(void)
+static int __init chacha_simd_mod_init(void)
 {
 	if (!(elf_hwcap & HWCAP_ASIMD))
 		return -ENODEV;
@@ -168,14 +172,15 @@ static int __init chacha20_simd_mod_init(void)
 	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
 }
 
-static void __exit chacha20_simd_mod_fini(void)
+static void __exit chacha_simd_mod_fini(void)
 {
 	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
 }
 
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
 
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("chacha20");
-- 
2.19.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2018-12-04  3:56 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-04  3:52 [PATCH v2 0/4] crypto: ARM64 NEON optimized XChaCha and NHPoly1305 (for Adiantum) Eric Biggers
2018-12-04  3:52 ` Eric Biggers
2018-12-04  3:52 ` [PATCH v2 1/4] crypto: arm64/nhpoly1305 - add NEON-accelerated NHPoly1305 Eric Biggers
2018-12-04  3:52   ` Eric Biggers
2018-12-04  3:52 ` [PATCH v2 2/4] crypto: arm64/chacha20 - add XChaCha20 support Eric Biggers
2018-12-04  3:52   ` Eric Biggers
2018-12-04 14:51   ` Ard Biesheuvel
2018-12-04 14:51     ` Ard Biesheuvel
2018-12-04  3:52 ` Eric Biggers [this message]
2018-12-04  3:52   ` [PATCH v2 3/4] crypto: arm64/chacha20 - refactor to allow varying number of rounds Eric Biggers
2018-12-04  3:52 ` [PATCH v2 4/4] crypto: arm64/chacha - add XChaCha12 support Eric Biggers
2018-12-04  3:52   ` Eric Biggers
2018-12-13 10:31 ` [PATCH v2 0/4] crypto: ARM64 NEON optimized XChaCha and NHPoly1305 (for Adiantum) Herbert Xu
2018-12-13 10:31   ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181204035252.14853-4-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=Jason@zx2c4.com \
    --cc=ard.biesheuvel@linaro.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulcrowley@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.