From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: Paul Crowley <paulcrowley@google.com>,
Martin Willi <martin@strongswan.org>,
Milan Broz <gmazyland@gmail.com>,
"Jason A . Donenfeld" <Jason@zx2c4.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH 5/6] crypto: x86/chacha20 - refactor to allow varying number of rounds
Date: Tue, 27 Nov 2018 22:44:44 -0800 [thread overview]
Message-ID: <20181128064445.3813-6-ebiggers@kernel.org> (raw)
In-Reply-To: <20181128064445.3813-1-ebiggers@kernel.org>
From: Eric Biggers <ebiggers@google.com>
In preparation for adding XChaCha12 support, rename/refactor the x86_64
SIMD implementations of ChaCha20 to support different numbers of rounds.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/Makefile | 6 +-
...a20-avx2-x86_64.S => chacha-avx2-x86_64.S} | 33 +++---
...0-ssse3-x86_64.S => chacha-ssse3-x86_64.S} | 41 ++++---
.../crypto/{chacha20_glue.c => chacha_glue.c} | 110 +++++++++---------
4 files changed, 97 insertions(+), 93 deletions(-)
rename arch/x86/crypto/{chacha20-avx2-x86_64.S => chacha-avx2-x86_64.S} (97%)
rename arch/x86/crypto/{chacha20-ssse3-x86_64.S => chacha-ssse3-x86_64.S} (96%)
rename arch/x86/crypto/{chacha20_glue.c => chacha_glue.c} (56%)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 006433da45f8c..164b4e792e8d2 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
+obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -77,7 +77,7 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
+chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
@@ -102,7 +102,7 @@ endif
ifeq ($(avx2_supported),yes)
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
- chacha20-x86_64-y += chacha20-avx2-x86_64.o
+ chacha-x86_64-y += chacha-avx2-x86_64.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S
similarity index 97%
rename from arch/x86/crypto/chacha20-avx2-x86_64.S
rename to arch/x86/crypto/chacha-avx2-x86_64.S
index b6ab082be6572..32da1be9a3550 100644
--- a/arch/x86/crypto/chacha20-avx2-x86_64.S
+++ b/arch/x86/crypto/chacha-avx2-x86_64.S
@@ -1,5 +1,5 @@
/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
+ * ChaCha 256-bit cipher algorithm, x64 AVX2 functions
*
* Copyright (C) 2015 Martin Willi
*
@@ -38,13 +38,14 @@ CTR4BL: .octa 0x00000000000000000000000000000002
.text
-ENTRY(chacha20_2block_xor_avx2)
+ENTRY(chacha_2block_xor_avx2)
# %rdi: Input state matrix, s
# %rsi: up to 2 data blocks output, o
# %rdx: up to 2 data blocks input, i
# %rcx: input/output length in bytes
+ # %r8d: nrounds
- # This function encrypts two ChaCha20 blocks by loading the state
+ # This function encrypts two ChaCha blocks by loading the state
# matrix twice across four AVX registers. It performs matrix operations
# on four words in each matrix in parallel, but requires shuffling to
# rearrange the words after each round.
@@ -68,7 +69,6 @@ ENTRY(chacha20_2block_xor_avx2)
vmovdqa ROT16(%rip),%ymm5
mov %rcx,%rax
- mov $10,%ecx
.Ldoubleround:
@@ -138,7 +138,7 @@ ENTRY(chacha20_2block_xor_avx2)
# x3 = shuffle32(x3, MASK(0, 3, 2, 1))
vpshufd $0x39,%ymm3,%ymm3
- dec %ecx
+ sub $2,%r8d
jnz .Ldoubleround
# o0 = i0 ^ (x0 + s0)
@@ -228,15 +228,16 @@ ENTRY(chacha20_2block_xor_avx2)
lea -8(%r10),%rsp
jmp .Ldone2
-ENDPROC(chacha20_2block_xor_avx2)
+ENDPROC(chacha_2block_xor_avx2)
-ENTRY(chacha20_4block_xor_avx2)
+ENTRY(chacha_4block_xor_avx2)
# %rdi: Input state matrix, s
# %rsi: up to 4 data blocks output, o
# %rdx: up to 4 data blocks input, i
# %rcx: input/output length in bytes
+ # %r8d: nrounds
- # This function encrypts four ChaCha20 block by loading the state
+ # This function encrypts four ChaCha block by loading the state
# matrix four times across eight AVX registers. It performs matrix
# operations on four words in two matrices in parallel, sequentially
# to the operations on the four words of the other two matrices. The
@@ -269,7 +270,6 @@ ENTRY(chacha20_4block_xor_avx2)
vmovdqa ROT16(%rip),%ymm9
mov %rcx,%rax
- mov $10,%ecx
.Ldoubleround4:
@@ -389,7 +389,7 @@ ENTRY(chacha20_4block_xor_avx2)
vpshufd $0x39,%ymm3,%ymm3
vpshufd $0x39,%ymm7,%ymm7
- dec %ecx
+ sub $2,%r8d
jnz .Ldoubleround4
# o0 = i0 ^ (x0 + s0), first block
@@ -533,15 +533,16 @@ ENTRY(chacha20_4block_xor_avx2)
lea -8(%r10),%rsp
jmp .Ldone4
-ENDPROC(chacha20_4block_xor_avx2)
+ENDPROC(chacha_4block_xor_avx2)
-ENTRY(chacha20_8block_xor_avx2)
+ENTRY(chacha_8block_xor_avx2)
# %rdi: Input state matrix, s
# %rsi: up to 8 data blocks output, o
# %rdx: up to 8 data blocks input, i
# %rcx: input/output length in bytes
+ # %r8d: nrounds
- # This function encrypts eight consecutive ChaCha20 blocks by loading
+ # This function encrypts eight consecutive ChaCha blocks by loading
# the state matrix in AVX registers eight times. As we need some
# scratch registers, we save the first four registers on the stack. The
# algorithm performs each operation on the corresponding word of each
@@ -588,8 +589,6 @@ ENTRY(chacha20_8block_xor_avx2)
# x12 += counter values 0-3
vpaddd %ymm1,%ymm12,%ymm12
- mov $10,%ecx
-
.Ldoubleround8:
# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
vpaddd 0x00(%rsp),%ymm4,%ymm0
@@ -775,7 +774,7 @@ ENTRY(chacha20_8block_xor_avx2)
vpsrld $25,%ymm4,%ymm4
vpor %ymm0,%ymm4,%ymm4
- dec %ecx
+ sub $2,%r8d
jnz .Ldoubleround8
# x0..15[0-3] += s[0..15]
@@ -1023,4 +1022,4 @@ ENTRY(chacha20_8block_xor_avx2)
jmp .Ldone8
-ENDPROC(chacha20_8block_xor_avx2)
+ENDPROC(chacha_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
similarity index 96%
rename from arch/x86/crypto/chacha20-ssse3-x86_64.S
rename to arch/x86/crypto/chacha-ssse3-x86_64.S
index 45e4ccdd9c98b..613f80ae98576 100644
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -1,5 +1,5 @@
/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
+ * ChaCha 256-bit cipher algorithm, x64 SSSE3 functions
*
* Copyright (C) 2015 Martin Willi
*
@@ -24,7 +24,7 @@ CTRINC: .octa 0x00000003000000020000000100000000
.text
/*
- * chacha20_permute - permute one block
+ * chacha_permute - permute one block
*
* Permute one 64-byte block where the state matrix is in %xmm0-%xmm3. This
* function performs matrix operations on four words in parallel, but requires
@@ -32,13 +32,14 @@ CTRINC: .octa 0x00000003000000020000000100000000
* done with the slightly better performing SSSE3 byte shuffling, 7/12-bit word
* rotation uses traditional shift+OR.
*
- * Clobbers: %ecx, %xmm4-%xmm7
+ * The round count is given in %r8d.
+ *
+ * Clobbers: %r8d, %xmm4-%xmm7
*/
-chacha20_permute:
+chacha_permute:
movdqa ROT8(%rip),%xmm4
movdqa ROT16(%rip),%xmm5
- mov $10,%ecx
.Ldoubleround:
# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -107,17 +108,18 @@ chacha20_permute:
# x3 = shuffle32(x3, MASK(0, 3, 2, 1))
pshufd $0x39,%xmm3,%xmm3
- dec %ecx
+ sub $2,%r8d
jnz .Ldoubleround
ret
-ENDPROC(chacha20_permute)
+ENDPROC(chacha_permute)
-ENTRY(chacha20_block_xor_ssse3)
+ENTRY(chacha_block_xor_ssse3)
# %rdi: Input state matrix, s
# %rsi: up to 1 data block output, o
# %rdx: up to 1 data block input, i
# %rcx: input/output length in bytes
+ # %r8d: nrounds
# x0..3 = s0..3
movdqa 0x00(%rdi),%xmm0
@@ -130,7 +132,7 @@ ENTRY(chacha20_block_xor_ssse3)
movdqa %xmm3,%xmm11
mov %rcx,%rax
- call chacha20_permute
+ call chacha_permute
# o0 = i0 ^ (x0 + s0)
paddd %xmm8,%xmm0
@@ -196,32 +198,35 @@ ENTRY(chacha20_block_xor_ssse3)
lea -8(%r10),%rsp
jmp .Ldone
-ENDPROC(chacha20_block_xor_ssse3)
+ENDPROC(chacha_block_xor_ssse3)
-ENTRY(hchacha20_block_ssse3)
+ENTRY(hchacha_block_ssse3)
# %rdi: Input state matrix, s
# %rsi: output (8 32-bit words)
+ # %edx: nrounds
movdqa 0x00(%rdi),%xmm0
movdqa 0x10(%rdi),%xmm1
movdqa 0x20(%rdi),%xmm2
movdqa 0x30(%rdi),%xmm3
- call chacha20_permute
+ mov %edx,%r8d
+ call chacha_permute
movdqu %xmm0,0x00(%rsi)
movdqu %xmm3,0x10(%rsi)
ret
-ENDPROC(hchacha20_block_ssse3)
+ENDPROC(hchacha_block_ssse3)
-ENTRY(chacha20_4block_xor_ssse3)
+ENTRY(chacha_4block_xor_ssse3)
# %rdi: Input state matrix, s
# %rsi: up to 4 data blocks output, o
# %rdx: up to 4 data blocks input, i
# %rcx: input/output length in bytes
+ # %r8d: nrounds
- # This function encrypts four consecutive ChaCha20 blocks by loading the
+ # This function encrypts four consecutive ChaCha blocks by loading the
# the state matrix in SSE registers four times. As we need some scratch
# registers, we save the first four registers on the stack. The
# algorithm performs each operation on the corresponding word of each
@@ -274,8 +279,6 @@ ENTRY(chacha20_4block_xor_ssse3)
# x12 += counter values 0-3
paddd %xmm1,%xmm12
- mov $10,%ecx
-
.Ldoubleround4:
# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
movdqa 0x00(%rsp),%xmm0
@@ -493,7 +496,7 @@ ENTRY(chacha20_4block_xor_ssse3)
psrld $25,%xmm4
por %xmm0,%xmm4
- dec %ecx
+ sub $2,%r8d
jnz .Ldoubleround4
# x0[0-3] += s0[0]
@@ -784,4 +787,4 @@ ENTRY(chacha20_4block_xor_ssse3)
jmp .Ldone4
-ENDPROC(chacha20_4block_xor_ssse3)
+ENDPROC(chacha_4block_xor_ssse3)
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha_glue.c
similarity index 56%
rename from arch/x86/crypto/chacha20_glue.c
rename to arch/x86/crypto/chacha_glue.c
index ca85b5d2c4751..c643993a29c9f 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -1,5 +1,6 @@
/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ * x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
*
* Copyright (C) 2015 Martin Willi
*
@@ -17,85 +18,85 @@
#include <asm/fpu/api.h>
#include <asm/simd.h>
-#define CHACHA20_STATE_ALIGN 16
+#define CHACHA_STATE_ALIGN 16
-asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void hchacha20_block_ssse3(const u32 *state, u32 *out);
+asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
#ifdef CONFIG_AS_AVX2
-asmlinkage void chacha20_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
- unsigned int len);
-static bool chacha20_use_avx2;
+asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+static bool chacha_use_avx2;
#endif
-static unsigned int chacha20_advance(unsigned int len, unsigned int maxblocks)
+static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
}
-static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
+static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
{
#ifdef CONFIG_AS_AVX2
- if (chacha20_use_avx2) {
+ if (chacha_use_avx2) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
- chacha20_8block_xor_avx2(state, dst, src, bytes);
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
src += CHACHA_BLOCK_SIZE * 8;
dst += CHACHA_BLOCK_SIZE * 8;
state[12] += 8;
}
if (bytes > CHACHA_BLOCK_SIZE * 4) {
- chacha20_8block_xor_avx2(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 8);
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 8);
return;
}
if (bytes > CHACHA_BLOCK_SIZE * 2) {
- chacha20_4block_xor_avx2(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 4);
+ chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes > CHACHA_BLOCK_SIZE) {
- chacha20_2block_xor_avx2(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 2);
+ chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 2);
return;
}
}
#endif
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
- chacha20_4block_xor_ssse3(state, dst, src, bytes);
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
src += CHACHA_BLOCK_SIZE * 4;
dst += CHACHA_BLOCK_SIZE * 4;
state[12] += 4;
}
if (bytes > CHACHA_BLOCK_SIZE) {
- chacha20_4block_xor_ssse3(state, dst, src, bytes);
- state[12] += chacha20_advance(bytes, 4);
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 4);
return;
}
if (bytes) {
- chacha20_block_xor_ssse3(state, dst, src, bytes);
+ chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
state[12]++;
}
}
-static int chacha20_simd_stream_xor(struct skcipher_request *req,
- struct chacha_ctx *ctx, u8 *iv)
+static int chacha_simd_stream_xor(struct skcipher_request *req,
+ struct chacha_ctx *ctx, u8 *iv)
{
struct skcipher_walk walk;
u32 *state, state_buf[16 + 2] __aligned(8);
int err;
- BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
+ BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
+ state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
err = skcipher_walk_virt(&walk, req, false);
@@ -108,8 +109,8 @@ static int chacha20_simd_stream_xor(struct skcipher_request *req,
nbytes = round_down(nbytes, walk.stride);
kernel_fpu_begin();
- chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
+ chacha_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
kernel_fpu_end();
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
@@ -118,7 +119,7 @@ static int chacha20_simd_stream_xor(struct skcipher_request *req,
return err;
}
-static int chacha20_simd(struct skcipher_request *req)
+static int chacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -126,10 +127,10 @@ static int chacha20_simd(struct skcipher_request *req)
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
return crypto_chacha_crypt(req);
- return chacha20_simd_stream_xor(req, ctx, req->iv);
+ return chacha_simd_stream_xor(req, ctx, req->iv);
}
-static int xchacha20_simd(struct skcipher_request *req)
+static int xchacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -140,17 +141,18 @@ static int xchacha20_simd(struct skcipher_request *req)
if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
return crypto_xchacha_crypt(req);
- BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
+ BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
+ state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
crypto_chacha_init(state, ctx, req->iv);
kernel_fpu_begin();
- hchacha20_block_ssse3(state, subctx.key);
+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
kernel_fpu_end();
+ subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha20_simd_stream_xor(req, &subctx, real_iv);
+ return chacha_simd_stream_xor(req, &subctx, real_iv);
}
static struct skcipher_alg algs[] = {
@@ -167,8 +169,8 @@ static struct skcipher_alg algs[] = {
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_simd,
- .decrypt = chacha20_simd,
+ .encrypt = chacha_simd,
+ .decrypt = chacha_simd,
}, {
.base.cra_name = "xchacha20",
.base.cra_driver_name = "xchacha20-simd",
@@ -182,35 +184,35 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.setkey = crypto_chacha20_setkey,
- .encrypt = xchacha20_simd,
- .decrypt = xchacha20_simd,
+ .encrypt = xchacha_simd,
+ .decrypt = xchacha_simd,
},
};
-static int __init chacha20_simd_mod_init(void)
+static int __init chacha_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_SSSE3))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
- chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+ chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
-static void __exit chacha20_simd_mod_fini(void)
+static void __exit chacha_simd_mod_fini(void)
{
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-simd");
MODULE_ALIAS_CRYPTO("xchacha20");
--
2.19.2
next prev parent reply other threads:[~2018-11-28 17:47 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-28 6:44 [PATCH 0/6] crypto: x86_64 optimized XChaCha and NHPoly1305 (for Adiantum) Eric Biggers
2018-11-28 6:44 ` [PATCH 1/6] crypto: x86/nhpoly1305 - add SSE2 accelerated NHPoly1305 Eric Biggers
2018-11-28 6:44 ` [PATCH 2/6] crypto: x86/nhpoly1305 - add AVX2 " Eric Biggers
2018-11-28 6:44 ` [PATCH 3/6] crypto: x86/chacha20 - limit the preemption-disabled section Eric Biggers
2018-11-28 6:44 ` [PATCH 4/6] crypto: x86/chacha20 - add XChaCha20 support Eric Biggers
2018-11-28 6:44 ` Eric Biggers [this message]
2018-11-28 6:44 ` [PATCH 6/6] crypto: x86/chacha - add XChaCha12 support Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181128064445.3813-6-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=Jason@zx2c4.com \
--cc=gmazyland@gmail.com \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=martin@strongswan.org \
--cc=paulcrowley@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.