From: Danny Tsen <dtsen@linux.ibm.com>
To: linux-crypto@vger.kernel.org
Cc: herbert@gondor.apana.org.au, leitao@debian.org,
nayna@linux.ibm.com, appro@cryptogams.org,
linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
mpe@ellerman.id.au, ltcgcw@linux.vnet.ibm.com, dtsen@us.ibm.com,
Danny Tsen <dtsen@linux.ibm.com>
Subject: [PATCH 2/5] Glue code for optmized Chacha20 implementation for ppc64le.
Date: Mon, 24 Apr 2023 14:47:23 -0400 [thread overview]
Message-ID: <20230424184726.2091-3-dtsen@linux.ibm.com> (raw)
In-Reply-To: <20230424184726.2091-1-dtsen@linux.ibm.com>
Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
---
arch/powerpc/crypto/chacha-p10-glue.c | 223 ++++++++++++++++++++++++++
1 file changed, 223 insertions(+)
create mode 100644 arch/powerpc/crypto/chacha-p10-glue.c
diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
new file mode 100644
index 000000000000..cefb150e7b3c
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10-glue.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright 2023- IBM Inc. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/sizes.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
+
+static void vsx_begin(void)
+{
+ preempt_disable();
+ enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+ disable_kernel_vsx();
+ preempt_enable();
+}
+
+static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ unsigned int l = bytes & ~0x0FF;
+
+ if (l > 0) {
+ chacha_p10le_8x(state, dst, src, l, nrounds);
+ bytes -= l;
+ src += l;
+ dst += l;
+ state[12] += l / CHACHA_BLOCK_SIZE;
+ }
+
+ if (bytes > 0)
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ vsx_begin();
+ chacha_p10_do_8x(state, dst, src, todo, nrounds);
+ vsx_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_p10_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = rounddown(nbytes, walk.stride);
+
+ if (!static_branch_likely(&have_p10) ||
+ !crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
+ vsx_begin();
+ chacha_p10_do_8x(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ vsx_end();
+ }
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int chacha_p10(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_p10_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_p10(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+ hchacha_block_arch(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_p10_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_p10,
+ .decrypt = chacha_p10,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_p10,
+ .decrypt = xchacha_p10,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_p10,
+ .decrypt = xchacha_p10,
+ }
+};
+
+static int __init chacha_p10_init(void)
+{
+ static_branch_enable(&have_p10);
+
+ return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
+}
+
+static void __exit chacha_p10_exit(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
+module_exit(chacha_p10_exit);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-p10");
--
2.31.1
WARNING: multiple messages have this Message-ID (diff)
From: Danny Tsen <dtsen@linux.ibm.com>
To: linux-crypto@vger.kernel.org
Cc: herbert@gondor.apana.org.au, dtsen@us.ibm.com,
nayna@linux.ibm.com, linux-kernel@vger.kernel.org,
Danny Tsen <dtsen@linux.ibm.com>,
appro@cryptogams.org, ltcgcw@linux.vnet.ibm.com,
leitao@debian.org, linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 2/5] Glue code for optmized Chacha20 implementation for ppc64le.
Date: Mon, 24 Apr 2023 14:47:23 -0400 [thread overview]
Message-ID: <20230424184726.2091-3-dtsen@linux.ibm.com> (raw)
In-Reply-To: <20230424184726.2091-1-dtsen@linux.ibm.com>
Signed-off-by: Danny Tsen <dtsen@linux.ibm.com>
---
arch/powerpc/crypto/chacha-p10-glue.c | 223 ++++++++++++++++++++++++++
1 file changed, 223 insertions(+)
create mode 100644 arch/powerpc/crypto/chacha-p10-glue.c
diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
new file mode 100644
index 000000000000..cefb150e7b3c
--- /dev/null
+++ b/arch/powerpc/crypto/chacha-p10-glue.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright 2023- IBM Inc. All rights reserved.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/sizes.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
+
+static void vsx_begin(void)
+{
+ preempt_disable();
+ enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+ disable_kernel_vsx();
+ preempt_enable();
+}
+
+static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ unsigned int l = bytes & ~0x0FF;
+
+ if (l > 0) {
+ chacha_p10le_8x(state, dst, src, l, nrounds);
+ bytes -= l;
+ src += l;
+ dst += l;
+ state[12] += l / CHACHA_BLOCK_SIZE;
+ }
+
+ if (bytes > 0)
+ chacha_crypt_generic(state, dst, src, bytes, nrounds);
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ hchacha_block_generic(state, stream, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ do {
+ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+ vsx_begin();
+ chacha_p10_do_8x(state, dst, src, todo, nrounds);
+ vsx_end();
+
+ bytes -= todo;
+ src += todo;
+ dst += todo;
+ } while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_p10_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = rounddown(nbytes, walk.stride);
+
+ if (!static_branch_likely(&have_p10) ||
+ !crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
+ vsx_begin();
+ chacha_p10_do_8x(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ vsx_end();
+ }
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int chacha_p10(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_p10_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_p10(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+ hchacha_block_arch(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_p10_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_p10,
+ .decrypt = chacha_p10,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_p10,
+ .decrypt = xchacha_p10,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-p10",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_p10,
+ .decrypt = xchacha_p10,
+ }
+};
+
+static int __init chacha_p10_init(void)
+{
+ static_branch_enable(&have_p10);
+
+ return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
+}
+
+static void __exit chacha_p10_exit(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
+module_exit(chacha_p10_exit);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-p10");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-p10");
--
2.31.1
next prev parent reply other threads:[~2023-04-24 18:48 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-24 18:47 [PATCH 0/5] crypto: Accelerated Chacha20/Poly1305 implementation Danny Tsen
2023-04-24 18:47 ` Danny Tsen
2023-04-24 18:47 ` [PATCH 1/5] An optimized Chacha20 implementation with 8-way unrolling for ppc64le Danny Tsen
2023-04-24 18:47 ` Danny Tsen
2023-04-24 20:40 ` Elliott, Robert (Servers)
2023-04-24 20:40 ` Elliott, Robert (Servers)
2023-04-24 23:07 ` Danny Tsen
2023-04-24 23:07 ` Danny Tsen
2023-04-25 12:02 ` Michael Ellerman
2023-04-25 12:02 ` Michael Ellerman
2023-04-25 12:08 ` Danny Tsen
2023-04-25 12:08 ` Danny Tsen
2023-04-26 8:19 ` David Laight
2023-04-26 8:19 ` David Laight
2023-04-24 18:47 ` Danny Tsen [this message]
2023-04-24 18:47 ` [PATCH 2/5] Glue code for optmized Chacha20 implementation " Danny Tsen
2023-04-25 5:37 ` Herbert Xu
2023-04-25 5:37 ` Herbert Xu
2023-04-25 5:40 ` Herbert Xu
2023-04-25 5:40 ` Herbert Xu
2023-04-25 5:41 ` Herbert Xu
2023-04-25 5:41 ` Herbert Xu
2023-04-25 11:53 ` Danny Tsen
2023-04-25 11:53 ` Danny Tsen
2023-04-24 18:47 ` [PATCH 3/5] An optimized Poly1305 implementation with 4-way unrolling " Danny Tsen
2023-04-24 18:47 ` Danny Tsen
2023-04-24 18:47 ` [PATCH 4/5] Glue code for optmized Poly1305 implementation " Danny Tsen
2023-04-24 18:47 ` Danny Tsen
2023-04-25 5:44 ` Herbert Xu
2023-04-25 5:44 ` Herbert Xu
2023-04-25 11:58 ` Danny Tsen
2023-04-25 11:58 ` Danny Tsen
2023-04-24 18:47 ` [PATCH 5/5] Update Kconfig and Makefile Danny Tsen
2023-04-24 18:47 ` Danny Tsen
2023-04-25 5:46 ` Herbert Xu
2023-04-25 5:46 ` Herbert Xu
2023-04-25 12:04 ` Danny Tsen
2023-04-25 12:04 ` Danny Tsen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230424184726.2091-3-dtsen@linux.ibm.com \
--to=dtsen@linux.ibm.com \
--cc=appro@cryptogams.org \
--cc=dtsen@us.ibm.com \
--cc=herbert@gondor.apana.org.au \
--cc=leitao@debian.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=ltcgcw@linux.vnet.ibm.com \
--cc=mpe@ellerman.id.au \
--cc=nayna@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.