From: Qingfang Deng <dqfext@gmail.com>
To: Herbert Xu <herbert@gondor.apana.org.au>,
"David S. Miller" <davem@davemloft.net>,
Paul Walmsley <paul.walmsley@sifive.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>,
Alexandre Ghiti <alex@ghiti.fr>,
linux-crypto@vger.kernel.org, linux-riscv@lists.infradead.org,
linux-kernel@vger.kernel.org
Cc: "Christoph Müllner" <christoph.muellner@vrull.eu>,
"Heiko Stuebner" <heiko.stuebner@vrull.eu>,
"Qingfang Deng" <qingfang.deng@siflower.com.cn>
Subject: [RFC PATCH] crypto: riscv: scalar accelerated GHASH
Date: Thu, 17 Apr 2025 14:49:38 +0800 [thread overview]
Message-ID: <20250417064940.68469-1-dqfext@gmail.com> (raw)
From: Qingfang Deng <qingfang.deng@siflower.com.cn>
Add a scalar implementation of GHASH for RISC-V using the Zbc (carry-less
multiplication) and Zbb (bit-manipulation) extensions. This implementation
is adapted from OpenSSL but rewritten in plain C for clarity.
Unlike the OpenSSL one, which relies on bit-reflection of the data, this
version uses a pre-computed (reflected and multiplied) key, inspired by
the approach used in Intel's CLMUL driver, to avoid reflections during
runtime.
Signed-off-by: Qingfang Deng <qingfang.deng@siflower.com.cn>
---
arch/riscv/crypto/Kconfig | 16 +-
arch/riscv/crypto/Makefile | 2 +
arch/riscv/crypto/ghash-riscv64-clmul.c | 270 ++++++++++++++++++++++++
3 files changed, 287 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/crypto/ghash-riscv64-clmul.c
diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
index 6392e1e11bc9..03b74d4116cb 100644
--- a/arch/riscv/crypto/Kconfig
+++ b/arch/riscv/crypto/Kconfig
@@ -26,7 +26,7 @@ config CRYPTO_CHACHA_RISCV64
default CRYPTO_LIB_CHACHA_INTERNAL
config CRYPTO_GHASH_RISCV64
- tristate "Hash functions: GHASH"
+ tristate "Hash functions: GHASH (vector accelerated)"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_GCM
help
@@ -35,6 +35,20 @@ config CRYPTO_GHASH_RISCV64
Architecture: riscv64 using:
- Zvkg vector crypto extension
+config CRYPTO_GHASH_RISCV64_CLMUL
+ tristate "Hash functions: GHASH (CLMUL scalar accelerated)"
+ depends on 64BIT && TOOLCHAIN_HAS_ZBB && TOOLCHAIN_HAS_ZBC
+ select CRYPTO_GCM
+ help
+ GCM GHASH function (NIST SP 800-38D)
+
+ Architecture: riscv64 using:
+ - Zbb Bit-manipulation extension
+ - Zbc Carry-less multiplication
+ OR
+ - Zbkb Bit-manipulation for Cryptography
+ - Zbkc Carry-less multiplication for Cryptography
+
config CRYPTO_SHA256_RISCV64
tristate "Hash functions: SHA-224 and SHA-256"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
index 247c7bc7288c..b5dc497d398c 100644
--- a/arch/riscv/crypto/Makefile
+++ b/arch/riscv/crypto/Makefile
@@ -10,6 +10,8 @@ chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
+obj-$(CONFIG_CRYPTO_GHASH_RISCV64_CLMUL) += ghash-riscv64-clmul.o
+
obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
diff --git a/arch/riscv/crypto/ghash-riscv64-clmul.c b/arch/riscv/crypto/ghash-riscv64-clmul.c
new file mode 100644
index 000000000000..4777aa8e94cb
--- /dev/null
+++ b/arch/riscv/crypto/ghash-riscv64-clmul.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GHASH using the RISC-V Zbc/Zbkc (CLMUL) extension
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Christoph Müllner <christoph.muellner@vrull.eu>
+ *
+ * Copyright (C) 2025 Siflower Communications Ltd
+ * Author: Qingfang Deng <qingfang.deng@siflower.com.cn>
+ */
+
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/hash.h>
+
+#define GHASH_MOD_POLY 0xc200000000000000
+
+/*
+ * Per-transform context: holds the 128-bit hash key in the form written by
+ * riscv64_clmul_ghash_setkey().
+ */
+struct riscv64_clmul_ghash_ctx {
+ __uint128_t key;
+};
+
+/* Per-request state of one running GHASH computation. */
+struct riscv64_clmul_ghash_desc_ctx {
+ /* Running hash value; zeroed by init and written out big-endian by final. */
+ __uint128_t shash;
+ /* Input bytes held back until a full block is available. */
+ u8 buffer[GHASH_DIGEST_SIZE];
+ /* Number of valid bytes currently stored in buffer. */
+ int bytes;
+};
+
+/*
+ * Byte-swap a 64-bit value with a single rev8 instruction.
+ *
+ * The ".option arch,+zbb" directive enables Zbb for the assembler around
+ * this one instruction only; the push/pop pair restores the previous
+ * assembler options afterwards.
+ */
+static __always_inline u64 riscv_zbb_swab64(u64 val)
+{
+ asm (".option push\n"
+ ".option arch,+zbb\n"
+ "rev8 %0, %1\n"
+ ".option pop\n"
+ : "=r" (val) : "r" (val));
+ return val;
+}
+
+/*
+ * Read 16 bytes from p as one big-endian 128-bit integer: p[0] ends up as
+ * the most significant byte and p[15] as the least significant one.
+ */
+static __always_inline __uint128_t get_unaligned_be128(const u8 *p)
+{
+	__uint128_t val;
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	u64 upper, lower;
+
+	/* One direct 128-bit load, then byte-swap and exchange the halves. */
+	val = *(__uint128_t *)p;
+	upper = riscv_zbb_swab64(val);
+	lower = riscv_zbb_swab64(val >> 64);
+	val = (__uint128_t)upper << 64 | lower;
+#else
+	unsigned int i;
+
+	/* Byte-wise path: fold the bytes in one at a time, p[0] first. */
+	val = 0;
+	for (i = 0; i < sizeof(val); i++)
+		val = val << 8 | p[i];
+#endif
+	return val;
+}
+
+/*
+ * Store val at p as 16 big-endian bytes: the most significant byte goes to
+ * p[0] and the least significant one to p[15].
+ */
+static __always_inline void put_unaligned_be128(__uint128_t val, u8 *p)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	u64 lower = riscv_zbb_swab64(val >> 64);
+	u64 upper = riscv_zbb_swab64(val);
+
+	/* Byte-swapped halves, exchanged, written with one 128-bit store. */
+	*(__uint128_t *)p = (__uint128_t)upper << 64 | lower;
+#else
+	unsigned int i;
+
+	/* Byte-wise path: emit the low byte at p[15] and walk towards p[0]. */
+	for (i = sizeof(val); i-- > 0; val >>= 8)
+		p[i] = val;
+#endif
+}
+
+/*
+ * Carry-less multiply of two 64-bit values, returning the full 128-bit
+ * product: clmul supplies the low 64 bits and clmulh the high 64 bits.
+ *
+ * Both outputs are early-clobber ("=&r") so that neither shares a register
+ * with an input; the first instruction writes lo while a and b are still
+ * needed by the second one.
+ */
+static __always_inline __attribute_const__
+__uint128_t clmul128(u64 a, u64 b)
+{
+ u64 hi, lo;
+
+ asm(".option push\n"
+ ".option arch,+zbc\n"
+ "clmul %0, %2, %3\n"
+ "clmulh %1, %2, %3\n"
+ ".option pop\n"
+ : "=&r" (lo), "=&r" (hi) : "r" (a), "r" (b));
+ return (__uint128_t)hi << 64 | lo;
+}
+
+/* Start a new hash: clear the running value and drop any buffered bytes. */
+static int riscv64_clmul_ghash_init(struct shash_desc *desc)
+{
+	struct riscv64_clmul_ghash_desc_ctx *state = shash_desc_ctx(desc);
+
+	state->shash = 0;
+	state->bytes = 0;
+
+	return 0;
+}
+
+/* Compute GMULT (Xi*H mod f) using the Zbc (clmul) extensions.
+ * Using the no-Karatsuba approach and clmul for the final reduction.
+ * This results in an implementation with minimized number of instructions.
+ * HW with clmul latencies higher than 2 cycles might observe a performance
+ * improvement with Karatsuba. HW with clmul latencies higher than 6 cycles
+ * might observe a performance improvement with additionally converting the
+ * reduction to shift&xor. For a full discussion of these estimates see
+ * https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc
+ *
+ * @Xi:  running hash value; read on entry and overwritten with the result
+ * @k:   hash key as stored by riscv64_clmul_ghash_setkey()
+ * @inp: input data, consumed in GHASH_BLOCK_SIZE chunks
+ * @len: input length in bytes; must be a non-zero multiple of
+ *       GHASH_BLOCK_SIZE, because the loop always processes one block
+ *       before testing len and only stops when len reaches exactly zero
+ */
+static void gcm_ghash_rv64i_zbc(__uint128_t *Xi, __uint128_t k, const u8 *inp, size_t len)
+{
+ u64 k_hi = k >> 64, k_lo = k, p_hi, p_lo;
+ __uint128_t hash = *Xi, p;
+
+ do {
+ __uint128_t t0, t1, t2, t3, lo, mid, hi;
+
+ /* Load the input data, byte-reverse them, and XOR them with Xi */
+ p = get_unaligned_be128(inp);
+
+ inp += GHASH_BLOCK_SIZE;
+ len -= GHASH_BLOCK_SIZE;
+
+ p ^= hash;
+ p_hi = p >> 64;
+ p_lo = p;
+
+ /* Multiplication (without Karatsuba) */
+ /*
+  * Four 64x64 partial products; the two cross terms are combined in
+  * mid and folded into the 256-bit result held in hi:lo.
+  */
+ t0 = clmul128(p_lo, k_lo);
+ t1 = clmul128(p_lo, k_hi);
+ t2 = clmul128(p_hi, k_lo);
+ t3 = clmul128(p_hi, k_hi);
+ mid = t1 ^ t2;
+ lo = t0 ^ (mid << 64);
+ hi = t3 ^ (mid >> 64);
+
+ /* Reduction with clmul */
+ /* clmul128() takes u64 arguments, so only the low 64 bits of lo are used here. */
+ mid = clmul128(lo, GHASH_MOD_POLY);
+ lo ^= mid << 64;
+ hi ^= lo ^ (mid >> 64);
+ hi ^= clmul128(lo >> 64, GHASH_MOD_POLY);
+ /* The reduced value becomes the running hash for the next block. */
+ hash = hi;
+ } while (len);
+
+ *Xi = hash;
+}
+
+/*
+ * Install the hash key.
+ *
+ * The 16 key bytes are loaded as a big-endian 128-bit value and rotated left
+ * by one bit; if the bit rotated out of the top was set, GHASH_MOD_POLY is
+ * XORed into the upper 64 bits.  The result is stored in the tfm context.
+ *
+ * Returns 0 on success or -EINVAL if keylen is not GHASH_BLOCK_SIZE.
+ */
+static int riscv64_clmul_ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen)
+{
+	struct riscv64_clmul_ghash_ctx *tctx = crypto_shash_ctx(tfm);
+	__uint128_t h, msb;
+
+	if (keylen != GHASH_BLOCK_SIZE)
+		return -EINVAL;
+
+	h = get_unaligned_be128(key);
+	msb = h >> 127;
+	h = (h << 1) | msb;
+	if (msb)
+		h ^= (__uint128_t)GHASH_MOD_POLY << 64;
+
+	tctx->key = h;
+	return 0;
+}
+
+/*
+ * Feed srclen bytes from src into the running hash.
+ *
+ * Bytes left over from an earlier call are completed to a full block first,
+ * then all whole blocks are hashed directly from src, and any trailing
+ * partial block is kept in dctx->buffer for the next update() or final().
+ *
+ * Always returns 0.
+ */
+static int riscv64_clmul_ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen)
+{
+	struct riscv64_clmul_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	struct riscv64_clmul_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int len;
+
+	if (dctx->bytes) {
+		/*
+		 * Compare against the free space instead of testing
+		 * "bytes + srclen": that sum is evaluated in unsigned int and
+		 * can wrap for a very large srclen, which would let the
+		 * memcpy() below overrun the buffer.
+		 */
+		unsigned int room = GHASH_DIGEST_SIZE - dctx->bytes;
+
+		if (srclen < room) {
+			memcpy(dctx->buffer + dctx->bytes, src, srclen);
+			dctx->bytes += srclen;
+			return 0;
+		}
+
+		/* Complete the buffered block and hash it. */
+		memcpy(dctx->buffer + dctx->bytes, src, room);
+		gcm_ghash_rv64i_zbc(&dctx->shash, ctx->key, dctx->buffer, GHASH_DIGEST_SIZE);
+
+		src += room;
+		srclen -= room;
+		dctx->bytes = 0;
+	}
+
+	/* Hash as many whole blocks as possible straight from src. */
+	len = round_down(srclen, GHASH_BLOCK_SIZE);
+	if (len) {
+		gcm_ghash_rv64i_zbc(&dctx->shash, ctx->key, src, len);
+		src += len;
+		srclen -= len;
+	}
+
+	/* Keep the trailing partial block for later. */
+	if (srclen) {
+		memcpy(dctx->buffer, src, srclen);
+		dctx->bytes = srclen;
+	}
+	return 0;
+}
+
+/*
+ * Finish the hash and write the GHASH_DIGEST_SIZE-byte result to out.
+ *
+ * A pending partial block is zero-padded to a full block and hashed before
+ * the running value is stored big-endian.  Always returns 0.
+ */
+static int riscv64_clmul_ghash_final(struct shash_desc *desc, u8 out[GHASH_DIGEST_SIZE])
+{
+	struct riscv64_clmul_ghash_desc_ctx *state = shash_desc_ctx(desc);
+	struct riscv64_clmul_ghash_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	int pos = state->bytes;
+
+	if (pos) {
+		while (pos < GHASH_DIGEST_SIZE)
+			state->buffer[pos++] = 0;
+		gcm_ghash_rv64i_zbc(&state->shash, tctx->key, state->buffer, GHASH_DIGEST_SIZE);
+		state->bytes = 0;
+	}
+
+	put_unaligned_be128(state->shash, out);
+	return 0;
+}
+
+/*
+ * Algorithm descriptor registered by this module under the generic name
+ * "ghash" with priority 250.  It is only referenced from this file, so it
+ * is given internal linkage.
+ */
+static struct shash_alg riscv64_clmul_ghash_alg = {
+	.init = riscv64_clmul_ghash_init,
+	.update = riscv64_clmul_ghash_update,
+	.final = riscv64_clmul_ghash_final,
+	.setkey = riscv64_clmul_ghash_setkey,
+	.descsize = sizeof(struct riscv64_clmul_ghash_desc_ctx),
+	.digestsize = GHASH_DIGEST_SIZE,
+	.base = {
+		.cra_blocksize = GHASH_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct riscv64_clmul_ghash_ctx),
+		.cra_priority = 250,
+		.cra_name = "ghash",
+		.cra_driver_name = "ghash-riscv64-clmul",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+/*
+ * Register the algorithm only when the CPU offers both instructions this
+ * file emits; otherwise fail module load with -ENODEV.
+ */
+static int __init riscv64_clmul_ghash_mod_init(void)
+{
+	/* clmul/clmulh: available with either Zbc or Zbkc. */
+	if (!riscv_isa_extension_available(NULL, ZBC) &&
+	    !riscv_isa_extension_available(NULL, ZBKC))
+		return -ENODEV;
+
+	/* rev8: available with either Zbb or Zbkb. */
+	if (!riscv_isa_extension_available(NULL, ZBB) &&
+	    !riscv_isa_extension_available(NULL, ZBKB))
+		return -ENODEV;
+
+	return crypto_register_shash(&riscv64_clmul_ghash_alg);
+}
+
+/* Module exit: unregister the algorithm that mod_init registered. */
+static void __exit riscv64_clmul_ghash_mod_fini(void)
+{
+ crypto_unregister_shash(&riscv64_clmul_ghash_alg);
+}
+
+/* Hook the init/exit functions into module load and unload. */
+module_init(riscv64_clmul_ghash_mod_init);
+module_exit(riscv64_clmul_ghash_mod_fini);
+
+MODULE_DESCRIPTION("GHASH (RISC-V CLMUL accelerated)");
+MODULE_AUTHOR("Qingfang Deng <dqfext@gmail.com>");
+MODULE_LICENSE("GPL");
+/* NOTE(review): presumably lets a request for "ghash" auto-load this module -- confirm. */
+MODULE_ALIAS_CRYPTO("ghash");
--
2.43.0
WARNING: multiple messages have this Message-ID (diff)
From: Qingfang Deng <dqfext@gmail.com>
To: Herbert Xu <herbert@gondor.apana.org.au>,
"David S. Miller" <davem@davemloft.net>,
Paul Walmsley <paul.walmsley@sifive.com>,
Palmer Dabbelt <palmer@dabbelt.com>,
Albert Ou <aou@eecs.berkeley.edu>,
Alexandre Ghiti <alex@ghiti.fr>,
linux-crypto@vger.kernel.org, linux-riscv@lists.infradead.org,
linux-kernel@vger.kernel.org
Cc: "Christoph Müllner" <christoph.muellner@vrull.eu>,
"Heiko Stuebner" <heiko.stuebner@vrull.eu>,
"Qingfang Deng" <qingfang.deng@siflower.com.cn>
Subject: [RFC PATCH] crypto: riscv: scalar accelerated GHASH
Date: Thu, 17 Apr 2025 14:49:38 +0800 [thread overview]
Message-ID: <20250417064940.68469-1-dqfext@gmail.com> (raw)
From: Qingfang Deng <qingfang.deng@siflower.com.cn>
Add a scalar implementation of GHASH for RISC-V using the Zbc (carry-less
multiplication) and Zbb (bit-manipulation) extensions. This implementation
is adapted from OpenSSL but rewritten in plain C for clarity.
Unlike the OpenSSL one, which relies on bit-reflection of the data, this
version uses a pre-computed (reflected and multiplied) key, inspired by
the approach used in Intel's CLMUL driver, to avoid reflections during
runtime.
Signed-off-by: Qingfang Deng <qingfang.deng@siflower.com.cn>
---
arch/riscv/crypto/Kconfig | 16 +-
arch/riscv/crypto/Makefile | 2 +
arch/riscv/crypto/ghash-riscv64-clmul.c | 270 ++++++++++++++++++++++++
3 files changed, 287 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/crypto/ghash-riscv64-clmul.c
diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
index 6392e1e11bc9..03b74d4116cb 100644
--- a/arch/riscv/crypto/Kconfig
+++ b/arch/riscv/crypto/Kconfig
@@ -26,7 +26,7 @@ config CRYPTO_CHACHA_RISCV64
default CRYPTO_LIB_CHACHA_INTERNAL
config CRYPTO_GHASH_RISCV64
- tristate "Hash functions: GHASH"
+ tristate "Hash functions: GHASH (vector accelerated)"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
select CRYPTO_GCM
help
@@ -35,6 +35,20 @@ config CRYPTO_GHASH_RISCV64
Architecture: riscv64 using:
- Zvkg vector crypto extension
+config CRYPTO_GHASH_RISCV64_CLMUL
+ tristate "Hash functions: GHASH (CLMUL scalar accelerated)"
+ depends on 64BIT && TOOLCHAIN_HAS_ZBB && TOOLCHAIN_HAS_ZBC
+ select CRYPTO_GCM
+ help
+ GCM GHASH function (NIST SP 800-38D)
+
+ Architecture: riscv64 using:
+ - Zbb Bit-manipulation extension
+ - Zbc Carry-less multiplication
+ OR
+ - Zbkb Bit-manipulation for Cryptography
+ - Zbkc Carry-less multiplication for Cryptography
+
config CRYPTO_SHA256_RISCV64
tristate "Hash functions: SHA-224 and SHA-256"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
index 247c7bc7288c..b5dc497d398c 100644
--- a/arch/riscv/crypto/Makefile
+++ b/arch/riscv/crypto/Makefile
@@ -10,6 +10,8 @@ chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
+obj-$(CONFIG_CRYPTO_GHASH_RISCV64_CLMUL) += ghash-riscv64-clmul.o
+
obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
diff --git a/arch/riscv/crypto/ghash-riscv64-clmul.c b/arch/riscv/crypto/ghash-riscv64-clmul.c
new file mode 100644
index 000000000000..4777aa8e94cb
--- /dev/null
+++ b/arch/riscv/crypto/ghash-riscv64-clmul.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GHASH using the RISC-V Zbc/Zbkc (CLMUL) extension
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Christoph Müllner <christoph.muellner@vrull.eu>
+ *
+ * Copyright (C) 2025 Siflower Communications Ltd
+ * Author: Qingfang Deng <qingfang.deng@siflower.com.cn>
+ */
+
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/hash.h>
+
+#define GHASH_MOD_POLY 0xc200000000000000
+
+/*
+ * Per-transform context: holds the 128-bit hash key in the form written by
+ * riscv64_clmul_ghash_setkey().
+ */
+struct riscv64_clmul_ghash_ctx {
+ __uint128_t key;
+};
+
+/* Per-request state of one running GHASH computation. */
+struct riscv64_clmul_ghash_desc_ctx {
+ /* Running hash value; zeroed by init and written out big-endian by final. */
+ __uint128_t shash;
+ /* Input bytes held back until a full block is available. */
+ u8 buffer[GHASH_DIGEST_SIZE];
+ /* Number of valid bytes currently stored in buffer. */
+ int bytes;
+};
+
+/*
+ * Byte-swap a 64-bit value with a single rev8 instruction.
+ *
+ * The ".option arch,+zbb" directive enables Zbb for the assembler around
+ * this one instruction only; the push/pop pair restores the previous
+ * assembler options afterwards.
+ */
+static __always_inline u64 riscv_zbb_swab64(u64 val)
+{
+ asm (".option push\n"
+ ".option arch,+zbb\n"
+ "rev8 %0, %1\n"
+ ".option pop\n"
+ : "=r" (val) : "r" (val));
+ return val;
+}
+
+/*
+ * Read 16 bytes from p as one big-endian 128-bit integer: p[0] ends up as
+ * the most significant byte and p[15] as the least significant one.
+ */
+static __always_inline __uint128_t get_unaligned_be128(const u8 *p)
+{
+	__uint128_t val;
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	u64 upper, lower;
+
+	/* One direct 128-bit load, then byte-swap and exchange the halves. */
+	val = *(__uint128_t *)p;
+	upper = riscv_zbb_swab64(val);
+	lower = riscv_zbb_swab64(val >> 64);
+	val = (__uint128_t)upper << 64 | lower;
+#else
+	unsigned int i;
+
+	/* Byte-wise path: fold the bytes in one at a time, p[0] first. */
+	val = 0;
+	for (i = 0; i < sizeof(val); i++)
+		val = val << 8 | p[i];
+#endif
+	return val;
+}
+
+/*
+ * Store val at p as 16 big-endian bytes: the most significant byte goes to
+ * p[0] and the least significant one to p[15].
+ */
+static __always_inline void put_unaligned_be128(__uint128_t val, u8 *p)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	u64 lower = riscv_zbb_swab64(val >> 64);
+	u64 upper = riscv_zbb_swab64(val);
+
+	/* Byte-swapped halves, exchanged, written with one 128-bit store. */
+	*(__uint128_t *)p = (__uint128_t)upper << 64 | lower;
+#else
+	unsigned int i;
+
+	/* Byte-wise path: emit the low byte at p[15] and walk towards p[0]. */
+	for (i = sizeof(val); i-- > 0; val >>= 8)
+		p[i] = val;
+#endif
+}
+
+/*
+ * Carry-less multiply of two 64-bit values, returning the full 128-bit
+ * product: clmul supplies the low 64 bits and clmulh the high 64 bits.
+ *
+ * Both outputs are early-clobber ("=&r") so that neither shares a register
+ * with an input; the first instruction writes lo while a and b are still
+ * needed by the second one.
+ */
+static __always_inline __attribute_const__
+__uint128_t clmul128(u64 a, u64 b)
+{
+ u64 hi, lo;
+
+ asm(".option push\n"
+ ".option arch,+zbc\n"
+ "clmul %0, %2, %3\n"
+ "clmulh %1, %2, %3\n"
+ ".option pop\n"
+ : "=&r" (lo), "=&r" (hi) : "r" (a), "r" (b));
+ return (__uint128_t)hi << 64 | lo;
+}
+
+/* Start a new hash: clear the running value and drop any buffered bytes. */
+static int riscv64_clmul_ghash_init(struct shash_desc *desc)
+{
+	struct riscv64_clmul_ghash_desc_ctx *state = shash_desc_ctx(desc);
+
+	state->shash = 0;
+	state->bytes = 0;
+
+	return 0;
+}
+
+/* Compute GMULT (Xi*H mod f) using the Zbc (clmul) extensions.
+ * Using the no-Karatsuba approach and clmul for the final reduction.
+ * This results in an implementation with minimized number of instructions.
+ * HW with clmul latencies higher than 2 cycles might observe a performance
+ * improvement with Karatsuba. HW with clmul latencies higher than 6 cycles
+ * might observe a performance improvement with additionally converting the
+ * reduction to shift&xor. For a full discussion of these estimates see
+ * https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc
+ *
+ * @Xi:  running hash value; read on entry and overwritten with the result
+ * @k:   hash key as stored by riscv64_clmul_ghash_setkey()
+ * @inp: input data, consumed in GHASH_BLOCK_SIZE chunks
+ * @len: input length in bytes; must be a non-zero multiple of
+ *       GHASH_BLOCK_SIZE, because the loop always processes one block
+ *       before testing len and only stops when len reaches exactly zero
+ */
+static void gcm_ghash_rv64i_zbc(__uint128_t *Xi, __uint128_t k, const u8 *inp, size_t len)
+{
+ u64 k_hi = k >> 64, k_lo = k, p_hi, p_lo;
+ __uint128_t hash = *Xi, p;
+
+ do {
+ __uint128_t t0, t1, t2, t3, lo, mid, hi;
+
+ /* Load the input data, byte-reverse them, and XOR them with Xi */
+ p = get_unaligned_be128(inp);
+
+ inp += GHASH_BLOCK_SIZE;
+ len -= GHASH_BLOCK_SIZE;
+
+ p ^= hash;
+ p_hi = p >> 64;
+ p_lo = p;
+
+ /* Multiplication (without Karatsuba) */
+ /*
+  * Four 64x64 partial products; the two cross terms are combined in
+  * mid and folded into the 256-bit result held in hi:lo.
+  */
+ t0 = clmul128(p_lo, k_lo);
+ t1 = clmul128(p_lo, k_hi);
+ t2 = clmul128(p_hi, k_lo);
+ t3 = clmul128(p_hi, k_hi);
+ mid = t1 ^ t2;
+ lo = t0 ^ (mid << 64);
+ hi = t3 ^ (mid >> 64);
+
+ /* Reduction with clmul */
+ /* clmul128() takes u64 arguments, so only the low 64 bits of lo are used here. */
+ mid = clmul128(lo, GHASH_MOD_POLY);
+ lo ^= mid << 64;
+ hi ^= lo ^ (mid >> 64);
+ hi ^= clmul128(lo >> 64, GHASH_MOD_POLY);
+ /* The reduced value becomes the running hash for the next block. */
+ hash = hi;
+ } while (len);
+
+ *Xi = hash;
+}
+
+/*
+ * Install the hash key.
+ *
+ * The 16 key bytes are loaded as a big-endian 128-bit value and rotated left
+ * by one bit; if the bit rotated out of the top was set, GHASH_MOD_POLY is
+ * XORed into the upper 64 bits.  The result is stored in the tfm context.
+ *
+ * Returns 0 on success or -EINVAL if keylen is not GHASH_BLOCK_SIZE.
+ */
+static int riscv64_clmul_ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen)
+{
+	struct riscv64_clmul_ghash_ctx *tctx = crypto_shash_ctx(tfm);
+	__uint128_t h, msb;
+
+	if (keylen != GHASH_BLOCK_SIZE)
+		return -EINVAL;
+
+	h = get_unaligned_be128(key);
+	msb = h >> 127;
+	h = (h << 1) | msb;
+	if (msb)
+		h ^= (__uint128_t)GHASH_MOD_POLY << 64;
+
+	tctx->key = h;
+	return 0;
+}
+
+/*
+ * Feed srclen bytes from src into the running hash.
+ *
+ * Bytes left over from an earlier call are completed to a full block first,
+ * then all whole blocks are hashed directly from src, and any trailing
+ * partial block is kept in dctx->buffer for the next update() or final().
+ *
+ * Always returns 0.
+ */
+static int riscv64_clmul_ghash_update(struct shash_desc *desc, const u8 *src, unsigned int srclen)
+{
+	struct riscv64_clmul_ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	struct riscv64_clmul_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int len;
+
+	if (dctx->bytes) {
+		/*
+		 * Compare against the free space instead of testing
+		 * "bytes + srclen": that sum is evaluated in unsigned int and
+		 * can wrap for a very large srclen, which would let the
+		 * memcpy() below overrun the buffer.
+		 */
+		unsigned int room = GHASH_DIGEST_SIZE - dctx->bytes;
+
+		if (srclen < room) {
+			memcpy(dctx->buffer + dctx->bytes, src, srclen);
+			dctx->bytes += srclen;
+			return 0;
+		}
+
+		/* Complete the buffered block and hash it. */
+		memcpy(dctx->buffer + dctx->bytes, src, room);
+		gcm_ghash_rv64i_zbc(&dctx->shash, ctx->key, dctx->buffer, GHASH_DIGEST_SIZE);
+
+		src += room;
+		srclen -= room;
+		dctx->bytes = 0;
+	}
+
+	/* Hash as many whole blocks as possible straight from src. */
+	len = round_down(srclen, GHASH_BLOCK_SIZE);
+	if (len) {
+		gcm_ghash_rv64i_zbc(&dctx->shash, ctx->key, src, len);
+		src += len;
+		srclen -= len;
+	}
+
+	/* Keep the trailing partial block for later. */
+	if (srclen) {
+		memcpy(dctx->buffer, src, srclen);
+		dctx->bytes = srclen;
+	}
+	return 0;
+}
+
+/*
+ * Finish the hash and write the GHASH_DIGEST_SIZE-byte result to out.
+ *
+ * A pending partial block is zero-padded to a full block and hashed before
+ * the running value is stored big-endian.  Always returns 0.
+ */
+static int riscv64_clmul_ghash_final(struct shash_desc *desc, u8 out[GHASH_DIGEST_SIZE])
+{
+	struct riscv64_clmul_ghash_desc_ctx *state = shash_desc_ctx(desc);
+	struct riscv64_clmul_ghash_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	int pos = state->bytes;
+
+	if (pos) {
+		while (pos < GHASH_DIGEST_SIZE)
+			state->buffer[pos++] = 0;
+		gcm_ghash_rv64i_zbc(&state->shash, tctx->key, state->buffer, GHASH_DIGEST_SIZE);
+		state->bytes = 0;
+	}
+
+	put_unaligned_be128(state->shash, out);
+	return 0;
+}
+
+/*
+ * Algorithm descriptor registered by this module under the generic name
+ * "ghash" with priority 250.  It is only referenced from this file, so it
+ * is given internal linkage.
+ */
+static struct shash_alg riscv64_clmul_ghash_alg = {
+	.init = riscv64_clmul_ghash_init,
+	.update = riscv64_clmul_ghash_update,
+	.final = riscv64_clmul_ghash_final,
+	.setkey = riscv64_clmul_ghash_setkey,
+	.descsize = sizeof(struct riscv64_clmul_ghash_desc_ctx),
+	.digestsize = GHASH_DIGEST_SIZE,
+	.base = {
+		.cra_blocksize = GHASH_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct riscv64_clmul_ghash_ctx),
+		.cra_priority = 250,
+		.cra_name = "ghash",
+		.cra_driver_name = "ghash-riscv64-clmul",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+/*
+ * Register the algorithm only when the CPU offers both instructions this
+ * file emits; otherwise fail module load with -ENODEV.
+ */
+static int __init riscv64_clmul_ghash_mod_init(void)
+{
+	/* clmul/clmulh: available with either Zbc or Zbkc. */
+	if (!riscv_isa_extension_available(NULL, ZBC) &&
+	    !riscv_isa_extension_available(NULL, ZBKC))
+		return -ENODEV;
+
+	/* rev8: available with either Zbb or Zbkb. */
+	if (!riscv_isa_extension_available(NULL, ZBB) &&
+	    !riscv_isa_extension_available(NULL, ZBKB))
+		return -ENODEV;
+
+	return crypto_register_shash(&riscv64_clmul_ghash_alg);
+}
+
+/* Module exit: unregister the algorithm that mod_init registered. */
+static void __exit riscv64_clmul_ghash_mod_fini(void)
+{
+ crypto_unregister_shash(&riscv64_clmul_ghash_alg);
+}
+
+/* Hook the init/exit functions into module load and unload. */
+module_init(riscv64_clmul_ghash_mod_init);
+module_exit(riscv64_clmul_ghash_mod_fini);
+
+MODULE_DESCRIPTION("GHASH (RISC-V CLMUL accelerated)");
+MODULE_AUTHOR("Qingfang Deng <dqfext@gmail.com>");
+MODULE_LICENSE("GPL");
+/* NOTE(review): presumably lets a request for "ghash" auto-load this module -- confirm. */
+MODULE_ALIAS_CRYPTO("ghash");
--
2.43.0
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
next reply other threads:[~2025-04-17 6:49 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-17 6:49 Qingfang Deng [this message]
2025-04-17 6:49 ` [RFC PATCH] crypto: riscv: scalar accelerated GHASH Qingfang Deng
2025-04-17 6:57 ` Ard Biesheuvel
2025-04-17 6:57 ` Ard Biesheuvel
2025-04-17 7:25 ` Qingfang Deng
2025-04-17 7:25 ` Qingfang Deng
2025-04-17 7:39 ` Jeffrey Walton
2025-04-17 7:39 ` Jeffrey Walton
2025-04-17 7:45 ` Qingfang Deng
2025-04-17 7:45 ` Qingfang Deng
2025-04-17 7:57 ` Ard Biesheuvel
2025-04-17 7:57 ` Ard Biesheuvel
2025-04-17 8:42 ` Qingfang Deng
2025-04-17 8:42 ` Qingfang Deng
2025-04-17 14:15 ` Ard Biesheuvel
2025-04-17 14:15 ` Ard Biesheuvel
2025-04-17 14:39 ` Qingfang Deng
2025-04-17 14:39 ` Qingfang Deng
2025-04-17 16:58 ` Eric Biggers
2025-04-17 16:58 ` Eric Biggers
2025-04-18 1:48 ` Qingfang Deng
2025-04-18 1:48 ` Qingfang Deng
2025-04-17 7:21 ` Herbert Xu
2025-04-17 7:21 ` Herbert Xu
2025-04-17 17:09 ` Eric Biggers
2025-04-17 17:09 ` Eric Biggers
2025-04-18 2:49 ` Qingfang Deng
2025-04-18 2:49 ` Qingfang Deng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250417064940.68469-1-dqfext@gmail.com \
--to=dqfext@gmail.com \
--cc=alex@ghiti.fr \
--cc=aou@eecs.berkeley.edu \
--cc=christoph.muellner@vrull.eu \
--cc=davem@davemloft.net \
--cc=heiko.stuebner@vrull.eu \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=palmer@dabbelt.com \
--cc=paul.walmsley@sifive.com \
--cc=qingfang.deng@siflower.com.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.