* [PATCH] crypto: riscv/poly1305 - import OpenSSL/CRYPTOGAMS implementation
@ 2025-06-05 14:56 zhihang.shao.iscas
2025-06-05 15:01 ` Eric Biggers
2025-06-05 16:42 ` Andy Polyakov
0 siblings, 2 replies; 3+ messages in thread
From: zhihang.shao.iscas @ 2025-06-05 14:56 UTC (permalink / raw)
To: linux-crypto; +Cc: linux-riscv, herbert, paul.walmsley, ou, alex, appro
From: Zhihang Shao <zhihang.shao.iscas@gmail.com>
This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305
implementation for riscv authored by Andy Polyakov.
The file 'poly1305-riscv.pl' is taken straight from this upstream
GitHub repository [0] at commit 33fe84bc21219a16825459b37c825bf4580a0a7b,
and this commit fixed a bug in riscv 64bit implementation.
Also, this patch passed extra run-time self tests.
[0] https://github.com/dot-asm/cryptogams
Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com>
---
arch/riscv/crypto/Kconfig | 10 +
arch/riscv/crypto/Makefile | 17 +
arch/riscv/crypto/poly1305-glue.c | 202 +++++++
arch/riscv/crypto/poly1305-riscv.pl | 797 ++++++++++++++++++++++++++++
drivers/net/Kconfig | 1 +
lib/crypto/Kconfig | 2 +-
6 files changed, 1028 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/crypto/poly1305-glue.c
create mode 100644 arch/riscv/crypto/poly1305-riscv.pl
diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
index c67095a3d669..228bb3c6940d 100644
--- a/arch/riscv/crypto/Kconfig
+++ b/arch/riscv/crypto/Kconfig
@@ -38,6 +38,16 @@ config CRYPTO_GHASH_RISCV64
Architecture: riscv64 using:
- Zvkg vector crypto extension
+config CRYPTO_POLY1305_RISCV
+ tristate "Hash functions: Poly1305"
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
+ help
+ Poly1305 authenticator algorithm (RFC7539)
+
+ Architecture: riscv using:
+ - V vector extension
+
config CRYPTO_SHA256_RISCV64
tristate "Hash functions: SHA-224 and SHA-256"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
index 247c7bc7288c..0c96bd9a61b3 100644
--- a/arch/riscv/crypto/Makefile
+++ b/arch/riscv/crypto/Makefile
@@ -10,6 +10,10 @@ chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
+obj-$(CONFIG_CRYPTO_POLY1305_RISCV) += poly1305-riscv.o
+poly1305-riscv-y := poly1305-core.o poly1305-glue.o
+AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_riscv
+
obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
@@ -21,3 +25,15 @@ sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o
obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o
sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o
+
+ifeq ($(CONFIG_64BIT),y)
+PERLASM_ARCH := 64
+else
+PERLASM_ARCH := void
+endif
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $(<) $(PERLASM_ARCH) $(@)
+
+$(obj)/%-core.S: $(src)/%-riscv.pl
+ $(call cmd,perlasm)
diff --git a/arch/riscv/crypto/poly1305-glue.c b/arch/riscv/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..b8e038a50c13
--- /dev/null
+++ b/arch/riscv/crypto/poly1305-glue.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for riscv
+ *
+ * Copyright (C) 2025 Institute of Software, CAS.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/simd.h>
+#include <linux/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_riscv(void *state, const u8 *key);
+asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
+{
+ poly1305_init_riscv(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int riscv64_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void riscv64_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_riscv(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ poly1305_blocks(&dctx->h, src, len, hibit);
+}
+
+static void riscv64_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ riscv64_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ riscv64_poly1305_blocks(dctx, src, len, 1);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int riscv64_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ riscv64_poly1305_do_update(dctx, src, srclen);
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ poly1305_blocks(&dctx->h, src, len, 1);
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit(&dctx->h, dst, dctx->s);
+ memzero_explicit(dctx, sizeof(*dctx));
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int riscv64_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg riscv64_poly1305_alg = {
+ .init = riscv64_poly1305_init,
+ .update = riscv64_poly1305_update,
+ .final = riscv64_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-riscv64",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+};
+
+static int __init riscv64_poly1305_mod_init(void)
+{
+ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+ crypto_register_shash(&riscv64_poly1305_alg) : 0;
+}
+
+static void __exit riscv64_poly1305_mod_exit(void)
+{
+ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+ crypto_unregister_shash(&riscv64_poly1305_alg);
+}
+
+module_init(riscv64_poly1305_mod_init);
+module_exit(riscv64_poly1305_mod_exit);
+
+MODULE_DESCRIPTION("Poly1305 (RISC-V accelerated)");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-riscv64");
diff --git a/arch/riscv/crypto/poly1305-riscv.pl b/arch/riscv/crypto/poly1305-riscv.pl
new file mode 100644
index 000000000000..315ccc328fd9
--- /dev/null
+++ b/arch/riscv/crypto/poly1305-riscv.pl
@@ -0,0 +1,797 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for use with OpenSSL.
+# ====================================================================
+#
+# Poly1305 hash for RISC-V.
+#
+# February 2019
+#
+# In the essence it's pretty straightforward transliteration of MIPS
+# module [without big-endian option].
+#
+# 3.9 cycles per byte on U74, ~60% faster than compiler-generated code.
+# 1.9 cpb on C910, ~75% improvement. 2.3 cpb on JH7110 (U74 with
+# apparently better multiplier), ~69% faster, 3.3 on Spacemit X60,
+# ~69% improvement.
+#
+# June 2024.
+#
+# Add CHERI support.
+#
+######################################################################
+#
+($zero,$ra,$sp,$gp,$tp)=map("x$_",(0..4));
+($t0,$t1,$t2,$t3,$t4,$t5,$t6)=map("x$_",(5..7,28..31));
+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("x$_",(10..17));
+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("x$_",(8,9,18..27));
+#
+######################################################################
+
+$flavour = shift || "64";
+
+for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
+open STDOUT,">$output";
+
+if ($flavour =~ /64/) {{{
+######################################################################
+# 64-bit code path...
+#
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$t0,$t1,$t2);
+
+$code.=<<___;
+#if __riscv_xlen == 64
+# if __SIZEOF_POINTER__ == 16
+# define PUSH csc
+# define POP clc
+# else
+# define PUSH sd
+# define POP ld
+# endif
+#else
+# error "unsupported __riscv_xlen"
+#endif
+
+.option pic
+.text
+
+.globl poly1305_init
+.type poly1305_init,\@function
+poly1305_init:
+ sd $zero,0($ctx)
+ sd $zero,8($ctx)
+ sd $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#ifndef __CHERI_PURE_CAPABILITY__
+ andi $tmp0,$inp,7 # $inp % 8
+ andi $inp,$inp,-8 # align $inp
+ slli $tmp0,$tmp0,3 # byte to bit offset
+#endif
+ ld $in0,0($inp)
+ ld $in1,8($inp)
+#ifndef __CHERI_PURE_CAPABILITY__
+ beqz $tmp0,.Laligned_key
+
+ ld $tmp2,16($inp)
+ neg $tmp1,$tmp0 # implicit &63 in sll
+ srl $in0,$in0,$tmp0
+ sll $tmp3,$in1,$tmp1
+ srl $in1,$in1,$tmp0
+ sll $tmp2,$tmp2,$tmp1
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+
+.Laligned_key:
+#endif
+ li $tmp0,1
+ slli $tmp0,$tmp0,32 # 0x0000000100000000
+ addi $tmp0,$tmp0,-63 # 0x00000000ffffffc1
+ slli $tmp0,$tmp0,28 # 0x0ffffffc10000000
+ addi $tmp0,$tmp0,-1 # 0x0ffffffc0fffffff
+
+ and $in0,$in0,$tmp0
+ addi $tmp0,$tmp0,-3 # 0x0ffffffc0ffffffc
+ and $in1,$in1,$tmp0
+
+ sd $in0,24($ctx)
+ srli $tmp0,$in1,2
+ sd $in1,32($ctx)
+ add $tmp0,$tmp0,$in1 # s1 = r1 + (r1 >> 2)
+ sd $tmp0,40($ctx)
+
+.Lno_key:
+ li $a0,0 # return 0
+ ret
+.size poly1305_init,.-poly1305_init
+___
+{
+my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
+ ($s0,$s1,$s2,$s3,$t3,$t4,$in0,$in1,$t2);
+my ($shr,$shl) = ($t5,$t6); # used on R6
+
+$code.=<<___;
+.globl poly1305_blocks
+.type poly1305_blocks,\@function
+poly1305_blocks:
+ andi $len,$len,-16 # complete blocks only
+ beqz $len,.Lno_data
+
+ caddi $sp,$sp,-4*__SIZEOF_POINTER__
+ PUSH $s0,3*__SIZEOF_POINTER__($sp)
+ PUSH $s1,2*__SIZEOF_POINTER__($sp)
+ PUSH $s2,1*__SIZEOF_POINTER__($sp)
+ PUSH $s3,0*__SIZEOF_POINTER__($sp)
+
+#ifndef __CHERI_PURE_CAPABILITY__
+ andi $shr,$inp,7
+ andi $inp,$inp,-8 # align $inp
+ slli $shr,$shr,3 # byte to bit offset
+ neg $shl,$shr # implicit &63 in sll
+#endif
+
+ ld $h0,0($ctx) # load hash value
+ ld $h1,8($ctx)
+ ld $h2,16($ctx)
+
+ ld $r0,24($ctx) # load key
+ ld $r1,32($ctx)
+ ld $rs1,40($ctx)
+
+ add $len,$len,$inp # end of buffer
+
+.Loop:
+ ld $in0,0($inp) # load input
+ ld $in1,8($inp)
+#ifndef __CHERI_PURE_CAPABILITY__
+ beqz $shr,.Laligned_inp
+
+ ld $tmp2,16($inp)
+ srl $in0,$in0,$shr
+ sll $tmp3,$in1,$shl
+ srl $in1,$in1,$shr
+ sll $tmp2,$tmp2,$shl
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+
+.Laligned_inp:
+#endif
+ caddi $inp,$inp,16
+
+ andi $tmp0,$h2,-4 # modulo-scheduled reduction
+ srli $tmp1,$h2,2
+ andi $h2,$h2,3
+
+ add $d0,$h0,$in0 # accumulate input
+ add $tmp1,$tmp1,$tmp0
+ sltu $tmp0,$d0,$h0
+ add $d0,$d0,$tmp1 # ... and residue
+ sltu $tmp1,$d0,$tmp1
+ add $d1,$h1,$in1
+ add $tmp0,$tmp0,$tmp1
+ sltu $tmp1,$d1,$h1
+ add $d1,$d1,$tmp0
+
+ add $d2,$h2,$padbit
+ sltu $tmp0,$d1,$tmp0
+ mulhu $h1,$r0,$d0 # h0*r0
+ mul $h0,$r0,$d0
+
+ add $d2,$d2,$tmp1
+ add $d2,$d2,$tmp0
+ mulhu $tmp1,$rs1,$d1 # h1*5*r1
+ mul $tmp0,$rs1,$d1
+
+ mulhu $h2,$r1,$d0 # h0*r1
+ mul $tmp2,$r1,$d0
+ add $h0,$h0,$tmp0
+ add $h1,$h1,$tmp1
+ sltu $tmp0,$h0,$tmp0
+
+ add $h1,$h1,$tmp0
+ add $h1,$h1,$tmp2
+ mulhu $tmp1,$r0,$d1 # h1*r0
+ mul $tmp0,$r0,$d1
+
+ sltu $tmp2,$h1,$tmp2
+ add $h2,$h2,$tmp2
+ mul $tmp2,$rs1,$d2 # h2*5*r1
+
+ add $h1,$h1,$tmp0
+ add $h2,$h2,$tmp1
+ mul $tmp3,$r0,$d2 # h2*r0
+ sltu $tmp0,$h1,$tmp0
+ add $h2,$h2,$tmp0
+
+ add $h1,$h1,$tmp2
+ sltu $tmp2,$h1,$tmp2
+ add $h2,$h2,$tmp2
+ add $h2,$h2,$tmp3
+
+ bne $inp,$len,.Loop
+
+ sd $h0,0($ctx) # store hash value
+ sd $h1,8($ctx)
+ sd $h2,16($ctx)
+
+ POP $s0,3*__SIZEOF_POINTER__($sp) # epilogue
+ POP $s1,2*__SIZEOF_POINTER__($sp)
+ POP $s2,1*__SIZEOF_POINTER__($sp)
+ POP $s3,0*__SIZEOF_POINTER__($sp)
+ caddi $sp,$sp,4*__SIZEOF_POINTER__
+
+.Lno_data:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
+
+$code.=<<___;
+.globl poly1305_emit
+.type poly1305_emit,\@function
+poly1305_emit:
+ ld $tmp2,16($ctx)
+ ld $tmp0,0($ctx)
+ ld $tmp1,8($ctx)
+
+ andi $in0,$tmp2,-4 # final reduction
+ srl $in1,$tmp2,2
+ andi $tmp2,$tmp2,3
+ add $in0,$in0,$in1
+
+ add $tmp0,$tmp0,$in0
+ sltu $in1,$tmp0,$in0
+ addi $in0,$tmp0,5 # compare to modulus
+ add $tmp1,$tmp1,$in1
+ sltiu $tmp3,$in0,5
+ sltu $tmp4,$tmp1,$in1
+ add $in1,$tmp1,$tmp3
+ add $tmp2,$tmp2,$tmp4
+ sltu $tmp3,$in1,$tmp3
+ add $tmp2,$tmp2,$tmp3
+
+ srli $tmp2,$tmp2,2 # see if it carried/borrowed
+ neg $tmp2,$tmp2
+
+ xor $in0,$in0,$tmp0
+ xor $in1,$in1,$tmp1
+ and $in0,$in0,$tmp2
+ and $in1,$in1,$tmp2
+ xor $in0,$in0,$tmp0
+ xor $in1,$in1,$tmp1
+
+ lwu $tmp0,0($nonce) # load nonce
+ lwu $tmp1,4($nonce)
+ lwu $tmp2,8($nonce)
+ lwu $tmp3,12($nonce)
+ slli $tmp1,$tmp1,32
+ slli $tmp3,$tmp3,32
+ or $tmp0,$tmp0,$tmp1
+ or $tmp2,$tmp2,$tmp3
+
+ add $in0,$in0,$tmp0 # accumulate nonce
+ add $in1,$in1,$tmp2
+ sltu $tmp0,$in0,$tmp0
+ add $in1,$in1,$tmp0
+
+ srli $tmp0,$in0,8 # write mac value
+ srli $tmp1,$in0,16
+ srli $tmp2,$in0,24
+ sb $in0,0($mac)
+ srli $tmp3,$in0,32
+ sb $tmp0,1($mac)
+ srli $tmp0,$in0,40
+ sb $tmp1,2($mac)
+ srli $tmp1,$in0,48
+ sb $tmp2,3($mac)
+ srli $tmp2,$in0,56
+ sb $tmp3,4($mac)
+ srli $tmp3,$in1,8
+ sb $tmp0,5($mac)
+ srli $tmp0,$in1,16
+ sb $tmp1,6($mac)
+ srli $tmp1,$in1,24
+ sb $tmp2,7($mac)
+
+ sb $in1,8($mac)
+ srli $tmp2,$in1,32
+ sb $tmp3,9($mac)
+ srli $tmp3,$in1,40
+ sb $tmp0,10($mac)
+ srli $tmp0,$in1,48
+ sb $tmp1,11($mac)
+ srli $tmp1,$in1,56
+ sb $tmp2,12($mac)
+ sb $tmp3,13($mac)
+ sb $tmp0,14($mac)
+ sb $tmp1,15($mac)
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+.string "Poly1305 for RISC-V, CRYPTOGAMS by \@dot-asm"
+___
+}
+}}} else {{{
+######################################################################
+# 32-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
+ ($a4,$a5,$a6,$a7,$t0,$t1,$t2,$t3);
+
+$code.=<<___;
+#if __riscv_xlen == 32
+# if __SIZEOF_POINTER__ == 8
+# define PUSH csc
+# define POP clc
+# else
+# define PUSH sw
+# define POP lw
+# endif
+# define MULX(hi,lo,a,b) mulhu hi,a,b; mul lo,a,b
+# define srlw srl
+# define sllw sll
+# define addw add
+# define addiw addi
+# define mulw mul
+#elif __riscv_xlen == 64
+# if __SIZEOF_POINTER__ == 16
+# define PUSH csc
+# define POP clc
+# else
+# define PUSH sd
+# define POP ld
+# endif
+# define MULX(hi,lo,a,b) slli b,b,32; srli b,b,32; mul hi,a,b; addiw lo,hi,0; srai hi,hi,32
+#else
+# error "unsupported __riscv_xlen"
+#endif
+
+.option pic
+.text
+
+.globl poly1305_init
+.type poly1305_init,\@function
+poly1305_init:
+ sw $zero,0($ctx)
+ sw $zero,4($ctx)
+ sw $zero,8($ctx)
+ sw $zero,12($ctx)
+ sw $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#ifndef __CHERI_PURE_CAPABILITY__
+ andi $tmp0,$inp,3 # $inp % 4
+ sub $inp,$inp,$tmp0 # align $inp
+ sll $tmp0,$tmp0,3 # byte to bit offset
+#endif
+ lw $in0,0($inp)
+ lw $in1,4($inp)
+ lw $in2,8($inp)
+ lw $in3,12($inp)
+#ifndef __CHERI_PURE_CAPABILITY__
+ beqz $tmp0,.Laligned_key
+
+ lw $tmp2,16($inp)
+ sub $tmp1,$zero,$tmp0
+ srlw $in0,$in0,$tmp0
+ sllw $tmp3,$in1,$tmp1
+ srlw $in1,$in1,$tmp0
+ or $in0,$in0,$tmp3
+ sllw $tmp3,$in2,$tmp1
+ srlw $in2,$in2,$tmp0
+ or $in1,$in1,$tmp3
+ sllw $tmp3,$in3,$tmp1
+ srlw $in3,$in3,$tmp0
+ or $in2,$in2,$tmp3
+ sllw $tmp2,$tmp2,$tmp1
+ or $in3,$in3,$tmp2
+.Laligned_key:
+#endif
+
+ lui $tmp0,0x10000
+ addi $tmp0,$tmp0,-1 # 0x0fffffff
+ and $in0,$in0,$tmp0
+ addi $tmp0,$tmp0,-3 # 0x0ffffffc
+ and $in1,$in1,$tmp0
+ and $in2,$in2,$tmp0
+ and $in3,$in3,$tmp0
+
+ sw $in0,20($ctx)
+ sw $in1,24($ctx)
+ sw $in2,28($ctx)
+ sw $in3,32($ctx)
+
+ srlw $tmp1,$in1,2
+ srlw $tmp2,$in2,2
+ srlw $tmp3,$in3,2
+ addw $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2)
+ addw $in2,$in2,$tmp2
+ addw $in3,$in3,$tmp3
+ sw $in1,36($ctx)
+ sw $in2,40($ctx)
+ sw $in3,44($ctx)
+.Lno_key:
+ li $a0,0
+ ret
+.size poly1305_init,.-poly1305_init
+___
+{
+my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
+ ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $t0,$t1,$t2);
+my ($d0,$d1,$d2,$d3) =
+ ($a4,$a5,$a6,$a7);
+my $shr = $ra; # used on R6
+
+$code.=<<___;
+.globl poly1305_blocks
+.type poly1305_blocks,\@function
+poly1305_blocks:
+ andi $len,$len,-16 # complete blocks only
+ beqz $len,.Labort
+
+ caddi $sp,$sp,-__SIZEOF_POINTER__*12
+ PUSH $ra, __SIZEOF_POINTER__*11($sp)
+ PUSH $s0, __SIZEOF_POINTER__*10($sp)
+ PUSH $s1, __SIZEOF_POINTER__*9($sp)
+ PUSH $s2, __SIZEOF_POINTER__*8($sp)
+ PUSH $s3, __SIZEOF_POINTER__*7($sp)
+ PUSH $s4, __SIZEOF_POINTER__*6($sp)
+ PUSH $s5, __SIZEOF_POINTER__*5($sp)
+ PUSH $s6, __SIZEOF_POINTER__*4($sp)
+ PUSH $s7, __SIZEOF_POINTER__*3($sp)
+ PUSH $s8, __SIZEOF_POINTER__*2($sp)
+
+#ifndef __CHERI_PURE_CAPABILITY__
+ andi $shr,$inp,3
+ andi $inp,$inp,-4 # align $inp
+ slli $shr,$shr,3 # byte to bit offset
+#endif
+
+ lw $h0,0($ctx) # load hash value
+ lw $h1,4($ctx)
+ lw $h2,8($ctx)
+ lw $h3,12($ctx)
+ lw $h4,16($ctx)
+
+ lw $r0,20($ctx) # load key
+ lw $r1,24($ctx)
+ lw $r2,28($ctx)
+ lw $r3,32($ctx)
+ lw $rs1,36($ctx)
+ lw $rs2,40($ctx)
+ lw $rs3,44($ctx)
+
+ add $len,$len,$inp # end of buffer
+
+.Loop:
+ lw $d0,0($inp) # load input
+ lw $d1,4($inp)
+ lw $d2,8($inp)
+ lw $d3,12($inp)
+#ifndef __CHERI_PURE_CAPABILITY__
+ beqz $shr,.Laligned_inp
+
+ lw $t4,16($inp)
+ sub $t5,$zero,$shr
+ srlw $d0,$d0,$shr
+ sllw $t3,$d1,$t5
+ srlw $d1,$d1,$shr
+ or $d0,$d0,$t3
+ sllw $t3,$d2,$t5
+ srlw $d2,$d2,$shr
+ or $d1,$d1,$t3
+ sllw $t3,$d3,$t5
+ srlw $d3,$d3,$shr
+ or $d2,$d2,$t3
+ sllw $t4,$t4,$t5
+ or $d3,$d3,$t4
+
+.Laligned_inp:
+#endif
+ srli $t3,$h4,2 # modulo-scheduled reduction
+ andi $t4,$h4,-4
+ andi $h4,$h4,3
+
+ addw $d0,$d0,$h0 # accumulate input
+ addw $t4,$t4,$t3
+ sltu $h0,$d0,$h0
+ addw $d0,$d0,$t4 # ... and residue
+ sltu $t4,$d0,$t4
+
+ addw $d1,$d1,$h1
+ addw $h0,$h0,$t4 # carry
+ sltu $h1,$d1,$h1
+ addw $d1,$d1,$h0
+ sltu $h0,$d1,$h0
+
+ addw $d2,$d2,$h2
+ addw $h1,$h1,$h0 # carry
+ sltu $h2,$d2,$h2
+ addw $d2,$d2,$h1
+ sltu $h1,$d2,$h1
+
+ addw $d3,$d3,$h3
+ addw $h2,$h2,$h1 # carry
+ sltu $h3,$d3,$h3
+ addw $d3,$d3,$h2
+
+ MULX ($h1,$h0,$r0,$d0) # d0*r0
+
+ sltu $h2,$d3,$h2
+ addw $h3,$h3,$h2 # carry
+
+ MULX ($t4,$t3,$rs3,$d1) # d1*s3
+
+ addw $h4,$h4,$padbit
+ caddi $inp,$inp,16
+ addw $h4,$h4,$h3
+
+ MULX ($t6,$a3,$rs2,$d2) # d2*s2
+ addw $h0,$h0,$t3
+ addw $h1,$h1,$t4
+ sltu $t3,$h0,$t3
+ addw $h1,$h1,$t3
+
+ MULX ($t4,$t3,$rs1,$d3) # d3*s1
+ addw $h0,$h0,$a3
+ addw $h1,$h1,$t6
+ sltu $a3,$h0,$a3
+ addw $h1,$h1,$a3
+
+
+ MULX ($h2,$a3,$r1,$d0) # d0*r1
+ addw $h0,$h0,$t3
+ addw $h1,$h1,$t4
+ sltu $t3,$h0,$t3
+ addw $h1,$h1,$t3
+
+ MULX ($t4,$t3,$r0,$d1) # d1*r0
+ addw $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ addw $h2,$h2,$a3
+
+ MULX ($t6,$a3,$rs3,$d2) # d2*s3
+ addw $h1,$h1,$t3
+ addw $h2,$h2,$t4
+ sltu $t3,$h1,$t3
+ addw $h2,$h2,$t3
+
+ MULX ($t4,$t3,$rs2,$d3) # d3*s2
+ addw $h1,$h1,$a3
+ addw $h2,$h2,$t6
+ sltu $a3,$h1,$a3
+ addw $h2,$h2,$a3
+
+ mulw $a3,$rs1,$h4 # h4*s1
+ addw $h1,$h1,$t3
+ addw $h2,$h2,$t4
+ sltu $t3,$h1,$t3
+ addw $h2,$h2,$t3
+
+
+ MULX ($h3,$t3,$r2,$d0) # d0*r2
+ addw $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ addw $h2,$h2,$a3
+
+ MULX ($t6,$a3,$r1,$d1) # d1*r1
+ addw $h2,$h2,$t3
+ sltu $t3,$h2,$t3
+ addw $h3,$h3,$t3
+
+ MULX ($t4,$t3,$r0,$d2) # d2*r0
+ addw $h2,$h2,$a3
+ addw $h3,$h3,$t6
+ sltu $a3,$h2,$a3
+ addw $h3,$h3,$a3
+
+ MULX ($t6,$a3,$rs3,$d3) # d3*s3
+ addw $h2,$h2,$t3
+ addw $h3,$h3,$t4
+ sltu $t3,$h2,$t3
+ addw $h3,$h3,$t3
+
+ mulw $t3,$rs2,$h4 # h4*s2
+ addw $h2,$h2,$a3
+ addw $h3,$h3,$t6
+ sltu $a3,$h2,$a3
+ addw $h3,$h3,$a3
+
+
+ MULX ($t6,$a3,$r3,$d0) # d0*r3
+ addw $h2,$h2,$t3
+ sltu $t3,$h2,$t3
+ addw $h3,$h3,$t3
+
+ MULX ($t4,$t3,$r2,$d1) # d1*r2
+ addw $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ addw $t6,$t6,$a3
+
+ MULX ($a3,$d3,$r0,$d3) # d3*r0
+ addw $h3,$h3,$t3
+ addw $t6,$t6,$t4
+ sltu $t3,$h3,$t3
+ addw $t6,$t6,$t3
+
+ MULX ($t4,$t3,$r1,$d2) # d2*r1
+ addw $h3,$h3,$d3
+ addw $t6,$t6,$a3
+ sltu $d3,$h3,$d3
+ addw $t6,$t6,$d3
+
+ mulw $a3,$rs3,$h4 # h4*s3
+ addw $h3,$h3,$t3
+ addw $t6,$t6,$t4
+ sltu $t3,$h3,$t3
+ addw $t6,$t6,$t3
+
+
+ mulw $h4,$r0,$h4 # h4*r0
+ addw $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ addw $t6,$t6,$a3
+ addw $h4,$t6,$h4
+
+ li $padbit,1 # if we loop, padbit is 1
+
+ bne $inp,$len,.Loop
+
+ sw $h0,0($ctx) # store hash value
+ sw $h1,4($ctx)
+ sw $h2,8($ctx)
+ sw $h3,12($ctx)
+ sw $h4,16($ctx)
+
+ POP $ra, __SIZEOF_POINTER__*11($sp)
+ POP $s0, __SIZEOF_POINTER__*10($sp)
+ POP $s1, __SIZEOF_POINTER__*9($sp)
+ POP $s2, __SIZEOF_POINTER__*8($sp)
+ POP $s3, __SIZEOF_POINTER__*7($sp)
+ POP $s4, __SIZEOF_POINTER__*6($sp)
+ POP $s5, __SIZEOF_POINTER__*5($sp)
+ POP $s6, __SIZEOF_POINTER__*4($sp)
+ POP $s7, __SIZEOF_POINTER__*3($sp)
+ POP $s8, __SIZEOF_POINTER__*2($sp)
+ caddi $sp,$sp,__SIZEOF_POINTER__*12
+.Labort:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
+
+$code.=<<___;
+.globl poly1305_emit
+.type poly1305_emit,\@function
+poly1305_emit:
+ lw $tmp4,16($ctx)
+ lw $tmp0,0($ctx)
+ lw $tmp1,4($ctx)
+ lw $tmp2,8($ctx)
+ lw $tmp3,12($ctx)
+
+ srli $ctx,$tmp4,2 # final reduction
+ andi $in0,$tmp4,-4
+ andi $tmp4,$tmp4,3
+ addw $ctx,$ctx,$in0
+
+ addw $tmp0,$tmp0,$ctx
+ sltu $ctx,$tmp0,$ctx
+ addiw $in0,$tmp0,5 # compare to modulus
+ addw $tmp1,$tmp1,$ctx
+ sltiu $in1,$in0,5
+ sltu $ctx,$tmp1,$ctx
+ addw $in1,$in1,$tmp1
+ addw $tmp2,$tmp2,$ctx
+ sltu $in2,$in1,$tmp1
+ sltu $ctx,$tmp2,$ctx
+ addw $in2,$in2,$tmp2
+ addw $tmp3,$tmp3,$ctx
+ sltu $in3,$in2,$tmp2
+ sltu $ctx,$tmp3,$ctx
+ addw $in3,$in3,$tmp3
+ addw $tmp4,$tmp4,$ctx
+ sltu $ctx,$in3,$tmp3
+ addw $ctx,$ctx,$tmp4
+
+ srl $ctx,$ctx,2 # see if it carried/borrowed
+ sub $ctx,$zero,$ctx
+
+ xor $in0,$in0,$tmp0
+ xor $in1,$in1,$tmp1
+ xor $in2,$in2,$tmp2
+ xor $in3,$in3,$tmp3
+ and $in0,$in0,$ctx
+ and $in1,$in1,$ctx
+ and $in2,$in2,$ctx
+ and $in3,$in3,$ctx
+ xor $in0,$in0,$tmp0
+ xor $in1,$in1,$tmp1
+ xor $in2,$in2,$tmp2
+ xor $in3,$in3,$tmp3
+
+ lw $tmp0,0($nonce) # load nonce
+ lw $tmp1,4($nonce)
+ lw $tmp2,8($nonce)
+ lw $tmp3,12($nonce)
+
+ addw $in0,$in0,$tmp0 # accumulate nonce
+ sltu $ctx,$in0,$tmp0
+
+ addw $in1,$in1,$tmp1
+ sltu $tmp1,$in1,$tmp1
+ addw $in1,$in1,$ctx
+ sltu $ctx,$in1,$ctx
+ addw $ctx,$ctx,$tmp1
+
+ addw $in2,$in2,$tmp2
+ sltu $tmp2,$in2,$tmp2
+ addw $in2,$in2,$ctx
+ sltu $ctx,$in2,$ctx
+ addw $ctx,$ctx,$tmp2
+
+ addw $in3,$in3,$tmp3
+ addw $in3,$in3,$ctx
+
+ srl $tmp0,$in0,8 # write mac value
+ srl $tmp1,$in0,16
+ srl $tmp2,$in0,24
+ sb $in0, 0($mac)
+ sb $tmp0,1($mac)
+ srl $tmp0,$in1,8
+ sb $tmp1,2($mac)
+ srl $tmp1,$in1,16
+ sb $tmp2,3($mac)
+ srl $tmp2,$in1,24
+ sb $in1, 4($mac)
+ sb $tmp0,5($mac)
+ srl $tmp0,$in2,8
+ sb $tmp1,6($mac)
+ srl $tmp1,$in2,16
+ sb $tmp2,7($mac)
+ srl $tmp2,$in2,24
+ sb $in2, 8($mac)
+ sb $tmp0,9($mac)
+ srl $tmp0,$in3,8
+ sb $tmp1,10($mac)
+ srl $tmp1,$in3,16
+ sb $tmp2,11($mac)
+ srl $tmp2,$in3,24
+ sb $in3, 12($mac)
+ sb $tmp0,13($mac)
+ sb $tmp1,14($mac)
+ sb $tmp2,15($mac)
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+.string "Poly1305 for RISC-V, CRYPTOGAMS by \@dot-asm"
+___
+}
+}}}
+
+foreach (split("\n", $code)) {
+ if ($flavour =~ /^cheri/) {
+ s/\(x([0-9]+)\)/(c$1)/ and s/\b([ls][bhwd]u?)\b/c$1/;
+ s/\b(PUSH|POP)(\s+)x([0-9]+)/$1$2c$3/ or
+ s/\b(ret|jal)\b/c$1/;
+ s/\bcaddi?\b/cincoffset/ and s/\bx([0-9]+,)/c$1/g or
+ m/\bcmove\b/ and s/\bx([0-9]+)/c$1/g;
+ } else {
+ s/\bcaddi?\b/add/ or
+ s/\bcmove\b/mv/;
+ }
+ print $_, "\n";
+}
+
+close STDOUT;
\ No newline at end of file
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 271520510b5f..f303ac05a380 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -94,6 +94,7 @@ config WIREGUARD
select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
select CRYPTO_POLY1305_MIPS if MIPS
select CRYPTO_CHACHA_S390 if S390
+ select CRYPTO_POLY1305_RISCV if RISCV
help
WireGuard is a secure, fast, and easy to use replacement for IPSec
that uses modern cryptography and clever networking tricks. It's
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 798972b29b68..d9a13519bf6b 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -106,7 +106,7 @@ config CRYPTO_LIB_DES
config CRYPTO_LIB_POLY1305_RSIZE
int
- default 2 if MIPS
+ default 2 if MIPS || RISCV
default 11 if X86_64
default 9 if ARM || ARM64
default 1
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] crypto: riscv/poly1305 - import OpenSSL/CRYPTOGAMS implementation
2025-06-05 14:56 [PATCH] crypto: riscv/poly1305 - import OpenSSL/CRYPTOGAMS implementation zhihang.shao.iscas
@ 2025-06-05 15:01 ` Eric Biggers
2025-06-05 16:42 ` Andy Polyakov
1 sibling, 0 replies; 3+ messages in thread
From: Eric Biggers @ 2025-06-05 15:01 UTC (permalink / raw)
To: zhihang.shao.iscas
Cc: linux-crypto, linux-riscv, herbert, paul.walmsley, ou, alex,
appro
On Thu, Jun 05, 2025 at 10:56:34PM +0800, zhihang.shao.iscas@gmail.com wrote:
> From: Zhihang Shao <zhihang.shao.iscas@gmail.com>
>
> This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305
> implementation for riscv authored by Andy Polyakov.
> The file 'poly1305-riscv.pl' is taken straight from this upstream
> GitHub repository [0] at commit 33fe84bc21219a16825459b37c825bf4580a0a7b,
> and this commit fixed a bug in riscv 64bit implementation.
> Also, this patch passed extra run-time self tests.
>
> [0] https://github.com/dot-asm/cryptogams
>
> Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com>
> ---
> arch/riscv/crypto/Kconfig | 10 +
> arch/riscv/crypto/Makefile | 17 +
> arch/riscv/crypto/poly1305-glue.c | 202 +++++++
> arch/riscv/crypto/poly1305-riscv.pl | 797 ++++++++++++++++++++++++++++
> drivers/net/Kconfig | 1 +
> lib/crypto/Kconfig | 2 +-
> 6 files changed, 1028 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/crypto/poly1305-glue.c
> create mode 100644 arch/riscv/crypto/poly1305-riscv.pl
>
> diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
> index c67095a3d669..228bb3c6940d 100644
> --- a/arch/riscv/crypto/Kconfig
> +++ b/arch/riscv/crypto/Kconfig
> @@ -38,6 +38,16 @@ config CRYPTO_GHASH_RISCV64
> Architecture: riscv64 using:
> - Zvkg vector crypto extension
>
> +config CRYPTO_POLY1305_RISCV
> + tristate "Hash functions: Poly1305"
> + select CRYPTO_HASH
> + select CRYPTO_ARCH_HAVE_LIB_POLY1305
> + help
> + Poly1305 authenticator algorithm (RFC7539)
> +
> + Architecture: riscv using:
> + - V vector extension
> +
> config CRYPTO_SHA256_RISCV64
> tristate "Hash functions: SHA-224 and SHA-256"
> depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
> diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
> index 247c7bc7288c..0c96bd9a61b3 100644
> --- a/arch/riscv/crypto/Makefile
> +++ b/arch/riscv/crypto/Makefile
> @@ -10,6 +10,10 @@ chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
> obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
> ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
>
> +obj-$(CONFIG_CRYPTO_POLY1305_RISCV) += poly1305-riscv.o
> +poly1305-riscv-y := poly1305-core.o poly1305-glue.o
> +AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_riscv
> +
> obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
> sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
Please rebase onto mainline and port your change to arch/riscv/lib/crypto/.
Poly1305 is now available only through the library API, not the crypto_shash
API.
- Eric
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] crypto: riscv/poly1305 - import OpenSSL/CRYPTOGAMS implementation
2025-06-05 14:56 [PATCH] crypto: riscv/poly1305 - import OpenSSL/CRYPTOGAMS implementation zhihang.shao.iscas
2025-06-05 15:01 ` Eric Biggers
@ 2025-06-05 16:42 ` Andy Polyakov
1 sibling, 0 replies; 3+ messages in thread
From: Andy Polyakov @ 2025-06-05 16:42 UTC (permalink / raw)
To: zhihang.shao.iscas, linux-crypto
Cc: linux-riscv, herbert, paul.walmsley, ou, alex
> This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305
> implementation for riscv authored by Andy Polyakov.
> The file 'poly1305-riscv.pl' is taken straight from this upstream
> GitHub repository [0] at commit 33fe84bc21219a16825459b37c825bf4580a0a7b,
> and this commit fixed a bug in riscv 64bit implementation.
Just in case for reference, the commit fixed a bug in the 32-bit code
path when it's compiled for 64-bit architecture. For better/adequate
performance on a 64-bit system the 64-bit code path should be used. And
it was fine all along. It even passed an algorithmic verification, in
other words confidence level goes beyond the unit tests.
> Also, this patch passed extra run-time self tests.
>
> [0] https://github.com/dot-asm/cryptogams
>
> Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com>
> ---
> arch/riscv/crypto/Kconfig | 10 +
> arch/riscv/crypto/Makefile | 17 +
> arch/riscv/crypto/poly1305-glue.c | 202 +++++++
> arch/riscv/crypto/poly1305-riscv.pl | 797 ++++++++++++++++++++++++++++
> drivers/net/Kconfig | 1 +
> lib/crypto/Kconfig | 2 +-
> 6 files changed, 1028 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/crypto/poly1305-glue.c
> create mode 100644 arch/riscv/crypto/poly1305-riscv.pl
>
> diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
> index c67095a3d669..228bb3c6940d 100644
> --- a/arch/riscv/crypto/Kconfig
> +++ b/arch/riscv/crypto/Kconfig
> @@ -38,6 +38,16 @@ config CRYPTO_GHASH_RISCV64
> Architecture: riscv64 using:
> - Zvkg vector crypto extension
>
> +config CRYPTO_POLY1305_RISCV
> + tristate "Hash functions: Poly1305"
> + select CRYPTO_HASH
> + select CRYPTO_ARCH_HAVE_LIB_POLY1305
> + help
> + Poly1305 authenticator algorithm (RFC7539)
> +
> + Architecture: riscv using:
> + - V vector extension
Implementation in question doesn't use RISC-V vector extension, only
Integer Multiplication extension.
> +static void riscv64_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
> + u32 len, u32 hibit)
> +{
> + if (unlikely(!dctx->sset)) {
> + if (!dctx->rset) {
> + poly1305_init_riscv(&dctx->h, src);
> + src += POLY1305_BLOCK_SIZE;
> + len -= POLY1305_BLOCK_SIZE;
> + dctx->rset = 1;
> + }
> + if (len >= POLY1305_BLOCK_SIZE) {
> + dctx->s[0] = get_unaligned_le32(src + 0);
> + dctx->s[1] = get_unaligned_le32(src + 4);
> + dctx->s[2] = get_unaligned_le32(src + 8);
> + dctx->s[3] = get_unaligned_le32(src + 12);
> + src += POLY1305_BLOCK_SIZE;
> + len -= POLY1305_BLOCK_SIZE;
> + dctx->sset = true;
> + }
> + if (len < POLY1305_BLOCK_SIZE)
> + return;
> + }
> +
> + len &= ~(POLY1305_BLOCK_SIZE - 1);
> +
> + poly1305_blocks(&dctx->h, src, len, hibit);
> +}
This interface doesn't make sense. It looks like it's supposed to
accommodate concatenated key, nonce and data input of arbitrary length.
However the data length is truncated to the multiples of poly1305
blocks, in which case |hibit| is supposed to be 1 unconditionally. Or in
other words, considered in isolation this subroutine shouldn't have
|hibit| as a parameter, but simply pass 1 as the last argument to
poly1305_blocks.
On a general note. The poly1305 implementation in question supports both
32- and 64-bit architectures, so maybe riscv_ prefixes would be more
appropriate. As opposed to riscv64_ that is :-)
Cheers.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-06-05 16:42 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-05 14:56 [PATCH] crypto: riscv/poly1305 - import OpenSSL/CRYPTOGAMS implementation zhihang.shao.iscas
2025-06-05 15:01 ` Eric Biggers
2025-06-05 16:42 ` Andy Polyakov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox