* [PATCH V2] crypto: aegis128: Add RISC-V vector SIMD implementation
@ 2026-01-26 9:24 Chunyan Zhang
2026-02-06 10:03 ` Herbert Xu
0 siblings, 1 reply; 3+ messages in thread
From: Chunyan Zhang @ 2026-01-26 9:24 UTC (permalink / raw)
To: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
Herbert Xu, David S . Miller
Cc: linux-riscv, linux-crypto, linux-kernel, Chunyan Zhang
Add a RISC-V vector-accelerated implementation of aegis128 by
wiring it into the generic SIMD hooks.
This implementation supports vlen values of 512, 256, and 128.
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
V2:
- Add config dependency of RISCV_ISA_V to fix the issue reported by kernel test robot;
- Add return value in preload_round_data() and aegis128_round().
V1: https://lore.kernel.org/all/20260121101923.64657-1-zhangchunyan@iscas.ac.cn/
---
crypto/Kconfig | 4 +-
crypto/Makefile | 4 +
crypto/aegis-rvv.h | 19 +
crypto/aegis128-rvv-inner.c | 762 ++++++++++++++++++++++++++++++++++++
crypto/aegis128-rvv.c | 63 +++
5 files changed, 850 insertions(+), 2 deletions(-)
create mode 100644 crypto/aegis-rvv.h
create mode 100644 crypto/aegis128-rvv-inner.c
create mode 100644 crypto/aegis128-rvv.c
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 2e5b195b1b06..9766b3596049 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -777,8 +777,8 @@ config CRYPTO_AEGIS128
AEGIS-128 AEAD algorithm
config CRYPTO_AEGIS128_SIMD
- bool "AEGIS-128 (arm NEON, arm64 NEON)"
- depends on CRYPTO_AEGIS128 && ((ARM || ARM64) && KERNEL_MODE_NEON)
+ bool "AEGIS-128 (arm NEON, arm64 NEON, RISC-V vector)"
+ depends on CRYPTO_AEGIS128 && (((ARM || ARM64) && KERNEL_MODE_NEON) || (RISCV && RISCV_ISA_V))
default y
help
AEGIS-128 AEAD algorithm
diff --git a/crypto/Makefile b/crypto/Makefile
index 16a35649dd91..3d94cae9eeba 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -121,6 +121,10 @@ endif
# Enable <arm_neon.h>
CFLAGS_aegis128-neon-inner.o += -isystem $(shell $(CC) -print-file-name=include)
+ifeq ($(ARCH),riscv)
+aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-rvv.o aegis128-rvv-inner.o
+endif
+
obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
obj-$(CONFIG_CRYPTO_DES) += des_generic.o
diff --git a/crypto/aegis-rvv.h b/crypto/aegis-rvv.h
new file mode 100644
index 000000000000..02bd646e4467
--- /dev/null
+++ b/crypto/aegis-rvv.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2026 Institute of Software, CAS
+ */
+
+#ifndef _AEGIS_RVV_H
+#define _AEGIS_RVV_H
+
+extern const u8 crypto_aes_sbox[];
+
+void crypto_aegis128_init_rvv(void *state, const void *key, const void *iv);
+void crypto_aegis128_update_rvv(void *state, const void *msg);
+void crypto_aegis128_encrypt_chunk_rvv(void *state, void *dst, const void *src,
+ unsigned int size);
+void crypto_aegis128_decrypt_chunk_rvv(void *state, void *dst, const void *src,
+ unsigned int size);
+int crypto_aegis128_final_rvv(void *state, void *tag_xor, unsigned int assoclen,
+ unsigned int cryptlen, unsigned int authsize);
+#endif
diff --git a/crypto/aegis128-rvv-inner.c b/crypto/aegis128-rvv-inner.c
new file mode 100644
index 000000000000..2d7439769d77
--- /dev/null
+++ b/crypto/aegis128-rvv-inner.c
@@ -0,0 +1,762 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2026 Institute of Software, CAS
+ * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+ *
+ * Based on aegis128-neon-inner.c:
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/vector.h>
+#include <linux/types.h>
+
+#include "aegis-rvv.h"
+#include "aegis.h"
+
+#define AEGIS128_STATE_BLOCKS 5
+#define RVV_VLEN riscv_vector_vlen()
+
+typedef u8 aegis128_block_bytes[AEGIS_BLOCK_SIZE];
+struct aegis_state {
+ aegis128_block_bytes blocks[AEGIS128_STATE_BLOCKS];
+};
+
+/* Load 256 bytes at one time into the vector registers starting from r0 */
+#define preload_sbox_1(m, r0) do { \
+ unsigned long vl; \
+ asm volatile (".option push\n" \
+ ".option arch,+v\n" \
+ "vsetvli %0, x0, e8, "m", ta, ma\n" \
+ "vle8.v "r0", (%1)\n" \
+ ".option pop\n" \
+ : "=&r" (vl) \
+ : \
+ "r" (crypto_aes_sbox) \
+ :); \
+} while (0)
+
+/* Load 256 bytes at two times into the vector registers starting from r0 and r1 */
+#define preload_sbox_2(m, r0, r1) do { \
+ unsigned long vl; \
+ asm volatile (".option push\n" \
+ ".option arch,+v\n" \
+ "vsetvli %0, x0, e8, "m", ta, ma\n" \
+ "vle8.v "r0", (%1)\n" \
+ "vle8.v "r1", (%2)\n" \
+ ".option pop\n" \
+ : "=&r" (vl) \
+ : \
+ "r" (crypto_aes_sbox), \
+ "r" (crypto_aes_sbox + 0x80) \
+ :); \
+} while (0)
+
+/* v16 - v31: crypto_aes_sbox[0-255] */
+#define preload_sbox_128() preload_sbox_2("m8", "v16", "v24")
+
+/* v16 - v23: crypto_aes_sbox[0-255] */
+#define preload_sbox_256() preload_sbox_1("m8", "v16")
+
+/* v16 - v19: crypto_aes_sbox[0-255] */
+#define preload_sbox_512() preload_sbox_1("m4", "v16")
+
+static __always_inline
+int preload_round_data(void)
+{
+ static const u8 rev32qu16[] = {
+ 0x2, 0x3, 0x0, 0x1, 0x6, 0x7, 0x4, 0x5,
+ 0xa, 0xb, 0x8, 0x9, 0xe, 0xf, 0xc, 0xd,
+ };
+
+ static const u8 shift_rows[] = {
+ 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
+ 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
+ };
+
+ static const u8 ror32by8[] = {
+ 0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
+ 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
+ };
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v13, (%[rev32qu16])\n"
+ "vle8.v v14, (%[shift_rows])\n"
+ "vle8.v v15, (%[ror32by8])\n"
+ ".option pop\n"
+ : :
+ [rev32qu16]"r"(rev32qu16),
+ [shift_rows]"r"(shift_rows),
+ [ror32by8]"r"(ror32by8)
+ :);
+
+ switch (RVV_VLEN) {
+ case 128:
+ preload_sbox_128();
+ break;
+ case 256:
+ preload_sbox_256();
+ break;
+ case 512:
+ preload_sbox_512();
+ break;
+ default:
+ pr_err("ERROR: %d is not a supported vector length!", RVV_VLEN);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+#define AEGIS128_ROUND_PART1 \
+ ".option push\n" \
+ ".option arch,+v\n" \
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n" \
+ /* s = vqtbl1q_u8(s, vld1q_u8(shift_rows)) */ \
+ "vle8.v v0, (%[s])\n" \
+ "vrgather.vv v1, v0, v14\n" /* v14: shift_rows */
+
+#define AEGIS128_ROUND_PART3 \
+ /* s= (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b) */ \
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n" \
+ "vsra.vi v3, v2, 7\n" /* ((int8x16_t)v >> 7) */ \
+ "vand.vx v3, v3, %[x1b]\n" \
+ "vsll.vi v4, v2, 1\n" /* (v << 1) */ \
+ "vxor.vv v3, v4, v3\n" \
+ /* s ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v) */ \
+ "vrgather.vv v4, v2, v13\n" /* v13: rev32qu16 */ \
+ "vxor.vv v3, v3, v4\n" \
+ /* s ^= vqtbl1q_u8(v ^ s, vld1q_u8(ror32by8)); */ \
+ "vxor.vv v4, v3, v2\n" /* v ^ s */ \
+ "vrgather.vv v5, v4, v15\n" /* v15: ror32by8 */ \
+ "vxor.vv v3, v3, v5\n" \
+ "vle8.v v4, (%[d])\n" \
+ "vxor.vv v3, v3, v4\n" /* dst ^= v3 */ \
+ "vse8.v v3, (%[d])\n" \
+ ".option pop\n"
+
+/*
+ * v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + step), s - step);
+ * r: vector register which stores sbox array
+ */
+#define gather_sbox(r) \
+ "vsub.vx v1, v1, %[step]\n" \
+ "vrgather.vv v3, "r", v1\n" \
+ "vor.vv v2, v2, v3\n"
+
+static __always_inline
+void aegis128_round_128(u8 *dst, const u8 *src)
+{
+ unsigned long vl;
+
+ /* v16 - v31: crypto_aes_sbox[0-255] */
+ asm volatile (AEGIS128_ROUND_PART1
+ /* v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), s); */
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
+ "vrgather.vv v2, v16, v1\n"
+ /* v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x10), s - 0x10); */
+ gather_sbox("v17")
+ gather_sbox("v18")
+ gather_sbox("v19")
+ gather_sbox("v20")
+ gather_sbox("v21")
+ gather_sbox("v22")
+ gather_sbox("v23")
+ gather_sbox("v24")
+ gather_sbox("v25")
+ gather_sbox("v26")
+ gather_sbox("v27")
+ gather_sbox("v28")
+ gather_sbox("v29")
+ gather_sbox("v30")
+ gather_sbox("v31")
+ AEGIS128_ROUND_PART3
+ : "=&r" (vl) :
+ [s]"r"(src),
+ [step]"r"(0x10),
+ [x1b]"r"(0x1b),
+ [d]"r"(dst)
+ : "memory"
+ );
+}
+
+static __always_inline
+void aegis128_round_256(u8 *dst, const u8 *src)
+{
+ unsigned long vl;
+
+ /* v16 - v23: crypto_aes_sbox[0-255] */
+ asm volatile (AEGIS128_ROUND_PART1
+ /* v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), s); */
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
+ "vrgather.vv v2, v16, v1\n"
+ /* v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x20), s - 0x20); */
+ gather_sbox("v17")
+ gather_sbox("v18")
+ gather_sbox("v19")
+ gather_sbox("v20")
+ gather_sbox("v21")
+ gather_sbox("v22")
+ gather_sbox("v23")
+ AEGIS128_ROUND_PART3
+ : "=&r" (vl) :
+ [s]"r"(src),
+ [step]"r"(0x20),
+ [x1b]"r"(0x1b),
+ [d]"r"(dst)
+ : "memory"
+ );
+}
+
+static __always_inline
+void aegis128_round_512(u8 *dst, const u8 *src)
+{
+ unsigned long vl;
+
+ /* v16 - v19: crypto_aes_sbox[0-255] */
+ asm volatile (AEGIS128_ROUND_PART1
+ /* v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), s); */
+ "vsetvli %0, x0, e8, m1, ta, ma\n"
+ "vrgather.vv v2, v16, v1\n"
+ /*v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), s - 0x40);*/
+ gather_sbox("v17")
+ /*v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), s - 0x80);*/
+ gather_sbox("v18")
+ /*v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), s - 0xc0);*/
+ gather_sbox("v19")
+ AEGIS128_ROUND_PART3
+ : "=&r" (vl) :
+ [s]"r"(src),
+ [step]"r"(0x40),
+ [x1b]"r"(0x1b),
+ [d]"r"(dst)
+ : "memory"
+ );
+}
+
+static __always_inline
+int aegis128_round(u8 *dst, const u8 *src)
+{
+ switch (RVV_VLEN) {
+ case 128:
+ aegis128_round_128(dst, src);
+ break;
+ case 256:
+ aegis128_round_256(dst, src);
+ break;
+ case 512:
+ aegis128_round_512(dst, src);
+ break;
+ default:
+ pr_err("ERROR: %d is not a supported vector length!", RVV_VLEN);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static __always_inline
+void aegis128_update_rvv(struct aegis_state *state, const void *key)
+{
+ u8 k[AEGIS_BLOCK_SIZE];
+
+ /* save key to k[16] */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[key])\n"
+ "vse8.v v1, (%[k])\n"
+ ".option pop\n"
+ : :
+ [key]"r"(key),
+ [k]"r"(k)
+ :);
+
+ aegis128_round(k, state->blocks[4]);
+ aegis128_round(state->blocks[4], state->blocks[3]);
+ aegis128_round(state->blocks[3], state->blocks[2]);
+ aegis128_round(state->blocks[2], state->blocks[1]);
+ aegis128_round(state->blocks[1], state->blocks[0]);
+
+ /* state->blocks[0] ^= key */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[k])\n"
+ "vle8.v v2, (%[block0])\n"
+ "vxor.vv v2, v2, v1\n"
+ "vse8.v v2, (%[block0])\n"
+ ".option pop\n"
+ : :
+ [k]"r"(k),
+ [block0]"r"(state->blocks[0])
+ :);
+}
+
+void crypto_aegis128_init_rvv(void *state, const void *key, const void *iv)
+{
+ struct aegis_state *st = state;
+ u8 kiv[AEGIS_BLOCK_SIZE];
+
+ static const u8 const0[] = {
+ 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+ 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+ };
+ static const u8 const1[] = {
+ 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+ 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+ };
+
+ /*
+ * kiv = key^iv
+ * struct aegis128_state st = {{
+ kiv,
+ vld1q_u8(const1),
+ vld1q_u8(const0),
+ key ^ vld1q_u8(const0),
+ key ^ vld1q_u8(const1),
+ }};
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v0, (%[const0])\n"
+ "vle8.v v1, (%[const1])\n"
+ "vse8.v v0, (%[block2])\n"
+ "vse8.v v1, (%[block1])\n"
+ "vle8.v v2, (%[iv])\n"
+ "vle8.v v3, (%[key])\n"
+ "vxor.vv v0, v0, v3\n"
+ "vxor.vv v1, v1, v3\n"
+ "vxor.vv v2, v2, v3\n"
+ "vse8.v v2, (%[block0])\n"
+ "vse8.v v2, (%[kiv])\n"
+ "vse8.v v0, (%[block3])\n"
+ "vse8.v v1, (%[block4])\n"
+ ".option pop\n"
+ : :
+ [const0]"r"(const0),
+ [const1]"r"(const1),
+ [block1]"r"(st->blocks[1]),
+ [block2]"r"(st->blocks[2]),
+ [iv]"r"(iv),
+ [key]"r"(key),
+ [block0]"r"(st->blocks[0]),
+ [kiv]"r"(kiv),
+ [block3]"r"(st->blocks[3]),
+ [block4]"r"(st->blocks[4])
+ :);
+
+ if (preload_round_data())
+ return;
+
+ for (int i = 0; i < 5; i++) {
+ aegis128_update_rvv(st, key);
+ aegis128_update_rvv(st, kiv);
+ }
+}
+
+void crypto_aegis128_update_rvv(void *state, const void *msg)
+{
+ struct aegis_state *st = state;
+
+ if (preload_round_data())
+ return;
+
+ aegis128_update_rvv(st, msg);
+}
+
+static const u8 permute[] __aligned(64) = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+void crypto_aegis128_encrypt_chunk_rvv(void *state, void *dst, const void *src,
+ unsigned int size)
+{
+ struct aegis_state *st = state;
+ const int short_input = size < AEGIS_BLOCK_SIZE;
+ u8 s[AEGIS_BLOCK_SIZE];
+ u8 msg[AEGIS_BLOCK_SIZE];
+
+ if (preload_round_data())
+ return;
+
+ while (size >= AEGIS_BLOCK_SIZE) {
+ /* s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[block1])\n"
+ "vle8.v v2, (%[block2])\n"
+ "vle8.v v3, (%[block3])\n"
+ "vle8.v v4, (%[block4])\n"
+ "vxor.vv v1, v1, v4\n"
+ "vand.vv v2, v2, v3\n"
+ "vxor.vv v1, v1, v2\n"
+ "vse8.v v1, (%[s])\n"
+ ".option pop\n"
+ : :
+ [block1]"r"(st->blocks[1]),
+ [block2]"r"(st->blocks[2]),
+ [block3]"r"(st->blocks[3]),
+ [block4]"r"(st->blocks[4]),
+ [s]"r"(s)
+ :);
+
+ aegis128_update_rvv(st, src);
+ /* dst = s ^ src*/
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v1, (%[s])\n"
+ "vle8.v v2, (%[src])\n"
+ "vxor.vv v1, v1, v2\n"
+ "vse8.v v1, (%[dst])\n"
+ "vse8.v v1, (%[msg])\n"
+ ".option pop\n"
+ : :
+ [s]"r"(s),
+ [src]"r"(src),
+ [dst]"r"(dst),
+ [msg]"r"(msg)
+ :);
+
+ size -= AEGIS_BLOCK_SIZE;
+ src += AEGIS_BLOCK_SIZE;
+ dst += AEGIS_BLOCK_SIZE;
+ }
+
+ if (size > 0) {
+ u8 buf[AEGIS_BLOCK_SIZE];
+ const void *in = src;
+ void *out = dst;
+ u8 m[AEGIS_BLOCK_SIZE];
+
+ /* s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[block1])\n"
+ "vle8.v v2, (%[block2])\n"
+ "vle8.v v3, (%[block3])\n"
+ "vle8.v v4, (%[block4])\n"
+ "vxor.vv v1, v1, v4\n" /* st.v[1] ^ st.v[4] */
+ "vand.vv v2, v2, v3\n" /* st.v[2] & st.v[3] */
+ "vxor.vv v1, v1, v2\n"
+ "vse8.v v1, (%[s])\n"
+ ".option pop\n"
+ : :
+ [block1]"r"(st->blocks[1]),
+ [block2]"r"(st->blocks[2]),
+ [block3]"r"(st->blocks[3]),
+ [block4]"r"(st->blocks[4]),
+ [s]"r"(s)
+ :);
+
+ if (__builtin_expect(short_input, 0))
+ in = out = memcpy(buf + AEGIS_BLOCK_SIZE - size, src, size);
+
+ /*
+ * m = vqtbl1q_u8(vld1q_u8(in + size - AEGIS_BLOCK_SIZE),
+ * vld1q_u8(permute + 32 - size));
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v1, (%[in])\n"
+ "vle8.v v2, (%[p])\n"
+ "vrgather.vv v3, v1, v2\n"
+ "vse8.v v3, (%[m])\n"
+ ".option pop\n"
+ : :
+ [in]"r"(in + size - AEGIS_BLOCK_SIZE),
+ [p]"r"(permute + 32 - size),
+ [m]"r"(m)
+ :);
+
+ aegis128_update_rvv(st, m);
+
+ /*
+ * vst1q_u8(out + size - AEGIS_BLOCK_SIZE,
+ * vqtbl1q_u8(m ^ s, vld1q_u8(permute + size)));
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[m])\n"
+ "vle8.v v2, (%[s])\n"
+ "vxor.vv v1, v1, v2\n"
+ "vle8.v v2, (%[p])\n"
+ "vrgather.vv v3, v1, v2\n"
+ "vse8.v v3, (%[out])\n"
+ ".option pop\n"
+ : :
+ [m]"r"(m),
+ [s]"r"(s),
+ [p]"r"(permute + size),
+ [out]"r"(out + size - AEGIS_BLOCK_SIZE)
+ :);
+
+ if (__builtin_expect(short_input, 0)) {
+ memcpy(dst, out, size);
+ } else {
+ /* vst1q_u8(out - AEGIS_BLOCK_SIZE, m); */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[msg])\n"
+ "vse8.v v1, (%[out])\n"
+ ".option pop\n"
+ : :
+ [msg]"r"(msg),
+ [out]"r"(out - AEGIS_BLOCK_SIZE)
+ :);
+ }
+ }
+}
+
+void crypto_aegis128_decrypt_chunk_rvv(void *state, void *dst, const void *src,
+ unsigned int size)
+{
+ struct aegis_state *st = state;
+ const int short_input = size < AEGIS_BLOCK_SIZE;
+ u8 s[AEGIS_BLOCK_SIZE];
+ u8 msg[AEGIS_BLOCK_SIZE];
+
+ if (preload_round_data())
+ return;
+
+ while (size >= AEGIS_BLOCK_SIZE) {
+ /* s = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[block1])\n"
+ "vle8.v v2, (%[block2])\n"
+ "vle8.v v3, (%[block3])\n"
+ "vle8.v v4, (%[block4])\n"
+ "vle8.v v5, (%[src])\n"
+ "vxor.vv v1, v1, v4\n"
+ "vand.vv v2, v2, v3\n"
+ "vxor.vv v1, v1, v2\n"
+ "vxor.vv v1, v1, v5\n"
+ "vse8.v v1, (%[msg])\n"
+ ".option pop\n"
+ : :
+ [block1]"r"(st->blocks[1]),
+ [block2]"r"(st->blocks[2]),
+ [block3]"r"(st->blocks[3]),
+ [block4]"r"(st->blocks[4]),
+ [src]"r"(src),
+ [msg]"r"(msg)
+ :);
+
+ aegis128_update_rvv(st, msg);
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v1, (%[msg])\n"
+ "vse8.v v1, (%[dst])\n"
+ ".option pop\n"
+ : :
+ [msg]"r"(msg),
+ [dst]"r"(dst)
+ :);
+
+ size -= AEGIS_BLOCK_SIZE;
+ src += AEGIS_BLOCK_SIZE;
+ dst += AEGIS_BLOCK_SIZE;
+ }
+
+ if (size > 0) {
+ u8 buf[AEGIS_BLOCK_SIZE];
+ const void *in = src;
+ void *out = dst;
+ u8 m[AEGIS_BLOCK_SIZE];
+
+ /* s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[block1])\n"
+ "vle8.v v2, (%[block2])\n"
+ "vle8.v v3, (%[block3])\n"
+ "vle8.v v4, (%[block4])\n"
+ "vxor.vv v1, v1, v4\n"
+ "vand.vv v2, v2, v3\n"
+ "vxor.vv v1, v1, v2\n"
+ "vse8.v v1, (%[s])\n"
+ ".option pop\n"
+ : :
+ [block1]"r"(st->blocks[1]),
+ [block2]"r"(st->blocks[2]),
+ [block3]"r"(st->blocks[3]),
+ [block4]"r"(st->blocks[4]),
+ [s]"r"(s)
+ :);
+
+ if (__builtin_expect(short_input, 0))
+ in = out = memcpy(buf + AEGIS_BLOCK_SIZE - size, src, size);
+
+ /*
+ * m = s ^ vqtbx1q_u8(s, vld1q_u8(in + size - AEGIS_BLOCK_SIZE),
+ * vld1q_u8(permute + 32 - size));
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vle8.v v1, (%[in])\n"
+ "vle8.v v2, (%[p])\n"
+ "vrgather.vv v3, v1, v2\n"
+ "vle8.v v4, (%[s])\n"
+ "vmsltu.vx v0, v2, %[x10]\n" /* set if less then 0x10 */
+ "vmerge.vvm v3, v4, v3, v0\n"
+ "vxor.vv v3, v4, v3\n"
+ "vse8.v v3, (%[m])\n"
+ ".option pop\n"
+ : :
+ [in]"r"(in + size - AEGIS_BLOCK_SIZE),
+ [p]"r"(permute + 32 - size),
+ [s]"r"(s),
+ [x10]"r"(0x10),
+ [m]"r"(m)
+ :);
+
+ aegis128_update_rvv(st, m);
+
+ /*
+ * vst1q_u8(out + size - AEGIS_BLOCK_SIZE,
+ * vqtbl1q_u8(m, vld1q_u8(permute + size)));
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[m])\n"
+ "vle8.v v2, (%[p])\n"
+ "vrgather.vv v3, v1, v2\n"
+ "vse8.v v3, (%[out])\n"
+ ".option pop\n"
+ : :
+ [m]"r"(m),
+ [p]"r"(permute + size),
+ [out]"r"(out + size - AEGIS_BLOCK_SIZE)
+ :);
+
+ if (__builtin_expect(short_input, 0)) {
+ memcpy(dst, out, size);
+ } else {
+ /* vst1q_u8(out - AEGIS_BLOCK_SIZE, m); */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v1, (%[msg])\n"
+ "vse8.v v1, (%[out])\n"
+ ".option pop\n"
+ : :
+ [msg]"r"(msg),
+ [out]"r"(out - AEGIS_BLOCK_SIZE)
+ :);
+ }
+ }
+}
+
+int crypto_aegis128_final_rvv(void *state, void *tag_xor, unsigned int assoclen,
+ unsigned int cryptlen, unsigned int authsize)
+{
+ struct aegis_state *st = state;
+ u64 v[2];
+ int i;
+ int ret;
+
+ ret = preload_round_data();
+ if (ret)
+ return ret;
+
+ /*
+ *v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8ULL * assoclen),
+ * vmov_n_u64(8ULL * cryptlen));
+ */
+ v[0] = 8ULL * assoclen;
+ v[1] = 8ULL * cryptlen;
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v0, (%[v])\n"
+ "vle8.v v1, (%[block3])\n"
+ "vxor.vv v0, v0, v1\n"
+ "vse8.v v0, (%[v])\n"
+ ".option pop\n"
+ : :
+ [v]"r"(v),
+ [block3]"r"(st->blocks[3])
+ :);
+
+ for (i = 0; i < 7; i++)
+ aegis128_update_rvv(st, v);
+
+ /* v = st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4]; */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v0, (%[block0])\n"
+ "vle8.v v1, (%[block1])\n"
+ "vle8.v v2, (%[block2])\n"
+ "vle8.v v3, (%[block3])\n"
+ "vle8.v v4, (%[block4])\n"
+ "vxor.vv v0, v0, v1\n"
+ "vxor.vv v2, v2, v3\n"
+ "vxor.vv v0, v0, v2\n"
+ "vxor.vv v0, v0, v4\n"
+ "vse8.v v0, (%[v])\n"
+ ".option pop\n"
+ : :
+ [block0]"r"(st->blocks[0]),
+ [block1]"r"(st->blocks[1]),
+ [block2]"r"(st->blocks[2]),
+ [block3]"r"(st->blocks[3]),
+ [block4]"r"(st->blocks[4]),
+ [v]"r"(v)
+ :);
+
+ if (authsize > 0) {
+ /*
+ * v = vqtbl1q_u8(~vceqq_u8(v, vld1q_u8(tag_xor)),
+ * vld1q_u8(permute + authsize));
+ */
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v0, (%[v])\n"
+ "vle8.v v1, (%[tag_xor])\n"
+ "vmseq.vv v0, v0, v1\n" /* vceqq_u8(v, vld1q_u8(tag_xor) */
+ "vmv.v.i v1, 0\n" /* set v1 = 0 */
+ "vmerge.vxm v1, v1, %[xff], v0\n"
+ "vxor.vi v1, v1, -1\n" /* vnot.v v0, v0 */
+ "vle8.v v0, (%[pa])\n"
+ "vrgather.vv v2, v1, v0\n"
+ "vredmin.vs v2, v2, v2\n" /* vminvq_s8((int8x16_t)v) */
+ "vse8.v v2, (%[v])\n"
+ ".option pop\n"
+ : :
+ [v]"r"(v),
+ [tag_xor]"r"(tag_xor),
+ [xff]"r"(0xff),
+ [pa]"r"(permute + authsize)
+ :);
+
+ return *((s8 *)v);
+ }
+
+ asm volatile (".option push\n"
+ ".option arch,+v\n"
+ "vsetivli zero, 0x10, e8, m1, ta, ma\n"
+ "vle8.v v0, (%[v])\n"
+ "vse8.v v0, (%[tag_xor])\n"
+ ".option pop\n"
+ : :
+ [v]"r"(v),
+ [tag_xor]"r"(tag_xor)
+ :);
+
+ return 0;
+}
diff --git a/crypto/aegis128-rvv.c b/crypto/aegis128-rvv.c
new file mode 100644
index 000000000000..5a6647722d82
--- /dev/null
+++ b/crypto/aegis128-rvv.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2026 Institute of Software, CAS
+ * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+ */
+
+#include <asm/vector.h>
+
+#include "aegis.h"
+#include "aegis-rvv.h"
+
+bool crypto_aegis128_have_simd(void)
+{
+ return IS_ENABLED(CONFIG_RISCV_ISA_V);
+}
+
+void crypto_aegis128_init_simd(struct aegis_state *state,
+ const union aegis_block *key,
+ const u8 *iv)
+{
+ kernel_vector_begin();
+ crypto_aegis128_init_rvv(state, key, iv);
+ kernel_vector_end();
+}
+
+void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg)
+{
+ kernel_vector_begin();
+ crypto_aegis128_update_rvv(state, msg);
+ kernel_vector_end();
+}
+
+void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
+ const u8 *src, unsigned int size)
+{
+ kernel_vector_begin();
+ crypto_aegis128_encrypt_chunk_rvv(state, dst, src, size);
+ kernel_vector_end();
+}
+
+void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
+ const u8 *src, unsigned int size)
+{
+ kernel_vector_begin();
+ crypto_aegis128_decrypt_chunk_rvv(state, dst, src, size);
+ kernel_vector_end();
+}
+
+int crypto_aegis128_final_simd(struct aegis_state *state,
+ union aegis_block *tag_xor,
+ unsigned int assoclen,
+ unsigned int cryptlen,
+ unsigned int authsize)
+{
+ int ret;
+
+ kernel_vector_begin();
+ ret = crypto_aegis128_final_rvv(state, tag_xor, assoclen, cryptlen,
+ authsize);
+ kernel_vector_end();
+
+ return ret;
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH V2] crypto: aegis128: Add RISC-V vector SIMD implementation
2026-01-26 9:24 [PATCH V2] crypto: aegis128: Add RISC-V vector SIMD implementation Chunyan Zhang
@ 2026-02-06 10:03 ` Herbert Xu
2026-02-13 0:13 ` Eric Biggers
0 siblings, 1 reply; 3+ messages in thread
From: Herbert Xu @ 2026-02-06 10:03 UTC (permalink / raw)
To: Chunyan Zhang
Cc: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti,
David S . Miller, linux-riscv, linux-crypto, linux-kernel,
Chunyan Zhang, Eric Biggers
On Mon, Jan 26, 2026 at 05:24:11PM +0800, Chunyan Zhang wrote:
> Add a RISC-V vector-accelerated implementation of aegis128 by
> wiring it into the generic SIMD hooks.
>
> This implementation supports vlen values of 512, 256, and 128.
>
> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> ---
> V2:
> - Add config dependency of RISCV_ISA_V to fix the issue reported by kernel test robot;
> - Add return value in preload_round_data() and aegis128_round().
>
> V1: https://lore.kernel.org/all/20260121101923.64657-1-zhangchunyan@iscas.ac.cn/
> ---
> crypto/Kconfig | 4 +-
> crypto/Makefile | 4 +
> crypto/aegis-rvv.h | 19 +
> crypto/aegis128-rvv-inner.c | 762 ++++++++++++++++++++++++++++++++++++
> crypto/aegis128-rvv.c | 63 +++
> 5 files changed, 850 insertions(+), 2 deletions(-)
> create mode 100644 crypto/aegis-rvv.h
> create mode 100644 crypto/aegis128-rvv-inner.c
> create mode 100644 crypto/aegis128-rvv.c
In light of the recent move of aes from crypto to lib/crypto,
perhaps the same should be done for aegis?
Thanks,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH V2] crypto: aegis128: Add RISC-V vector SIMD implementation
2026-02-06 10:03 ` Herbert Xu
@ 2026-02-13 0:13 ` Eric Biggers
0 siblings, 0 replies; 3+ messages in thread
From: Eric Biggers @ 2026-02-13 0:13 UTC (permalink / raw)
To: Herbert Xu
Cc: Chunyan Zhang, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Alexandre Ghiti, David S . Miller, linux-riscv, linux-crypto,
linux-kernel, Chunyan Zhang
On Fri, Feb 06, 2026 at 06:03:08PM +0800, Herbert Xu wrote:
> On Mon, Jan 26, 2026 at 05:24:11PM +0800, Chunyan Zhang wrote:
> > Add a RISC-V vector-accelerated implementation of aegis128 by
> > wiring it into the generic SIMD hooks.
> >
> > This implementation supports vlen values of 512, 256, and 128.
> >
> > Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> > ---
> > V2:
> > - Add config dependency of RISCV_ISA_V to fix the issue reported by kernel test robot;
> > - Add return value in preload_round_data() and aegis128_round().
> >
> > V1: https://lore.kernel.org/all/20260121101923.64657-1-zhangchunyan@iscas.ac.cn/
> > ---
> > crypto/Kconfig | 4 +-
> > crypto/Makefile | 4 +
> > crypto/aegis-rvv.h | 19 +
> > crypto/aegis128-rvv-inner.c | 762 ++++++++++++++++++++++++++++++++++++
> > crypto/aegis128-rvv.c | 63 +++
> > 5 files changed, 850 insertions(+), 2 deletions(-)
> > create mode 100644 crypto/aegis-rvv.h
> > create mode 100644 crypto/aegis128-rvv-inner.c
> > create mode 100644 crypto/aegis128-rvv.c
>
> In light of the recent move of aes from crypto to lib/crypto,
> perhaps the same should be done for aegis?
Yes, I'll be focusing on AES modes next, but it will make sense to move
AEGIS too.
Regardless of that though, this patch needs a proper review. I'll try
to find time, but maybe others in the RISC-V community can help too.
- Eric
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-02-13 0:13 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-26 9:24 [PATCH V2] crypto: aegis128: Add RISC-V vector SIMD implementation Chunyan Zhang
2026-02-06 10:03 ` Herbert Xu
2026-02-13 0:13 ` Eric Biggers
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox