Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <richard.henderson@linaro.org>
To: Max Chou <max.chou@sifive.com>,
	qemu-devel@nongnu.org, qemu-riscv@nongnu.org
Cc: dbarboza@ventanamicro.com,
	Nazar Kazakov <nazar.kazakov@codethink.co.uk>,
	 Lawrence Hunter <lawrence.hunter@codethink.co.uk>,
	William Salmon <will.salmon@codethink.co.uk>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Alistair Francis <alistair.francis@wdc.com>,
	Bin Meng <bin.meng@windriver.com>,
	Weiwei Li <liweiwei@iscas.ac.cn>,
	Liu Zhiwei <zhiwei_liu@linux.alibaba.com>,
	Kiran Ostrolenk <kiran.ostrolenk@codethink.co.uk>
Subject: Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support
Date: Fri, 23 Jun 2023 09:33:04 +0200	[thread overview]
Message-ID: <323d7da1-3e66-bf22-42c1-1afa4df3aeb4@linaro.org> (raw)
In-Reply-To: <20230622161646.32005-11-max.chou@sifive.com>

On 6/22/23 18:16, Max Chou wrote:
> --- a/target/riscv/vcrypto_helper.c
> +++ b/target/riscv/vcrypto_helper.c
> @@ -22,6 +22,7 @@
>   #include "qemu/bitops.h"
>   #include "qemu/bswap.h"
>   #include "cpu.h"
> +#include "crypto/aes.h"
>   #include "exec/memop.h"
>   #include "exec/exec-all.h"
>   #include "exec/helper-proto.h"
> @@ -195,3 +196,310 @@ RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL)
>   GEN_VEXT_VX(vwsll_vx_b, 2)
>   GEN_VEXT_VX(vwsll_vx_h, 4)
>   GEN_VEXT_VX(vwsll_vx_w, 8)
> +
> +static inline void aes_sub_bytes(uint8_t round_state[4][4])
> +{
> +    for (int j = 0; j < 16; j++) {
> +        round_state[j / 4][j % 4] = AES_sbox[round_state[j / 4][j % 4]];
> +    }
> +}
> +
> +static inline void aes_shift_bytes(uint8_t round_state[4][4])
> +{
> +    uint8_t temp;
> +    temp = round_state[0][1];
> +    round_state[0][1] = round_state[1][1];
> +    round_state[1][1] = round_state[2][1];
> +    round_state[2][1] = round_state[3][1];
> +    round_state[3][1] = temp;
> +    temp = round_state[0][2];
> +    round_state[0][2] = round_state[2][2];
> +    round_state[2][2] = temp;
> +    temp = round_state[1][2];
> +    round_state[1][2] = round_state[3][2];
> +    round_state[3][2] = temp;
> +    temp = round_state[0][3];
> +    round_state[0][3] = round_state[3][3];
> +    round_state[3][3] = round_state[2][3];
> +    round_state[2][3] = round_state[1][3];
> +    round_state[1][3] = temp;
> +}
> +
> +static inline void xor_round_key(uint8_t round_state[4][4], uint8_t *round_key)
> +{
> +    for (int j = 0; j < 16; j++) {
> +        round_state[j / 4][j % 4] = round_state[j / 4][j % 4] ^ (round_key)[j];
> +    }
> +}
> +
> +static inline void aes_inv_sub_bytes(uint8_t round_state[4][4])
> +{
> +    for (int j = 0; j < 16; j++) {
> +        round_state[j / 4][j % 4] = AES_isbox[round_state[j / 4][j % 4]];
> +    }
> +}
> +
> +static inline void aes_inv_shift_bytes(uint8_t round_state[4][4])
> +{
> +    uint8_t temp;
> +    temp = round_state[3][1];
> +    round_state[3][1] = round_state[2][1];
> +    round_state[2][1] = round_state[1][1];
> +    round_state[1][1] = round_state[0][1];
> +    round_state[0][1] = temp;
> +    temp = round_state[0][2];
> +    round_state[0][2] = round_state[2][2];
> +    round_state[2][2] = temp;
> +    temp = round_state[1][2];
> +    round_state[1][2] = round_state[3][2];
> +    round_state[3][2] = temp;
> +    temp = round_state[0][3];
> +    round_state[0][3] = round_state[1][3];
> +    round_state[1][3] = round_state[2][3];
> +    round_state[2][3] = round_state[3][3];
> +    round_state[3][3] = temp;
> +}
> +
> +static inline uint8_t xtime(uint8_t x)
> +{
> +    return (x << 1) ^ (((x >> 7) & 1) * 0x1b);
> +}
> +
> +static inline uint8_t multiply(uint8_t x, uint8_t y)
> +{
> +    return (((y & 1) * x) ^ ((y >> 1 & 1) * xtime(x)) ^
> +            ((y >> 2 & 1) * xtime(xtime(x))) ^
> +            ((y >> 3 & 1) * xtime(xtime(xtime(x)))) ^
> +            ((y >> 4 & 1) * xtime(xtime(xtime(xtime(x))))));
> +}
> +
> +static inline void aes_inv_mix_cols(uint8_t round_state[4][4])
> +{
> +    uint8_t a, b, c, d;
> +    for (int j = 0; j < 4; ++j) {
> +        a = round_state[j][0];
> +        b = round_state[j][1];
> +        c = round_state[j][2];
> +        d = round_state[j][3];
> +        round_state[j][0] = multiply(a, 0x0e) ^ multiply(b, 0x0b) ^
> +                            multiply(c, 0x0d) ^ multiply(d, 0x09);
> +        round_state[j][1] = multiply(a, 0x09) ^ multiply(b, 0x0e) ^
> +                            multiply(c, 0x0b) ^ multiply(d, 0x0d);
> +        round_state[j][2] = multiply(a, 0x0d) ^ multiply(b, 0x09) ^
> +                            multiply(c, 0x0e) ^ multiply(d, 0x0b);
> +        round_state[j][3] = multiply(a, 0x0b) ^ multiply(b, 0x0d) ^
> +                            multiply(c, 0x09) ^ multiply(d, 0x0e);
> +    }
> +}
> +
> +static inline void aes_mix_cols(uint8_t round_state[4][4])
> +{
> +    uint8_t a, b;
> +    for (int j = 0; j < 4; ++j) {
> +        a = round_state[j][0];
> +        b = round_state[j][0] ^ round_state[j][1] ^ round_state[j][2] ^
> +            round_state[j][3];
> +        round_state[j][0] ^= xtime(round_state[j][0] ^ round_state[j][1]) ^ b;
> +        round_state[j][1] ^= xtime(round_state[j][1] ^ round_state[j][2]) ^ b;
> +        round_state[j][2] ^= xtime(round_state[j][2] ^ round_state[j][3]) ^ b;
> +        round_state[j][3] ^= xtime(round_state[j][3] ^ a) ^ b;
> +    }
> +}
> +
> +#define GEN_ZVKNED_HELPER_VV(NAME, ...)                                   \
> +    void HELPER(NAME)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,  \
> +                      uint32_t desc)                                      \
> +    {                                                                     \
> +        uint64_t *vd = vd_vptr;                                           \
> +        uint64_t *vs2 = vs2_vptr;                                         \
> +        uint32_t vl = env->vl;                                            \
> +        uint32_t total_elems = vext_get_total_elems(env, desc, 4);        \
> +        uint32_t vta = vext_vta(desc);                                    \
> +                                                                          \
> +        for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {        \
> +            uint64_t round_key[2] = {                                     \
> +                cpu_to_le64(vs2[i * 2 + 0]),                              \
> +                cpu_to_le64(vs2[i * 2 + 1]),                              \
> +            };                                                            \
> +            uint8_t round_state[4][4];                                    \
> +            cpu_to_le64s(vd + i * 2 + 0);                                 \
> +            cpu_to_le64s(vd + i * 2 + 1);                                 \
> +            for (int j = 0; j < 16; j++) {                                \
> +                round_state[j / 4][j % 4] = ((uint8_t *)(vd + i * 2))[j]; \
> +            }                                                             \
> +            __VA_ARGS__;                                                  \
> +            for (int j = 0; j < 16; j++) {                                \
> +                ((uint8_t *)(vd + i * 2))[j] = round_state[j / 4][j % 4]; \
> +            }                                                             \
> +            le64_to_cpus(vd + i * 2 + 0);                                 \
> +            le64_to_cpus(vd + i * 2 + 1);                                 \
> +        }                                                                 \
> +        env->vstart = 0;                                                  \
> +        /* set tail elements to 1s */                                     \
> +        vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);              \
> +    }
> +
> +#define GEN_ZVKNED_HELPER_VS(NAME, ...)                                   \
> +    void HELPER(NAME)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,  \
> +                      uint32_t desc)                                      \
> +    {                                                                     \
> +        uint64_t *vd = vd_vptr;                                           \
> +        uint64_t *vs2 = vs2_vptr;                                         \
> +        uint32_t vl = env->vl;                                            \
> +        uint32_t total_elems = vext_get_total_elems(env, desc, 4);        \
> +        uint32_t vta = vext_vta(desc);                                    \
> +                                                                          \
> +        for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {        \
> +            uint64_t round_key[2] = {                                     \
> +                cpu_to_le64(vs2[0]),                                      \
> +                cpu_to_le64(vs2[1]),                                      \
> +            };                                                            \
> +            uint8_t round_state[4][4];                                    \
> +            cpu_to_le64s(vd + i * 2 + 0);                                 \
> +            cpu_to_le64s(vd + i * 2 + 1);                                 \
> +            for (int j = 0; j < 16; j++) {                                \
> +                round_state[j / 4][j % 4] = ((uint8_t *)(vd + i * 2))[j]; \
> +            }                                                             \
> +            __VA_ARGS__;                                                  \
> +            for (int j = 0; j < 16; j++) {                                \
> +                ((uint8_t *)(vd + i * 2))[j] = round_state[j / 4][j % 4]; \
> +            }                                                             \
> +            le64_to_cpus(vd + i * 2 + 0);                                 \
> +            le64_to_cpus(vd + i * 2 + 1);                                 \
> +        }                                                                 \
> +        env->vstart = 0;                                                  \
> +        /* set tail elements to 1s */                                     \
> +        vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);              \
> +    }

See

https://lore.kernel.org/qemu-devel/20230620110758.787479-1-richard.henderson@linaro.org/

which should greatly simplify all of this.


r~

next prev parent reply	other threads:[~2023-06-23  7:33 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-22 16:16 [PATCH v4 00/17] Add RISC-V vector cryptographic instruction set support Max Chou
2023-06-22 16:16 ` [PATCH v4 01/17] target/riscv: Refactor some of the generic vector functionality Max Chou
2023-06-22 16:16 ` [PATCH v4 02/17] target/riscv: Refactor vector-vector translation macro Max Chou
2023-06-22 16:16 ` [PATCH v4 03/17] target/riscv: Remove redundant "cpu_vl == 0" checks Max Chou
2023-06-22 16:16 ` [PATCH v4 04/17] target/riscv: Add Zvbc ISA extension support Max Chou
2023-06-22 16:16 ` [PATCH v4 05/17] target/riscv: Move vector translation checks Max Chou
2023-06-22 16:16 ` [PATCH v4 06/17] target/riscv: Refactor translation of vector-widening instruction Max Chou
2023-06-22 16:16 ` [PATCH v4 07/17] target/riscv: Refactor some of the generic vector functionality Max Chou
2023-06-22 16:16 ` [PATCH v4 08/17] tcg: Fix temporary variable in tcg_gen_gvec_andcs Max Chou
2023-06-22 17:30   ` Daniel Henrique Barboza
2023-06-23  6:51     ` Richard Henderson
2023-06-22 16:16 ` [PATCH v4 09/17] target/riscv: Add Zvbb ISA extension support Max Chou
2023-06-22 17:48   ` Daniel Henrique Barboza
2023-06-22 16:16 ` [PATCH v4 10/17] target/riscv: Add Zvkned " Max Chou
2023-06-22 18:03   ` Daniel Henrique Barboza
2023-06-23  7:33   ` Richard Henderson [this message]
2023-06-26  8:02     ` Max Chou
2023-06-22 16:16 ` [PATCH v4 11/17] target/riscv: Add Zvknh " Max Chou
2023-06-22 18:06   ` Daniel Henrique Barboza
2023-06-22 16:16 ` [PATCH v4 12/17] target/riscv: Add Zvksh " Max Chou
2023-06-22 18:09   ` Daniel Henrique Barboza
2023-06-22 16:16 ` [PATCH v4 13/17] target/riscv: Add Zvkg " Max Chou
2023-06-22 18:10   ` Daniel Henrique Barboza
2023-06-22 16:16 ` [PATCH v4 14/17] crypto: Create sm4_subword Max Chou
2023-06-22 16:16 ` [PATCH v4 15/17] crypto: Add SM4 constant parameter CK Max Chou
2023-06-22 16:16 ` [PATCH v4 16/17] target/riscv: Add Zvksed ISA extension support Max Chou
2023-06-22 16:16 ` [PATCH v4 17/17] target/riscv: Expose Zvk* and Zvb[b,c] cpu properties Max Chou
2023-06-22 17:41   ` [PATCH v4 17/17] target/riscv: Expose Zvk* and Zvb[b,c] " Daniel Henrique Barboza
2023-06-26  8:08     ` Max Chou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=323d7da1-3e66-bf22-42c1-1afa4df3aeb4@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alistair.francis@wdc.com \
    --cc=bin.meng@windriver.com \
    --cc=dbarboza@ventanamicro.com \
    --cc=kiran.ostrolenk@codethink.co.uk \
    --cc=lawrence.hunter@codethink.co.uk \
    --cc=liweiwei@iscas.ac.cn \
    --cc=max.chou@sifive.com \
    --cc=nazar.kazakov@codethink.co.uk \
    --cc=palmer@dabbelt.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=will.salmon@codethink.co.uk \
    --cc=zhiwei_liu@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).