Re: [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Alex Bennée" <alex.bennee@linaro.org>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: qemu-devel@nongnu.org, peter.maydell@linaro.org
Subject: Re: [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE
Date: Mon, 22 Jan 2018 11:08:15 +0000	[thread overview]
Message-ID: <87wp0ajgm8.fsf@linaro.org> (raw)
In-Reply-To: <20180119045438.28582-9-richard.henderson@linaro.org>


Richard Henderson <richard.henderson@linaro.org> writes:

> Change vfp.regs as a uint64_t to vfp.zregs as an ARMVectorReg.
> The previous patches have made the change in representation
> relatively painless.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu.h           | 57 ++++++++++++++++++++++++++++++----------------
>  target/arm/machine.c       | 35 +++++++++++++++++++++++++++-
>  target/arm/translate-a64.c |  8 +++----
>  target/arm/translate.c     |  7 +++---
>  4 files changed, 79 insertions(+), 28 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 7d396606f3..57d805b5d8 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -153,6 +153,40 @@ typedef struct {
>      uint32_t base_mask;
>  } TCR;
>
> +/* Define a maximum sized vector register.
> + * For 32-bit, this is a 128-bit NEON/AdvSIMD register.
> + * For 64-bit, this is a 2048-bit SVE register.
> + *
> + * Note that the mapping between S, D, and Q views of the register bank
> + * differs between AArch64 and AArch32.
> + * In AArch32:
> + *  Qn = regs[n].d[1]:regs[n].d[0]
> + *  Dn = regs[n / 2].d[n & 1]
> + *  Sn = regs[n / 4].d[n % 4 / 2],
> + *       bits 31..0 for even n, and bits 63..32 for odd n
> + *       (and regs[16] to regs[31] are inaccessible)
> + * In AArch64:
> + *  Zn = regs[n].d[*]
> + *  Qn = regs[n].d[1]:regs[n].d[0]
> + *  Dn = regs[n].d[0]
> + *  Sn = regs[n].d[0] bits 31..0
> + *
> + * This corresponds to the architecturally defined mapping between
> + * the two execution states, and means we do not need to explicitly
> + * map these registers when changing states.

It might be worth prompting to use the helper functions to get the right
offsets here.

> + */
> +
> +#ifdef TARGET_AARCH64
> +# define ARM_MAX_VQ    16
> +#else
> +# define ARM_MAX_VQ    1
> +#endif
> +
> +typedef struct ARMVectorReg {
> +    uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
> +} ARMVectorReg;
> +

The QEMU_ALIGNED also deserves a comment either here or in the comment
block above.

> +
>  typedef struct CPUARMState {
>      /* Regs for current mode.  */
>      uint32_t regs[16];
> @@ -477,22 +511,7 @@ typedef struct CPUARMState {
>
>      /* VFP coprocessor state.  */
>      struct {
> -        /* VFP/Neon register state. Note that the mapping between S, D and Q
> -         * views of the register bank differs between AArch64 and AArch32:
> -         * In AArch32:
> -         *  Qn = regs[2n+1]:regs[2n]
> -         *  Dn = regs[n]
> -         *  Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n
> -         * (and regs[32] to regs[63] are inaccessible)
> -         * In AArch64:
> -         *  Qn = regs[2n+1]:regs[2n]
> -         *  Dn = regs[2n]
> -         *  Sn = regs[2n] bits 31..0
> -         * This corresponds to the architecturally defined mapping between
> -         * the two execution states, and means we do not need to explicitly
> -         * map these registers when changing states.
> -         */
> -        uint64_t regs[64];
> +        ARMVectorReg zregs[32];
>
>          uint32_t xregs[16];
>          /* We store these fpcsr fields separately for convenience.  */
> @@ -2891,7 +2910,7 @@ static inline void *arm_get_el_change_hook_opaque(ARMCPU *cpu)
>   */
>  static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
>  {
> -    return &env->vfp.regs[regno];
> +    return &env->vfp.zregs[regno >> 1].d[regno & 1];
>  }
>
>  /**
> @@ -2900,7 +2919,7 @@ static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
>   */
>  static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
>  {
> -    return &env->vfp.regs[2 * regno];
> +    return &env->vfp.zregs[regno].d[0];
>  }
>
>  /**
> @@ -2909,7 +2928,7 @@ static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
>   */
>  static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
>  {
> -    return &env->vfp.regs[2 * regno];
> +    return &env->vfp.zregs[regno].d[0];
>  }
>
>  #endif
> diff --git a/target/arm/machine.c b/target/arm/machine.c
> index a85c2430d3..cb0e1c92bb 100644
> --- a/target/arm/machine.c
> +++ b/target/arm/machine.c
> @@ -50,7 +50,40 @@ static const VMStateDescription vmstate_vfp = {
>      .minimum_version_id = 3,
>      .needed = vfp_needed,
>      .fields = (VMStateField[]) {
> -        VMSTATE_UINT64_ARRAY(env.vfp.regs, ARMCPU, 64),
> +        /* For compatibility, store Qn out of Zn here.  */
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[2].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[3].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[4].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[5].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[6].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[7].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[8].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[9].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[10].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[11].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[12].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[13].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[14].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[15].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[16].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[17].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[18].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[19].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[20].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[21].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[22].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[23].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[24].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[25].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[26].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[27].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[28].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[29].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[30].d, ARMCPU, 0, 2),
> +        VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[31].d, ARMCPU, 0, 2),
> +
>          /* The xregs array is a little awkward because element 1 (FPSCR)
>           * requires a specific accessor, so we have to split it up in
>           * the vmstate:
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index eed64c73e5..10eef870fe 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -517,8 +517,8 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
>  {
>      int offs = 0;
>  #ifdef HOST_WORDS_BIGENDIAN
> -    /* This is complicated slightly because vfp.regs[2n] is
> -     * still the low half and  vfp.regs[2n+1] the high half
> +    /* This is complicated slightly because vfp.zregs[n].d[0] is
> +     * still the low half and vfp.zregs[n].d[1] the high half
>       * of the 128 bit vector, even on big endian systems.
>       * Calculate the offset assuming a fully bigendian 128 bits,
>       * then XOR to account for the order of the two 64 bit halves.
> @@ -528,7 +528,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
>  #else
>      offs += element * (1 << size);
>  #endif
> -    offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
> +    offs += offsetof(CPUARMState, vfp.zregs[regno]);
>      assert_fp_access_checked(s);
>      return offs;
>  }
> @@ -537,7 +537,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
>  static inline int vec_full_reg_offset(DisasContext *s, int regno)
>  {
>      assert_fp_access_checked(s);
> -    return offsetof(CPUARMState, vfp.regs[regno * 2]);
> +    return offsetof(CPUARMState, vfp.zregs[regno]);
>  }
>
>  /* Return a newly allocated pointer to the vector register.  */
> diff --git a/target/arm/translate.c b/target/arm/translate.c
> index 55826b7e5a..a8c13d3758 100644
> --- a/target/arm/translate.c
> +++ b/target/arm/translate.c
> @@ -1512,13 +1512,12 @@ static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
>      }
>  }
>
> -static inline long
> -vfp_reg_offset (int dp, int reg)
> +static inline long vfp_reg_offset(bool dp, unsigned reg)
>  {
>      if (dp) {
> -        return offsetof(CPUARMState, vfp.regs[reg]);
> +        return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
>      } else {
> -        long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]);
> +        long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
>          if (reg & 1) {
>              ofs += offsetof(CPU_DoubleU, l.upper);
>          } else {

Other than the minor comment notes it looks good:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

--
Alex Bennée

next prev parent reply	other threads:[~2018-01-22 11:08 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-19  4:54 [Qemu-devel] [PATCH v2 00/16] target/arm: Prepatory work for SVE Richard Henderson
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 01/16] target/arm: Mark disas_set_insn_syndrome inline Richard Henderson
2018-01-19 13:54   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 02/16] target/arm: Use pointers in crypto helpers Richard Henderson
2018-01-22 10:09   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 03/16] target/arm: Use pointers in neon zip/uzp helpers Richard Henderson
2018-01-22 10:44   ` Alex Bennée
2018-01-22 10:44   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 04/16] target/arm: Use pointers in neon tbl helper Richard Henderson
2018-01-22 10:52   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 05/16] target/arm: Change the type of vfp.regs Richard Henderson
2018-01-22 10:56   ` Alex Bennée
2018-01-22 16:03     ` Richard Henderson
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 06/16] target/arm: Add aa{32, 64}_vfp_{dreg, qreg} helpers Richard Henderson
2018-01-22 11:02   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 07/16] vmstate: Add VMSTATE_UINT64_SUB_ARRAY Richard Henderson
2018-01-22 11:02   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE Richard Henderson
2018-01-22 11:08   ` Alex Bennée [this message]
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 09/16] target/arm: Add predicate " Richard Henderson
2018-01-22 12:04   ` Alex Bennée
2018-01-22 16:07     ` Richard Henderson
2018-01-22 18:35       ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 10/16] target/arm: Add ARM_FEATURE_SVE Richard Henderson
2018-01-22 12:05   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 11/16] target/arm: Add SVE to migration state Richard Henderson
2018-01-22 13:40   ` Alex Bennée
2018-01-22 16:11     ` Richard Henderson
2018-01-22 14:16   ` Peter Maydell
2018-01-22 16:10     ` Richard Henderson
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 12/16] target/arm: Add ZCR_ELx Richard Henderson
2018-01-22 14:38   ` Peter Maydell
2018-01-22 15:00   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 13/16] target/arm: Move cpu_get_tb_cpu_state out of line Richard Henderson
2018-01-22 15:07   ` Alex Bennée
2018-01-22 16:18     ` Richard Henderson
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 14/16] target/arm: Hoist store to flags output in cpu_get_tb_cpu_state Richard Henderson
2018-01-22 15:09   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 15/16] target/arm: Simplify fp_exception_el for user-only Richard Henderson
2018-01-22 15:10   ` Alex Bennée
2018-01-19  4:54 ` [Qemu-devel] [PATCH v2 16/16] target/arm: Add SVE state to TB->FLAGS Richard Henderson
2018-01-22 14:40   ` Peter Maydell
2018-01-19  5:29 ` [Qemu-devel] [PATCH v2 00/16] target/arm: Prepatory work for SVE no-reply
2018-01-22 14:12 ` Peter Maydell
2018-01-22 15:12   ` Alex Bennée
2018-01-22 15:12 ` Alex Bennée

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87wp0ajgm8.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.