From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE
Date: Thu, 18 Jan 2018 20:54:30 -0800 [thread overview]
Message-ID: <20180119045438.28582-9-richard.henderson@linaro.org> (raw)
In-Reply-To: <20180119045438.28582-1-richard.henderson@linaro.org>
Change vfp.regs as a uint64_t to vfp.zregs as an ARMVectorReg.
The previous patches have made the change in representation
relatively painless.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 57 ++++++++++++++++++++++++++++++----------------
target/arm/machine.c | 35 +++++++++++++++++++++++++++-
target/arm/translate-a64.c | 8 +++----
target/arm/translate.c | 7 +++---
4 files changed, 79 insertions(+), 28 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 7d396606f3..57d805b5d8 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -153,6 +153,40 @@ typedef struct {
uint32_t base_mask;
} TCR;
+/* Define a maximum sized vector register.
+ * For 32-bit, this is a 128-bit NEON/AdvSIMD register.
+ * For 64-bit, this is a 2048-bit SVE register.
+ *
+ * Note that the mapping between S, D, and Q views of the register bank
+ * differs between AArch64 and AArch32.
+ * In AArch32:
+ * Qn = regs[n].d[1]:regs[n].d[0]
+ * Dn = regs[n / 2].d[n & 1]
+ * Sn = regs[n / 4].d[n % 4 / 2],
+ * bits 31..0 for even n, and bits 63..32 for odd n
+ * (and regs[16] to regs[31] are inaccessible)
+ * In AArch64:
+ * Zn = regs[n].d[*]
+ * Qn = regs[n].d[1]:regs[n].d[0]
+ * Dn = regs[n].d[0]
+ * Sn = regs[n].d[0] bits 31..0
+ *
+ * This corresponds to the architecturally defined mapping between
+ * the two execution states, and means we do not need to explicitly
+ * map these registers when changing states.
+ */
+
+#ifdef TARGET_AARCH64
+# define ARM_MAX_VQ 16
+#else
+# define ARM_MAX_VQ 1
+#endif
+
+typedef struct ARMVectorReg {
+ uint64_t d[2 * ARM_MAX_VQ] QEMU_ALIGNED(16);
+} ARMVectorReg;
+
+
typedef struct CPUARMState {
/* Regs for current mode. */
uint32_t regs[16];
@@ -477,22 +511,7 @@ typedef struct CPUARMState {
/* VFP coprocessor state. */
struct {
- /* VFP/Neon register state. Note that the mapping between S, D and Q
- * views of the register bank differs between AArch64 and AArch32:
- * In AArch32:
- * Qn = regs[2n+1]:regs[2n]
- * Dn = regs[n]
- * Sn = regs[n/2] bits 31..0 for even n, and bits 63..32 for odd n
- * (and regs[32] to regs[63] are inaccessible)
- * In AArch64:
- * Qn = regs[2n+1]:regs[2n]
- * Dn = regs[2n]
- * Sn = regs[2n] bits 31..0
- * This corresponds to the architecturally defined mapping between
- * the two execution states, and means we do not need to explicitly
- * map these registers when changing states.
- */
- uint64_t regs[64];
+ ARMVectorReg zregs[32];
uint32_t xregs[16];
/* We store these fpcsr fields separately for convenience. */
@@ -2891,7 +2910,7 @@ static inline void *arm_get_el_change_hook_opaque(ARMCPU *cpu)
*/
static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
{
- return &env->vfp.regs[regno];
+ return &env->vfp.zregs[regno >> 1].d[regno & 1];
}
/**
@@ -2900,7 +2919,7 @@ static inline uint64_t *aa32_vfp_dreg(CPUARMState *env, unsigned regno)
*/
static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
{
- return &env->vfp.regs[2 * regno];
+ return &env->vfp.zregs[regno].d[0];
}
/**
@@ -2909,7 +2928,7 @@ static inline uint64_t *aa32_vfp_qreg(CPUARMState *env, unsigned regno)
*/
static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
{
- return &env->vfp.regs[2 * regno];
+ return &env->vfp.zregs[regno].d[0];
}
#endif
diff --git a/target/arm/machine.c b/target/arm/machine.c
index a85c2430d3..cb0e1c92bb 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -50,7 +50,40 @@ static const VMStateDescription vmstate_vfp = {
.minimum_version_id = 3,
.needed = vfp_needed,
.fields = (VMStateField[]) {
- VMSTATE_UINT64_ARRAY(env.vfp.regs, ARMCPU, 64),
+ /* For compatibility, store Qn out of Zn here. */
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[0].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[1].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[2].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[3].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[4].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[5].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[6].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[7].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[8].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[9].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[10].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[11].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[12].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[13].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[14].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[15].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[16].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[17].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[18].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[19].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[20].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[21].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[22].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[23].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[24].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[25].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[26].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[27].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[28].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[29].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[30].d, ARMCPU, 0, 2),
+ VMSTATE_UINT64_SUB_ARRAY(env.vfp.zregs[31].d, ARMCPU, 0, 2),
+
/* The xregs array is a little awkward because element 1 (FPSCR)
* requires a specific accessor, so we have to split it up in
* the vmstate:
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index eed64c73e5..10eef870fe 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -517,8 +517,8 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
{
int offs = 0;
#ifdef HOST_WORDS_BIGENDIAN
- /* This is complicated slightly because vfp.regs[2n] is
- * still the low half and vfp.regs[2n+1] the high half
+ /* This is complicated slightly because vfp.zregs[n].d[0] is
+ * still the low half and vfp.zregs[n].d[1] the high half
* of the 128 bit vector, even on big endian systems.
* Calculate the offset assuming a fully bigendian 128 bits,
* then XOR to account for the order of the two 64 bit halves.
@@ -528,7 +528,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
#else
offs += element * (1 << size);
#endif
- offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
+ offs += offsetof(CPUARMState, vfp.zregs[regno]);
assert_fp_access_checked(s);
return offs;
}
@@ -537,7 +537,7 @@ static inline int vec_reg_offset(DisasContext *s, int regno,
static inline int vec_full_reg_offset(DisasContext *s, int regno)
{
assert_fp_access_checked(s);
- return offsetof(CPUARMState, vfp.regs[regno * 2]);
+ return offsetof(CPUARMState, vfp.zregs[regno]);
}
/* Return a newly allocated pointer to the vector register. */
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 55826b7e5a..a8c13d3758 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -1512,13 +1512,12 @@ static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
}
}
-static inline long
-vfp_reg_offset (int dp, int reg)
+static inline long vfp_reg_offset(bool dp, unsigned reg)
{
if (dp) {
- return offsetof(CPUARMState, vfp.regs[reg]);
+ return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
} else {
- long ofs = offsetof(CPUARMState, vfp.regs[reg >> 1]);
+ long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
if (reg & 1) {
ofs += offsetof(CPU_DoubleU, l.upper);
} else {
--
2.14.3
next prev parent reply other threads:[~2018-01-19 4:55 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-01-19 4:54 [Qemu-devel] [PATCH v2 00/16] target/arm: Prepatory work for SVE Richard Henderson
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 01/16] target/arm: Mark disas_set_insn_syndrome inline Richard Henderson
2018-01-19 13:54 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 02/16] target/arm: Use pointers in crypto helpers Richard Henderson
2018-01-22 10:09 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 03/16] target/arm: Use pointers in neon zip/uzp helpers Richard Henderson
2018-01-22 10:44 ` Alex Bennée
2018-01-22 10:44 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 04/16] target/arm: Use pointers in neon tbl helper Richard Henderson
2018-01-22 10:52 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 05/16] target/arm: Change the type of vfp.regs Richard Henderson
2018-01-22 10:56 ` Alex Bennée
2018-01-22 16:03 ` Richard Henderson
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 06/16] target/arm: Add aa{32, 64}_vfp_{dreg, qreg} helpers Richard Henderson
2018-01-22 11:02 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 07/16] vmstate: Add VMSTATE_UINT64_SUB_ARRAY Richard Henderson
2018-01-22 11:02 ` Alex Bennée
2018-01-19 4:54 ` Richard Henderson [this message]
2018-01-22 11:08 ` [Qemu-devel] [PATCH v2 08/16] target/arm: Expand vector registers for SVE Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 09/16] target/arm: Add predicate " Richard Henderson
2018-01-22 12:04 ` Alex Bennée
2018-01-22 16:07 ` Richard Henderson
2018-01-22 18:35 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 10/16] target/arm: Add ARM_FEATURE_SVE Richard Henderson
2018-01-22 12:05 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 11/16] target/arm: Add SVE to migration state Richard Henderson
2018-01-22 13:40 ` Alex Bennée
2018-01-22 16:11 ` Richard Henderson
2018-01-22 14:16 ` Peter Maydell
2018-01-22 16:10 ` Richard Henderson
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 12/16] target/arm: Add ZCR_ELx Richard Henderson
2018-01-22 14:38 ` Peter Maydell
2018-01-22 15:00 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 13/16] target/arm: Move cpu_get_tb_cpu_state out of line Richard Henderson
2018-01-22 15:07 ` Alex Bennée
2018-01-22 16:18 ` Richard Henderson
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 14/16] target/arm: Hoist store to flags output in cpu_get_tb_cpu_state Richard Henderson
2018-01-22 15:09 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 15/16] target/arm: Simplify fp_exception_el for user-only Richard Henderson
2018-01-22 15:10 ` Alex Bennée
2018-01-19 4:54 ` [Qemu-devel] [PATCH v2 16/16] target/arm: Add SVE state to TB->FLAGS Richard Henderson
2018-01-22 14:40 ` Peter Maydell
2018-01-19 5:29 ` [Qemu-devel] [PATCH v2 00/16] target/arm: Prepatory work for SVE no-reply
2018-01-22 14:12 ` Peter Maydell
2018-01-22 15:12 ` Alex Bennée
2018-01-22 15:12 ` Alex Bennée
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180119045438.28582-9-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).