* [Qemu-devel] [PATCH 2/4] target-i386: do not memcpy in and out of xmm_regs
2015-01-07 17:39 [Qemu-devel] [PATCH 0/3] target-i386: rework how AVX/AVX512 registers are stored Paolo Bonzini
2015-01-07 17:39 ` [Qemu-devel] [PATCH 1/4] target-i386: fix movntsd on big-endian hosts Paolo Bonzini
@ 2015-01-07 17:39 ` Paolo Bonzini
2015-01-13 17:02 ` Eduardo Habkost
2015-01-13 18:37 ` Eduardo Habkost
2015-01-07 17:39 ` [Qemu-devel] [PATCH 3/4] target-i386: use vmstate_offset_sub_array for AVX registers Paolo Bonzini
2015-01-07 17:39 ` [Qemu-devel] [PATCH 4/4] target-i386: make xmm_regs 512-bit wide Paolo Bonzini
3 siblings, 2 replies; 14+ messages in thread
From: Paolo Bonzini @ 2015-01-07 17:39 UTC (permalink / raw)
To: qemu-devel; +Cc: ehabkost
After the next patch, we will move the high parts of AVX and AVX512 registers
in the same array as the SSE registers. This will make it impossible to
memcpy an array of 128-bit values in and out of xmm_regs in one swoop.
Use a for loop instead.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/kvm.c | 30 ++++++++++++++++++++++++------
target-i386/translate.c | 8 ++++----
2 files changed, 28 insertions(+), 10 deletions(-)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index f92edfe..cf9f331 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1019,7 +1019,10 @@ static int kvm_put_fpu(X86CPU *cpu)
fpu.ftwx |= (!env->fptags[i]) << i;
}
memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
- memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
+ for (i = 0; i < CPU_NB_REGS; i++) {
+ stq_p(&fpu.xmm[i][0], env->xmm_regs[i].XMM_Q(0));
+ stq_p(&fpu.xmm[i][8], env->xmm_regs[i].XMM_Q(1));
+ }
fpu.mxcsr = env->mxcsr;
return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
@@ -1045,6 +1048,7 @@ static int kvm_put_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
uint16_t cwd, swd, twd;
+ uint8_t *xmm;
int i, r;
if (!kvm_has_xsave()) {
@@ -1065,8 +1069,6 @@ static int kvm_put_xsave(X86CPU *cpu)
memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
sizeof env->fpregs);
- memcpy(&xsave->region[XSAVE_XMM_SPACE], env->xmm_regs,
- sizeof env->xmm_regs);
xsave->region[XSAVE_MXCSR] = env->mxcsr;
*(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
@@ -1079,6 +1081,13 @@ static int kvm_put_xsave(X86CPU *cpu)
sizeof env->opmask_regs);
memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
sizeof env->zmmh_regs);
+
+ xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
+ for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+ stq_p(xmm, env->xmm_regs[i].XMM_Q(0));
+ stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1));
+ }
+
#ifdef TARGET_X86_64
memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
sizeof env->hi16_zmm_regs);
@@ -1384,7 +1393,10 @@ static int kvm_get_fpu(X86CPU *cpu)
env->fptags[i] = !((fpu.ftwx >> i) & 1);
}
memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
- memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
+ for (i = 0; i < CPU_NB_REGS; i++) {
+ env->xmm_regs[i].XMM_Q(0) = ldq_p(&fpu.xmm[i][0]);
+ env->xmm_regs[i].XMM_Q(1) = ldq_p(&fpu.xmm[i][8]);
+ }
env->mxcsr = fpu.mxcsr;
return 0;
@@ -1395,6 +1407,7 @@ static int kvm_get_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
int ret, i;
+ const uint8_t *xmm;
uint16_t cwd, swd, twd;
if (!kvm_has_xsave()) {
@@ -1421,8 +1434,6 @@ static int kvm_get_xsave(X86CPU *cpu)
env->mxcsr = xsave->region[XSAVE_MXCSR];
memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
sizeof env->fpregs);
- memcpy(env->xmm_regs, &xsave->region[XSAVE_XMM_SPACE],
- sizeof env->xmm_regs);
env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
sizeof env->ymmh_regs);
@@ -1434,6 +1445,13 @@ static int kvm_get_xsave(X86CPU *cpu)
sizeof env->opmask_regs);
memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
sizeof env->zmmh_regs);
+
+ xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
+ for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+ env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
+ env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
+ }
+
#ifdef TARGET_X86_64
memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
sizeof env->hi16_zmm_regs);
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5af4300..253009a 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -2621,10 +2621,10 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
static inline void gen_op_movo(int d_offset, int s_offset)
{
- tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
- tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
- tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
- tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0));
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0));
+ tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1));
+ tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1));
}
static inline void gen_op_movq(int d_offset, int s_offset)
--
1.8.3.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 3/4] target-i386: use vmstate_offset_sub_array for AVX registers
2015-01-07 17:39 [Qemu-devel] [PATCH 0/3] target-i386: rework how AVX/AVX512 registers are stored Paolo Bonzini
2015-01-07 17:39 ` [Qemu-devel] [PATCH 1/4] target-i386: fix movntsd on big-endian hosts Paolo Bonzini
2015-01-07 17:39 ` [Qemu-devel] [PATCH 2/4] target-i386: do not memcpy in and out of xmm_regs Paolo Bonzini
@ 2015-01-07 17:39 ` Paolo Bonzini
2015-01-07 17:39 ` [Qemu-devel] [PATCH 4/4] target-i386: make xmm_regs 512-bit wide Paolo Bonzini
3 siblings, 0 replies; 14+ messages in thread
From: Paolo Bonzini @ 2015-01-07 17:39 UTC (permalink / raw)
To: qemu-devel; +Cc: ehabkost
After the next patch, each vmstate field will extract parts of a larger
(32x512-bit) array, so we cannot check the vmstate field against the
type of the array.
While changing this, change the macros to accept the index of the first
element (which will not be 0 for Hi16_ZMM_REGS) instead of the number
of elements (which is always CPU_NB_REGS).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/migration/vmstate.h | 10 ++++++++++
target-i386/machine.c | 28 ++++++++++++++++------------
2 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index e45fc49..3b9e0de 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -359,6 +359,16 @@ extern const VMStateInfo vmstate_info_bitmap;
.offset = vmstate_offset_array(_s, _f, _type*, _n), \
}
+#define VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, _num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_ARRAY, \
+ .offset = vmstate_offset_sub_array(_state, _field, _type, _start), \
+}
+
#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, _vmsd, _type) { \
.name = (stringify(_field)), \
.num = (_num), \
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 722d62e4..604a49a 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -42,8 +42,9 @@ static const VMStateDescription vmstate_xmm_reg = {
}
};
-#define VMSTATE_XMM_REGS(_field, _state, _n) \
- VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_xmm_reg, XMMReg)
+#define VMSTATE_XMM_REGS(_field, _state, _start) \
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
+ vmstate_xmm_reg, XMMReg)
/* YMMH format is the same as XMM */
static const VMStateDescription vmstate_ymmh_reg = {
@@ -57,8 +58,9 @@ static const VMStateDescription vmstate_ymmh_reg = {
}
};
-#define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \
- VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg)
+#define VMSTATE_YMMH_REGS_VARS(_field, _state, _start, _v) \
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, _v, \
+ vmstate_ymmh_reg, XMMReg)
static const VMStateDescription vmstate_zmmh_reg = {
.name = "zmmh_reg",
@@ -73,8 +75,9 @@ static const VMStateDescription vmstate_zmmh_reg = {
}
};
-#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \
- VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg)
+#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _start) \
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
+ vmstate_zmmh_reg, YMMReg)
#ifdef TARGET_X86_64
static const VMStateDescription vmstate_hi16_zmm_reg = {
@@ -94,8 +97,9 @@ static const VMStateDescription vmstate_hi16_zmm_reg = {
}
};
-#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \
- VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg)
+#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _start) \
+ VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
+ vmstate_hi16_zmm_reg, ZMMReg)
#endif
static const VMStateDescription vmstate_bnd_regs = {
@@ -679,9 +683,9 @@ static const VMStateDescription vmstate_avx512 = {
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
- VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS),
+ VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, 0),
#ifdef TARGET_X86_64
- VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS),
+ VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, 0),
#endif
VMSTATE_END_OF_LIST()
}
@@ -750,7 +754,7 @@ VMStateDescription vmstate_x86_cpu = {
VMSTATE_INT32(env.a20_mask, X86CPU),
/* XMM */
VMSTATE_UINT32(env.mxcsr, X86CPU),
- VMSTATE_XMM_REGS(env.xmm_regs, X86CPU, CPU_NB_REGS),
+ VMSTATE_XMM_REGS(env.xmm_regs, X86CPU, 0),
#ifdef TARGET_X86_64
VMSTATE_UINT64(env.efer, X86CPU),
@@ -803,7 +807,7 @@ VMStateDescription vmstate_x86_cpu = {
/* XSAVE related fields */
VMSTATE_UINT64_V(env.xcr0, X86CPU, 12),
VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12),
- VMSTATE_YMMH_REGS_VARS(env.ymmh_regs, X86CPU, CPU_NB_REGS, 12),
+ VMSTATE_YMMH_REGS_VARS(env.ymmh_regs, X86CPU, 0, 12),
VMSTATE_END_OF_LIST()
/* The above list is not sorted /wrt version numbers, watch out! */
},
--
1.8.3.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [PATCH 4/4] target-i386: make xmm_regs 512-bit wide
2015-01-07 17:39 [Qemu-devel] [PATCH 0/3] target-i386: rework how AVX/AVX512 registers are stored Paolo Bonzini
` (2 preceding siblings ...)
2015-01-07 17:39 ` [Qemu-devel] [PATCH 3/4] target-i386: use vmstate_offset_sub_array for AVX registers Paolo Bonzini
@ 2015-01-07 17:39 ` Paolo Bonzini
3 siblings, 0 replies; 14+ messages in thread
From: Paolo Bonzini @ 2015-01-07 17:39 UTC (permalink / raw)
To: qemu-devel; +Cc: ehabkost
Right now, the AVX512 registers are split in many different fields:
xmm_regs for the low 128 bits of the first 16 registers, ymmh_regs
for the next 128 bits of the same first 16 registers, zmmh_regs
for the next 256 bits of the same first 16 registers, and finally
hi16_zmm_regs for the full 512 bits of the second 16 bit registers.
This makes it simple to move data in and out of the xsave region,
but would be a nightmare for a hypothetical TCG implementation and
leads to a proliferation of [XYZ]MM_[BWLSQD] macros. Instead,
this patch marshals data manually from the xsave region to a single
32x512-bit array, simplifying the macro jungle and clarifying which
bits are in which vmstate subsection.
The migration format is unaffected.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target-i386/cpu.h | 68 ++++++---------------------------------------------
target-i386/kvm.c | 40 ++++++++++++++++++------------
target-i386/machine.c | 55 ++++++++++++++++++++---------------------
3 files changed, 59 insertions(+), 104 deletions(-)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 3ecff96..9ad92e2 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -715,31 +715,13 @@ typedef struct SegmentCache {
} SegmentCache;
typedef union {
- uint8_t _b[16];
- uint16_t _w[8];
- uint32_t _l[4];
- uint64_t _q[2];
- float32 _s[4];
- float64 _d[2];
-} XMMReg;
-
-typedef union {
- uint8_t _b[32];
- uint16_t _w[16];
- uint32_t _l[8];
- uint64_t _q[4];
- float32 _s[8];
- float64 _d[4];
-} YMMReg;
-
-typedef union {
uint8_t _b[64];
uint16_t _w[32];
uint32_t _l[16];
uint64_t _q[8];
float32 _s[16];
float64 _d[8];
-} ZMMReg;
+} XMMReg; /* really zmm */
typedef union {
uint8_t _b[8];
@@ -760,46 +742,18 @@ typedef struct BNDCSReg {
} BNDCSReg;
#ifdef HOST_WORDS_BIGENDIAN
-#define ZMM_B(n) _b[63 - (n)]
-#define ZMM_W(n) _w[31 - (n)]
-#define ZMM_L(n) _l[15 - (n)]
-#define ZMM_S(n) _s[15 - (n)]
-#define ZMM_Q(n) _q[7 - (n)]
-#define ZMM_D(n) _d[7 - (n)]
-
-#define YMM_B(n) _b[31 - (n)]
-#define YMM_W(n) _w[15 - (n)]
-#define YMM_L(n) _l[7 - (n)]
-#define YMM_S(n) _s[7 - (n)]
-#define YMM_Q(n) _q[3 - (n)]
-#define YMM_D(n) _d[3 - (n)]
-
-#define XMM_B(n) _b[15 - (n)]
-#define XMM_W(n) _w[7 - (n)]
-#define XMM_L(n) _l[3 - (n)]
-#define XMM_S(n) _s[3 - (n)]
-#define XMM_Q(n) _q[1 - (n)]
-#define XMM_D(n) _d[1 - (n)]
+#define XMM_B(n) _b[63 - (n)]
+#define XMM_W(n) _w[31 - (n)]
+#define XMM_L(n) _l[15 - (n)]
+#define XMM_S(n) _s[15 - (n)]
+#define XMM_Q(n) _q[7 - (n)]
+#define XMM_D(n) _d[7 - (n)]
#define MMX_B(n) _b[7 - (n)]
#define MMX_W(n) _w[3 - (n)]
#define MMX_L(n) _l[1 - (n)]
#define MMX_S(n) _s[1 - (n)]
#else
-#define ZMM_B(n) _b[n]
-#define ZMM_W(n) _w[n]
-#define ZMM_L(n) _l[n]
-#define ZMM_S(n) _s[n]
-#define ZMM_Q(n) _q[n]
-#define ZMM_D(n) _d[n]
-
-#define YMM_B(n) _b[n]
-#define YMM_W(n) _w[n]
-#define YMM_L(n) _l[n]
-#define YMM_S(n) _s[n]
-#define YMM_Q(n) _q[n]
-#define YMM_D(n) _d[n]
-
#define XMM_B(n) _b[n]
#define XMM_W(n) _w[n]
#define XMM_L(n) _l[n]
@@ -898,17 +852,11 @@ typedef struct CPUX86State {
float_status mmx_status; /* for 3DNow! float ops */
float_status sse_status;
uint32_t mxcsr;
- XMMReg xmm_regs[CPU_NB_REGS];
+ XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
XMMReg xmm_t0;
MMXReg mmx_t0;
- XMMReg ymmh_regs[CPU_NB_REGS];
-
uint64_t opmask_regs[NB_OPMASK_REGS];
- YMMReg zmmh_regs[CPU_NB_REGS];
-#ifdef TARGET_X86_64
- ZMMReg hi16_zmm_regs[CPU_NB_REGS];
-#endif
/* sysenter registers */
uint32_t sysenter_cs;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index cf9f331..5bfea7e 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1048,7 +1048,7 @@ static int kvm_put_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
uint16_t cwd, swd, twd;
- uint8_t *xmm;
+ uint8_t *xmm, *ymmh, *zmmh;
int i, r;
if (!kvm_has_xsave()) {
@@ -1071,26 +1071,30 @@ static int kvm_put_xsave(X86CPU *cpu)
sizeof env->fpregs);
xsave->region[XSAVE_MXCSR] = env->mxcsr;
*(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
- memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
- sizeof env->ymmh_regs);
memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
sizeof env->bnd_regs);
memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
sizeof(env->bndcs_regs));
memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
sizeof env->opmask_regs);
- memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
- sizeof env->zmmh_regs);
xmm = (uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
- for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+ ymmh = (uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
+ zmmh = (uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
+ for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
stq_p(xmm, env->xmm_regs[i].XMM_Q(0));
stq_p(xmm+8, env->xmm_regs[i].XMM_Q(1));
+ stq_p(ymmh, env->xmm_regs[i].XMM_Q(2));
+ stq_p(ymmh+8, env->xmm_regs[i].XMM_Q(3));
+ stq_p(zmmh, env->xmm_regs[i].XMM_Q(4));
+ stq_p(zmmh+8, env->xmm_regs[i].XMM_Q(5));
+ stq_p(zmmh+16, env->xmm_regs[i].XMM_Q(6));
+ stq_p(zmmh+24, env->xmm_regs[i].XMM_Q(7));
}
#ifdef TARGET_X86_64
- memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
- sizeof env->hi16_zmm_regs);
+ memcpy(&xsave->region[XSAVE_Hi16_ZMM], &env->xmm_regs[16],
+ 16 * sizeof env->xmm_regs[16]);
#endif
r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
return r;
@@ -1407,7 +1411,7 @@ static int kvm_get_xsave(X86CPU *cpu)
CPUX86State *env = &cpu->env;
struct kvm_xsave* xsave = env->kvm_xsave_buf;
int ret, i;
- const uint8_t *xmm;
+ const uint8_t *xmm, *ymmh, *zmmh;
uint16_t cwd, swd, twd;
if (!kvm_has_xsave()) {
@@ -1435,26 +1439,30 @@ static int kvm_get_xsave(X86CPU *cpu)
memcpy(env->fpregs, &xsave->region[XSAVE_ST_SPACE],
sizeof env->fpregs);
env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
- memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
- sizeof env->ymmh_regs);
memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
sizeof env->bnd_regs);
memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
sizeof(env->bndcs_regs));
memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
sizeof env->opmask_regs);
- memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
- sizeof env->zmmh_regs);
xmm = (const uint8_t *)&xsave->region[XSAVE_XMM_SPACE];
- for (i = 0; i < CPU_NB_REGS; i++, xmm += 16) {
+ ymmh = (const uint8_t *)&xsave->region[XSAVE_YMMH_SPACE];
+ zmmh = (const uint8_t *)&xsave->region[XSAVE_ZMM_Hi256];
+ for (i = 0; i < CPU_NB_REGS; i++, xmm += 16, ymmh += 16, zmmh += 32) {
env->xmm_regs[i].XMM_Q(0) = ldq_p(xmm);
env->xmm_regs[i].XMM_Q(1) = ldq_p(xmm+8);
+ env->xmm_regs[i].XMM_Q(2) = ldq_p(ymmh);
+ env->xmm_regs[i].XMM_Q(3) = ldq_p(ymmh+8);
+ env->xmm_regs[i].XMM_Q(4) = ldq_p(zmmh);
+ env->xmm_regs[i].XMM_Q(5) = ldq_p(zmmh+8);
+ env->xmm_regs[i].XMM_Q(6) = ldq_p(zmmh+16);
+ env->xmm_regs[i].XMM_Q(7) = ldq_p(zmmh+24);
}
#ifdef TARGET_X86_64
- memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
- sizeof env->hi16_zmm_regs);
+ memcpy(&env->xmm_regs[16], &xsave->region[XSAVE_Hi16_ZMM],
+ 16 * sizeof env->xmm_regs[16]);
#endif
return 0;
}
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 604a49a..cd1ddd2 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -46,14 +46,14 @@ static const VMStateDescription vmstate_xmm_reg = {
VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
vmstate_xmm_reg, XMMReg)
-/* YMMH format is the same as XMM */
+/* YMMH format is the same as XMM, but for bits 128-255 */
static const VMStateDescription vmstate_ymmh_reg = {
.name = "ymmh_reg",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_UINT64(XMM_Q(0), XMMReg),
- VMSTATE_UINT64(XMM_Q(1), XMMReg),
+ VMSTATE_UINT64(XMM_Q(2), XMMReg),
+ VMSTATE_UINT64(XMM_Q(3), XMMReg),
VMSTATE_END_OF_LIST()
}
};
@@ -67,17 +67,17 @@ static const VMStateDescription vmstate_zmmh_reg = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_UINT64(YMM_Q(0), YMMReg),
- VMSTATE_UINT64(YMM_Q(1), YMMReg),
- VMSTATE_UINT64(YMM_Q(2), YMMReg),
- VMSTATE_UINT64(YMM_Q(3), YMMReg),
+ VMSTATE_UINT64(XMM_Q(4), XMMReg),
+ VMSTATE_UINT64(XMM_Q(5), XMMReg),
+ VMSTATE_UINT64(XMM_Q(6), XMMReg),
+ VMSTATE_UINT64(XMM_Q(7), XMMReg),
VMSTATE_END_OF_LIST()
}
};
#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _start) \
VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
- vmstate_zmmh_reg, YMMReg)
+ vmstate_zmmh_reg, XMMReg)
#ifdef TARGET_X86_64
static const VMStateDescription vmstate_hi16_zmm_reg = {
@@ -85,21 +85,21 @@ static const VMStateDescription vmstate_hi16_zmm_reg = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_UINT64(ZMM_Q(0), ZMMReg),
- VMSTATE_UINT64(ZMM_Q(1), ZMMReg),
- VMSTATE_UINT64(ZMM_Q(2), ZMMReg),
- VMSTATE_UINT64(ZMM_Q(3), ZMMReg),
- VMSTATE_UINT64(ZMM_Q(4), ZMMReg),
- VMSTATE_UINT64(ZMM_Q(5), ZMMReg),
- VMSTATE_UINT64(ZMM_Q(6), ZMMReg),
- VMSTATE_UINT64(ZMM_Q(7), ZMMReg),
+ VMSTATE_UINT64(XMM_Q(0), XMMReg),
+ VMSTATE_UINT64(XMM_Q(1), XMMReg),
+ VMSTATE_UINT64(XMM_Q(2), XMMReg),
+ VMSTATE_UINT64(XMM_Q(3), XMMReg),
+ VMSTATE_UINT64(XMM_Q(4), XMMReg),
+ VMSTATE_UINT64(XMM_Q(5), XMMReg),
+ VMSTATE_UINT64(XMM_Q(6), XMMReg),
+ VMSTATE_UINT64(XMM_Q(7), XMMReg),
VMSTATE_END_OF_LIST()
}
};
#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _start) \
VMSTATE_STRUCT_SUB_ARRAY(_field, _state, _start, CPU_NB_REGS, 0, \
- vmstate_hi16_zmm_reg, ZMMReg)
+ vmstate_hi16_zmm_reg, XMMReg)
#endif
static const VMStateDescription vmstate_bnd_regs = {
@@ -658,17 +658,16 @@ static bool avx512_needed(void *opaque)
}
for (i = 0; i < CPU_NB_REGS; i++) {
-#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field))
- if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) ||
- ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) {
+#define ENV_XMM(reg, field) (env->xmm_regs[reg].XMM_Q(field))
+ if (ENV_XMM(i, 4) || ENV_XMM(i, 6) ||
+ ENV_XMM(i, 5) || ENV_XMM(i, 7)) {
return true;
}
#ifdef TARGET_X86_64
-#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field))
- if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) ||
- ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) ||
- ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) ||
- ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) {
+ if (ENV_XMM(i+16, 0) || ENV_XMM(i+16, 1) ||
+ ENV_XMM(i+16, 2) || ENV_XMM(i+16, 3) ||
+ ENV_XMM(i+16, 4) || ENV_XMM(i+16, 5) ||
+ ENV_XMM(i+16, 6) || ENV_XMM(i+16, 7)) {
return true;
}
#endif
@@ -683,9 +682,9 @@ static const VMStateDescription vmstate_avx512 = {
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
- VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, 0),
+ VMSTATE_ZMMH_REGS_VARS(env.xmm_regs, X86CPU, 0),
#ifdef TARGET_X86_64
- VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, 0),
+ VMSTATE_Hi16_ZMM_REGS_VARS(env.xmm_regs, X86CPU, 16),
#endif
VMSTATE_END_OF_LIST()
}
@@ -807,7 +806,7 @@ VMStateDescription vmstate_x86_cpu = {
/* XSAVE related fields */
VMSTATE_UINT64_V(env.xcr0, X86CPU, 12),
VMSTATE_UINT64_V(env.xstate_bv, X86CPU, 12),
- VMSTATE_YMMH_REGS_VARS(env.ymmh_regs, X86CPU, 0, 12),
+ VMSTATE_YMMH_REGS_VARS(env.xmm_regs, X86CPU, 0, 12),
VMSTATE_END_OF_LIST()
/* The above list is not sorted /wrt version numbers, watch out! */
},
--
1.8.3.1
^ permalink raw reply related [flat|nested] 14+ messages in thread