* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-23 3:02 [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support Chao Peng
@ 2014-10-23 14:34 ` Paolo Bonzini
2014-10-24 16:38 ` Eduardo Habkost
2014-10-23 19:49 ` Eduardo Habkost
` (2 subsequent siblings)
3 siblings, 1 reply; 16+ messages in thread
From: Paolo Bonzini @ 2014-10-23 14:34 UTC (permalink / raw)
To: Chao Peng, qemu-devel
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, Vadim Rozenfeld,
Laszlo Ersek, Andreas Färber
On 10/23/2014 05:02 AM, Chao Peng wrote:
> Add AVX512 feature bits, register definition and corresponding
> xsave/vmstate support.
>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
> target-i386/cpu.c | 10 ++++--
> target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++
> target-i386/kvm.c | 19 +++++++++++
> target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 175 insertions(+), 2 deletions(-)
Looks good---Eduardo, Juan, can you look at it too? Documentation is at
https://software.intel.com/sites/default/files/managed/68/8b/319433-019.pdf
See Tables 2-5, 3-6, 3-7, 3-8.
Paolo
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index e7bf9de..e91bfbd 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = {
> static const char *cpuid_7_0_ebx_feature_name[] = {
> "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep",
> "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL,
> - NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
> - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
> + "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
> + NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL,
> };
>
> static const char *cpuid_apm_edx_feature_name[] = {
> @@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = {
> .offset = 0x3c0, .size = 0x40 },
> [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
> .offset = 0x400, .size = 0x40 },
> + [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> + .offset = 0x440, .size = 0x40 },
> + [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> + .offset = 0x480, .size = 0x200 },
> + [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> + .offset = 0x680, .size = 0x400 },
> };
>
> const char *get_register_name_32(unsigned int reg)
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 2968749..9f01831 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -395,6 +395,9 @@
> #define XSTATE_YMM (1ULL << 2)
> #define XSTATE_BNDREGS (1ULL << 3)
> #define XSTATE_BNDCSR (1ULL << 4)
> +#define XSTATE_OPMASK (1ULL << 5)
> +#define XSTATE_ZMM_Hi256 (1ULL << 6)
> +#define XSTATE_Hi16_ZMM (1ULL << 7)
>
>
> /* CPUID feature words */
> @@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
> #define CPUID_7_0_EBX_INVPCID (1U << 10)
> #define CPUID_7_0_EBX_RTM (1U << 11)
> #define CPUID_7_0_EBX_MPX (1U << 14)
> +#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */
> #define CPUID_7_0_EBX_RDSEED (1U << 18)
> #define CPUID_7_0_EBX_ADX (1U << 19)
> #define CPUID_7_0_EBX_SMAP (1U << 20)
> +#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */
> +#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */
> +#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */
>
> /* CPUID[0x80000007].EDX flags: */
> #define CPUID_APM_INVTSC (1U << 8)
> @@ -707,6 +714,24 @@ typedef union {
> } XMMReg;
>
> typedef union {
> + uint8_t _b[32];
> + uint16_t _w[16];
> + uint32_t _l[8];
> + uint64_t _q[4];
> + float32 _s[8];
> + float64 _d[4];
> +} YMMReg;
> +
> +typedef union {
> + uint8_t _b[64];
> + uint16_t _w[32];
> + uint32_t _l[16];
> + uint64_t _q[8];
> + float32 _s[16];
> + float64 _d[8];
> +} ZMMReg;
> +
> +typedef union {
> uint8_t _b[8];
> uint16_t _w[4];
> uint32_t _l[2];
> @@ -725,6 +750,20 @@ typedef struct BNDCSReg {
> } BNDCSReg;
>
> #ifdef HOST_WORDS_BIGENDIAN
> +#define ZMM_B(n) _b[63 - (n)]
> +#define ZMM_W(n) _w[31 - (n)]
> +#define ZMM_L(n) _l[15 - (n)]
> +#define ZMM_S(n) _s[15 - (n)]
> +#define ZMM_Q(n) _q[7 - (n)]
> +#define ZMM_D(n) _d[7 - (n)]
> +
> +#define YMM_B(n) _b[31 - (n)]
> +#define YMM_W(n) _w[15 - (n)]
> +#define YMM_L(n) _l[7 - (n)]
> +#define YMM_S(n) _s[7 - (n)]
> +#define YMM_Q(n) _q[3 - (n)]
> +#define YMM_D(n) _d[3 - (n)]
> +
> #define XMM_B(n) _b[15 - (n)]
> #define XMM_W(n) _w[7 - (n)]
> #define XMM_L(n) _l[3 - (n)]
> @@ -737,6 +776,20 @@ typedef struct BNDCSReg {
> #define MMX_L(n) _l[1 - (n)]
> #define MMX_S(n) _s[1 - (n)]
> #else
> +#define ZMM_B(n) _b[n]
> +#define ZMM_W(n) _w[n]
> +#define ZMM_L(n) _l[n]
> +#define ZMM_S(n) _s[n]
> +#define ZMM_Q(n) _q[n]
> +#define ZMM_D(n) _d[n]
> +
> +#define YMM_B(n) _b[n]
> +#define YMM_W(n) _w[n]
> +#define YMM_L(n) _l[n]
> +#define YMM_S(n) _s[n]
> +#define YMM_Q(n) _q[n]
> +#define YMM_D(n) _d[n]
> +
> #define XMM_B(n) _b[n]
> #define XMM_W(n) _w[n]
> #define XMM_L(n) _l[n]
> @@ -775,6 +828,8 @@ typedef struct {
>
> #define NB_MMU_MODES 3
>
> +#define NB_OPMASK_REGS 8
> +
> typedef enum TPRAccess {
> TPR_ACCESS_READ,
> TPR_ACCESS_WRITE,
> @@ -839,6 +894,12 @@ typedef struct CPUX86State {
>
> XMMReg ymmh_regs[CPU_NB_REGS];
>
> + uint64_t opmask_regs[NB_OPMASK_REGS];
> + YMMReg zmmh_regs[CPU_NB_REGS];
> +#ifdef TARGET_X86_64
> + ZMMReg hi16_zmm_regs[CPU_NB_REGS];
> +#endif
> +
> /* sysenter registers */
> uint32_t sysenter_cs;
> target_ulong sysenter_esp;
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index ddedc73..ccf36e8 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu)
> #define XSAVE_YMMH_SPACE 144
> #define XSAVE_BNDREGS 240
> #define XSAVE_BNDCSR 256
> +#define XSAVE_OPMASK 272
> +#define XSAVE_ZMM_Hi256 288
> +#define XSAVE_Hi16_ZMM 416
>
> static int kvm_put_xsave(X86CPU *cpu)
> {
> @@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu)
> sizeof env->bnd_regs);
> memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
> sizeof(env->bndcs_regs));
> + memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
> + sizeof env->opmask_regs);
> + memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
> + sizeof env->zmmh_regs);
> +#ifdef TARGET_X86_64
> + memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
> + sizeof env->hi16_zmm_regs);
> +#endif
> r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
> return r;
> }
> @@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu)
> sizeof env->bnd_regs);
> memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
> sizeof(env->bndcs_regs));
> + memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
> + sizeof env->opmask_regs);
> + memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
> + sizeof env->zmmh_regs);
> +#ifdef TARGET_X86_64
> + memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
> + sizeof env->hi16_zmm_regs);
> +#endif
> return 0;
> }
>
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index 0dd49f0..708fc54 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = {
> #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \
> VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg)
>
> +static const VMStateDescription vmstate_zmmh_reg = {
> + .name = "zmmh_reg",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(YMM_Q(0), YMMReg),
> + VMSTATE_UINT64(YMM_Q(1), YMMReg),
> + VMSTATE_UINT64(YMM_Q(2), YMMReg),
> + VMSTATE_UINT64(YMM_Q(3), YMMReg),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> +#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \
> + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg)
> +
> +#ifdef TARGET_X86_64
> +static const VMStateDescription vmstate_hi16_zmm_reg = {
> + .name = "hi16_zmm_reg",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(ZMM_Q(0), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(1), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(2), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(3), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(4), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(5), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(6), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(7), ZMMReg),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> +#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \
> + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg)
> +#endif
> +
> static const VMStateDescription vmstate_bnd_regs = {
> .name = "bnd_regs",
> .version_id = 1,
> @@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
> }
> };
>
> +static bool avx512_needed(void *opaque)
> +{
> + X86CPU *cpu = opaque;
> + CPUX86State *env = &cpu->env;
> + unsigned int i;
> +
> + for (i = 0; i < NB_OPMASK_REGS; i++) {
> + if (env->opmask_regs[i]) {
> + return true;
> + }
> + }
> +
> + for (i = 0; i < CPU_NB_REGS; i++) {
> +#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field))
> + if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) ||
> + ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) {
> + return true;
> + }
> +#ifdef TARGET_X86_64
> +#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field))
> + if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) ||
> + ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) ||
> + ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) ||
> + ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) {
> + return true;
> + }
> +#endif
> + }
> +
> + return false;
> +}
> +
> +static const VMStateDescription vmstate_avx512 = {
> + .name = "cpu/avx512",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
> + VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS),
> +#ifdef TARGET_X86_64
> + VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS),
> +#endif
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> VMStateDescription vmstate_x86_cpu = {
> .name = "cpu",
> .version_id = 12,
> @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = {
> }, {
> .vmsd = &vmstate_msr_hyperv_time,
> .needed = hyperv_time_enable_needed,
> + }, {
> + .vmsd = &vmstate_avx512,
> + .needed = avx512_needed,
> } , {
> /* empty */
> }
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-23 14:34 ` Paolo Bonzini
@ 2014-10-24 16:38 ` Eduardo Habkost
0 siblings, 0 replies; 16+ messages in thread
From: Eduardo Habkost @ 2014-10-24 16:38 UTC (permalink / raw)
To: Paolo Bonzini
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Chao Peng, Laszlo Ersek, Andreas Färber
On Thu, Oct 23, 2014 at 04:34:46PM +0200, Paolo Bonzini wrote:
> On 10/23/2014 05:02 AM, Chao Peng wrote:
> > Add AVX512 feature bits, register definition and corresponding
> > xsave/vmstate support.
> >
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> > ---
> > target-i386/cpu.c | 10 ++++--
> > target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++
> > target-i386/kvm.c | 19 +++++++++++
> > target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
> > 4 files changed, 175 insertions(+), 2 deletions(-)
>
> Looks good---Eduardo, Juan, can you look at it too? Documentation is at
>
> https://software.intel.com/sites/default/files/managed/68/8b/319433-019.pdf
>
> See Tables 2-5, 3-6, 3-7, 3-8.
Also, if anybody is confused by the TARGET_X86_64 #ifdefs, see section
1.2.2 32 SIMD Register Support ("The number of available vector
registers in 32-bit mode is still 8").
--
Eduardo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-23 3:02 [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support Chao Peng
2014-10-23 14:34 ` Paolo Bonzini
@ 2014-10-23 19:49 ` Eduardo Habkost
2014-10-24 1:27 ` Chao Peng
2014-10-24 16:01 ` Eduardo Habkost
2014-11-02 10:19 ` Michael S. Tsirkin
3 siblings, 1 reply; 16+ messages in thread
From: Eduardo Habkost @ 2014-10-23 19:49 UTC (permalink / raw)
To: Chao Peng
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Paolo Bonzini, Laszlo Ersek, Andreas Färber
On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
[...]
> @@ -707,6 +714,24 @@ typedef union {
> } XMMReg;
>
> typedef union {
> + uint8_t _b[32];
> + uint16_t _w[16];
> + uint32_t _l[8];
> + uint64_t _q[4];
> + float32 _s[8];
> + float64 _d[4];
> +} YMMReg;
> +
> +typedef union {
> + uint8_t _b[64];
> + uint16_t _w[32];
> + uint32_t _l[16];
> + uint64_t _q[8];
> + float32 _s[16];
> + float64 _d[8];
> +} ZMMReg;
> +
> +typedef union {
> uint8_t _b[8];
> uint16_t _w[4];
> uint32_t _l[2];
> @@ -725,6 +750,20 @@ typedef struct BNDCSReg {
> } BNDCSReg;
>
> #ifdef HOST_WORDS_BIGENDIAN
> +#define ZMM_B(n) _b[63 - (n)]
> +#define ZMM_W(n) _w[31 - (n)]
> +#define ZMM_L(n) _l[15 - (n)]
> +#define ZMM_S(n) _s[15 - (n)]
> +#define ZMM_Q(n) _q[7 - (n)]
> +#define ZMM_D(n) _d[7 - (n)]
> +
> +#define YMM_B(n) _b[31 - (n)]
> +#define YMM_W(n) _w[15 - (n)]
> +#define YMM_L(n) _l[7 - (n)]
> +#define YMM_S(n) _s[7 - (n)]
> +#define YMM_Q(n) _q[3 - (n)]
> +#define YMM_D(n) _d[3 - (n)]
> +
> #define XMM_B(n) _b[15 - (n)]
> #define XMM_W(n) _w[7 - (n)]
> #define XMM_L(n) _l[3 - (n)]
> @@ -737,6 +776,20 @@ typedef struct BNDCSReg {
> #define MMX_L(n) _l[1 - (n)]
> #define MMX_S(n) _s[1 - (n)]
> #else
> +#define ZMM_B(n) _b[n]
> +#define ZMM_W(n) _w[n]
> +#define ZMM_L(n) _l[n]
> +#define ZMM_S(n) _s[n]
> +#define ZMM_Q(n) _q[n]
> +#define ZMM_D(n) _d[n]
> +
> +#define YMM_B(n) _b[n]
> +#define YMM_W(n) _w[n]
> +#define YMM_L(n) _l[n]
> +#define YMM_S(n) _s[n]
> +#define YMM_Q(n) _q[n]
> +#define YMM_D(n) _d[n]
> +
I am probably not being able to see some future use case of those data
structures, but: why all the extra complexity here, if only ZMM_Q and
YMM_Q are being used in the code, and the only place affected by the
ordering of YMMReg and ZMMReg array elements are the memcpy() calls on
kvm_{put,get}_xsave(), where the data always have the same layout?
--
Eduardo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-23 19:49 ` Eduardo Habkost
@ 2014-10-24 1:27 ` Chao Peng
2014-10-24 5:55 ` Paolo Bonzini
0 siblings, 1 reply; 16+ messages in thread
From: Chao Peng @ 2014-10-24 1:27 UTC (permalink / raw)
To: Eduardo Habkost
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Paolo Bonzini, Laszlo Ersek, Andreas Färber
On Thu, Oct 23, 2014 at 05:49:23PM -0200, Eduardo Habkost wrote:
> On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
> [...]
> > @@ -707,6 +714,24 @@ typedef union {
> > } XMMReg;
> >
> > typedef union {
> > + uint8_t _b[32];
> > + uint16_t _w[16];
> > + uint32_t _l[8];
> > + uint64_t _q[4];
> > + float32 _s[8];
> > + float64 _d[4];
> > +} YMMReg;
> > +
> > +typedef union {
> > + uint8_t _b[64];
> > + uint16_t _w[32];
> > + uint32_t _l[16];
> > + uint64_t _q[8];
> > + float32 _s[16];
> > + float64 _d[8];
> > +} ZMMReg;
> > +
> > +typedef union {
> > uint8_t _b[8];
> > uint16_t _w[4];
> > uint32_t _l[2];
> > @@ -725,6 +750,20 @@ typedef struct BNDCSReg {
> > } BNDCSReg;
> >
> > #ifdef HOST_WORDS_BIGENDIAN
> > +#define ZMM_B(n) _b[63 - (n)]
> > +#define ZMM_W(n) _w[31 - (n)]
> > +#define ZMM_L(n) _l[15 - (n)]
> > +#define ZMM_S(n) _s[15 - (n)]
> > +#define ZMM_Q(n) _q[7 - (n)]
> > +#define ZMM_D(n) _d[7 - (n)]
> > +
> > +#define YMM_B(n) _b[31 - (n)]
> > +#define YMM_W(n) _w[15 - (n)]
> > +#define YMM_L(n) _l[7 - (n)]
> > +#define YMM_S(n) _s[7 - (n)]
> > +#define YMM_Q(n) _q[3 - (n)]
> > +#define YMM_D(n) _d[3 - (n)]
> > +
> > #define XMM_B(n) _b[15 - (n)]
> > #define XMM_W(n) _w[7 - (n)]
> > #define XMM_L(n) _l[3 - (n)]
> > @@ -737,6 +776,20 @@ typedef struct BNDCSReg {
> > #define MMX_L(n) _l[1 - (n)]
> > #define MMX_S(n) _s[1 - (n)]
> > #else
> > +#define ZMM_B(n) _b[n]
> > +#define ZMM_W(n) _w[n]
> > +#define ZMM_L(n) _l[n]
> > +#define ZMM_S(n) _s[n]
> > +#define ZMM_Q(n) _q[n]
> > +#define ZMM_D(n) _d[n]
> > +
> > +#define YMM_B(n) _b[n]
> > +#define YMM_W(n) _w[n]
> > +#define YMM_L(n) _l[n]
> > +#define YMM_S(n) _s[n]
> > +#define YMM_Q(n) _q[n]
> > +#define YMM_D(n) _d[n]
> > +
>
> I am probably not being able to see some future use case of those data
> structures, but: why all the extra complexity here, if only ZMM_Q and
> YMM_Q are being used in the code, and the only place affected by the
> ordering of YMMReg and ZMMReg array elements are the memcpy() calls on
> kvm_{put,get}_xsave(), where the data always have the same layout?
>
Thanks Eduardo, then I feel comfortable to drop most of these macros and
only keep YMM_Q/ZMM_Q left. As no acutal benefit for ordering, then I
will also make these two endiness-insensitive.
Chao
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-24 1:27 ` Chao Peng
@ 2014-10-24 5:55 ` Paolo Bonzini
2014-10-24 11:12 ` Eduardo Habkost
2014-10-27 15:48 ` Eduardo Habkost
0 siblings, 2 replies; 16+ messages in thread
From: Paolo Bonzini @ 2014-10-24 5:55 UTC (permalink / raw)
To: Chao Peng, Eduardo Habkost
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Laszlo Ersek, Andreas Färber
On 10/24/2014 03:27 AM, Chao Peng wrote:
> On Thu, Oct 23, 2014 at 05:49:23PM -0200, Eduardo Habkost wrote:
>> On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
>> [...]
>>> @@ -707,6 +714,24 @@ typedef union {
>>> } XMMReg;
>>>
>>> typedef union {
>>> + uint8_t _b[32];
>>> + uint16_t _w[16];
>>> + uint32_t _l[8];
>>> + uint64_t _q[4];
>>> + float32 _s[8];
>>> + float64 _d[4];
>>> +} YMMReg;
>>> +
>>> +typedef union {
>>> + uint8_t _b[64];
>>> + uint16_t _w[32];
>>> + uint32_t _l[16];
>>> + uint64_t _q[8];
>>> + float32 _s[16];
>>> + float64 _d[8];
>>> +} ZMMReg;
>>> +
>>> +typedef union {
>>> uint8_t _b[8];
>>> uint16_t _w[4];
>>> uint32_t _l[2];
>>> @@ -725,6 +750,20 @@ typedef struct BNDCSReg {
>>> } BNDCSReg;
>>>
>>> #ifdef HOST_WORDS_BIGENDIAN
>>> +#define ZMM_B(n) _b[63 - (n)]
>>> +#define ZMM_W(n) _w[31 - (n)]
>>> +#define ZMM_L(n) _l[15 - (n)]
>>> +#define ZMM_S(n) _s[15 - (n)]
>>> +#define ZMM_Q(n) _q[7 - (n)]
>>> +#define ZMM_D(n) _d[7 - (n)]
>>> +
>>> +#define YMM_B(n) _b[31 - (n)]
>>> +#define YMM_W(n) _w[15 - (n)]
>>> +#define YMM_L(n) _l[7 - (n)]
>>> +#define YMM_S(n) _s[7 - (n)]
>>> +#define YMM_Q(n) _q[3 - (n)]
>>> +#define YMM_D(n) _d[3 - (n)]
>>> +
>>> #define XMM_B(n) _b[15 - (n)]
>>> #define XMM_W(n) _w[7 - (n)]
>>> #define XMM_L(n) _l[3 - (n)]
>>> @@ -737,6 +776,20 @@ typedef struct BNDCSReg {
>>> #define MMX_L(n) _l[1 - (n)]
>>> #define MMX_S(n) _s[1 - (n)]
>>> #else
>>> +#define ZMM_B(n) _b[n]
>>> +#define ZMM_W(n) _w[n]
>>> +#define ZMM_L(n) _l[n]
>>> +#define ZMM_S(n) _s[n]
>>> +#define ZMM_Q(n) _q[n]
>>> +#define ZMM_D(n) _d[n]
>>> +
>>> +#define YMM_B(n) _b[n]
>>> +#define YMM_W(n) _w[n]
>>> +#define YMM_L(n) _l[n]
>>> +#define YMM_S(n) _s[n]
>>> +#define YMM_Q(n) _q[n]
>>> +#define YMM_D(n) _d[n]
>>> +
>>
>> I am probably not being able to see some future use case of those data
>> structures, but: why all the extra complexity here, if only ZMM_Q and
>> YMM_Q are being used in the code, and the only place affected by the
>> ordering of YMMReg and ZMMReg array elements are the memcpy() calls on
>> kvm_{put,get}_xsave(), where the data always have the same layout?
>>
>
> Thanks Eduardo, then I feel comfortable to drop most of these macros and
> only keep YMM_Q/ZMM_Q left. As no acutal benefit for ordering, then I
> will also make these two endiness-insensitive.
I think we can keep the macros. The actual cleanup would be to have a
single member for the 32 512-bit ZMM registers, instead of splitting
xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_*
registers. However, we could not use simple memcpy()s to marshal in and
out of the XSAVE data. We can do it in 2.2.
Paolo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-24 5:55 ` Paolo Bonzini
@ 2014-10-24 11:12 ` Eduardo Habkost
2014-10-24 11:38 ` Paolo Bonzini
2014-10-27 15:48 ` Eduardo Habkost
1 sibling, 1 reply; 16+ messages in thread
From: Eduardo Habkost @ 2014-10-24 11:12 UTC (permalink / raw)
To: Paolo Bonzini
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Chao Peng, Laszlo Ersek, Andreas Färber
On Fri, Oct 24, 2014 at 07:55:10AM +0200, Paolo Bonzini wrote:
>
>
> On 10/24/2014 03:27 AM, Chao Peng wrote:
> > On Thu, Oct 23, 2014 at 05:49:23PM -0200, Eduardo Habkost wrote:
> >> On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
> >> [...]
> >>> @@ -707,6 +714,24 @@ typedef union {
> >>> } XMMReg;
> >>>
> >>> typedef union {
> >>> + uint8_t _b[32];
> >>> + uint16_t _w[16];
> >>> + uint32_t _l[8];
> >>> + uint64_t _q[4];
> >>> + float32 _s[8];
> >>> + float64 _d[4];
> >>> +} YMMReg;
> >>> +
> >>> +typedef union {
> >>> + uint8_t _b[64];
> >>> + uint16_t _w[32];
> >>> + uint32_t _l[16];
> >>> + uint64_t _q[8];
> >>> + float32 _s[16];
> >>> + float64 _d[8];
> >>> +} ZMMReg;
> >>> +
> >>> +typedef union {
> >>> uint8_t _b[8];
> >>> uint16_t _w[4];
> >>> uint32_t _l[2];
> >>> @@ -725,6 +750,20 @@ typedef struct BNDCSReg {
> >>> } BNDCSReg;
> >>>
> >>> #ifdef HOST_WORDS_BIGENDIAN
> >>> +#define ZMM_B(n) _b[63 - (n)]
> >>> +#define ZMM_W(n) _w[31 - (n)]
> >>> +#define ZMM_L(n) _l[15 - (n)]
> >>> +#define ZMM_S(n) _s[15 - (n)]
> >>> +#define ZMM_Q(n) _q[7 - (n)]
> >>> +#define ZMM_D(n) _d[7 - (n)]
> >>> +
> >>> +#define YMM_B(n) _b[31 - (n)]
> >>> +#define YMM_W(n) _w[15 - (n)]
> >>> +#define YMM_L(n) _l[7 - (n)]
> >>> +#define YMM_S(n) _s[7 - (n)]
> >>> +#define YMM_Q(n) _q[3 - (n)]
> >>> +#define YMM_D(n) _d[3 - (n)]
> >>> +
> >>> #define XMM_B(n) _b[15 - (n)]
> >>> #define XMM_W(n) _w[7 - (n)]
> >>> #define XMM_L(n) _l[3 - (n)]
> >>> @@ -737,6 +776,20 @@ typedef struct BNDCSReg {
> >>> #define MMX_L(n) _l[1 - (n)]
> >>> #define MMX_S(n) _s[1 - (n)]
> >>> #else
> >>> +#define ZMM_B(n) _b[n]
> >>> +#define ZMM_W(n) _w[n]
> >>> +#define ZMM_L(n) _l[n]
> >>> +#define ZMM_S(n) _s[n]
> >>> +#define ZMM_Q(n) _q[n]
> >>> +#define ZMM_D(n) _d[n]
> >>> +
> >>> +#define YMM_B(n) _b[n]
> >>> +#define YMM_W(n) _w[n]
> >>> +#define YMM_L(n) _l[n]
> >>> +#define YMM_S(n) _s[n]
> >>> +#define YMM_Q(n) _q[n]
> >>> +#define YMM_D(n) _d[n]
> >>> +
> >>
> >> I am probably not being able to see some future use case of those data
> >> structures, but: why all the extra complexity here, if only ZMM_Q and
> >> YMM_Q are being used in the code, and the only place affected by the
> >> ordering of YMMReg and ZMMReg array elements are the memcpy() calls on
> >> kvm_{put,get}_xsave(), where the data always have the same layout?
> >>
> >
> > Thanks Eduardo, then I feel comfortable to drop most of these macros and
> > only keep YMM_Q/ZMM_Q left. As no acutal benefit for ordering, then I
> > will also make these two endiness-insensitive.
>
> I think we can keep the macros. The actual cleanup would be to have a
> single member for the 32 512-bit ZMM registers, instead of splitting
> xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_*
> registers. However, we could not use simple memcpy()s to marshal in and
> out of the XSAVE data.
Agreed. I don't mind keeping those macros in this patch, as this is just
following the existing conventions in the code. Whatever we do to clean
that up, we can do it later.
> We can do it in 2.2.
You mean 2.3, or do you want to clean that up in this release?
--
Eduardo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-24 11:12 ` Eduardo Habkost
@ 2014-10-24 11:38 ` Paolo Bonzini
0 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2014-10-24 11:38 UTC (permalink / raw)
To: Eduardo Habkost
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Chao Peng, Laszlo Ersek, Andreas Färber
On 10/24/2014 01:12 PM, Eduardo Habkost wrote:
>> > I think we can keep the macros. The actual cleanup would be to have a
>> > single member for the 32 512-bit ZMM registers, instead of splitting
>> > xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_*
>> > registers. However, we could not use simple memcpy()s to marshal in and
>> > out of the XSAVE data.
> Agreed. I don't mind keeping those macros in this patch, as this is just
> following the existing conventions in the code. Whatever we do to clean
> that up, we can do it later.
>
>> > We can do it in 2.2.
> You mean 2.3, or do you want to clean that up in this release?
I mean 2.3.
Paolo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-24 5:55 ` Paolo Bonzini
2014-10-24 11:12 ` Eduardo Habkost
@ 2014-10-27 15:48 ` Eduardo Habkost
2014-10-27 15:53 ` Paolo Bonzini
1 sibling, 1 reply; 16+ messages in thread
From: Eduardo Habkost @ 2014-10-27 15:48 UTC (permalink / raw)
To: Paolo Bonzini
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Chao Peng, Laszlo Ersek, Andreas Färber
On Fri, Oct 24, 2014 at 07:55:10AM +0200, Paolo Bonzini wrote:
[...]
> I think we can keep the macros. The actual cleanup would be to have a
> single member for the 32 512-bit ZMM registers, instead of splitting
> xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_*
> registers. However, we could not use simple memcpy()s to marshal in and
> out of the XSAVE data. We can do it in 2.2.
Now, about the actual 2.3 plans: I believe the xmm/ymmh/zmmh/zmm_hi16
separation actually makes the code simpler, because the only code
touching those registers inside QEMU is xsave/vmstate state
loading/saving, where the bits of the ZMM registers are split into
different sections/fields exactly in the same way.
We can still eliminate the YMM_* and ZMM_* macros while keeping
xmm/ymmh/zmmh/zmm_hi16 as separate fields, and having a single zmm_regs
member would just add unnecessary complexity to the
XSAVE<->CPUX86State<->VMState translation code.
(But if one day TCG starts implementing AVX or AVX512, then it may be
worthwhile to move the ZMM registers into a single field.)
--
Eduardo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-27 15:48 ` Eduardo Habkost
@ 2014-10-27 15:53 ` Paolo Bonzini
0 siblings, 0 replies; 16+ messages in thread
From: Paolo Bonzini @ 2014-10-27 15:53 UTC (permalink / raw)
To: Eduardo Habkost
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Chao Peng, Laszlo Ersek, Andreas Färber
On 10/27/2014 04:48 PM, Eduardo Habkost wrote:
> On Fri, Oct 24, 2014 at 07:55:10AM +0200, Paolo Bonzini wrote:
> [...]
>> I think we can keep the macros. The actual cleanup would be to have a
>> single member for the 32 512-bit ZMM registers, instead of splitting
>> xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_*
>> registers. However, we could not use simple memcpy()s to marshal in and
>> out of the XSAVE data. We can do it in 2.2.
>
> Now, about the actual 2.3 plans: I believe the xmm/ymmh/zmmh/zmm_hi16
> separation actually makes the code simpler, because the only code
> touching those registers inside QEMU is xsave/vmstate state
> loading/saving, where the bits of the ZMM registers are split into
> different sections/fields exactly in the same way.
>
> We can still eliminate the YMM_* and ZMM_* macros while keeping
> xmm/ymmh/zmmh/zmm_hi16 as separate fields, and having a single zmm_regs
> member would just add unnecessary complexity to the
> XSAVE<->CPUX86State<->VMState translation code.
>
> (But if one day TCG starts implementing AVX or AVX512, then it may be
> worthwhile to move the ZMM registers into a single field.)
Yes, exactly---or even sooner, if we start dumping the 256 or 512 byte
registers. I think the current separation of xmm/ymmh/zmmh/zmm_hi16 is
a KVM-ism.
Let's see what the patches look like, once they are ready.
Paolo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-23 3:02 [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support Chao Peng
2014-10-23 14:34 ` Paolo Bonzini
2014-10-23 19:49 ` Eduardo Habkost
@ 2014-10-24 16:01 ` Eduardo Habkost
2014-10-27 2:07 ` Chao Peng
2014-11-02 10:19 ` Michael S. Tsirkin
3 siblings, 1 reply; 16+ messages in thread
From: Eduardo Habkost @ 2014-10-24 16:01 UTC (permalink / raw)
To: Chao Peng
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Paolo Bonzini, Laszlo Ersek, Andreas Färber
On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
> Add AVX512 feature bits, register definition and corresponding
> xsave/vmstate support.
>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
[...]
> @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = {
> }, {
> .vmsd = &vmstate_msr_hyperv_time,
> .needed = hyperv_time_enable_needed,
> + }, {
> + .vmsd = &vmstate_avx512,
> + .needed = avx512_needed,
> } , {
> /* empty */
The tab character above needs to be replaced by spaces, but the rest
looks good and matches the Intel docs.
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
--
Eduardo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-24 16:01 ` Eduardo Habkost
@ 2014-10-27 2:07 ` Chao Peng
0 siblings, 0 replies; 16+ messages in thread
From: Chao Peng @ 2014-10-27 2:07 UTC (permalink / raw)
To: Eduardo Habkost, Paolo Bonzini
Cc: kvm, Michael S. Tsirkin, Marcelo Tosatti, qemu-devel,
Vadim Rozenfeld, Laszlo Ersek, Andreas Färber
On Fri, Oct 24, 2014 at 02:01:44PM -0200, Eduardo Habkost wrote:
> On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
> > Add AVX512 feature bits, register definition and corresponding
> > xsave/vmstate support.
> >
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> > ---
> [...]
I agree to the cleanup scheme paolo suggested.
And thanks Eduardo for going thru the spec and double checking for
this.
> > @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = {
> > }, {
> > .vmsd = &vmstate_msr_hyperv_time,
> > .needed = hyperv_time_enable_needed,
> > + }, {
> > + .vmsd = &vmstate_avx512,
> > + .needed = avx512_needed,
> > } , {
> > /* empty */
>
> The tab character above needs to be replaced by spaces, but the rest
> looks good and matches the Intel docs.
>
> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
If this is the only issue, paolo, could you help on this when you
commit? Thank you.
Chao
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-10-23 3:02 [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support Chao Peng
` (2 preceding siblings ...)
2014-10-24 16:01 ` Eduardo Habkost
@ 2014-11-02 10:19 ` Michael S. Tsirkin
2014-11-03 1:53 ` Chao Peng
2014-11-03 11:31 ` Paolo Bonzini
3 siblings, 2 replies; 16+ messages in thread
From: Michael S. Tsirkin @ 2014-11-02 10:19 UTC (permalink / raw)
To: Chao Peng
Cc: kvm, Marcelo Tosatti, qemu-devel, Vadim Rozenfeld, Paolo Bonzini,
Laszlo Ersek, Andreas Färber
On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
> Add AVX512 feature bits, register definition and corresponding
> xsave/vmstate support.
>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Thanks!
As this was first posted after soft freeze, please
resubmit after 2.2 is out.
See schedule http://wiki.qemu.org/Planning/2.2
> ---
> target-i386/cpu.c | 10 ++++--
> target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++
> target-i386/kvm.c | 19 +++++++++++
> target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 175 insertions(+), 2 deletions(-)
>
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index e7bf9de..e91bfbd 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = {
> static const char *cpuid_7_0_ebx_feature_name[] = {
> "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep",
> "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL,
> - NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
> - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
> + "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
> + NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL,
> };
>
> static const char *cpuid_apm_edx_feature_name[] = {
> @@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = {
> .offset = 0x3c0, .size = 0x40 },
> [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
> .offset = 0x400, .size = 0x40 },
> + [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> + .offset = 0x440, .size = 0x40 },
> + [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> + .offset = 0x480, .size = 0x200 },
> + [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> + .offset = 0x680, .size = 0x400 },
> };
>
> const char *get_register_name_32(unsigned int reg)
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 2968749..9f01831 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -395,6 +395,9 @@
> #define XSTATE_YMM (1ULL << 2)
> #define XSTATE_BNDREGS (1ULL << 3)
> #define XSTATE_BNDCSR (1ULL << 4)
> +#define XSTATE_OPMASK (1ULL << 5)
> +#define XSTATE_ZMM_Hi256 (1ULL << 6)
> +#define XSTATE_Hi16_ZMM (1ULL << 7)
>
>
> /* CPUID feature words */
> @@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
> #define CPUID_7_0_EBX_INVPCID (1U << 10)
> #define CPUID_7_0_EBX_RTM (1U << 11)
> #define CPUID_7_0_EBX_MPX (1U << 14)
> +#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */
> #define CPUID_7_0_EBX_RDSEED (1U << 18)
> #define CPUID_7_0_EBX_ADX (1U << 19)
> #define CPUID_7_0_EBX_SMAP (1U << 20)
> +#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */
> +#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */
> +#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */
>
> /* CPUID[0x80000007].EDX flags: */
> #define CPUID_APM_INVTSC (1U << 8)
> @@ -707,6 +714,24 @@ typedef union {
> } XMMReg;
>
> typedef union {
> + uint8_t _b[32];
> + uint16_t _w[16];
> + uint32_t _l[8];
> + uint64_t _q[4];
> + float32 _s[8];
> + float64 _d[4];
> +} YMMReg;
> +
> +typedef union {
> + uint8_t _b[64];
> + uint16_t _w[32];
> + uint32_t _l[16];
> + uint64_t _q[8];
> + float32 _s[16];
> + float64 _d[8];
> +} ZMMReg;
> +
> +typedef union {
> uint8_t _b[8];
> uint16_t _w[4];
> uint32_t _l[2];
> @@ -725,6 +750,20 @@ typedef struct BNDCSReg {
> } BNDCSReg;
>
> #ifdef HOST_WORDS_BIGENDIAN
> +#define ZMM_B(n) _b[63 - (n)]
> +#define ZMM_W(n) _w[31 - (n)]
> +#define ZMM_L(n) _l[15 - (n)]
> +#define ZMM_S(n) _s[15 - (n)]
> +#define ZMM_Q(n) _q[7 - (n)]
> +#define ZMM_D(n) _d[7 - (n)]
> +
> +#define YMM_B(n) _b[31 - (n)]
> +#define YMM_W(n) _w[15 - (n)]
> +#define YMM_L(n) _l[7 - (n)]
> +#define YMM_S(n) _s[7 - (n)]
> +#define YMM_Q(n) _q[3 - (n)]
> +#define YMM_D(n) _d[3 - (n)]
> +
> #define XMM_B(n) _b[15 - (n)]
> #define XMM_W(n) _w[7 - (n)]
> #define XMM_L(n) _l[3 - (n)]
> @@ -737,6 +776,20 @@ typedef struct BNDCSReg {
> #define MMX_L(n) _l[1 - (n)]
> #define MMX_S(n) _s[1 - (n)]
> #else
> +#define ZMM_B(n) _b[n]
> +#define ZMM_W(n) _w[n]
> +#define ZMM_L(n) _l[n]
> +#define ZMM_S(n) _s[n]
> +#define ZMM_Q(n) _q[n]
> +#define ZMM_D(n) _d[n]
> +
> +#define YMM_B(n) _b[n]
> +#define YMM_W(n) _w[n]
> +#define YMM_L(n) _l[n]
> +#define YMM_S(n) _s[n]
> +#define YMM_Q(n) _q[n]
> +#define YMM_D(n) _d[n]
> +
> #define XMM_B(n) _b[n]
> #define XMM_W(n) _w[n]
> #define XMM_L(n) _l[n]
> @@ -775,6 +828,8 @@ typedef struct {
>
> #define NB_MMU_MODES 3
>
> +#define NB_OPMASK_REGS 8
> +
> typedef enum TPRAccess {
> TPR_ACCESS_READ,
> TPR_ACCESS_WRITE,
> @@ -839,6 +894,12 @@ typedef struct CPUX86State {
>
> XMMReg ymmh_regs[CPU_NB_REGS];
>
> + uint64_t opmask_regs[NB_OPMASK_REGS];
> + YMMReg zmmh_regs[CPU_NB_REGS];
> +#ifdef TARGET_X86_64
> + ZMMReg hi16_zmm_regs[CPU_NB_REGS];
> +#endif
> +
> /* sysenter registers */
> uint32_t sysenter_cs;
> target_ulong sysenter_esp;
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index ddedc73..ccf36e8 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu)
> #define XSAVE_YMMH_SPACE 144
> #define XSAVE_BNDREGS 240
> #define XSAVE_BNDCSR 256
> +#define XSAVE_OPMASK 272
> +#define XSAVE_ZMM_Hi256 288
> +#define XSAVE_Hi16_ZMM 416
>
> static int kvm_put_xsave(X86CPU *cpu)
> {
> @@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu)
> sizeof env->bnd_regs);
> memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
> sizeof(env->bndcs_regs));
> + memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
> + sizeof env->opmask_regs);
> + memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
> + sizeof env->zmmh_regs);
> +#ifdef TARGET_X86_64
> + memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
> + sizeof env->hi16_zmm_regs);
> +#endif
> r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
> return r;
> }
> @@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu)
> sizeof env->bnd_regs);
> memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
> sizeof(env->bndcs_regs));
> + memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
> + sizeof env->opmask_regs);
> + memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
> + sizeof env->zmmh_regs);
> +#ifdef TARGET_X86_64
> + memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
> + sizeof env->hi16_zmm_regs);
> +#endif
> return 0;
> }
>
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index 0dd49f0..708fc54 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = {
> #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \
> VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg)
>
> +static const VMStateDescription vmstate_zmmh_reg = {
> + .name = "zmmh_reg",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(YMM_Q(0), YMMReg),
> + VMSTATE_UINT64(YMM_Q(1), YMMReg),
> + VMSTATE_UINT64(YMM_Q(2), YMMReg),
> + VMSTATE_UINT64(YMM_Q(3), YMMReg),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> +#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \
> + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg)
> +
> +#ifdef TARGET_X86_64
> +static const VMStateDescription vmstate_hi16_zmm_reg = {
> + .name = "hi16_zmm_reg",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(ZMM_Q(0), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(1), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(2), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(3), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(4), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(5), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(6), ZMMReg),
> + VMSTATE_UINT64(ZMM_Q(7), ZMMReg),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> +#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \
> + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg)
> +#endif
> +
> static const VMStateDescription vmstate_bnd_regs = {
> .name = "bnd_regs",
> .version_id = 1,
> @@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
> }
> };
>
> +static bool avx512_needed(void *opaque)
> +{
> + X86CPU *cpu = opaque;
> + CPUX86State *env = &cpu->env;
> + unsigned int i;
> +
> + for (i = 0; i < NB_OPMASK_REGS; i++) {
> + if (env->opmask_regs[i]) {
> + return true;
> + }
> + }
> +
> + for (i = 0; i < CPU_NB_REGS; i++) {
> +#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field))
> + if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) ||
> + ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) {
> + return true;
> + }
> +#ifdef TARGET_X86_64
> +#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field))
> + if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) ||
> + ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) ||
> + ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) ||
> + ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) {
> + return true;
> + }
> +#endif
> + }
> +
> + return false;
> +}
> +
> +static const VMStateDescription vmstate_avx512 = {
> + .name = "cpu/avx512",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
> + VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS),
> +#ifdef TARGET_X86_64
> + VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS),
> +#endif
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> VMStateDescription vmstate_x86_cpu = {
> .name = "cpu",
> .version_id = 12,
> @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = {
> }, {
> .vmsd = &vmstate_msr_hyperv_time,
> .needed = hyperv_time_enable_needed,
> + }, {
> + .vmsd = &vmstate_avx512,
> + .needed = avx512_needed,
> } , {
> /* empty */
> }
> --
> 1.7.9.5
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-11-02 10:19 ` Michael S. Tsirkin
@ 2014-11-03 1:53 ` Chao Peng
2014-11-03 11:31 ` Paolo Bonzini
1 sibling, 0 replies; 16+ messages in thread
From: Chao Peng @ 2014-11-03 1:53 UTC (permalink / raw)
To: Michael S. Tsirkin, Paolo Bonzini
Cc: kvm, Marcelo Tosatti, qemu-devel, Vadim Rozenfeld, Laszlo Ersek,
Andreas Färber
On Sun, Nov 02, 2014 at 12:19:09PM +0200, Michael S. Tsirkin wrote:
> On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
> > Add AVX512 feature bits, register definition and corresponding
> > xsave/vmstate support.
> >
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
>
> Thanks!
>
> As this was first posted after soft freeze, please
> resubmit after 2.2 is out.
>
> See schedule http://wiki.qemu.org/Planning/2.2
Sorry I didn't notice this. But I think Paolo already merged this patch
and it's now in the qemu main tree(9aecd6f). So what else I can do for this?
Chao
>
> > ---
> > target-i386/cpu.c | 10 ++++--
> > target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++
> > target-i386/kvm.c | 19 +++++++++++
> > target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
> > 4 files changed, 175 insertions(+), 2 deletions(-)
> >
> > diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> > index e7bf9de..e91bfbd 100644
> > --- a/target-i386/cpu.c
> > +++ b/target-i386/cpu.c
> > @@ -259,8 +259,8 @@ static const char *svm_feature_name[] = {
> > static const char *cpuid_7_0_ebx_feature_name[] = {
> > "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep",
> > "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL,
> > - NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
> > - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
> > + "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
> > + NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL,
> > };
> >
> > static const char *cpuid_apm_edx_feature_name[] = {
> > @@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = {
> > .offset = 0x3c0, .size = 0x40 },
> > [4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
> > .offset = 0x400, .size = 0x40 },
> > + [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> > + .offset = 0x440, .size = 0x40 },
> > + [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> > + .offset = 0x480, .size = 0x200 },
> > + [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
> > + .offset = 0x680, .size = 0x400 },
> > };
> >
> > const char *get_register_name_32(unsigned int reg)
> > diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> > index 2968749..9f01831 100644
> > --- a/target-i386/cpu.h
> > +++ b/target-i386/cpu.h
> > @@ -395,6 +395,9 @@
> > #define XSTATE_YMM (1ULL << 2)
> > #define XSTATE_BNDREGS (1ULL << 3)
> > #define XSTATE_BNDCSR (1ULL << 4)
> > +#define XSTATE_OPMASK (1ULL << 5)
> > +#define XSTATE_ZMM_Hi256 (1ULL << 6)
> > +#define XSTATE_Hi16_ZMM (1ULL << 7)
> >
> >
> > /* CPUID feature words */
> > @@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
> > #define CPUID_7_0_EBX_INVPCID (1U << 10)
> > #define CPUID_7_0_EBX_RTM (1U << 11)
> > #define CPUID_7_0_EBX_MPX (1U << 14)
> > +#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */
> > #define CPUID_7_0_EBX_RDSEED (1U << 18)
> > #define CPUID_7_0_EBX_ADX (1U << 19)
> > #define CPUID_7_0_EBX_SMAP (1U << 20)
> > +#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */
> > +#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */
> > +#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */
> >
> > /* CPUID[0x80000007].EDX flags: */
> > #define CPUID_APM_INVTSC (1U << 8)
> > @@ -707,6 +714,24 @@ typedef union {
> > } XMMReg;
> >
> > typedef union {
> > + uint8_t _b[32];
> > + uint16_t _w[16];
> > + uint32_t _l[8];
> > + uint64_t _q[4];
> > + float32 _s[8];
> > + float64 _d[4];
> > +} YMMReg;
> > +
> > +typedef union {
> > + uint8_t _b[64];
> > + uint16_t _w[32];
> > + uint32_t _l[16];
> > + uint64_t _q[8];
> > + float32 _s[16];
> > + float64 _d[8];
> > +} ZMMReg;
> > +
> > +typedef union {
> > uint8_t _b[8];
> > uint16_t _w[4];
> > uint32_t _l[2];
> > @@ -725,6 +750,20 @@ typedef struct BNDCSReg {
> > } BNDCSReg;
> >
> > #ifdef HOST_WORDS_BIGENDIAN
> > +#define ZMM_B(n) _b[63 - (n)]
> > +#define ZMM_W(n) _w[31 - (n)]
> > +#define ZMM_L(n) _l[15 - (n)]
> > +#define ZMM_S(n) _s[15 - (n)]
> > +#define ZMM_Q(n) _q[7 - (n)]
> > +#define ZMM_D(n) _d[7 - (n)]
> > +
> > +#define YMM_B(n) _b[31 - (n)]
> > +#define YMM_W(n) _w[15 - (n)]
> > +#define YMM_L(n) _l[7 - (n)]
> > +#define YMM_S(n) _s[7 - (n)]
> > +#define YMM_Q(n) _q[3 - (n)]
> > +#define YMM_D(n) _d[3 - (n)]
> > +
> > #define XMM_B(n) _b[15 - (n)]
> > #define XMM_W(n) _w[7 - (n)]
> > #define XMM_L(n) _l[3 - (n)]
> > @@ -737,6 +776,20 @@ typedef struct BNDCSReg {
> > #define MMX_L(n) _l[1 - (n)]
> > #define MMX_S(n) _s[1 - (n)]
> > #else
> > +#define ZMM_B(n) _b[n]
> > +#define ZMM_W(n) _w[n]
> > +#define ZMM_L(n) _l[n]
> > +#define ZMM_S(n) _s[n]
> > +#define ZMM_Q(n) _q[n]
> > +#define ZMM_D(n) _d[n]
> > +
> > +#define YMM_B(n) _b[n]
> > +#define YMM_W(n) _w[n]
> > +#define YMM_L(n) _l[n]
> > +#define YMM_S(n) _s[n]
> > +#define YMM_Q(n) _q[n]
> > +#define YMM_D(n) _d[n]
> > +
> > #define XMM_B(n) _b[n]
> > #define XMM_W(n) _w[n]
> > #define XMM_L(n) _l[n]
> > @@ -775,6 +828,8 @@ typedef struct {
> >
> > #define NB_MMU_MODES 3
> >
> > +#define NB_OPMASK_REGS 8
> > +
> > typedef enum TPRAccess {
> > TPR_ACCESS_READ,
> > TPR_ACCESS_WRITE,
> > @@ -839,6 +894,12 @@ typedef struct CPUX86State {
> >
> > XMMReg ymmh_regs[CPU_NB_REGS];
> >
> > + uint64_t opmask_regs[NB_OPMASK_REGS];
> > + YMMReg zmmh_regs[CPU_NB_REGS];
> > +#ifdef TARGET_X86_64
> > + ZMMReg hi16_zmm_regs[CPU_NB_REGS];
> > +#endif
> > +
> > /* sysenter registers */
> > uint32_t sysenter_cs;
> > target_ulong sysenter_esp;
> > diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> > index ddedc73..ccf36e8 100644
> > --- a/target-i386/kvm.c
> > +++ b/target-i386/kvm.c
> > @@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu)
> > #define XSAVE_YMMH_SPACE 144
> > #define XSAVE_BNDREGS 240
> > #define XSAVE_BNDCSR 256
> > +#define XSAVE_OPMASK 272
> > +#define XSAVE_ZMM_Hi256 288
> > +#define XSAVE_Hi16_ZMM 416
> >
> > static int kvm_put_xsave(X86CPU *cpu)
> > {
> > @@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu)
> > sizeof env->bnd_regs);
> > memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
> > sizeof(env->bndcs_regs));
> > + memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
> > + sizeof env->opmask_regs);
> > + memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
> > + sizeof env->zmmh_regs);
> > +#ifdef TARGET_X86_64
> > + memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
> > + sizeof env->hi16_zmm_regs);
> > +#endif
> > r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
> > return r;
> > }
> > @@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu)
> > sizeof env->bnd_regs);
> > memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
> > sizeof(env->bndcs_regs));
> > + memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
> > + sizeof env->opmask_regs);
> > + memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
> > + sizeof env->zmmh_regs);
> > +#ifdef TARGET_X86_64
> > + memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
> > + sizeof env->hi16_zmm_regs);
> > +#endif
> > return 0;
> > }
> >
> > diff --git a/target-i386/machine.c b/target-i386/machine.c
> > index 0dd49f0..708fc54 100644
> > --- a/target-i386/machine.c
> > +++ b/target-i386/machine.c
> > @@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = {
> > #define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \
> > VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg)
> >
> > +static const VMStateDescription vmstate_zmmh_reg = {
> > + .name = "zmmh_reg",
> > + .version_id = 1,
> > + .minimum_version_id = 1,
> > + .fields = (VMStateField[]) {
> > + VMSTATE_UINT64(YMM_Q(0), YMMReg),
> > + VMSTATE_UINT64(YMM_Q(1), YMMReg),
> > + VMSTATE_UINT64(YMM_Q(2), YMMReg),
> > + VMSTATE_UINT64(YMM_Q(3), YMMReg),
> > + VMSTATE_END_OF_LIST()
> > + }
> > +};
> > +
> > +#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \
> > + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg)
> > +
> > +#ifdef TARGET_X86_64
> > +static const VMStateDescription vmstate_hi16_zmm_reg = {
> > + .name = "hi16_zmm_reg",
> > + .version_id = 1,
> > + .minimum_version_id = 1,
> > + .fields = (VMStateField[]) {
> > + VMSTATE_UINT64(ZMM_Q(0), ZMMReg),
> > + VMSTATE_UINT64(ZMM_Q(1), ZMMReg),
> > + VMSTATE_UINT64(ZMM_Q(2), ZMMReg),
> > + VMSTATE_UINT64(ZMM_Q(3), ZMMReg),
> > + VMSTATE_UINT64(ZMM_Q(4), ZMMReg),
> > + VMSTATE_UINT64(ZMM_Q(5), ZMMReg),
> > + VMSTATE_UINT64(ZMM_Q(6), ZMMReg),
> > + VMSTATE_UINT64(ZMM_Q(7), ZMMReg),
> > + VMSTATE_END_OF_LIST()
> > + }
> > +};
> > +
> > +#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \
> > + VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg)
> > +#endif
> > +
> > static const VMStateDescription vmstate_bnd_regs = {
> > .name = "bnd_regs",
> > .version_id = 1,
> > @@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
> > }
> > };
> >
> > +static bool avx512_needed(void *opaque)
> > +{
> > + X86CPU *cpu = opaque;
> > + CPUX86State *env = &cpu->env;
> > + unsigned int i;
> > +
> > + for (i = 0; i < NB_OPMASK_REGS; i++) {
> > + if (env->opmask_regs[i]) {
> > + return true;
> > + }
> > + }
> > +
> > + for (i = 0; i < CPU_NB_REGS; i++) {
> > +#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field))
> > + if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) ||
> > + ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) {
> > + return true;
> > + }
> > +#ifdef TARGET_X86_64
> > +#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field))
> > + if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) ||
> > + ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) ||
> > + ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) ||
> > + ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) {
> > + return true;
> > + }
> > +#endif
> > + }
> > +
> > + return false;
> > +}
> > +
> > +static const VMStateDescription vmstate_avx512 = {
> > + .name = "cpu/avx512",
> > + .version_id = 1,
> > + .minimum_version_id = 1,
> > + .fields = (VMStateField[]) {
> > + VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
> > + VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS),
> > +#ifdef TARGET_X86_64
> > + VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS),
> > +#endif
> > + VMSTATE_END_OF_LIST()
> > + }
> > +};
> > +
> > VMStateDescription vmstate_x86_cpu = {
> > .name = "cpu",
> > .version_id = 12,
> > @@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = {
> > }, {
> > .vmsd = &vmstate_msr_hyperv_time,
> > .needed = hyperv_time_enable_needed,
> > + }, {
> > + .vmsd = &vmstate_avx512,
> > + .needed = avx512_needed,
> > } , {
> > /* empty */
> > }
> > --
> > 1.7.9.5
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-11-02 10:19 ` Michael S. Tsirkin
2014-11-03 1:53 ` Chao Peng
@ 2014-11-03 11:31 ` Paolo Bonzini
2014-11-03 12:34 ` Michael S. Tsirkin
1 sibling, 1 reply; 16+ messages in thread
From: Paolo Bonzini @ 2014-11-03 11:31 UTC (permalink / raw)
To: Michael S. Tsirkin, Chao Peng
Cc: kvm, Marcelo Tosatti, qemu-devel, Vadim Rozenfeld, Laszlo Ersek,
Andreas Färber
On 02/11/2014 11:19, Michael S. Tsirkin wrote:
> > Add AVX512 feature bits, register definition and corresponding
> > xsave/vmstate support.
> >
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
>
> Thanks!
>
> As this was first posted after soft freeze, please
> resubmit after 2.2 is out.
>
> See schedule http://wiki.qemu.org/Planning/2.2
Actually this is already in.
While "Planning/2.2" says "all features should have patches posted on
the mailing list", the actual page describing soft feature freeze is
more lenient:
By the date of the soft feature freeze, any non-trivial feature
should have some code posted to the qemu-devel mailing list if it's
targeting a given release. Major features, and in particular
features with a high likelihood of breaking things, should already
be in the process of being merged.
This patch was fairly trivial, as it only adds a few memcpys and a
subsection. Likelihood of breaking things is basically zero because
AVX512 does not yet exist in silicon. Eduardo reviewed the patch
promptly and agreed with putting it in 2.2, so that's what I did.
Paolo
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [Qemu-devel] [PATCH] target-i386: add Intel AVX-512 support
2014-11-03 11:31 ` Paolo Bonzini
@ 2014-11-03 12:34 ` Michael S. Tsirkin
0 siblings, 0 replies; 16+ messages in thread
From: Michael S. Tsirkin @ 2014-11-03 12:34 UTC (permalink / raw)
To: Paolo Bonzini
Cc: kvm, Marcelo Tosatti, qemu-devel, Vadim Rozenfeld, Chao Peng,
Laszlo Ersek, Andreas Färber
On Mon, Nov 03, 2014 at 12:31:42PM +0100, Paolo Bonzini wrote:
>
> On 02/11/2014 11:19, Michael S. Tsirkin wrote:
> > > Add AVX512 feature bits, register definition and corresponding
> > > xsave/vmstate support.
> > >
> > > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> >
> > Thanks!
> >
> > As this was first posted after soft freeze, please
> > resubmit after 2.2 is out.
> >
> > See schedule http://wiki.qemu.org/Planning/2.2
>
> Actually this is already in.
>
> While "Planning/2.2" says "all features should have patches posted on
> the mailing list", the actual page describing soft feature freeze is
> more lenient:
>
> By the date of the soft feature freeze, any non-trivial feature
> should have some code posted to the qemu-devel mailing list if it's
> targeting a given release. Major features, and in particular
> features with a high likelihood of breaking things, should already
> be in the process of being merged.
>
> This patch was fairly trivial, as it only adds a few memcpys and a
> subsection. Likelihood of breaking things is basically zero because
> AVX512 does not yet exist in silicon. Eduardo reviewed the patch
> promptly and agreed with putting it in 2.2, so that's what I did.
>
> Paolo
Oh that's fine.
I was not trying to argue, I didn't notice it was in
and thought I'm asked to merge it.
It's always a judgement call, I trust you did the right thing.
Thanks and sorry about the noise.
--
MST
^ permalink raw reply [flat|nested] 16+ messages in thread