* [PATCH 1/1] KVM: X86: add the support of XSAVE/XRSTOR to guest
@ 2010-04-29 5:22 Dexuan Cui
From: Dexuan Cui @ 2010-04-29 5:22 UTC (permalink / raw)
To: kvm, avi; +Cc: dexuan.cui, sheng.yang
When the host enables XSAVE/XRSTOR, the patch exposes the XSAVE/XRSTOR
related CPUID leaves to the guest by fixing up kvm_emulate_cpuid(), and it
allows the guest to set CR4.OSXSAVE to enable XSAVE.
The patch adds per-vcpu host/guest xstate images and masks, and augments the
current FXSAVE/FXRSTOR host state switch with XSAVE/XRSTOR for the full
xstate (FPU/SSE/YMM) when the processor supports it.
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
---
arch/x86/include/asm/kvm_host.h | 15 +--
arch/x86/include/asm/vmx.h | 1 +
arch/x86/include/asm/xsave.h | 3 +
arch/x86/kvm/vmx.c | 24 +++++
arch/x86/kvm/x86.c | 217 +++++++++++++++++++++++++++++++++++++--
5 files changed, 242 insertions(+), 18 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3f0007b..60be1a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -303,6 +303,11 @@ struct kvm_vcpu_arch {
struct i387_fxsave_struct host_fx_image;
struct i387_fxsave_struct guest_fx_image;
+ struct xsave_struct *host_xstate_image;
+ struct xsave_struct *guest_xstate_image;
+ uint64_t host_xstate_mask;
+ uint64_t guest_xstate_mask;
+
gva_t mmio_fault_cr2;
struct kvm_pio_request pio;
void *pio_data;
@@ -718,16 +723,6 @@ static inline unsigned long read_msr(unsigned long msr)
}
#endif
-static inline void kvm_fx_save(struct i387_fxsave_struct *image)
-{
- asm("fxsave (%0)":: "r" (image));
-}
-
-static inline void kvm_fx_restore(struct i387_fxsave_struct *image)
-{
- asm("fxrstor (%0)":: "r" (image));
-}
-
static inline void kvm_fx_finit(void)
{
asm("finit");
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index fb9a080..842286b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -260,6 +260,7 @@ enum vmcs_field {
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
/*
* Interruption-information format
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index ddc04cc..ada81a2 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -13,6 +13,9 @@
#define FXSAVE_SIZE 512
+#define XSTATE_YMM_SIZE 256
+#define XSTATE_YMM_OFFSET (512 + 64)
+
/*
* These are the features that the OS can handle currently.
*/
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 875b785..a72d024 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -35,6 +35,8 @@
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
#include "trace.h"
@@ -2517,6 +2519,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
if (enable_ept)
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
+ if (cpu_has_xsave)
+ vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE;
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
@@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+ u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX)) |
+ kvm_register_read(vcpu, VCPU_REGS_RAX);
+ u64 host_bv = vcpu->arch.host_xstate_mask;
+
+ if (((new_bv ^ host_bv) & ~host_bv) || !(new_bv & 1))
+ goto err;
+ if ((host_bv & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE))
+ goto err;
+ vcpu->arch.guest_xstate_mask = new_bv;
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+ skip_emulated_instruction(vcpu);
+ return 1;
+err:
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+}
+
static int handle_apic_access(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
@@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_WBINVD] = handle_wbinvd,
+ [EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b2ce1d..2af3fbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -52,6 +52,8 @@
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -62,6 +64,7 @@
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0) \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -4017,6 +4020,36 @@ void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
+static struct kmem_cache *kvm_xstate_cachep;
+static unsigned int kvm_xstate_size;
+
+static int kvm_alloc_xstate_cachep(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (!cpu_has_xsave)
+ return 0;
+
+ cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ kvm_xstate_size = ebx;
+ kvm_xstate_cachep =
+ kmem_cache_create("kvm_vcpu_xstate", kvm_xstate_size,
+ __alignof__(union thread_xstate), 0, NULL);
+ if (!kvm_xstate_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void kvm_free_xstate_cachep(void)
+{
+ if (!kvm_xstate_cachep)
+ return;
+
+ kmem_cache_destroy(kvm_xstate_cachep);
+ kvm_xstate_cachep = NULL;
+}
+
int kvm_arch_init(void *opaque)
{
int r;
@@ -4039,6 +4072,10 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ r = kvm_alloc_xstate_cachep();
+ if (r)
+ goto out;
+
r = kvm_mmu_module_init();
if (r)
goto out;
@@ -4058,6 +4095,7 @@ int kvm_arch_init(void *opaque)
return 0;
out:
+ kvm_free_xstate_cachep();
return r;
}
@@ -4070,6 +4108,7 @@ void kvm_arch_exit(void)
CPUFREQ_TRANSITION_NOTIFIER);
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
+ kvm_free_xstate_cachep();
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -4307,6 +4346,65 @@ not_found:
return 36;
}
+#define bitmaskof(idx) (1U << ((idx) & 31))
+static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx)
+{
+ u32 eax, ebx, ecx, edx;
+
+ if (func != 0 && func != 1 && func != 0xd)
+ return;
+
+ eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
+ ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
+
+ switch (func) {
+ case 0:
+ /* fixup the Maximum Input Value */
+ if (cpu_has_xsave && eax < 0xd)
+ eax = 0xd;
+ break;
+ case 1:
+ ecx &= ~(bitmaskof(X86_FEATURE_XSAVE) |
+ bitmaskof(X86_FEATURE_OSXSAVE));
+ if (!cpu_has_xsave)
+ break;
+ ecx |= bitmaskof(X86_FEATURE_XSAVE);
+ if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+ ecx |= bitmaskof(X86_FEATURE_OSXSAVE);
+ break;
+ case 0xd:
+ eax = ebx = ecx = edx = 0;
+ if (!cpu_has_xsave)
+ break;
+ switch (idx) {
+ case 0:
+ eax = vcpu->arch.host_xstate_mask & XCNTXT_MASK;
+ /* FP/SSE + XSAVE.HEADER + YMM. */
+ ecx = 512 + 64;
+ if (eax & XSTATE_YMM)
+ ecx += XSTATE_YMM_SIZE;
+ ebx = ecx;
+ break;
+ case 2:
+ if (!(vcpu->arch.host_xstate_mask & XSTATE_YMM))
+ break;
+ eax = XSTATE_YMM_SIZE;
+ ebx = XSTATE_YMM_OFFSET;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
+ kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+}
+
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
u32 function, index;
@@ -4325,6 +4423,9 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
}
+
+ kvm_emulate_cpuid_fixup(vcpu, function, index);
+
kvm_x86_ops->skip_emulated_instruction(vcpu);
trace_kvm_cpuid(function,
kvm_register_read(vcpu, VCPU_REGS_RAX),
@@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)
+ : "memory");
+ vcpu->arch.host_xstate_mask =
+ xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ } else {
+ asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image));
+ }
+}
+
+static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)
+ : "memory");
+ vcpu->arch.guest_xstate_mask =
+ xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ } else {
+ asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+ }
+}
+
+static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask);
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image));
+ } else {
+ asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image));
+ }
+}
+
+static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_xsave) {
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
+ asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
+ : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image));
+ } else {
+ asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image));
+ }
+}
+
void fx_init(struct kvm_vcpu *vcpu)
{
unsigned after_mxcsr_mask;
@@ -5102,17 +5257,21 @@ void fx_init(struct kvm_vcpu *vcpu)
* allocate ram with GFP_KERNEL.
*/
if (!used_math())
- kvm_fx_save(&vcpu->arch.host_fx_image);
+ kvm_fx_save_host(vcpu);
/* Initialize guest FPU by resetting ours and saving into guest's */
preempt_disable();
- kvm_fx_save(&vcpu->arch.host_fx_image);
+ kvm_fx_save_host(vcpu);
kvm_fx_finit();
- kvm_fx_save(&vcpu->arch.guest_fx_image);
- kvm_fx_restore(&vcpu->arch.host_fx_image);
+ kvm_fx_save_guest(vcpu);
+ kvm_fx_restore_host(vcpu);
preempt_enable();
vcpu->arch.cr0 |= X86_CR0_ET;
+
+ if (cpu_has_xsave)
+ return;
+
after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
@@ -5126,8 +5285,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
return;
vcpu->guest_fpu_loaded = 1;
- kvm_fx_save(&vcpu->arch.host_fx_image);
- kvm_fx_restore(&vcpu->arch.guest_fx_image);
+ kvm_fx_save_host(vcpu);
+ kvm_fx_restore_guest(vcpu);
trace_kvm_fpu(1);
}
@@ -5137,13 +5296,50 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
return;
vcpu->guest_fpu_loaded = 0;
- kvm_fx_save(&vcpu->arch.guest_fx_image);
- kvm_fx_restore(&vcpu->arch.host_fx_image);
+ kvm_fx_save_guest(vcpu);
+ kvm_fx_restore_host(vcpu);
++vcpu->stat.fpu_reload;
set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
trace_kvm_fpu(0);
}
+static void kvm_arch_vcpu_destroy_xstate_image(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.guest_xstate_image)
+ kmem_cache_free(kvm_xstate_cachep,
+ vcpu->arch.guest_xstate_image);
+ if (vcpu->arch.host_xstate_image)
+ kmem_cache_free(kvm_xstate_cachep,
+ vcpu->arch.host_xstate_image);
+ vcpu->arch.guest_xstate_image = NULL;
+ vcpu->arch.host_xstate_image = NULL;
+}
+
+static int kvm_arch_vcpu_create_xstate_image(struct kvm_vcpu *vcpu)
+{
+ if (!cpu_has_xsave)
+ return 0;
+
+ if (!vcpu->arch.guest_xstate_image) {
+ vcpu->arch.guest_xstate_image =
+ kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL);
+ if (!vcpu->arch.guest_xstate_image)
+ goto err;
+ }
+ if (!vcpu->arch.host_xstate_image) {
+ vcpu->arch.host_xstate_image =
+ kmem_cache_zalloc(kvm_xstate_cachep, GFP_KERNEL);
+ if (!vcpu->arch.host_xstate_image)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ kvm_arch_vcpu_destroy_xstate_image(vcpu);
+ return -ENOMEM;
+}
+
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.time_page) {
@@ -5152,6 +5348,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
}
kvm_x86_ops->vcpu_free(vcpu);
+ kvm_arch_vcpu_destroy_xstate_image(vcpu);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
@@ -5189,6 +5386,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
vcpu_put(vcpu);
kvm_x86_ops->vcpu_free(vcpu);
+ kvm_arch_vcpu_destroy_xstate_image(vcpu);
}
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -5201,6 +5399,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
vcpu->arch.dr6 = DR6_FIXED_1;
vcpu->arch.dr7 = DR7_FIXED_1;
+ if (kvm_arch_vcpu_create_xstate_image(vcpu) < 0)
+ return -ENOMEM;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
--
1.6.0
* Re: [PATCH 1/1] KVM: X86: add the support of XSAVE/XRSTOR to guest
From: Avi Kivity @ 2010-05-02 14:13 UTC (permalink / raw)
To: Dexuan Cui; +Cc: kvm, sheng.yang
On 04/29/2010 08:22 AM, Dexuan Cui wrote:
> When the host enables XSAVE/XRSTOR, the patch exposes the XSAVE/XRSTOR
> related CPUID leaves to guest by fixing up kvm_emulate_cpuid() and the
> patch allows guest to set CR4.OSXSAVE to enable XSAVE.
> The patch adds per-vcpu host/guest xstate image/mask and enhances the
> current FXSAVE/FRSTOR with the new XSAVE/XRSTOR on the host xstate
> (FPU/SSE/YMM) switch.
>
>
> 5 files changed, 242 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3f0007b..60be1a7 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -303,6 +303,11 @@ struct kvm_vcpu_arch {
> struct i387_fxsave_struct host_fx_image;
> struct i387_fxsave_struct guest_fx_image;
>
> + struct xsave_struct *host_xstate_image;
> + struct xsave_struct *guest_xstate_image;
> + uint64_t host_xstate_mask;
>
Does host_xstate_mask need to be per-vcpu, or can it be global?
> + uint64_t guest_xstate_mask;
>
Can be called xcr0, like other shadow registers.
> +
> gva_t mmio_fault_cr2;
> struct kvm_pio_request pio;
> void *pio_data;
>
>
> @@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +static int handle_xsetbv(struct kvm_vcpu *vcpu)
> +{
> + u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX)) |
> + kvm_register_read(vcpu, VCPU_REGS_RAX);
>
Missing shift?
Probably worthwhile to create a helper for reading/writing edx:eax into
a u64.
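For illustration, such a helper might look like this (the name is hypothetical); it would also fold in the missing shift noted above:

    static u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
    {
            u64 hi = kvm_register_read(vcpu, VCPU_REGS_RDX) & 0xffffffffu;
            u64 lo = kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffffu;

            return (hi << 32) | lo;
    }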
> + u64 host_bv = vcpu->arch.host_xstate_mask;
>
What about ecx?
> +
> + if (((new_bv ^ host_bv) & ~host_bv)
Isn't (new_bv & ~host_bv) equivalent? (guest cannot exceed host...)
> || !(new_bv & 1))
>
Symbolic value or comment.
> + goto err;
> + if ((host_bv & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE))
>
host_bv unneeded, I think.
> + goto err;
> + vcpu->arch.guest_xstate_mask = new_bv;
> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
>
Can't we run with the host xcr0? isn't it guaranteed to be a superset
of the guest xcr0?
> + skip_emulated_instruction(vcpu);
> + return 1;
> +err:
> + kvm_inject_gp(vcpu, 0);
>
Need to #UD in some circumstances.
> + return 1;
> +}
> +
> static int handle_apic_access(struct kvm_vcpu *vcpu)
> {
> unsigned long exit_qualification;
> @@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
> [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
> [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
> [EXIT_REASON_WBINVD] = handle_wbinvd,
> + [EXIT_REASON_XSETBV] = handle_xsetbv,
> [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
> [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
> [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6b2ce1d..2af3fbe 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -52,6 +52,8 @@
> #include <asm/desc.h>
> #include <asm/mtrr.h>
> #include <asm/mce.h>
> +#include <asm/i387.h>
> +#include <asm/xcr.h>
>
> #define MAX_IO_MSRS 256
> #define CR0_RESERVED_BITS \
> @@ -62,6 +64,7 @@
> (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
> | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
> | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
> + | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0) \
> | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
>
It also depends on the guest cpuid value. Please add it outside the
macro, it's confusing to read something that looks like a constant but
isn't.
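For illustration, one way to do that (guest_cpuid_has_xsave() is a hypothetical helper, not an existing one) would be to keep the reserved-bits macro constant and check the bit separately in kvm_set_cr4():

    if ((cr4 & X86_CR4_OSXSAVE) && !guest_cpuid_has_xsave(vcpu)) {
            kvm_inject_gp(vcpu, 0);
            return;
    }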
> int kvm_emulate_halt(struct kvm_vcpu *vcpu)
> @@ -4307,6 +4346,65 @@ not_found:
> return 36;
> }
>
> +#define bitmaskof(idx) (1U << ((idx) & 31))
> +static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx)
> +{
> + u32 eax, ebx, ecx, edx;
> +
> + if (func != 0 && func != 1 && func != 0xd)
> + return;
> +
> + eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
> + ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
> + ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
> + edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
> +
> + switch (func) {
> + case 0:
> + /* fixup the Maximum Input Value */
> + if (cpu_has_xsave && eax < 0xd)
> + eax = 0xd;
> + break;
> + case 1:
> + ecx &= ~(bitmaskof(X86_FEATURE_XSAVE) |
> + bitmaskof(X86_FEATURE_OSXSAVE));
> + if (!cpu_has_xsave)
> + break;
> + ecx |= bitmaskof(X86_FEATURE_XSAVE);
> + if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
> + ecx |= bitmaskof(X86_FEATURE_OSXSAVE);
> + break;
> + case 0xd:
> + eax = ebx = ecx = edx = 0;
> + if (!cpu_has_xsave)
> + break;
> + switch (idx) {
> + case 0:
> + eax = vcpu->arch.host_xstate_mask & XCNTXT_MASK;
> + /* FP/SSE + XSAVE.HEADER + YMM. */
> + ecx = 512 + 64;
> + if (eax & XSTATE_YMM)
> + ecx += XSTATE_YMM_SIZE;
> + ebx = ecx;
> + break;
> + case 2:
> + if (!(vcpu->arch.host_xstate_mask & XSTATE_YMM))
> + break;
> + eax = XSTATE_YMM_SIZE;
> + ebx = XSTATE_YMM_OFFSET;
> + break;
> + default:
> + break;
> + }
> + break;
> + }
> +
> + kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
> + kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
> + kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
> + kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
> +}
>
This should be part of KVM_GET_SUPPORTED_CPUID.
> @@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> return 0;
> }
>
> +#ifdef CONFIG_X86_64
> +#define REX_PREFIX "0x48, "
> +#else
> +#define REX_PREFIX
> +#endif
> +
> +static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu)
> +{
> + if (cpu_has_xsave) {
> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)
> + : "memory");
> + vcpu->arch.host_xstate_mask =
> + xgetbv(XCR_XFEATURE_ENABLED_MASK);
> + } else {
> + asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image));
> + }
> +}
> +
> +static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu)
> +{
> + if (cpu_has_xsave) {
> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)
> + : "memory");
> + vcpu->arch.guest_xstate_mask =
> + xgetbv(XCR_XFEATURE_ENABLED_MASK);
> + } else {
> + asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image));
> + }
> +}
> +
> +static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu)
> +{
> + if (cpu_has_xsave) {
> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask);
> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image));
> + } else {
> + asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image));
> + }
> +}
> +
> +static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu)
> +{
> + if (cpu_has_xsave) {
> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image));
> + } else {
> + asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image));
> + }
> +}
> +
>
This mostly duplicates the standard x86 fpu code. I have a patch
somewhere that abstracts it out, I'll dig it up and send it out.
--
error compiling committee.c: too many arguments to function
* RE: [PATCH 1/1] KVM: X86: add the support of XSAVE/XRSTOR to guest
From: Cui, Dexuan @ 2010-05-06 4:23 UTC (permalink / raw)
To: 'Avi Kivity'; +Cc: kvm@vger.kernel.org, Yang, Sheng
Avi Kivity wrote:
> On 04/29/2010 08:22 AM, Dexuan Cui wrote:
>> When the host enables XSAVE/XRSTOR, the patch exposes the
>> XSAVE/XRSTOR
>> related CPUID leaves to guest by fixing up kvm_emulate_cpuid() and
>> the
>> patch allows guest to set CR4.OSXSAVE to enable XSAVE.
>> The patch adds per-vcpu host/guest xstate image/mask and enhances the
>> current FXSAVE/FRSTOR with the new XSAVE/XRSTOR on the host xstate
>> (FPU/SSE/YMM) switch.
>>
>>
>> 5 files changed, 242 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h
>> b/arch/x86/include/asm/kvm_host.h
>> index 3f0007b..60be1a7 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -303,6 +303,11 @@ struct kvm_vcpu_arch {
>> struct i387_fxsave_struct host_fx_image;
>> struct i387_fxsave_struct guest_fx_image;
>>
>> + struct xsave_struct *host_xstate_image;
>> + struct xsave_struct *guest_xstate_image;
>> + uint64_t host_xstate_mask;
>>
>
> Does host_xstate_mask need to be per-vcpu, or can it be global?
(I'm sorry for my late reply as I was on a long leave...)
Yes, I think host_xstate_mask can be global, because in Linux
xsave_cntxt_init() and xsave_init() always enable all the XSTATEs on all
CPUs (currently XCNTXT_MASK has 3 bits). On the host, only the kernel
can execute xsetbv() and applications can't change the XCR. I think this
behavior of Linux won't change in the future.
>
>> + uint64_t guest_xstate_mask;
>>
>
> Can be called xcr0, like other shadow registers.
OK.
>> +
>> gva_t mmio_fault_cr2;
>> struct kvm_pio_request pio;
>> void *pio_data;
>>
>>
>> @@ -3258,6 +3262,25 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
>> return 1;
>> }
>>
>> +static int handle_xsetbv(struct kvm_vcpu *vcpu)
>> +{
>> + u64 new_bv = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX)) |
>> + kvm_register_read(vcpu, VCPU_REGS_RAX);
>>
>
> Missing shift?
Oh, my carelessness! Thanks for pointing it out.
> Probably worthwhile to create a helper for reading/writing edx:eax
> into
> a u64.
OK.
>
>> + u64 host_bv = vcpu->arch.host_xstate_mask;
>>
>
> What about ecx?
Sorry, I missed ecx.
Currently only ecx==0 is defined. When a guest tries a non-zero value
of ecx, a #GP should be injected into the guest.
>
>> +
>> + if (((new_bv ^ host_bv) & ~host_bv)
>
> Isn't (new_bv & ~host_bv) equivalent? (guest cannot exceed host...)
OK. Will use the simple one. :-)
>
>> || !(new_bv & 1))
>>
>
> Symbolic value or comment.
OK. the "1" here should be "XSTATE_FP".
>
>> + goto err;
>> + if ((host_bv & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE))
>>
>
> host_bv unneeded, I think.
Agree.
>
>> + goto err;
>> + vcpu->arch.guest_xstate_mask = new_bv;
>> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
>>
>
> Can't we run with the host xcr0? isn't it guaranteed to be a superset
> of the guest xcr0?
I agree host xcr0 is a superset of guest xcr0.
In the case where guest xcr0 has fewer bits set than host xcr0, I suppose running with guest
xcr0 would be a bit faster. If you think simplifying the code by removing the field
guest_xstate_mask is more important, we can do it.
>> + skip_emulated_instruction(vcpu);
>> + return 1;
>> +err:
>> + kvm_inject_gp(vcpu, 0);
>>
>
> Need to #UD in some circumstances.
I don't think so. When the CPU doesn't support XSAVE, or the guest doesn't set
CR4.OSXSAVE, the guest's attempt to execute xsetbv would get a #UD immediately
and no VM exit would occur.
BTW: I missed injecting #GP in the cases ECX != 0 or CPL != 0. Will add them.
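For illustration, the extra checks in handle_xsetbv() might look roughly like this (a sketch only, not part of the posted patch):

    /* XSETBV with ECX != 0 or executed at CPL > 0 should raise #GP */
    if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
            goto err;
    if (kvm_x86_ops->get_cpl(vcpu) != 0)
            goto err;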
>
>> + return 1;
>> +}
>> +
>> static int handle_apic_access(struct kvm_vcpu *vcpu)
>> {
>> unsigned long exit_qualification;
>> @@ -3556,6 +3579,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
>> [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
>> [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
>> [EXIT_REASON_WBINVD] = handle_wbinvd,
>> + [EXIT_REASON_XSETBV] = handle_xsetbv,
>> [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
>> [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
>> [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 6b2ce1d..2af3fbe 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -52,6 +52,8 @@
>> #include <asm/desc.h>
>> #include <asm/mtrr.h>
>> #include <asm/mce.h>
>> +#include <asm/i387.h>
>> +#include <asm/xcr.h>
>>
>> #define MAX_IO_MSRS 256
>> #define CR0_RESERVED_BITS \
>> @@ -62,6 +64,7 @@
>> (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |
>> X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
>> | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
>> + | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0) \
>> | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
>>
>
> It also depends on the guest cpuid value. Please add it outside the
If cpu_has_xsave is true, we always present XSAVE to the guest via CPUID, so I
think the code is correct here.
> macro, it's confusing to read something that looks like a constant but
> isn't.
Ok. Will try to make it outside the macro.
>> int kvm_emulate_halt(struct kvm_vcpu *vcpu)
>> @@ -4307,6 +4346,65 @@ not_found:
>> return 36;
>> }
>>
>> +#define bitmaskof(idx) (1U << ((idx) & 31))
>> +static void kvm_emulate_cpuid_fixup(struct kvm_vcpu *vcpu, u32 func, u32 idx)
>> +{
>> + u32 eax, ebx, ecx, edx;
>> +
>> + if (func != 0 && func != 1 && func != 0xd)
>> + return;
>> +
>> + eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
>> + ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
>> + ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
>> + edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
>> +
>> + switch (func) {
>> + case 0:
>> + /* fixup the Maximum Input Value */
>> + if (cpu_has_xsave && eax < 0xd)
>> + eax = 0xd;
>> + break;
>> + case 1:
>> + ecx &= ~(bitmaskof(X86_FEATURE_XSAVE) |
>> + bitmaskof(X86_FEATURE_OSXSAVE));
>> + if (!cpu_has_xsave)
>> + break;
>> + ecx |= bitmaskof(X86_FEATURE_XSAVE);
>> + if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
>> + ecx |= bitmaskof(X86_FEATURE_OSXSAVE);
>> + break;
>> + case 0xd:
>> + eax = ebx = ecx = edx = 0;
>> + if (!cpu_has_xsave)
>> + break;
>> + switch (idx) {
>> + case 0:
>> + eax = vcpu->arch.host_xstate_mask & XCNTXT_MASK;
>> + /* FP/SSE + XSAVE.HEADER + YMM. */
>> + ecx = 512 + 64;
>> + if (eax & XSTATE_YMM)
>> + ecx += XSTATE_YMM_SIZE;
>> + ebx = ecx;
>> + break;
>> + case 2:
>> + if (!(vcpu->arch.host_xstate_mask & XSTATE_YMM))
>> + break;
>> + eax = XSTATE_YMM_SIZE;
>> + ebx = XSTATE_YMM_OFFSET;
>> + break;
>> + default:
>> + break;
>> + }
>> + break;
>> + }
>> +
>> + kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
>> + kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
>> + kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
>> + kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
>> +}
>>
>
> This should be part of KVM_GET_SUPPORTED_CPUID.
Ok. Will add this.
> @@ -5091,6 +5192,60 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>> return 0;
>> }
>>
>> +#ifdef CONFIG_X86_64
>> +#define REX_PREFIX "0x48, "
>> +#else
>> +#define REX_PREFIX
>> +#endif
>> +
>> +static inline void kvm_fx_save_host(struct kvm_vcpu *vcpu)
>> +{
>> + if (cpu_has_xsave) {
>> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
>> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image)
>> + : "memory");
>> + vcpu->arch.host_xstate_mask =
>> + xgetbv(XCR_XFEATURE_ENABLED_MASK);
>> + } else {
>> + asm("fxsave (%0)" : : "r" (&vcpu->arch.host_fx_image));
>> + }
>> +}
>> +
>> +static inline void kvm_fx_save_guest(struct kvm_vcpu *vcpu)
>> +{
>> + if (cpu_has_xsave) {
>> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
>> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image)
>> + : "memory");
>> + vcpu->arch.guest_xstate_mask =
>> + xgetbv(XCR_XFEATURE_ENABLED_MASK);
>> + } else {
>> + asm("fxsave (%0)" : : "r" (&vcpu->arch.guest_fx_image));
>> + }
>> +}
>> +
>> +static inline void kvm_fx_restore_host(struct kvm_vcpu *vcpu)
>> +{
>> + if (cpu_has_xsave) {
>> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xstate_mask);
>> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
>> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.host_xstate_image));
>> + } else {
>> + asm("fxrstor (%0)" : : "r" (&vcpu->arch.host_fx_image));
>> + }
>> +}
>> +
>> +static inline void kvm_fx_restore_guest(struct kvm_vcpu *vcpu)
>> +{
>> + if (cpu_has_xsave) {
>> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
>> + asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
>> + : : "a" (-1), "d" (-1), "D"(vcpu->arch.guest_xstate_image));
>> + } else {
>> + asm("fxrstor (%0)" : : "r" (&vcpu->arch.guest_fx_image));
>> + }
>> +}
>> +
>>
>
>
> This mostly duplicates the standard x86 fpu code. I have a patch
> somewhere that abstracts it out, I'll dig it up and send it out.
I tried to avoid touching the host kernel itself. :-)
I saw the 2 patches you sent. They (like the new APIs fpu_alloc/fpu_free) will simplify
the implementation of kvm xsave support. Thanks a lot!
BTW: Sheng Yang (on the Cc) will help me follow up on all the remaining issues.
Thanks,
-- Dexuan
* Re: [PATCH 1/1] KVM: X86: add the support of XSAVE/XRSTOR to guest
From: Avi Kivity @ 2010-05-06 8:14 UTC (permalink / raw)
To: Cui, Dexuan; +Cc: kvm@vger.kernel.org, Yang, Sheng
On 05/06/2010 07:23 AM, Cui, Dexuan wrote:
>
>>> + goto err;
>>> + vcpu->arch.guest_xstate_mask = new_bv;
>>> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
>>>
>>>
>> Can't we run with the host xcr0? isn't it guaranteed to be a superset
>> of the guest xcr0?
>>
> I agree host xcr0 is a superset of guest xcr0.
> In the case guest xcr0 has less bits set than host xcr0, I suppose running with guest
> xcr0 would be a bit faster.
However, switching xcr0 may be slow. That's our experience with msrs.
Can you measure its latency?
Can also be avoided if guest and host xcr0 match.
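For example (illustrative only), the XCR0 switch could be skipped when the masks already match:

    if (vcpu->arch.guest_xstate_mask != vcpu->arch.host_xstate_mask)
            xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);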
> If you think simplying the code by removing the field
> guest_xstate_mask is more important, we can do it.
>
Well we need guest_xstate_mask, it's the guest's xcr0, no?
btw, it needs save/restore for live migration, as well as save/restore
for the new fpu state.
>>> + skip_emulated_instruction(vcpu);
>>> + return 1;
>>> +err:
>>> + kvm_inject_gp(vcpu, 0);
>>>
>>>
>> Need to #UD in some circumstances.
>>
> I don't think so. When the CPU doesn't suport XSAVE, or guest doesn't set guest
> CR4.OSXSAVE, guest's attempt of exectuing xsetbv would get a #UD immediately
> and no VMexit would occur.
>
Ok.
>>> @@ -62,6 +64,7 @@
>>> (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |
>>> X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
>>> | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
>>> + | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0) \
>>> | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
>>>
>>>
>> It also depends on the guest cpuid value. Please add it outside the
>>
> If cpu_has_xsave is true, we always present xsave to guest via cpuid, so I
> think the code is correct here.
>
We don't pass all host cpuid values to the guest. We expose them to
userspace via KVM_GET_SUPPORTED_CPUID, and then userspace decides what
cpuid to present to the guest via KVM_SET_CPUID2. So the guest may run
with fewer features than the host.
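A rough sketch of that flow from userspace (error handling omitted; alloc_cpuid2() is a made-up helper for setting up the buffer):

    struct kvm_cpuid2 *cpuid = alloc_cpuid2(MAX_ENTRIES);      /* hypothetical helper */
    ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);  /* what KVM can expose */
    /* userspace trims/edits the entries it wants the guest to see */
    ioctl(vcpu_fd, KVM_SET_CPUID2, cpuid);          /* what the guest actually sees */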
> I saw the 2 patches you sent. They (like the new APIs fpu_alloc/fpu_free) will simplify
> the implementation of kvm xsave support. Thanks a lot!
>
Thanks. To simplify things, please separate host xsave support
(switching to the fpu api) and guest xsave support (cpuid, xsetbv, new
ioctls) into separate patches. In fact, guest xsave support is probably
best split into patches as well.
--
error compiling committee.c: too many arguments to function
* RE: [PATCH 1/1] KVM: X86: add the support of XSAVE/XRSTOR to guest
From: Cui, Dexuan @ 2010-05-06 14:20 UTC (permalink / raw)
To: 'Avi Kivity'; +Cc: kvm@vger.kernel.org, Yang, Sheng
Avi Kivity wrote:
> On 05/06/2010 07:23 AM, Cui, Dexuan wrote:
>>
>>>> + goto err;
>>>> + vcpu->arch.guest_xstate_mask = new_bv;
>>>> + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.guest_xstate_mask);
>>>>
>>>>
>>> Can't we run with the host xcr0? isn't it guaranteed to be a
>>> superset of the guest xcr0?
>>>
>> I agree host xcr0 is a superset of guest xcr0.
>> In the case guest xcr0 has less bits set than host xcr0, I suppose
>> running with guest xcr0 would be a bit faster.
>
> However, switching xcr0 may be slow. That's our experience with msrs.
> Can you measure its latency?
We can measure that.
However, I think changing xcr0 to the guest xcr0 in handle_xsetbv() is necessary --
otherwise xgetbv() inside the guest would return the host xcr0 rather than the guest xcr0 --
which is obviously incorrect. Once handle_xsetbv() changes xcr0 to the guest's value,
the xsetbv() in kvm_fx_restore_host() is also necessary, and so is the xsetbv() in
kvm_fx_restore_guest(). So it looks like the guest can't run with the
host xcr0.
> Can also be avoided if guest and host xcr0 match.
>
>> If you think simplying the code by removing the field
>> guest_xstate_mask is more important, we can do it.
>>
>
> Well we need guest_xstate_mask, it's the guest's xcr0, no?
I misread it. Yes, we need guest_xstate_mask.
>
> btw, it needs save/restore for live migration, as well as save/restore
> for the new fpu state.
Yes. This part is missing. Sheng and I are also working on this -- it may be a bit
troublesome as the number of XSTATEs can grow as time goes on. We'll
have to handle the compatibility issue.
>
>>>> + skip_emulated_instruction(vcpu);
>>>> + return 1;
>>>> +err:
>>>> + kvm_inject_gp(vcpu, 0);
>>>>
>>>>
>>> Need to #UD in some circumstances.
>>>
>> I don't think so. When the CPU doesn't suport XSAVE, or guest
>> doesn't set guest CR4.OSXSAVE, guest's attempt of exectuing xsetbv
>> would get a #UD immediately
>> and no VMexit would occur.
>>
>
> Ok.
>
>>>> @@ -62,6 +64,7 @@
>>>> (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |
>>>> X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
>>>> | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
>>>> + | (cpu_has_xsave ? X86_CR4_OSXSAVE : 0) \
>>>> | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
>>>>
>>>>
>>> It also depends on the guest cpuid value. Please add it outside the
>>>
>> If cpu_has_xsave is true, we always present xsave to guest via
>> cpuid, so I
>> think the code is correct here.
>>
>
> We don't pass all host cpuid values to the guest. We expose them to
> userspace via KVM_GET_SUPPORTED_CPUID, and then userspace decides what
> cpuid to present to the guest via KVM_SET_CPUID2. So the guest may
> run with fewer features than the host.
Sorry, I didn't notice this. Will look into this.
>
>> I saw the 2 patches you sent. They (like the new APIs
>> fpu_alloc/fpu_free) will simplify the implementation of kvm xsave
>> support. Thanks a lot!
>>
>
> Thanks. To simplify things, please separate host xsave support
> (switching to the fpu api) and guest xsave support (cpuid, xsetbv, new
> ioctls) into separate patches. In fact, guest xsave support is
> probably best split into patches as well.
Ok. Will try to do these.
Thanks,
-- Dexuan
* Re: [PATCH 1/1] KVM: X86: add the support of XSAVE/XRSTOR to guest
From: Avi Kivity @ 2010-05-06 19:45 UTC (permalink / raw)
To: Cui, Dexuan; +Cc: kvm@vger.kernel.org, Yang, Sheng
On 05/06/2010 05:20 PM, Cui, Dexuan wrote:
>
>> However, switching xcr0 may be slow. That's our experience with msrs.
>> Can you measure its latency?
>>
> We can measure that.
> However, I think the changing xcr0 to guest xcr0 in handle_xsetbv() is necessary --
> or else, inside guest xgetbv() would return host xcr0 rather than guest xcr0 --
> this is obviously incorrect. Once handle_xsetbv() changes the xcr0 to guest's value,
> the xsetbv() in kvm_fx_restore_host() is also necessary, and the xsetbv() in
> kvm_fx_restore_guest() is also necessary. So looks guest can't run with the
> host xcr0.
>
Right. Moreover, xsave would write into memory the guest doesn't expect.
>> btw, it needs save/restore for live migration, as well as save/restore
>> for the new fpu state.
>>
> Yes. This part is missing. Sheng and I is also doing this -- it may be a bittle
> troublesome as the number of XSTATEs can grown as time goes on. We'll
> have to handle the compatibility issue.
>
Reserve tons of space in the ioctl - and we can use the same format as
xsave.
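One possible shape for such an ioctl payload (purely illustrative; the name and size are assumptions) is a fixed, generously sized buffer laid out in the processor's XSAVE format:

    struct kvm_xsave {
            __u32 region[1024];     /* 4 KB, in the hardware XSAVE layout */
    };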
All those control registers are annoying, we have cr1 and cr5-cr7 free,
cr9-cr15 on x86_64, infinite msr space, and now we have XCRs. Great.
Looking forward to YCR0.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
* Re: [PATCH 1/1] KVM: X86: add the support of XSAVE/XRSTOR to guest
From: Avi Kivity @ 2010-05-06 19:49 UTC (permalink / raw)
To: Cui, Dexuan; +Cc: kvm@vger.kernel.org, Yang, Sheng
On 05/06/2010 10:45 PM, Avi Kivity wrote:
>
> All those control registers are annoying, we have cr1 and cr5-cr7
> free, cr9-cr15 on x86_64, infinite msr space, and now we have XCRs.
> Great.
>
> Looking forward to YCR0.
>
I think I see the reason - xgetbv is unprivileged, so applications can
see what the OS supports instead of looking at cpuid and hoping.
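For example, an application could check what the OS has enabled with something like this (illustrative snippet; bits 1 and 2 of XCR0 are the SSE and YMM state bits):

    unsigned int eax, edx;

    /* xgetbv with ecx = 0 reads XCR0 */
    asm volatile (".byte 0x0f,0x01,0xd0" : "=a" (eax), "=d" (edx) : "c" (0));
    if ((eax & 0x6) == 0x6)
            /* the OS manages both SSE and YMM state, so AVX is usable */;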
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.