public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest
@ 2010-05-19  8:34 Sheng Yang
  2010-05-19  8:34 ` [PATCH] qemu-kvm: Enable xsave related CPUID Sheng Yang
  2010-05-19 16:56 ` [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Avi Kivity
  0 siblings, 2 replies; 8+ messages in thread
From: Sheng Yang @ 2010-05-19  8:34 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel, Dexuan Cui, Sheng Yang

From: Dexuan Cui <dexuan.cui@intel.com>

Enable XSAVE/XRSTORE for guest.

Change from V1:

1. Use FPU API.
2. Fix CPUID issue.
3. Save/restore all possible guest xstate fields when switching. Because we
don't know which fields guest has already touched.

Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/include/asm/vmx.h      |    1 +
 arch/x86/kvm/vmx.c              |   28 +++++++++++++
 arch/x86/kvm/x86.c              |   85 +++++++++++++++++++++++++++++++++++---
 4 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d08bb4a..78d7b06 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
 	} update_pte;
 
 	struct fpu guest_fpu;
+	uint64_t xcr0, host_xcr0;
 
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9e6779f..346ea66 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -266,6 +266,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
 
 /*
  * Interruption-information format
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 99ae513..2ee8ff6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -36,6 +36,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -2616,6 +2618,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
 	if (enable_ept)
 		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
+	if (cpu_has_xsave)
+		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE;
 	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
 
 	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
@@ -3354,6 +3358,29 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+	u64 new_bv = ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) << 32)) |
+		kvm_register_read(vcpu, VCPU_REGS_RAX);
+
+	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
+		goto err;
+	if (vmx_get_cpl(vcpu) != 0)
+		goto err;
+	if (!(new_bv & XSTATE_FP) ||
+	     (new_bv & ~vcpu->arch.host_xcr0))
+		goto err;
+	if ((new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE))
+		goto err;
+	vcpu->arch.xcr0 = new_bv;
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+	skip_emulated_instruction(vcpu);
+	return 1;
+err:
+	kvm_inject_gp(vcpu, 0);
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
@@ -3632,6 +3659,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
+	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7be1d36..5e20f37 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -64,6 +64,7 @@
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
+			  | X86_CR4_OSXSAVE \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -149,6 +150,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
@@ -473,6 +479,17 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
+static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	if (best->ecx & bit(X86_FEATURE_XSAVE))
+		return true;
+
+	return false;
+}
+
 int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -481,6 +498,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
+	if (!guest_cpuid_has_xsave(vcpu) && X86_CR4_OSXSAVE)
+		return 1;
+
 	if (is_long_mode(vcpu)) {
 		if (!(cr4 & X86_CR4_PAE))
 			return 1;
@@ -665,11 +685,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -1887,6 +1902,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	unsigned f_lm = 0;
 #endif
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
+	unsigned f_xsave = cpu_has_xsave ? F(XSAVE) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -1916,7 +1932,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
 		0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-		0 /* Reserved, XSAVE, OSXSAVE */;
+		0 /* Reserved, AES */ | f_xsave | 0 /* OSXSAVE */;
 	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
 		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
@@ -1931,7 +1947,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	switch (function) {
 	case 0:
-		entry->eax = min(entry->eax, (u32)0xb);
+		entry->eax = min(entry->eax, (u32)0xd);
 		break;
 	case 1:
 		entry->edx &= kvm_supported_word0_x86_features;
@@ -1989,6 +2005,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		}
 		break;
 	}
+	case 0xd: {
+		int i;
+
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		for (i = 1; *nent < maxnent; ++i) {
+			if (entry[i - 1].eax == 0 && i != 2)
+				break;
+			do_cpuid_1_ent(&entry[i], function, i);
+			entry[i].flags |=
+			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			++*nent;
+		}
+		break;
+	}
 	case KVM_CPUID_SIGNATURE: {
 		char signature[12] = "KVMKVMKVM\0\0";
 		u32 *sigptr = (u32 *)signature;
@@ -4376,6 +4406,17 @@ not_found:
 	return 36;
 }
 
+static void kvm_update_cpuid(struct kvm_vcpu *vcpu,
+			     struct kvm_cpuid_entry2 *best)
+{
+	/* Update OSXSAVE bit */
+	if (cpu_has_xsave && best->function == 0x1) {
+		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+	}
+}
+
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 {
 	u32 function, index;
@@ -4389,6 +4430,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
 	best = kvm_find_cpuid_entry(vcpu, function, index);
 	if (best) {
+		kvm_update_cpuid(vcpu, best);
 		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
 		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
 		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
@@ -5118,6 +5160,11 @@ void fx_init(struct kvm_vcpu *vcpu)
 	fpu_alloc(&vcpu->arch.guest_fpu);
 	fpu_finit(&vcpu->arch.guest_fpu);
 
+	if (cpu_has_xsave) {
+		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+		vcpu->arch.xcr0 = vcpu->arch.host_xcr0;
+	}
+
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
 EXPORT_SYMBOL_GPL(fx_init);
@@ -5127,14 +5174,30 @@ static void fx_free(struct kvm_vcpu *vcpu)
 	fpu_free(&vcpu->arch.guest_fpu);
 }
 
+static u64 cpuid_get_possible_xcr0(struct kvm_vcpu *vcpu)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+	return eax + ((u64)edx << 32);
+}
+
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->guest_fpu_loaded)
 		return;
 
 	vcpu->guest_fpu_loaded = 1;
+	if (cpu_has_xsave)
+		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 	unlazy_fpu(current);
+	/* Restore all possible states in the guest */
+	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			cpuid_get_possible_xcr0(vcpu));
 	fpu_restore_checking(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
 	trace_kvm_fpu(1);
 }
 
@@ -5144,7 +5207,15 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 0;
+	if (cpu_has_xsave)
+		vcpu->arch.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	/* Save all possible states in the guest */
+	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			cpuid_get_possible_xcr0(vcpu));
 	fpu_save_init(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xcr0);
 	++vcpu->stat.fpu_reload;
 	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 	trace_kvm_fpu(0);
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH] qemu-kvm: Enable xsave related CPUID
  2010-05-19  8:34 [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Sheng Yang
@ 2010-05-19  8:34 ` Sheng Yang
  2010-05-19 16:58   ` Avi Kivity
  2010-05-19 16:56 ` [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Avi Kivity
  1 sibling, 1 reply; 8+ messages in thread
From: Sheng Yang @ 2010-05-19  8:34 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, qemu-devel, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 target-i386/cpuid.c |   32 ++++++++++++++++++++++++++++++++
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index eebf038..21e94f3 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -1067,6 +1067,38 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         *ecx = 0;
         *edx = 0;
         break;
+    case 0xD:
+        /* Processor Extended State */
+        if (!(env->cpuid_ext_features & CPUID_EXT_XSAVE)) {
+            *eax = 0;
+            *ebx = 0;
+            *ecx = 0;
+            *edx = 0;
+            break;
+        }
+        if (count == 0) {
+            *eax = 0x7;
+            *ebx = 0x340;
+            *ecx = 0x340;
+            *edx = 0;
+        } else if (count == 1) {
+            /* eax = 1, so we can continue with others */
+            *eax = 1;
+            *ebx = 0;
+            *ecx = 0;
+            *edx = 0;
+        } else if (count == 2) {
+            *eax = 0x100;
+            *ebx = 0x240;
+            *ecx = 0;
+            *edx = 0;
+        } else {
+            *eax = 0;
+            *ebx = 0;
+            *ecx = 0;
+            *edx = 0;
+        }
+        break;
     case 0x80000000:
         *eax = env->cpuid_xlevel;
         *ebx = env->cpuid_vendor1;
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest
  2010-05-19  8:34 [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Sheng Yang
  2010-05-19  8:34 ` [PATCH] qemu-kvm: Enable xsave related CPUID Sheng Yang
@ 2010-05-19 16:56 ` Avi Kivity
  2010-05-20  9:16   ` [PATCH][v3] " Sheng Yang
  1 sibling, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2010-05-19 16:56 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm, qemu-devel, Dexuan Cui

On 05/19/2010 11:34 AM, Sheng Yang wrote:
> From: Dexuan Cui<dexuan.cui@intel.com>
>
> Enable XSAVE/XRSTORE for guest.
>
> Change from V1:
>
> 1. Use FPU API.
> 2. Fix CPUID issue.
> 3. Save/restore all possible guest xstate fields when switching. Because we
> don't know which fields guest has already touched.
>
> Signed-off-by: Dexuan Cui<dexuan.cui@intel.com>
> Signed-off-by: Sheng Yang<sheng@linux.intel.com>
> ---
>   arch/x86/include/asm/kvm_host.h |    1 +
>   arch/x86/include/asm/vmx.h      |    1 +
>   arch/x86/kvm/vmx.c              |   28 +++++++++++++
>   arch/x86/kvm/x86.c              |   85 +++++++++++++++++++++++++++++++++++---
>   4 files changed, 108 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d08bb4a..78d7b06 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
>   	} update_pte;
>
>   	struct fpu guest_fpu;
> +	uint64_t xcr0, host_xcr0;
>    

host_xcr0 can be a global.

>   /*
>    * Interruption-information format
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 99ae513..2ee8ff6 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -36,6 +36,8 @@
>   #include<asm/vmx.h>
>   #include<asm/virtext.h>
>   #include<asm/mce.h>
> +#include<asm/i387.h>
> +#include<asm/xcr.h>
>
>   #include "trace.h"
>
> @@ -2616,6 +2618,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>   	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
>   	if (enable_ept)
>   		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
> +	if (cpu_has_xsave)
> +		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE;
>    

First, we should only allow the guest to play with cr4.osxsave if 
guest_has_xsave in cpuid; otherwise we need to #GP if the guest sets 
it.  Second, it may be better to trap when the guest sets it (should be 
rare); this way, we only need to save/restore xcr0 if the guest has 
enabled cr4.osxsave.

>   	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
>
>   	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
> @@ -3354,6 +3358,29 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
>   	return 1;
>   }
>
> +static int handle_xsetbv(struct kvm_vcpu *vcpu)
> +{
> +	u64 new_bv = ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX)<<  32)) |
> +		kvm_register_read(vcpu, VCPU_REGS_RAX);
>    

I think you need to trim the upper 32 bits of rax.

Please introduce helpers for reading edx:eax into a u64 and vice versa.  
We can then use the helpers here and in the msr code.

> +
> +	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
> +		goto err;
> +	if (vmx_get_cpl(vcpu) != 0)
> +		goto err;
> +	if (!(new_bv&  XSTATE_FP) ||
> +	     (new_bv&  ~vcpu->arch.host_xcr0))
> +		goto err;
> +	if ((new_bv&  XSTATE_YMM)&&  !(new_bv&  XSTATE_SSE))
> +		goto err;
>    

This is a little worrying.  What if a new bit is introduced later that 
depends on other bits?  We'll need to add a dependency between ZMM and 
YMM or whatever, and old versions will be broken.

So I think we need to check xcr0 not against host_xcr0 but instead 
against a whitelist of xcr0 bits that we know how to handle (currently 
fpu, sse, and ymm).

> +	vcpu->arch.xcr0 = new_bv;
> +	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
> +	skip_emulated_instruction(vcpu);
> +	return 1;
> +err:
> +	kvm_inject_gp(vcpu, 0);
> +	return 1;
> +}
> +
>    

> @@ -149,6 +150,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>   	{ NULL }
>   };
>
> +static inline u32 bit(int bitno)
> +{
> +	return 1<<  (bitno&  31);
> +}
> +
>   static void kvm_on_user_return(struct user_return_notifier *urn)
>   {
>   	unsigned slot;
> @@ -473,6 +479,17 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
>   }
>   EXPORT_SYMBOL_GPL(kvm_lmsw);
>
> +static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = kvm_find_cpuid_entry(vcpu, 1, 0);
> +	if (best->ecx&  bit(X86_FEATURE_XSAVE))
>    

Sanity:  if (best && ...)

> +		return true;
> +
> +	return false;
>    

Can avoid the if (): return best && (best->ecx & ...);

> +}
> +
>   int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   {
>   	unsigned long old_cr4 = kvm_read_cr4(vcpu);
> @@ -481,6 +498,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   	if (cr4&  CR4_RESERVED_BITS)
>   		return 1;
>
> +	if (!guest_cpuid_has_xsave(vcpu)&&  X86_CR4_OSXSAVE)
>    

s/&&.*//

> +		return 1;
> +
>   	if (is_long_mode(vcpu)) {
>   		if (!(cr4&  X86_CR4_PAE))
>    

>   			return 1;
>
> @@ -1887,6 +1902,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>   	unsigned f_lm = 0;
>   #endif
>   	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> +	unsigned f_xsave = cpu_has_xsave ? F(XSAVE) : 0;
>
>   	/* cpuid 1.edx */
>   	const u32 kvm_supported_word0_x86_features =
> @@ -1916,7 +1932,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>   		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
>   		0 /* Reserved, DCA */ | F(XMM4_1) |
>   		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> -		0 /* Reserved, XSAVE, OSXSAVE */;
> +		0 /* Reserved, AES */ | f_xsave | 0 /* OSXSAVE */;
>    

Enough to put F(XSAVE) there, no?  The code should mask it out if not 
present, like XMM4_2.

>
> +static void kvm_update_cpuid(struct kvm_vcpu *vcpu,
> +			     struct kvm_cpuid_entry2 *best)
> +{
> +	/* Update OSXSAVE bit */
> +	if (cpu_has_xsave&&  best->function == 0x1) {
> +		best->ecx&= ~(bit(X86_FEATURE_OSXSAVE));
> +		if (kvm_read_cr4(vcpu)&  X86_CR4_OSXSAVE)
> +			best->ecx |= bit(X86_FEATURE_OSXSAVE);
> +	}
> +}
> +
>   void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
>   {
>   	u32 function, index;
> @@ -4389,6 +4430,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
>   	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
>   	best = kvm_find_cpuid_entry(vcpu, function, index);
>   	if (best) {
> +		kvm_update_cpuid(vcpu, best);
>    

Slightly faster to do it at kvm_set_cr4() time.  Not sure it matters.

>   		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
>   		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
>   		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
> @@ -5118,6 +5160,11 @@ void fx_init(struct kvm_vcpu *vcpu)
>   	fpu_alloc(&vcpu->arch.guest_fpu);
>   	fpu_finit(&vcpu->arch.guest_fpu);
>
> +	if (cpu_has_xsave) {
> +		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
> +		vcpu->arch.xcr0 = vcpu->arch.host_xcr0;
>    

Should be initialized to the default value.

>   void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>   {
>   	if (vcpu->guest_fpu_loaded)
>   		return;
>
>   	vcpu->guest_fpu_loaded = 1;
> +	if (cpu_has_xsave)
> +		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
>    

Why read it every time?

>   	unlazy_fpu(current);
> +	/* Restore all possible states in the guest */
> +	if (cpu_has_xsave&&  guest_cpuid_has_xsave(vcpu))
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> +			cpuid_get_possible_xcr0(vcpu));
>   	fpu_restore_checking(&vcpu->arch.guest_fpu);
> +	if (cpu_has_xsave)
>    

if guest enabled xsave...

> +		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
>    

Need to do it on every entry, not just fpu reload, since xgetbv does not 
check cr0.ts.

Need to add save/restore support for xcrs.

Need to add save/restore support for xsave state.

Please send a test case for this (see qemu-kvm.git user/test/x86 for 
examples), to be run twice: once with -cpu host,-xsave and once with 
-cpu host,+xsave.

Things to check:

- Set cr4.xsave without cpuid.xsave -> #GP
- Set cr4.xsave with cpuid.xsave -> works, sets cr4.xsave, sets 
cpuid.osxsave
- clearing cr4.xsave
- xsetbv/xgetbv/xsave/xrstor with cr4.xsave enabled/disabled
- interdependencies between xcr0 bits (fpu, sse, ymm), illegal 
combinations, illegal bits, illegal xcrs
- anything else you can think of...


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] qemu-kvm: Enable xsave related CPUID
  2010-05-19  8:34 ` [PATCH] qemu-kvm: Enable xsave related CPUID Sheng Yang
@ 2010-05-19 16:58   ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2010-05-19 16:58 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm, qemu-devel

On 05/19/2010 11:34 AM, Sheng Yang wrote:
> Signed-off-by: Sheng Yang<sheng@linux.intel.com>
> ---
>   target-i386/cpuid.c |   32 ++++++++++++++++++++++++++++++++
>    

Can send to Anthony directly, while tcg doesn't support xsave/ymm, all 
the code here is generic.

>   1 files changed, 32 insertions(+), 0 deletions(-)
>
> diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
> index eebf038..21e94f3 100644
> --- a/target-i386/cpuid.c
> +++ b/target-i386/cpuid.c
> @@ -1067,6 +1067,38 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>           *ecx = 0;
>           *edx = 0;
>           break;
> +    case 0xD:
> +        /* Processor Extended State */
> +        if (!(env->cpuid_ext_features&  CPUID_EXT_XSAVE)) {
> +            *eax = 0;
> +            *ebx = 0;
> +            *ecx = 0;
> +            *edx = 0;
> +            break;
> +        }
> +        if (count == 0) {
> +            *eax = 0x7;
> +            *ebx = 0x340;
> +            *ecx = 0x340;
> +            *edx = 0;
> +        } else if (count == 1) {
> +            /* eax = 1, so we can continue with others */
> +            *eax = 1;
> +            *ebx = 0;
> +            *ecx = 0;
> +            *edx = 0;
> +        } else if (count == 2) {
> +            *eax = 0x100;
> +            *ebx = 0x240;
> +            *ecx = 0;
> +            *edx = 0;
> +        } else {
> +            *eax = 0;
> +            *ebx = 0;
> +            *ecx = 0;
> +            *edx = 0;
> +        }
> +        break;
>    

Lots of magic numbers.  Symbolic constants or explanatory comments.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH][v3] KVM: VMX: Enable XSAVE/XRSTORE for guest
  2010-05-19 16:56 ` [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Avi Kivity
@ 2010-05-20  9:16   ` Sheng Yang
  2010-05-20  9:46     ` Avi Kivity
  0 siblings, 1 reply; 8+ messages in thread
From: Sheng Yang @ 2010-05-20  9:16 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Dexuan Cui, Sheng Yang

From: Dexuan Cui <dexuan.cui@intel.com>

Enable XSAVE/XRSTORE for guest.

Change from V2:
Addressed comments from Avi.

Change from V1:

1. Use FPU API.
2. Fix CPUID issue.
3. Save/restore all possible guest xstate fields when switching. Because we
don't know which fields guest has already touched.

Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---

Avi, could you help to review this kernel patch first? Testcase and LM are in
progress now.

 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/include/asm/vmx.h      |    1 +
 arch/x86/kvm/vmx.c              |   28 ++++++++++++
 arch/x86/kvm/x86.c              |   88 +++++++++++++++++++++++++++++++++++---
 4 files changed, 111 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d08bb4a..3938bd1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
 	} update_pte;
 
 	struct fpu guest_fpu;
+	u64 xcr0;
 
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 9e6779f..346ea66 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -266,6 +266,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
 
 /*
  * Interruption-information format
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 99ae513..a63f206 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -36,6 +36,8 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
 
 #include "trace.h"
 
@@ -247,6 +249,9 @@ static const u32 vmx_msr_index[] = {
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
+#define MERGE_TO_U64(low, high) \
+		(((low) & -1u) | ((u64)((high) & -1u) << 32))
+
 static inline bool is_page_fault(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -3354,6 +3359,28 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_xsetbv(struct kvm_vcpu *vcpu)
+{
+	u64 new_bv = MERGE_TO_U64(kvm_register_read(vcpu, VCPU_REGS_RAX),
+		kvm_register_read(vcpu, VCPU_REGS_RDX));
+
+	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
+		goto err;
+	if (vmx_get_cpl(vcpu) != 0)
+		goto err;
+	if (!(new_bv & XSTATE_FP))
+		goto err;
+	if ((new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE))
+		goto err;
+	vcpu->arch.xcr0 = new_bv;
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+	skip_emulated_instruction(vcpu);
+	return 1;
+err:
+	kvm_inject_gp(vcpu, 0);
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
@@ -3632,6 +3659,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
+	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
 	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7be1d36..7580d14 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -64,6 +64,7 @@
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
+			  | X86_CR4_OSXSAVE \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
@@ -149,6 +150,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ NULL }
 };
 
+static u64 host_xcr0;
+
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
 	unsigned slot;
@@ -473,6 +481,30 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 }
 EXPORT_SYMBOL_GPL(kvm_lmsw);
 
+static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
+}
+
+static void update_cpuid(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	if (!best)
+		return;
+
+	/* Update OSXSAVE bit */
+	if (cpu_has_xsave && best->function == 0x1) {
+		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+	}
+}
+
 int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -481,6 +513,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
+	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+		return 1;
+
 	if (is_long_mode(vcpu)) {
 		if (!(cr4 & X86_CR4_PAE))
 			return 1;
@@ -497,6 +532,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if ((cr4 ^ old_cr4) & pdptr_bits)
 		kvm_mmu_reset_context(vcpu);
 
+	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
+		update_cpuid(vcpu);
+
 	return 0;
 }
 
@@ -665,11 +703,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
 
-static inline u32 bit(int bitno)
-{
-	return 1 << (bitno & 31);
-}
-
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -1916,7 +1949,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
 		0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-		0 /* Reserved, XSAVE, OSXSAVE */;
+		0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */;
 	/* cpuid 0x80000001.ecx */
 	const u32 kvm_supported_word6_x86_features =
 		F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
@@ -1931,7 +1964,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	switch (function) {
 	case 0:
-		entry->eax = min(entry->eax, (u32)0xb);
+		entry->eax = min(entry->eax, (u32)0xd);
 		break;
 	case 1:
 		entry->edx &= kvm_supported_word0_x86_features;
@@ -1989,6 +2022,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		}
 		break;
 	}
+	case 0xd: {
+		int i;
+
+		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		for (i = 1; *nent < maxnent; ++i) {
+			if (entry[i - 1].eax == 0 && i != 2)
+				break;
+			do_cpuid_1_ent(&entry[i], function, i);
+			entry[i].flags |=
+			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+			++*nent;
+		}
+		break;
+	}
 	case KVM_CPUID_SIGNATURE: {
 		char signature[12] = "KVMKVMKVM\0\0";
 		u32 *sigptr = (u32 *)signature;
@@ -4124,6 +4171,8 @@ int kvm_arch_init(void *opaque)
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
+	host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
 	return 0;
 
 out:
@@ -4567,6 +4616,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
 		kvm_load_guest_fpu(vcpu);
+	if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
 
 	atomic_set(&vcpu->guest_mode, 1);
 	smp_wmb();
@@ -5118,6 +5169,10 @@ void fx_init(struct kvm_vcpu *vcpu)
 	fpu_alloc(&vcpu->arch.guest_fpu);
 	fpu_finit(&vcpu->arch.guest_fpu);
 
+	/* Ensure guest xcr0 is valid for loading */
+	if (cpu_has_xsave)
+		vcpu->arch.xcr0 = XSTATE_FP;
+
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
 EXPORT_SYMBOL_GPL(fx_init);
@@ -5127,6 +5182,14 @@ static void fx_free(struct kvm_vcpu *vcpu)
 	fpu_free(&vcpu->arch.guest_fpu);
 }
 
+static u64 cpuid_get_possible_xcr0(struct kvm_vcpu *vcpu)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+	return eax + ((u64)edx << 32);
+}
+
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->guest_fpu_loaded)
@@ -5134,6 +5197,10 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 	vcpu->guest_fpu_loaded = 1;
 	unlazy_fpu(current);
+	/* Restore all possible states in the guest */
+	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			cpuid_get_possible_xcr0(vcpu));
 	fpu_restore_checking(&vcpu->arch.guest_fpu);
 	trace_kvm_fpu(1);
 }
@@ -5144,7 +5211,14 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 		return;
 
 	vcpu->guest_fpu_loaded = 0;
+	/* Save all possible states in the guest */
+	if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu))
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			cpuid_get_possible_xcr0(vcpu));
 	fpu_save_init(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK,
+			host_xcr0);
 	++vcpu->stat.fpu_reload;
 	set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 	trace_kvm_fpu(0);
-- 
1.7.0.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH][v3] KVM: VMX: Enable XSAVE/XRSTORE for guest
  2010-05-20  9:16   ` [PATCH][v3] " Sheng Yang
@ 2010-05-20  9:46     ` Avi Kivity
  2010-05-21  7:26       ` Sheng Yang
  0 siblings, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2010-05-20  9:46 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm, Dexuan Cui

On 05/20/2010 12:16 PM, Sheng Yang wrote:
> From: Dexuan Cui<dexuan.cui@intel.com>
>
> Enable XSAVE/XRSTORE for guest.
>
> Change from V2:
> Addressed comments from Avi.
>
> Change from V1:
>
> 1. Use FPU API.
> 2. Fix CPUID issue.
> 3. Save/restore all possible guest xstate fields when switching. Because we
> don't know which fields guest has already touched.
>
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d08bb4a..3938bd1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
>   	} update_pte;
>
>   	struct fpu guest_fpu;
> +	u64 xcr0;
>
>   	gva_t mmio_fault_cr2;
>   	struct kvm_pio_request pio;
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 9e6779f..346ea66 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -266,6 +266,7 @@ enum vmcs_field {
>   #define EXIT_REASON_EPT_VIOLATION       48
>   #define EXIT_REASON_EPT_MISCONFIG       49
>   #define EXIT_REASON_WBINVD		54
> +#define EXIT_REASON_XSETBV		55
>
>   /*
>    * Interruption-information format
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 99ae513..a63f206 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -36,6 +36,8 @@
>   #include<asm/vmx.h>
>   #include<asm/virtext.h>
>   #include<asm/mce.h>
> +#include<asm/i387.h>
> +#include<asm/xcr.h>
>
>   #include "trace.h"
>
> @@ -247,6 +249,9 @@ static const u32 vmx_msr_index[] = {
>   };
>   #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
>
> +#define MERGE_TO_U64(low, high) \
> +		(((low)&  -1u) | ((u64)((high)&  -1u)<<  32))
> +
>    

static inline u64 kvm_read_edx_eax(vcpu) in cache_regs.h

>
> +static int handle_xsetbv(struct kvm_vcpu *vcpu)
> +{
> +	u64 new_bv = MERGE_TO_U64(kvm_register_read(vcpu, VCPU_REGS_RAX),
> +		kvm_register_read(vcpu, VCPU_REGS_RDX));
> +
> +	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
> +		goto err;
> +	if (vmx_get_cpl(vcpu) != 0)
> +		goto err;
> +	if (!(new_bv&  XSTATE_FP))
> +		goto err;
> +	if ((new_bv&  XSTATE_YMM)&&  !(new_bv&  XSTATE_SSE))
> +		goto err;
>    

What about a check against unknown bits?

> +	vcpu->arch.xcr0 = new_bv;
> +	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
> +	skip_emulated_instruction(vcpu);
> +	return 1;
> +err:
> +	kvm_inject_gp(vcpu, 0);
> +	return 1;
> +}
> +
>   static int handle_apic_access(struct kvm_vcpu *vcpu)
>   {
>   	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
>
>
> +static u64 host_xcr0;
>    

__read_mostly.

> +
> +static void update_cpuid(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = kvm_find_cpuid_entry(vcpu, 1, 0);
> +	if (!best)
> +		return;
> +
> +	/* Update OSXSAVE bit */
> +	if (cpu_has_xsave&&  best->function == 0x1) {
> +		best->ecx&= ~(bit(X86_FEATURE_OSXSAVE));
> +		if (kvm_read_cr4(vcpu)&  X86_CR4_OSXSAVE)
> +			best->ecx |= bit(X86_FEATURE_OSXSAVE);
> +	}
> +}
>    

Note: need to update after userspace writes cpuid as well.

> +
>   int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   {
>   	unsigned long old_cr4 = kvm_read_cr4(vcpu);
> @@ -481,6 +513,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   	if (cr4&  CR4_RESERVED_BITS)
>   		return 1;
>
> +	if (!guest_cpuid_has_xsave(vcpu)&&  (cr4&  X86_CR4_OSXSAVE))
> +		return 1;
> +
>   	if (is_long_mode(vcpu)) {
>   		if (!(cr4&  X86_CR4_PAE))
>   			return 1;
> @@ -497,6 +532,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   	if ((cr4 ^ old_cr4)&  pdptr_bits)
>   		kvm_mmu_reset_context(vcpu);
>
> +	if ((cr4 ^ old_cr4)&  X86_CR4_OSXSAVE)
> +		update_cpuid(vcpu);
> +
>    

I think we need to reload the guest's xcr0 at this point.  
Alternatively, call vmx_load_host_state() to ensure that the next entry 
will reload it.

> @@ -1931,7 +1964,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>
>   	switch (function) {
>   	case 0:
> -		entry->eax = min(entry->eax, (u32)0xb);
> +		entry->eax = min(entry->eax, (u32)0xd);
>    

Do we need any special handling for leaf 0xc?

> @@ -4567,6 +4616,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>   	kvm_x86_ops->prepare_guest_switch(vcpu);
>   	if (vcpu->fpu_active)
>   		kvm_load_guest_fpu(vcpu);
> +	if (kvm_read_cr4(vcpu)&  X86_CR4_OSXSAVE)
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
>    

Better done in vmx_save_host_state(), so we only do it on context 
switches or entries from userspace.

kvm_read_cr4_bits() is faster - doesn't need a vmcs_readl().

>
>   	atomic_set(&vcpu->guest_mode, 1);
>   	smp_wmb();
> @@ -5118,6 +5169,10 @@ void fx_init(struct kvm_vcpu *vcpu)
>   	fpu_alloc(&vcpu->arch.guest_fpu);
>   	fpu_finit(&vcpu->arch.guest_fpu);
>
> +	/* Ensure guest xcr0 is valid for loading */
> +	if (cpu_has_xsave)
> +		vcpu->arch.xcr0 = XSTATE_FP;
> +
>    

Can do it unconditionally, not that it matters much.

>   void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>   {
>   	if (vcpu->guest_fpu_loaded)
> @@ -5134,6 +5197,10 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>
>   	vcpu->guest_fpu_loaded = 1;
>   	unlazy_fpu(current);
> +	/* Restore all possible states in the guest */
> +	if (cpu_has_xsave&&  guest_cpuid_has_xsave(vcpu))
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> +			cpuid_get_possible_xcr0(vcpu));
>    

Best to calculate it out of the fast path, when guest cpuid is set.  
Need to check it at this time as well.

Also can avoid it if guest xcr0 == host xcr0.

>   	fpu_restore_checking(&vcpu->arch.guest_fpu);
>   	trace_kvm_fpu(1);
>   }
> @@ -5144,7 +5211,14 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
>   		return;
>
>   	vcpu->guest_fpu_loaded = 0;
> +	/* Save all possible states in the guest */
> +	if (cpu_has_xsave&&  guest_cpuid_has_xsave(vcpu))
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> +			cpuid_get_possible_xcr0(vcpu));
>    

Ditto.

>   	fpu_save_init(&vcpu->arch.guest_fpu);
> +	if (cpu_has_xsave)
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> +			host_xcr0);
>   	++vcpu->stat.fpu_reload;
>   	set_bit(KVM_REQ_DEACTIVATE_FPU,&vcpu->requests);
>   	trace_kvm_fpu(0);
>    


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH][v3] KVM: VMX: Enable XSAVE/XRSTORE for guest
  2010-05-20  9:46     ` Avi Kivity
@ 2010-05-21  7:26       ` Sheng Yang
  2010-05-21  8:56         ` Avi Kivity
  0 siblings, 1 reply; 8+ messages in thread
From: Sheng Yang @ 2010-05-21  7:26 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Dexuan Cui

On Thursday 20 May 2010 17:46:40 Avi Kivity wrote:
> On 05/20/2010 12:16 PM, Sheng Yang wrote:
> > From: Dexuan Cui<dexuan.cui@intel.com>
> > 
> > Enable XSAVE/XRSTORE for guest.
> > 
> > Change from V2:
> > Addressed comments from Avi.
> > 
> > Change from V1:
> > 
> > 1. Use FPU API.
> > 2. Fix CPUID issue.
> > 3. Save/restore all possible guest xstate fields when switching. Because
> > we don't know which fields guest has already touched.
> > 
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index d08bb4a..3938bd1 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
> > 
> >   	} update_pte;
> >   	
> >   	struct fpu guest_fpu;
> > 
> > +	u64 xcr0;
> > 
> >   	gva_t mmio_fault_cr2;
> >   	struct kvm_pio_request pio;
> > 
> > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> > index 9e6779f..346ea66 100644
> > --- a/arch/x86/include/asm/vmx.h
> > +++ b/arch/x86/include/asm/vmx.h
> > @@ -266,6 +266,7 @@ enum vmcs_field {
> > 
> >   #define EXIT_REASON_EPT_VIOLATION       48
> >   #define EXIT_REASON_EPT_MISCONFIG       49
> >   #define EXIT_REASON_WBINVD		54
> > 
> > +#define EXIT_REASON_XSETBV		55
> > 
> >   /*
> >   
> >    * Interruption-information format
> > 
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 99ae513..a63f206 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -36,6 +36,8 @@
> > 
> >   #include<asm/vmx.h>
> >   #include<asm/virtext.h>
> >   #include<asm/mce.h>
> > 
> > +#include<asm/i387.h>
> > +#include<asm/xcr.h>
> > 
> >   #include "trace.h"
> > 
> > @@ -247,6 +249,9 @@ static const u32 vmx_msr_index[] = {
> > 
> >   };
> >   #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
> > 
> > +#define MERGE_TO_U64(low, high) \
> > +		(((low)&  -1u) | ((u64)((high)&  -1u)<<  32))
> > +
> 
> static inline u64 kvm_read_edx_eax(vcpu) in cache_regs.h
> 
> > +static int handle_xsetbv(struct kvm_vcpu *vcpu)
> > +{
> > +	u64 new_bv = MERGE_TO_U64(kvm_register_read(vcpu, VCPU_REGS_RAX),
> > +		kvm_register_read(vcpu, VCPU_REGS_RDX));
> > +
> > +	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
> > +		goto err;
> > +	if (vmx_get_cpl(vcpu) != 0)
> > +		goto err;
> > +	if (!(new_bv&  XSTATE_FP))
> > +		goto err;
> > +	if ((new_bv&  XSTATE_YMM)&&  !(new_bv&  XSTATE_SSE))
> > +		goto err;
> 
> What about a check against unknown bits?
> 
> > +	vcpu->arch.xcr0 = new_bv;
> > +	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
> > +	skip_emulated_instruction(vcpu);
> > +	return 1;
> > +err:
> > +	kvm_inject_gp(vcpu, 0);
> > +	return 1;
> > +}
> > +
> > 
> >   static int handle_apic_access(struct kvm_vcpu *vcpu)
> >   {
> >   
> >   	return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
> > 
> > +static u64 host_xcr0;
> 
> __read_mostly.
> 
> > +
> > +static void update_cpuid(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_cpuid_entry2 *best;
> > +
> > +	best = kvm_find_cpuid_entry(vcpu, 1, 0);
> > +	if (!best)
> > +		return;
> > +
> > +	/* Update OSXSAVE bit */
> > +	if (cpu_has_xsave&&  best->function == 0x1) {
> > +		best->ecx&= ~(bit(X86_FEATURE_OSXSAVE));
> > +		if (kvm_read_cr4(vcpu)&  X86_CR4_OSXSAVE)
> > +			best->ecx |= bit(X86_FEATURE_OSXSAVE);
> > +	}
> > +}
> 
> Note: need to update after userspace writes cpuid as well.

I don't quite understand. A userspace-set OSXSAVE bit should be trimmed, IMO...
> 
> > +
> > 
> >   int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> >   {
> >   
> >   	unsigned long old_cr4 = kvm_read_cr4(vcpu);
> > 
> > @@ -481,6 +513,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned
> > long cr4)
> > 
> >   	if (cr4&  CR4_RESERVED_BITS)
> >   	
> >   		return 1;
> > 
> > +	if (!guest_cpuid_has_xsave(vcpu)&&  (cr4&  X86_CR4_OSXSAVE))
> > +		return 1;
> > +
> > 
> >   	if (is_long_mode(vcpu)) {
> >   	
> >   		if (!(cr4&  X86_CR4_PAE))
> >   		
> >   			return 1;
> > 
> > @@ -497,6 +532,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned
> > long cr4)
> > 
> >   	if ((cr4 ^ old_cr4)&  pdptr_bits)
> >   	
> >   		kvm_mmu_reset_context(vcpu);
> > 
> > +	if ((cr4 ^ old_cr4)&  X86_CR4_OSXSAVE)
> > +		update_cpuid(vcpu);
> > +
> 
> I think we need to reload the guest's xcr0 at this point.
> Alternatively, call vmx_load_host_state() to ensure that the next entry
> will reload it.

The current xcr0 would be loaded at the next vmentry.

And if we use prepare_guest_switch(), how about SVM?

> 
> > @@ -1931,7 +1964,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2
> > *entry, u32 function,
> > 
> >   	switch (function) {
> > 
> >   	case 0:
> > -		entry->eax = min(entry->eax, (u32)0xb);
> > +		entry->eax = min(entry->eax, (u32)0xd);
> 
> Do we need any special handling for leaf 0xc?

Don't think so. CPUID would return all 0 for it.
> 
> > @@ -4567,6 +4616,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> > 
> >   	kvm_x86_ops->prepare_guest_switch(vcpu);
> >   	if (vcpu->fpu_active)
> >   	
> >   		kvm_load_guest_fpu(vcpu);
> > 
> > +	if (kvm_read_cr4(vcpu)&  X86_CR4_OSXSAVE)
> > +		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
> 
> Better done in vmx_save_host_state(), so we only do it on context
> switches or entries from userspace.
> 
> kvm_read_cr4_bits() is faster - doesn't need a vmcs_readl().
> 
> >   	atomic_set(&vcpu->guest_mode, 1);
> >   	smp_wmb();
> > 
> > @@ -5118,6 +5169,10 @@ void fx_init(struct kvm_vcpu *vcpu)
> > 
> >   	fpu_alloc(&vcpu->arch.guest_fpu);
> >   	fpu_finit(&vcpu->arch.guest_fpu);
> > 
> > +	/* Ensure guest xcr0 is valid for loading */
> > +	if (cpu_has_xsave)
> > +		vcpu->arch.xcr0 = XSTATE_FP;
> > +
> 
> Can do it unconditionally, not that it matters much.
> 
> >   void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
> >   {
> >   
> >   	if (vcpu->guest_fpu_loaded)
> > 
> > @@ -5134,6 +5197,10 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
> > 
> >   	vcpu->guest_fpu_loaded = 1;
> >   	unlazy_fpu(current);
> > 
> > +	/* Restore all possible states in the guest */
> > +	if (cpu_has_xsave&&  guest_cpuid_has_xsave(vcpu))
> > +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> > +			cpuid_get_possible_xcr0(vcpu));
> 
> Best to calculate it out of the fast path, when guest cpuid is set.
> Need to check it at this time as well.

You mean guest_cpuid_has_xsave()? I don't quite understand the point here...

> 
> Also can avoid it if guest xcr0 == host xcr0.

I don't know whether the assumption that "the host uses all possible xcr0 bits" can apply. If
so, using only host_xcr0 should be fine.

Would update other points. Thanks.
--
regards
Yang, Sheng
> 
> >   	fpu_restore_checking(&vcpu->arch.guest_fpu);
> >   	trace_kvm_fpu(1);
> >   
> >   }
> > 
> > @@ -5144,7 +5211,14 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
> > 
> >   		return;
> >   	
> >   	vcpu->guest_fpu_loaded = 0;
> > 
> > +	/* Save all possible states in the guest */
> > +	if (cpu_has_xsave&&  guest_cpuid_has_xsave(vcpu))
> > +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> > +			cpuid_get_possible_xcr0(vcpu));
> 
> Ditto.
> 
> >   	fpu_save_init(&vcpu->arch.guest_fpu);
> > 
> > +	if (cpu_has_xsave)
> > +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> > +			host_xcr0);
> > 
> >   	++vcpu->stat.fpu_reload;
> >   	set_bit(KVM_REQ_DEACTIVATE_FPU,&vcpu->requests);
> >   	trace_kvm_fpu(0);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH][v3] KVM: VMX: Enable XSAVE/XRSTORE for guest
  2010-05-21  7:26       ` Sheng Yang
@ 2010-05-21  8:56         ` Avi Kivity
  0 siblings, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2010-05-21  8:56 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Marcelo Tosatti, kvm, Dexuan Cui

On 05/21/2010 10:26 AM, Sheng Yang wrote:
>>
>>> +
>>> +static void update_cpuid(struct kvm_vcpu *vcpu)
>>> +{
>>> +	struct kvm_cpuid_entry2 *best;
>>> +
>>> +	best = kvm_find_cpuid_entry(vcpu, 1, 0);
>>> +	if (!best)
>>> +		return;
>>> +
>>> +	/* Update OSXSAVE bit */
>>> +	if (cpu_has_xsave&&   best->function == 0x1) {
>>> +		best->ecx&= ~(bit(X86_FEATURE_OSXSAVE));
>>> +		if (kvm_read_cr4(vcpu)&   X86_CR4_OSXSAVE)
>>> +			best->ecx |= bit(X86_FEATURE_OSXSAVE);
>>> +	}
>>> +}
>>>        
>> Note: need to update after userspace writes cpuid as well.
>>      
> Not quite understand. Userspace set OSXSAVE should be trimmed IMO...
>    

Two cases: userspace does KVM_SET_CPUID2 with osxsave set but cr4.xsave 
clear, or the other way round.

So we should set cpuid.osxsave depending on cr4.xsave whenever cr4 OR 
cpuid is modified, and completely ignore the userspace setting for that bit.

>>> @@ -497,6 +532,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned
>>> long cr4)
>>>
>>>    	if ((cr4 ^ old_cr4)&   pdptr_bits)
>>>    	
>>>    		kvm_mmu_reset_context(vcpu);
>>>
>>> +	if ((cr4 ^ old_cr4)&   X86_CR4_OSXSAVE)
>>> +		update_cpuid(vcpu);
>>> +
>>>        
>> I think we need to reload the guest's xcr0 at this point.
>> Alternatively, call vmx_load_host_state() to ensure the the next entry
>> will reload it.
>>      
> Current xcr0 would be loaded when next vmentry.
>    

True.

> And if we use prepare_guest_switch(), how about SVM?
>    

kvm_arch_vcpu_load() looks like a good place, as long as interrupts 
don't use the fpu.

>>
>>> @@ -5134,6 +5197,10 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>>>
>>>    	vcpu->guest_fpu_loaded = 1;
>>>    	unlazy_fpu(current);
>>>
>>> +	/* Restore all possible states in the guest */
>>> +	if (cpu_has_xsave&&   guest_cpuid_has_xsave(vcpu))
>>> +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
>>> +			cpuid_get_possible_xcr0(vcpu));
>>>        
>> Best to calculate it out of the fast path, when guest cpuid is set.
>> Need to check it at this time as well.
>>      
> You mean guest_cpuid_has_xsave()? Not quite understand the point here...
>    

Also cpuid_get_possible_xcr0().  So we have something like

    if (vcpu->save_xcr0)
        xsetbv(vcpu->save_xcr0);

Those cpuid functions have loops, we don't want them running every 
context switch.

>> Also can avoid it if guest xcr0 == host xcr0.
>>      
> I don't know the assumption that "host use all possible xcr0 bits" can apply. If
> so, only use host_xcr0 should be fine.
>    

I think we can rely on it.  Those bits are a service to userspace and 
the guest is just a different kind of userspace, so it makes sense to 
expose the same set.

> Would update other points. Thanks.
>    

Thanks.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2010-05-21  8:56 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-19  8:34 [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Sheng Yang
2010-05-19  8:34 ` [PATCH] qemu-kvm: Enable xsave related CPUID Sheng Yang
2010-05-19 16:58   ` Avi Kivity
2010-05-19 16:56 ` [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Avi Kivity
2010-05-20  9:16   ` [PATCH][v3] " Sheng Yang
2010-05-20  9:46     ` Avi Kivity
2010-05-21  7:26       ` Sheng Yang
2010-05-21  8:56         ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox