qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: Sheng Yang <sheng@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	qemu-devel@nongnu.org, kvm@vger.kernel.org,
	Dexuan Cui <dexuan.cui@intel.com>
Subject: [Qemu-devel] Re: [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest
Date: Wed, 19 May 2010 19:56:06 +0300	[thread overview]
Message-ID: <4BF41826.8070706@redhat.com> (raw)
In-Reply-To: <1274258090-12247-1-git-send-email-sheng@linux.intel.com>

On 05/19/2010 11:34 AM, Sheng Yang wrote:
> From: Dexuan Cui<dexuan.cui@intel.com>
>
> Enable XSAVE/XRSTORE for guest.
>
> Change from V1:
>
> 1. Use FPU API.
> 2. Fix CPUID issue.
> 3. Save/restore all possible guest xstate fields when switching. Because we
> don't know which fields guest has already touched.
>
> Signed-off-by: Dexuan Cui<dexuan.cui@intel.com>
> Signed-off-by: Sheng Yang<sheng@linux.intel.com>
> ---
>   arch/x86/include/asm/kvm_host.h |    1 +
>   arch/x86/include/asm/vmx.h      |    1 +
>   arch/x86/kvm/vmx.c              |   28 +++++++++++++
>   arch/x86/kvm/x86.c              |   85 +++++++++++++++++++++++++++++++++++---
>   4 files changed, 108 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index d08bb4a..78d7b06 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -302,6 +302,7 @@ struct kvm_vcpu_arch {
>   	} update_pte;
>
>   	struct fpu guest_fpu;
> +	uint64_t xcr0, host_xcr0;
>    

host_xcr0 can be a global.

>   /*
>    * Interruption-information format
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 99ae513..2ee8ff6 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -36,6 +36,8 @@
>   #include<asm/vmx.h>
>   #include<asm/virtext.h>
>   #include<asm/mce.h>
> +#include<asm/i387.h>
> +#include<asm/xcr.h>
>
>   #include "trace.h"
>
> @@ -2616,6 +2618,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>   	vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
>   	if (enable_ept)
>   		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
> +	if (cpu_has_xsave)
> +		vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE;
>    

First, we should only allow the guest to play with cr4.osxsave if 
guest_has_xsave in cpuid; otherwise we need to #GP if the guest sets 
it.  Second, it may be better to trap when the guest sets it (should be 
rare); this way, we only need to save/restore xcr0 if the guest has 
enabled cr4.osxsave.

>   	vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
>
>   	tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
> @@ -3354,6 +3358,29 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu)
>   	return 1;
>   }
>
> +static int handle_xsetbv(struct kvm_vcpu *vcpu)
> +{
> +	u64 new_bv = ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX)<<  32)) |
> +		kvm_register_read(vcpu, VCPU_REGS_RAX);
>    

I think you need to trim the upper 32 bits of rax.

Please introduce helpers for reading edx:eax into a u64 and vice versa.  
We can then use the helpers here and in the msr code.

> +
> +	if (kvm_register_read(vcpu, VCPU_REGS_RCX) != 0)
> +		goto err;
> +	if (vmx_get_cpl(vcpu) != 0)
> +		goto err;
> +	if (!(new_bv&  XSTATE_FP) ||
> +	     (new_bv&  ~vcpu->arch.host_xcr0))
> +		goto err;
> +	if ((new_bv&  XSTATE_YMM)&&  !(new_bv&  XSTATE_SSE))
> +		goto err;
>    

This is a little worrying.  What if a new bit is introduced later that 
depends on other bits?  We'll need to add a dependency between ZMM and 
YMM or whatever, and old versions will be broken.

So I think we need to check xcr0 not against host_xcr0 but instead 
against a whitelist of xcr0 bits that we know how to handle (currently 
fpu, sse, and ymm).

> +	vcpu->arch.xcr0 = new_bv;
> +	xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
> +	skip_emulated_instruction(vcpu);
> +	return 1;
> +err:
> +	kvm_inject_gp(vcpu, 0);
> +	return 1;
> +}
> +
>    

> @@ -149,6 +150,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>   	{ NULL }
>   };
>
> +static inline u32 bit(int bitno)
> +{
> +	return 1<<  (bitno&  31);
> +}
> +
>   static void kvm_on_user_return(struct user_return_notifier *urn)
>   {
>   	unsigned slot;
> @@ -473,6 +479,17 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
>   }
>   EXPORT_SYMBOL_GPL(kvm_lmsw);
>
> +static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = kvm_find_cpuid_entry(vcpu, 1, 0);
> +	if (best->ecx&  bit(X86_FEATURE_XSAVE))
>    

Sanity:  if (best && ...)

> +		return true;
> +
> +	return false;
>    

Can avoid the if (): return best && (best->ecx & ...);

> +}
> +
>   int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   {
>   	unsigned long old_cr4 = kvm_read_cr4(vcpu);
> @@ -481,6 +498,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>   	if (cr4&  CR4_RESERVED_BITS)
>   		return 1;
>
> +	if (!guest_cpuid_has_xsave(vcpu)&&  X86_CR4_OSXSAVE)
>    

s/&&.*//

> +		return 1;
> +
>   	if (is_long_mode(vcpu)) {
>   		if (!(cr4&  X86_CR4_PAE))
>    

>   			return 1;
>
> @@ -1887,6 +1902,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>   	unsigned f_lm = 0;
>   #endif
>   	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> +	unsigned f_xsave = cpu_has_xsave ? F(XSAVE) : 0;
>
>   	/* cpuid 1.edx */
>   	const u32 kvm_supported_word0_x86_features =
> @@ -1916,7 +1932,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>   		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
>   		0 /* Reserved, DCA */ | F(XMM4_1) |
>   		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> -		0 /* Reserved, XSAVE, OSXSAVE */;
> +		0 /* Reserved, AES */ | f_xsave | 0 /* OSXSAVE */;
>    

Enough to put F(XSAVE) there, no?  The code should mask it out if not 
present, like XMM4_2.

>
> +static void kvm_update_cpuid(struct kvm_vcpu *vcpu,
> +			     struct kvm_cpuid_entry2 *best)
> +{
> +	/* Update OSXSAVE bit */
> +	if (cpu_has_xsave&&  best->function == 0x1) {
> +		best->ecx&= ~(bit(X86_FEATURE_OSXSAVE));
> +		if (kvm_read_cr4(vcpu)&  X86_CR4_OSXSAVE)
> +			best->ecx |= bit(X86_FEATURE_OSXSAVE);
> +	}
> +}
> +
>   void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
>   {
>   	u32 function, index;
> @@ -4389,6 +4430,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
>   	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
>   	best = kvm_find_cpuid_entry(vcpu, function, index);
>   	if (best) {
> +		kvm_update_cpuid(vcpu, best);
>    

Slightly faster to do it at kvm_set_cr4() time.  Not sure it matters.

>   		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
>   		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
>   		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
> @@ -5118,6 +5160,11 @@ void fx_init(struct kvm_vcpu *vcpu)
>   	fpu_alloc(&vcpu->arch.guest_fpu);
>   	fpu_finit(&vcpu->arch.guest_fpu);
>
> +	if (cpu_has_xsave) {
> +		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
> +		vcpu->arch.xcr0 = vcpu->arch.host_xcr0;
>    

Should be initialized to the default value.

>   void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>   {
>   	if (vcpu->guest_fpu_loaded)
>   		return;
>
>   	vcpu->guest_fpu_loaded = 1;
> +	if (cpu_has_xsave)
> +		vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
>    

Why read it every time?

>   	unlazy_fpu(current);
> +	/* Restore all possible states in the guest */
> +	if (cpu_has_xsave&&  guest_cpuid_has_xsave(vcpu))
> +		xsetbv(XCR_XFEATURE_ENABLED_MASK,
> +			cpuid_get_possible_xcr0(vcpu));
>   	fpu_restore_checking(&vcpu->arch.guest_fpu);
> +	if (cpu_has_xsave)
>    

if guest enabled xsave...

> +		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
>    

Need to do it on every entry, not just fpu reload, since xgetbv does not 
check cr0.ts.

Need to add save/restore support for xcrs.

Need to add save/restore support for xsave state.

Please send a test case for this (see qemu-kvm.git user/test/x86 for 
examples), to be run twice: once with -cpu host,-xsave and once with 
-cpu host,+xsave.

Things to check:

- Set cr4.osxsave without cpuid.xsave -> #GP
- Set cr4.osxsave with cpuid.xsave -> works, sets cr4.osxsave, sets 
cpuid.osxsave
- clearing cr4.osxsave
- xsetbv/xgetbv/xsave/xrstor with cr4.osxsave enabled/disabled
- interdependencies between xcr0 bits (fpu, sse, ymm), illegal 
combinations, illegal bits, illegal xcrs
- anything else you can think of...


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.

      parent reply	other threads:[~2010-05-19 16:56 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-19  8:34 [Qemu-devel] [PATCH v2] KVM: VMX: Enable XSAVE/XRSTORE for guest Sheng Yang
2010-05-19  8:34 ` [Qemu-devel] [PATCH] qemu-kvm: Enable xsave related CPUID Sheng Yang
2010-05-19 16:58   ` [Qemu-devel] " Avi Kivity
2010-05-19 16:56 ` Avi Kivity [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BF41826.8070706@redhat.com \
    --to=avi@redhat.com \
    --cc=dexuan.cui@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=sheng@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).