From mboxrd@z Thu Jan 1 00:00:00 1970 From: Avi Kivity Subject: Re: [PATCH 06/27] nVMX: Implement reading and writing of VMX MSRs Date: Sun, 17 Oct 2010 14:52:10 +0200 Message-ID: <4CBAF17A.10000@redhat.com> References: <1287309814-nyh@il.ibm.com> <201010171006.o9HA6crb029359@rice.haifa.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: kvm@vger.kernel.org, gleb@redhat.com To: "Nadav Har'El" Return-path: Received: from mx1.redhat.com ([209.132.183.28]:62081 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753474Ab0JQMwP (ORCPT ); Sun, 17 Oct 2010 08:52:15 -0400 In-Reply-To: <201010171006.o9HA6crb029359@rice.haifa.ibm.com> Sender: kvm-owner@vger.kernel.org List-ID: On 10/17/2010 12:06 PM, Nadav Har'El wrote: > When the guest can use VMX instructions (when the "nested" module option is > on), it should also be able to read and write VMX MSRs, e.g., to query about > VMX capabilities. This patch adds this support. > > Signed-off-by: Nadav Har'El > --- > arch/x86/kvm/vmx.c | 117 +++++++++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/x86.c | 6 +- > 2 files changed, 122 insertions(+), 1 deletion(-) > > --- .before/arch/x86/kvm/x86.c 2010-10-17 11:52:00.000000000 +0200 > +++ .after/arch/x86/kvm/x86.c 2010-10-17 11:52:00.000000000 +0200 > @@ -789,7 +789,11 @@ static u32 msrs_to_save[] = { > #ifdef CONFIG_X86_64 > MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, > #endif > - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA > + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, > + MSR_IA32_FEATURE_CONTROL, MSR_IA32_VMX_BASIC, > + MSR_IA32_VMX_PINBASED_CTLS, MSR_IA32_VMX_PROCBASED_CTLS, > + MSR_IA32_VMX_EXIT_CTLS, MSR_IA32_VMX_ENTRY_CTLS, > + MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_EPT_VPID_CAP, > }; These MSRs are read-only by the guest (except FEATURE_CONTROL). No need to save/restore them. > > static unsigned num_msrs_to_save; > --- .before/arch/x86/kvm/vmx.c 2010-10-17 11:52:00.000000000 +0200 > +++ .after/arch/x86/kvm/vmx.c 2010-10-17 11:52:00.000000000 +0200 > @@ -1216,6 +1216,119 @@ static void vmx_adjust_tsc_offset(struct > } > > /* > + * If we allow our guest to use VMX instructions (i.e., nested VMX), we should > + * also let it use VMX-specific MSRs. > + * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 0 when we handled a > + * VMX-specific MSR, or 1 when we haven't (and the caller should handled it > + * like all other MSRs). > + */ > +static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) > +{ > + u64 vmx_msr = 0; > + u32 vmx_msr_high, vmx_msr_low; > + > + switch (msr_index) { > + case MSR_IA32_FEATURE_CONTROL: > + *pdata = 0; > + break; > + case MSR_IA32_VMX_BASIC: > + /* > + * This MSR reports some information about VMX support of the > + * processor. We should return information about the VMX we > + * emulate for the guest, and the VMCS structure we give it - > + * not about the VMX support of the underlying hardware. > + * However, some capabilities of the underlying hardware are > + * used directly by our emulation (e.g., the physical address > + * width), so these are copied from what the hardware reports. > + */ > + *pdata = VMCS12_REVISION | (((u64)sizeof(struct vmcs12))<< 32); Let's reserve 4K unconditionally to avoid future complications. > + rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr); > +#define VMX_BASIC_64 0x0001000000000000LLU > +#define VMX_BASIC_MEM_TYPE 0x003c000000000000LLU > +#define VMX_BASIC_INOUT 0x0040000000000000LLU Please move the defines to vmx.h (or msr-index.h). > + *pdata |= vmx_msr& > + (VMX_BASIC_64 | VMX_BASIC_MEM_TYPE | VMX_BASIC_INOUT); I don't see why we need the real data here. Nothing prevents us from supporting 64-bit physical addresses on 32-bit hosts (so long as we use gpa_t for addresses; ditto for MEM_TYPE and INOUT. It's helpful to have fixed values here to remove obstacles to live migration. > + break; > +#define CORE2_PINBASED_CTLS_MUST_BE_ONE 0x00000016 Please use the bit names instead. > +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x48d msr-index.h > + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: > + case MSR_IA32_VMX_PINBASED_CTLS: > + vmx_msr_low = CORE2_PINBASED_CTLS_MUST_BE_ONE; > + vmx_msr_high = CORE2_PINBASED_CTLS_MUST_BE_ONE | > + PIN_BASED_EXT_INTR_MASK | > + PIN_BASED_NMI_EXITING | > + PIN_BASED_VIRTUAL_NMIS; > + *pdata = vmx_msr_low | ((u64)vmx_msr_high<< 32); > + break; > + case MSR_IA32_VMX_PROCBASED_CTLS: > + /* This MSR determines which vm-execution controls the L1 > + * hypervisor may ask, or may not ask, to enable. Normally we > + * can only allow enabling features which the hardware can > + * support, but we limit ourselves to allowing only known > + * features that were tested nested. We allow disabling any > + * feature (even if the hardware can't disable it). > + */ > + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); > + > + vmx_msr_low = 0; /* allow disabling any feature */ What if the host doesn't allow disabling a feature? I think we can't modify vmx_msr_low. > + vmx_msr_high&= /* do not expose new untested features */ > + CPU_BASED_HLT_EXITING | CPU_BASED_CR3_LOAD_EXITING | > + CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS | > + CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | > + CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING | > + CPU_BASED_INVLPG_EXITING | CPU_BASED_TPR_SHADOW | > + CPU_BASED_USE_MSR_BITMAPS | > +#ifdef CONFIG_X86_64 > + CPU_BASED_CR8_LOAD_EXITING | > + CPU_BASED_CR8_STORE_EXITING | > +#endif > + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; > + *pdata = vmx_msr_low | ((u64)vmx_msr_high<< 32); > + break; > + case MSR_IA32_VMX_EXIT_CTLS: > + *pdata = 0; > +#ifdef CONFIG_X86_64 > + *pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE; > +#endif > + break; > + case MSR_IA32_VMX_ENTRY_CTLS: > + *pdata = 0; > + break; > + case MSR_IA32_VMX_PROCBASED_CTLS2: > + *pdata = 0; > + if (vm_need_virtualize_apic_accesses(vcpu->kvm)) > + *pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > + break; > + case MSR_IA32_VMX_EPT_VPID_CAP: > + *pdata = 0; > + break; > + default: > + return 1; > + } > + > + return 0; > +} > + > +static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) > +{ > + switch (msr_index) { > + case MSR_IA32_FEATURE_CONTROL: > + case MSR_IA32_VMX_BASIC: > + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: > + case MSR_IA32_VMX_PINBASED_CTLS: > + case MSR_IA32_VMX_PROCBASED_CTLS: > + case MSR_IA32_VMX_EXIT_CTLS: > + case MSR_IA32_VMX_ENTRY_CTLS: > + case MSR_IA32_VMX_PROCBASED_CTLS2: > + case MSR_IA32_VMX_EPT_VPID_CAP: > + pr_unimpl(vcpu, "unimplemented VMX MSR write: 0x%x data %llx\n", > + msr_index, data); > + return 0; These are illegal to write anyway and should #GP (except FEATURE_CONTROL). We will however need a way for userspace to write these MSRs to allow fine tuning the exposed features (as we do with cpuid). > + default: > + return 1; > + } > +} -- error compiling committee.c: too many arguments to function