From mboxrd@z Thu Jan 1 00:00:00 1970 From: George Dunlap Subject: Re: [PATCH v14 07/17] pvh: vmx-specific changes Date: Mon, 4 Nov 2013 16:19:19 +0000 Message-ID: <5277C907.1020602@eu.citrix.com> References: <1383567306-6636-1-git-send-email-george.dunlap@eu.citrix.com> <1383567306-6636-8-git-send-email-george.dunlap@eu.citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1383567306-6636-8-git-send-email-george.dunlap@eu.citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel@lists.xen.org Cc: "Dong, Eddie" , "Nakajima, Jun" List-Id: xen-devel@lists.xenproject.org CC'ing the VMX maintainers.. -George On 04/11/13 12:14, George Dunlap wrote: > Changes: > * Enforce HAP mode for now > * Disable exits related to virtual interrupts or emulated APICs > * Disable changing paging mode > - "unrestricted guest" (i.e., real mode for EPT) disabled > - write guest EFER disabled > * Start in 64-bit mode > * Force TSC mode to be "none" > * Paging mode update to happen in arch_set_info_guest > > Signed-off-by: George Dunlap > Signed-off-by: Mukesh Rathor > --- > v14: > - Mask out bits of cr4 that the guest is not allowed to set > v13: > - Fix up default cr0 settings > - Get rid of some unnecessary PVH-related changes > - Return EOPNOTSUPP instead of ENOSYS if hardware features are not present > - Remove an unnecessary variable from pvh_check_requirements > CC: Jan Beulich > CC: Tim Deegan > CC: Keir Fraser > --- > xen/arch/x86/hvm/vmx/vmcs.c | 132 +++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 128 insertions(+), 4 deletions(-) > > diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c > index f2a2857..ba05ebb 100644 > --- a/xen/arch/x86/hvm/vmx/vmcs.c > +++ b/xen/arch/x86/hvm/vmx/vmcs.c > @@ -28,6 +28,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -841,6 +842,60 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val) > virtual_vmcs_exit(vvmcs); > } > > +static int pvh_check_requirements(struct vcpu *v) > +{ > + u64 required; > + > + /* Check for required hardware features */ > + if ( !cpu_has_vmx_ept ) > + { > + printk(XENLOG_G_INFO "PVH: CPU does not have EPT support\n"); > + return -EOPNOTSUPP; > + } > + if ( !cpu_has_vmx_pat ) > + { > + printk(XENLOG_G_INFO "PVH: CPU does not have PAT support\n"); > + return -EOPNOTSUPP; > + } > + if ( !cpu_has_vmx_msr_bitmap ) > + { > + printk(XENLOG_G_INFO "PVH: CPU does not have msr bitmap\n"); > + return -EOPNOTSUPP; > + } > + if ( !cpu_has_vmx_secondary_exec_control ) > + { > + printk(XENLOG_G_INFO "CPU Secondary exec is required to run PVH\n"); > + return -EOPNOTSUPP; > + } > + required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR; > + if ( (real_cr4_to_pv_guest_cr4(mmu_cr4_features) & required) != required ) > + { > + printk(XENLOG_G_INFO "PVH: required CR4 features not available:%lx\n", > + required); > + return -EOPNOTSUPP; > + } > + > + /* Check for required configuration options */ > + if ( !paging_mode_hap(v->domain) ) > + { > + printk(XENLOG_G_INFO "HAP is required for PVH guest.\n"); > + return -EINVAL; > + } > + /* > + * If rdtsc exiting is turned on and it goes thru emulate_privileged_op, > + * then pv_vcpu.ctrlreg must be added to the pvh struct. > + */ > + if ( v->domain->arch.vtsc ) > + { > + printk(XENLOG_G_INFO > + "At present PVH only supports the default timer mode\n"); > + return -EINVAL; > + } > + > + > + return 0; > +} > + > static int construct_vmcs(struct vcpu *v) > { > struct domain *d = v->domain; > @@ -849,6 +904,13 @@ static int construct_vmcs(struct vcpu *v) > u32 vmexit_ctl = vmx_vmexit_control; > u32 vmentry_ctl = vmx_vmentry_control; > > + if ( is_pvh_domain(d) ) > + { > + int rc = pvh_check_requirements(v); > + if ( rc ) > + return rc; > + } > + > vmx_vmcs_enter(v); > > /* VMCS controls. */ > @@ -887,7 +949,32 @@ static int construct_vmcs(struct vcpu *v) > /* Do not enable Monitor Trap Flag unless start single step debug */ > v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG; > > + if ( is_pvh_domain(d) ) > + { > + /* Disable virtual apics, TPR */ > + v->arch.hvm_vmx.secondary_exec_control &= > + ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES > + | SECONDARY_EXEC_APIC_REGISTER_VIRT > + | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); > + v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW; > + > + /* Disable wbinvd (only necessary for MMIO), > + * unrestricted guest (real mode for EPT) */ > + v->arch.hvm_vmx.secondary_exec_control &= > + ~(SECONDARY_EXEC_UNRESTRICTED_GUEST > + | SECONDARY_EXEC_WBINVD_EXITING); > + > + /* Start in 64-bit mode. > + * PVH 32bitfixme. */ > + vmentry_ctl |= VM_ENTRY_IA32E_MODE; /* GUEST_EFER.LME/LMA ignored */ > + > + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); > + ASSERT(v->arch.hvm_vmx.exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP); > + ASSERT(!(v->arch.hvm_vmx.exec_control & CPU_BASED_RDTSC_EXITING)); > + } > + > vmx_update_cpu_exec_control(v); > + > __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl); > __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl); > > @@ -923,6 +1010,17 @@ static int construct_vmcs(struct vcpu *v) > vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W); > if ( cpu_has_vmx_pat && paging_mode_hap(d) ) > vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W); > + if ( is_pvh_domain(d) ) > + vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, MSR_TYPE_R | MSR_TYPE_W); > + > + /* > + * PVH: We don't disable intercepts for MSRs: MSR_STAR, MSR_LSTAR, > + * MSR_CSTAR, and MSR_SYSCALL_MASK because we need to specify > + * save/restore area to save/restore at every VM exit and entry. > + * Instead, let the intercept functions save them into > + * vmx_msr_state fields. See comment in vmx_restore_host_msrs(). > + * See also vmx_restore_guest_msrs(). > + */ > } > > /* I/O access bitmap. */ > @@ -1011,7 +1109,11 @@ static int construct_vmcs(struct vcpu *v) > __vmwrite(GUEST_DS_AR_BYTES, 0xc093); > __vmwrite(GUEST_FS_AR_BYTES, 0xc093); > __vmwrite(GUEST_GS_AR_BYTES, 0xc093); > - __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */ > + if ( is_pvh_domain(d) ) > + /* CS.L == 1, exec, read/write, accessed. PVH 32bitfixme. */ > + __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); > + else > + __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */ > > /* Guest IDT. */ > __vmwrite(GUEST_IDTR_BASE, 0); > @@ -1041,12 +1143,29 @@ static int construct_vmcs(struct vcpu *v) > | (1U << TRAP_no_device); > vmx_update_exception_bitmap(v); > > + /* In HVM domains, this happens on the realmode->paging > + * transition. Since PVH never goes through this transition, we > + * need to do it at start-of-day. */ > + if ( is_pvh_domain(d) ) > + vmx_update_debug_state(v); > + > v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET; > + > + /* PVH domains always start in paging mode */ > + if ( is_pvh_domain(d) ) > + v->arch.hvm_vcpu.guest_cr[0] |= X86_CR0_PG | X86_CR0_NE | X86_CR0_WP; > + > hvm_update_guest_cr(v, 0); > > - v->arch.hvm_vcpu.guest_cr[4] = 0; > + v->arch.hvm_vcpu.guest_cr[4] = is_pvh_domain(d) ? > + (real_cr4_to_pv_guest_cr4(mmu_cr4_features) > + & ~HVM_CR4_GUEST_RESERVED_BITS(v)) > + : 0; > hvm_update_guest_cr(v, 4); > > + if ( is_pvh_domain(d) ) > + v->arch.hvm_vmx.vmx_realmode = 0; > + > if ( cpu_has_vmx_tpr_shadow ) > { > __vmwrite(VIRTUAL_APIC_PAGE_ADDR, > @@ -1076,9 +1195,14 @@ static int construct_vmcs(struct vcpu *v) > > vmx_vmcs_exit(v); > > - paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ > + /* PVH: paging mode is updated by arch_set_info_guest(). */ > + if ( is_hvm_vcpu(v) ) > + { > + /* will update HOST & GUEST_CR3 as reqd */ > + paging_update_paging_modes(v); > > - vmx_vlapic_msr_changed(v); > + vmx_vlapic_msr_changed(v); > + } > > return 0; > }