From: Mukesh Rathor
Subject: [V11 PATCH 17/21] PVH xen: vmcs related changes
Date: Thu, 22 Aug 2013 18:19:06 -0700
Message-ID: <1377220750-19514-18-git-send-email-mukesh.rathor@oracle.com>
References: <1377220750-19514-1-git-send-email-mukesh.rathor@oracle.com>
In-Reply-To: <1377220750-19514-1-git-send-email-mukesh.rathor@oracle.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Xen-devel@lists.xensource.com
List-Id: xen-devel@lists.xenproject.org

This patch contains the VMCS changes related to PVH, mainly creating a
VMCS for a PVH guest.

Changes in V11:
  - Remove pvh_construct_vmcs and make it part of construct_vmcs.

Signed-off-by: Mukesh Rathor
Acked-by: Keir Fraser
Reviewed-by: Andrew Cooper
PV-HVM-Regression-Tested-by: Andrew Cooper
---
 xen/arch/x86/hvm/vmx/vmcs.c | 188 ++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 167 insertions(+), 21 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 36c4adf..89a1e9f 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -637,7 +637,7 @@ void vmx_vmcs_exit(struct vcpu *v)
     {
         /* Don't confuse vmx_do_resume (for @v or @current!) */
         vmx_clear_vmcs(v);
-        if ( is_hvm_vcpu(current) )
+        if ( !is_pv_vcpu(current) )
             vmx_load_vmcs(current);
 
         spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
@@ -828,6 +828,63 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
     virtual_vmcs_exit(vvmcs);
 }
 
+static int pvh_check_requirements(struct vcpu *v)
+{
+    u64 required, tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+
+    if ( !paging_mode_hap(v->domain) )
+    {
+        printk(XENLOG_G_INFO "HAP is required for PVH guest.\n");
+        return -EINVAL;
+    }
+    if ( !cpu_has_vmx_ept )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have EPT support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_pat )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have PAT support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_msr_bitmap )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU does not have msr bitmap\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_vpid )
+    {
+        printk(XENLOG_G_INFO "PVH: CPU doesn't have VPID support\n");
+        return -ENOSYS;
+    }
+    if ( !cpu_has_vmx_secondary_exec_control )
+    {
+        printk(XENLOG_G_INFO "CPU Secondary exec is required to run PVH\n");
+        return -ENOSYS;
+    }
+
+    /*
+     * If rdtsc exiting is turned on and it goes thru emulate_privileged_op,
+     * then pv_vcpu.ctrlreg must be added to the pvh struct.
+     */
+    if ( v->domain->arch.vtsc )
+    {
+        printk(XENLOG_G_INFO
+               "At present PVH only supports the default timer mode\n");
+        return -ENOSYS;
+    }
+
+    required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
+    if ( (tmpval & required) != required )
+    {
+        printk(XENLOG_G_INFO "PVH: required CR4 features not available:%lx\n",
+               required);
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
 static void vmx_set_host_vmcs_fields(struct vcpu *v)
 {
     uint16_t sysenter_cs;
@@ -861,10 +918,14 @@ static void vmx_set_host_vmcs_fields(struct vcpu *v)
 
 static int construct_vmcs(struct vcpu *v)
 {
+    int rc;
     struct domain *d = v->domain;
     u32 vmexit_ctl = vmx_vmexit_control;
     u32 vmentry_ctl = vmx_vmentry_control;
 
+    if ( is_pvh_vcpu(v) && (rc = pvh_check_requirements(v)) )
+        return rc;
+
     vmx_vmcs_enter(v);
 
     /* VMCS controls. */
@@ -874,16 +935,37 @@ static int construct_vmcs(struct vcpu *v)
     if ( d->arch.vtsc )
         v->arch.hvm_vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
 
-    v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
+    if ( is_pvh_vcpu(v) )
+    {
+        /* Phase I PVH: we run with minimal secondary exec features */
+        u32 bits = SECONDARY_EXEC_ENABLE_EPT | SECONDARY_EXEC_ENABLE_VPID;
 
-    /* Disable VPID for now: we decide when to enable it on VMENTER. */
-    v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
+        /* Intel SDM: resvd bits are 0 */
+        v->arch.hvm_vmx.secondary_exec_control = bits;
+    }
+    else
+    {
+        v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
+
+        /* Disable VPID for now: we decide when to enable it on VMENTER. */
+        v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
+    }
 
     if ( paging_mode_hap(d) )
     {
         v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
                                           CPU_BASED_CR3_LOAD_EXITING |
                                           CPU_BASED_CR3_STORE_EXITING);
+        if ( is_pvh_vcpu(v) )
+        {
+            u32 bits = CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
+                       CPU_BASED_ACTIVATE_MSR_BITMAP;
+            v->arch.hvm_vmx.exec_control |= bits;
+
+            bits = CPU_BASED_USE_TSC_OFFSETING | CPU_BASED_TPR_SHADOW |
+                   CPU_BASED_VIRTUAL_NMI_PENDING;
+            v->arch.hvm_vmx.exec_control &= ~bits;
+        }
     }
     else
     {
@@ -905,9 +987,22 @@ static int construct_vmcs(struct vcpu *v)
     vmx_update_cpu_exec_control(v);
 
     __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
+
+    if ( is_pvh_vcpu(v) )
+    {
+        /*
+         * Note: we run with default VM_ENTRY_LOAD_DEBUG_CTLS of 1, which means
+         * upon vmentry, the cpu reads/loads VMCS.DR7 and VMCS.DEBUGCTLS, and
+         * not use the host values. 0 would cause it to not use the VMCS values.
+         */
+        vmentry_ctl &= ~(VM_ENTRY_LOAD_GUEST_EFER | VM_ENTRY_SMM |
+                         VM_ENTRY_DEACT_DUAL_MONITOR);
+        /* PVH 32bitfixme. */
+        vmentry_ctl |= VM_ENTRY_IA32E_MODE; /* GUEST_EFER.LME/LMA ignored */
+    }
     __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
 
-    if ( cpu_has_vmx_ple )
+    if ( cpu_has_vmx_ple && !is_pvh_vcpu(v) )
     {
         __vmwrite(PLE_GAP, ple_gap);
         __vmwrite(PLE_WINDOW, ple_window);
@@ -921,28 +1016,46 @@ static int construct_vmcs(struct vcpu *v)
     if ( cpu_has_vmx_msr_bitmap )
     {
         unsigned long *msr_bitmap = alloc_xenheap_page();
+        int msr_type = MSR_TYPE_R | MSR_TYPE_W;
 
         if ( msr_bitmap == NULL )
+        {
+            vmx_vmcs_exit(v);
             return -ENOMEM;
+        }
 
         memset(msr_bitmap, ~0, PAGE_SIZE);
         v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
         __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
 
-        vmx_disable_intercept_for_msr(v, MSR_FS_BASE, MSR_TYPE_R | MSR_TYPE_W);
-        vmx_disable_intercept_for_msr(v, MSR_GS_BASE, MSR_TYPE_R | MSR_TYPE_W);
-        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W);
-        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W);
-        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W);
+        /* Disable intercepts for MSRs that have corresponding VMCS fields. */
+        vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type);
+        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type);
+
         if ( cpu_has_vmx_pat && paging_mode_hap(d) )
-            vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
+            vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, msr_type);
+
+        if ( is_pvh_vcpu(v) )
+            vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type);
+
+        /*
+         * PVH: We don't disable intercepts for MSRs: MSR_STAR, MSR_LSTAR,
+         * MSR_CSTAR, and MSR_SYSCALL_MASK because we need to specify
+         * save/restore area to save/restore at every VM exit and entry.
+         * Instead, let the intercept functions save them into
+         * vmx_msr_state fields. See comment in vmx_restore_host_msrs().
+         * See also vmx_restore_guest_msrs().
+         */
     }
 
     /* I/O access bitmap. */
     __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
     __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
 
-    if ( cpu_has_vmx_virtual_intr_delivery )
+    if ( cpu_has_vmx_virtual_intr_delivery && !is_pvh_vcpu(v) )
     {
         /* EOI-exit bitmap */
         v->arch.hvm_vmx.eoi_exit_bitmap[0] = (uint64_t)0;
@@ -958,7 +1071,7 @@ static int construct_vmcs(struct vcpu *v)
         __vmwrite(GUEST_INTR_STATUS, 0);
     }
 
-    if ( cpu_has_vmx_posted_intr_processing )
+    if ( cpu_has_vmx_posted_intr_processing && !is_pvh_vcpu(v) )
     {
         __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm_vmx.pi_desc));
         __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
@@ -1005,7 +1118,12 @@ static int construct_vmcs(struct vcpu *v)
     __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
     __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
     __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
-    __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
+
+    if ( is_pvh_vcpu(v) )
+        /* CS.L == 1, exec, read/write, accessed. PVH 32bitfixme. */
+        __vmwrite(GUEST_CS_AR_BYTES, 0xa09b); /* exec/read, accessed */
+    else
+        __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
 
     /* Guest IDT. */
     __vmwrite(GUEST_IDTR_BASE, 0);
@@ -1032,16 +1150,36 @@ static int construct_vmcs(struct vcpu *v)
 
     v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK
               | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
+              | (is_pvh_vcpu(v) ? (1U << TRAP_int3) | (1U << TRAP_debug) : 0)
              | (1U << TRAP_no_device);
     vmx_update_exception_bitmap(v);
 
     v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
-    hvm_update_guest_cr(v, 0);
+    if ( is_pvh_vcpu(v) )
+    {
+        u64 tmpval = v->arch.hvm_vcpu.guest_cr[0] | X86_CR0_PG | X86_CR0_NE;
+        v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = tmpval;
+        __vmwrite(GUEST_CR0, tmpval);
+        __vmwrite(CR0_READ_SHADOW, tmpval);
 
-    v->arch.hvm_vcpu.guest_cr[4] = 0;
-    hvm_update_guest_cr(v, 4);
+        v->arch.hvm_vmx.vmx_realmode = 0;
+    } else
+        hvm_update_guest_cr(v, 0);
 
-    if ( cpu_has_vmx_tpr_shadow )
+    if ( is_pvh_vcpu(v) )
+    {
+        u64 tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+        __vmwrite(GUEST_CR4, tmpval);
+        __vmwrite(CR4_READ_SHADOW, tmpval);
+        v->arch.hvm_vcpu.guest_cr[4] = tmpval;
+    }
+    else
+    {
+        v->arch.hvm_vcpu.guest_cr[4] = 0;
+        hvm_update_guest_cr(v, 4);
+    }
+
+    if ( cpu_has_vmx_tpr_shadow && !is_pvh_vcpu(v) )
     {
         __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
                   page_to_maddr(vcpu_vlapic(v)->regs_page));
@@ -1070,9 +1208,14 @@ static int construct_vmcs(struct vcpu *v)
 
     vmx_vmcs_exit(v);
 
-    paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
+    /* PVH: paging mode is updated by arch_set_info_guest(). */
+    if ( is_hvm_vcpu(v) )
+    {
+        /* will update HOST & GUEST_CR3 as reqd */
+        paging_update_paging_modes(v);
 
-    vmx_vlapic_msr_changed(v);
+        vmx_vlapic_msr_changed(v);
+    }
 
     return 0;
 }
@@ -1297,6 +1440,9 @@ void vmx_do_resume(struct vcpu *v)
         hvm_asid_flush_vcpu(v);
     }
 
+    if ( is_pvh_vcpu(v) )
+        reset_stack_and_jump(vmx_asm_do_vmentry);
+
     debug_state = v->domain->debugger_attached
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
                   || v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
@@ -1480,7 +1626,7 @@ static void vmcs_dump(unsigned char ch)
 
     for_each_domain ( d )
     {
-        if ( !is_hvm_domain(d) )
+        if ( is_pv_domain(d) )
             continue;
         printk("\n>>> Domain %d <<<\n", d->domain_id);
         for_each_vcpu ( d, v )
-- 
1.7.2.3