From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: Xen-devel@lists.xensource.com
Subject: [PATCH 09/17] PVH xen: create PVH vmcs, and also initialization
Date: Tue, 23 Apr 2013 14:25:58 -0700 [thread overview]
Message-ID: <1366752366-16594-10-git-send-email-mukesh.rathor@oracle.com> (raw)
In-Reply-To: <1366752366-16594-1-git-send-email-mukesh.rathor@oracle.com>
This patch mainly contains the code to create a VMCS for a PVH guest, and the
HVM-specific vcpu/domain creation code.
Changes in V2:
- Skip the call to hvm_do_resume() at the call site rather than returning early inside it.
- For PVH, leave vmx_do_resume() before the Intel debugger handling.
Changes in V3:
- Cleanup pvh_construct_vmcs().
- Fix formatting in a few places and add XENLOG_G_ERR to the error messages.
- Do not load the CS selector for PVH here, but try to do that in Linux.
Changes in V4:
- Remove VM_ENTRY_LOAD_DEBUG_CTLS clearing.
- Mark the places that need changes for 32-bit kernels ("PVH 32bitfixme").
- Verify pit_init call for PVH.
Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
xen/arch/x86/hvm/hvm.c | 88 ++++++++++++-
xen/arch/x86/hvm/vmx/vmcs.c | 309 ++++++++++++++++++++++++++++++++++++++----
xen/arch/x86/hvm/vmx/vmx.c | 39 ++++++
3 files changed, 403 insertions(+), 33 deletions(-)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 38e87ce..27dbe3d 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -510,6 +510,31 @@ static int hvm_print_line(
return X86EMUL_OKAY;
}
+static int pvh_dom_initialise(struct domain *d)
+{
+ int rc;
+
+ if ( !d->arch.hvm_domain.hap_enabled )
+ return -EINVAL;
+
+ spin_lock_init(&d->arch.hvm_domain.irq_lock);
+ hvm_init_guest_time(d);
+
+ hvm_init_cacheattr_region_list(d);
+
+ if ( (rc = paging_enable(d, PG_refcounts|PG_translate|PG_external)) != 0 )
+ goto fail1;
+
+ if ( (rc = hvm_funcs.domain_initialise(d)) != 0 )
+ goto fail1;
+
+ return 0;
+
+fail1:
+ hvm_destroy_cacheattr_region_list(d);
+ return rc;
+}
+
int hvm_domain_initialise(struct domain *d)
{
int rc;
@@ -520,6 +545,8 @@ int hvm_domain_initialise(struct domain *d)
"on a non-VT/AMDV platform.\n");
return -EINVAL;
}
+ if ( is_pvh_domain(d) )
+ return pvh_dom_initialise(d);
spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
spin_lock_init(&d->arch.hvm_domain.irq_lock);
@@ -584,6 +611,11 @@ int hvm_domain_initialise(struct domain *d)
void hvm_domain_relinquish_resources(struct domain *d)
{
+ if ( is_pvh_domain(d) )
+ {
+ pit_deinit(d);
+ return;
+ }
if ( hvm_funcs.nhvm_domain_relinquish_resources )
hvm_funcs.nhvm_domain_relinquish_resources(d);
@@ -609,10 +641,14 @@ void hvm_domain_relinquish_resources(struct domain *d)
void hvm_domain_destroy(struct domain *d)
{
hvm_funcs.domain_destroy(d);
+ hvm_destroy_cacheattr_region_list(d);
+
+ if ( is_pvh_domain(d) )
+ return;
+
rtc_deinit(d);
stdvga_deinit(d);
vioapic_deinit(d);
- hvm_destroy_cacheattr_region_list(d);
}
static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
@@ -1066,14 +1102,43 @@ static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
}
__initcall(__hvm_register_CPU_XSAVE_save_and_restore);
+static int pvh_vcpu_initialise(struct vcpu *v)
+{
+ int rc;
+
+ if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
+ return rc;
+
+ softirq_tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet,
+ (void(*)(unsigned long))hvm_assert_evtchn_irq,
+ (unsigned long)v);
+
+ v->arch.hvm_vcpu.hcall_64bit = 1; /* PVH 32bitfixme */
+ v->arch.user_regs.eflags = 2;
+ v->arch.hvm_vcpu.inject_trap.vector = -1;
+
+ if ( (rc = hvm_vcpu_cacheattr_init(v)) != 0 )
+ {
+ hvm_funcs.vcpu_destroy(v);
+ return rc;
+ }
+ if ( v->vcpu_id == 0 )
+ pit_init(v, cpu_khz);
+
+ return 0;
+}
+
int hvm_vcpu_initialise(struct vcpu *v)
{
int rc;
struct domain *d = v->domain;
- domid_t dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+ domid_t dm_domid;
hvm_asid_flush_vcpu(v);
+ if ( is_pvh_vcpu(v) )
+ return pvh_vcpu_initialise(v);
+
if ( (rc = vlapic_init(v)) != 0 )
goto fail1;
@@ -1084,6 +1149,8 @@ int hvm_vcpu_initialise(struct vcpu *v)
&& (rc = nestedhvm_vcpu_initialise(v)) < 0 )
goto fail3;
+ dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+
/* Create ioreq event channel. */
rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL);
if ( rc < 0 )
@@ -1163,7 +1230,10 @@ void hvm_vcpu_destroy(struct vcpu *v)
tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
hvm_vcpu_cacheattr_destroy(v);
- vlapic_destroy(v);
+
+ if ( !is_pvh_vcpu(v) )
+ vlapic_destroy(v);
+
hvm_funcs.vcpu_destroy(v);
/* Event channel is already freed by evtchn_destroy(). */
@@ -4512,6 +4582,8 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
void hvm_memory_event_cr0(unsigned long value, unsigned long old)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_CR0],
MEM_EVENT_REASON_CR0,
@@ -4520,6 +4592,8 @@ void hvm_memory_event_cr0(unsigned long value, unsigned long old)
void hvm_memory_event_cr3(unsigned long value, unsigned long old)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_CR3],
MEM_EVENT_REASON_CR3,
@@ -4528,6 +4602,8 @@ void hvm_memory_event_cr3(unsigned long value, unsigned long old)
void hvm_memory_event_cr4(unsigned long value, unsigned long old)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_CR4],
MEM_EVENT_REASON_CR4,
@@ -4536,6 +4612,8 @@ void hvm_memory_event_cr4(unsigned long value, unsigned long old)
void hvm_memory_event_msr(unsigned long msr, unsigned long value)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_MSR],
MEM_EVENT_REASON_MSR,
@@ -4548,6 +4626,8 @@ int hvm_memory_event_int3(unsigned long gla)
unsigned long gfn;
gfn = paging_gva_to_gfn(current, gla, &pfec);
+ if ( is_pvh_vcpu(current) )
+ return 0;
return hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_INT3],
MEM_EVENT_REASON_INT3,
@@ -4560,6 +4640,8 @@ int hvm_memory_event_single_step(unsigned long gla)
unsigned long gfn;
gfn = paging_gva_to_gfn(current, gla, &pfec);
+ if ( is_pvh_vcpu(current) )
+ return 0;
return hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP],
MEM_EVENT_REASON_SINGLESTEP,
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 9926ffb..e7b0c4b 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -624,7 +624,7 @@ void vmx_vmcs_exit(struct vcpu *v)
{
/* Don't confuse vmx_do_resume (for @v or @current!) */
vmx_clear_vmcs(v);
- if ( is_hvm_vcpu(current) )
+ if ( !is_pv_vcpu(current) )
vmx_load_vmcs(current);
spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
@@ -815,16 +815,283 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
virtual_vmcs_exit(vvmcs);
}
-static int construct_vmcs(struct vcpu *v)
+static void vmx_set_common_host_vmcs_fields(struct vcpu *v)
{
- struct domain *d = v->domain;
uint16_t sysenter_cs;
unsigned long sysenter_eip;
+
+ /* Host data selectors. */
+ __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
+ __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
+ __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
+ __vmwrite(HOST_FS_SELECTOR, 0);
+ __vmwrite(HOST_GS_SELECTOR, 0);
+ __vmwrite(HOST_FS_BASE, 0);
+ __vmwrite(HOST_GS_BASE, 0);
+
+ /* Host control registers. */
+ v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
+ __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
+ __vmwrite(HOST_CR4,
+ mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
+
+ /* Host CS:RIP. */
+ __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
+ __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
+
+ /* Host SYSENTER CS:RIP. */
+ rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
+ __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
+ rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
+ __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
+}
+
+static int pvh_check_requirements(struct vcpu *v)
+{
+ u64 required, tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+
+ if ( !paging_mode_hap(v->domain) )
+ {
+ dprintk(XENLOG_G_ERR, "HAP is required for PVH guest.\n");
+ return -EINVAL;
+ }
+ if ( !cpu_has_vmx_pat )
+ {
+ dprintk(XENLOG_G_ERR, "PVH: CPU does not have PAT support\n");
+ return -ENOSYS;
+ }
+ if ( !cpu_has_vmx_msr_bitmap )
+ {
+ dprintk(XENLOG_G_ERR, "PVH: CPU does not have msr bitmap\n");
+ return -ENOSYS;
+ }
+ if ( !cpu_has_vmx_vpid )
+ {
+ dprintk(XENLOG_G_ERR, "PVH: CPU doesn't have VPID support\n");
+ return -ENOSYS;
+ }
+ if ( !cpu_has_vmx_secondary_exec_control )
+ {
+ dprintk(XENLOG_G_ERR, "CPU Secondary exec is required to run PVH\n");
+ return -ENOSYS;
+ }
+
+ if ( v->domain->arch.vtsc )
+ {
+ dprintk(XENLOG_G_ERR,
+ "At present PVH only supports the default timer mode\n");
+ return -ENOSYS;
+ }
+
+ required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
+ if ( (tmpval & required) != required )
+ {
+ dprintk(XENLOG_G_ERR, "PVH: required CR4 features not available:%lx\n",
+ required);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+static int pvh_construct_vmcs(struct vcpu *v)
+{
+ int rc, msr_type;
+ unsigned long *msr_bitmap;
+ struct domain *d = v->domain;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ struct ept_data *ept = &p2m->ept;
+ u32 vmexit_ctl = vmx_vmexit_control;
+ u32 vmentry_ctl = vmx_vmentry_control;
+ u64 host_pat, guest_pat, tmpval = -1;
+
+ if ( (rc = pvh_check_requirements(v)) )
+ return rc;
+
+ msr_bitmap = alloc_xenheap_page();
+ if ( msr_bitmap == NULL )
+ return -ENOMEM;
+
+ /* 1. Pin-Based Controls */
+ __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
+
+ v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
+
+ /* 2. Primary Processor-based controls */
+ /*
+ * If rdtsc exiting is turned on and it goes thru emulate_privileged_op,
+ * then pv_vcpu.ctrlreg must be added to the pvh struct.
+ */
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
+
+ v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
+ v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+ v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+
+ /* 3. Secondary Processor-based controls. Intel SDM: all resvd bits are 0*/
+ v->arch.hvm_vmx.secondary_exec_control = SECONDARY_EXEC_ENABLE_EPT;
+ v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
+ v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+
+ __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+ v->arch.hvm_vmx.secondary_exec_control);
+
+ __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
+ __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
+
+ /* MSR bitmap for intercepts */
+ memset(msr_bitmap, ~0, PAGE_SIZE);
+ v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+ __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+
+ msr_type = MSR_TYPE_R | MSR_TYPE_W;
+ vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_STAR, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_LSTAR, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_CSTAR, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_SYSCALL_MASK, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, msr_type);
+
+ __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
+
+ /*
+ * Note: we run with the default VM_ENTRY_LOAD_DEBUG_CTLS of 1, which means
+ * that upon vmentry the cpu loads VMCS.DR7 and VMCS.DEBUGCTLS rather than
+ * using the host values. 0 would cause it to not use the VMCS values.
+ */
+ vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_EFER;
+ vmentry_ctl &= ~VM_ENTRY_SMM;
+ vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
+ /* PVH 32bitfixme */
+ vmentry_ctl |= VM_ENTRY_IA32E_MODE; /* GUEST_EFER.LME/LMA ignored */
+
+ __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
+
+ __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+
+ vmx_set_common_host_vmcs_fields(v);
+ vmx_set_host_env(v);
+
+ __vmwrite(VM_ENTRY_INTR_INFO, 0);
+ __vmwrite(CR3_TARGET_COUNT, 0);
+ __vmwrite(GUEST_ACTIVITY_STATE, 0);
+
+ /* These are sorta irrelevant as we load the descriptors directly. */
+ __vmwrite(GUEST_CS_SELECTOR, 0);
+ __vmwrite(GUEST_DS_SELECTOR, 0);
+ __vmwrite(GUEST_SS_SELECTOR, 0);
+ __vmwrite(GUEST_ES_SELECTOR, 0);
+ __vmwrite(GUEST_FS_SELECTOR, 0);
+ __vmwrite(GUEST_GS_SELECTOR, 0);
+
+ __vmwrite(GUEST_CS_BASE, 0);
+ __vmwrite(GUEST_CS_LIMIT, ~0u);
+ /* CS.L == 1, execute/read, accessed. PVH 32bitfixme */
+ __vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+
+ __vmwrite(GUEST_DS_BASE, 0);
+ __vmwrite(GUEST_DS_LIMIT, ~0u);
+ __vmwrite(GUEST_DS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_SS_BASE, 0);
+ __vmwrite(GUEST_SS_LIMIT, ~0u);
+ __vmwrite(GUEST_SS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_ES_BASE, 0);
+ __vmwrite(GUEST_ES_LIMIT, ~0u);
+ __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_FS_BASE, 0);
+ __vmwrite(GUEST_FS_LIMIT, ~0u);
+ __vmwrite(GUEST_FS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_GS_BASE, 0);
+ __vmwrite(GUEST_GS_LIMIT, ~0u);
+ __vmwrite(GUEST_GS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_GDTR_BASE, 0);
+ __vmwrite(GUEST_GDTR_LIMIT, 0);
+
+ __vmwrite(GUEST_LDTR_BASE, 0);
+ __vmwrite(GUEST_LDTR_LIMIT, 0);
+ __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */
+ __vmwrite(GUEST_LDTR_SELECTOR, 0);
+
+ /* Guest TSS. */
+ __vmwrite(GUEST_TR_BASE, 0);
+ __vmwrite(GUEST_TR_LIMIT, 0xff);
+ __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */
+
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ __vmwrite(GUEST_DR7, 0);
+ __vmwrite(VMCS_LINK_POINTER, ~0UL);
+
+ __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+
+ v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK | (1U << TRAP_debug) |
+ (1U << TRAP_int3) | (1U << TRAP_no_device);
+ __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
+
+ /* Set WP bit so rdonly pages are not written from CPL 0 */
+ tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
+ __vmwrite(GUEST_CR0, tmpval);
+ __vmwrite(CR0_READ_SHADOW, tmpval);
+ v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = tmpval;
+
+ tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+ __vmwrite(GUEST_CR4, tmpval);
+ __vmwrite(CR4_READ_SHADOW, tmpval);
+ v->arch.hvm_vcpu.guest_cr[4] = tmpval;
+
+ __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
+ __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
+
+ v->arch.hvm_vmx.vmx_realmode = 0;
+
+ ept->asr = pagetable_get_pfn(p2m_get_pagetable(p2m));
+ __vmwrite(EPT_POINTER, ept_get_eptp(ept));
+
+ rdmsrl(MSR_IA32_CR_PAT, host_pat);
+ __vmwrite(HOST_PAT, host_pat);
+ guest_pat = MSR_IA32_CR_PAT_RESET;
+ __vmwrite(GUEST_PAT, guest_pat);
+
+ /* the paging mode is updated for PVH by arch_set_info_guest() */
+
+ return 0;
+}
+
+static int construct_vmcs(struct vcpu *v)
+{
+ struct domain *d = v->domain;
u32 vmexit_ctl = vmx_vmexit_control;
u32 vmentry_ctl = vmx_vmentry_control;
vmx_vmcs_enter(v);
+ if ( is_pvh_vcpu(v) )
+ {
+ int rc = pvh_construct_vmcs(v);
+ vmx_vmcs_exit(v);
+ return rc;
+ }
+
/* VMCS controls. */
__vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
@@ -916,30 +1183,7 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(GUEST_INTR_STATUS, 0);
}
- /* Host data selectors. */
- __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
- __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
- __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
- __vmwrite(HOST_FS_SELECTOR, 0);
- __vmwrite(HOST_GS_SELECTOR, 0);
- __vmwrite(HOST_FS_BASE, 0);
- __vmwrite(HOST_GS_BASE, 0);
-
- /* Host control registers. */
- v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
- __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
- __vmwrite(HOST_CR4,
- mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
-
- /* Host CS:RIP. */
- __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
- __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
-
- /* Host SYSENTER CS:RIP. */
- rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
- __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
- rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
- __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
+ vmx_set_common_host_vmcs_fields(v);
/* MSR intercepts. */
__vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
@@ -1259,8 +1503,10 @@ void vmx_do_resume(struct vcpu *v)
vmx_clear_vmcs(v);
vmx_load_vmcs(v);
- hvm_migrate_timers(v);
- hvm_migrate_pirqs(v);
+ if ( !is_pvh_vcpu(v) ) {
+ hvm_migrate_timers(v);
+ hvm_migrate_pirqs(v);
+ }
vmx_set_host_env(v);
/*
* Both n1 VMCS and n2 VMCS need to update the host environment after
@@ -1272,6 +1518,9 @@ void vmx_do_resume(struct vcpu *v)
hvm_asid_flush_vcpu(v);
}
+ if ( is_pvh_vcpu(v) )
+ reset_stack_and_jump(vmx_asm_do_vmentry);
+
debug_state = v->domain->debugger_attached
|| v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
|| v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
@@ -1455,7 +1704,7 @@ static void vmcs_dump(unsigned char ch)
for_each_domain ( d )
{
- if ( !is_hvm_domain(d) )
+ if ( is_pv_domain(d) )
continue;
printk("\n>>> Domain %d <<<\n", d->domain_id);
for_each_vcpu ( d, v )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 59336b9..70d0286 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -79,6 +79,9 @@ static int vmx_domain_initialise(struct domain *d)
{
int rc;
+ if ( is_pvh_domain(d) )
+ return 0;
+
if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
return rc;
@@ -87,6 +90,9 @@ static int vmx_domain_initialise(struct domain *d)
static void vmx_domain_destroy(struct domain *d)
{
+ if ( is_pvh_domain(d) )
+ return;
+
vmx_free_vlapic_mapping(d);
}
@@ -110,6 +116,12 @@ static int vmx_vcpu_initialise(struct vcpu *v)
vpmu_initialise(v);
+ if (is_pvh_vcpu(v) )
+ {
+ /* This is for hvm_long_mode_enabled(v). */
+ v->arch.hvm_vcpu.guest_efer = EFER_SCE | EFER_LMA | EFER_LME;
+ return 0;
+ }
vmx_install_vlapic_mapping(v);
/* %eax == 1 signals full real-mode support to the guest loader. */
@@ -1031,6 +1043,27 @@ static void vmx_update_host_cr3(struct vcpu *v)
vmx_vmcs_exit(v);
}
+/*
+ * A PVH guest never causes a CR3 write vmexit. This is called during guest setup.
+ */
+static void vmx_update_pvh_cr(struct vcpu *v, unsigned int cr)
+{
+ vmx_vmcs_enter(v);
+ switch ( cr )
+ {
+ case 3:
+ __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.guest_cr[3]);
+ hvm_asid_flush_vcpu(v);
+ break;
+
+ default:
+ dprintk(XENLOG_ERR,
+ "PVH: d%d v%d unexpected cr%d update at rip:%lx\n",
+ v->domain->domain_id, v->vcpu_id, cr, __vmread(GUEST_RIP));
+ }
+ vmx_vmcs_exit(v);
+}
+
void vmx_update_debug_state(struct vcpu *v)
{
unsigned long mask;
@@ -1050,6 +1083,12 @@ void vmx_update_debug_state(struct vcpu *v)
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
{
+ if ( is_pvh_vcpu(v) )
+ {
+ vmx_update_pvh_cr(v, cr);
+ return;
+ }
+
vmx_vmcs_enter(v);
switch ( cr )
--
1.7.2.3
next prev parent reply other threads:[~2013-04-23 21:25 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-23 21:25 [PATCH 00/17][V4]: PVH xen: version 4 patches Mukesh Rathor
2013-04-23 21:25 ` [PATCH 01/17] PVH xen: turn gdb_frames/gdt_ents into union Mukesh Rathor
2013-04-23 21:25 ` [PATCH 02/17] PVH xen: add XENMEM_add_to_physmap_range Mukesh Rathor
2013-04-23 21:25 ` [PATCH 03/17] PVH xen: create domctl_memory_mapping() function Mukesh Rathor
2013-04-24 7:01 ` Jan Beulich
2013-04-23 21:25 ` [PATCH 04/17] PVH xen: add params to read_segment_register Mukesh Rathor
2013-04-23 21:25 ` [PATCH 05/17] PVH xen: vmx realted preparatory changes for PVH Mukesh Rathor
2013-04-23 21:25 ` [PATCH 06/17] PVH xen: Introduce PVH guest type Mukesh Rathor
2013-04-24 7:07 ` Jan Beulich
2013-04-24 23:01 ` Mukesh Rathor
2013-04-25 8:28 ` Jan Beulich
2013-04-23 21:25 ` [PATCH 07/17] PVH xen: tools changes to create PVH domain Mukesh Rathor
2013-04-24 7:10 ` Jan Beulich
2013-04-24 23:02 ` Mukesh Rathor
2013-04-23 21:25 ` [PATCH 08/17] PVH xen: domain creation code changes Mukesh Rathor
2013-04-23 21:25 ` Mukesh Rathor [this message]
2013-04-24 7:42 ` [PATCH 09/17] PVH xen: create PVH vmcs, and also initialization Jan Beulich
2013-04-30 21:01 ` Mukesh Rathor
2013-04-30 21:04 ` Mukesh Rathor
2013-04-23 21:25 ` [PATCH 10/17] PVH xen: introduce vmx_pvh.c and pvh.c Mukesh Rathor
2013-04-24 8:47 ` Jan Beulich
2013-04-25 0:57 ` Mukesh Rathor
2013-04-25 8:36 ` Jan Beulich
2013-04-26 1:16 ` Mukesh Rathor
2013-04-26 1:58 ` Mukesh Rathor
2013-04-26 7:29 ` Jan Beulich
2013-04-26 7:20 ` Jan Beulich
2013-04-27 2:06 ` Mukesh Rathor
2013-05-01 0:51 ` Mukesh Rathor
2013-05-01 13:52 ` Jan Beulich
2013-05-02 1:10 ` Mukesh Rathor
2013-05-02 6:42 ` Jan Beulich
2013-05-03 1:03 ` Mukesh Rathor
2013-05-10 1:51 ` Mukesh Rathor
2013-05-10 7:07 ` Jan Beulich
2013-05-10 23:44 ` Mukesh Rathor
2013-05-02 1:17 ` Mukesh Rathor
2013-05-02 6:53 ` Jan Beulich
2013-05-03 0:40 ` Mukesh Rathor
2013-05-03 6:33 ` Jan Beulich
2013-05-04 1:40 ` Mukesh Rathor
2013-05-06 6:44 ` Jan Beulich
2013-05-07 1:25 ` Mukesh Rathor
2013-05-07 8:07 ` Jan Beulich
2013-05-11 0:30 ` Mukesh Rathor
2013-04-25 11:19 ` Tim Deegan
2013-04-23 21:26 ` [PATCH 11/17] PVH xen: some misc changes like mtrr, intr, msi Mukesh Rathor
2013-04-23 21:26 ` [PATCH 12/17] PVH xen: support invalid op, return PVH features etc Mukesh Rathor
2013-04-24 9:01 ` Jan Beulich
2013-04-25 1:01 ` Mukesh Rathor
2013-04-23 21:26 ` [PATCH 13/17] PVH xen: p2m related changes Mukesh Rathor
2013-04-25 11:28 ` Tim Deegan
2013-04-25 21:59 ` Mukesh Rathor
2013-04-26 8:53 ` Tim Deegan
2013-04-23 21:26 ` [PATCH 14/17] PVH xen: Add and remove foreign pages Mukesh Rathor
2013-04-25 11:38 ` Tim Deegan
2013-04-23 21:26 ` [PATCH 15/17] PVH xen: Miscellaneous changes Mukesh Rathor
2013-04-24 9:06 ` Jan Beulich
2013-05-10 1:54 ` Mukesh Rathor
2013-05-10 7:10 ` Jan Beulich
2013-04-23 21:26 ` [PATCH 16/17] PVH xen: elf and iommu related changes to prep for dom0 PVH Mukesh Rathor
2013-04-24 9:15 ` Jan Beulich
2013-05-14 1:16 ` Mukesh Rathor
2013-05-14 6:56 ` Jan Beulich
2013-05-14 19:14 ` Mukesh Rathor
2013-04-23 21:26 ` [PATCH 17/17] PVH xen: PVH dom0 creation Mukesh Rathor
2013-04-24 9:28 ` Jan Beulich
2013-04-26 1:18 ` Mukesh Rathor
2013-04-26 7:22 ` Jan Beulich
2013-05-10 1:53 ` Mukesh Rathor
2013-05-10 7:14 ` Jan Beulich
2013-05-15 1:18 ` Mukesh Rathor
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1366752366-16594-10-git-send-email-mukesh.rathor@oracle.com \
--to=mukesh.rathor@oracle.com \
--cc=Xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).