From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: Xen-devel@lists.xensource.com
Subject: [PATCH 10/18] PVH xen: create PVH VMCS, and initialization code
Date: Fri, 24 May 2013 18:25:29 -0700 [thread overview]
Message-ID: <1369445137-19755-11-git-send-email-mukesh.rathor@oracle.com> (raw)
In-Reply-To: <1369445137-19755-1-git-send-email-mukesh.rathor@oracle.com>
This patch mainly contains code to create a VMCS for a PVH guest, along
with the HVM-specific vcpu/domain creation code.
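At a high level the patch follows a single pattern: the existing HVM
entry points dispatch early into small PVH-specific helpers, and the
rest of the HVM infrastructure is reused. A minimal sketch of that
pattern (simplified from the diff below; the is_pvh_domain() /
is_pvh_vcpu() predicates come from earlier patches in this series):

    /* Sketch only: early dispatch from the HVM paths into PVH helpers. */
    int hvm_domain_initialise(struct domain *d)
    {
        if ( is_pvh_domain(d) )
            return pvh_dom_initialise(d); /* HAP check, cacheattr, paging */
        /* ... existing HVM-only setup (pbuf, vioapic, rtc, stdvga, ...) ... */
    }

    static int construct_vmcs(struct vcpu *v)
    {
        vmx_vmcs_enter(v);
        if ( is_pvh_vcpu(v) )
        {
            int rc = pvh_construct_vmcs(v); /* PVH-tailored VMCS fields */
            vmx_vmcs_exit(v);
            return rc;
        }
        /* ... existing HVM VMCS construction ... */
    }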
Changes in V2:
- Avoid calling hvm_do_resume() at the call site rather than returning early from within it.
- In vmx_do_resume(), return for PVH prior to the Intel debugger handling.
Changes in V3:
- Cleanup pvh_construct_vmcs().
- Fix formatting in a few places; add XENLOG_G_ERR to the error printks.
- Do not load the CS selector for PVH here, but try to do that in Linux.
Changes in V4:
- Remove VM_ENTRY_LOAD_DEBUG_CTLS clearing.
- Add markers for pending 32-bit kernel changes (PVH 32bitfixme).
- Verify pit_init call for PVH.
Changes in V5:
- Formatting fixes; remove the unnecessary variable guest_pat.
Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
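A note for reviewers on the MSR bitmap setup in pvh_construct_vmcs() below:
the bitmap page starts out all ones (intercept every MSR access) and
intercepts are then selectively disabled for MSRs that have dedicated VMCS
fields. The sketch below is illustrative only, showing the VMX MSR-bitmap
layout (per the Intel SDM) that the existing vmx_disable_intercept_for_msr()
helper relies on; sketch_disable_intercept() is a hypothetical stand-in,
not the real helper:

    /*
     * One 4K bitmap page: bytes 0x000-0x3ff hold read intercepts for MSRs
     * 0x00000000-0x00001fff, bytes 0x400-0x7ff reads for
     * 0xc0000000-0xc0001fff, and bytes 0x800-0xbff / 0xc00-0xfff the
     * corresponding write intercepts. Clearing a bit disables the vmexit
     * for that MSR access.
     */
    static void sketch_disable_intercept(unsigned long *bitmap, u32 msr)
    {
        if ( msr <= 0x1fff )
        {
            __clear_bit(msr, bitmap + 0x000 / BYTES_PER_LONG); /* read  */
            __clear_bit(msr, bitmap + 0x800 / BYTES_PER_LONG); /* write */
        }
        else if ( msr >= 0xc0000000 && msr <= 0xc0001fff )
        {
            msr &= 0x1fff;
            __clear_bit(msr, bitmap + 0x400 / BYTES_PER_LONG); /* read  */
            __clear_bit(msr, bitmap + 0xc00 / BYTES_PER_LONG); /* write */
        }
    }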
xen/arch/x86/hvm/hvm.c | 94 ++++++++++++-
xen/arch/x86/hvm/vmx/vmcs.c | 312 ++++++++++++++++++++++++++++++++++++++----
xen/arch/x86/hvm/vmx/vmx.c | 40 ++++++
3 files changed, 410 insertions(+), 36 deletions(-)
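One more note before the patch proper: pvh_construct_vmcs() writes ~0UL to
CR0_GUEST_HOST_MASK and CR4_GUEST_HOST_MASK, i.e. every CR0/CR4 bit is
host-owned. Under the VMX rules a guest read of a host-owned bit returns the
corresponding read-shadow bit, and a guest write that would change one causes
a VM exit. An illustrative-only sketch of the resulting read behaviour:

    /*
     * Illustrative only: what the CPU effectively returns for a guest
     * 'mov %cr4, reg' given the guest/host mask and the read shadow.
     */
    static unsigned long sketch_guest_cr4_read(unsigned long hw_cr4,
                                               unsigned long mask,
                                               unsigned long shadow)
    {
        /* Host-owned bits (mask=1) come from the shadow, the rest from CR4. */
        return (hw_cr4 & ~mask) | (shadow & mask);
    }

With mask == ~0UL the guest always reads back exactly the shadow value, which
is why GUEST_CR4 and CR4_READ_SHADOW are written with the same value.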
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index bcf9609..a525080 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -510,6 +510,30 @@ static int hvm_print_line(
return X86EMUL_OKAY;
}
+static int pvh_dom_initialise(struct domain *d)
+{
+ int rc;
+
+ if ( !d->arch.hvm_domain.hap_enabled )
+ return -EINVAL;
+
+ spin_lock_init(&d->arch.hvm_domain.irq_lock);
+
+ hvm_init_cacheattr_region_list(d);
+
+ if ( (rc = paging_enable(d, PG_refcounts|PG_translate|PG_external)) != 0 )
+ goto fail1;
+
+ if ( (rc = hvm_funcs.domain_initialise(d)) != 0 )
+ goto fail1;
+
+ return 0;
+
+fail1:
+ hvm_destroy_cacheattr_region_list(d);
+ return rc;
+}
+
int hvm_domain_initialise(struct domain *d)
{
int rc;
@@ -520,6 +544,8 @@ int hvm_domain_initialise(struct domain *d)
"on a non-VT/AMDV platform.\n");
return -EINVAL;
}
+ if ( is_pvh_domain(d) )
+ return pvh_dom_initialise(d);
spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
spin_lock_init(&d->arch.hvm_domain.irq_lock);
@@ -584,6 +610,11 @@ int hvm_domain_initialise(struct domain *d)
void hvm_domain_relinquish_resources(struct domain *d)
{
+ if ( is_pvh_domain(d) )
+ {
+ pit_deinit(d);
+ return;
+ }
if ( hvm_funcs.nhvm_domain_relinquish_resources )
hvm_funcs.nhvm_domain_relinquish_resources(d);
@@ -609,10 +640,14 @@ void hvm_domain_relinquish_resources(struct domain *d)
void hvm_domain_destroy(struct domain *d)
{
hvm_funcs.domain_destroy(d);
+ hvm_destroy_cacheattr_region_list(d);
+
+ if ( is_pvh_domain(d) )
+ return;
+
rtc_deinit(d);
stdvga_deinit(d);
vioapic_deinit(d);
- hvm_destroy_cacheattr_region_list(d);
}
static int hvm_save_tsc_adjust(struct domain *d, hvm_domain_context_t *h)
@@ -1066,14 +1101,46 @@ static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
}
__initcall(__hvm_register_CPU_XSAVE_save_and_restore);
+static int pvh_vcpu_initialise(struct vcpu *v)
+{
+ int rc;
+
+ if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
+ return rc;
+
+ softirq_tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet,
+ (void(*)(unsigned long))hvm_assert_evtchn_irq,
+ (unsigned long)v);
+
+ v->arch.hvm_vcpu.hcall_64bit = 1; /* PVH 32bitfixme */
+ v->arch.user_regs.eflags = 2; /* EFLAGS bit 1 is reserved and always set */
+ v->arch.hvm_vcpu.inject_trap.vector = -1;
+
+ if ( (rc = hvm_vcpu_cacheattr_init(v)) != 0 )
+ {
+ hvm_funcs.vcpu_destroy(v);
+ return rc;
+ }
+ if ( v->vcpu_id == 0 )
+ pit_init(v, cpu_khz);
+
+ return 0;
+}
+
int hvm_vcpu_initialise(struct vcpu *v)
{
int rc;
struct domain *d = v->domain;
- domid_t dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+ domid_t dm_domid;
hvm_asid_flush_vcpu(v);
+ spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
+ INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
+
+ if ( is_pvh_vcpu(v) )
+ return pvh_vcpu_initialise(v);
+
if ( (rc = vlapic_init(v)) != 0 )
goto fail1;
@@ -1084,6 +1151,8 @@ int hvm_vcpu_initialise(struct vcpu *v)
&& (rc = nestedhvm_vcpu_initialise(v)) < 0 )
goto fail3;
+ dm_domid = d->arch.hvm_domain.params[HVM_PARAM_DM_DOMAIN];
+
/* Create ioreq event channel. */
rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL);
if ( rc < 0 )
@@ -1106,9 +1175,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
spin_unlock(&d->arch.hvm_domain.ioreq.lock);
- spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
- INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
-
v->arch.hvm_vcpu.inject_trap.vector = -1;
rc = setup_compat_arg_xlat(v);
@@ -1163,7 +1229,10 @@ void hvm_vcpu_destroy(struct vcpu *v)
tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet);
hvm_vcpu_cacheattr_destroy(v);
- vlapic_destroy(v);
+
+ if ( !is_pvh_vcpu(v) )
+ vlapic_destroy(v);
+
hvm_funcs.vcpu_destroy(v);
/* Event channel is already freed by evtchn_destroy(). */
@@ -4528,8 +4597,11 @@ static int hvm_memory_event_traps(long p, uint32_t reason,
return 1;
}
+/* PVH fixme: add support for monitoring guest behaviour in the functions below */
void hvm_memory_event_cr0(unsigned long value, unsigned long old)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_CR0],
MEM_EVENT_REASON_CR0,
@@ -4538,6 +4610,8 @@ void hvm_memory_event_cr0(unsigned long value, unsigned long old)
void hvm_memory_event_cr3(unsigned long value, unsigned long old)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_CR3],
MEM_EVENT_REASON_CR3,
@@ -4546,6 +4620,8 @@ void hvm_memory_event_cr3(unsigned long value, unsigned long old)
void hvm_memory_event_cr4(unsigned long value, unsigned long old)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_CR4],
MEM_EVENT_REASON_CR4,
@@ -4554,6 +4630,8 @@ void hvm_memory_event_cr4(unsigned long value, unsigned long old)
void hvm_memory_event_msr(unsigned long msr, unsigned long value)
{
+ if ( is_pvh_vcpu(current) )
+ return;
hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_MSR],
MEM_EVENT_REASON_MSR,
@@ -4566,6 +4644,8 @@ int hvm_memory_event_int3(unsigned long gla)
unsigned long gfn;
+ if ( is_pvh_vcpu(current) )
+ return 0;
gfn = paging_gva_to_gfn(current, gla, &pfec);
return hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_INT3],
MEM_EVENT_REASON_INT3,
@@ -4578,6 +4658,8 @@ int hvm_memory_event_single_step(unsigned long gla)
unsigned long gfn;
+ if ( is_pvh_vcpu(current) )
+ return 0;
gfn = paging_gva_to_gfn(current, gla, &pfec);
return hvm_memory_event_traps(current->domain->arch.hvm_domain
.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP],
MEM_EVENT_REASON_SINGLESTEP,
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index ef0ee7f..3844104 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -634,7 +634,7 @@ void vmx_vmcs_exit(struct vcpu *v)
{
/* Don't confuse vmx_do_resume (for @v or @current!) */
vmx_clear_vmcs(v);
- if ( is_hvm_vcpu(current) )
+ if ( !is_pv_vcpu(current) )
vmx_load_vmcs(current);
spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
@@ -825,16 +825,285 @@ void virtual_vmcs_vmwrite(void *vvmcs, u32 vmcs_encoding, u64 val)
virtual_vmcs_exit(vvmcs);
}
-static int construct_vmcs(struct vcpu *v)
+static void vmx_set_common_host_vmcs_fields(struct vcpu *v)
{
- struct domain *d = v->domain;
uint16_t sysenter_cs;
unsigned long sysenter_eip;
+
+ /* Host data selectors. */
+ __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
+ __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
+ __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
+ __vmwrite(HOST_FS_SELECTOR, 0);
+ __vmwrite(HOST_GS_SELECTOR, 0);
+ __vmwrite(HOST_FS_BASE, 0);
+ __vmwrite(HOST_GS_BASE, 0);
+
+ /* Host control registers. */
+ v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
+ __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
+ __vmwrite(HOST_CR4,
+ mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
+
+ /* Host CS:RIP. */
+ __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
+ __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
+
+ /* Host SYSENTER CS:RIP. */
+ rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
+ __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
+ rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
+ __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
+}
+
+static int pvh_check_requirements(struct vcpu *v)
+{
+ u64 required, tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+
+ if ( !paging_mode_hap(v->domain) )
+ {
+ printk(XENLOG_G_INFO "HAP is required for PVH guest.\n");
+ return -EINVAL;
+ }
+ if ( !cpu_has_vmx_pat )
+ {
+ printk(XENLOG_G_INFO "PVH: CPU does not have PAT support\n");
+ return -ENOSYS;
+ }
+ if ( !cpu_has_vmx_msr_bitmap )
+ {
+ printk(XENLOG_G_INFO "PVH: CPU does not have msr bitmap\n");
+ return -ENOSYS;
+ }
+ if ( !cpu_has_vmx_vpid )
+ {
+ printk(XENLOG_G_INFO "PVH: CPU doesn't have VPID support\n");
+ return -ENOSYS;
+ }
+ if ( !cpu_has_vmx_secondary_exec_control )
+ {
+ printk(XENLOG_G_INFO "CPU Secondary exec is required to run PVH\n");
+ return -ENOSYS;
+ }
+
+ if ( v->domain->arch.vtsc )
+ {
+ printk(XENLOG_G_INFO
+ "At present PVH only supports the default timer mode\n");
+ return -ENOSYS;
+ }
+
+ required = X86_CR4_PAE | X86_CR4_VMXE | X86_CR4_OSFXSR;
+ if ( (tmpval & required) != required )
+ {
+ printk(XENLOG_G_INFO "PVH: required CR4 features not available:%lx\n",
+ required);
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+static int pvh_construct_vmcs(struct vcpu *v)
+{
+ int rc, msr_type;
+ unsigned long *msr_bitmap;
+ struct domain *d = v->domain;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ struct ept_data *ept = &p2m->ept;
+ u32 vmexit_ctl = vmx_vmexit_control;
+ u32 vmentry_ctl = vmx_vmentry_control;
+ u64 host_pat, tmpval = -1;
+
+ if ( (rc = pvh_check_requirements(v)) )
+ return rc;
+
+ msr_bitmap = alloc_xenheap_page();
+ if ( msr_bitmap == NULL )
+ return -ENOMEM;
+
+ /* 1. Pin-Based Controls */
+ __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
+
+ v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
+
+ /* 2. Primary Processor-based controls */
+ /*
+ * If rdtsc exiting is turned on and it goes through emulate_privileged_op,
+ * then pv_vcpu.ctrlreg must be added to the pvh struct.
+ */
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_RDTSC_EXITING;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_USE_TSC_OFFSETING;
+
+ v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
+ v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+ v->arch.hvm_vmx.exec_control |= CPU_BASED_ACTIVATE_MSR_BITMAP;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+
+ /* 3. Secondary Processor-based controls. Per the Intel SDM, reserved bits are 0 */
+ v->arch.hvm_vmx.secondary_exec_control = SECONDARY_EXEC_ENABLE_EPT;
+ v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_VPID;
+ v->arch.hvm_vmx.secondary_exec_control |= SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+
+ __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+ v->arch.hvm_vmx.secondary_exec_control);
+
+ __vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
+ __vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
+
+ /* MSR bitmap for intercepts */
+ memset(msr_bitmap, ~0, PAGE_SIZE);
+ v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
+ __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
+
+ msr_type = MSR_TYPE_R | MSR_TYPE_W;
+ /* Disable intercepts for MSRs that have corresponding VMCS fields */
+ vmx_disable_intercept_for_msr(v, MSR_FS_BASE, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_GS_BASE, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_SHADOW_GS_BASE, msr_type);
+ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, msr_type);
+
+ /*
+ * We don't disable intercepts for MSRs: MSR_STAR, MSR_LSTAR, MSR_CSTAR,
+ * and MSR_SYSCALL_MASK because we need to specify save/restore area to
+ * save/restore at every VM exit and entry. Instead, let the intercept
+ * functions save them into vmx_msr_state fields. See comment in
+ * vmx_restore_host_msrs(). See also vmx_restore_guest_msrs().
+ */
+ __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+
+ __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
+
+ /*
+ * Note: we run with the default VM_ENTRY_LOAD_DEBUG_CTLS of 1, meaning
+ * that on vmentry the CPU loads DR7 and DEBUGCTLS from the VMCS rather
+ * than keeping the host values. 0 would make it ignore the VMCS values.
+ */
+ vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_EFER;
+ vmentry_ctl &= ~VM_ENTRY_SMM;
+ vmentry_ctl &= ~VM_ENTRY_DEACT_DUAL_MONITOR;
+ /* PVH 32bitfixme */
+ vmentry_ctl |= VM_ENTRY_IA32E_MODE; /* GUEST_EFER.LME/LMA ignored */
+
+ __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
+
+ vmx_set_common_host_vmcs_fields(v);
+
+ __vmwrite(VM_ENTRY_INTR_INFO, 0);
+ __vmwrite(CR3_TARGET_COUNT, 0);
+ __vmwrite(GUEST_ACTIVITY_STATE, 0);
+
+ /* These are somewhat irrelevant as we load the descriptors directly. */
+ __vmwrite(GUEST_CS_SELECTOR, 0);
+ __vmwrite(GUEST_DS_SELECTOR, 0);
+ __vmwrite(GUEST_SS_SELECTOR, 0);
+ __vmwrite(GUEST_ES_SELECTOR, 0);
+ __vmwrite(GUEST_FS_SELECTOR, 0);
+ __vmwrite(GUEST_GS_SELECTOR, 0);
+
+ __vmwrite(GUEST_CS_BASE, 0);
+ __vmwrite(GUEST_CS_LIMIT, ~0u);
+ /* CS.L == 1, exec, read/write, accessed. PVH 32bitfixme */
+ __vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+
+ __vmwrite(GUEST_DS_BASE, 0);
+ __vmwrite(GUEST_DS_LIMIT, ~0u);
+ __vmwrite(GUEST_DS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_SS_BASE, 0);
+ __vmwrite(GUEST_SS_LIMIT, ~0u);
+ __vmwrite(GUEST_SS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_ES_BASE, 0);
+ __vmwrite(GUEST_ES_LIMIT, ~0u);
+ __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_FS_BASE, 0);
+ __vmwrite(GUEST_FS_LIMIT, ~0u);
+ __vmwrite(GUEST_FS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_GS_BASE, 0);
+ __vmwrite(GUEST_GS_LIMIT, ~0u);
+ __vmwrite(GUEST_GS_AR_BYTES, 0xc093); /* read/write, accessed */
+
+ __vmwrite(GUEST_GDTR_BASE, 0);
+ __vmwrite(GUEST_GDTR_LIMIT, 0);
+
+ __vmwrite(GUEST_LDTR_BASE, 0);
+ __vmwrite(GUEST_LDTR_LIMIT, 0);
+ __vmwrite(GUEST_LDTR_AR_BYTES, 0x82); /* LDT */
+ __vmwrite(GUEST_LDTR_SELECTOR, 0);
+
+ /* Guest TSS. */
+ __vmwrite(GUEST_TR_BASE, 0);
+ __vmwrite(GUEST_TR_LIMIT, 0xff);
+ __vmwrite(GUEST_TR_AR_BYTES, 0x8b); /* 32-bit TSS (busy) */
+
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ __vmwrite(GUEST_DR7, 0);
+ __vmwrite(VMCS_LINK_POINTER, ~0UL);
+
+ __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
+
+ v->arch.hvm_vmx.exception_bitmap = HVM_TRAP_MASK | (1U << TRAP_debug) |
+ (1U << TRAP_int3) | (1U << TRAP_no_device);
+ __vmwrite(EXCEPTION_BITMAP, v->arch.hvm_vmx.exception_bitmap);
+
+ /* Set the WP bit so read-only pages cannot be written from CPL 0 */
+ tmpval = X86_CR0_PG | X86_CR0_NE | X86_CR0_PE | X86_CR0_WP;
+ __vmwrite(GUEST_CR0, tmpval);
+ __vmwrite(CR0_READ_SHADOW, tmpval);
+ v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0] = tmpval;
+
+ tmpval = real_cr4_to_pv_guest_cr4(mmu_cr4_features);
+ __vmwrite(GUEST_CR4, tmpval);
+ __vmwrite(CR4_READ_SHADOW, tmpval);
+ v->arch.hvm_vcpu.guest_cr[4] = tmpval;
+
+ __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
+ __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
+
+ v->arch.hvm_vmx.vmx_realmode = 0;
+
+ ept->asr = pagetable_get_pfn(p2m_get_pagetable(p2m));
+ __vmwrite(EPT_POINTER, ept_get_eptp(ept));
+
+ rdmsrl(MSR_IA32_CR_PAT, host_pat);
+ __vmwrite(HOST_PAT, host_pat);
+ __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET);
+
+ /* The paging mode is updated for PVH by arch_set_info_guest(). */
+
+ return 0;
+}
+
+static int construct_vmcs(struct vcpu *v)
+{
+ struct domain *d = v->domain;
u32 vmexit_ctl = vmx_vmexit_control;
u32 vmentry_ctl = vmx_vmentry_control;
vmx_vmcs_enter(v);
+ if ( is_pvh_vcpu(v) )
+ {
+ int rc = pvh_construct_vmcs(v);
+ vmx_vmcs_exit(v);
+ return rc;
+ }
+
/* VMCS controls. */
__vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
@@ -932,30 +1201,7 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
}
- /* Host data selectors. */
- __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
- __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
- __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
- __vmwrite(HOST_FS_SELECTOR, 0);
- __vmwrite(HOST_GS_SELECTOR, 0);
- __vmwrite(HOST_FS_BASE, 0);
- __vmwrite(HOST_GS_BASE, 0);
-
- /* Host control registers. */
- v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS;
- __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
- __vmwrite(HOST_CR4,
- mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0));
-
- /* Host CS:RIP. */
- __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
- __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
-
- /* Host SYSENTER CS:RIP. */
- rdmsrl(MSR_IA32_SYSENTER_CS, sysenter_cs);
- __vmwrite(HOST_SYSENTER_CS, sysenter_cs);
- rdmsrl(MSR_IA32_SYSENTER_EIP, sysenter_eip);
- __vmwrite(HOST_SYSENTER_EIP, sysenter_eip);
+ vmx_set_common_host_vmcs_fields(v);
/* MSR intercepts. */
__vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
@@ -1275,8 +1521,11 @@ void vmx_do_resume(struct vcpu *v)
vmx_clear_vmcs(v);
vmx_load_vmcs(v);
- hvm_migrate_timers(v);
- hvm_migrate_pirqs(v);
+ if ( !is_pvh_vcpu(v) )
+ {
+ hvm_migrate_timers(v);
+ hvm_migrate_pirqs(v);
+ }
vmx_set_host_env(v);
/*
* Both n1 VMCS and n2 VMCS need to update the host environment after
@@ -1288,6 +1537,9 @@ void vmx_do_resume(struct vcpu *v)
hvm_asid_flush_vcpu(v);
}
+ if ( is_pvh_vcpu(v) )
+ reset_stack_and_jump(vmx_asm_do_vmentry);
+
debug_state = v->domain->debugger_attached
|| v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_INT3]
|| v->domain->arch.hvm_domain.params[HVM_PARAM_MEMORY_EVENT_SINGLE_STEP];
@@ -1471,7 +1723,7 @@ static void vmcs_dump(unsigned char ch)
for_each_domain ( d )
{
- if ( !is_hvm_domain(d) )
+ if ( is_pv_domain(d) )
continue;
printk("\n>>> Domain %d <<<\n", d->domain_id);
for_each_vcpu ( d, v )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 25a265e..d20be75 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -82,6 +82,9 @@ static int vmx_domain_initialise(struct domain *d)
{
int rc;
+ if ( is_pvh_domain(d) )
+ return 0;
+
if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
return rc;
@@ -90,6 +93,9 @@ static int vmx_domain_initialise(struct domain *d)
static void vmx_domain_destroy(struct domain *d)
{
+ if ( is_pvh_domain(d) )
+ return;
+
vmx_free_vlapic_mapping(d);
}
@@ -113,6 +119,12 @@ static int vmx_vcpu_initialise(struct vcpu *v)
vpmu_initialise(v);
+ if ( is_pvh_vcpu(v) )
+ {
+ /* This is so hvm_long_mode_enabled(v) returns true for PVH. */
+ v->arch.hvm_vcpu.guest_efer = EFER_SCE | EFER_LMA | EFER_LME;
+ return 0;
+ }
vmx_install_vlapic_mapping(v);
/* %eax == 1 signals full real-mode support to the guest loader. */
@@ -1034,6 +1046,28 @@ static void vmx_update_host_cr3(struct vcpu *v)
vmx_vmcs_exit(v);
}
+/*
+ * A PVH guest never causes a CR3 write vmexit. This is called during
+ * guest setup.
+ */
+static void vmx_update_pvh_cr(struct vcpu *v, unsigned int cr)
+{
+ vmx_vmcs_enter(v);
+ switch ( cr )
+ {
+ case 3:
+ __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.guest_cr[3]);
+ hvm_asid_flush_vcpu(v);
+ break;
+
+ default:
+ printk(XENLOG_ERR
+ "PVH: d%d v%d unexpected cr%d update at rip:%lx\n",
+ v->domain->domain_id, v->vcpu_id, cr, __vmread(GUEST_RIP));
+ }
+ vmx_vmcs_exit(v);
+}
+
void vmx_update_debug_state(struct vcpu *v)
{
unsigned long mask;
@@ -1053,6 +1087,12 @@ void vmx_update_debug_state(struct vcpu *v)
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
{
+ if ( is_pvh_vcpu(v) )
+ {
+ vmx_update_pvh_cr(v, cr);
+ return;
+ }
+
vmx_vmcs_enter(v);
switch ( cr )
--
1.7.2.3