From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: "Xen-devel@lists.xensource.com" <Xen-devel@lists.xensource.com>
Subject: [PATCH 8/18 V2]: PVH xen: domain creation code changes
Date: Fri, 15 Mar 2013 17:36:58 -0700 [thread overview]
Message-ID: <20130315173658.53f402b3@mantra.us.oracle.com> (raw)
This patch contains changes to arch/x86/domain.c to allow for a PVH
domain. Also, since PVH uses lot of HVM data structs and code paths, in
hvm_vcpu {} a sub struct to store PVH specific info is created. Right
now it only has one field, but it can grow over time.
Changes in V2:
- changes to read_segment_register() moved to this patch.
- The other comment was to create NULL functions for pvh_set_vcpu_info
and pvh_read_descriptor which are implemented in later patch, but since
I disable PVH creation until all patches are checked in, it is not needed.
But it helps breaking down of patches.
Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
xen/arch/x86/domain.c | 99 +++++++++++++++++++++++++++-------------
xen/include/asm-x86/hvm/hvm.h | 18 +++++++
xen/include/asm-x86/hvm/vcpu.h | 9 ++++
xen/include/asm-x86/system.h | 8 +++-
4 files changed, 101 insertions(+), 33 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 768c19d..5b5444f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -387,8 +387,11 @@ int vcpu_initialise(struct vcpu *v)
vmce_init_vcpu(v);
- if ( is_hvm_domain(d) )
+ if ( is_hvm_or_pvh_domain(d) )
{
+ if ( is_pvh_domain(d) )
+ v->arch.hvm_vcpu.hvm_pvh.vcpu_info_mfn = INVALID_MFN;
+
rc = hvm_vcpu_initialise(v);
goto done;
}
@@ -455,7 +458,7 @@ void vcpu_destroy(struct vcpu *v)
vcpu_destroy_fpu(v);
- if ( is_hvm_vcpu(v) )
+ if ( is_hvm_or_pvh_vcpu(v) )
hvm_vcpu_destroy(v);
else
xfree(v->arch.pv_vcpu.trap_ctxt);
@@ -467,7 +470,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
int rc = -ENOMEM;
d->arch.hvm_domain.hap_enabled =
- is_hvm_domain(d) &&
+ is_hvm_or_pvh_domain(d) &&
hvm_funcs.hap_supported &&
(domcr_flags & DOMCRF_hap);
d->arch.hvm_domain.mem_sharing_enabled = 0;
@@ -515,7 +518,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
mapcache_domain_init(d);
HYPERVISOR_COMPAT_VIRT_START(d) =
- is_hvm_domain(d) ? ~0u : __HYPERVISOR_COMPAT_VIRT_START;
+ is_hvm_or_pvh_domain(d) ? ~0u : __HYPERVISOR_COMPAT_VIRT_START;
if ( (rc = paging_domain_init(d, domcr_flags)) != 0 )
goto fail;
@@ -557,7 +560,7 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
goto fail;
}
- if ( is_hvm_domain(d) )
+ if ( is_hvm_or_pvh_domain(d) )
{
if ( (rc = hvm_domain_initialise(d)) != 0 )
{
@@ -569,9 +572,9 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
{
/* 64-bit PV guest by default. */
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
-
- spin_lock_init(&d->arch.pv_domain.e820_lock);
}
+ if ( !is_hvm_domain(d) )
+ spin_lock_init(&d->arch.pv_domain.e820_lock);
/* initialize default tsc behavior in case tools don't */
tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
@@ -593,9 +596,10 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
void arch_domain_destroy(struct domain *d)
{
- if ( is_hvm_domain(d) )
+ if ( is_hvm_or_pvh_domain(d) )
hvm_domain_destroy(d);
- else
+
+ if ( !is_hvm_domain(d) )
xfree(d->arch.pv_domain.e820);
free_domain_pirqs(d);
@@ -663,7 +667,7 @@ int arch_set_info_guest(
#define c(fld) (compat ? (c.cmp->fld) : (c.nat->fld))
flags = c(flags);
- if ( !is_hvm_vcpu(v) )
+ if ( !is_hvm_or_pvh_vcpu(v) )
{
if ( !compat )
{
@@ -716,7 +720,7 @@ int arch_set_info_guest(
v->fpu_initialised = !!(flags & VGCF_I387_VALID);
v->arch.flags &= ~TF_kernel_mode;
- if ( (flags & VGCF_in_kernel) || is_hvm_vcpu(v)/*???*/ )
+ if ( (flags & VGCF_in_kernel) || is_hvm_or_pvh_vcpu(v)/*???*/ )
v->arch.flags |= TF_kernel_mode;
v->arch.vgc_flags = flags;
@@ -727,7 +731,7 @@ int arch_set_info_guest(
if ( !compat )
{
memcpy(&v->arch.user_regs, &c.nat->user_regs, sizeof(c.nat->user_regs));
- if ( !is_hvm_vcpu(v) )
+ if ( !is_hvm_or_pvh_vcpu(v) )
memcpy(v->arch.pv_vcpu.trap_ctxt, c.nat->trap_ctxt,
sizeof(c.nat->trap_ctxt));
}
@@ -743,10 +747,13 @@ int arch_set_info_guest(
v->arch.user_regs.eflags |= 2;
- if ( is_hvm_vcpu(v) )
+ if ( is_hvm_or_pvh_vcpu(v) )
{
hvm_set_info_guest(v);
- goto out;
+ if ( is_hvm_vcpu(v) || v->is_initialised )
+ goto out;
+ else
+ goto pvh_skip_pv_stuff;
}
init_int80_direct_trap(v);
@@ -755,7 +762,8 @@ int arch_set_info_guest(
v->arch.pv_vcpu.iopl = (v->arch.user_regs.eflags >> 12) & 3;
v->arch.user_regs.eflags &= ~X86_EFLAGS_IOPL;
- /* Ensure real hardware interrupts are enabled. */
+ /* Ensure real hardware interrupts are enabled. Note: PVH may not have
+ * IDT set on all vcpus so don't enable IF for it yet. */
v->arch.user_regs.eflags |= X86_EFLAGS_IF;
if ( !v->is_initialised )
@@ -852,6 +860,7 @@ int arch_set_info_guest(
if ( rc != 0 )
return rc;
+pvh_skip_pv_stuff:
if ( !compat )
{
cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[3]);
@@ -859,19 +868,26 @@ int arch_set_info_guest(
if ( !cr3_page )
{
- destroy_gdt(v);
+ if ( !is_pvh_vcpu(v) )
+ destroy_gdt(v);
return -EINVAL;
}
if ( !paging_mode_refcounts(d)
&& !get_page_type(cr3_page, PGT_base_page_table) )
{
put_page(cr3_page);
- destroy_gdt(v);
+ if ( !is_pvh_vcpu(v) )
+ destroy_gdt(v);
return -EINVAL;
}
+ if ( is_pvh_vcpu(v) ) {
+ v->arch.cr3 = page_to_mfn(cr3_page);
+ v->arch.hvm_vcpu.guest_cr[3] = c.nat->ctrlreg[3];
+ }
+
v->arch.guest_table = pagetable_from_page(cr3_page);
- if ( c.nat->ctrlreg[1] )
+ if ( c.nat->ctrlreg[1] && !is_pvh_vcpu(v) )
{
cr3_gfn = xen_cr3_to_pfn(c.nat->ctrlreg[1]);
cr3_page = get_page_from_gfn(d, cr3_gfn, NULL, P2M_ALLOC);
@@ -896,7 +912,8 @@ int arch_set_info_guest(
}
else if ( !(flags & VGCF_in_kernel) )
{
- destroy_gdt(v);
+ if ( !is_pvh_vcpu(v) )
+ destroy_gdt(v);
return -EINVAL;
}
}
@@ -938,6 +955,13 @@ int arch_set_info_guest(
update_cr3(v);
+ if ( is_pvh_vcpu(v) )
+ {
+ /* guest is bringing up non-boot SMP vcpu */
+ if ( (rc=hvm_pvh_set_vcpu_info(v, c.nat)) != 0 )
+ return rc;
+ }
+
out:
if ( flags & VGCF_online )
clear_bit(_VPF_down, &v->pause_flags);
@@ -968,16 +992,21 @@ void arch_vcpu_reset(struct vcpu *v)
static void
unmap_vcpu_info(struct vcpu *v)
{
- unsigned long mfn;
+ unsigned long mfn, *mfnp;
+
+ if ( is_pvh_vcpu(v) )
+ mfnp = &v->arch.hvm_vcpu.hvm_pvh.vcpu_info_mfn;
+ else
+ mfnp = &v->arch.pv_vcpu.vcpu_info_mfn;
- if ( v->arch.pv_vcpu.vcpu_info_mfn == INVALID_MFN )
+ mfn = *mfnp;
+ if ( mfn == INVALID_MFN )
return;
- mfn = v->arch.pv_vcpu.vcpu_info_mfn;
unmap_domain_page_global(v->vcpu_info);
v->vcpu_info = &dummy_vcpu_info;
- v->arch.pv_vcpu.vcpu_info_mfn = INVALID_MFN;
+ *mfnp = INVALID_MFN;
put_page_and_type(mfn_to_page(mfn));
}
@@ -996,11 +1025,17 @@ map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
vcpu_info_t *new_info;
struct page_info *page;
int i;
+ unsigned long *mfnp;
+
+ if ( is_pvh_vcpu(v) )
+ mfnp = &v->arch.hvm_vcpu.hvm_pvh.vcpu_info_mfn;
+ else
+ mfnp = &v->arch.pv_vcpu.vcpu_info_mfn;
if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
return -EINVAL;
- if ( v->arch.pv_vcpu.vcpu_info_mfn != INVALID_MFN )
+ if ( *mfnp != INVALID_MFN )
return -EINVAL;
/* Run this command on yourself or on other offline VCPUS. */
@@ -1037,7 +1072,7 @@ map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
}
v->vcpu_info = new_info;
- v->arch.pv_vcpu.vcpu_info_mfn = page_to_mfn(page);
+ *mfnp = page_to_mfn(page);
/* Set new vcpu_info pointer /before/ setting pending flags. */
wmb();
@@ -1443,7 +1478,7 @@ static void update_runstate_area(struct vcpu *v)
static inline int need_full_gdt(struct vcpu *v)
{
- return (!is_hvm_vcpu(v) && !is_idle_vcpu(v));
+ return (!is_hvm_or_pvh_vcpu(v) && !is_idle_vcpu(v));
}
static void __context_switch(void)
@@ -1571,7 +1606,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
/* Re-enable interrupts before restoring state which may fault. */
local_irq_enable();
- if ( !is_hvm_vcpu(next) )
+ if ( !is_hvm_or_pvh_vcpu(next) )
{
load_LDT(next);
load_segments(next);
@@ -1690,12 +1725,12 @@ unsigned long hypercall_create_continuation(
regs->eax = op;
/* Ensure the hypercall trap instruction is re-executed. */
- if ( !is_hvm_vcpu(current) )
+ if ( !is_hvm_or_pvh_vcpu(current) )
regs->eip -= 2; /* re-execute 'syscall' / 'int $xx' */
else
current->arch.hvm_vcpu.hcall_preempted = 1;
- if ( !is_hvm_vcpu(current) ?
+ if ( !is_hvm_or_pvh_vcpu(current) ?
!is_pv_32on64_vcpu(current) :
(hvm_guest_x86_mode(current) == 8) )
{
@@ -2011,7 +2046,7 @@ int domain_relinquish_resources(struct domain *d)
for_each_vcpu ( d, v )
vcpu_destroy_pagetables(v);
- if ( !is_hvm_domain(d) )
+ if ( !is_hvm_or_pvh_domain(d) )
{
for_each_vcpu ( d, v )
{
@@ -2086,7 +2121,7 @@ int domain_relinquish_resources(struct domain *d)
BUG();
}
- if ( is_hvm_domain(d) )
+ if ( is_hvm_or_pvh_domain(d) )
hvm_domain_relinquish_resources(d);
return 0;
@@ -2167,7 +2202,7 @@ void vcpu_mark_events_pending(struct vcpu *v)
if ( already_pending )
return;
- if ( is_hvm_vcpu(v) )
+ if ( is_hvm_or_pvh_vcpu(v) )
hvm_assert_evtchn_irq(v);
else
vcpu_kick(v);
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 2fa2ea5..31aa04f 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -190,6 +190,11 @@ struct hvm_function_table {
paddr_t *L1_gpa, unsigned int *page_order,
uint8_t *p2m_acc, bool_t access_r,
bool_t access_w, bool_t access_x);
+ /* PVH functions */
+ int (*pvh_set_vcpu_info)(struct vcpu *v, struct vcpu_guest_context *ctxtp);
+ int (*pvh_read_descriptor)(unsigned int sel, const struct vcpu *v,
+ const struct cpu_user_regs *regs, unsigned long *base,
+ unsigned long *limit, unsigned int *ar);
};
extern struct hvm_function_table hvm_funcs;
@@ -323,6 +328,19 @@ static inline unsigned long hvm_get_shadow_gs_base(struct vcpu *v)
return hvm_funcs.get_shadow_gs_base(v);
}
+static inline int hvm_pvh_set_vcpu_info(struct vcpu *v,
+ struct vcpu_guest_context *ctxtp)
+{
+ return hvm_funcs.pvh_set_vcpu_info(v, ctxtp);
+}
+
+static inline int hvm_pvh_read_descriptor(unsigned int sel,
+ const struct vcpu *v, const struct cpu_user_regs *regs,
+ unsigned long *base, unsigned long *limit, unsigned int *ar)
+{
+ return hvm_funcs.pvh_read_descriptor(sel, v, regs, base, limit, ar);
+}
+
#define is_viridian_domain(_d) \
(is_hvm_domain(_d) && ((_d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN]))
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index e8b8cd7..2725a62 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -104,6 +104,13 @@ struct nestedvcpu {
#define vcpu_nestedhvm(v) ((v)->arch.hvm_vcpu.nvcpu)
+/* add any PVH specific fields here */
+struct pvh_hvm_vcpu_ext
+{
+ /* Guest-specified relocation of vcpu_info. */
+ unsigned long vcpu_info_mfn;
+};
+
struct hvm_vcpu {
/* Guest control-register and EFER values, just as the guest sees them. */
unsigned long guest_cr[5];
@@ -170,6 +177,8 @@ struct hvm_vcpu {
struct hvm_trap inject_trap;
struct viridian_vcpu viridian;
+
+ struct pvh_hvm_vcpu_ext hvm_pvh;
};
#endif /* __ASM_X86_HVM_VCPU_H__ */
diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h
index d8dc6f2..5681806 100644
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -4,9 +4,15 @@
#include <xen/lib.h>
#include <asm/bitops.h>
+/* We need vcpu because during context switch, going from pure PV to PVH,
+ * in save_segments(), current has been updated to next, and no longer pointing
+ * to the pure PV. Note: for PVH, we update regs->selectors on each vmexit */
#define read_segment_register(vcpu, regs, name) \
({ u16 __sel; \
- asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) ); \
+ if (is_pvh_vcpu(vcpu)) \
+ __sel = regs->name; \
+ else \
+ asm volatile ( "movw %%" STR(name) ",%0" : "=r" (__sel) ); \
__sel; \
})
--
1.7.2.3
next reply other threads:[~2013-03-16 0:36 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-16 0:36 Mukesh Rathor [this message]
2013-03-18 11:57 ` [PATCH 8/18 V2]: PVH xen: domain creation code changes Jan Beulich
2013-03-21 16:16 ` Tim Deegan
2013-03-26 1:29 ` Mukesh Rathor
2013-03-26 7:39 ` Jan Beulich
2013-03-26 21:04 ` Mukesh Rathor
2013-03-27 7:30 ` Jan Beulich
2013-03-18 15:54 ` Konrad Rzeszutek Wilk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130315173658.53f402b3@mantra.us.oracle.com \
--to=mukesh.rathor@oracle.com \
--cc=Xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).