From: Mukesh Rathor <mukesh.rathor@oracle.com>
To: Xen-devel@lists.xensource.com
Cc: george.dunlap@eu.citrix.com, keir.xen@gmail.com, tim@xen.org,
JBeulich@suse.com
Subject: [V3 PATCH 4/9] dom0: construct_dom0 changes
Date: Tue, 26 Nov 2013 18:27:05 -0800 [thread overview]
Message-ID: <1385519230-21132-5-git-send-email-mukesh.rathor@oracle.com> (raw)
In-Reply-To: <1385519230-21132-1-git-send-email-mukesh.rathor@oracle.com>
This patch changes construct_dom0 to boot in PVH mode. Changes
needed to support it are also included here.
Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
---
xen/arch/x86/domain_build.c | 229 +++++++++++++++++++++++++++++++++++++++---
xen/arch/x86/domctl.c | 2 +-
xen/arch/x86/mm/hap/hap.c | 15 +++
xen/include/asm-x86/hap.h | 1 +
xen/include/xen/domain.h | 3 +
5 files changed, 232 insertions(+), 18 deletions(-)
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index c9ff680..f4a32df 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -35,6 +35,7 @@
#include <asm/setup.h>
#include <asm/bzimage.h> /* for bzimage_parse */
#include <asm/io_apic.h>
+#include <asm/hap.h>
#include <public/version.h>
@@ -307,6 +308,145 @@ static void __init process_dom0_ioports_disable(void)
}
}
+/*
+ * Identity-map (gfn == mfn) every non-RAM region for dom0, so that dom0 ends
+ * up with the whole io region present in the EPT/NPT.
+ *
+ * The e820 map is walked once; each RAM or UNUSABLE entry (and the final
+ * entry, whatever its type) closes the gap that started at the end of the
+ * previous such entry, and that gap is handed to add_mem_mapping_for_xlate().
+ * Any failure to add a mapping is fatal (BUG_ON).
+ *
+ * pvh fixme: The following doesn't map MMIO ranges when they sit above the
+ *            highest E820 covered address.
+ */
+static __init void pvh_map_all_iomem(struct domain *d)
+{
+    unsigned long gap_start = 0, last_end = 0;
+    unsigned long first_pfn, limit_pfn;
+    unsigned int idx, count;
+    int rc;
+
+    for ( idx = 0; idx < e820.nr_map; idx++ )
+    {
+        const struct e820entry *ent = &e820.map[idx];
+        /* Unused RAM areas are marked UNUSABLE, so treat them like RAM. */
+        int ram_like = (ent->type == E820_RAM) ||
+                       (ent->type == E820_UNUSABLE);
+
+        last_end = ent->addr + ent->size;
+
+        /* Only RAM-like entries and the very last entry close a gap. */
+        if ( !ram_like && (idx != e820.nr_map - 1) )
+            continue;
+
+        first_pfn = PFN_DOWN(gap_start);
+
+        /*
+         * A RAM-like entry ends the gap where the entry begins; a trailing
+         * non-RAM entry is itself part of the gap.
+         */
+        limit_pfn = ram_like ? PFN_UP(ent->addr) : PFN_UP(last_end);
+
+        if ( first_pfn < limit_pfn )
+        {
+            count = limit_pfn - first_pfn;
+            /* Add pages to the mapping */
+            rc = add_mem_mapping_for_xlate(d, first_pfn, first_pfn, count);
+            BUG_ON(rc);
+        }
+
+        gap_start = last_end;
+    }
+
+    /* If the e820 ended under 4GB, we must map the remaining space up to 4GB */
+    if ( last_end < GB(4) )
+    {
+        first_pfn = PFN_UP(last_end);
+        limit_pfn = (GB(4)) >> PAGE_SHIFT;
+        count = limit_pfn - first_pfn;
+        rc = add_mem_mapping_for_xlate(d, first_pfn, first_pfn, count);
+        BUG_ON(rc);
+    }
+}
+
+/*
+ * Record the pfn -> mfn association for one dom0 page.
+ *
+ * PVH: the page is entered via guest_physmap_add_page() (order 0); failure
+ * is fatal and vphysmap_s is not used.
+ * PV:  the entry is written into the pfn-indexed table at vphysmap_s, using
+ *      unsigned int slots for a 32-on-64 domain and unsigned long slots
+ *      otherwise, and the reverse mapping is set with set_gpfn_from_mfn().
+ */
+static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
+                                       unsigned long mfn,
+                                       unsigned long vphysmap_s)
+{
+    int err;
+
+    if ( !is_pvh_domain(d) )
+    {
+        if ( is_pv_32on64_domain(d) )
+        {
+            unsigned int *table32 = (unsigned int *)vphysmap_s;
+
+            table32[pfn] = mfn;
+        }
+        else
+        {
+            unsigned long *table64 = (unsigned long *)vphysmap_s;
+
+            table64[pfn] = mfn;
+        }
+
+        set_gpfn_from_mfn(mfn, pfn);
+        return;
+    }
+
+    err = guest_physmap_add_page(d, pfn, mfn, 0);
+    BUG_ON(err);
+}
+
+/*
+ * Rewrite dom0's initial page tables for HAP.
+ *
+ * For the L4 slots covering [v_start, v_end), every present L3/L2/L1 entry,
+ * and each of those L4 entries, has the frame number it holds replaced by
+ * get_gpfn_from_mfn() of that frame; the entry flags are kept. L4 slots
+ * before and after the guest's range are zeroed. Finally guest_cr[3] is set
+ * from the translated v->arch.cr3 and the paging modes are refreshed.
+ *
+ * Each lower-level table is unmapped through the pointer returned when it
+ * was mapped. The walking pointer has advanced a full page beyond the table
+ * by the time its loop finishes, so handing it to unmap_domain_page() would
+ * name an address in the following page rather than the one mapped.
+ *
+ * NOTE(review): L4 entries inside the guest range are not tested for
+ * _PAGE_PRESENT before being followed - presumably the caller guarantees
+ * they are all populated; confirm.
+ */
+static __init void pvh_fixup_page_tables_for_hap(struct vcpu *v,
+                                                 unsigned long v_start,
+                                                 unsigned long v_end)
+{
+    int i, j, k;
+    l4_pgentry_t *pl4e, *l4start;
+    l3_pgentry_t *pl3e, *l3start;
+    l2_pgentry_t *pl2e, *l2start;
+    l1_pgentry_t *pl1e, *l1start;
+    unsigned long cr3_pfn;
+
+    ASSERT(paging_mode_enabled(v->domain));
+
+    l4start = map_domain_page(pagetable_get_pfn(v->arch.guest_table));
+
+    /* Clear entries prior to guest L4 start */
+    pl4e = l4start + l4_table_offset(v_start);
+    memset(l4start, 0, (unsigned long)pl4e - (unsigned long)l4start);
+
+    for ( ; pl4e <= l4start + l4_table_offset(v_end - 1); pl4e++ )
+    {
+        l3start = pl3e = map_l3t_from_l4e(*pl4e);
+        for ( i = 0; i < PAGE_SIZE / sizeof(*pl3e); i++, pl3e++ )
+        {
+            if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+                continue;
+
+            l2start = pl2e = map_l2t_from_l3e(*pl3e);
+            for ( j = 0; j < PAGE_SIZE / sizeof(*pl2e); j++, pl2e++ )
+            {
+                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+                    continue;
+
+                l1start = pl1e = map_l1t_from_l2e(*pl2e);
+                for ( k = 0; k < PAGE_SIZE / sizeof(*pl1e); k++, pl1e++ )
+                {
+                    if ( !(l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
+                        continue;
+
+                    *pl1e = l1e_from_pfn(get_gpfn_from_mfn(l1e_get_pfn(*pl1e)),
+                                         l1e_get_flags(*pl1e));
+                }
+                /* Unmap via the base: pl1e is now one page past the table. */
+                unmap_domain_page(l1start);
+                *pl2e = l2e_from_pfn(get_gpfn_from_mfn(l2e_get_pfn(*pl2e)),
+                                     l2e_get_flags(*pl2e));
+            }
+            /* Same reasoning: pl2e has walked off the end of its table. */
+            unmap_domain_page(l2start);
+            *pl3e = l3e_from_pfn(get_gpfn_from_mfn(l3e_get_pfn(*pl3e)),
+                                 l3e_get_flags(*pl3e));
+        }
+        /* Same reasoning: pl3e has walked off the end of its table. */
+        unmap_domain_page(l3start);
+        *pl4e = l4e_from_pfn(get_gpfn_from_mfn(l4e_get_pfn(*pl4e)),
+                             l4e_get_flags(*pl4e));
+    }
+
+    /*
+     * Clear entries post guest L4. pl4e now points just past the last guest
+     * slot; a zero in-page offset means there is nothing left to clear.
+     */
+    if ( (unsigned long)pl4e & (PAGE_SIZE - 1) )
+        memset(pl4e, 0, PAGE_SIZE - ((unsigned long)pl4e & (PAGE_SIZE - 1)));
+
+    unmap_domain_page(l4start);
+
+    cr3_pfn = get_gpfn_from_mfn(paddr_to_pfn(v->arch.cr3));
+    v->arch.hvm_vcpu.guest_cr[3] = pfn_to_paddr(cr3_pfn);
+
+    /*
+     * Finally, we update the paging modes (hap_update_paging_modes). This will
+     * create monitor_table for us, update v->arch.cr3, and update vmcs.cr3.
+     */
+    paging_update_paging_modes(v);
+}
+
/* Pages that are part of page tables must be read only. */
static __init void mark_pv_pt_pages_rdonly(struct domain *d,
l4_pgentry_t *l4start,
@@ -520,6 +660,8 @@ int __init construct_dom0(
l3_pgentry_t *l3tab = NULL, *l3start = NULL;
l2_pgentry_t *l2tab = NULL, *l2start = NULL;
l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+ paddr_t shared_info_paddr = 0;
+ u32 save_pvh_pg_mode = 0;
/*
* This fully describes the memory layout of the initial domain. All
@@ -597,12 +739,21 @@ int __init construct_dom0(
goto out;
}
- if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE &&
- !test_bit(XENFEAT_dom0, parms.f_supported) )
+ if ( parms.elf_notes[XEN_ELFNOTE_SUPPORTED_FEATURES].type != XEN_ENT_NONE )
{
- printk("Kernel does not support Dom0 operation\n");
- rc = -EINVAL;
- goto out;
+ if ( !test_bit(XENFEAT_dom0, parms.f_supported) )
+ {
+ printk("Kernel does not support Dom0 operation\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ if ( is_pvh_domain(d) &&
+ !test_bit(XENFEAT_hvm_callback_vector, parms.f_supported) )
+ {
+ printk("Kernel does not support PVH mode\n");
+ rc = -EINVAL;
+ goto out;
+ }
}
if ( compat32 )
@@ -667,6 +818,13 @@ int __init construct_dom0(
vstartinfo_end = (vstartinfo_start +
sizeof(struct start_info) +
sizeof(struct dom0_vga_console_info));
+
+ if ( is_pvh_domain(d) )
+ {
+ shared_info_paddr = round_pgup(vstartinfo_end) - v_start;
+ vstartinfo_end += PAGE_SIZE;
+ }
+
vpt_start = round_pgup(vstartinfo_end);
for ( nr_pt_pages = 2; ; nr_pt_pages++ )
{
@@ -906,6 +1064,13 @@ int __init construct_dom0(
(void)alloc_vcpu(d, i, cpu);
}
+ /*
+ * pvh: we temporarily disable paging mode so that we can build cr3 needed
+ * to run on dom0's page tables.
+ */
+ save_pvh_pg_mode = d->arch.paging.mode;
+ d->arch.paging.mode = 0;
+
/* Set up CR3 value for write_ptbase */
if ( paging_mode_enabled(d) )
paging_update_paging_modes(v);
@@ -971,6 +1136,15 @@ int __init construct_dom0(
nr_pages);
}
+ if ( is_pvh_domain(d) )
+ hap_set_pvh_alloc_for_dom0(d, nr_pages);
+
+ /*
+ * We enable paging mode again so guest_physmap_add_page will do the
+ * right thing for us.
+ */
+ d->arch.paging.mode = save_pvh_pg_mode;
+
/* Write the phys->machine and machine->phys table entries. */
for ( pfn = 0; pfn < count; pfn++ )
{
@@ -987,11 +1161,7 @@ int __init construct_dom0(
if ( pfn > REVERSE_START && (vinitrd_start || pfn < initrd_pfn) )
mfn = alloc_epfn - (pfn - REVERSE_START);
#endif
- if ( !is_pv_32on64_domain(d) )
- ((unsigned long *)vphysmap_start)[pfn] = mfn;
- else
- ((unsigned int *)vphysmap_start)[pfn] = mfn;
- set_gpfn_from_mfn(mfn, pfn);
+ dom0_update_physmap(d, pfn, mfn, vphysmap_start);
if (!(pfn & 0xfffff))
process_pending_softirqs();
}
@@ -1007,8 +1177,8 @@ int __init construct_dom0(
if ( !page->u.inuse.type_info &&
!get_page_and_type(page, d, PGT_writable_page) )
BUG();
- ((unsigned long *)vphysmap_start)[pfn] = mfn;
- set_gpfn_from_mfn(mfn, pfn);
+
+ dom0_update_physmap(d, pfn, mfn, vphysmap_start);
++pfn;
if (!(pfn & 0xfffff))
process_pending_softirqs();
@@ -1028,11 +1198,7 @@ int __init construct_dom0(
#ifndef NDEBUG
#define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
#endif
- if ( !is_pv_32on64_domain(d) )
- ((unsigned long *)vphysmap_start)[pfn] = mfn;
- else
- ((unsigned int *)vphysmap_start)[pfn] = mfn;
- set_gpfn_from_mfn(mfn, pfn);
+ dom0_update_physmap(d, pfn, mfn, vphysmap_start);
#undef pfn
page++; pfn++;
if (!(pfn & 0xfffff))
@@ -1056,6 +1222,15 @@ int __init construct_dom0(
si->console.dom0.info_size = sizeof(struct dom0_vga_console_info);
}
+ /*
+ * PVH: We need to update si->shared_info while we are on dom0 page tables,
+ * but need to defer the p2m update until after we have fixed up the
+ * page tables for PVH so that the m2p for the si pte entry returns
+ * correct pfn.
+ */
+ if ( is_pvh_domain(d) )
+ si->shared_info = shared_info_paddr;
+
if ( is_pv_32on64_domain(d) )
xlat_start_info(si, XLAT_start_info_console_dom0);
@@ -1089,8 +1264,15 @@ int __init construct_dom0(
regs->eflags = X86_EFLAGS_IF;
if ( opt_dom0_shadow )
+ {
+ if ( is_pvh_domain(d) )
+ {
+ printk("Invalid option dom0_shadow for PVH\n");
+ return -EINVAL;
+ }
if ( paging_enable(d, PG_SH_enable) == 0 )
paging_update_paging_modes(v);
+ }
if ( supervisor_mode_kernel )
{
@@ -1180,6 +1362,19 @@ int __init construct_dom0(
printk(" Xen warning: dom0 kernel broken ELF: %s\n",
elf_check_broken(&elf));
+ if ( is_pvh_domain(d) )
+ {
+ /* finally, fixup the page table, replacing mfns with pfns */
+ pvh_fixup_page_tables_for_hap(v, v_start, v_end);
+
+ /* the pt has correct pfn for si, now update the mfn in the p2m */
+ mfn = virt_to_mfn(d->shared_info);
+ pfn = shared_info_paddr >> PAGE_SHIFT;
+ dom0_update_physmap(d, pfn, mfn, 0);
+
+ pvh_map_all_iomem(d);
+ }
+
iommu_dom0_init(dom0);
return 0;
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index e3f544a..ec18771 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -46,7 +46,7 @@ static int gdbsx_guest_mem_io(
return (iop->remain ? -EFAULT : 0);
}
-static int add_mem_mapping_for_xlate(struct domain *d, unsigned long gfn,
+int add_mem_mapping_for_xlate(struct domain *d, unsigned long gfn,
unsigned long mfn, unsigned long nr_mfns)
{
unsigned long i;
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index d3f64bd..4accab6 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -579,6 +579,21 @@ int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
}
}
+/*
+ * Size and set the HAP allocation for a PVH dom0 of num_pages pages.
+ *
+ * The domain size is converted to KiB, run through the formula taken from
+ * libxl_get_required_shadow_memory(), rounded up to whole MiB and converted
+ * back to pages before being passed to hap_set_allocation() under the
+ * paging lock. A non-zero return from hap_set_allocation() is fatal.
+ */
+void __init hap_set_pvh_alloc_for_dom0(struct domain *d,
+                                       unsigned long num_pages)
+{
+    unsigned long dom_kb, pool_kb, pool_mb;
+    int err;
+
+    dom_kb = num_pages * (PAGE_SIZE / 1024);
+
+    /* Copied from: libxl_get_required_shadow_memory() */
+    pool_kb = 4 * (256 * d->max_vcpus + 2 * (dom_kb / 1024));
+
+    /* Round up to MiB, then express that in pages. */
+    pool_mb = (pool_kb + 1023) / 1024;
+    num_pages = pool_mb << (20 - PAGE_SHIFT);
+
+    paging_lock(d);
+    err = hap_set_allocation(d, num_pages, NULL);
+    paging_unlock(d);
+
+    BUG_ON(err);
+}
+
static const struct paging_mode hap_paging_real_mode;
static const struct paging_mode hap_paging_protected_mode;
static const struct paging_mode hap_paging_pae_mode;
diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h
index e03f983..aab8558 100644
--- a/xen/include/asm-x86/hap.h
+++ b/xen/include/asm-x86/hap.h
@@ -63,6 +63,7 @@ int hap_track_dirty_vram(struct domain *d,
XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
extern const struct paging_mode *hap_paging_get_mode(struct vcpu *);
+void hap_set_pvh_alloc_for_dom0(struct domain *d, unsigned long num_pages);
#endif /* XEN_HAP_H */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index a057069..fd6fc1a 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,7 @@ extern unsigned int xen_processor_pmbits;
extern bool_t opt_dom0_vcpus_pin;
+extern int add_mem_mapping_for_xlate(struct domain *d, unsigned long gfn,
+ unsigned long mfn, unsigned long nr_mfns);
+
#endif /* __XEN_DOMAIN_H__ */
--
1.7.2.3
next prev parent reply other threads:[~2013-11-27 2:27 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-11-27 2:27 [V3 PATCH 0/9]: PVH dom0 Mukesh Rathor
2013-11-27 2:27 ` [V3 PATCH 1/9] PVH dom0: iommu related changes Mukesh Rathor
2013-11-27 2:27 ` [V3 PATCH 2/9] PVH dom0: create add_mem_mapping_for_xlate() function Mukesh Rathor
2013-12-02 12:16 ` Jan Beulich
2013-11-27 2:27 ` [V3 PATCH 3/9] PVH dom0: move some pv specific code to static functions Mukesh Rathor
2013-12-02 12:30 ` Jan Beulich
2013-11-27 2:27 ` Mukesh Rathor [this message]
2013-12-02 12:36 ` [V3 PATCH 4/9] dom0: construct_dom0 changes Jan Beulich
2013-11-27 2:27 ` [V3 PATCH 5/9] PVH dom0: implement XENMEM_add_to_physmap_range for x86 Mukesh Rathor
2013-12-02 12:47 ` Jan Beulich
2013-12-03 0:05 ` Mukesh Rathor
2013-12-03 7:48 ` Jan Beulich
2013-12-03 19:49 ` Mukesh Rathor
2013-12-04 8:03 ` Jan Beulich
2013-11-27 2:27 ` [V3 PATCH 6/9] PVH dom0: Introduce p2m_map_foreign Mukesh Rathor
2013-11-27 2:27 ` [V3 PATCH 7/9] pvh: change xsm_add_to_physmap Mukesh Rathor
2013-11-27 16:46 ` Daniel De Graaf
2013-11-27 20:29 ` Mukesh Rathor
2013-11-29 9:21 ` Jan Beulich
2013-12-02 12:55 ` Jan Beulich
2013-11-27 2:27 ` [V3 PATCH 8/9] pvh dom0: Add and remove foreign pages Mukesh Rathor
2013-12-02 12:57 ` Jan Beulich
2013-11-27 2:27 ` [V3 PATCH 9/9] pvh dom0: add opt_dom0pvh to setup.c Mukesh Rathor
2013-11-27 15:00 ` George Dunlap
2013-11-27 20:12 ` Mukesh Rathor
2013-11-28 11:54 ` George Dunlap
2013-11-29 9:29 ` Jan Beulich
2013-12-02 13:00 ` Jan Beulich
2013-12-02 15:09 ` Roger Pau Monné
2013-12-02 19:30 ` Mukesh Rathor
2013-12-02 19:38 ` Roger Pau Monné
2013-12-02 20:38 ` Mukesh Rathor
2013-12-02 20:46 ` Mukesh Rathor
2013-12-03 2:33 ` Mukesh Rathor
2013-12-03 10:30 ` Roger Pau Monné
2013-12-03 19:51 ` Mukesh Rathor
2013-12-03 10:54 ` Jan Beulich
2013-11-28 12:07 ` [V3 PATCH 0/9]: PVH dom0 George Dunlap
2013-11-29 9:17 ` Jan Beulich
2013-12-02 11:39 ` George Dunlap
2013-12-01 23:53 ` Mukesh Rathor
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1385519230-21132-5-git-send-email-mukesh.rathor@oracle.com \
--to=mukesh.rathor@oracle.com \
--cc=JBeulich@suse.com \
--cc=Xen-devel@lists.xensource.com \
--cc=george.dunlap@eu.citrix.com \
--cc=keir.xen@gmail.com \
--cc=tim@xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).