From: Roger Pau Monne <roger.pau@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Wei Liu <wei.liu2@citrix.com>,
Stefano Stabellini <stefano.stabellini@eu.citrix.com>,
Ian Jackson <ian.jackson@eu.citrix.com>,
Ian Campbell <ian.campbell@citrix.com>,
Roger Pau Monne <roger.pau@citrix.com>
Subject: [PATCH v5 08/28] libxc: introduce a xc_dom_arch for hvm-3.0-x86_32 guests
Date: Fri, 21 Aug 2015 18:53:21 +0200
Message-ID: <1440176021-18910-9-git-send-email-roger.pau@citrix.com>
In-Reply-To: <1440176021-18910-1-git-send-email-roger.pau@citrix.com>
This xc_dom_arch will be used to build HVM domains. The code is based on
the existing xc_hvm_populate_memory and xc_hvm_populate_params functions.
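The hooks are driven through the generic xc_dom_* interface. Roughly, and
only as a sketch (exact field setup and error handling omitted, names such
as firmware_path are placeholders), a caller is expected to do:

    struct xc_dom_image *dom = xc_dom_allocate(xch, NULL, NULL);

    dom->container_type = XC_DOM_HVM_CONTAINER;
    /* ... set guest_domid, target_pages, mmio_start/size, lowmem_end,
     * highmem_end ... */
    xc_dom_kernel_file(dom, firmware_path); /* guest firmware image */
    xc_dom_boot_xen_init(dom, xch, domid);
    xc_dom_parse_image(dom);            /* loader sets dom->guest_type */
    xc_dom_mem_init(dom, mem_mb);
    xc_dom_boot_mem_init(dom);          /* invokes the meminit hook */
    xc_dom_build_image(dom);            /* invokes alloc_magic_pages */
    xc_dom_boot_image(dom);             /* invokes the vcpu and boot hooks */
    xc_dom_release(dom);

The exact sequence used by libxl is introduced later in this series.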
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
---
Changes since v4:
- Replace a malloc+memset with a calloc.
- Remove a != NULL check.
- Add Andrew Cooper's Reviewed-by.
Changes since v3:
- Make sure c/s b9dbe33 is not reverted on this patch.
- Set the initial BSP state using {get/set}hvmcontext.
---
tools/libxc/include/xc_dom.h | 6 +
tools/libxc/xc_dom_x86.c | 618 ++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 613 insertions(+), 11 deletions(-)
diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 0245d24..cda40d9 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -188,6 +188,12 @@ struct xc_dom_image {
} container_type;
/* HVM specific fields. */
+ xen_pfn_t target_pages;
+ xen_pfn_t mmio_start;
+ xen_pfn_t mmio_size;
+ xen_pfn_t lowmem_end;
+ xen_pfn_t highmem_end;
+
/* Extra ACPI tables passed to HVMLOADER */
struct xc_hvm_firmware_module acpi_module;
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index ae8187f..0a4ac58 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -39,10 +39,32 @@
/* ------------------------------------------------------------------------ */
-#define SUPERPAGE_PFN_SHIFT 9
-#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
#define SUPERPAGE_BATCH_SIZE 512
+#define SUPERPAGE_2MB_SHIFT 9
+#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
+#define SUPERPAGE_1GB_SHIFT 18
+#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
+
+#define X86_CR0_PE 0x01
+#define X86_CR0_ET 0x10
+
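+/* Size in pages of the VGA hole (0xA0000-0xC0000). */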
+#define VGA_HOLE_SIZE (0x20)
+
+#define SPECIALPAGE_PAGING 0
+#define SPECIALPAGE_ACCESS 1
+#define SPECIALPAGE_SHARING 2
+#define SPECIALPAGE_BUFIOREQ 3
+#define SPECIALPAGE_XENSTORE 4
+#define SPECIALPAGE_IOREQ 5
+#define SPECIALPAGE_IDENT_PT 6
+#define SPECIALPAGE_CONSOLE 7
+#define NR_SPECIAL_PAGES 8
+#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))
+
+#define NR_IOREQ_SERVER_PAGES 8
+#define ioreq_server_pfn(x) (special_pfn(0) - NR_IOREQ_SERVER_PAGES + (x))
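+/*
+ * With the values above, the special pages live at pfns 0xfeff8-0xfefff
+ * and the ioreq server pages at pfns 0xfeff0-0xfeff7, just below guest
+ * address 0xff000000.
+ */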
+
#define bits_to_mask(bits) (((xen_vaddr_t)1 << (bits))-1)
#define round_down(addr, mask) ((addr) & ~(mask))
#define round_up(addr, mask) ((addr) | (mask))
@@ -461,6 +483,135 @@ static int alloc_magic_pages(struct xc_dom_image *dom)
return 0;
}
+static void build_hvm_info(void *hvm_info_page, struct xc_dom_image *dom)
+{
+ struct hvm_info_table *hvm_info = (struct hvm_info_table *)
+ (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
+ uint8_t sum;
+ int i;
+
+ memset(hvm_info_page, 0, PAGE_SIZE);
+
+ /* Fill in the header. */
+ memcpy(hvm_info->signature, "HVM INFO", sizeof(hvm_info->signature));
+ hvm_info->length = sizeof(struct hvm_info_table);
+
+ /* Sensible defaults: these can be overridden by the caller. */
+ hvm_info->apic_mode = 1;
+ hvm_info->nr_vcpus = 1;
+ memset(hvm_info->vcpu_online, 0xff, sizeof(hvm_info->vcpu_online));
+
+ /* Memory parameters. */
+ hvm_info->low_mem_pgend = dom->lowmem_end >> PAGE_SHIFT;
+ hvm_info->high_mem_pgend = dom->highmem_end >> PAGE_SHIFT;
+ hvm_info->reserved_mem_pgstart = ioreq_server_pfn(0);
+
+ /* Finish with the checksum: the byte sum of the whole table must be zero. */
+ for ( i = 0, sum = 0; i < hvm_info->length; i++ )
+ sum += ((uint8_t *)hvm_info)[i];
+ hvm_info->checksum = -sum;
+}
+
+static int alloc_magic_pages_hvm(struct xc_dom_image *dom)
+{
+ unsigned long i;
+ void *hvm_info_page;
+ uint32_t *ident_pt, domid = dom->guest_domid;
+ int rc;
+ xen_pfn_t special_array[NR_SPECIAL_PAGES];
+ xen_pfn_t ioreq_server_array[NR_IOREQ_SERVER_PAGES];
+ xc_interface *xch = dom->xch;
+
+ if ( (hvm_info_page = xc_map_foreign_range(
+ xch, domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ HVM_INFO_PFN)) == NULL )
+ goto error_out;
+ build_hvm_info(hvm_info_page, dom);
+ munmap(hvm_info_page, PAGE_SIZE);
+
+ /* Allocate and clear special pages. */
+ for ( i = 0; i < NR_SPECIAL_PAGES; i++ )
+ special_array[i] = special_pfn(i);
+
+ rc = xc_domain_populate_physmap_exact(xch, domid, NR_SPECIAL_PAGES, 0, 0,
+ special_array);
+ if ( rc != 0 )
+ {
+ DOMPRINTF("Could not allocate special pages.");
+ goto error_out;
+ }
+
+ if ( xc_clear_domain_pages(xch, domid, special_pfn(0), NR_SPECIAL_PAGES) )
+ goto error_out;
+
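+ /* Record the location of each special page in the HVM params. */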
+ xc_hvm_param_set(xch, domid, HVM_PARAM_STORE_PFN,
+ special_pfn(SPECIALPAGE_XENSTORE));
+ xc_hvm_param_set(xch, domid, HVM_PARAM_BUFIOREQ_PFN,
+ special_pfn(SPECIALPAGE_BUFIOREQ));
+ xc_hvm_param_set(xch, domid, HVM_PARAM_IOREQ_PFN,
+ special_pfn(SPECIALPAGE_IOREQ));
+ xc_hvm_param_set(xch, domid, HVM_PARAM_CONSOLE_PFN,
+ special_pfn(SPECIALPAGE_CONSOLE));
+ xc_hvm_param_set(xch, domid, HVM_PARAM_PAGING_RING_PFN,
+ special_pfn(SPECIALPAGE_PAGING));
+ xc_hvm_param_set(xch, domid, HVM_PARAM_MONITOR_RING_PFN,
+ special_pfn(SPECIALPAGE_ACCESS));
+ xc_hvm_param_set(xch, domid, HVM_PARAM_SHARING_RING_PFN,
+ special_pfn(SPECIALPAGE_SHARING));
+
+ /*
+ * Allocate and clear additional ioreq server pages. The default
+ * server will use the IOREQ and BUFIOREQ special pages above.
+ */
+ for ( i = 0; i < NR_IOREQ_SERVER_PAGES; i++ )
+ ioreq_server_array[i] = ioreq_server_pfn(i);
+
+ rc = xc_domain_populate_physmap_exact(xch, domid, NR_IOREQ_SERVER_PAGES, 0,
+ 0, ioreq_server_array);
+ if ( rc != 0 )
+ {
+ DOMPRINTF("Could not allocate ioreq server pages.");
+ goto error_out;
+ }
+
+ if ( xc_clear_domain_pages(xch, domid, ioreq_server_pfn(0),
+ NR_IOREQ_SERVER_PAGES) )
+ goto error_out;
+
+ /* Tell the domain where the pages are and how many there are */
+ xc_hvm_param_set(xch, domid, HVM_PARAM_IOREQ_SERVER_PFN,
+ ioreq_server_pfn(0));
+ xc_hvm_param_set(xch, domid, HVM_PARAM_NR_IOREQ_SERVER_PAGES,
+ NR_IOREQ_SERVER_PAGES);
+
+ /*
+ * Identity-map page table is required for running with CR0.PG=0 when
+ * using Intel EPT. Create a 32-bit non-PAE page directory of superpages.
+ */
+ if ( (ident_pt = xc_map_foreign_range(
+ xch, domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ special_pfn(SPECIALPAGE_IDENT_PT))) == NULL )
+ goto error_out;
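+ /* 1024 PDEs with the PSE bit set, each mapping a 4MB superpage:
+ * an identity map of the full 0-4GB range. */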
+ for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
+ ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
+ _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
+ munmap(ident_pt, PAGE_SIZE);
+ xc_hvm_param_set(xch, domid, HVM_PARAM_IDENT_PT,
+ special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
+
+ dom->console_pfn = special_pfn(SPECIALPAGE_CONSOLE);
+ dom->xenstore_pfn = special_pfn(SPECIALPAGE_XENSTORE);
+ dom->parms.virt_hypercall = -1;
+
+ rc = 0;
+ goto out;
+ error_out:
+ rc = -1;
+ out:
+
+ return rc;
+}
+
/* ------------------------------------------------------------------------ */
static int start_info_x86_32(struct xc_dom_image *dom)
@@ -682,6 +833,102 @@ static int vcpu_x86_64(struct xc_dom_image *dom)
return rc;
}
+static int vcpu_hvm(struct xc_dom_image *dom)
+{
+ struct {
+ struct hvm_save_descriptor header_d;
+ HVM_SAVE_TYPE(HEADER) header;
+ struct hvm_save_descriptor cpu_d;
+ HVM_SAVE_TYPE(CPU) cpu;
+ struct hvm_save_descriptor end_d;
+ HVM_SAVE_TYPE(END) end;
+ } bsp_ctx;
+ uint8_t *full_ctx = NULL;
+ int rc;
+
+ DOMPRINTF_CALLED(dom->xch);
+
+ /*
+ * Get the full HVM context in order to have the header: it is not
+ * possible to get the header with getcontext_partial, and crafting one
+ * from userspace is not an option either, since cpuid is trapped and
+ * modified by Xen.
+ */
+
+ rc = xc_domain_hvm_getcontext(dom->xch, dom->guest_domid, NULL, 0);
+ if ( rc <= 0 )
+ {
+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+ "%s: unable to fetch HVM context size (rc=%d)",
+ __func__, rc);
+ return rc;
+ }
+
+ full_ctx = calloc(1, rc);
+ if ( full_ctx == NULL )
+ {
+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+ "%s: unable to allocate memory for HVM context (rc=%d)",
+ __func__, rc);
+ return -ENOMEM;
+ }
+
+ rc = xc_domain_hvm_getcontext(dom->xch, dom->guest_domid, full_ctx, rc);
+ if ( rc <= 0 )
+ {
+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+ "%s: unable to fetch HVM context (rc=%d)",
+ __func__, rc);
+ goto out;
+ }
+
+ /* Copy the header to our partial context. */
+ memset(&bsp_ctx, 0, sizeof(bsp_ctx));
+ memcpy(&bsp_ctx, full_ctx,
+ sizeof(struct hvm_save_descriptor) + HVM_SAVE_LENGTH(HEADER));
+
+ /* Set the CPU descriptor. */
+ bsp_ctx.cpu_d.typecode = HVM_SAVE_CODE(CPU);
+ bsp_ctx.cpu_d.instance = 0;
+ bsp_ctx.cpu_d.length = HVM_SAVE_LENGTH(CPU);
+
+ /* Set the cached part of the relevant segment registers. */
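+ /* arbytes 0xc9b/0xc93: present, flat 32-bit code/data; 0x8b: busy 32-bit TSS. */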
+ bsp_ctx.cpu.cs_base = 0;
+ bsp_ctx.cpu.ds_base = 0;
+ bsp_ctx.cpu.ss_base = 0;
+ bsp_ctx.cpu.tr_base = 0;
+ bsp_ctx.cpu.cs_limit = ~0u;
+ bsp_ctx.cpu.ds_limit = ~0u;
+ bsp_ctx.cpu.ss_limit = ~0u;
+ bsp_ctx.cpu.tr_limit = 0xff;
+ bsp_ctx.cpu.cs_arbytes = 0xc9b;
+ bsp_ctx.cpu.ds_arbytes = 0xc93;
+ bsp_ctx.cpu.ss_arbytes = 0xc93;
+ bsp_ctx.cpu.tr_arbytes = 0x8b;
+
+ /* Set the control registers. */
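+ /* PE: protected mode enabled; ET: hardwired to 1. Paging stays disabled. */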
+ bsp_ctx.cpu.cr0 = X86_CR0_PE | X86_CR0_ET;
+
+ /* Set the IP. */
+ bsp_ctx.cpu.rip = dom->parms.phys_entry;
+
+ /* Set the end descriptor. */
+ bsp_ctx.end_d.typecode = HVM_SAVE_CODE(END);
+ bsp_ctx.end_d.instance = 0;
+ bsp_ctx.end_d.length = HVM_SAVE_LENGTH(END);
+
+ rc = xc_domain_hvm_setcontext(dom->xch, dom->guest_domid,
+ (uint8_t *)&bsp_ctx, sizeof(bsp_ctx));
+ if ( rc != 0 )
+ xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+ "%s: SETHVMCONTEXT failed (rc=%d)", __func__, rc);
+
+ out:
+ if ( full_ctx )
+ free(full_ctx);
+ return rc;
+}
+
/* ------------------------------------------------------------------------ */
static int x86_compat(xc_interface *xch, domid_t domid, char *guest_type)
@@ -762,7 +1009,7 @@ static int meminit_pv(struct xc_dom_image *dom)
if ( dom->superpages )
{
- int count = dom->total_pages >> SUPERPAGE_PFN_SHIFT;
+ int count = dom->total_pages >> SUPERPAGE_2MB_SHIFT;
xen_pfn_t extents[count];
dom->p2m_size = dom->total_pages;
@@ -773,9 +1020,9 @@ static int meminit_pv(struct xc_dom_image *dom)
DOMPRINTF("Populating memory with %d superpages", count);
for ( pfn = 0; pfn < count; pfn++ )
- extents[pfn] = pfn << SUPERPAGE_PFN_SHIFT;
+ extents[pfn] = pfn << SUPERPAGE_2MB_SHIFT;
rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
- count, SUPERPAGE_PFN_SHIFT, 0,
+ count, SUPERPAGE_2MB_SHIFT, 0,
extents);
if ( rc )
return rc;
@@ -785,7 +1032,7 @@ static int meminit_pv(struct xc_dom_image *dom)
for ( i = 0; i < count; i++ )
{
mfn = extents[i];
- for ( j = 0; j < SUPERPAGE_NR_PFNS; j++, pfn++ )
+ for ( j = 0; j < SUPERPAGE_2MB_NR_PFNS; j++, pfn++ )
dom->p2m_host[pfn] = mfn + j;
}
}
@@ -870,7 +1117,7 @@ static int meminit_pv(struct xc_dom_image *dom)
pages = (vmemranges[i].end - vmemranges[i].start)
>> PAGE_SHIFT;
- super_pages = pages >> SUPERPAGE_PFN_SHIFT;
+ super_pages = pages >> SUPERPAGE_2MB_SHIFT;
pfn_base = vmemranges[i].start >> PAGE_SHIFT;
for ( pfn = pfn_base; pfn < pfn_base+pages; pfn++ )
@@ -883,11 +1130,11 @@ static int meminit_pv(struct xc_dom_image *dom)
super_pages -= count;
for ( pfn = pfn_base_idx, j = 0;
- pfn < pfn_base_idx + (count << SUPERPAGE_PFN_SHIFT);
- pfn += SUPERPAGE_NR_PFNS, j++ )
+ pfn < pfn_base_idx + (count << SUPERPAGE_2MB_SHIFT);
+ pfn += SUPERPAGE_2MB_NR_PFNS, j++ )
extents[j] = dom->p2m_host[pfn];
rc = xc_domain_populate_physmap(dom->xch, dom->guest_domid, count,
- SUPERPAGE_PFN_SHIFT, memflags,
+ SUPERPAGE_2MB_SHIFT, memflags,
extents);
if ( rc < 0 )
return rc;
@@ -897,7 +1144,7 @@ static int meminit_pv(struct xc_dom_image *dom)
for ( j = 0; j < rc; j++ )
{
mfn = extents[j];
- for ( k = 0; k < SUPERPAGE_NR_PFNS; k++, pfn++ )
+ for ( k = 0; k < SUPERPAGE_2MB_NR_PFNS; k++, pfn++ )
dom->p2m_host[pfn] = mfn + k;
}
pfn_base_idx = pfn;
@@ -938,6 +1185,332 @@ static int meminit_pv(struct xc_dom_image *dom)
return rc;
}
+/*
+ * Check whether the MMIO hole overlaps the specified memory range.
+ * Returns 1 if it does, 0 otherwise.
+ */
+static int check_mmio_hole(uint64_t start, uint64_t memsize,
+ uint64_t mmio_start, uint64_t mmio_size)
+{
+ if ( start + memsize <= mmio_start || start >= mmio_start + mmio_size )
+ return 0;
+ else
+ return 1;
+}
+
+static int meminit_hvm(struct xc_dom_image *dom)
+{
+ unsigned long i, vmemid, nr_pages = dom->total_pages;
+ unsigned long p2m_size;
+ unsigned long target_pages = dom->target_pages;
+ unsigned long cur_pages, cur_pfn;
+ int rc;
+ xen_capabilities_info_t caps;
+ unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
+ stat_1gb_pages = 0;
+ unsigned int memflags = 0;
+ int claim_enabled = dom->claim_enabled;
+ uint64_t total_pages;
+ xen_vmemrange_t dummy_vmemrange[2];
+ unsigned int dummy_vnode_to_pnode[1];
+ xen_vmemrange_t *vmemranges;
+ unsigned int *vnode_to_pnode;
+ unsigned int nr_vmemranges, nr_vnodes;
+ xc_interface *xch = dom->xch;
+ uint32_t domid = dom->guest_domid;
+
+ if ( nr_pages > target_pages )
+ memflags |= XENMEMF_populate_on_demand;
+
+ if ( dom->nr_vmemranges == 0 )
+ {
+ /* Build dummy vnode information
+ *
+ * Guest physical address space layout:
+ * [0, hole_start) [hole_start, 4G) [4G, highmem_end)
+ *
+ * Of course if there is no high memory, the second vmemrange
+ * has no effect on the actual result.
+ */
+
+ dummy_vmemrange[0].start = 0;
+ dummy_vmemrange[0].end = dom->lowmem_end;
+ dummy_vmemrange[0].flags = 0;
+ dummy_vmemrange[0].nid = 0;
+ nr_vmemranges = 1;
+
+ if ( dom->highmem_end > (1ULL << 32) )
+ {
+ dummy_vmemrange[1].start = 1ULL << 32;
+ dummy_vmemrange[1].end = dom->highmem_end;
+ dummy_vmemrange[1].flags = 0;
+ dummy_vmemrange[1].nid = 0;
+
+ nr_vmemranges++;
+ }
+
+ dummy_vnode_to_pnode[0] = XC_NUMA_NO_NODE;
+ nr_vnodes = 1;
+ vmemranges = dummy_vmemrange;
+ vnode_to_pnode = dummy_vnode_to_pnode;
+ }
+ else
+ {
+ if ( nr_pages > target_pages )
+ {
+ DOMPRINTF("Cannot enable vNUMA and PoD at the same time");
+ goto error_out;
+ }
+
+ nr_vmemranges = dom->nr_vmemranges;
+ nr_vnodes = dom->nr_vnodes;
+ vmemranges = dom->vmemranges;
+ vnode_to_pnode = dom->vnode_to_pnode;
+ }
+
+ total_pages = 0;
+ p2m_size = 0;
+ for ( i = 0; i < nr_vmemranges; i++ )
+ {
+ total_pages += ((vmemranges[i].end - vmemranges[i].start)
+ >> PAGE_SHIFT);
+ p2m_size = p2m_size > (vmemranges[i].end >> PAGE_SHIFT) ?
+ p2m_size : (vmemranges[i].end >> PAGE_SHIFT);
+ }
+
+ if ( total_pages != nr_pages )
+ {
+ DOMPRINTF("vNUMA memory pages mismatch (0x%"PRIx64" != 0x%"PRIx64")",
+ total_pages, nr_pages);
+ goto error_out;
+ }
+
+ if ( xc_version(xch, XENVER_capabilities, &caps) != 0 )
+ {
+ DOMPRINTF("Could not get Xen capabilities");
+ goto error_out;
+ }
+
+ dom->p2m_size = p2m_size;
+ dom->p2m_host = xc_dom_malloc(dom, sizeof(xen_pfn_t) *
+ dom->p2m_size);
+ if ( dom->p2m_host == NULL )
+ {
+ DOMPRINTF("Could not allocate p2m");
+ goto error_out;
+ }
+
+ for ( i = 0; i < p2m_size; i++ )
+ dom->p2m_host[i] = ((xen_pfn_t)-1);
+ for ( vmemid = 0; vmemid < nr_vmemranges; vmemid++ )
+ {
+ uint64_t pfn;
+
+ for ( pfn = vmemranges[vmemid].start >> PAGE_SHIFT;
+ pfn < vmemranges[vmemid].end >> PAGE_SHIFT;
+ pfn++ )
+ dom->p2m_host[pfn] = pfn;
+ }
+
+ /*
+ * Try to claim pages for early warning of insufficient memory available.
+ * This should go before xc_domain_set_pod_target, because that function
+ * actually allocates memory for the guest. Claiming after memory has been
+ * allocated is pointless.
+ */
+ if ( claim_enabled ) {
+ rc = xc_domain_claim_pages(xch, domid, target_pages - VGA_HOLE_SIZE);
+ if ( rc != 0 )
+ {
+ DOMPRINTF("Could not allocate memory for HVM guest as we cannot claim memory!");
+ goto error_out;
+ }
+ }
+
+ if ( memflags & XENMEMF_populate_on_demand )
+ {
+ /*
+ * Subtract VGA_HOLE_SIZE from target_pages for the VGA
+ * "hole". Xen will adjust the PoD cache size so that domain
+ * tot_pages will be target_pages - VGA_HOLE_SIZE after
+ * this call.
+ */
+ rc = xc_domain_set_pod_target(xch, domid, target_pages - VGA_HOLE_SIZE,
+ NULL, NULL, NULL);
+ if ( rc != 0 )
+ {
+ DOMPRINTF("Could not set PoD target for HVM guest.\n");
+ goto error_out;
+ }
+ }
+
+ /*
+ * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
+ *
+ * We attempt to allocate 1GB pages if possible, falling back to 2MB
+ * pages if 1GB allocation fails, and finally to 4KB pages if both
+ * fail.
+ *
+ * In 2MB mode, we allocate pages in batches of no more than 8MB to
+ * ensure that we can be preempted and hence dom0 remains responsive.
+ */
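+ /* Populate the 0xa0 pages below the VGA hole first (pfns 0x00-0x9f). */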
+ rc = xc_domain_populate_physmap_exact(
+ xch, domid, 0xa0, 0, memflags, &dom->p2m_host[0x00]);
+
+ stat_normal_pages = 0;
+ for ( vmemid = 0; vmemid < nr_vmemranges; vmemid++ )
+ {
+ unsigned int new_memflags = memflags;
+ uint64_t end_pages;
+ unsigned int vnode = vmemranges[vmemid].nid;
+ unsigned int pnode = vnode_to_pnode[vnode];
+
+ if ( pnode != XC_NUMA_NO_NODE )
+ new_memflags |= XENMEMF_exact_node(pnode);
+
+ end_pages = vmemranges[vmemid].end >> PAGE_SHIFT;
+ /*
+ * Consider the VGA hole as belonging to the vmemrange that covers
+ * 0xA0000-0xC0000. Note that 0x00000-0xA0000 is populated just
+ * before this loop.
+ */
+ if ( vmemranges[vmemid].start == 0 )
+ {
+ cur_pages = 0xc0;
+ stat_normal_pages += 0xc0;
+ }
+ else
+ cur_pages = vmemranges[vmemid].start >> PAGE_SHIFT;
+
+ while ( (rc == 0) && (end_pages > cur_pages) )
+ {
+ /* Clip count to maximum 1GB extent. */
+ unsigned long count = end_pages - cur_pages;
+ unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
+
+ if ( count > max_pages )
+ count = max_pages;
+
+ cur_pfn = dom->p2m_host[cur_pages];
+
+ /* Take care of the corner cases of superpage tails */
+ if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
+ (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
+ count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
+ else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
+ (count > SUPERPAGE_1GB_NR_PFNS) )
+ count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);
+
+ /* Attempt to allocate a 1GB superpage. Because each pass
+ * allocates at most 1GB, we don't have to clip the extents
+ * to superpage boundaries here.
+ */
+ if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
+ /* Check if there exists MMIO hole in the 1GB memory
+ * range */
+ !check_mmio_hole(cur_pfn << PAGE_SHIFT,
+ SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT,
+ dom->mmio_start, dom->mmio_size) )
+ {
+ long done;
+ unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
+ xen_pfn_t sp_extents[nr_extents];
+
+ for ( i = 0; i < nr_extents; i++ )
+ sp_extents[i] =
+ dom->p2m_host[cur_pages+(i<<SUPERPAGE_1GB_SHIFT)];
+
+ done = xc_domain_populate_physmap(xch, domid, nr_extents,
+ SUPERPAGE_1GB_SHIFT,
+ new_memflags, sp_extents);
+
+ if ( done > 0 )
+ {
+ stat_1gb_pages += done;
+ done <<= SUPERPAGE_1GB_SHIFT;
+ cur_pages += done;
+ count -= done;
+ }
+ }
+
+ if ( count != 0 )
+ {
+ /* Clip count to maximum 8MB extent. */
+ max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
+ if ( count > max_pages )
+ count = max_pages;
+
+ /* Clip partial superpage extents to superpage
+ * boundaries. */
+ if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
+ (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
+ count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
+ else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
+ (count > SUPERPAGE_2MB_NR_PFNS) )
+ count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p. tail */
+
+ /* Attempt to allocate superpage extents. */
+ if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
+ {
+ long done;
+ unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
+ xen_pfn_t sp_extents[nr_extents];
+
+ for ( i = 0; i < nr_extents; i++ )
+ sp_extents[i] =
+ dom->p2m_host[cur_pages+(i<<SUPERPAGE_2MB_SHIFT)];
+
+ done = xc_domain_populate_physmap(xch, domid, nr_extents,
+ SUPERPAGE_2MB_SHIFT,
+ new_memflags, sp_extents);
+
+ if ( done > 0 )
+ {
+ stat_2mb_pages += done;
+ done <<= SUPERPAGE_2MB_SHIFT;
+ cur_pages += done;
+ count -= done;
+ }
+ }
+ }
+
+ /* Fall back to 4kB extents. */
+ if ( count != 0 )
+ {
+ rc = xc_domain_populate_physmap_exact(
+ xch, domid, count, 0, new_memflags, &dom->p2m_host[cur_pages]);
+ cur_pages += count;
+ stat_normal_pages += count;
+ }
+ }
+
+ if ( rc != 0 )
+ break;
+ }
+
+ if ( rc != 0 )
+ {
+ DOMPRINTF("Could not allocate memory for HVM guest.");
+ goto error_out;
+ }
+
+ DPRINTF("PHYSICAL MEMORY ALLOCATION:\n");
+ DPRINTF(" 4KB PAGES: 0x%016lx\n", stat_normal_pages);
+ DPRINTF(" 2MB PAGES: 0x%016lx\n", stat_2mb_pages);
+ DPRINTF(" 1GB PAGES: 0x%016lx\n", stat_1gb_pages);
+
+ rc = 0;
+ goto out;
+ error_out:
+ rc = -1;
+ out:
+
+ /* ensure no unclaimed pages are left unused */
+ xc_domain_claim_pages(xch, domid, 0 /* cancels the claim */);
+
+ return rc;
+}
+
/* ------------------------------------------------------------------------ */
static int bootearly(struct xc_dom_image *dom)
@@ -1052,6 +1625,12 @@ static int bootlate_pv(struct xc_dom_image *dom)
return 0;
}
+static int bootlate_hvm(struct xc_dom_image *dom)
+{
+ DOMPRINTF("%s: doing nothing", __func__);
+ return 0;
+}
+
int xc_dom_feature_translated(struct xc_dom_image *dom)
{
/* Guests running inside HVM containers are always auto-translated. */
@@ -1095,10 +1674,27 @@ static struct xc_dom_arch xc_dom_64 = {
.bootlate = bootlate_pv,
};
+static struct xc_dom_arch xc_hvm_32 = {
+ .guest_type = "hvm-3.0-x86_32",
+ .native_protocol = XEN_IO_PROTO_ABI_X86_32,
+ .page_shift = PAGE_SHIFT_X86,
+ .sizeof_pfn = 4,
+ .alloc_magic_pages = alloc_magic_pages_hvm,
+ .count_pgtables = NULL,
+ .setup_pgtables = NULL,
+ .start_info = NULL,
+ .shared_info = NULL,
+ .vcpu = vcpu_hvm,
+ .meminit = meminit_hvm,
+ .bootearly = bootearly,
+ .bootlate = bootlate_hvm,
+};
+
static void __init register_arch_hooks(void)
{
xc_dom_register_arch_hooks(&xc_dom_32_pae);
xc_dom_register_arch_hooks(&xc_dom_64);
+ xc_dom_register_arch_hooks(&xc_hvm_32);
}
/*
--
1.9.5 (Apple Git-50.3)