From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xensource.com, Ian.Campbell@citrix.com,
ian.jackson@eu.citrix.com, stefano.stabellini@eu.citrix.com,
wei.liu2@citrix.com
Cc: Juergen Gross <jgross@suse.com>
Subject: [PATCH 5/5] libxc: create p2m list outside of kernel mapping if supported
Date: Fri, 11 Sep 2015 14:32:22 +0200 [thread overview]
Message-ID: <1441974742-27352-6-git-send-email-jgross@suse.com> (raw)
In-Reply-To: <1441974742-27352-1-git-send-email-jgross@suse.com>
If the kernel of a new pv-domU indicates, by specifying INIT_P2M, that it
supports a p2m list outside the initial kernel mapping, let the domain
builder allocate the memory for the p2m list from physical guest memory
only and map it to the address the kernel expects.
This enables loading pv-domUs larger than 512 GB.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
tools/libxc/include/xc_dom.h | 1 +
tools/libxc/xc_dom_core.c | 17 ++++++-
tools/libxc/xc_dom_x86.c | 109 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 43b1eab..6192fba 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -212,6 +212,7 @@ struct xc_dom_arch {
char *native_protocol;
int page_shift;
int sizeof_pfn;
+ int p2m_base_supported;
struct xc_dom_arch *next;
};
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index 81b642e..1cf77d7 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -734,6 +734,7 @@ struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
dom->parms.virt_hypercall = UNSET_ADDR;
dom->parms.virt_hv_start_low = UNSET_ADDR;
dom->parms.elf_paddr_offset = UNSET_ADDR;
+ dom->parms.p2m_base = UNSET_ADDR;
dom->alloc_malloc += sizeof(*dom);
return dom;
@@ -1048,7 +1049,11 @@ int xc_dom_build_image(struct xc_dom_image *dom)
}
/* allocate other pages */
- if ( dom->arch_hooks->alloc_p2m_list &&
+ if ( !dom->arch_hooks->p2m_base_supported ||
+ dom->parms.p2m_base >= dom->parms.virt_base ||
+ (dom->parms.p2m_base & (XC_DOM_PAGE_SIZE(dom) - 1)) )
+ dom->parms.p2m_base = UNSET_ADDR;
+ if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base == UNSET_ADDR &&
dom->arch_hooks->alloc_p2m_list(dom) != 0 )
goto err;
if ( dom->arch_hooks->alloc_magic_pages(dom) != 0 )
@@ -1086,6 +1091,16 @@ int xc_dom_build_image(struct xc_dom_image *dom)
dom->ramdisk_seg.vend += dom->ramdisk_seg.vstart;
}
+ /* Allocate p2m list if outside of initial kernel mapping. */
+ if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base != UNSET_ADDR )
+ {
+ if ( dom->arch_hooks->alloc_p2m_list(dom) != 0 )
+ goto err;
+ dom->p2m_seg.vend = dom->p2m_seg.vend - dom->p2m_seg.vstart;
+ dom->p2m_seg.vstart = dom->parms.p2m_base;
+ dom->p2m_seg.vend += dom->p2m_seg.vstart;
+ }
+
return 0;
err:
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index 91d4e49..debc685 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -46,6 +46,8 @@
#define bits_to_mask(bits) (((xen_vaddr_t)1 << (bits))-1)
#define round_down(addr, mask) ((addr) & ~(mask))
#define round_up(addr, mask) ((addr) | (mask))
+#define round_pg(addr) (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1))
+#define round_pfn(addr) (((addr) + PAGE_SIZE_X86 - 1) / PAGE_SIZE_X86)
/* get guest IO ABI protocol */
const char *xc_domain_get_native_protocol(xc_interface *xch,
@@ -423,6 +425,81 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
}
}
}
+
+ if ( dom->parms.p2m_base == UNSET_ADDR )
+ return 0;
+
+ /*
+ * Build the page tables for mapping the p2m list at an address
+ * specified by the kernel that is to be loaded.
+ * l1pfn holds the pfn of the next page table to allocate.
+ * At each level we might already have an entry filled when setting
+ * up the initial kernel mapping. This can happen for the last entry
+ * of each level only!
+ */
+ l3tab = NULL;
+ l2tab = NULL;
+ l1tab = NULL;
+ l1pfn = round_pfn(dom->p2m_size * dom->arch_hooks->sizeof_pfn) +
+ dom->p2m_seg.pfn;
+
+ for ( addr = dom->parms.p2m_base;
+ addr < dom->parms.p2m_base +
+ dom->p2m_size * dom->arch_hooks->sizeof_pfn;
+ addr += PAGE_SIZE_X86 )
+ {
+ if ( l3tab == NULL )
+ {
+ l4off = l4_table_offset_x86_64(addr);
+ l3pfn = l4tab[l4off] ? l4pfn + dom->pg_l4 : l1pfn++;
+ l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+ if ( l3tab == NULL )
+ goto pfn_error;
+ l4tab[l4off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
+ }
+
+ if ( l2tab == NULL )
+ {
+ l3off = l3_table_offset_x86_64(addr);
+ l2pfn = l3tab[l3off] ? l3pfn + dom->pg_l3 : l1pfn++;
+ l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+ if ( l2tab == NULL )
+ goto pfn_error;
+ l3tab[l3off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+ }
+
+ if ( l1tab == NULL )
+ {
+ l2off = l2_table_offset_x86_64(addr);
+ l1pfn = l2tab[l2off] ? l2pfn + dom->pg_l2 : l1pfn;
+ l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+ if ( l1tab == NULL )
+ goto pfn_error;
+ l2tab[l2off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+ l1pfn++;
+ }
+
+ l1off = l1_table_offset_x86_64(addr);
+ pgpfn = ((addr - dom->parms.p2m_base) >> PAGE_SHIFT_X86) +
+ dom->p2m_seg.pfn;
+ l1tab[l1off] =
+ pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+
+ if ( l1off == (L1_PAGETABLE_ENTRIES_X86_64 - 1) )
+ {
+ l1tab = NULL;
+ if ( l2off == (L2_PAGETABLE_ENTRIES_X86_64 - 1) )
+ {
+ l2tab = NULL;
+ if ( l3off == (L3_PAGETABLE_ENTRIES_X86_64 - 1) )
+ l3tab = NULL;
+ }
+ }
+ }
+
return 0;
pfn_error:
@@ -441,6 +518,27 @@ pfn_error:
static int alloc_p2m_list(struct xc_dom_image *dom)
{
size_t p2m_alloc_size = dom->p2m_size * dom->arch_hooks->sizeof_pfn;
+ xen_vaddr_t from, to;
+ xen_pfn_t tables;
+
+ p2m_alloc_size = round_pg(p2m_alloc_size);
+ if ( dom->parms.p2m_base != UNSET_ADDR )
+ {
+ /* Add space for page tables, 64 bit only. */
+ from = dom->parms.p2m_base;
+ to = from + p2m_alloc_size - 1;
+ tables = 0;
+ tables += nr_page_tables(dom, from, to, L4_PAGETABLE_SHIFT_X86_64);
+ if ( to > (xen_vaddr_t)(~0ULL << L4_PAGETABLE_SHIFT_X86_64) )
+ tables--;
+ tables += nr_page_tables(dom, from, to, L3_PAGETABLE_SHIFT_X86_64);
+ if ( to > (xen_vaddr_t)(~0ULL << L3_PAGETABLE_SHIFT_X86_64) )
+ tables--;
+ tables += nr_page_tables(dom, from, to, L2_PAGETABLE_SHIFT_X86_64);
+ if ( to > (xen_vaddr_t)(~0ULL << L2_PAGETABLE_SHIFT_X86_64) )
+ tables--;
+ p2m_alloc_size += tables << PAGE_SHIFT_X86;
+ }
/* allocate phys2mach table */
if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach",
@@ -540,6 +638,12 @@ static int start_info_x86_64(struct xc_dom_image *dom)
start_info->pt_base = dom->pgtables_seg.vstart;
start_info->nr_pt_frames = dom->pgtables;
start_info->mfn_list = dom->p2m_seg.vstart;
+ if ( dom->parms.p2m_base != UNSET_ADDR )
+ {
+ start_info->first_p2m_pfn = dom->p2m_seg.pfn;
+ start_info->nr_p2m_frames =
+ (dom->p2m_seg.vend - dom->p2m_seg.vstart) >> PAGE_SHIFT_X86;
+ }
start_info->flags = dom->flags;
start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
@@ -682,6 +786,7 @@ static struct xc_dom_arch xc_dom_32_pae = {
.native_protocol = XEN_IO_PROTO_ABI_X86_32,
.page_shift = PAGE_SHIFT_X86,
.sizeof_pfn = 4,
+ .p2m_base_supported = 0,
.alloc_magic_pages = alloc_magic_pages,
.alloc_p2m_list = alloc_p2m_list,
.count_pgtables = count_pgtables_x86_32_pae,
@@ -696,6 +801,7 @@ static struct xc_dom_arch xc_dom_64 = {
.native_protocol = XEN_IO_PROTO_ABI_X86_64,
.page_shift = PAGE_SHIFT_X86,
.sizeof_pfn = 8,
+ .p2m_base_supported = 1,
.alloc_magic_pages = alloc_magic_pages,
.alloc_p2m_list = alloc_p2m_list,
.count_pgtables = count_pgtables_x86_64,
@@ -1027,7 +1133,10 @@ int arch_setup_bootlate(struct xc_dom_image *dom)
if ( !xc_dom_feature_translated(dom) )
{
/* paravirtualized guest */
+
+ /* Drop references to all initial page tables before pinning. */
xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
+ xc_dom_unmap_one(dom, dom->p2m_seg.pfn);
rc = pin_table(dom->xch, pgd_type,
xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
dom->guest_domid);
--
2.1.4
next prev parent reply other threads:[~2015-09-11 12:32 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-11 12:32 [PATCH 0/5] libxc: support building large pv-domains Juergen Gross
2015-09-11 12:32 ` [PATCH 1/5] libxc: remove allocate member from struct xc_dom_image Juergen Gross
2015-09-11 12:44 ` Ian Jackson
2015-09-25 15:39 ` Ian Campbell
2015-09-28 3:55 ` Juergen Gross
2015-09-28 9:33 ` Ian Campbell
2015-09-11 12:32 ` [PATCH 2/5] libxc: do initrd processing of domain builder in own function Juergen Gross
2015-09-11 12:45 ` Ian Jackson
2015-09-25 15:39 ` Ian Campbell
2015-09-11 12:32 ` [PATCH 3/5] libxc: create unmapped initrd in domain builder if supported Juergen Gross
2015-09-11 12:54 ` Ian Jackson
2015-09-11 13:15 ` Julien Grall
2015-09-11 13:39 ` Juergen Gross
2015-09-25 15:22 ` Ian Campbell
2015-09-11 13:32 ` Juergen Gross
2015-09-11 15:51 ` Ian Jackson
2015-09-11 12:32 ` [PATCH 4/5] libxc: split p2m allocation in domain builder from other magic pages Juergen Gross
2015-10-01 12:47 ` Ian Campbell
2015-10-02 3:55 ` Juergen Gross
2015-10-02 9:04 ` Ian Campbell
2015-10-02 9:14 ` Juergen Gross
2015-10-02 9:28 ` Ian Campbell
2015-09-11 12:32 ` Juergen Gross [this message]
2015-09-11 13:28 ` [PATCH 0/5] libxc: support building large pv-domains Ian Campbell
2015-09-11 13:42 ` Juergen Gross
2015-09-11 13:53 ` Ian Campbell
2015-09-11 14:01 ` Juergen Gross
2015-09-25 15:40 ` Ian Campbell
2015-09-22 12:12 ` Juergen Gross
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1441974742-27352-6-git-send-email-jgross@suse.com \
--to=jgross@suse.com \
--cc=Ian.Campbell@citrix.com \
--cc=ian.jackson@eu.citrix.com \
--cc=stefano.stabellini@eu.citrix.com \
--cc=wei.liu2@citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).