xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xen.org, Ian.Campbell@citrix.com,
	ian.jackson@eu.citrix.com, stefano.stabellini@eu.citrix.com,
	wei.liu2@citrix.com
Cc: Juergen Gross <jgross@suse.com>
Subject: [PATCH v2 5/5] libxc: create p2m list outside of kernel mapping if supported
Date: Fri,  2 Oct 2015 07:49:47 +0200	[thread overview]
Message-ID: <1443764987-23639-6-git-send-email-jgross@suse.com> (raw)
In-Reply-To: <1443764987-23639-1-git-send-email-jgross@suse.com>

If the kernel of a new pv-domU indicates, by specifying INIT_P2M, that it
supports a p2m list outside the initial kernel mapping, let the domain
builder allocate the memory for the p2m list from guest physical memory
only and map it at the address the kernel expects.

This will enable loading pv-domUs larger than 512 GB.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 tools/libxc/include/xc_dom.h |   1 +
 tools/libxc/xc_dom_core.c    |  17 ++++++-
 tools/libxc/xc_dom_x86.c     | 109 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/tools/libxc/include/xc_dom.h b/tools/libxc/include/xc_dom.h
index 9117269..5731098 100644
--- a/tools/libxc/include/xc_dom.h
+++ b/tools/libxc/include/xc_dom.h
@@ -210,6 +210,7 @@ struct xc_dom_arch {
     char *native_protocol;
     int page_shift;
     int sizeof_pfn;
+    int p2m_base_supported;
 
     struct xc_dom_arch *next;
 };
diff --git a/tools/libxc/xc_dom_core.c b/tools/libxc/xc_dom_core.c
index bd970c5..36a0d63 100644
--- a/tools/libxc/xc_dom_core.c
+++ b/tools/libxc/xc_dom_core.c
@@ -734,6 +734,7 @@ struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
     dom->parms.virt_hypercall = UNSET_ADDR;
     dom->parms.virt_hv_start_low = UNSET_ADDR;
     dom->parms.elf_paddr_offset = UNSET_ADDR;
+    dom->parms.p2m_base = UNSET_ADDR;
 
     dom->alloc_malloc += sizeof(*dom);
     return dom;
@@ -1047,7 +1048,11 @@ int xc_dom_build_image(struct xc_dom_image *dom)
     }
 
     /* allocate other pages */
-    if ( dom->arch_hooks->alloc_p2m_list &&
+    if ( !dom->arch_hooks->p2m_base_supported ||
+         dom->parms.p2m_base >= dom->parms.virt_base ||
+         (dom->parms.p2m_base & (XC_DOM_PAGE_SIZE(dom) - 1)) )
+        dom->parms.p2m_base = UNSET_ADDR;
+    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base == UNSET_ADDR &&
          dom->arch_hooks->alloc_p2m_list(dom) != 0 )
         goto err;
     if ( dom->arch_hooks->alloc_magic_pages(dom) != 0 )
@@ -1084,6 +1089,16 @@ int xc_dom_build_image(struct xc_dom_image *dom)
         dom->ramdisk_seg.vend += dom->ramdisk_seg.vstart;
     }
 
+    /* Allocate p2m list if outside of initial kernel mapping. */
+    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base != UNSET_ADDR )
+    {
+        if ( dom->arch_hooks->alloc_p2m_list(dom) != 0 )
+            goto err;
+        dom->p2m_seg.vend = dom->p2m_seg.vend - dom->p2m_seg.vstart;
+        dom->p2m_seg.vstart = dom->parms.p2m_base;
+        dom->p2m_seg.vend += dom->p2m_seg.vstart;
+    }
+
     return 0;
 
  err:
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index 972f081..5c0d28e 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -46,6 +46,8 @@
 #define bits_to_mask(bits)       (((xen_vaddr_t)1 << (bits))-1)
 #define round_down(addr, mask)   ((addr) & ~(mask))
 #define round_up(addr, mask)     ((addr) | (mask))
+#define round_pg(addr)    (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1))
+#define round_pfn(addr)   (((addr) + PAGE_SIZE_X86 - 1) / PAGE_SIZE_X86)
 
 /* get guest IO ABI protocol */
 const char *xc_domain_get_native_protocol(xc_interface *xch,
@@ -424,6 +426,81 @@ static int setup_pgtables_x86_64(struct xc_dom_image *dom)
             }
         }
     }
+
+    if ( dom->parms.p2m_base == UNSET_ADDR )
+        return 0;
+
+    /*
+     * Build the page tables for mapping the p2m list at an address
+     * specified by the to be loaded kernel.
+     * l1pfn holds the pfn of the next page table to allocate.
+     * At each level we might already have an entry filled when setting
+     * up the initial kernel mapping. This can happen for the last entry
+     * of each level only!
+     */
+    l3tab = NULL;
+    l2tab = NULL;
+    l1tab = NULL;
+    l1pfn = round_pfn(dom->p2m_size * dom->arch_hooks->sizeof_pfn) +
+            dom->p2m_seg.pfn;
+
+    for ( addr = dom->parms.p2m_base;
+          addr < dom->parms.p2m_base +
+                 dom->p2m_size * dom->arch_hooks->sizeof_pfn;
+          addr += PAGE_SIZE_X86 )
+    {
+        if ( l3tab == NULL )
+        {
+            l4off = l4_table_offset_x86_64(addr);
+            l3pfn = l4tab[l4off] ? l4pfn + dom->pg_l4 : l1pfn++;
+            l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
+            if ( l3tab == NULL )
+                goto pfn_error;
+            l4tab[l4off] =
+                pfn_to_paddr(xc_dom_p2m_guest(dom, l3pfn)) | L4_PROT;
+        }
+
+        if ( l2tab == NULL )
+        {
+            l3off = l3_table_offset_x86_64(addr);
+            l2pfn = l3tab[l3off] ? l3pfn + dom->pg_l3 : l1pfn++;
+            l2tab = xc_dom_pfn_to_ptr(dom, l2pfn, 1);
+            if ( l2tab == NULL )
+                goto pfn_error;
+            l3tab[l3off] =
+                pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT;
+        }
+
+        if ( l1tab == NULL )
+        {
+            l2off = l2_table_offset_x86_64(addr);
+            l1pfn = l2tab[l2off] ? l2pfn + dom->pg_l2 : l1pfn;
+            l1tab = xc_dom_pfn_to_ptr(dom, l1pfn, 1);
+            if ( l1tab == NULL )
+                goto pfn_error;
+            l2tab[l2off] =
+                pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT;
+            l1pfn++;
+        }
+
+        l1off = l1_table_offset_x86_64(addr);
+        pgpfn = ((addr - dom->parms.p2m_base) >> PAGE_SHIFT_X86) +
+                dom->p2m_seg.pfn;
+        l1tab[l1off] =
+            pfn_to_paddr(xc_dom_p2m_guest(dom, pgpfn)) | L1_PROT;
+
+        if ( l1off == (L1_PAGETABLE_ENTRIES_X86_64 - 1) )
+        {
+            l1tab = NULL;
+            if ( l2off == (L2_PAGETABLE_ENTRIES_X86_64 - 1) )
+            {
+                l2tab = NULL;
+                if ( l3off == (L3_PAGETABLE_ENTRIES_X86_64 - 1) )
+                    l3tab = NULL;
+            }
+        }
+    }
+
     return 0;
 
 pfn_error:
@@ -442,6 +519,27 @@ pfn_error:
 static int alloc_p2m_list(struct xc_dom_image *dom)
 {
     size_t p2m_alloc_size = dom->p2m_size * dom->arch_hooks->sizeof_pfn;
+    xen_vaddr_t from, to;
+    xen_pfn_t tables;
+
+    p2m_alloc_size = round_pg(p2m_alloc_size);
+    if ( dom->parms.p2m_base != UNSET_ADDR )
+    {
+        /* Add space for page tables, 64 bit only. */
+        from = dom->parms.p2m_base;
+        to = from + p2m_alloc_size - 1;
+        tables = 0;
+        tables += nr_page_tables(dom, from, to, L4_PAGETABLE_SHIFT_X86_64);
+        if ( to > (xen_vaddr_t)(~0ULL << L4_PAGETABLE_SHIFT_X86_64) )
+            tables--;
+        tables += nr_page_tables(dom, from, to, L3_PAGETABLE_SHIFT_X86_64);
+        if ( to > (xen_vaddr_t)(~0ULL << L3_PAGETABLE_SHIFT_X86_64) )
+            tables--;
+        tables += nr_page_tables(dom, from, to, L2_PAGETABLE_SHIFT_X86_64);
+        if ( to > (xen_vaddr_t)(~0ULL << L2_PAGETABLE_SHIFT_X86_64) )
+            tables--;
+        p2m_alloc_size += tables << PAGE_SHIFT_X86;
+    }
 
     /* allocate phys2mach table */
     if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach",
@@ -541,6 +639,12 @@ static int start_info_x86_64(struct xc_dom_image *dom)
     start_info->pt_base = dom->pgtables_seg.vstart;
     start_info->nr_pt_frames = dom->pgtables;
     start_info->mfn_list = dom->p2m_seg.vstart;
+    if ( dom->parms.p2m_base != UNSET_ADDR )
+    {
+        start_info->first_p2m_pfn = dom->p2m_seg.pfn;
+        start_info->nr_p2m_frames =
+            (dom->p2m_seg.vend - dom->p2m_seg.vstart) >> PAGE_SHIFT_X86;
+    }
 
     start_info->flags = dom->flags;
     start_info->store_mfn = xc_dom_p2m_guest(dom, dom->xenstore_pfn);
@@ -680,6 +784,7 @@ static struct xc_dom_arch xc_dom_32_pae = {
     .native_protocol = XEN_IO_PROTO_ABI_X86_32,
     .page_shift = PAGE_SHIFT_X86,
     .sizeof_pfn = 4,
+    .p2m_base_supported = 0,
     .alloc_magic_pages = alloc_magic_pages,
     .alloc_p2m_list = alloc_p2m_list,
     .count_pgtables = count_pgtables_x86_32_pae,
@@ -694,6 +799,7 @@ static struct xc_dom_arch xc_dom_64 = {
     .native_protocol = XEN_IO_PROTO_ABI_X86_64,
     .page_shift = PAGE_SHIFT_X86,
     .sizeof_pfn = 8,
+    .p2m_base_supported = 1,
     .alloc_magic_pages = alloc_magic_pages,
     .alloc_p2m_list = alloc_p2m_list,
     .count_pgtables = count_pgtables_x86_64,
@@ -1025,7 +1131,10 @@ int arch_setup_bootlate(struct xc_dom_image *dom)
     if ( !xc_dom_feature_translated(dom) )
     {
         /* paravirtualized guest */
+
+        /* Drop references to all initial page tables before pinning. */
         xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
+        xc_dom_unmap_one(dom, dom->p2m_seg.pfn);
         rc = pin_table(dom->xch, pgd_type,
                        xc_dom_p2m_host(dom, dom->pgtables_seg.pfn),
                        dom->guest_domid);
-- 
2.1.4

  parent reply	other threads:[~2015-10-02  5:49 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-02  5:49 [PATCH v2 0/5] libxc: support building large pv-domains Juergen Gross
2015-10-02  5:49 ` [PATCH v2 1/5] libxc: remove allocate member from struct xc_dom_image Juergen Gross
2015-10-02 13:01   ` Ian Campbell
2015-10-02 14:25     ` Juergen Gross
2015-10-02 14:47       ` Ian Campbell
2015-10-02 15:00         ` Juergen Gross
2015-10-02  5:49 ` [PATCH v2 2/5] xen: add generic flag to elf_dom_parms indicating support of unmapped initrd Juergen Gross
2015-10-02  9:37   ` Andrew Cooper
2015-10-02  9:41     ` Jan Beulich
2015-10-02  9:44     ` Juergen Gross
2015-10-02  9:53       ` Andrew Cooper
2015-10-02 10:01         ` Juergen Gross
2015-10-02 10:22           ` Ian Campbell
2015-10-02  5:49 ` [PATCH v2 3/5] libxc: create unmapped initrd in domain builder if supported Juergen Gross
2015-10-02 12:59   ` Ian Campbell
2015-10-02 14:46     ` Juergen Gross
2015-10-02 14:56       ` Ian Campbell
2015-10-02 15:13         ` Juergen Gross
2015-10-02 15:21           ` Ian Campbell
2015-10-02 16:28             ` Juergen Gross
2015-10-02  5:49 ` [PATCH v2 4/5] libxc: split p2m allocation in domain builder from other magic pages Juergen Gross
2015-10-02  9:29   ` Ian Campbell
2015-10-02  5:49 ` Juergen Gross [this message]
2015-10-02 13:16   ` [PATCH v2 5/5] libxc: create p2m list outside of kernel mapping if supported Ian Campbell
2015-10-02 14:37     ` Juergen Gross

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1443764987-23639-6-git-send-email-jgross@suse.com \
    --to=jgross@suse.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).