Re: [V10 PATCH 0/4] pvh dom0 patches...

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: "Roger Pau Monné" <roger.pau@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
	Mukesh Rathor <mukesh.rathor@oracle.com>
Cc: George.Dunlap@eu.citrix.com, xen-devel@lists.xenproject.org,
	keir.xen@gmail.com, tim@xen.org
Subject: Re: [V10 PATCH 0/4] pvh dom0 patches...
Date: Wed, 7 May 2014 11:48:56 +0200	[thread overview]
Message-ID: <536A0188.9090109@citrix.com> (raw)
In-Reply-To: <536A01D8020000780000FB15@mail.emea.novell.com>

On 07/05/14 09:50, Jan Beulich wrote:
>>>> On 07.05.14 at 03:00, <mukesh.rathor@oracle.com> wrote:
>> As Konrad points out, there are other issues with that part of the code.
>> So, IMO, we should wait and address that altogether for both PV and PVH,
>> and for now leave this as is.
> 
> I'm fine either way, as long as you and Roger can reach agreement.
> I still tend towards considering Roger's position the right one as a
> mid/long term route.
> 
>> If you still feel we should do this in xen (as in required for this
>> patch series to go in), then we'd need to communicate last pfn to guest.
>> However, note that guest would still have all that code for PV, for pvh
>> we'd just skip it. Obvious way coming to mind is:
>>
>> unsigned long last_pfn_mapped; 
>>
>> added to struct start_info.  Alternative would be to add a 
>> new sub call to perhaps getdomaininfo hcall or something similar. 
>> Please lmk. 
> 
> We already have the max_pfn field in the shared info, which for
> PV is under the sole control of the guest kernel. That could
> certainly be set to other than zero to communicate what the
> highest mapped PFN is. The problem I'm seeing with this (and
> with everything you suggest above) is that this doesn't tell the
> guest where the holes are - while Dom0 can get at the machine
> memory map to tell (or really: guess) where they are, DomU can't
> yet will need to as soon as you want to be able to support device
> pass-through. Hence I guess we will need XENMEM_memory_map to
> reflect reality for PVH guests.

I've expanded my patch that added the memory removed form the holes to 
the end of the memory map to also create an e820 map for Dom0 that 
reflects the reality of the underlying p2m. This way PVH guests (either 
DomU or Dom0) should only use XENMEM_memory_map to get the correct e820 
map (and no need for clamping in the Dom0 case).

---
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 38ed9f6..03e8008 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -327,11 +327,13 @@ static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
  * pvh fixme: The following doesn't map MMIO ranges when they sit above the
  *            highest E820 covered address.
  */
-static __init void pvh_map_all_iomem(struct domain *d)
+static __init void pvh_map_all_iomem(struct domain *d, unsigned long nr_pages)
 {
     unsigned long start_pfn, end_pfn, end = 0, start = 0;
     const struct e820entry *entry;
-    unsigned int i, nump;
+    unsigned int i, j, nump, navail, nmap, nr_holes = 0;
+    struct page_info *page;
+    int rc;
 
     for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
     {
@@ -353,6 +355,9 @@ static __init void pvh_map_all_iomem(struct domain *d)
                 nump = end_pfn - start_pfn;
                 /* Add pages to the mapping */
                 pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+                if ( start_pfn <= nr_pages )
+                    nr_holes += (end_pfn < nr_pages) ?
+                                    nump : (nr_pages - start_pfn);
             }
             start = end;
         }
@@ -369,6 +374,89 @@ static __init void pvh_map_all_iomem(struct domain *d)
         nump = end_pfn - start_pfn;
         pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
     }
+
+    /*
+     * Add the memory removed by the holes at the end of the
+     * memory map.
+     */
+    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+            continue;
+
+        end_pfn = PFN_UP(entry->addr + entry->size);
+        if ( end_pfn <= nr_pages )
+            continue;
+
+        navail = end_pfn - nr_pages;
+        nmap = navail > nr_holes ? nr_holes : navail;
+        start_pfn = PFN_DOWN(entry->addr) < nr_pages ?
+                        nr_pages : PFN_DOWN(entry->addr);
+        page = alloc_domheap_pages(d, get_order_from_pages(nmap), 0);
+        if ( !page )
+            panic("Not enough RAM for domain 0");
+        for ( j = 0; j < nmap; j++ )
+        {
+            rc = guest_physmap_add_page(d, start_pfn + j, page_to_mfn(page), 0);
+            if ( rc != 0 )
+                panic("Unable to add gpfn %#lx mfn %#lx to Dom0 physmap",
+                      start_pfn + j, page_to_mfn(page));
+            page++;
+
+        }
+        nr_holes -= nmap;
+        if ( nr_holes == 0 )
+            break;
+    }
+
+    ASSERT(nr_holes == 0);
+}
+
+static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
+{
+    struct e820entry *entry;
+    unsigned int i;
+    unsigned long pages, cur_pages = 0;
+
+    /*
+     * Craft the e820 memory map for Dom0 based on the hardware e820 map.
+     */
+    d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
+    if ( !d->arch.e820 )
+        panic("Unable to allocate memory for Dom0 e820 map");
+
+    memcpy(d->arch.e820, e820.map, sizeof(struct e820entry) * e820.nr_map);
+    d->arch.nr_e820 = e820.nr_map;
+
+    /* Clamp e820 memory map to match the memory assigned to Dom0 */
+    for ( i = 0, entry = d->arch.e820; i < d->arch.nr_e820; i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+            continue;
+
+        if ( nr_pages == cur_pages )
+        {
+            /*
+             * We already have all the assigned memory,
+             * mark this region as reserved.
+             */
+            entry->type = E820_RESERVED;
+            continue;
+        }
+
+        pages = entry->size >> PAGE_SHIFT;
+        if ( (cur_pages + pages) > nr_pages )
+        {
+            /* Truncate region */
+            entry->size = (nr_pages - cur_pages) << PAGE_SHIFT;
+            cur_pages = nr_pages;
+        }
+        else
+        {
+            cur_pages += pages;
+        }
+    }
+    ASSERT(cur_pages == nr_pages);
 }
 
 static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
@@ -1391,7 +1479,8 @@ int __init construct_dom0(
         pfn = shared_info_paddr >> PAGE_SHIFT;
         dom0_update_physmap(d, pfn, mfn, 0);
 
-        pvh_map_all_iomem(d);
+        pvh_map_all_iomem(d, nr_pages);
+        pvh_setup_e820(d, nr_pages);
     }
 
     if ( d->domain_id == hardware_domid )

next prev parent reply	other threads:[~2014-05-07  9:48 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-04-30  1:06 [V10 PATCH 0/4] pvh dom0 patches Mukesh Rathor
2014-04-30  1:06 ` [V10 PATCH 1/4] pvh dom0: construct_dom0 changes Mukesh Rathor
2014-05-06 15:18   ` Roger Pau Monné
2014-04-30  1:06 ` [V10 PATCH 2/4] pvh dom0: Add checks and restrictions for p2m_is_foreign Mukesh Rathor
2014-05-01 16:14   ` Tim Deegan
2014-04-30  1:06 ` [V10 PATCH 3/4] pvh dom0: Add and remove foreign pages Mukesh Rathor
2014-05-01 16:19   ` Tim Deegan
2014-05-02  1:45     ` Mukesh Rathor
2014-05-02  8:38       ` Jan Beulich
2014-05-02  8:55       ` Tim Deegan
2014-05-02 23:35         ` Mukesh Rathor
2014-05-05  7:46           ` Jan Beulich
2014-05-08 12:16           ` Tim Deegan
2014-05-08 13:25             ` Jan Beulich
2014-05-08 22:58             ` Mukesh Rathor
2014-04-30  1:06 ` [V10 PATCH 4/4] dom0: add opt_dom0pvh to setup.c Mukesh Rathor
2014-04-30 14:11 ` [V10 PATCH 0/4] pvh dom0 patches Roger Pau Monné
2014-04-30 18:12   ` Mukesh Rathor
2014-05-01  1:19     ` Mukesh Rathor
2014-05-02 11:05       ` Roger Pau Monné
2014-05-02 12:31         ` Jan Beulich
2014-05-02 14:06           ` Roger Pau Monné
2014-05-02 14:16             ` Jan Beulich
2014-05-02 14:35               ` Roger Pau Monné
2014-05-02 15:41                 ` Jan Beulich
2014-05-02 16:13                   ` Roger Pau Monné
2014-05-02 19:35                     ` Konrad Rzeszutek Wilk
2014-05-03  0:01         ` Mukesh Rathor
2014-05-05  8:52           ` Roger Pau Monné
2014-05-06  0:28             ` Mukesh Rathor
2014-05-06  7:13               ` Roger Pau Monné
2014-05-06  8:09                 ` Jan Beulich
2014-05-07  1:00                 ` Mukesh Rathor
2014-05-07  7:50                   ` Jan Beulich
2014-05-07  9:48                     ` Roger Pau Monné [this message]
2014-05-07 11:34                       ` Jan Beulich
2014-05-08 10:27                         ` Roger Pau Monné
2014-05-08 10:44                           ` Jan Beulich
2014-05-08 15:00                             ` Roger Pau Monné
2014-05-08 15:20                               ` Jan Beulich
2014-05-07 13:25                     ` Konrad Rzeszutek Wilk
2014-05-08  0:04                     ` Mukesh Rathor
2014-05-08  6:37                       ` Jan Beulich
2014-05-08 19:15                         ` Mukesh Rathor
2014-05-07 13:20                   ` Konrad Rzeszutek Wilk
2014-05-07 13:38                     ` Roger Pau Monné
2014-05-08  0:12                       ` Mukesh Rathor
2014-05-08 10:52                         ` George Dunlap
2014-05-08 13:15                         ` David Vrabel
2014-05-08 22:29                           ` Mukesh Rathor
2014-05-08  0:07                     ` Mukesh Rathor
2014-05-06 19:38               ` Konrad Rzeszutek Wilk

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:38ed9f6 dfblob:03e8008 )
 OR (
bs:"Re: [V10 PATCH 0/4] pvh dom0 patches..." )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=536A0188.9090109@citrix.com \
    --to=roger.pau@citrix.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=keir.xen@gmail.com \
    --cc=mukesh.rathor@oracle.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).