xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: xen-devel@lists.xensource.com, keir.fraser@eu.citrix.com
Cc: konrad.wilk@oracle.com
Subject: [PATCH 4 of 5] libxl: Add support for passing in the machine's E820 for PCI passthrough
Date: Thu, 07 Apr 2011 16:25:25 -0400	[thread overview]
Message-ID: <2e464234c94cfd29a98a.1302207925@localhost6.localdomain6> (raw)
In-Reply-To: <patchbomb.1302207921@localhost6.localdomain6>

# HG changeset patch
# User Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
# Date 1302206363 14400
# Node ID 2e464234c94cfd29a98a9011a46d76846b87f7f8
# Parent  546d8a03d5cbe0ceddadf701174f2417a0b72891
libxl: Add support for passing in the machine's E820 for PCI passthrough.

The code (libxl_e820_alloc) calls the xc_get_machine_memory_map to
retrieve the system's E820. Then the E820 is sanitized to weed out E820
entries below 16MB, and also to remove any E820_RAM or E820_UNUSABLE
regions as the guest does not need to know about them. The guest
only needs the E820_ACPI, E820_NVS, E820_RESERVED to get an idea of
where the PCI I/O space is. Mostly.. The Linux kernel assumes that any
gap in the E820 is considered PCI I/O space which means that if we pass
in the guest 2GB, and the E820_ACPI, and its friend start at 3GB, the
gap between 2GB and 3GB will be considered as PCI I/O space. To guard against
that we also create an E820_UNUSABLE between the region of 'target_kb'
(called ram_end in the code) up to the first E820_[ACPI,NVS,RESERVED] region.

Memory that is slack or for balloon (so 'maxmem' in guest configuration)
is put behind the machine E820, which in most cases is above 4GB.

The reason for doing the fetching of the E820 using the hypercall in
the toolstack (instead of the guest doing it) is that when a Linux
guest would do a hypercall to 'XENMEM_machine_memory_map' it would
retrieve an E820 with I/O range caps added in. Meaning that the
region after 4GB up to end of possible memory would be marked as unusable
and the Linux kernel would not have any space to allocate a balloon
region.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff -r 546d8a03d5cb -r 2e464234c94c tools/libxl/libxl.h
--- a/tools/libxl/libxl.h	Thu Apr 07 15:18:46 2011 -0400
+++ b/tools/libxl/libxl.h	Thu Apr 07 15:59:23 2011 -0400
@@ -204,6 +204,14 @@
 } libxl_file_reference;
 void libxl_file_reference_destroy(libxl_file_reference *p);
 
+#include <xc_e820.h>
+typedef struct { /* E820 memory map: a counted array of entries */
+    uint32_t nr_entries;     /* number of elements in entry[] */
+    struct e820entry *entry; /* heap array; libxl_e820_destroy() frees it */
+} libxl_e820;
+
+void libxl_e820_destroy(libxl_e820 *p);
+
 /* libxl_cpuid_policy_list is a dynamic array storing CPUID policies
  * for multiple leafs. It is terminated with an entry holding
  * XEN_CPUID_INPUT_UNUSED in input[0]
diff -r 546d8a03d5cb -r 2e464234c94c tools/libxl/libxl.idl
--- a/tools/libxl/libxl.idl	Thu Apr 07 15:18:46 2011 -0400
+++ b/tools/libxl/libxl.idl	Thu Apr 07 15:59:23 2011 -0400
@@ -22,6 +22,7 @@
 
 libxl_hwcap = Builtin("hwcap")
 
+libxl_e820 = Builtin("e820", destructor_fn="libxl_e820_destroy", passby=PASS_BY_REFERENCE)
 #
 # Complex libxl types
 #
@@ -112,6 +113,7 @@
                                         ])),
                  ("pv", "!%s", Struct(None,
                                        [("slack_memkb", uint32),
+                                        ("e820", libxl_e820),
                                         ("bootloader", string),
                                         ("bootloader_args", string),
                                         ("cmdline", string),
diff -r 546d8a03d5cb -r 2e464234c94c tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c	Thu Apr 07 15:18:46 2011 -0400
+++ b/tools/libxl/libxl_dom.c	Thu Apr 07 15:59:23 2011 -0400
@@ -72,9 +72,23 @@
     libxl_ctx *ctx = libxl__gc_owner(gc);
     xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus);
     xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + LIBXL_MAXMEM_CONSTANT);
-    if (!info->hvm)
+    if (!info->hvm) {
+      if (info->u.pv.e820.nr_entries) {
+         int rc;
+         rc = libxl_e820_sanitize(ctx, info->target_memkb, 
+                                  (info->max_memkb - info->target_memkb) + info->u.pv.slack_memkb,
+                                   &info->u.pv.e820);
+         if (rc)
+            return rc;
+         xc_domain_set_memory_map(ctx->xch, domid, 
+                                  info->u.pv.e820.entry,
+                                  info->u.pv.e820.nr_entries);
+      }
+      else {
         xc_domain_set_memmap_limit(ctx->xch, domid,
                 (info->max_memkb + info->u.pv.slack_memkb));
+      }
+    }
     xc_domain_set_tsc_info(ctx->xch, domid, info->tsc_mode, 0, 0, 0);
     if ( info->disable_migrate )
         xc_domain_disable_migrate(ctx->xch, domid);
diff -r 546d8a03d5cb -r 2e464234c94c tools/libxl/libxl_pci.c
--- a/tools/libxl/libxl_pci.c	Thu Apr 07 15:18:46 2011 -0400
+++ b/tools/libxl/libxl_pci.c	Thu Apr 07 15:59:23 2011 -0400
@@ -37,6 +37,8 @@
 #include "libxl_internal.h"
 #include "flexarray.h"
 
+#include <xc_e820.h>
+
 #define PCI_BDF                "%04x:%02x:%02x.%01x"
 #define PCI_BDF_SHORT          "%02x:%02x.%01x"
 #define PCI_BDF_VDEVFN         "%04x:%02x:%02x.%01x@%02x"
@@ -1047,3 +1049,154 @@
     free(pcidevs);
     return 0;
 }
+
+#define E820MAX (128)
+/* True for E820 types that are never copied into the guest's map. */
+static int e820_type_dropped(uint32_t type)
+{
+    return type == E820_RAM || type == E820_UNUSABLE || type == 0;
+}
+/*
+ * Turn the host E820 in 'p' into the map given to a PV guest: RAM from 0
+ * for map_limitkb kB (cut short where the lowest kept host entry begins),
+ * E820_UNUSABLE over any gap up to that entry, the host entries starting
+ * above 1MB that are not RAM/UNUSABLE and, if balloon_kb != 0, RAM at
+ * max(4GB, end of kept entries) sized as the cut-off RAM plus balloon_kb.
+ * p->entry is overwritten while filtering and reallocated if it must grow.
+ * Returns 0, ERROR_FAIL (bad arguments, or more than E820MAX entries
+ * needed) or ERROR_NOMEM (reallocation failed).
+ */
+int libxl_e820_sanitize(libxl_ctx *ctx, unsigned long map_limitkb,
+                        unsigned long balloon_kb, libxl_e820 *p)
+{
+    uint64_t delta_kb = 0, start = -1ULL, start_kb = 0, last = 0, ram_end, end;
+    uint32_t i, idx = 0, nr;
+    struct e820entry e820[E820MAX], *src;
+
+    if (!p || !p->entry || !map_limitkb || p->nr_entries <= 1)
+        return ERROR_FAIL;
+    src = p->entry;
+    nr = p->nr_entries;
+
+    /* Weed out anything starting at or below 1MB (0x100000). */
+    for (i = 0; i < nr; i++) {
+        if (src[i].addr > 0x100000)
+            continue;
+        src[i].type = 0;
+        src[i].size = 0;
+        src[i].addr = -1ULL;
+    }
+
+    /* Find the lowest start and the highest end of the kept entries. */
+    for (i = 0; i < nr; i++) {
+        if (e820_type_dropped(src[i].type))
+            continue;
+        end = src[i].addr + src[i].size;
+        if (src[i].addr < start)
+            start = src[i].addr;
+        if (end > last)
+            last = end; /* store the address, not the comparison result */
+    }
+    if (start > 1024)
+        start_kb = start >> 10;
+
+    /* Add the memory RAM region for the guest */
+    e820[idx].addr = 0;
+    e820[idx].size = (uint64_t)map_limitkb << 10;
+    e820[idx].type = E820_RAM;
+    /* .. and trim if necessary */
+    if (start_kb && map_limitkb > start_kb) {
+        delta_kb = map_limitkb - start_kb;
+        e820[idx].size -= delta_kb << 10;
+    }
+    /* Note: We don't touch balloon_kb here. Will add it at the end. */
+    ram_end = e820[idx].addr + e820[idx].size;
+    idx++;
+
+    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Memory: %ldkB End of RAM: 0x%lx (PFN) "
+               "Delta: %ldkB, PCI start: %ldkB (0x%lx PFN), Balloon %ldkB\n",
+               map_limitkb, (unsigned long)(ram_end >> 12), (long)delta_kb,
+               (long)start_kb, (unsigned long)(start >> 12), balloon_kb);
+
+    /* Mark any gap between ram_end and start unusable, so that the Linux
+     * guest does not assume that the gap between the end of RAM and the
+     * first E820_[ACPI,NVS,RESERVED] region is PCI I/O space. */
+    if (start > ram_end) {
+        e820[idx].type = E820_UNUSABLE;
+        e820[idx].addr = ram_end;
+        e820[idx].size = start - ram_end;
+        idx++;
+    }
+
+    /* Almost done: copy them over, ignoring the undesirable ones */
+    for (i = 0; i < nr; i++) {
+        if (e820_type_dropped(src[i].type))
+            continue;
+        if (idx == E820MAX) /* e820[] is full: refuse rather than overrun */
+            return ERROR_FAIL;
+        e820[idx].type = src[i].type;
+        e820[idx].addr = src[i].addr;
+        e820[idx].size = src[i].size;
+        idx++;
+    }
+
+    /* The truncated RAM and the balloon memory go behind the host map. */
+    if (balloon_kb) {
+        if (idx == E820MAX)
+            return ERROR_FAIL;
+        e820[idx].type = E820_RAM;
+        e820[idx].addr = (1ULL << 32) > last ? (1ULL << 32) : last;
+        e820[idx].size = (delta_kb << 10) + ((uint64_t)balloon_kb << 10);
+        idx++;
+    }
+
+    for (i = 0; i < idx; i++) {
+        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, ":%s\t[%lx -> %lx]",
+                   e820[i].type == E820_RAM ? "RAM " :
+                   e820[i].type == E820_RESERVED ? "RSV " :
+                   e820[i].type == E820_ACPI ? "ACPI" :
+                   e820[i].type == E820_NVS ? "NVS " :
+                   e820[i].type == E820_UNUSABLE ? "UNU " : "----",
+                   (unsigned long)(e820[i].addr >> 12),
+                   (unsigned long)((e820[i].addr + e820[i].size) >> 12));
+    }
+
+    /* Done: copy the sanitized version. */
+    if (idx > p->nr_entries) {
+        libxl_e820_destroy(p);
+        p->entry = calloc(idx, sizeof(struct e820entry));
+        if (!p->entry)
+            return ERROR_NOMEM;
+    }
+    p->nr_entries = idx;
+    memcpy(p->entry, e820, idx * sizeof(struct e820entry));
+    return 0;
+}
+      
+
+int libxl_e820_alloc(libxl_ctx *ctx, libxl_e820 *p)
+{
+    /* Copy the host E820 into a calloc'ed p->entry. Returns 0 or ERROR_*. */
+    struct e820entry map[E820MAX], *copy;
+    int nr = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
+
+    if (nr < 0) {
+        errno = nr; /* NOTE(review): stores a negative value - confirm intent */
+        return ERROR_FAIL;
+    }
+    copy = calloc(nr, sizeof(*copy));
+    if (!copy)
+        return ERROR_NOMEM;
+    memcpy(copy, map, nr * sizeof(*copy));
+    p->entry = copy; /* publish only once the copy exists, so p stays sane */
+    p->nr_entries = nr;
+    return 0;
+}
+
+void libxl_e820_destroy(libxl_e820 *p)
+{
+    /* Release the entry array and leave p as an empty map. */
+    free(p->entry); /* free(NULL) is a no-op, so no guard is needed */
+    p->nr_entries = 0;
+    p->entry = NULL;
+}
diff -r 546d8a03d5cb -r 2e464234c94c tools/libxl/libxl_utils.h
--- a/tools/libxl/libxl_utils.h	Thu Apr 07 15:18:46 2011 -0400
+++ b/tools/libxl/libxl_utils.h	Thu Apr 07 15:59:23 2011 -0400
@@ -89,5 +89,8 @@
     return (s + 1023) / 1024;
 }
 
+int libxl_e820_alloc(libxl_ctx *ctx, libxl_e820 *p);
+int libxl_e820_sanitize(libxl_ctx *ctx, unsigned long map_limitkb,
+                        unsigned long balloon_kb, libxl_e820 *p);
 #endif
 
diff -r 546d8a03d5cb -r 2e464234c94c tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c	Thu Apr 07 15:18:46 2011 -0400
+++ b/tools/libxl/xl_cmdimpl.c	Thu Apr 07 15:59:23 2011 -0400
@@ -1009,6 +1009,13 @@
     if (!xlu_cfg_get_long (config, "pci_power_mgmt", &l))
         pci_power_mgmt = l;
 
+    if (!xlu_cfg_get_long (config, "pci_hole", &l)) {
+	if (l == 1) {
+           int rc = libxl_e820_alloc(&ctx, &b_info->u.pv.e820);
+           if (rc < 0)
+                fprintf(stderr, "failed while collecting E820 with: %d\n", rc);
+        }
+    }
     if (!xlu_cfg_get_list (config, "pci", &pcis, 0, 0)) {
         int i;
         d_config->num_pcidevs = 0;

  parent reply	other threads:[~2011-04-07 20:25 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-07 20:25 [PATCH 0 of 5] Patches for PCI passthrough with modified E820 Konrad Rzeszutek Wilk
2011-04-07 20:25 ` [PATCH 1 of 5] tools: Add xc_domain_set_memory_map and xc_get_machine_memory_map calls Konrad Rzeszutek Wilk
2011-04-08  8:18   ` Ian Campbell
2011-04-08 13:19     ` Konrad Rzeszutek Wilk
2011-04-07 20:25 ` [PATCH 2 of 5] x86: make the pv-only e820 array be dynamic Konrad Rzeszutek Wilk
2011-04-08  8:22   ` Ian Campbell
2011-04-08 13:21     ` Konrad Rzeszutek Wilk
2011-04-07 20:25 ` [PATCH 3 of 5] x86: adjust the size of the e820 for pv guest to " Konrad Rzeszutek Wilk
2011-04-07 20:25 ` Konrad Rzeszutek Wilk [this message]
2011-04-08  8:36   ` [PATCH 4 of 5] libxl: Add support for passing in the machine's E820 for PCI passthrough Ian Campbell
2011-04-08 10:56     ` Ian Jackson
2011-04-08 13:35       ` Konrad Rzeszutek Wilk
2011-04-08 13:55         ` Ian Campbell
2011-04-08 14:09           ` Tim Deegan
2011-04-08 14:17             ` Ian Campbell
2011-04-08 14:25               ` Tim Deegan
2011-04-08 14:33                 ` Ian Campbell
2011-04-08 15:00                   ` Konrad Rzeszutek Wilk
2011-04-08 14:34                 ` Konrad Rzeszutek Wilk
2011-04-08 14:42                   ` Ian Campbell
2011-04-08 14:54                     ` Konrad Rzeszutek Wilk
2011-04-08 16:01                 ` Ian Jackson
2011-04-08 13:33     ` Konrad Rzeszutek Wilk
2011-04-08 14:00       ` Ian Campbell
2011-04-07 20:25 ` [PATCH 5 of 5] libxl: Convert E820_UNUSABLE and E820_RAM to E820_UNUSABLE as appropriate Konrad Rzeszutek Wilk
2011-04-08  8:42 ` [PATCH 0 of 5] Patches for PCI passthrough with modified E820 Ian Campbell
2011-04-08 13:24   ` Konrad Rzeszutek Wilk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2e464234c94cfd29a98a.1302207925@localhost6.localdomain6 \
    --to=konrad.wilk@oracle.com \
    --cc=keir.fraser@eu.citrix.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).