xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Elena Ufimtseva <ufimtseva@gmail.com>
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
	msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
	ian.jackson@eu.citrix.com, JBeulich@suse.com,
	Elena Ufimtseva <ufimtseva@gmail.com>
Subject: [PATCH v5 6/8] libxl: build e820 map for vnodes
Date: Tue,  3 Jun 2014 00:53:18 -0400	[thread overview]
Message-ID: <1401771200-11448-8-git-send-email-ufimtseva@gmail.com> (raw)
In-Reply-To: <1401771200-11448-1-git-send-email-ufimtseva@gmail.com>

build e820 map from vnuma memory ranges.

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 tools/libxl/libxl_internal.h |   10 ++++
 tools/libxl/libxl_numa.c     |  125 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 082749e..7ae8508 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3113,6 +3113,16 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
  */
 #define CTYPE(isfoo,c) (isfoo((unsigned char)(c)))
 
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
+                         uint32_t *nr_entries,
+                         unsigned long map_limitkb,
+                         unsigned long balloon_kb);
+
+int libxl__vnuma_align_mem(libxl__gc *gc,
+                            uint32_t domid,
+                            struct libxl_domain_build_info *b_info,
+                            vmemrange_t *memblks);
+
 
 #endif
 
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
index 94ca4fe..38f1546 100644
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -19,6 +19,8 @@
 
 #include "libxl_internal.h"
 
+#include "libxl_vnuma.h"
+
 /*
  * What follows are helpers for generating all the k-combinations
  * without repetitions of a set S with n elements in it. Formally
@@ -508,6 +510,129 @@ int libxl__get_numa_candidate(libxl__gc *gc,
 }
 
 /*
+/*
+ * Used for PV guests with e820_host enabled and thus
+ * having non-contiguous e820 memory map.
+ */
+static unsigned long e820_memory_hole_size(unsigned long start,
+                                            unsigned long end,
+                                            struct e820entry e820[],
+                                            unsigned int nr)
+{
+    /*
+     * Returns how much of [start, end) is NOT covered by E820_RAM entries
+     * of e820[0..nr-1]; start/end use the same unit as e820[i].addr/.size.
+     * Entries are assumed not to overlap each other, so their overlaps
+     * with the range can simply be summed.
+     */
+    unsigned long present = 0;
+    unsigned int i;
+
+    /* an empty or inverted range contains no hole */
+    if (end <= start)
+        return 0;
+    for (i = 0; i < nr; i++) {
+        unsigned long ram_start, ram_end, lo, hi;
+
+        /* only RAM counts as present; any other type stays in the hole */
+        if (e820[i].type != E820_RAM)
+            continue;
+        ram_start = e820[i].addr;
+        ram_end = e820[i].addr + e820[i].size;
+        /* clip to the range; also counts RAM wholly inside [start, end) */
+        lo = ram_start > start ? ram_start : start;
+        hi = ram_end < end ? ram_end : end;
+        if (hi > lo)
+            present += hi - lo;
+    }
+    return present < end - start ? end - start - present : 0;
+}
+
+/*
+ * Lays out b_info->nr_nodes vNUMA memory ranges back to back in memblks[],
+ * starting at the first entry of the sanitized host e820 map. Each length
+ * is numa_memszs[i] << 20, i.e. sizes are taken as megabytes. With
+ * e820_host a range grows until it holds that much E820_RAM or reaches the
+ * end of the last RAM region. Returns 0, or -EINVAL on failure.
+ */
+int libxl__vnuma_align_mem(libxl__gc *gc,
+                            uint32_t domid,
+                            libxl_domain_build_info *b_info, /* IN: mem sizes */
+                            vmemrange_t *memblks)        /* OUT: vnode memory ranges */
+{
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    struct e820entry map[E820MAX];
+    uint64_t next_start = 0, end_max = 0, size;
+    unsigned int i, j;
+    uint32_t nr;
+    int rc;
+
+    if (b_info->nr_nodes == 0)
+        return -EINVAL;
+
+    /* retrieve e820 map for this host */
+    rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
+    if (rc < 0) {
+        errno = rc;
+        return -EINVAL;
+    }
+    nr = rc;
+    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
+                       (b_info->max_memkb - b_info->target_memkb) +
+                       b_info->u.pv.slack_memkb);
+    if (rc) {
+        errno = rc;
+        return -EINVAL;
+    }
+    /*
+     * End of the highest RAM region. j is unsigned, so count down with
+     * "j-- > 0": a "j >= 0" test can never fail and would index far
+     * outside map[] when the map holds no RAM entry.
+     */
+    for (j = nr; j-- > 0; )
+        if (map[j].type == E820_RAM) {
+            end_max = map[j].addr + map[j].size;
+            break;
+        }
+    /* empty map or no RAM at all: nothing to place the vnodes in */
+    if (end_max == 0)
+        return -EINVAL;
+
+    memset(memblks, 0, sizeof(*memblks) * b_info->nr_nodes);
+    memblks[0].start = map[0].addr;
+
+    for (i = 0; i < b_info->nr_nodes; i++) {
+        /* start can be not zero */
+        memblks[i].start += next_start;
+        /* widen first in case numa_memszs[] is narrower than 64 bits */
+        size = (uint64_t)b_info->numa_memszs[i] << 20;
+        memblks[i].end = memblks[i].start + size;
+        /*
+         * e820_host maps have holes: grow the range until it holds enough
+         * RAM. Without e820_host the map is one contiguous RAM region.
+         */
+        if (libxl_defbool_val(b_info->u.pv.e820_host)) {
+            while ((memblks[i].end - memblks[i].start -
+                    e820_memory_hole_size(memblks[i].start,
+                    memblks[i].end, map, nr)) < size) {
+                memblks[i].end += MIN_VNODE_SIZE << 10;
+                if (memblks[i].end > end_max) {
+                    memblks[i].end = end_max;
+                    break;
+                }
+            }
+        }
+        next_start = memblks[i].end;
+        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,"i %u, start  = %#010lx, end = %#010lx\n",
+                    i, memblks[i].start, memblks[i].end);
+    }
+    if (memblks[i-1].end > end_max)
+        memblks[i-1].end = end_max;
+
+    return 0;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-basic-offset: 4
-- 
1.7.10.4

  parent reply	other threads:[~2014-06-03  4:53 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-03  4:53 [PATCH v5 0/8] vnuma introduction Elena Ufimtseva
2014-06-03  4:53 ` [PATCH v5 8/8] add vnuma info for debug-key Elena Ufimtseva
2014-06-03  9:04   ` Jan Beulich
2014-06-04  4:13     ` Elena Ufimtseva
2014-06-03  4:53 ` [PATCH v5 1/8] xen: vnuma topoplogy and subop hypercalls Elena Ufimtseva
2014-06-03  8:55   ` Jan Beulich
2014-06-03  4:53 ` [PATCH v5 2/8] libxc: Plumb Xen with vnuma topology Elena Ufimtseva
2014-06-03  4:53 ` [PATCH v5 3/8] vnuma xl.cfg.pod and idl config options Elena Ufimtseva
2014-06-03  4:53 ` [PATCH v5 4/8] vnuma topology parsing routines Elena Ufimtseva
2014-06-03  4:53 ` [PATCH v5 5/8] libxc: allocate domain vnuma nodes Elena Ufimtseva
2014-06-03  4:53 ` Elena Ufimtseva [this message]
2014-06-03  4:53 ` [PATCH v5 7/8] libxl: place vnuma domain nodes on numa nodes Elena Ufimtseva
2014-06-03  4:53 ` [PATCH v5 8/8] add vnuma info out on debug-key Elena Ufimtseva
2014-06-03 11:37 ` [PATCH v5 0/8] vnuma introduction Wei Liu
2014-06-04  4:05   ` Elena Ufimtseva

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1401771200-11448-8-git-send-email-ufimtseva@gmail.com \
    --to=ufimtseva@gmail.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=dario.faggioli@citrix.com \
    --cc=george.dunlap@eu.citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=keir@xen.org \
    --cc=lccycc123@gmail.com \
    --cc=msw@linux.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).