From: Elena Ufimtseva <ufimtseva@gmail.com>
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
ian.jackson@eu.citrix.com, JBeulich@suse.com,
Elena Ufimtseva <ufimtseva@gmail.com>
Subject: [PATCH v6 08/10] libxl: build numa nodes memory blocks
Date: Fri, 18 Jul 2014 01:50:07 -0400
Message-ID: <1405662609-31486-9-git-send-email-ufimtseva@gmail.com>
In-Reply-To: <1405662609-31486-1-git-send-email-ufimtseva@gmail.com>
Create the vmemrange structure based on the
PV guest's e820 map. Values are in megabytes.
Also export the e820 filter code e820_sanitize
so that it is available internally.
Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
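For illustration (hypothetical values): a guest with two vnodes of 1024 MB
each, on a host whose first e820 RAM entry starts at 0, gets its memory
blocks stacked contiguously:

    vnode 0: start = 0x0         end = 0x40000000   (1024 << 20 bytes)
    vnode 1: start = 0x40000000  end = 0x80000000

With e820_host enabled, each block's end address is then pushed further out
until the E820_RAM bytes inside [start, end) cover the requested vnode size,
so that e820 holes do not shrink the node.
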
 tools/libxl/libxl_internal.h |   12 +++
 tools/libxl/libxl_numa.c     |  193 ++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_x86.c      |    3 +-
 3 files changed, 207 insertions(+), 1 deletion(-)
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index e8f2abb..80f81cd 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3086,6 +3086,18 @@ void libxl__numa_candidate_put_nodemap(libxl__gc *gc,
libxl_bitmap_copy(CTX, &cndt->nodemap, nodemap);
}
+bool libxl__vnodemap_is_usable(libxl__gc *gc, libxl_domain_build_info *info);
+
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
+                  uint32_t *nr_entries,
+                  unsigned long map_limitkb,
+                  unsigned long balloon_kb);
+
+int libxl__vnuma_align_mem(libxl__gc *gc,
+                           uint32_t domid,
+                           struct libxl_domain_build_info *b_info,
+                           vmemrange_t *memblks);
+
_hidden int libxl__ms_vm_genid_set(libxl__gc *gc, uint32_t domid,
const libxl_ms_vm_genid *id);
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
index 94ca4fe..755dc19 100644
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -19,6 +19,8 @@
#include "libxl_internal.h"
+#include "libxl_vnuma.h"
+
/*
* What follows are helpers for generating all the k-combinations
* without repetitions of a set S with n elements in it. Formally
@@ -508,6 +510,197 @@ int libxl__get_numa_candidate(libxl__gc *gc,
}
/*
+ * Check whether the vnuma nodes can be fit on the physical NUMA nodes
+ * according to the vnode_to_pnode array.
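+ * E.g., two 512 MB vnodes both mapped to pnode 0 require at least
+ * 1 GB of free memory on physical node 0.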
+ */
+bool libxl__vnodemap_is_usable(libxl__gc *gc,
+                               libxl_domain_build_info *info)
+{
+    unsigned int i;
+    libxl_numainfo *ninfo = NULL;
+    unsigned long long *claim;
+    unsigned int node;
+    uint64_t *sz_array;
+    int nr_nodes = 0;
+    bool usable = false;
+
+    /* Cannot use the specified mapping if this is not a NUMA machine. */
+    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
+    if (ninfo == NULL)
+        return false;
+
+    sz_array = info->vnuma_mem;
+    /* claim[] is indexed by physical node, so allocate nr_nodes entries. */
+    claim = libxl__calloc(gc, nr_nodes, sizeof(*claim));
+    /* Get the total memory required on each physical node. */
+    for (i = 0; i < info->nr_nodes; i++)
+    {
+        node = info->vnuma_vnodemap[i];
+
+        if (node >= nr_nodes)
+            goto vnodemapout;
+        claim[node] += (sz_array[i] << 20);
+    }
+    for (i = 0; i < nr_nodes; i++) {
+        if (claim[i] > ninfo[i].free)
+            /* Cannot satisfy the user request, fall back to the default. */
+            goto vnodemapout;
+    }
+    usable = true;
+
+ vnodemapout:
+    libxl_numainfo_list_free(ninfo, nr_nodes);
+    return usable;
+}
+
+/*
+ * Returns the number of bytes within [start, end) that are not
+ * covered by any E820_RAM region of the passed e820 map. Needed
+ * to correctly set NUMA memory ranges for the domain.
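+ *
+ * For example (hypothetical map): with E820_RAM regions [0, 2GB) and
+ * [3GB, 4GB), the range [1GB, 4GB) has 1GB absent, i.e. the [2GB, 3GB)
+ * hole.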
+ */
+static unsigned long e820_memory_hole_size(unsigned long start,
+                                           unsigned long end,
+                                           struct e820entry e820[],
+                                           unsigned int nr)
+{
+    unsigned int i;
+    unsigned long absent, start_blk, end_blk;
+
+    /* Initially treat the whole [start, end) range as absent. */
+    absent = end - start;
+    for (i = 0; i < nr; i++) {
+        /* Only E820_RAM regions count as present memory. */
+        if (e820[i].type == E820_RAM) {
+            start_blk = e820[i].addr;
+            end_blk = e820[i].addr + e820[i].size;
+            /* Does the range begin within this region? */
+            if (start >= start_blk && start <= end_blk) {
+                if (end > end_blk)
+                    absent -= end_blk - start;
+                else
+                    /* The region covers the whole range: nothing absent. */
+                    absent -= end - start;
+                continue;
+            }
+            /* Does the range end within this region? */
+            if (end <= end_blk && end >= start_blk) {
+                absent -= end - start_blk;
+                /* No need to look at further regions. */
+                break;
+            }
+            /* Is the region entirely contained within the range? */
+            if (start_blk >= start && end_blk <= end)
+                absent -= end_blk - start_blk;
+        }
+    }
+    return absent;
+}
+
+/*
+ * For each node, build memory block start and end addresses.
+ * Subtract any memory holes from the ranges found in the e820 map.
+ * vnode memory sizes are passed here in megabytes; the result is
+ * in memory block addresses.
+ * The Linux kernel will adjust NUMA memory block sizes on its own,
+ * but we want to provide the kernel with NUMA block addresses that
+ * are the same in the kernel and the hypervisor.
+ */
+#define max(a, b) ((a) > (b) ? (a) : (b))
+int libxl__vnuma_align_mem(libxl__gc *gc,
+                           uint32_t domid,
+                           /* IN: mem sizes in megabytes */
+                           libxl_domain_build_info *b_info,
+                           /* OUT: linux NUMA blocks addresses */
+                           vmemrange_t *memblks)
+{
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    unsigned int i;
+    int j, rc;
+    uint64_t next_start_blk, end_max = 0, size;
+    uint32_t nr;
+    struct e820entry map[E820MAX];
+
+    if (b_info->nr_nodes == 0)
+        return -EINVAL;
+
+    if (!memblks || !b_info->vnuma_mem)
+        return -EINVAL;
+
+    /* Retrieve the e820 map for this host. */
+    rc = xc_get_machine_memory_map(ctx->xch, map, E820MAX);
+    if (rc < 0)
+        return -EINVAL;
+
+    nr = rc;
+    rc = e820_sanitize(ctx, map, &nr, b_info->target_memkb,
+                       (b_info->max_memkb - b_info->target_memkb) +
+                       b_info->u.pv.slack_memkb);
+    if (rc)
+        return -EINVAL;
+
+    /* Find the maximum RAM address for this host. */
+    for (j = 0; j < nr; j++)
+        if (map[j].type == E820_RAM)
+            end_max = max(end_max, map[j].addr + map[j].size);
+
+    memset(memblks, 0, sizeof(*memblks) * b_info->nr_nodes);
+    next_start_blk = 0;
+
+    memblks[0].start = map[0].addr;
+
+    for (i = 0; i < b_info->nr_nodes; i++) {
+
+        memblks[i].start += next_start_blk;
+        memblks[i].end = memblks[i].start + (b_info->vnuma_mem[i] << 20);
+
+        if (memblks[i].end > end_max) {
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                       "Shrunk vNUMA memory block %u address to max e820 "
+                       "address: %#010"PRIx64" -> %#010"PRIx64"\n",
+                       i, memblks[i].end, end_max);
+            memblks[i].end = end_max;
+            break;
+        }
+
+        size = memblks[i].end - memblks[i].start;
+        /*
+         * For a PV guest with the e820_host option turned on we need
+         * to take memory holes into account. For a PV guest with
+         * e820_host disabled or unset, the map is a contiguous
+         * RAM region.
+         */
+        if (libxl_defbool_val(b_info->u.pv.e820_host)) {
+            while ((memblks[i].end - memblks[i].start -
+                    e820_memory_hole_size(memblks[i].start,
+                                          memblks[i].end, map, nr)) < size)
+            {
+                memblks[i].end += MIN_VNODE_SIZE << 10;
+                if (memblks[i].end > end_max) {
+                    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                               "Shrunk vNUMA memory block %u address to max "
+                               "e820 address: %#010"PRIx64" -> %#010"PRIx64"\n",
+                               i, memblks[i].end, end_max);
+                    memblks[i].end = end_max;
+                    break;
+                }
+            }
+        }
+        next_start_blk = memblks[i].end;
+        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                   "i %u, start = %#010"PRIx64", end = %#010"PRIx64"\n",
+                   i, memblks[i].start, memblks[i].end);
+    }
+
+    /* Did we fail to form memory addresses for every node? */
+    if (i != b_info->nr_nodes) {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+                   "Not all nodes were populated with block addresses, "
+                   "only %u out of %u", i, b_info->nr_nodes);
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/*
* Local variables:
* mode: C
* c-basic-offset: 4
diff --git a/tools/libxl/libxl_x86.c b/tools/libxl/libxl_x86.c
index 7589060..46e84e4 100644
--- a/tools/libxl/libxl_x86.c
+++ b/tools/libxl/libxl_x86.c
@@ -1,5 +1,6 @@
#include "libxl_internal.h"
#include "libxl_arch.h"
+#include "libxl_vnuma.h"
static const char *e820_names(int type)
{
@@ -14,7 +15,7 @@ static const char *e820_names(int type)
return "Unknown";
}
-static int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
+int e820_sanitize(libxl_ctx *ctx, struct e820entry src[],
uint32_t *nr_entries,
unsigned long map_limitkb,
unsigned long balloon_kb)
--
1.7.10.4