From: Elena Ufimtseva <ufimtseva@gmail.com>
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
	msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
	ian.jackson@eu.citrix.com, JBeulich@suse.com,
	Elena Ufimtseva <ufimtseva@gmail.com>
Subject: [PATCH v6 01/10] xen: vnuma topology and subop hypercalls
Date: Fri, 18 Jul 2014 01:50:00 -0400
Message-ID: <1405662609-31486-2-git-send-email-ufimtseva@gmail.com>
In-Reply-To: <1405662609-31486-1-git-send-email-ufimtseva@gmail.com>

Define the interface, structures and hypercalls for the toolstack to
build a vNUMA topology and for guests that wish to retrieve it.
This patch introduces two subop hypercalls:
XEN_DOMCTL_setvnumainfo, used by the toolstack to define the vNUMA
topology of a domain, and XENMEM_get_vnumainfo, used by a guest to
retrieve that topology. An illustrative usage sketch for each subop
follows the diffstat below.

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
 xen/common/domain.c         |   13 ++++
 xen/common/domctl.c         |  167 +++++++++++++++++++++++++++++++++++++++++++
 xen/common/memory.c         |   62 ++++++++++++++++
 xen/include/public/domctl.h |   29 ++++++++
 xen/include/public/memory.h |   47 +++++++++++-
 xen/include/xen/domain.h    |   11 +++
 xen/include/xen/sched.h     |    1 +
 7 files changed, 329 insertions(+), 1 deletion(-)
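
For reviewers: a minimal, illustrative sketch of the toolstack side of
XEN_DOMCTL_setvnumainfo (placed after the diffstat, so git-am ignores
it). Patch 04/10 of this series adds the real libxc wrapper,
xc_domain_setvnuma(); the sketch below only shows how the new domctl is
meant to be driven, assuming libxc's DECLARE_DOMCTL/do_domctl and
hypercall-bounce helpers. The function name is hypothetical.

    static int setvnuma_sketch(xc_interface *xch, uint32_t domid,
                               uint32_t nr_vnodes, uint32_t nr_vcpus,
                               vmemrange_t *vmemrange,
                               unsigned int *vdistance,
                               unsigned int *vcpu_to_vnode,
                               unsigned int *vnode_to_pnode)
    {
        int rc;
        DECLARE_DOMCTL;
        /* Bounce the caller's arrays into hypercall-safe memory. */
        DECLARE_HYPERCALL_BOUNCE(vmemrange, sizeof(*vmemrange) * nr_vnodes,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);
        DECLARE_HYPERCALL_BOUNCE(vdistance,
                                 sizeof(*vdistance) * nr_vnodes * nr_vnodes,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);
        DECLARE_HYPERCALL_BOUNCE(vcpu_to_vnode,
                                 sizeof(*vcpu_to_vnode) * nr_vcpus,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);
        DECLARE_HYPERCALL_BOUNCE(vnode_to_pnode,
                                 sizeof(*vnode_to_pnode) * nr_vnodes,
                                 XC_HYPERCALL_BUFFER_BOUNCE_IN);

        rc = -1;
        if ( xc_hypercall_bounce_pre(xch, vmemrange)      ||
             xc_hypercall_bounce_pre(xch, vdistance)      ||
             xc_hypercall_bounce_pre(xch, vcpu_to_vnode)  ||
             xc_hypercall_bounce_pre(xch, vnode_to_pnode) )
            goto out;

        set_xen_guest_handle(domctl.u.vnuma.vmemrange, vmemrange);
        set_xen_guest_handle(domctl.u.vnuma.vdistance, vdistance);
        set_xen_guest_handle(domctl.u.vnuma.vcpu_to_vnode, vcpu_to_vnode);
        set_xen_guest_handle(domctl.u.vnuma.vnode_to_pnode, vnode_to_pnode);

        domctl.cmd = XEN_DOMCTL_setvnumainfo;
        domctl.domain = (domid_t)domid;
        domctl.u.vnuma.nr_vnodes = nr_vnodes;

        rc = do_domctl(xch, &domctl);

     out:
        xc_hypercall_bounce_post(xch, vmemrange);
        xc_hypercall_bounce_post(xch, vdistance);
        xc_hypercall_bounce_post(xch, vcpu_to_vnode);
        xc_hypercall_bounce_post(xch, vnode_to_pnode);
        return rc;
    }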

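And the guest side of XENMEM_get_vnumainfo, i.e. what a kernel consumer
of this interface might look like. Again a sketch only, assuming
Linux-style HYPERVISOR_memory_op()/set_xen_guest_handle() wrappers and
<linux/slab.h>; nr_vnodes/nr_vcpus are the caller's buffer-size
estimates, and the function name is hypothetical.

    static int get_vnuma_info(unsigned int nr_vnodes, unsigned int nr_vcpus)
    {
        struct vnuma_topology_info topo = { .domid = DOMID_SELF };
        unsigned int *vdistance, *vcpu_to_vnode;
        struct vmemrange *vmemrange;
        int rc = -ENOMEM;

        vdistance = kcalloc(nr_vnodes * nr_vnodes, sizeof(*vdistance),
                            GFP_KERNEL);
        vcpu_to_vnode = kcalloc(nr_vcpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
        vmemrange = kcalloc(nr_vnodes, sizeof(*vmemrange), GFP_KERNEL);
        if ( !vdistance || !vcpu_to_vnode || !vmemrange )
            goto out;

        /* Tell the hypervisor how much room has been allocated. */
        topo.nr_vnodes = nr_vnodes;
        topo.nr_vcpus = nr_vcpus;
        set_xen_guest_handle(topo.vdistance.h, vdistance);
        set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);
        set_xen_guest_handle(topo.vmemrange.h, vmemrange);

        /* Returns -ENOBUFS if the estimates above were too small. */
        rc = HYPERVISOR_memory_op(XENMEM_get_vnumainfo, &topo);
        if ( rc == 0 )
            /* topo.nr_vnodes now holds the actual number of vnodes. */
            pr_info("vNUMA: %u virtual nodes\n", topo.nr_vnodes);

     out:
        kfree(vmemrange);
        kfree(vcpu_to_vnode);
        kfree(vdistance);
        return rc;
    }
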
diff --git a/xen/common/domain.c b/xen/common/domain.c
index cd64aea..895584a 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -584,6 +584,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+void vnuma_destroy(struct vnuma_info *vnuma)
+{
+    if ( vnuma )
+    {
+        xfree(vnuma->vmemrange);
+        xfree(vnuma->vcpu_to_vnode);
+        xfree(vnuma->vdistance);
+        xfree(vnuma->vnode_to_pnode);
+        xfree(vnuma);
+    }
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -602,6 +614,7 @@ int domain_kill(struct domain *d)
         evtchn_destroy(d);
         gnttab_release_mappings(d);
         tmem_destroy(d->tmem_client);
+        vnuma_destroy(d->vnuma);
         domain_set_outstanding_pages(d, 0);
         d->tmem_client = NULL;
         /* fallthrough */
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index c326aba..7464284 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -297,6 +297,144 @@ int vcpuaffinity_params_invalid(const xen_domctl_vcpuaffinity_t *vcpuaff)
             guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
 }
 
+/*
+ * Allocate memory for a vNUMA topology; *vnuma must be NULL on entry.
+ * The caller has to make sure max_pages and the number of vcpus are
+ * set for the domain.
+ * Fails if any single allocation would exceed PAGE_SIZE.
+ */
+static int vnuma_alloc(struct vnuma_info **vnuma,
+                       unsigned int nr_vnodes,
+                       unsigned int nr_vcpus,
+                       unsigned int dist_size)
+{
+    struct vnuma_info *v;
+
+    if ( !vnuma || *vnuma )
+        return -EINVAL;
+
+    /*
+     * Check whether any of the allocations below would exceed
+     * PAGE_SIZE.  If so, consider it an error for now.
+     */
+    if ( nr_vnodes > PAGE_SIZE / sizeof(nr_vnodes)       ||
+         nr_vcpus > PAGE_SIZE / sizeof(nr_vcpus)         ||
+         nr_vnodes > PAGE_SIZE / sizeof(struct vmemrange) ||
+         dist_size > PAGE_SIZE / sizeof(dist_size) )
+        return -EINVAL;
+
+    v = xzalloc(struct vnuma_info);
+    if ( !v )
+        return -ENOMEM;
+
+    v->vdistance = xmalloc_array(unsigned int, dist_size);
+    v->vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+    v->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
+    v->vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+    if ( v->vdistance == NULL || v->vmemrange == NULL ||
+         v->vcpu_to_vnode == NULL || v->vnode_to_pnode == NULL )
+    {
+        vnuma_destroy(v);
+        return -ENOMEM;
+    }
+
+    *vnuma = v;
+
+    return 0;
+}
+
+/*
+ * Allocate memory and construct a single vNUMA node; set default
+ * parameters, assign all memory and vcpus to this node, and set
+ * the local distance to 10.
+ */
+static long vnuma_fallback(const struct domain *d,
+                           struct vnuma_info **vnuma)
+{
+    struct vnuma_info *v = NULL;
+    long ret;
+
+    /* Will not destroy vNUMA here; destroy it before calling this. */
+    if ( !vnuma || *vnuma )
+        return -EINVAL;
+
+    ret = vnuma_alloc(&v, 1, d->max_vcpus, 1);
+    if ( ret )
+        return ret;
+
+    v->vmemrange[0].start = 0;
+    v->vmemrange[0].end = (uint64_t)d->max_pages << PAGE_SHIFT;
+    v->vdistance[0] = 10;
+    v->vnode_to_pnode[0] = NUMA_NO_NODE;
+    memset(v->vcpu_to_vnode, 0, d->max_vcpus * sizeof(*v->vcpu_to_vnode));
+    v->nr_vnodes = 1;
+
+    *vnuma = v;
+
+    return 0;
+}
+
+/*
+ * Construct the vNUMA topology from the u_vnuma struct and return
+ * it in dst.
+ */
+long vnuma_init(const struct xen_domctl_vnuma *u_vnuma,
+                const struct domain *d,
+                struct vnuma_info **dst)
+{
+    unsigned int dist_size, nr_vnodes = 0;
+    long ret;
+    struct vnuma_info *v = NULL;
+
+    ret = -EINVAL;
+
+    /* If vNUMA topology already set, just exit. */
+    if ( !u_vnuma || *dst )
+        return ret;
+
+    nr_vnodes = u_vnuma->nr_vnodes;
+
+    if ( nr_vnodes == 0 )
+        return ret;
+
+    if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
+        return ret;
+
+    dist_size = nr_vnodes * nr_vnodes;
+
+    ret = vnuma_alloc(&v, nr_vnodes, d->max_vcpus, dist_size);
+    if ( ret )
+        return ret;
+
+    /* On copy failure, fall back to a single vNUMA node and report success. */
+    ret = 0;
+
+    if ( copy_from_guest(v->vdistance, u_vnuma->vdistance, dist_size) )
+        goto vnuma_onenode;
+    if ( copy_from_guest(v->vmemrange, u_vnuma->vmemrange, nr_vnodes) )
+        goto vnuma_onenode;
+    if ( copy_from_guest(v->vcpu_to_vnode, u_vnuma->vcpu_to_vnode,
+                         d->max_vcpus) )
+        goto vnuma_onenode;
+    if ( copy_from_guest(v->vnode_to_pnode, u_vnuma->vnode_to_pnode,
+                         nr_vnodes) )
+        goto vnuma_onenode;
+
+    v->nr_vnodes = nr_vnodes;
+    *dst = v;
+
+    return ret;
+
+ vnuma_onenode:
+    vnuma_destroy(v);
+    return vnuma_fallback(d, dst);
+}
+
 long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 {
     long ret = 0;
@@ -967,6 +1105,35 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_setvnumainfo:
+    {
+        struct vnuma_info *v = NULL;
+
+        ret = -EFAULT;
+        if ( guest_handle_is_null(op->u.vnuma.vdistance)      ||
+             guest_handle_is_null(op->u.vnuma.vmemrange)      ||
+             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode)  ||
+             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+            return ret;
+
+        ret = vnuma_init(&op->u.vnuma, d, &v);
+        if ( ret < 0 || v == NULL )
+            break;
+
+        /* Replace any previous vNUMA topology of the domain. */
+        domain_lock(d);
+        vnuma_destroy(d->vnuma);
+        d->vnuma = v;
+        domain_unlock(d);
+
+        ret = 0;
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index c2dd31b..925b9fc 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -969,6 +969,68 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_get_vnumainfo:
+    {
+        struct vnuma_topology_info topology;
+        struct domain *d;
+        unsigned int dom_vnodes = 0;
+
+        /*
+         * The guest passes nr_vnodes and nr_vcpus, so we know how
+         * much memory it has allocated for the output arrays.
+         */
+        if ( copy_from_guest(&topology, arg, 1) ||
+             guest_handle_is_null(topology.vmemrange.h) ||
+             guest_handle_is_null(topology.vdistance.h) ||
+             guest_handle_is_null(topology.vcpu_to_vnode.h) )
+            return -EFAULT;
+
+        if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL )
+            return -ESRCH;
+
+        rc = -EOPNOTSUPP;
+        if ( d->vnuma == NULL )
+            goto vnumainfo_out;
+
+        if ( d->vnuma->nr_vnodes == 0 )
+            goto vnumainfo_out;
+
+        dom_vnodes = d->vnuma->nr_vnodes;
+
+        /*
+         * The guest's nr_vnodes and nr_vcpus may differ from the
+         * domain's vNUMA configuration; check them here so the
+         * copies below cannot overflow the guest's buffers.
+         */
+        rc = -ENOBUFS;
+        if ( topology.nr_vnodes < dom_vnodes ||
+             topology.nr_vcpus < d->max_vcpus )
+            goto vnumainfo_out;
+
+        rc = -EFAULT;
+
+        if ( copy_to_guest(topology.vmemrange.h, d->vnuma->vmemrange,
+                           dom_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( copy_to_guest(topology.vdistance.h, d->vnuma->vdistance,
+                           dom_vnodes * dom_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( copy_to_guest(topology.vcpu_to_vnode.h, d->vnuma->vcpu_to_vnode,
+                           d->max_vcpus) != 0 )
+            goto vnumainfo_out;
+
+        topology.nr_vnodes = dom_vnodes;
+        topology.nr_vcpus = d->max_vcpus;
+
+        if ( copy_to_guest(arg, &topology, 1) != 0 )
+            goto vnumainfo_out;
+        rc = 0;
+
+ vnumainfo_out:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..5ee74f4 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
 #include "xen.h"
 #include "grant_table.h"
 #include "hvm/save.h"
+#include "memory.h"
 
 #define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
@@ -934,6 +935,32 @@ struct xen_domctl_vcpu_msrs {
 };
 typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
+
+/*
+ * Used in XEN_DOMCTL_setvnumainfo to set the
+ * vNUMA topology of a domain.
+ */
+struct xen_domctl_vnuma {
+    uint32_t nr_vnodes;
+    uint32_t _pad;
+    XEN_GUEST_HANDLE_64(uint) vdistance;
+    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+
+    /*
+     * vnode-to-pnode mapping.
+     * This is kept on a per-domain basis for
+     * interested consumers, such as NUMA-aware ballooning.
+     */
+    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+
+    /*
+     * Memory ranges for each vNUMA node.
+     */
+    XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
 #endif
 
 struct xen_domctl {
@@ -1008,6 +1035,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_get_vcpu_msrs                 72
 #define XEN_DOMCTL_set_vcpu_msrs                 73
+#define XEN_DOMCTL_setvnumainfo                  74
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1068,6 +1096,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_vnuma             vnuma;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 2c57aa0..2c212e1 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -521,9 +521,54 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
  * The zero value is appropiate.
  */
 
+/* vNUMA node memory range */
+struct vmemrange {
+    uint64_t start, end;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * A vNUMA topology specifies the number of vNUMA nodes, the distance
+ * table, the memory ranges and the vcpu-to-vnode mapping provided
+ * for a guest.  The XENMEM_get_vnumainfo hypercall expects the guest
+ * to set nr_vnodes and nr_vcpus to indicate the buffer space it has
+ * allocated.  After the guest structures are filled in, nr_vnodes
+ * and nr_vcpus are copied back to the guest.
+ */
+struct vnuma_topology_info {
+    /* IN */
+    domid_t domid;
+    uint16_t pad;
+    /* IN/OUT */
+    uint32_t nr_vnodes;
+    uint32_t nr_vcpus;
+    /* OUT */
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vdistance;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vcpu_to_vnode;
+    union {
+        XEN_GUEST_HANDLE(vmemrange_t) h;
+        uint64_t pad;
+    } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+/*
+ * XENMEM_get_vnumainfo is used by a guest to retrieve
+ * its vNUMA topology from the hypervisor.
+ */
+#define XENMEM_get_vnumainfo               26
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
-/* Next available subop number is 26 */
+/* Next available subop number is 27 */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index bb1c398..d29a84d 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
 
 extern bool_t opt_dom0_vcpus_pin;
 
+/* Per-domain vNUMA topology. */
+struct vnuma_info {
+    unsigned int nr_vnodes;
+    unsigned int *vdistance;
+    unsigned int *vcpu_to_vnode;
+    unsigned int *vnode_to_pnode;
+    struct vmemrange *vmemrange;
+};
+
+void vnuma_destroy(struct vnuma_info *vnuma);
+
 #endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index d5bc461..71e4218 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -447,6 +447,7 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+    struct vnuma_info *vnuma;
 };
 
 struct domain_setup_info
-- 
1.7.10.4
