From: Elena Ufimtseva <ufimtseva@gmail.com>
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
	msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
	ian.jackson@eu.citrix.com, JBeulich@suse.com,
	Elena Ufimtseva <ufimtseva@gmail.com>
Subject: [PATCH v5 1/8] xen: vnuma topology and subop hypercalls
Date: Tue,  3 Jun 2014 00:53:13 -0400
Message-ID: <1401771200-11448-3-git-send-email-ufimtseva@gmail.com>
In-Reply-To: <1401771200-11448-1-git-send-email-ufimtseva@gmail.com>

Defines the interface, structures and hypercalls for the toolstack to
build a vNUMA topology and for guests that wish to retrieve it.
Two subop hypercalls are introduced by this patch:
XEN_DOMCTL_setvnumainfo to define the vNUMA topology of a domain,
and XENMEM_get_vnuma_info for a guest to retrieve that topology.
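
As an illustration only (not part of this patch), a guest kernel built with
Linux-style hypercall wrappers (HYPERVISOR_memory_op(), set_xen_guest_handle())
could retrieve its topology roughly as follows; the helper name and the way
the buffer sizes are obtained beforehand are assumptions:

    /*
     * Sketch: fetch the vNUMA topology from Xen.  The caller is assumed to
     * have allocated 'mem' (nr_vnodes entries), 'dist' (nr_vnodes * nr_vnodes
     * entries) and 'cpu_to_node' (nr_vcpus entries) beforehand.
     */
    static int xen_get_vnuma_topology(unsigned int nr_vnodes,
                                      unsigned int nr_vcpus,
                                      struct vmemrange *mem,
                                      unsigned int *dist,
                                      unsigned int *cpu_to_node)
    {
        struct vnuma_topology_info topo = {
            .domid     = DOMID_SELF,
            /* Buffer sizes; Xen fails the call if they are too small. */
            .nr_vnodes = nr_vnodes,
            .nr_vcpus  = nr_vcpus,
        };

        set_xen_guest_handle(topo.vmemrange.h, mem);
        set_xen_guest_handle(topo.vdistance.h, dist);
        set_xen_guest_handle(topo.vcpu_to_vnode.h, cpu_to_node);

        /* 0 on success, -EOPNOTSUPP if no vNUMA topology has been set. */
        return HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &topo);
    }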

Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---

Changes since v4:
- added a check to make sure the guest has allocated enough memory
for the vnuma topology;
- code style fixes;

Changes since v3:
- added a subop hypercall to retrieve the number of vnodes
and vcpus of a domain, so correct allocations can be made
before requesting the vnuma topology.
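
For reference, a hedged sketch of the toolstack side (the real libxc plumbing
is added later in this series by "libxc: Plumb Xen with vnuma topology"):
do_domctl() stands in for the usual libxc domctl wrapper, hypercall-buffer
bouncing of the arrays is omitted, and the helper name is hypothetical.

    static int xc_domain_setvnuma_sketch(xc_interface *xch, uint32_t domid,
                                         uint32_t nr_vnodes,
                                         vmemrange_t *vmemrange,
                                         unsigned int *vdistance,
                                         unsigned int *vcpu_to_vnode,
                                         unsigned int *vnode_to_pnode)
    {
        DECLARE_DOMCTL;

        domctl.cmd = XEN_DOMCTL_setvnumainfo;
        domctl.domain = (domid_t)domid;
        domctl.u.vnuma.nr_vnodes = nr_vnodes;

        /* In real libxc these arrays must be bounced into hypercall-safe
         * memory before their addresses are passed to Xen. */
        set_xen_guest_handle(domctl.u.vnuma.vmemrange, vmemrange);
        set_xen_guest_handle(domctl.u.vnuma.vdistance, vdistance);
        set_xen_guest_handle(domctl.u.vnuma.vcpu_to_vnode, vcpu_to_vnode);
        set_xen_guest_handle(domctl.u.vnuma.vnode_to_pnode, vnode_to_pnode);

        return do_domctl(xch, &domctl);
    }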
---
 xen/common/domain.c         |   26 ++++++++++++++
 xen/common/domctl.c         |   84 +++++++++++++++++++++++++++++++++++++++++++
 xen/common/memory.c         |   67 ++++++++++++++++++++++++++++++++++
 xen/include/public/domctl.h |   28 +++++++++++++++
 xen/include/public/memory.h |   14 ++++++++
 xen/include/public/vnuma.h  |   54 ++++++++++++++++++++++++++++
 xen/include/xen/domain.h    |   11 ++++++
 xen/include/xen/sched.h     |    1 +
 8 files changed, 285 insertions(+)
 create mode 100644 xen/include/public/vnuma.h

diff --git a/xen/common/domain.c b/xen/common/domain.c
index bc57174..5b7ce17 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -567,6 +567,15 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+static void vnuma_destroy(struct vnuma_info *vnuma)
+{
+    vnuma->nr_vnodes = 0;
+    xfree(vnuma->vmemrange);
+    xfree(vnuma->vcpu_to_vnode);
+    xfree(vnuma->vdistance);
+    xfree(vnuma->vnode_to_pnode);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -585,6 +594,7 @@ int domain_kill(struct domain *d)
         evtchn_destroy(d);
         gnttab_release_mappings(d);
         tmem_destroy(d->tmem_client);
+        vnuma_destroy(&d->vnuma);
         domain_set_outstanding_pages(d, 0);
         d->tmem_client = NULL;
         /* fallthrough */
@@ -1350,6 +1360,22 @@ int continue_hypercall_on_cpu(
 }
 
 /*
+ * Changes a previously set domain vNUMA topology to the default one:
+ * a single vnode with all other values at their defaults. Since the
+ * domain's memory may already be allocated on multiple HW NUMA nodes,
+ * NUMA_NO_NODE is set in the vnode-to-pnode map.
+ */
+int vnuma_init_zero_topology(struct domain *d)
+{
+    unsigned int i;
+
+    d->vnuma.vmemrange[0].end = d->vnuma.vmemrange[d->vnuma.nr_vnodes - 1].end;
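+    /* 10 is the conventional (ACPI SLIT) value for local distance. */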
+    d->vnuma.vdistance[0] = 10;
+    for ( i = 0; i < d->vnuma.nr_vnodes; i++ )
+        d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
+    memset(d->vnuma.vcpu_to_vnode, 0,
+           d->max_vcpus * sizeof(*d->vnuma.vcpu_to_vnode));
+    d->vnuma.nr_vnodes = 1;
+    return 0;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 4774277..66fdcee 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
 #include <asm/page.h>
 #include <public/domctl.h>
 #include <xsm/xsm.h>
+#include <public/vnuma.h>
 
 static DEFINE_SPINLOCK(domctl_lock);
 DEFINE_SPINLOCK(vcpu_alloc_lock);
@@ -888,6 +889,89 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_setvnumainfo:
+    {
+        unsigned int dist_size, nr_vnodes, i;
+
+        ret = -EINVAL;
+
+        /* If the number of vnodes was already set, skip. */
+        if ( d->vnuma.nr_vnodes > 0 )
+            break;
+
+        nr_vnodes = op->u.vnuma.nr_vnodes;
+        if ( nr_vnodes == 0 )
+            goto setvnumainfo_out;
+
+        if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
+            goto setvnumainfo_out;
+
+        ret = -EFAULT;
+        if ( guest_handle_is_null(op->u.vnuma.vdistance)     ||
+             guest_handle_is_null(op->u.vnuma.vmemrange)     ||
+             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
+             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+            goto setvnumainfo_out;
+
+        dist_size = nr_vnodes * nr_vnodes;
+
+        d->vnuma.vdistance = xmalloc_array(unsigned int, dist_size);
+        d->vnuma.vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+        d->vnuma.vcpu_to_vnode = xmalloc_array(unsigned int, d->max_vcpus);
+        d->vnuma.vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+        if ( d->vnuma.vdistance == NULL ||
+             d->vnuma.vmemrange == NULL ||
+             d->vnuma.vcpu_to_vnode == NULL ||
+             d->vnuma.vnode_to_pnode == NULL )
+        {
+            ret = -ENOMEM;
+            goto setvnumainfo_nomem;
+        }
+
+        if ( unlikely(__copy_from_guest(d->vnuma.vdistance,
+                                    op->u.vnuma.vdistance,
+                                    dist_size)) )
+            goto setvnumainfo_out;
+        if ( unlikely(__copy_from_guest(d->vnuma.vmemrange,
+                                    op->u.vnuma.vmemrange,
+                                    nr_vnodes)) )
+            goto setvnumainfo_out;
+        if ( unlikely(__copy_from_guest(d->vnuma.vcpu_to_vnode,
+                                    op->u.vnuma.vcpu_to_vnode,
+                                    d->max_vcpus)) )
+            goto setvnumainfo_out;
+        if ( unlikely(__copy_from_guest(d->vnuma.vnode_to_pnode,
+                                    op->u.vnuma.vnode_to_pnode,
+                                    nr_vnodes)) )
+            goto setvnumainfo_out;
+
+        /* Everything is in place; set the number of vnodes. */
+        d->vnuma.nr_vnodes = nr_vnodes;
+
+        ret = 0;
+        break;
+
+ setvnumainfo_out:
+        /* On failure, set one vNUMA node */
+        d->vnuma.vmemrange[0].end = d->vnuma.vmemrange[d->vnuma.nr_vnodes - 1].end;
+        d->vnuma.vdistance[0] = 10;
+        for ( i = 0; i < d->vnuma.nr_vnodes; i++ )
+            d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
+        memset(d->vnuma.vcpu_to_vnode, 0,
+               d->max_vcpus * sizeof(*d->vnuma.vcpu_to_vnode));
+        d->vnuma.nr_vnodes = 1;
+        ret = 0;
+        break;
+
+ setvnumainfo_nomem:
+        /* The only case where the number of vnodes is left at 0. */
+        d->vnuma.nr_vnodes = 0;
+        xfree(d->vnuma.vmemrange);
+        xfree(d->vnuma.vdistance);
+        xfree(d->vnuma.vnode_to_pnode);
+        xfree(d->vnuma.vcpu_to_vnode);
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 257f4b0..2067f42 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -963,6 +963,73 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_get_vnuma_info:
+    {
+        struct vnuma_topology_info guest_topo;
+        struct domain *d;
+
+        if ( copy_from_guest(&guest_topo, arg, 1) )
+            return -EFAULT;
+        if ( (d = rcu_lock_domain_by_any_id(guest_topo.domid)) == NULL )
+            return -ESRCH;
+
+        if ( d->vnuma.nr_vnodes == 0 )
+        {
+            rc = -EOPNOTSUPP;
+            goto vnumainfo_out;
+        }
+
+        rc = -EOPNOTSUPP;
+        /*
+         * The guest may be configured with a different number of
+         * cpus/nodes; it reports the sizes it has allocated via the
+         * hypercall arguments.
+         */
+        if ( guest_topo.nr_vnodes < d->vnuma.nr_vnodes ||
+            guest_topo.nr_vcpus < d->max_vcpus )
+            goto vnumainfo_out;
+
+        rc = -EFAULT;
+
+        if ( guest_handle_is_null(guest_topo.vmemrange.h)    ||
+             guest_handle_is_null(guest_topo.vdistance.h)    ||
+             guest_handle_is_null(guest_topo.vcpu_to_vnode.h) )
+            goto vnumainfo_out;
+
+        /*
+         * If any copy to the guest fails, take the failure path;
+         * no partial topology is reported.
+         */
+        guest_topo.nr_vnodes = d->vnuma.nr_vnodes;
+
+        if ( __copy_to_guest(guest_topo.vmemrange.h,
+                                d->vnuma.vmemrange,
+                                d->vnuma.nr_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( __copy_to_guest(guest_topo.vdistance.h,
+                                d->vnuma.vdistance,
+                                d->vnuma.nr_vnodes * d->vnuma.nr_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( __copy_to_guest(guest_topo.vcpu_to_vnode.h,
+                                d->vnuma.vcpu_to_vnode,
+                                d->max_vcpus) != 0 )
+            goto vnumainfo_out;
+
+        rc = 0;
+
+ vnumainfo_out:
+        if ( rc != 0 )
+            /*
+             * If the topology could not be copied to the guest, leave
+             * everything as is and only log the error. The toolstack still
+             * sees the domain's vNUMA topology, but the guest will not.
+             */
+            gdprintk(XENLOG_INFO, "vNUMA: failed to copy topology info to guest.\n");
+
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 565fa4c..8b65a75 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
 #include "xen.h"
 #include "grant_table.h"
 #include "hvm/save.h"
+#include "vnuma.h"
 
 #define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
@@ -895,6 +896,31 @@ struct xen_domctl_cacheflush {
 typedef struct xen_domctl_cacheflush xen_domctl_cacheflush_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_cacheflush_t);
 
+/*
+ * XEN_DOMCTL_setvnumainfo: sets the vNUMA topology
+ * of a domain as supplied by the toolstack.
+ */
+struct xen_domctl_vnuma {
+    uint32_t nr_vnodes;
+    uint32_t __pad;
+    XEN_GUEST_HANDLE_64(uint) vdistance;
+    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+
+    /*
+     * vnode-to-physical-NUMA-node map.
+     * This is kept on a per-domain basis for
+     * interested consumers, such as NUMA-aware ballooning.
+     */
+    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+
+    /*
+     * Memory ranges for each vNUMA node.
+     */
+    XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -965,6 +991,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_getnodeaffinity               69
 #define XEN_DOMCTL_set_max_evtchn                70
 #define XEN_DOMCTL_cacheflush                    71
+#define XEN_DOMCTL_setvnumainfo                  72
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1024,6 +1051,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_vnuma             vnuma;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 2c57aa0..a7dc035 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -354,6 +354,20 @@ struct xen_pod_target {
 };
 typedef struct xen_pod_target xen_pod_target_t;
 
+/*
+ * XENMEM_get_vnuma_info is used by the caller to retrieve
+ * the vNUMA topology constructed for a particular domain.
+ *
+ * The data exchanged is described by vnuma_topology_info.
+ */
+#define XENMEM_get_vnuma_info               26
+
+/*
+ * XENMEM_get_vnuma_pnode would be used by a guest to determine
+ * the physical node of a specified vnode; reserved, not yet implemented.
+ */
+/*#define XENMEM_get_vnuma_pnode              27*/
+
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 
 #ifndef uint64_aligned_t
diff --git a/xen/include/public/vnuma.h b/xen/include/public/vnuma.h
new file mode 100644
index 0000000..ab9eda0
--- /dev/null
+++ b/xen/include/public/vnuma.h
@@ -0,0 +1,54 @@
+#ifndef _XEN_PUBLIC_VNUMA_H
+#define _XEN_PUBLIC_VNUMA_H
+
+#include "xen.h"
+
+/*
+ * The following structures are used to present the vNUMA
+ * topology to the guest when requested.
+ */
+
+/*
+ * Memory ranges define the memory boundaries
+ * of the vNUMA nodes. As of now, only one
+ * contiguous range per node is supported.
+ */
+struct vmemrange {
+    uint64_t start, end;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * The vNUMA topology describes the number of vNUMA nodes, the distance
+ * table, the memory ranges and the vcpu-to-vnode mapping provided to guests.
+ * When issuing the hypercall, the guest is expected to tell Xen how much
+ * memory it has allocated for the topology via nr_vnodes and nr_vcpus.
+ */
+
+struct vnuma_topology_info {
+    /* IN */
+    domid_t domid;
+    /* IN/OUT */
+    unsigned int nr_vnodes;
+    unsigned int nr_vcpus;
+    /* OUT */
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t    _pad;
+    } vdistance;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t    _pad;
+    } vcpu_to_vnode;
+    union {
+        XEN_GUEST_HANDLE(vmemrange_t) h;
+        uint64_t    _pad;
+    } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+#endif
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index bb1c398..e8b36e3 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
 
 extern bool_t opt_dom0_vcpus_pin;
 
+/* Per-domain vNUMA topology maintained by Xen. */
+struct vnuma_info {
+    unsigned int nr_vnodes;
+    unsigned int *vdistance;
+    unsigned int *vcpu_to_vnode;
+    unsigned int *vnode_to_pnode;
+    struct vmemrange *vmemrange;
+};
+
+int vnuma_init_zero_topology(struct domain *d);
+
 #endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 44851ae..a1163fd 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -444,6 +444,7 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+    struct vnuma_info vnuma;
 };
 
 struct domain_setup_info
-- 
1.7.10.4
