From: Elena Ufimtseva <ufimtseva@gmail.com>
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com,
stefano.stabellini@eu.citrix.com, george.dunlap@eu.citrix.com,
msw@linux.com, dario.faggioli@citrix.com, lccycc123@gmail.com,
ian.jackson@eu.citrix.com, JBeulich@suse.com,
Elena Ufimtseva <ufimtseva@gmail.com>
Subject: [PATCH v4 1/7] xen: vNUMA support for PV guests
Date: Wed, 4 Dec 2013 00:47:09 -0500
Message-ID: <1386136035-19544-2-git-send-email-ufimtseva@gmail.com>
In-Reply-To: <1386136035-19544-1-git-send-email-ufimtseva@gmail.com>
Define the interface, structures and hypercalls for the toolstack to
build a vNUMA topology and for guests that wish to retrieve it.
Two subop hypercalls are introduced by this patch:
XEN_DOMCTL_setvnumainfo to define the vNUMA topology of a domain,
and XENMEM_get_vnuma_info to retrieve that topology by the guest.
Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
Changes since v3:
- added a subop hypercall to retrieve the number of vnodes
and vcpus for a domain, so the guest can make correctly sized
allocations before requesting the vnuma topology.
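
For context (not part of the patch), below is a minimal sketch of how a
PV guest kernel might consume the two memory subops. It assumes a
Linux-style HYPERVISOR_memory_op() wrapper, set_xen_guest_handle() and
kcalloc(), and that the new public headers are visible to the guest;
the function name and error handling are illustrative only.

/* Hypothetical guest-side usage sketch; not part of this patch. */
static int vnuma_fetch_topology(void)
{
    struct vnuma_topology_info topo = { .domid = DOMID_SELF };
    unsigned int nr_vnodes = 0, nr_vcpus = 0;
    struct vmemrange *vmem = NULL;
    unsigned int *vdistance = NULL, *vcpu_to_vnode = NULL;
    int rc;

    /* Stage 1: ask Xen how large the topology arrays need to be. */
    set_xen_guest_handle(topo.nr_vnodes.h, &nr_vnodes);
    set_xen_guest_handle(topo.nr_vcpus.h, &nr_vcpus);
    rc = HYPERVISOR_memory_op(XENMEM_get_vnodes_vcpus, &topo);
    if (rc)
        return rc;
    if (nr_vnodes == 0 || nr_vcpus == 0)
        return -ENODATA;

    /* Stage 2: allocate correctly sized arrays and fetch the topology. */
    vmem = kcalloc(nr_vnodes, sizeof(*vmem), GFP_KERNEL);
    vdistance = kcalloc(nr_vnodes * nr_vnodes, sizeof(*vdistance), GFP_KERNEL);
    vcpu_to_vnode = kcalloc(nr_vcpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
    if (!vmem || !vdistance || !vcpu_to_vnode) {
        rc = -ENOMEM;
        goto out;
    }

    set_xen_guest_handle(topo.vmemrange.h, vmem);
    set_xen_guest_handle(topo.vdistance.h, vdistance);
    set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);
    rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &topo);

    /* On success, hand the arrays to the guest NUMA setup code (omitted). */
out:
    if (rc) {
        kfree(vmem);
        kfree(vdistance);
        kfree(vcpu_to_vnode);
    }
    return rc;
}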
---
xen/common/domain.c | 10 +++++
xen/common/domctl.c | 79 +++++++++++++++++++++++++++++++++++
xen/common/memory.c | 96 +++++++++++++++++++++++++++++++++++++++++++
xen/include/public/domctl.h | 29 +++++++++++++
xen/include/public/memory.h | 17 ++++++++
xen/include/public/vnuma.h | 59 ++++++++++++++++++++++++++
xen/include/xen/domain.h | 8 ++++
xen/include/xen/sched.h | 1 +
8 files changed, 299 insertions(+)
create mode 100644 xen/include/public/vnuma.h
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 2cbc489..8f5c665 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -511,6 +511,15 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
return 0;
}
+static void vnuma_destroy(struct vnuma_info *vnuma)
+{
+ vnuma->nr_vnodes = 0;
+ xfree(vnuma->vmemrange);
+ xfree(vnuma->vcpu_to_vnode);
+ xfree(vnuma->vdistance);
+ xfree(vnuma->vnode_to_pnode);
+}
+
int domain_kill(struct domain *d)
{
int rc = 0;
@@ -531,6 +540,7 @@ int domain_kill(struct domain *d)
tmem_destroy(d->tmem);
domain_set_outstanding_pages(d, 0);
d->tmem = NULL;
+ vnuma_destroy(&d->vnuma);
/* fallthrough */
case DOMDYING_dying:
rc = domain_relinquish_resources(d);
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 904d27b..4f5a17c 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
#include <asm/page.h>
#include <public/domctl.h>
#include <xsm/xsm.h>
+#include <public/vnuma.h>
static DEFINE_SPINLOCK(domctl_lock);
DEFINE_SPINLOCK(vcpu_alloc_lock);
@@ -889,6 +890,84 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
}
break;
+ case XEN_DOMCTL_setvnumainfo:
+ {
+ unsigned int dist_size, nr_vnodes, i;
+
+ ret = -EINVAL;
+
+ /*
+ * If the number of vnodes was set before,
+ * don't initialize it again.
+ */
+ if ( d->vnuma.nr_vnodes > 0 )
+ break;
+
+ nr_vnodes = op->u.vnuma.nr_vnodes;
+ if ( nr_vnodes == 0 )
+ break;
+ if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
+ break;
+
+ ret = -EFAULT;
+ if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
+ guest_handle_is_null(op->u.vnuma.vmemrange) ||
+ guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
+ guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+ goto setvnumainfo_out;
+
+ dist_size = nr_vnodes * nr_vnodes;
+
+ d->vnuma.vdistance = xmalloc_array(unsigned int, dist_size);
+ d->vnuma.vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+ d->vnuma.vcpu_to_vnode = xmalloc_array(unsigned int, d->max_vcpus);
+ d->vnuma.vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+ if ( d->vnuma.vdistance == NULL ||
+ d->vnuma.vmemrange == NULL ||
+ d->vnuma.vcpu_to_vnode == NULL ||
+ d->vnuma.vnode_to_pnode == NULL )
+ {
+ ret = -ENOMEM;
+ goto setvnumainfo_out;
+ }
+
+ if ( unlikely(copy_from_guest(d->vnuma.vdistance,
+ op->u.vnuma.vdistance,
+ dist_size)) )
+ goto setvnumainfo_out;
+ if ( unlikely(copy_from_guest(d->vnuma.vmemrange,
+ op->u.vnuma.vmemrange,
+ nr_vnodes)) )
+ goto setvnumainfo_out;
+ if ( unlikely(copy_from_guest(d->vnuma.vcpu_to_vnode,
+ op->u.vnuma.vcpu_to_vnode,
+ d->max_vcpus)) )
+ goto setvnumainfo_out;
+ if ( unlikely(copy_from_guest(d->vnuma.vnode_to_pnode,
+ op->u.vnuma.vnode_to_pnode,
+ nr_vnodes)) )
+ goto setvnumainfo_out;
+
+ /* Everything is in place, so set the number of vnodes. */
+ d->vnuma.nr_vnodes = nr_vnodes;
+
+ for ( i = 0; i < nr_vnodes; i++ )
+ d->vnuma.vmemrange[i]._reserved = 0;
+
+ ret = 0;
+
+ setvnumainfo_out:
+ if ( ret != 0 )
+ {
+ /* Reset pointers after freeing to avoid a double free in vnuma_destroy(). */
+ xfree(d->vnuma.vdistance);
+ xfree(d->vnuma.vmemrange);
+ xfree(d->vnuma.vcpu_to_vnode);
+ xfree(d->vnuma.vnode_to_pnode);
+ d->vnuma.vdistance = NULL;
+ d->vnuma.vmemrange = NULL;
+ d->vnuma.vcpu_to_vnode = NULL;
+ d->vnuma.vnode_to_pnode = NULL;
+ }
+ }
+ break;
+
default:
ret = arch_do_domctl(op, d, u_domctl);
break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 50b740f..5bfab08 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -28,6 +28,7 @@
#include <public/memory.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
+#include <public/vnuma.h>
struct memop_args {
/* INPUT */
@@ -733,6 +734,101 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
break;
+ case XENMEM_get_vnuma_info:
+ {
+ struct vnuma_topology_info mtopology;
+ struct domain *d;
+
+ if ( copy_from_guest(&mtopology, arg, 1) )
+ {
+ printk(KERN_INFO "COpy from guest of mtopology failed.\n");
+ return -EFAULT;
+ }
+ if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
+ return -ESRCH;
+
+ if ( (d->vnuma.nr_vnodes == 0) ||
+ (d->vnuma.nr_vnodes > d->max_vcpus) )
+ {
+ rc = -EOPNOTSUPP;
+ goto vnumainfo_out;
+ }
+
+ rc = -EFAULT;
+
+ if ( guest_handle_is_null(mtopology.vmemrange.h) ||
+ guest_handle_is_null(mtopology.vdistance.h) ||
+ guest_handle_is_null(mtopology.vcpu_to_vnode.h)||
+ guest_handle_is_null(mtopology.nr_vnodes.h) )
+ goto vnumainfo_out;
+
+ if ( __copy_to_guest(mtopology.vmemrange.h,
+ d->vnuma.vmemrange,
+ d->vnuma.nr_vnodes) != 0 )
+ goto vnumainfo_out;
+ if ( __copy_to_guest(mtopology.vdistance.h,
+ d->vnuma.vdistance,
+ d->vnuma.nr_vnodes * d->vnuma.nr_vnodes) != 0 )
+ goto vnumainfo_out;
+ if ( __copy_to_guest(mtopology.vcpu_to_vnode.h,
+ d->vnuma.vcpu_to_vnode,
+ d->max_vcpus) != 0 )
+ goto vnumainfo_out;
+
+ if ( __copy_to_guest(mtopology.nr_vnodes.h, &d->vnuma.nr_vnodes, 1) != 0 )
+ goto vnumainfo_out;
+
+ rc = 0;
+
+ vnumainfo_out:
+ rcu_unlock_domain(d);
+ if ( rc != 0 )
+ printk(XENLOG_INFO "Failed to copy vnuma topology to guest.\n");
+ break;
+ }
+
+ /*
+ * Only two fields of vnuma_topology_info are used here:
+ * nr_vnodes and nr_vcpus. The guest uses them to allocate
+ * the vnuma topology arrays with the correct sizes.
+ */
+ case XENMEM_get_vnodes_vcpus:
+ {
+ struct vnuma_topology_info mtopology;
+ struct domain *d;
+ unsigned int nr_vnodes, max_vcpus;
+
+ if ( copy_from_guest(&mtopology, arg, 1) )
+ {
+ printk(KERN_INFO "Null pointer vnuma_nodes.\n");
+ return -EFAULT;
+ }
+ if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
+ return -ESRCH;
+
+ nr_vnodes = d->vnuma.nr_vnodes;
+ max_vcpus = d->max_vcpus;
+ rcu_unlock_domain(d);
+
+ rc = -EFAULT;
+
+ /* Both output handles must be provided by the guest. */
+ if ( guest_handle_is_null(mtopology.nr_vnodes.h) ||
+ guest_handle_is_null(mtopology.nr_vcpus.h) )
+ return rc;
+
+ /* __copy_to_guest() returns the number of items not copied, not an errno. */
+ if ( __copy_to_guest(mtopology.nr_vnodes.h, &nr_vnodes, 1) )
+ return rc;
+
+ if ( __copy_to_guest(mtopology.nr_vcpus.h, &max_vcpus, 1) )
+ return rc;
+
+ rc = 0;
+ break;
+ }
+
default:
rc = arch_memory_op(op, arg);
break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 01a3652..0157a16 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
#include "xen.h"
#include "grant_table.h"
#include "hvm/save.h"
+#include "vnuma.h"
#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
@@ -869,6 +870,32 @@ struct xen_domctl_set_max_evtchn {
typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
+/*
+ * XEN_DOMCTL_setvnumainfo: sets the vNUMA topology
+ * parameters provided by the toolstack.
+ */
+struct xen_domctl_vnuma {
+ uint32_t nr_vnodes;
+ uint32_t __pad;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+ /*
+ * vnode-to-physical-NUMA-node mapping.
+ * Kept on a per-domain basis for consumers
+ * such as vNUMA-aware ballooning.
+ */
+ XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+ /*
+ * Memory ranges that the vNUMA nodes represent.
+ * If a node has more than one range, they form a linked list.
+ */
+ XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
struct xen_domctl {
uint32_t cmd;
#define XEN_DOMCTL_createdomain 1
@@ -938,6 +965,7 @@ struct xen_domctl {
#define XEN_DOMCTL_setnodeaffinity 68
#define XEN_DOMCTL_getnodeaffinity 69
#define XEN_DOMCTL_set_max_evtchn 70
+#define XEN_DOMCTL_setvnumainfo 71
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -998,6 +1026,7 @@ struct xen_domctl {
struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
+ struct xen_domctl_vnuma vnuma;
uint8_t pad[128];
} u;
};
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 7a26dee..f5ea6af 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -339,6 +339,23 @@ struct xen_pod_target {
};
typedef struct xen_pod_target xen_pod_target_t;
+/*
+ * XENMEM_get_vnuma_info is used by the caller to retrieve
+ * the vNUMA topology constructed for a particular domain.
+ *
+ * The data exchanged is described by vnuma_topology_info.
+ */
+#define XENMEM_get_vnuma_info 25
+
+/*
+ * XENMEM_get_vnodes_vcpus is used to retrieve the number of vnodes
+ * and vcpus, so that the guest can allocate the correct amount of
+ * memory for the vnuma topology.
+ */
+#define XENMEM_get_vnodes_vcpus 26
+
+#define XENMEM_get_vnuma_pnode 27
+
#if defined(__XEN__) || defined(__XEN_TOOLS__)
#ifndef uint64_aligned_t
diff --git a/xen/include/public/vnuma.h b/xen/include/public/vnuma.h
new file mode 100644
index 0000000..32f860f
--- /dev/null
+++ b/xen/include/public/vnuma.h
@@ -0,0 +1,59 @@
+#ifndef _XEN_PUBLIC_VNUMA_H
+#define _XEN_PUBLIC_VNUMA_H
+
+#include "xen.h"
+
+/*
+ * The following structures are used to represent the vNUMA
+ * topology to the guest, if requested.
+ */
+
+/*
+ * Memory ranges can be used to define
+ * vNUMA memory node boundaries as a
+ * linked list. As of now, only one range
+ * per node is supported.
+ */
+struct vmemrange {
+ uint64_t start, end;
+ uint64_t _reserved;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * The vNUMA topology specifies the number of vNUMA nodes,
+ * the distance table, the memory ranges and the
+ * vcpu-to-vnode mapping provided to guests.
+ */
+
+struct vnuma_topology_info {
+ /* IN */
+ domid_t domid;
+ /* OUT */
+ union {
+ XEN_GUEST_HANDLE(uint) h;
+ uint64_t _pad;
+ } nr_vnodes;
+ union {
+ XEN_GUEST_HANDLE(uint) h;
+ uint64_t _pad;
+ } nr_vcpus;
+ union {
+ XEN_GUEST_HANDLE(uint) h;
+ uint64_t _pad;
+ } vdistance;
+ union {
+ XEN_GUEST_HANDLE(uint) h;
+ uint64_t _pad;
+ } vcpu_to_vnode;
+ union {
+ XEN_GUEST_HANDLE(vmemrange_t) h;
+ uint64_t _pad;
+ } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+#endif /* _XEN_PUBLIC_VNUMA_H */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index a057069..ee0eeee 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,12 @@ extern unsigned int xen_processor_pmbits;
extern bool_t opt_dom0_vcpus_pin;
+struct vnuma_info {
+ unsigned int nr_vnodes;
+ unsigned int *vdistance;
+ unsigned int *vcpu_to_vnode;
+ unsigned int *vnode_to_pnode;
+ struct vmemrange *vmemrange;
+};
+
#endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index cbdf377..3765eae 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -417,6 +417,7 @@ struct domain
nodemask_t node_affinity;
unsigned int last_alloc_node;
spinlock_t node_affinity_lock;
+ struct vnuma_info vnuma;
};
struct domain_setup_info
--
1.7.10.4
Thread overview: 15+ messages
2013-12-04 5:47 [PATCH v4 0/7] vNUMA introduction Elena Ufimtseva
2013-12-04 5:47 ` Elena Ufimtseva [this message]
2013-12-04 11:34 ` [PATCH v4 1/7] xen: vNUMA support for PV guests Jan Beulich
2013-12-04 18:02 ` Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 2/7] libxc: Plumb Xen with vNUMA topology for domain Elena Ufimtseva
2013-12-16 19:16 ` Konrad Rzeszutek Wilk
2013-12-04 5:47 ` [PATCH v4 3/7] xl: vnuma memory parsing and supplement functions Elena Ufimtseva
2013-12-16 19:57 ` Konrad Rzeszutek Wilk
2013-12-04 5:47 ` [PATCH v4 4/7] xl: vnuma distance, vcpu and pnode masks parser Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 5/7] libxc: vnuma memory domain allocation Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 6/7] libxl: vNUMA supporting interface Elena Ufimtseva
2013-12-04 5:47 ` [PATCH v4 7/7] xen: adds vNUMA info debug-key u Elena Ufimtseva
2013-12-04 11:23 ` Jan Beulich
2014-02-13 12:49 ` [PATCH v4 0/7] vNUMA introduction Li Yechen
2014-02-13 16:26 ` Elena Ufimtseva