From mboxrd@z Thu Jan 1 00:00:00 1970
From: Elena Ufimtseva
Subject: [PATCH v3 1/7] xen: vNUMA support for PV guests
Date: Mon, 18 Nov 2013 15:24:16 -0500
Message-ID: <1384806262-12532-2-git-send-email-ufimtseva@gmail.com>
References: <1384806262-12532-1-git-send-email-ufimtseva@gmail.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Return-path:
In-Reply-To: <1384806262-12532-1-git-send-email-ufimtseva@gmail.com>
List-Unsubscribe: ,
List-Post:
List-Help:
List-Subscribe: ,
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com, stefano.stabellini@eu.citrix.com,
	george.dunlap@eu.citrix.com, msw@linux.com, dario.faggioli@citrix.com,
	lccycc123@gmail.com, ian.jackson@eu.citrix.com, JBeulich@suse.com,
	Elena Ufimtseva
List-Id: xen-devel@lists.xenproject.org

Defines the interface, structures and hypercalls for the toolstack to
build a vNUMA topology and for guests that wish to retrieve it.
Two subop hypercalls are introduced by this patch:
XEN_DOMCTL_setvnumainfo to define the vNUMA topology per domain,
and XENMEM_get_vnuma_info for the guest to retrieve that topology.

Signed-off-by: Elena Ufimtseva

Changes since v2:
 * same size of struct vnuma_topology_info on 32- and 64-bit PV guests;

Signed-off-by: Elena Ufimtseva
---
 xen/common/domain.c         |   10 ++++++
 xen/common/domctl.c         |   79 +++++++++++++++++++++++++++++++++++++++++++
 xen/common/memory.c         |   50 +++++++++++++++++++++++++++
 xen/include/public/domctl.h |   29 ++++++++++++++++
 xen/include/public/memory.h |    8 +++++
 xen/include/public/vnuma.h  |   56 ++++++++++++++++++++++++++++++
 xen/include/xen/domain.h    |    9 +++++
 xen/include/xen/sched.h     |    1 +
 8 files changed, 242 insertions(+)
 create mode 100644 xen/include/public/vnuma.h

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 2cbc489..8f5c665 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -511,6 +511,15 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+static void vnuma_destroy(struct vnuma_info *vnuma)
+{
+    vnuma->nr_vnodes = 0;
+    xfree(vnuma->vmemrange);
+    xfree(vnuma->vcpu_to_vnode);
+    xfree(vnuma->vdistance);
+    xfree(vnuma->vnode_to_pnode);
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -531,6 +540,7 @@ int domain_kill(struct domain *d)
         tmem_destroy(d->tmem);
         domain_set_outstanding_pages(d, 0);
         d->tmem = NULL;
+        vnuma_destroy(&d->vnuma);
         /* fallthrough */
     case DOMDYING_dying:
         rc = domain_relinquish_resources(d);
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 904d27b..af6ae9b 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static DEFINE_SPINLOCK(domctl_lock);
 DEFINE_SPINLOCK(vcpu_alloc_lock);
@@ -889,6 +890,84 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_setvnumainfo:
+    {
+        unsigned int dist_size, nr_vnodes;
+
+        ret = -EINVAL;
+
+        /*
+         * If the number of vnodes was set before,
+         * don't initialize it again.
+         */
+        //ASSERT(d->vnuma.nr_vnodes > 0);
+        if ( d->vnuma.nr_vnodes > 0 )
+            break;
+
+        nr_vnodes = op->u.vnuma.nr_vnodes;
+        if ( nr_vnodes == 0 )
+            break;
+        if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
+            break;
+
+        ret = -EFAULT;
+        if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
+             guest_handle_is_null(op->u.vnuma.vmemrange) ||
+             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
+             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+            goto setvnumainfo_out;
+
+        dist_size = nr_vnodes * nr_vnodes;
+
+        d->vnuma.vdistance = xmalloc_array(unsigned int, dist_size);
+        d->vnuma.vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+        d->vnuma.vcpu_to_vnode = xmalloc_array(unsigned int, d->max_vcpus);
+        d->vnuma.vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+        if ( d->vnuma.vdistance == NULL ||
+             d->vnuma.vmemrange == NULL ||
+             d->vnuma.vcpu_to_vnode == NULL ||
+             d->vnuma.vnode_to_pnode == NULL )
+        {
+            ret = -ENOMEM;
+            goto setvnumainfo_out;
+        }
+
+        printk(KERN_INFO "Ready to copy from guest\n");
+        if ( unlikely(copy_from_guest(d->vnuma.vdistance,
+                                      op->u.vnuma.vdistance,
+                                      dist_size)) )
+            goto setvnumainfo_out;
+        if ( unlikely(copy_from_guest(d->vnuma.vmemrange,
+                                      op->u.vnuma.vmemrange,
+                                      nr_vnodes)) )
+            goto setvnumainfo_out;
+        if ( unlikely(copy_from_guest(d->vnuma.vcpu_to_vnode,
+                                      op->u.vnuma.vcpu_to_vnode,
+                                      d->max_vcpus)) )
+            goto setvnumainfo_out;
+        if ( unlikely(copy_from_guest(d->vnuma.vnode_to_pnode,
+                                      op->u.vnuma.vnode_to_pnode,
+                                      nr_vnodes)) )
+            goto setvnumainfo_out;
+
+        /* Everything is good, let's set the number of vnodes */
+        d->vnuma.nr_vnodes = nr_vnodes;
+
+        ret = 0;
+
+ setvnumainfo_out:
+        if ( ret != 0 )
+        {
+            d->vnuma.nr_vnodes = 0;
+            xfree(d->vnuma.vdistance);
+            xfree(d->vnuma.vmemrange);
+            xfree(d->vnuma.vcpu_to_vnode);
+            xfree(d->vnuma.vnode_to_pnode);
+        }
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 50b740f..b224c16 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct memop_args {
     /* INPUT */
@@ -733,6 +734,55 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_get_vnuma_info:
+    {
+        struct vnuma_topology_info mtopology;
+        struct domain *d;
+
+        if ( copy_from_guest(&mtopology, arg, 1) )
+            return -EFAULT;
+        if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
+            return -ESRCH;
+
+        if ( (d->vnuma.nr_vnodes == 0) ||
+             (d->vnuma.nr_vnodes > d->max_vcpus) )
+        {
+            rc = -EOPNOTSUPP;
+            goto vnumainfo_out;
+        }
+
+        rc = -EFAULT;
+        if ( guest_handle_is_null(mtopology.vmemrange.h) ||
+             guest_handle_is_null(mtopology.vdistance.h) ||
+             guest_handle_is_null(mtopology.vcpu_to_vnode.h) ||
+             guest_handle_is_null(mtopology.nr_vnodes.h) )
+        {
+            goto vnumainfo_out;
+        }
+
+        if ( __copy_to_guest(mtopology.vmemrange.h,
+                             d->vnuma.vmemrange,
+                             d->vnuma.nr_vnodes) != 0 )
+            goto vnumainfo_out;
+        if ( __copy_to_guest(mtopology.vdistance.h,
+                             d->vnuma.vdistance,
+                             d->vnuma.nr_vnodes * d->vnuma.nr_vnodes) != 0 )
+            goto vnumainfo_out;
+        if ( __copy_to_guest(mtopology.vcpu_to_vnode.h,
+                             d->vnuma.vcpu_to_vnode,
+                             d->max_vcpus) != 0 )
+            goto vnumainfo_out;
+
+        if ( __copy_to_guest(mtopology.nr_vnodes.h, &d->vnuma.nr_vnodes, 1) != 0 )
+            goto vnumainfo_out;
+
+        rc = 0;
+
+ vnumainfo_out:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
         rc = arch_memory_op(op, arg);
         break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 01a3652..e6417d4 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
 #include "xen.h"
 #include "grant_table.h"
 #include "hvm/save.h"
+#include "vnuma.h"
 
 #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
 
@@ -869,6 +870,32 @@ struct xen_domctl_set_max_evtchn {
 typedef struct xen_domctl_set_max_evtchn xen_domctl_set_max_evtchn_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_max_evtchn_t);
 
+/*
+ * XEN_DOMCTL_setvnumainfo: sets the vNUMA topology
+ * parameters from the toolstack.
+ */
+struct xen_domctl_vnuma {
+    uint32_t nr_vnodes;
+    uint32_t __pad;
+    XEN_GUEST_HANDLE_64(uint) vdistance;
+    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+    /*
+     * vnode-to-physical-NUMA-node mask.
+     * This will be kept on a per-domain basis
+     * for requests by consumers such as
+     * vNUMA-aware ballooning.
+     */
+    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+    /*
+     * Memory ranges that a vNUMA node can represent.
+     * If more than one, it is a linked list.
+     */
+    XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
 struct xen_domctl {
     uint32_t cmd;
 #define XEN_DOMCTL_createdomain                   1
@@ -938,6 +965,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_setnodeaffinity               68
 #define XEN_DOMCTL_getnodeaffinity               69
 #define XEN_DOMCTL_set_max_evtchn                70
+#define XEN_DOMCTL_setvnumainfo                  71
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -998,6 +1026,7 @@ struct xen_domctl {
         struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_vnuma             vnuma;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 7a26dee..80bb0d8 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -339,6 +339,14 @@ struct xen_pod_target {
 };
 typedef struct xen_pod_target xen_pod_target_t;
 
+/*
+ * XENMEM_get_vnuma_info is used by the caller to retrieve
+ * the vNUMA topology constructed for a particular domain.
+ *
+ * The data exchanged is presented by vnuma_topology_info.
+ */
+#define XENMEM_get_vnuma_info 25
+
 #if defined(__XEN__) || defined(__XEN_TOOLS__)
 
 #ifndef uint64_aligned_t
diff --git a/xen/include/public/vnuma.h b/xen/include/public/vnuma.h
new file mode 100644
index 0000000..9238889
--- /dev/null
+++ b/xen/include/public/vnuma.h
@@ -0,0 +1,56 @@
+#ifndef _XEN_PUBLIC_VNUMA_H
+#define _XEN_PUBLIC_VNUMA_H
+
+#include "xen.h"
+
+/*
+ * The following structures are used to represent a vNUMA
+ * topology to the guest if requested.
+ */
+
+/*
+ * Memory ranges can be used to define
+ * vNUMA memory node boundaries by a
+ * linked list. As of now, only one range
+ * per domain is supported.
+ */
+struct vmemrange {
+    uint64_t start, end;
+    uint64_t __padm;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * The vNUMA topology specifies the number of vNUMA
+ * nodes, the distance table, memory ranges and the
+ * vcpu mapping provided for guests.
+ */
+
+struct vnuma_topology_info {
+    /* IN */
+    domid_t domid;
+    uint32_t _pad;
+    /* OUT */
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t _padn;
+    } nr_vnodes;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t _padd;
+    } vdistance;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t _padv;
+    } vcpu_to_vnode;
+    union {
+        XEN_GUEST_HANDLE(vmemrange_t) h;
+        uint64_t _padm;
+    } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+#endif
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index a057069..77853e8 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,13 @@ extern unsigned int xen_processor_pmbits;
 
 extern bool_t opt_dom0_vcpus_pin;
 
+struct vnuma_info {
+    unsigned int nr_vnodes;
+    unsigned int pad;
+    unsigned int *vdistance;
+    unsigned int *vcpu_to_vnode;
+    unsigned int *vnode_to_pnode;
+    struct vmemrange *vmemrange;
+};
+
 #endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index cbdf377..3765eae 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -417,6 +417,7 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
    spinlock_t node_affinity_lock;
+    struct vnuma_info vnuma;
 };
 
 struct domain_setup_info
-- 
1.7.10.4
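
P.S. For illustration only, not part of this series: a minimal sketch of how a
Linux PV guest could consume XENMEM_get_vnuma_info once the public vnuma.h
above is mirrored into the kernel's Xen interface headers. The function name
xen_get_vnuma_topology, the <xen/interface/vnuma.h> include path and the
buffer sizing by num_possible_cpus() (mirroring the hypervisor's
nr_vnodes <= max_vcpus check in memory.c) are assumptions of the sketch.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <asm/xen/hypercall.h>     /* HYPERVISOR_memory_op()            */
#include <asm/xen/interface.h>     /* set_xen_guest_handle()            */
#include <xen/interface/memory.h>  /* XENMEM_get_vnuma_info             */
#include <xen/interface/vnuma.h>   /* assumed mirror of public/vnuma.h  */

static int __init xen_get_vnuma_topology(void)
{
	struct vnuma_topology_info topo = { .domid = DOMID_SELF };
	unsigned int nr_cpus = num_possible_cpus();
	unsigned int nr_vnodes = 0;
	unsigned int *vdistance, *vcpu_to_vnode;
	struct vmemrange *vmemrange;
	int rc = -ENOMEM;

	/*
	 * Worst-case buffers: the hypervisor refuses topologies with more
	 * vnodes than vcpus, so nr_cpus bounds every array it copies back.
	 */
	vdistance = kcalloc(nr_cpus * nr_cpus, sizeof(*vdistance), GFP_KERNEL);
	vcpu_to_vnode = kcalloc(nr_cpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
	vmemrange = kcalloc(nr_cpus, sizeof(*vmemrange), GFP_KERNEL);
	if (!vdistance || !vcpu_to_vnode || !vmemrange)
		goto out;

	/* Point the OUT handles at guest buffers, as do_memory_op() expects. */
	set_xen_guest_handle(topo.nr_vnodes.h, &nr_vnodes);
	set_xen_guest_handle(topo.vdistance.h, vdistance);
	set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);
	set_xen_guest_handle(topo.vmemrange.h, vmemrange);

	rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &topo);
	if (rc == 0)
		pr_info("xen: vNUMA topology with %u vnode(s)\n", nr_vnodes);
	/* ...feed vmemrange/vdistance/vcpu_to_vnode into the NUMA code... */

 out:
	kfree(vmemrange);
	kfree(vcpu_to_vnode);
	kfree(vdistance);
	return rc;
}

The same structures are what XEN_DOMCTL_setvnumainfo consumes on the toolstack
side, so a dom0 wrapper would fill xen_domctl_vnuma analogously.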