From: Elena Ufimtseva
Subject: [PATCH v6 01/10] xen: vnuma topology and subop hypercalls
Date: Fri, 18 Jul 2014 01:50:00 -0400
Message-ID: <1405662609-31486-2-git-send-email-ufimtseva@gmail.com>
References: <1405662609-31486-1-git-send-email-ufimtseva@gmail.com>
In-Reply-To: <1405662609-31486-1-git-send-email-ufimtseva@gmail.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: xen-devel@lists.xen.org
Cc: keir@xen.org, Ian.Campbell@citrix.com, stefano.stabellini@eu.citrix.com,
	george.dunlap@eu.citrix.com, msw@linux.com, dario.faggioli@citrix.com,
	lccycc123@gmail.com, ian.jackson@eu.citrix.com, JBeulich@suse.com,
	Elena Ufimtseva
List-Id: xen-devel@lists.xenproject.org

Define the interface, structures and hypercalls for the toolstack to
build a vNUMA topology and for guests that wish to retrieve it.
Two subop hypercalls are introduced by this patch:
XEN_DOMCTL_setvnumainfo to define the vNUMA topology per domain, and
XENMEM_get_vnumainfo for a guest to retrieve that topology.

Signed-off-by: Elena Ufimtseva
---
 xen/common/domain.c         |   13 ++++
 xen/common/domctl.c         |  167 +++++++++++++++++++++++++++++++++++++++++++
 xen/common/memory.c         |   62 ++++++++++++++++
 xen/include/public/domctl.h |   29 ++++++++
 xen/include/public/memory.h |   47 +++++++++++-
 xen/include/xen/domain.h    |   11 +++
 xen/include/xen/sched.h     |    1 +
 7 files changed, 329 insertions(+), 1 deletion(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index cd64aea..895584a 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -584,6 +584,18 @@ int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
     return 0;
 }
 
+void vnuma_destroy(struct vnuma_info *vnuma)
+{
+    if ( vnuma )
+    {
+        xfree(vnuma->vmemrange);
+        xfree(vnuma->vcpu_to_vnode);
+        xfree(vnuma->vdistance);
+        xfree(vnuma->vnode_to_pnode);
+        xfree(vnuma);
+    }
+}
+
 int domain_kill(struct domain *d)
 {
     int rc = 0;
@@ -602,6 +614,7 @@ int domain_kill(struct domain *d)
         evtchn_destroy(d);
         gnttab_release_mappings(d);
         tmem_destroy(d->tmem_client);
+        vnuma_destroy(d->vnuma);
         domain_set_outstanding_pages(d, 0);
         d->tmem_client = NULL;
         /* fallthrough */
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index c326aba..7464284 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -297,6 +297,144 @@ int vcpuaffinity_params_invalid(const xen_domctl_vcpuaffinity_t *vcpuaff)
             guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
 }
 
+/*
+ * Allocates memory for vNUMA; **vnuma should be NULL on entry.
+ * The caller must make sure the domain has max_pages and the
+ * number of vcpus set.
+ * Verifies that no single allocation exceeds PAGE_SIZE.
+ */
+static int vnuma_alloc(struct vnuma_info **vnuma,
+                       unsigned int nr_vnodes,
+                       unsigned int nr_vcpus,
+                       unsigned int dist_size)
+{
+    struct vnuma_info *v;
+
+    if ( vnuma && *vnuma )
+        return -EINVAL;
+
+    v = *vnuma;
+    /*
+     * Check whether any of the xmallocs below would exceed PAGE_SIZE.
+     * If so, consider it an error for now.
+     */
+    if ( nr_vnodes > PAGE_SIZE / sizeof(nr_vnodes) ||
+         nr_vcpus > PAGE_SIZE / sizeof(nr_vcpus) ||
+         nr_vnodes > PAGE_SIZE / sizeof(struct vmemrange) ||
+         dist_size > PAGE_SIZE / sizeof(dist_size) )
+        return -EINVAL;
+
+    v = xzalloc(struct vnuma_info);
+    if ( !v )
+        return -ENOMEM;
+
+    v->vdistance = xmalloc_array(unsigned int, dist_size);
+    v->vmemrange = xmalloc_array(vmemrange_t, nr_vnodes);
+    v->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
+    v->vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
+
+    if ( v->vdistance == NULL || v->vmemrange == NULL ||
+         v->vcpu_to_vnode == NULL || v->vnode_to_pnode == NULL )
+    {
+        vnuma_destroy(v);
+        return -ENOMEM;
+    }
+
+    *vnuma = v;
+
+    return 0;
+}
+
+/*
+ * Allocate memory and construct one vNUMA node,
+ * set default parameters, assign all memory and
+ * vcpus to this node, and set the distance to 10.
+ */
+static long vnuma_fallback(const struct domain *d,
+                           struct vnuma_info **vnuma)
+{
+    struct vnuma_info *v;
+    long ret;
+
+    /* Will not destroy vNUMA here; destroy it before calling this. */
+    if ( vnuma && *vnuma )
+        return -EINVAL;
+
+    v = *vnuma;
+    ret = vnuma_alloc(&v, 1, d->max_vcpus, 1);
+    if ( ret )
+        return ret;
+
+    v->vmemrange[0].start = 0;
+    v->vmemrange[0].end = (uint64_t)d->max_pages << PAGE_SHIFT;
+    v->vdistance[0] = 10;
+    v->vnode_to_pnode[0] = NUMA_NO_NODE;
+    memset(v->vcpu_to_vnode, 0, d->max_vcpus * sizeof(*v->vcpu_to_vnode));
+    v->nr_vnodes = 1;
+
+    *vnuma = v;
+
+    return 0;
+}
+
+/*
+ * Construct the vNUMA topology from the u_vnuma struct and return
+ * it in dst.
+ */
+long vnuma_init(const struct xen_domctl_vnuma *u_vnuma,
+                const struct domain *d,
+                struct vnuma_info **dst)
+{
+    unsigned int dist_size, nr_vnodes = 0;
+    long ret;
+    struct vnuma_info *v = NULL;
+
+    ret = -EINVAL;
+
+    /* If the vNUMA topology is already set, just exit. */
+    if ( !u_vnuma || *dst )
+        return ret;
+
+    nr_vnodes = u_vnuma->nr_vnodes;
+
+    if ( nr_vnodes == 0 )
+        return ret;
+
+    if ( nr_vnodes > (UINT_MAX / nr_vnodes) )
+        return ret;
+
+    dist_size = nr_vnodes * nr_vnodes;
+
+    ret = vnuma_alloc(&v, nr_vnodes, d->max_vcpus, dist_size);
+    if ( ret )
+        return ret;
+
+    /* On failure, fall back to a single vNUMA node and report success. */
+    ret = 0;
+
+    if ( copy_from_guest(v->vdistance, u_vnuma->vdistance, dist_size) )
+        goto vnuma_onenode;
+    if ( copy_from_guest(v->vmemrange, u_vnuma->vmemrange, nr_vnodes) )
+        goto vnuma_onenode;
+    if ( copy_from_guest(v->vcpu_to_vnode, u_vnuma->vcpu_to_vnode,
+                         d->max_vcpus) )
+        goto vnuma_onenode;
+    if ( copy_from_guest(v->vnode_to_pnode, u_vnuma->vnode_to_pnode,
+                         nr_vnodes) )
+        goto vnuma_onenode;
+
+    v->nr_vnodes = nr_vnodes;
+    *dst = v;
+
+    return ret;
+
+vnuma_onenode:
+    vnuma_destroy(v);
+    return vnuma_fallback(d, dst);
+}
+
 long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 {
     long ret = 0;
@@ -967,6 +1105,35 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
     }
     break;
 
+    case XEN_DOMCTL_setvnumainfo:
+    {
+        struct vnuma_info *v = NULL;
+
+        ret = -EFAULT;
+        if ( guest_handle_is_null(op->u.vnuma.vdistance) ||
+             guest_handle_is_null(op->u.vnuma.vmemrange) ||
+             guest_handle_is_null(op->u.vnuma.vcpu_to_vnode) ||
+             guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+            break;
+
+        ret = -EINVAL;
+
+        ret = vnuma_init(&op->u.vnuma, d, &v);
+        if ( ret < 0 || v == NULL )
+            break;
+
+        /* Overwrite any existing vNUMA topology for the domain. */
+        if ( d->vnuma )
+            vnuma_destroy(d->vnuma);
+
+        domain_lock(d);
+        d->vnuma = v;
+        domain_unlock(d);
+
+        ret = 0;
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, d, u_domctl);
         break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index c2dd31b..925b9fc 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -969,6 +969,68 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 
         break;
 
+    case XENMEM_get_vnumainfo:
+    {
+        struct vnuma_topology_info topology;
+        struct domain *d;
+        unsigned int dom_vnodes = 0;
+
+        /*
+         * The guest passes nr_vnodes and nr_vcpus, so we know how much
+         * memory the guest has allocated for the output arrays.
+         */
+        if ( copy_from_guest(&topology, arg, 1) ||
+             guest_handle_is_null(topology.vmemrange.h) ||
+             guest_handle_is_null(topology.vdistance.h) ||
+             guest_handle_is_null(topology.vcpu_to_vnode.h) )
+            return -EFAULT;
+
+        if ( (d = rcu_lock_domain_by_any_id(topology.domid)) == NULL )
+            return -ESRCH;
+
+        rc = -EOPNOTSUPP;
+        if ( d->vnuma == NULL )
+            goto vnumainfo_out;
+
+        if ( d->vnuma->nr_vnodes == 0 )
+            goto vnumainfo_out;
+
+        dom_vnodes = d->vnuma->nr_vnodes;
+
+        /*
+         * The guest's nr_vnodes and nr_vcpus may differ from the domain's
+         * vNUMA configuration.  Check them here to make sure we do not
+         * overflow the guest-provided buffers.
+         */
+        rc = -ENOBUFS;
+        if ( topology.nr_vnodes < dom_vnodes ||
+             topology.nr_vcpus < d->max_vcpus )
+            goto vnumainfo_out;
+
+        rc = -EFAULT;
+
+        if ( copy_to_guest(topology.vmemrange.h, d->vnuma->vmemrange,
+                           dom_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( copy_to_guest(topology.vdistance.h, d->vnuma->vdistance,
+                           dom_vnodes * dom_vnodes) != 0 )
+            goto vnumainfo_out;
+
+        if ( copy_to_guest(topology.vcpu_to_vnode.h, d->vnuma->vcpu_to_vnode,
+                           d->max_vcpus) != 0 )
+            goto vnumainfo_out;
+
+        topology.nr_vnodes = dom_vnodes;
+
+        if ( copy_to_guest(arg, &topology, 1) != 0 )
+            goto vnumainfo_out;
+        rc = 0;
+
+ vnumainfo_out:
+        rcu_unlock_domain(d);
+        break;
+    }
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 5b11bbf..5ee74f4 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
 #include "xen.h"
 #include "grant_table.h"
 #include "hvm/save.h"
+#include "memory.h"
 
 #define XEN_DOMCTL_INTERFACE_VERSION 0x0000000a
 
@@ -934,6 +935,32 @@ struct xen_domctl_vcpu_msrs {
 };
 typedef struct xen_domctl_vcpu_msrs xen_domctl_vcpu_msrs_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpu_msrs_t);
+
+/*
+ * Used in XEN_DOMCTL_setvnumainfo to set the
+ * vNUMA domain topology.
+ */
+struct xen_domctl_vnuma {
+    uint32_t nr_vnodes;
+    uint32_t _pad;
+    XEN_GUEST_HANDLE_64(uint) vdistance;
+    XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+
+    /*
+     * vnode-to-physical-NUMA-node mapping.
+     * This is kept on a per-domain basis for interested
+     * consumers, such as NUMA-aware ballooning.
+     */
+    XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+
+    /*
+     * Memory ranges for each vNUMA node.
+     */
+    XEN_GUEST_HANDLE_64(vmemrange_t) vmemrange;
+};
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
 #endif
 
 struct xen_domctl {
@@ -1008,6 +1035,7 @@ struct xen_domctl {
 #define XEN_DOMCTL_cacheflush                    71
 #define XEN_DOMCTL_get_vcpu_msrs                 72
 #define XEN_DOMCTL_set_vcpu_msrs                 73
+#define XEN_DOMCTL_setvnumainfo                  74
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1068,6 +1096,7 @@ struct xen_domctl {
         struct xen_domctl_cacheflush        cacheflush;
         struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
         struct xen_domctl_gdbsx_domstatus   gdbsx_domstatus;
+        struct xen_domctl_vnuma             vnuma;
         uint8_t                             pad[128];
     } u;
 };
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 2c57aa0..2c212e1 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -521,9 +521,54 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
  * The zero value is appropriate.
  */
 
+/* vNUMA node memory range. */
+struct vmemrange {
+    uint64_t start, end;
+};
+
+typedef struct vmemrange vmemrange_t;
+DEFINE_XEN_GUEST_HANDLE(vmemrange_t);
+
+/*
+ * The vNUMA topology consists of the number of vNUMA nodes, the distance
+ * table, the memory ranges and the vcpu-to-vnode mapping provided to
+ * guests.  The XENMEM_get_vnumainfo hypercall expects the guest to supply
+ * nr_vnodes and nr_vcpus to indicate how much memory it has allocated for
+ * the output arrays.  After filling the guest's structures, nr_vnodes and
+ * nr_vcpus are copied back to the guest.
+ */
+struct vnuma_topology_info {
+    /* IN */
+    domid_t domid;
+    /* IN/OUT */
+    unsigned int nr_vnodes;
+    unsigned int nr_vcpus;
+    /* OUT */
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vdistance;
+    union {
+        XEN_GUEST_HANDLE(uint) h;
+        uint64_t pad;
+    } vcpu_to_vnode;
+    union {
+        XEN_GUEST_HANDLE(vmemrange_t) h;
+        uint64_t pad;
+    } vmemrange;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+/*
+ * XENMEM_get_vnumainfo is used by a guest to retrieve
+ * its vNUMA topology from the hypervisor.
+ */
+#define XENMEM_get_vnumainfo 26
+
 #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
 
-/* Next available subop number is 26 */
+/* Next available subop number is 27 */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index bb1c398..d29a84d 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -89,4 +89,15 @@ extern unsigned int xen_processor_pmbits;
 
 extern bool_t opt_dom0_vcpus_pin;
 
+/* Per-domain vNUMA topology. */
+struct vnuma_info {
+    unsigned int nr_vnodes;
+    unsigned int *vdistance;
+    unsigned int *vcpu_to_vnode;
+    unsigned int *vnode_to_pnode;
+    struct vmemrange *vmemrange;
+};
+
+void vnuma_destroy(struct vnuma_info *vnuma);
+
 #endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index d5bc461..71e4218 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -447,6 +447,7 @@ struct domain
     nodemask_t node_affinity;
     unsigned int last_alloc_node;
     spinlock_t node_affinity_lock;
+    struct vnuma_info *vnuma;
 };
 
 struct domain_setup_info
-- 
1.7.10.4
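
For reviewers who want to see the intended calling convention, here is a
minimal, illustrative guest-side sketch of XENMEM_get_vnumainfo; it is not
part of this series. It assumes a Linux guest whose Xen interface headers
mirror struct vnuma_topology_info and the XENMEM_get_vnumainfo definition
above, and uses the standard HYPERVISOR_memory_op wrapper,
set_xen_guest_handle and kcalloc/kfree helpers. The function name
xen_fetch_vnuma() is made up purely for illustration.

/*
 * Sketch only: fetch this guest's vNUMA topology.  Assumes the
 * vnuma_topology_info layout and XENMEM_get_vnumainfo value from the
 * hypervisor patch are mirrored in the guest's Xen interface headers.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <xen/interface/memory.h>   /* vnuma_topology_info (assumed here) */
#include <asm/xen/hypercall.h>      /* HYPERVISOR_memory_op */
#include <asm/xen/interface.h>      /* set_xen_guest_handle */

static int xen_fetch_vnuma(unsigned int max_nodes, unsigned int max_cpus)
{
	struct vnuma_topology_info topo = { .domid = DOMID_SELF };
	unsigned int *vdistance, *vcpu_to_vnode;
	struct vmemrange *vmemrange;
	int rc = -ENOMEM;

	/* Output buffers sized from the guest's own upper bounds. */
	vdistance = kcalloc(max_nodes * max_nodes, sizeof(*vdistance), GFP_KERNEL);
	vcpu_to_vnode = kcalloc(max_cpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
	vmemrange = kcalloc(max_nodes, sizeof(*vmemrange), GFP_KERNEL);
	if (!vdistance || !vcpu_to_vnode || !vmemrange)
		goto out;

	/* Tell the hypervisor how much room was allocated (IN fields). */
	topo.nr_vnodes = max_nodes;
	topo.nr_vcpus = max_cpus;
	set_xen_guest_handle(topo.vdistance.h, vdistance);
	set_xen_guest_handle(topo.vcpu_to_vnode.h, vcpu_to_vnode);
	set_xen_guest_handle(topo.vmemrange.h, vmemrange);

	rc = HYPERVISOR_memory_op(XENMEM_get_vnumainfo, &topo);
	if (rc == 0)
		/* nr_vnodes now holds the number of vnodes actually filled in. */
		pr_info("vNUMA: %u virtual node(s)\n", topo.nr_vnodes);

out:
	kfree(vmemrange);
	kfree(vcpu_to_vnode);
	kfree(vdistance);
	return rc;
}

Since the hypervisor side returns -ENOBUFS when the supplied nr_vnodes or
nr_vcpus are smaller than the domain's actual configuration, a caller could
retry with larger buffers on that error.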