* [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
@ 2013-09-13 8:49 Elena Ufimtseva
2013-09-13 9:31 ` Andrew Cooper
From: Elena Ufimtseva @ 2013-09-13 8:49 UTC (permalink / raw)
To: xen-devel
Cc: keir, Elena Ufimtseva, stefano.stabellini, george.dunlap, msw,
dario.faggioli, lccycc123, JBeulich
Defines a XENMEM subop hypercall for PV vNUMA-enabled guests and provides
vNUMA topology information from the per-domain vNUMA topology build info.
Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
---
Changes since v1:
* changed type from int to uint and unsigned in vNUMA structures;
* removed unnecessary file vnuma.h as requested in review;
* added domain_vnuma_destroy;
* fixed usage of rcu_lock_domain_by_any_id;
* removed unnecessary guest_handle_cast calls;
* coding style fixes;
---
xen/common/domain.c | 25 +++++++++++++++-
xen/common/domctl.c | 68 ++++++++++++++++++++++++++++++++++++++++++-
xen/common/memory.c | 56 +++++++++++++++++++++++++++++++++++
xen/include/public/domctl.h | 15 +++++++++-
xen/include/public/memory.h | 9 +++++-
xen/include/xen/domain.h | 11 +++++++
xen/include/xen/sched.h | 1 +
xen/include/xen/vnuma.h | 27 +++++++++++++++++
8 files changed, 208 insertions(+), 4 deletions(-)
create mode 100644 xen/include/xen/vnuma.h
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 9390a22..bb414cf 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -227,6 +227,7 @@ struct domain *domain_create(
spin_lock_init(&d->node_affinity_lock);
d->node_affinity = NODE_MASK_ALL;
d->auto_node_affinity = 1;
+ d->vnuma.nr_vnodes = 0;
spin_lock_init(&d->shutdown_lock);
d->shutdown_code = -1;
@@ -530,8 +531,9 @@ int domain_kill(struct domain *d)
evtchn_destroy(d);
gnttab_release_mappings(d);
tmem_destroy(d->tmem);
- domain_set_outstanding_pages(d, 0);
d->tmem = NULL;
+ domain_set_outstanding_pages(d, 0);
+ domain_vnuma_destroy(&d->vnuma);
/* fallthrough */
case DOMDYING_dying:
rc = domain_relinquish_resources(d);
@@ -1279,6 +1281,27 @@ int continue_hypercall_on_cpu(
return 0;
}
+void domain_vnuma_destroy(struct domain_vnuma_info *v)
+{
+ if (v->vnuma_memblks) {
+ xfree(v->vnuma_memblks);
+ v->vnuma_memblks = NULL;
+ }
+ if (v->vcpu_to_vnode) {
+ xfree(v->vcpu_to_vnode);
+ v->vcpu_to_vnode = NULL;
+ }
+ if (v->vdistance) {
+ xfree(v->vdistance);
+ v->vdistance = NULL;
+ }
+ if (v->vnode_to_pnode) {
+ xfree(v->vnode_to_pnode);
+ v->vnode_to_pnode = NULL;
+ }
+ v->nr_vnodes = 0;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 9760d50..e5d05c7 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -29,6 +29,7 @@
#include <asm/page.h>
#include <public/domctl.h>
#include <xsm/xsm.h>
+#include <xen/vnuma.h>
static DEFINE_SPINLOCK(domctl_lock);
DEFINE_SPINLOCK(vcpu_alloc_lock);
@@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
ret = set_global_virq_handler(d, virq);
}
break;
-
+ case XEN_DOMCTL_setvnumainfo:
+ {
+ unsigned int i, j, nr_vnodes, dist_size;
+ unsigned int dist, vmap, vntop;
+ vnuma_memblk_t vmemblk;
+
+ ret = -EINVAL;
+ dist = i = j = 0;
+ nr_vnodes = op->u.vnuma.nr_vnodes;
+ if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
+ break;
+ d->vnuma.nr_vnodes = nr_vnodes;
+ dist_size = nr_vnodes * nr_vnodes;
+ if (
+ (d->vnuma.vdistance = xmalloc_bytes(
+ sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
+ (d->vnuma.vnuma_memblks = xmalloc_bytes(
+ sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
+ (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
+ sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
+ (d->vnuma.vnode_to_pnode = xmalloc_bytes(
+ sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
+ goto err_dom;
+ for ( i = 0; i < nr_vnodes; i++ )
+ for ( j = 0; j < nr_vnodes; j++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&dist,
+ op->u.vnuma.vdistance,
+ __vnode_distance_offset(d, i, j), 1)) )
+ goto err_dom;
+ __vnode_distance_set(d, i, j, dist);
+ }
+ for ( i = 0; i < nr_vnodes; i++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&vmemblk,
+ op->u.vnuma.vnuma_memblks, i, 1)) )
+ goto err_dom;
+ d->vnuma.vnuma_memblks[i].start = vmemblk.start;
+ d->vnuma.vnuma_memblks[i].end = vmemblk.end;
+ }
+ for ( i = 0; i < d->max_vcpus; i++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&vmap,
+ op->u.vnuma.vcpu_to_vnode, i, 1)) )
+ goto err_dom;
+ d->vnuma.vcpu_to_vnode[i] = vmap;
+ }
+ if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
+ {
+ for ( i = 0; i < nr_vnodes; i++ )
+ {
+ if ( unlikely(__copy_from_guest_offset(&vntop,
+ op->u.vnuma.vnode_to_pnode, i, 1)) )
+ goto err_dom;
+ d->vnuma.vnode_to_pnode[i] = vntop;
+ }
+ }
+ else
+ for(i = 0; i < nr_vnodes; i++)
+ d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
+ ret = 0;
+ break;
+err_dom:
+ ret = -EINVAL;
+ }
+ break;
default:
ret = arch_do_domctl(op, d, u_domctl);
break;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 50b740f..6dc2452 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -28,6 +28,7 @@
#include <public/memory.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
+#include <xen/vnuma.h>
struct memop_args {
/* INPUT */
@@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
rcu_unlock_domain(d);
break;
+ case XENMEM_get_vnuma_info:
+ {
+ int i, j;
+ struct vnuma_topology_info mtopology;
+ struct vnuma_topology_info touser_topo;
+ struct domain *d;
+ unsigned int max_pages, max_vcpus, nr_vnodes;
+ vnuma_memblk_t *vblks;
+ rc = -EINVAL;
+ if ( guest_handle_is_null(arg) )
+ return rc;
+ if( copy_from_guest(&mtopology, arg, 1) )
+ return -EINVAL;
+ if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
+ return -EINVAL;
+ touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
+ max_pages = d->max_pages;
+ max_vcpus = d->max_vcpus;
+ rcu_unlock_domain(d);
+
+ nr_vnodes = touser_topo.nr_vnodes;
+ rc = copy_to_guest(arg, &touser_topo, 1);
+ if ( rc )
+ return -EFAULT;
+ if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
+ return -EFAULT;
+ vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
+ if ( vblks == NULL )
+ return -EFAULT;
+ for ( i = 0; i < nr_vnodes; i++ )
+ {
+ if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
+ &d->vnuma.vnuma_memblks[i], 1) < 0 )
+ goto out;
+ }
+ for ( i = 0; i < touser_topo.nr_vnodes; i++ )
+ for ( j = 0; j < touser_topo.nr_vnodes; j++ )
+ {
+ if ( copy_to_guest_offset(mtopology.vdistance,
+ __vnode_distance_offset(d, i, j),
+ &__vnode_distance(d, i, j), 1) )
+ goto out;
+ }
+ for ( i = 0; i < d->max_vcpus ; i++ )
+ {
+ if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
+ &d->vnuma.vcpu_to_vnode[i], 1) )
+ goto out;
+ }
+ return rc;
+out:
+ if ( vblks ) xfree(vblks);
+ return rc;
+ break;
+ }
default:
rc = arch_memory_op(op, arg);
break;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 4c5b2bb..3574d0a 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -35,6 +35,7 @@
#include "xen.h"
#include "grant_table.h"
#include "hvm/save.h"
+#include "memory.h"
#define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
@@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
+struct xen_domctl_vnuma {
+ uint16_t nr_vnodes;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+ XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
+ XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
+};
+
+typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
+
struct xen_domctl {
uint32_t cmd;
#define XEN_DOMCTL_createdomain 1
@@ -920,6 +932,7 @@ struct xen_domctl {
#define XEN_DOMCTL_set_broken_page_p2m 67
#define XEN_DOMCTL_setnodeaffinity 68
#define XEN_DOMCTL_getnodeaffinity 69
+#define XEN_DOMCTL_setvnumainfo 70
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -979,6 +992,7 @@ struct xen_domctl {
struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
+ struct xen_domctl_vnuma vnuma;
uint8_t pad[128];
} u;
};
@@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
#endif /* __XEN_PUBLIC_DOMCTL_H__ */
-
/*
* Local variables:
* mode: C
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 7a26dee..28f6aaf 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
* Caller must be privileged or the hypercall fails.
*/
#define XENMEM_claim_pages 24
-
/*
* XENMEM_claim_pages flags - the are no flags at this time.
* The zero value is appropiate.
*/
+struct vnuma_memblk {
+ uint64_t start, end;
+};
+typedef struct vnuma_memblk vnuma_memblk_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
+
+#define XENMEM_get_vnuma_info 25
+
#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
index a057069..c9d53e3 100644
--- a/xen/include/xen/domain.h
+++ b/xen/include/xen/domain.h
@@ -4,6 +4,7 @@
#include <public/xen.h>
#include <asm/domain.h>
+#include <public/memory.h>
typedef union {
struct vcpu_guest_context *nat;
@@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
extern bool_t opt_dom0_vcpus_pin;
+struct domain_vnuma_info {
+ uint16_t nr_vnodes;
+ uint *vdistance;
+ uint *vcpu_to_vnode;
+ uint *vnode_to_pnode;
+ vnuma_memblk_t *vnuma_memblks;
+};
+
+void domain_vnuma_destroy(struct domain_vnuma_info *v);
+
#endif /* __XEN_DOMAIN_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ae6a3b8..cb023cf 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -377,6 +377,7 @@ struct domain
nodemask_t node_affinity;
unsigned int last_alloc_node;
spinlock_t node_affinity_lock;
+ struct domain_vnuma_info vnuma;
};
struct domain_setup_info
diff --git a/xen/include/xen/vnuma.h b/xen/include/xen/vnuma.h
new file mode 100644
index 0000000..0b41da0
--- /dev/null
+++ b/xen/include/xen/vnuma.h
@@ -0,0 +1,27 @@
+#ifndef _VNUMA_H
+#define _VNUMA_H
+#include <public/memory.h>
+
+/* DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); */
+
+struct vnuma_topology_info {
+ domid_t domid;
+ uint16_t nr_vnodes;
+ uint32_t _pad;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+ XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
+};
+typedef struct vnuma_topology_info vnuma_topology_info_t;
+DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
+
+#define __vnode_distance_offset(_dom, _i, _j) \
+ ( ((_j) * ((_dom)->vnuma.nr_vnodes)) + (_i) )
+
+#define __vnode_distance(_dom, _i, _j) \
+ ( (_dom)->vnuma.vdistance[__vnode_distance_offset((_dom), (_i), (_j))] )
+
+#define __vnode_distance_set(_dom, _i, _j, _v) \
+ do { __vnode_distance((_dom), (_i), (_j)) = (_v); } while(0)
+
+#endif
--
1.7.10.4
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-13 8:49 [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests Elena Ufimtseva
@ 2013-09-13 9:31 ` Andrew Cooper
2013-09-13 11:53 ` Jan Beulich
2013-09-13 11:00 ` Jan Beulich
2013-09-16 15:46 ` George Dunlap
From: Andrew Cooper @ 2013-09-13 9:31 UTC (permalink / raw)
To: Elena Ufimtseva
Cc: keir, stefano.stabellini, george.dunlap, msw, dario.faggioli,
lccycc123, xen-devel, JBeulich
On 13/09/2013 09:49, Elena Ufimtseva wrote:
> Defines a XENMEM subop hypercall for PV vNUMA-enabled guests and provides
> vNUMA topology information from the per-domain vNUMA topology build info.
>
> Signed-off-by: Elena Ufimtseva <ufimtseva@gmail.com>
>
> ---
> Changes since v1:
> * changed type from int to uint and unsigned in vNUMA structures;
> * removed unnecessary file vnuma.h as requested in review;
> * added domain_vnuma_destroy;
> * fixed usage of rcu_lock_domain_by_any_id;
> * removed unnecessary guest_handle_cast calls;
> * coding style fixes;
> ---
> xen/common/domain.c | 25 +++++++++++++++-
> xen/common/domctl.c | 68 ++++++++++++++++++++++++++++++++++++++++++-
> xen/common/memory.c | 56 +++++++++++++++++++++++++++++++++++
> xen/include/public/domctl.h | 15 +++++++++-
> xen/include/public/memory.h | 9 +++++-
> xen/include/xen/domain.h | 11 +++++++
> xen/include/xen/sched.h | 1 +
> xen/include/xen/vnuma.h | 27 +++++++++++++++++
> 8 files changed, 208 insertions(+), 4 deletions(-)
> create mode 100644 xen/include/xen/vnuma.h
>
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 9390a22..bb414cf 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -227,6 +227,7 @@ struct domain *domain_create(
> spin_lock_init(&d->node_affinity_lock);
> d->node_affinity = NODE_MASK_ALL;
> d->auto_node_affinity = 1;
> + d->vnuma.nr_vnodes = 0;
alloc_domain_struct() clears d, so this assignment of 0 is not needed.
>
> spin_lock_init(&d->shutdown_lock);
> d->shutdown_code = -1;
> @@ -530,8 +531,9 @@ int domain_kill(struct domain *d)
> evtchn_destroy(d);
> gnttab_release_mappings(d);
> tmem_destroy(d->tmem);
> - domain_set_outstanding_pages(d, 0);
Any particular reason for this function call to move? I can't see any reason.
> d->tmem = NULL;
> + domain_set_outstanding_pages(d, 0);
> + domain_vnuma_destroy(&d->vnuma);
> /* fallthrough */
> case DOMDYING_dying:
> rc = domain_relinquish_resources(d);
> @@ -1279,6 +1281,27 @@ int continue_hypercall_on_cpu(
> return 0;
> }
>
> +void domain_vnuma_destroy(struct domain_vnuma_info *v)
> +{
> + if (v->vnuma_memblks) {
> + xfree(v->vnuma_memblks);
> + v->vnuma_memblks = NULL;
> + }
Coding style (see CODING_STYLE in the root of the tree) so this would
become:
if ( v->vnuma_memblks )
{
xfree(v->vnuma_memblks);
v->vnuma_memblks = NULL;
}
However, xfree() (like regular free()) is happy with NULL pointers, so
you can drop the if altogether.
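Taking both points together, the whole function could then shrink to
something like this (a sketch only, using the fields from the patch):

    void domain_vnuma_destroy(struct domain_vnuma_info *v)
    {
        /* xfree() tolerates NULL, so no per-pointer checks are needed. */
        xfree(v->vnuma_memblks);
        v->vnuma_memblks = NULL;
        xfree(v->vcpu_to_vnode);
        v->vcpu_to_vnode = NULL;
        xfree(v->vdistance);
        v->vdistance = NULL;
        xfree(v->vnode_to_pnode);
        v->vnode_to_pnode = NULL;
        v->nr_vnodes = 0;
    }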
> + if (v->vcpu_to_vnode) {
> + xfree(v->vcpu_to_vnode);
> + v->vcpu_to_vnode = NULL;
> + }
> + if (v->vdistance) {
> + xfree(v->vdistance);
> + v->vdistance = NULL;
> + }
> + if (v->vnode_to_pnode) {
> + xfree(v->vnode_to_pnode);
> + v->vnode_to_pnode = NULL;
> + }
> + v->nr_vnodes = 0;
> +}
> +
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> index 9760d50..e5d05c7 100644
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -29,6 +29,7 @@
> #include <asm/page.h>
> #include <public/domctl.h>
> #include <xsm/xsm.h>
> +#include <xen/vnuma.h>
>
> static DEFINE_SPINLOCK(domctl_lock);
> DEFINE_SPINLOCK(vcpu_alloc_lock);
> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
> ret = set_global_virq_handler(d, virq);
> }
> break;
> -
Please leave this blank line in here.
> + case XEN_DOMCTL_setvnumainfo:
> + {
> + unsigned int i, j, nr_vnodes, dist_size;
> + unsigned int dist, vmap, vntop;
> + vnuma_memblk_t vmemblk;
> +
> + ret = -EINVAL;
> + dist = i = j = 0;
> + nr_vnodes = op->u.vnuma.nr_vnodes;
> + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
> + break;
nr_vnodes is unsigned. It cannot possibly be less than 0.
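In other words, the bounds check could simply be:

    if ( nr_vnodes == 0 || nr_vnodes > NR_CPUS )
        break;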
> + d->vnuma.nr_vnodes = nr_vnodes;
> + dist_size = nr_vnodes * nr_vnodes;
> + if (
> + (d->vnuma.vdistance = xmalloc_bytes(
> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
sizeof is an operator, not a function. As you are not passing a type,
the brackets are not required.
> + (d->vnuma.vnuma_memblks = xmalloc_bytes(
> + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
> + (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
> + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
> + (d->vnuma.vnode_to_pnode = xmalloc_bytes(
> + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
> + goto err_dom;
> + for ( i = 0; i < nr_vnodes; i++ )
> + for ( j = 0; j < nr_vnodes; j++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&dist,
> + op->u.vnuma.vdistance,
> + __vnode_distance_offset(d, i, j), 1)) )
> + goto err_dom;
This error logic is broken. A failure in __copy_from_guest_offset()
should result in -EFAULT, but will result in -EINVAL after following
err_dom;
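As a sketch of what is meant (not a tested change), the failure site could
set the error code itself, and err_dom could stop overwriting it and just
undo the allocations:

    if ( unlikely(__copy_from_guest_offset(&dist, op->u.vnuma.vdistance,
                                           __vnode_distance_offset(d, i, j),
                                           1)) )
    {
        ret = -EFAULT;
        goto err_dom;
    }
    ...
 err_dom:
    /* ret keeps the error code set at the failure site. */
    domain_vnuma_destroy(&d->vnuma);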
> + __vnode_distance_set(d, i, j, dist);
> + }
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vmemblk,
> + op->u.vnuma.vnuma_memblks, i, 1)) )
> + goto err_dom;
> + d->vnuma.vnuma_memblks[i].start = vmemblk.start;
> + d->vnuma.vnuma_memblks[i].end = vmemblk.end;
> + }
> + for ( i = 0; i < d->max_vcpus; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vmap,
> + op->u.vnuma.vcpu_to_vnode, i, 1)) )
> + goto err_dom;
> + d->vnuma.vcpu_to_vnode[i] = vmap;
> + }
> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
> + {
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vntop,
> + op->u.vnuma.vnode_to_pnode, i, 1)) )
> + goto err_dom;
> + d->vnuma.vnode_to_pnode[i] = vntop;
> + }
> + }
> + else
> + for(i = 0; i < nr_vnodes; i++)
> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
> + ret = 0;
> + break;
> +err_dom:
> + ret = -EINVAL;
> + }
> + break;
And an extra line in here please.
> default:
> ret = arch_do_domctl(op, d, u_domctl);
> break;
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 50b740f..6dc2452 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -28,6 +28,7 @@
> #include <public/memory.h>
> #include <xsm/xsm.h>
> #include <xen/trace.h>
> +#include <xen/vnuma.h>
>
> struct memop_args {
> /* INPUT */
> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
> rcu_unlock_domain(d);
>
> break;
> + case XENMEM_get_vnuma_info:
> + {
> + int i, j;
> + struct vnuma_topology_info mtopology;
> + struct vnuma_topology_info touser_topo;
> + struct domain *d;
> + unsigned int max_pages, max_vcpus, nr_vnodes;
> + vnuma_memblk_t *vblks;
>
> + rc = -EINVAL;
> + if ( guest_handle_is_null(arg) )
> + return rc;
> + if( copy_from_guest(&mtopology, arg, 1) )
> + return -EINVAL;
-EFAULT;
> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
> + return -EINVAL;
-ESRCH ( I think? )
> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
> + max_pages = d->max_pages;
> + max_vcpus = d->max_vcpus;
> + rcu_unlock_domain(d);
> +
> + nr_vnodes = touser_topo.nr_vnodes;
> + rc = copy_to_guest(arg, &touser_topo, 1);
> + if ( rc )
> + return -EFAULT;
> + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
> + return -EFAULT;
-EINVAL;
> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
> + if ( vblks == NULL )
> + return -EFAULT;
-ENOMEM;
Now, if you have an unconditional rc = -EFAULT then all your goto out
logic will be correct
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
> + &d->vnuma.vnuma_memblks[i], 1) < 0 )
> + goto out;
> + }
> + for ( i = 0; i < touser_topo.nr_vnodes; i++ )
> + for ( j = 0; j < touser_topo.nr_vnodes; j++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vdistance,
> + __vnode_distance_offset(d, i, j),
> + &__vnode_distance(d, i, j), 1) )
> + goto out;
> + }
> + for ( i = 0; i < d->max_vcpus ; i++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
> + &d->vnuma.vcpu_to_vnode[i], 1) )
> + goto out;
> + }
> + return rc;
But this would need to turn into rc = 0; to avoid returning -EFAULT in the
success case, and to avoid unconditionally leaking vblks.
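Putting those suggestions together, a rough sketch of the reshaped handler
(error codes as noted above; the elided middle is unchanged):

    if ( copy_from_guest(&mtopology, arg, 1) )
        return -EFAULT;
    if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
        return -ESRCH;
    ...
    vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
    if ( vblks == NULL )
        return -ENOMEM;

    rc = -EFAULT;
    for ( i = 0; i < nr_vnodes; i++ )
        if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
                                  &d->vnuma.vnuma_memblks[i], 1) )
            goto out;
    /* ... remaining copy loops, each using "goto out" on failure ... */
    rc = 0;
 out:
    xfree(vblks);
    return rc;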
> +out:
> + if ( vblks ) xfree(vblks);
> + return rc;
> + break;
> + }
> default:
> rc = arch_memory_op(op, arg);
> break;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 4c5b2bb..3574d0a 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -35,6 +35,7 @@
> #include "xen.h"
> #include "grant_table.h"
> #include "hvm/save.h"
> +#include "memory.h"
>
> #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
>
> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
> typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>
> +struct xen_domctl_vnuma {
> + uint16_t nr_vnodes;
> + XEN_GUEST_HANDLE_64(uint) vdistance;
> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
> +
> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
> +
> struct xen_domctl {
> uint32_t cmd;
> #define XEN_DOMCTL_createdomain 1
> @@ -920,6 +932,7 @@ struct xen_domctl {
> #define XEN_DOMCTL_set_broken_page_p2m 67
> #define XEN_DOMCTL_setnodeaffinity 68
> #define XEN_DOMCTL_getnodeaffinity 69
> +#define XEN_DOMCTL_setvnumainfo 70
> #define XEN_DOMCTL_gdbsx_guestmemio 1000
> #define XEN_DOMCTL_gdbsx_pausevcpu 1001
> #define XEN_DOMCTL_gdbsx_unpausevcpu 1002
> @@ -979,6 +992,7 @@ struct xen_domctl {
> struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
> struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
> struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
> + struct xen_domctl_vnuma vnuma;
> uint8_t pad[128];
> } u;
> };
> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
>
> #endif /* __XEN_PUBLIC_DOMCTL_H__ */
> -
Spurious whitespace change. Please remove
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 7a26dee..28f6aaf 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
> * Caller must be privileged or the hypercall fails.
> */
> #define XENMEM_claim_pages 24
> -
And here
> /*
> * XENMEM_claim_pages flags - the are no flags at this time.
> * The zero value is appropiate.
> */
>
> +struct vnuma_memblk {
> + uint64_t start, end;
> +};
> +typedef struct vnuma_memblk vnuma_memblk_t;
> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
> +
> +#define XENMEM_get_vnuma_info 25
> +
> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>
> #endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
> index a057069..c9d53e3 100644
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -4,6 +4,7 @@
>
> #include <public/xen.h>
> #include <asm/domain.h>
> +#include <public/memory.h>
>
> typedef union {
> struct vcpu_guest_context *nat;
> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>
> extern bool_t opt_dom0_vcpus_pin;
>
> +struct domain_vnuma_info {
> + uint16_t nr_vnodes;
> + uint *vdistance;
> + uint *vcpu_to_vnode;
> + uint *vnode_to_pnode;
> + vnuma_memblk_t *vnuma_memblks;
> +};
> +
> +void domain_vnuma_destroy(struct domain_vnuma_info *v);
> +
> #endif /* __XEN_DOMAIN_H__ */
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index ae6a3b8..cb023cf 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -377,6 +377,7 @@ struct domain
> nodemask_t node_affinity;
> unsigned int last_alloc_node;
> spinlock_t node_affinity_lock;
> + struct domain_vnuma_info vnuma;
> };
>
> struct domain_setup_info
> diff --git a/xen/include/xen/vnuma.h b/xen/include/xen/vnuma.h
> new file mode 100644
> index 0000000..0b41da0
> --- /dev/null
> +++ b/xen/include/xen/vnuma.h
> @@ -0,0 +1,27 @@
> +#ifndef _VNUMA_H
> +#define _VNUMA_H
> +#include <public/memory.h>
> +
> +/* DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); */
> +
> +struct vnuma_topology_info {
> + domid_t domid;
> + uint16_t nr_vnodes;
> + uint32_t _pad;
> + XEN_GUEST_HANDLE_64(uint) vdistance;
> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
> +typedef struct vnuma_topology_info vnuma_topology_info_t;
> +DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
> +
> +#define __vnode_distance_offset(_dom, _i, _j) \
> + ( ((_j) * ((_dom)->vnuma.nr_vnodes)) + (_i) )
> +
> +#define __vnode_distance(_dom, _i, _j) \
> + ( (_dom)->vnuma.vdistance[__vnode_distance_offset((_dom), (_i), (_j))] )
You expand _dom here twice, which is bad practice for macros.
I suggest static inline functions as a better alternative.
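A possible shape for such helpers (illustrative only, not part of the patch):

    static inline unsigned int vnode_distance_offset(const struct domain *d,
                                                     unsigned int i,
                                                     unsigned int j)
    {
        return j * d->vnuma.nr_vnodes + i;
    }

    static inline unsigned int vnode_distance(const struct domain *d,
                                              unsigned int i, unsigned int j)
    {
        return d->vnuma.vdistance[vnode_distance_offset(d, i, j)];
    }

    static inline void vnode_distance_set(struct domain *d, unsigned int i,
                                          unsigned int j, unsigned int dist)
    {
        d->vnuma.vdistance[vnode_distance_offset(d, i, j)] = dist;
    }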
~Andrew
> +
> +#define __vnode_distance_set(_dom, _i, _j, _v) \
> + do { __vnode_distance((_dom), (_i), (_j)) = (_v); } while(0)
> +
> +#endif
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-13 8:49 [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests Elena Ufimtseva
2013-09-13 9:31 ` Andrew Cooper
@ 2013-09-13 11:00 ` Jan Beulich
2013-09-16 15:46 ` George Dunlap
From: Jan Beulich @ 2013-09-13 11:00 UTC (permalink / raw)
To: Elena Ufimtseva
Cc: keir, lccycc123, george.dunlap, msw, dario.faggioli,
stefano.stabellini, xen-devel
>>> On 13.09.13 at 10:49, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -227,6 +227,7 @@ struct domain *domain_create(
> spin_lock_init(&d->node_affinity_lock);
> d->node_affinity = NODE_MASK_ALL;
> d->auto_node_affinity = 1;
> + d->vnuma.nr_vnodes = 0;
Pointless (struct domain gets zero-allocated)?
> @@ -530,8 +531,9 @@ int domain_kill(struct domain *d)
> evtchn_destroy(d);
> gnttab_release_mappings(d);
> tmem_destroy(d->tmem);
> - domain_set_outstanding_pages(d, 0);
> d->tmem = NULL;
> + domain_set_outstanding_pages(d, 0);
Why is this being moved?
> +void domain_vnuma_destroy(struct domain_vnuma_info *v)
> +{
> + if (v->vnuma_memblks) {
> + xfree(v->vnuma_memblks);
> + v->vnuma_memblks = NULL;
> + }
> + if (v->vcpu_to_vnode) {
> + xfree(v->vcpu_to_vnode);
> + v->vcpu_to_vnode = NULL;
> + }
> + if (v->vdistance) {
> + xfree(v->vdistance);
> + v->vdistance = NULL;
> + }
> + if (v->vnode_to_pnode) {
> + xfree(v->vnode_to_pnode);
> + v->vnode_to_pnode = NULL;
> + }
> + v->nr_vnodes = 0;
All the if()s are pointless - xfree() deals with NULL input quite fine.
And for the record, they also all don't match our coding style.
> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
> ret = set_global_virq_handler(d, virq);
> }
> break;
> -
Please keep that newline, and insert another one after the final
break of the code you add.
> + case XEN_DOMCTL_setvnumainfo:
> + {
> + unsigned int i, j, nr_vnodes, dist_size;
> + unsigned int dist, vmap, vntop;
> + vnuma_memblk_t vmemblk;
> +
> + ret = -EINVAL;
> + dist = i = j = 0;
> + nr_vnodes = op->u.vnuma.nr_vnodes;
> + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
> + break;
> + d->vnuma.nr_vnodes = nr_vnodes;
> + dist_size = nr_vnodes * nr_vnodes;
> + if (
> + (d->vnuma.vdistance = xmalloc_bytes(
> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
> + (d->vnuma.vnuma_memblks = xmalloc_bytes(
> + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
> + (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
> + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
> + (d->vnuma.vnode_to_pnode = xmalloc_bytes(
> + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
> + goto err_dom;
Coding style. Also, you leak all memory that you managed to get
hold of in the error case.
> + for ( i = 0; i < nr_vnodes; i++ )
> + for ( j = 0; j < nr_vnodes; j++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&dist,
__copy_from_guest_offset() without prior guest_handle_okay()?
And the error code should be -EFAULT in that case.
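For illustration, the kind of check meant here, done once before the copy
loops (a sketch):

    if ( !guest_handle_okay(op->u.vnuma.vdistance, dist_size) )
    {
        ret = -EFAULT;
        goto err_dom;
    }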
> + op->u.vnuma.vdistance,
> + __vnode_distance_offset(d, i, j), 1)) )
> + goto err_dom;
> + __vnode_distance_set(d, i, j, dist);
> + }
> + for ( i = 0; i < nr_vnodes; i++ )
If possible please fold this with the earlier loop.
> + {
> + if ( unlikely(__copy_from_guest_offset(&vmemblk,
> + op->u.vnuma.vnuma_memblks, i, 1)) )
> + goto err_dom;
> + d->vnuma.vnuma_memblks[i].start = vmemblk.start;
> + d->vnuma.vnuma_memblks[i].end = vmemblk.end;
> + }
> + for ( i = 0; i < d->max_vcpus; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vmap,
> + op->u.vnuma.vcpu_to_vnode, i, 1)) )
> + goto err_dom;
> + d->vnuma.vcpu_to_vnode[i] = vmap;
If the types are compatible, things like this don't need doing in a
loop - you could copy the whole array in one go.
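E.g., roughly (a sketch, assuming the handle's element type matches the
destination array):

    if ( copy_from_guest(d->vnuma.vcpu_to_vnode,
                         op->u.vnuma.vcpu_to_vnode, d->max_vcpus) )
    {
        ret = -EFAULT;
        goto err_dom;
    }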
> + }
> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
> + {
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vntop,
> + op->u.vnuma.vnode_to_pnode, i, 1)) )
> + goto err_dom;
> + d->vnuma.vnode_to_pnode[i] = vntop;
> + }
> + }
> + else
> + for(i = 0; i < nr_vnodes; i++)
Coding style.
> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
What's the value of using this domctl this way?
> + ret = 0;
> + break;
> +err_dom:
> + ret = -EINVAL;
"ret" should not be unconditionally set to a fixed value here, the
more that you set it almost first thing in the case statement.
> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
> rcu_unlock_domain(d);
>
> break;
> + case XENMEM_get_vnuma_info:
> + {
> + int i, j;
> + struct vnuma_topology_info mtopology;
> + struct vnuma_topology_info touser_topo;
> + struct domain *d;
> + unsigned int max_pages, max_vcpus, nr_vnodes;
> + vnuma_memblk_t *vblks;
>
> + rc = -EINVAL;
> + if ( guest_handle_is_null(arg) )
> + return rc;
> + if( copy_from_guest(&mtopology, arg, 1) )
> + return -EINVAL;
Consistency please, preferably such that you - as long as possible -
always return -E rather than rc.
Also the return value here again is -EFAULT.
> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
> + return -EINVAL;
And -ESRCH or some such here (please use existing code as
reference).
> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
> + max_pages = d->max_pages;
> + max_vcpus = d->max_vcpus;
> + rcu_unlock_domain(d);
> +
> + nr_vnodes = touser_topo.nr_vnodes;
> + rc = copy_to_guest(arg, &touser_topo, 1);
> + if ( rc )
> + return -EFAULT;
> + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
> + return -EFAULT;
-EINVAL
> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
> + if ( vblks == NULL )
> + return -EFAULT;
-ENOMEM
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
> + &d->vnuma.vnuma_memblks[i], 1) < 0 )
Indentation.
> + goto out;
> + }
> + for ( i = 0; i < touser_topo.nr_vnodes; i++ )
Again, please fold this with the previous loop, and use (preferably)
the simple variable as loop boundary (but in any case be consistent).
> + for ( j = 0; j < touser_topo.nr_vnodes; j++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vdistance,
> + __vnode_distance_offset(d, i, j),
> + &__vnode_distance(d, i, j), 1) )
> + goto out;
> + }
> + for ( i = 0; i < d->max_vcpus ; i++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
> + &d->vnuma.vcpu_to_vnode[i], 1) )
> + goto out;
> + }
> + return rc;
> +out:
> + if ( vblks ) xfree(vblks);
Coding style.
> --- /dev/null
> +++ b/xen/include/xen/vnuma.h
> @@ -0,0 +1,27 @@
> +#ifndef _VNUMA_H
> +#define _VNUMA_H
> +#include <public/memory.h>
> +
> +/* DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t); */
> +
> +struct vnuma_topology_info {
> + domid_t domid;
> + uint16_t nr_vnodes;
> + uint32_t _pad;
> + XEN_GUEST_HANDLE_64(uint) vdistance;
> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
> +typedef struct vnuma_topology_info vnuma_topology_info_t;
> +DEFINE_XEN_GUEST_HANDLE(vnuma_topology_info_t);
At least up to here, this appears to belong in the public header.
Jan
> +
> +#define __vnode_distance_offset(_dom, _i, _j) \
> + ( ((_j) * ((_dom)->vnuma.nr_vnodes)) + (_i) )
> +
> +#define __vnode_distance(_dom, _i, _j) \
> + ( (_dom)->vnuma.vdistance[__vnode_distance_offset((_dom), (_i),
> (_j))] )
> +
> +#define __vnode_distance_set(_dom, _i, _j, _v) \
> + do { __vnode_distance((_dom), (_i), (_j)) = (_v); } while(0)
> +
> +#endif
> --
> 1.7.10.4
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-13 9:31 ` Andrew Cooper
@ 2013-09-13 11:53 ` Jan Beulich
From: Jan Beulich @ 2013-09-13 11:53 UTC (permalink / raw)
To: Andrew Cooper, Elena Ufimtseva
Cc: keir, lccycc123, george.dunlap, msw, dario.faggioli,
stefano.stabellini, xen-devel
>>> On 13.09.13 at 11:31, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 13/09/2013 09:49, Elena Ufimtseva wrote:
>> + if (
>> + (d->vnuma.vdistance = xmalloc_bytes(
>> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
>
> sizeof is an operator, not a function. As you are not passing a type,
> the brackets are not required.
Actually, I'd request parentheses to be added if I saw a use
of sizeof() without them. I think we have quite a few (hopefully
a majority) of uses similar to the one above in the tree.
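That is, keeping the parenthesised form already used in the patch, e.g.:

    sizeof(*d->vnuma.vdistance) * dist_size

rather than:

    sizeof *d->vnuma.vdistance * dist_size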
Jan
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-13 8:49 [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests Elena Ufimtseva
2013-09-13 9:31 ` Andrew Cooper
2013-09-13 11:00 ` Jan Beulich
@ 2013-09-16 15:46 ` George Dunlap
2013-09-17 6:44 ` Elena Ufimtseva
From: George Dunlap @ 2013-09-16 15:46 UTC (permalink / raw)
To: Elena Ufimtseva
Cc: Keir Fraser, Stefano Stabellini, Matt Wilson, Dario Faggioli,
Li Yechen, xen-devel@lists.xen.org, Jan Beulich
On Fri, Sep 13, 2013 at 9:49 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
> +void domain_vnuma_destroy(struct domain_vnuma_info *v)
As long as you're respinning, I think I'd prefer this be named
something different -- 'v' is almost universally of type "struct vcpu"
in the Xen code, so this code is a little confusing. I might use
"vinfo", or even just take a domain pointer and dereference it
yourself.
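E.g. either of (illustrative):

    void domain_vnuma_destroy(struct domain_vnuma_info *vinfo);
    void domain_vnuma_destroy(struct domain *d);  /* and use &d->vnuma inside */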
> +{
> + if (v->vnuma_memblks) {
> + xfree(v->vnuma_memblks);
> + v->vnuma_memblks = NULL;
> + }
> + if (v->vcpu_to_vnode) {
> + xfree(v->vcpu_to_vnode);
> + v->vcpu_to_vnode = NULL;
> + }
> + if (v->vdistance) {
> + xfree(v->vdistance);
> + v->vdistance = NULL;
> + }
> + if (v->vnode_to_pnode) {
> + xfree(v->vnode_to_pnode);
> + v->vnode_to_pnode = NULL;
> + }
> + v->nr_vnodes = 0;
> +}
> +
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> index 9760d50..e5d05c7 100644
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -29,6 +29,7 @@
> #include <asm/page.h>
> #include <public/domctl.h>
> #include <xsm/xsm.h>
> +#include <xen/vnuma.h>
>
> static DEFINE_SPINLOCK(domctl_lock);
> DEFINE_SPINLOCK(vcpu_alloc_lock);
> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
> ret = set_global_virq_handler(d, virq);
> }
> break;
> -
> + case XEN_DOMCTL_setvnumainfo:
> + {
> + unsigned int i, j, nr_vnodes, dist_size;
> + unsigned int dist, vmap, vntop;
> + vnuma_memblk_t vmemblk;
> +
> + ret = -EINVAL;
> + dist = i = j = 0;
I think the zeroing of these is unnecessary -- you zero them /
initialize them before you use them below.
You need to break this code into "paragraphs" of related changes, with
a space in between. For example, maybe here...
> + nr_vnodes = op->u.vnuma.nr_vnodes;
> + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
> + break;
here...
> + d->vnuma.nr_vnodes = nr_vnodes;
> + dist_size = nr_vnodes * nr_vnodes;
here...
> + if (
> + (d->vnuma.vdistance = xmalloc_bytes(
> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
> + (d->vnuma.vnuma_memblks = xmalloc_bytes(
> + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
> + (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
> + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
> + (d->vnuma.vnode_to_pnode = xmalloc_bytes(
> + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
> + goto err_dom;
here...
Also, for an idiomatic way to do this kind of multiple allocation w/
error checking, take a look at
xen/arch/x86/hvm/hvm.c:hvm_domain_initialise() (to pick a random
function I've worked with recently). Look at the
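Roughly, that pattern applied here might look like (a sketch, not the
actual hvm.c code):

    d->vnuma.vdistance = xmalloc_array(unsigned int, dist_size);
    d->vnuma.vnuma_memblks = xmalloc_array(vnuma_memblk_t, nr_vnodes);
    d->vnuma.vcpu_to_vnode = xmalloc_array(unsigned int, d->max_vcpus);
    d->vnuma.vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
    if ( !d->vnuma.vdistance || !d->vnuma.vnuma_memblks ||
         !d->vnuma.vcpu_to_vnode || !d->vnuma.vnode_to_pnode )
    {
        ret = -ENOMEM;
        goto err_dom;  /* with err_dom freeing whatever was allocated */
    }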
> + for ( i = 0; i < nr_vnodes; i++ )
> + for ( j = 0; j < nr_vnodes; j++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&dist,
> + op->u.vnuma.vdistance,
> + __vnode_distance_offset(d, i, j), 1)) )
> + goto err_dom;
> + __vnode_distance_set(d, i, j, dist);
> + }
here...
Also, in each of these error paths, you leak the structures allocated above.
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vmemblk,
> + op->u.vnuma.vnuma_memblks, i, 1)) )
> + goto err_dom;
> + d->vnuma.vnuma_memblks[i].start = vmemblk.start;
> + d->vnuma.vnuma_memblks[i].end = vmemblk.end;
> + }
here...
> + for ( i = 0; i < d->max_vcpus; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vmap,
> + op->u.vnuma.vcpu_to_vnode, i, 1)) )
> + goto err_dom;
> + d->vnuma.vcpu_to_vnode[i] = vmap;
> + }
here...
> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
> + {
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( unlikely(__copy_from_guest_offset(&vntop,
> + op->u.vnuma.vnode_to_pnode, i, 1)) )
> + goto err_dom;
> + d->vnuma.vnode_to_pnode[i] = vntop;
> + }
> + }
> + else
> + for(i = 0; i < nr_vnodes; i++)
> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
here...
> + ret = 0;
and maybe here.
> + break;
> +err_dom:
> + ret = -EINVAL;
> + }
> + break;
> default:
> ret = arch_do_domctl(op, d, u_domctl);
> break;
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 50b740f..6dc2452 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -28,6 +28,7 @@
> #include <public/memory.h>
> #include <xsm/xsm.h>
> #include <xen/trace.h>
> +#include <xen/vnuma.h>
>
> struct memop_args {
> /* INPUT */
> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
> rcu_unlock_domain(d);
>
> break;
> + case XENMEM_get_vnuma_info:
> + {
> + int i, j;
> + struct vnuma_topology_info mtopology;
> + struct vnuma_topology_info touser_topo;
> + struct domain *d;
> + unsigned int max_pages, max_vcpus, nr_vnodes;
> + vnuma_memblk_t *vblks;
>
> + rc = -EINVAL;
here...
> + if ( guest_handle_is_null(arg) )
> + return rc;
here...
> + if( copy_from_guest(&mtopology, arg, 1) )
> + return -EINVAL;
here... (and so on)
This should be -EFAULT
> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
> + return -EINVAL;
> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
> + max_pages = d->max_pages;
> + max_vcpus = d->max_vcpus;
> + rcu_unlock_domain(d);
> +
> + nr_vnodes = touser_topo.nr_vnodes;
> + rc = copy_to_guest(arg, &touser_topo, 1);
> + if ( rc )
> + return -EFAULT;
> + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
> + return -EFAULT;
> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
> + if ( vblks == NULL )
> + return -EFAULT;
Uum, what is this for? It doesn't appear to be used -- if there's a
copy failure it's freed again, if things succeed it's leaked.
> + for ( i = 0; i < nr_vnodes; i++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
> + &d->vnuma.vnuma_memblks[i], 1) < 0 )
> + goto out;
4 spaces for indentation, please.
> + }
> + for ( i = 0; i < touser_topo.nr_vnodes; i++ )
> + for ( j = 0; j < touser_topo.nr_vnodes; j++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vdistance,
> + __vnode_distance_offset(d, i, j),
> + &__vnode_distance(d, i, j), 1) )
> + goto out;
> + }
> + for ( i = 0; i < d->max_vcpus ; i++ )
> + {
> + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
> + &d->vnuma.vcpu_to_vnode[i], 1) )
> + goto out;
> + }
> + return rc;
> +out:
> + if ( vblks ) xfree(vblks);
> + return rc;
> + break;
> + }
> default:
> rc = arch_memory_op(op, arg);
> break;
> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
> index 4c5b2bb..3574d0a 100644
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -35,6 +35,7 @@
> #include "xen.h"
> #include "grant_table.h"
> #include "hvm/save.h"
> +#include "memory.h"
>
> #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
>
> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
> typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>
> +struct xen_domctl_vnuma {
> + uint16_t nr_vnodes;
> + XEN_GUEST_HANDLE_64(uint) vdistance;
> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
> +
> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
> +
> struct xen_domctl {
> uint32_t cmd;
> #define XEN_DOMCTL_createdomain 1
> @@ -920,6 +932,7 @@ struct xen_domctl {
> #define XEN_DOMCTL_set_broken_page_p2m 67
> #define XEN_DOMCTL_setnodeaffinity 68
> #define XEN_DOMCTL_getnodeaffinity 69
> +#define XEN_DOMCTL_setvnumainfo 70
> #define XEN_DOMCTL_gdbsx_guestmemio 1000
> #define XEN_DOMCTL_gdbsx_pausevcpu 1001
> #define XEN_DOMCTL_gdbsx_unpausevcpu 1002
> @@ -979,6 +992,7 @@ struct xen_domctl {
> struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
> struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
> struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
> + struct xen_domctl_vnuma vnuma;
> uint8_t pad[128];
> } u;
> };
> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
> DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
>
> #endif /* __XEN_PUBLIC_DOMCTL_H__ */
> -
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 7a26dee..28f6aaf 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
> * Caller must be privileged or the hypercall fails.
> */
> #define XENMEM_claim_pages 24
> -
> /*
> * XENMEM_claim_pages flags - the are no flags at this time.
> * The zero value is appropiate.
> */
>
> +struct vnuma_memblk {
> + uint64_t start, end;
> +};
> +typedef struct vnuma_memblk vnuma_memblk_t;
> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
> +
> +#define XENMEM_get_vnuma_info 25
> +
> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>
> #endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
> index a057069..c9d53e3 100644
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -4,6 +4,7 @@
>
> #include <public/xen.h>
> #include <asm/domain.h>
> +#include <public/memory.h>
>
> typedef union {
> struct vcpu_guest_context *nat;
> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>
> extern bool_t opt_dom0_vcpus_pin;
>
> +struct domain_vnuma_info {
> + uint16_t nr_vnodes;
> + uint *vdistance;
> + uint *vcpu_to_vnode;
> + uint *vnode_to_pnode;
> + vnuma_memblk_t *vnuma_memblks;
> +};
Are these the right sizes for things to allocate? Do we really need
32 bits to represent distance, vnode and pnode? Can we use u16 or u8?
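(Purely illustrative: if 8-bit distances and node indices turned out to be
enough, the structure could shrink to something like the following.)

    struct domain_vnuma_info {
        uint16_t nr_vnodes;
        uint8_t *vdistance;      /* ACPI SLIT distances are 8-bit */
        uint8_t *vcpu_to_vnode;
        uint8_t *vnode_to_pnode;
        vnuma_memblk_t *vnuma_memblks;
    };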
-George
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-16 15:46 ` George Dunlap
@ 2013-09-17 6:44 ` Elena Ufimtseva
2013-09-17 6:59 ` Elena Ufimtseva
2013-09-17 7:05 ` Jan Beulich
From: Elena Ufimtseva @ 2013-09-17 6:44 UTC (permalink / raw)
To: George Dunlap
Cc: Keir Fraser, Stefano Stabellini, Matt Wilson, Dario Faggioli,
Li Yechen, xen-devel@lists.xen.org, Jan Beulich
Hi guys
Thank you for reviewing; I will clean these up.
George, after talking to Dario, I think the max number of physical
nodes will not exceed 256. Dario's automatic NUMA placement works with
this number, and I think it can easily be u8. Unless anyone has other
thoughts.
Elena
On Mon, Sep 16, 2013 at 11:46 AM, George Dunlap
<George.Dunlap@eu.citrix.com> wrote:
> On Fri, Sep 13, 2013 at 9:49 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
>> +void domain_vnuma_destroy(struct domain_vnuma_info *v)
>
> As long as you're respinning, I think I'd prefer this be named
> something different -- 'v' is almost universally of type "struct vcpu"
> in the Xen code, so this code is a little confusing. I might use
> "vinfo", or even just take a domain pointer and dereference it
> yourself.
>
>> +{
>> + if (v->vnuma_memblks) {
>> + xfree(v->vnuma_memblks);
>> + v->vnuma_memblks = NULL;
>> + }
>> + if (v->vcpu_to_vnode) {
>> + xfree(v->vcpu_to_vnode);
>> + v->vcpu_to_vnode = NULL;
>> + }
>> + if (v->vdistance) {
>> + xfree(v->vdistance);
>> + v->vdistance = NULL;
>> + }
>> + if (v->vnode_to_pnode) {
>> + xfree(v->vnode_to_pnode);
>> + v->vnode_to_pnode = NULL;
>> + }
>> + v->nr_vnodes = 0;
>> +}
>> +
>> /*
>> * Local variables:
>> * mode: C
>> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
>> index 9760d50..e5d05c7 100644
>> --- a/xen/common/domctl.c
>> +++ b/xen/common/domctl.c
>> @@ -29,6 +29,7 @@
>> #include <asm/page.h>
>> #include <public/domctl.h>
>> #include <xsm/xsm.h>
>> +#include <xen/vnuma.h>
>>
>> static DEFINE_SPINLOCK(domctl_lock);
>> DEFINE_SPINLOCK(vcpu_alloc_lock);
>> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>> ret = set_global_virq_handler(d, virq);
>> }
>> break;
>> -
>> + case XEN_DOMCTL_setvnumainfo:
>> + {
>> + unsigned int i, j, nr_vnodes, dist_size;
>> + unsigned int dist, vmap, vntop;
>> + vnuma_memblk_t vmemblk;
>> +
>> + ret = -EINVAL;
>> + dist = i = j = 0;
>
> I think the zeroing of these is unnecessary -- you zero them /
> initialize them before you use them below.
>
> You need to break this code into "paragraphs" of related changes, with
> a space in between. For example, maybe here...
>
>> + nr_vnodes = op->u.vnuma.nr_vnodes;
>> + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
>> + break;
>
> here...
>
>> + d->vnuma.nr_vnodes = nr_vnodes;
>> + dist_size = nr_vnodes * nr_vnodes;
>
> here...
>
>> + if (
>> + (d->vnuma.vdistance = xmalloc_bytes(
>> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
>> + (d->vnuma.vnuma_memblks = xmalloc_bytes(
>> + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
>> + (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
>> + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
>> + (d->vnuma.vnode_to_pnode = xmalloc_bytes(
>> + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
>> + goto err_dom;
>
> here...
>
> Also, for an idiomatic way to do this kind of multiple allocation w/
> error checking, take a look at
> xen/arch/x86/hvm/hvm.c:hvm_domain_initialise() (to pick a random
> function I've worked with recently). Look at the
>
>> + for ( i = 0; i < nr_vnodes; i++ )
>> + for ( j = 0; j < nr_vnodes; j++ )
>> + {
>> + if ( unlikely(__copy_from_guest_offset(&dist,
>> + op->u.vnuma.vdistance,
>> + __vnode_distance_offset(d, i, j), 1)) )
>> + goto err_dom;
>> + __vnode_distance_set(d, i, j, dist);
>> + }
>
> here...
>
> Also, in each of these error paths, you leak the structures allocated above.
>
>> + for ( i = 0; i < nr_vnodes; i++ )
>> + {
>> + if ( unlikely(__copy_from_guest_offset(&vmemblk,
>> + op->u.vnuma.vnuma_memblks, i, 1)) )
>> + goto err_dom;
>> + d->vnuma.vnuma_memblks[i].start = vmemblk.start;
>> + d->vnuma.vnuma_memblks[i].end = vmemblk.end;
>> + }
>
> here...
>
>> + for ( i = 0; i < d->max_vcpus; i++ )
>> + {
>> + if ( unlikely(__copy_from_guest_offset(&vmap,
>> + op->u.vnuma.vcpu_to_vnode, i, 1)) )
>> + goto err_dom;
>> + d->vnuma.vcpu_to_vnode[i] = vmap;
>> + }
>
> here...
>
>> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
>> + {
>> + for ( i = 0; i < nr_vnodes; i++ )
>> + {
>> + if ( unlikely(__copy_from_guest_offset(&vntop,
>> + op->u.vnuma.vnode_to_pnode, i, 1)) )
>> + goto err_dom;
>> + d->vnuma.vnode_to_pnode[i] = vntop;
>> + }
>> + }
>> + else
>> + for(i = 0; i < nr_vnodes; i++)
>> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
>
> here...
>
>> + ret = 0;
>
> and maybe here.
>
>> + break;
>> +err_dom:
>> + ret = -EINVAL;
>> + }
>> + break;
>> default:
>> ret = arch_do_domctl(op, d, u_domctl);
>> break;
>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>> index 50b740f..6dc2452 100644
>> --- a/xen/common/memory.c
>> +++ b/xen/common/memory.c
>> @@ -28,6 +28,7 @@
>> #include <public/memory.h>
>> #include <xsm/xsm.h>
>> #include <xen/trace.h>
>> +#include <xen/vnuma.h>
>>
>> struct memop_args {
>> /* INPUT */
>> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>> rcu_unlock_domain(d);
>>
>> break;
>> + case XENMEM_get_vnuma_info:
>> + {
>> + int i, j;
>> + struct vnuma_topology_info mtopology;
>> + struct vnuma_topology_info touser_topo;
>> + struct domain *d;
>> + unsigned int max_pages, max_vcpus, nr_vnodes;
>> + vnuma_memblk_t *vblks;
>>
>> + rc = -EINVAL;
>
> here...
>
>> + if ( guest_handle_is_null(arg) )
>> + return rc;
>
> here...
>
>> + if( copy_from_guest(&mtopology, arg, 1) )
>> + return -EINVAL;
>
> here... (and so on)
>
> This should be -EFAULT
>
>> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
>> + return -EINVAL;
>> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
>> + max_pages = d->max_pages;
>> + max_vcpus = d->max_vcpus;
>> + rcu_unlock_domain(d);
>> +
>> + nr_vnodes = touser_topo.nr_vnodes;
>> + rc = copy_to_guest(arg, &touser_topo, 1);
>> + if ( rc )
>> + return -EFAULT;
>> + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
>> + return -EFAULT;
>> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
>> + if ( vblks == NULL )
>> + return -EFAULT;
>
> Uum, what is this for? It doesn't appear to be used -- if there's a
> copy failure it's freed again, if things succeed it's leaked.
>
>> + for ( i = 0; i < nr_vnodes; i++ )
>> + {
>> + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
>> + &d->vnuma.vnuma_memblks[i], 1) < 0 )
>> + goto out;
>
> 4 spaces for indentation, please.
>
>> + }
>> + for ( i = 0; i < touser_topo.nr_vnodes; i++ )
>> + for ( j = 0; j < touser_topo.nr_vnodes; j++ )
>> + {
>> + if ( copy_to_guest_offset(mtopology.vdistance,
>> + __vnode_distance_offset(d, i, j),
>> + &__vnode_distance(d, i, j), 1) )
>> + goto out;
>> + }
>> + for ( i = 0; i < d->max_vcpus ; i++ )
>> + {
>> + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
>> + &d->vnuma.vcpu_to_vnode[i], 1) )
>> + goto out;
>> + }
>> + return rc;
>> +out:
>> + if ( vblks ) xfree(vblks);
>> + return rc;
>> + break;
>> + }
>> default:
>> rc = arch_memory_op(op, arg);
>> break;
>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>> index 4c5b2bb..3574d0a 100644
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -35,6 +35,7 @@
>> #include "xen.h"
>> #include "grant_table.h"
>> #include "hvm/save.h"
>> +#include "memory.h"
>>
>> #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
>>
>> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
>> typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>>
>> +struct xen_domctl_vnuma {
>> + uint16_t nr_vnodes;
>> + XEN_GUEST_HANDLE_64(uint) vdistance;
>> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
>> + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
>> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
>> +};
>> +
>> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
>> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
>> +
>> struct xen_domctl {
>> uint32_t cmd;
>> #define XEN_DOMCTL_createdomain 1
>> @@ -920,6 +932,7 @@ struct xen_domctl {
>> #define XEN_DOMCTL_set_broken_page_p2m 67
>> #define XEN_DOMCTL_setnodeaffinity 68
>> #define XEN_DOMCTL_getnodeaffinity 69
>> +#define XEN_DOMCTL_setvnumainfo 70
>> #define XEN_DOMCTL_gdbsx_guestmemio 1000
>> #define XEN_DOMCTL_gdbsx_pausevcpu 1001
>> #define XEN_DOMCTL_gdbsx_unpausevcpu 1002
>> @@ -979,6 +992,7 @@ struct xen_domctl {
>> struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>> struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>> struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
>> + struct xen_domctl_vnuma vnuma;
>> uint8_t pad[128];
>> } u;
>> };
>> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
>>
>> #endif /* __XEN_PUBLIC_DOMCTL_H__ */
>> -
>> /*
>> * Local variables:
>> * mode: C
>> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
>> index 7a26dee..28f6aaf 100644
>> --- a/xen/include/public/memory.h
>> +++ b/xen/include/public/memory.h
>> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>> * Caller must be privileged or the hypercall fails.
>> */
>> #define XENMEM_claim_pages 24
>> -
>> /*
>> * XENMEM_claim_pages flags - the are no flags at this time.
>> * The zero value is appropiate.
>> */
>>
>> +struct vnuma_memblk {
>> + uint64_t start, end;
>> +};
>> +typedef struct vnuma_memblk vnuma_memblk_t;
>> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
>> +
>> +#define XENMEM_get_vnuma_info 25
>> +
>> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>>
>> #endif /* __XEN_PUBLIC_MEMORY_H__ */
>> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
>> index a057069..c9d53e3 100644
>> --- a/xen/include/xen/domain.h
>> +++ b/xen/include/xen/domain.h
>> @@ -4,6 +4,7 @@
>>
>> #include <public/xen.h>
>> #include <asm/domain.h>
>> +#include <public/memory.h>
>>
>> typedef union {
>> struct vcpu_guest_context *nat;
>> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>>
>> extern bool_t opt_dom0_vcpus_pin;
>>
>> +struct domain_vnuma_info {
>> + uint16_t nr_vnodes;
>> + uint *vdistance;
>> + uint *vcpu_to_vnode;
>> + uint *vnode_to_pnode;
>> + vnuma_memblk_t *vnuma_memblks;
>> +};
>
> Are these the right sizes for things to allocate? Do we really need
> 32 bits to represent distance, vnode and pnode? Can we use u16 or u8?
>
> -George
--
Elena
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-17 6:44 ` Elena Ufimtseva
@ 2013-09-17 6:59 ` Elena Ufimtseva
2013-09-17 7:05 ` Jan Beulich
From: Elena Ufimtseva @ 2013-09-17 6:59 UTC (permalink / raw)
To: George Dunlap
Cc: Keir Fraser, Stefano Stabellini, Matt Wilson, Dario Faggioli,
Li Yechen, xen-devel@lists.xen.org, Jan Beulich
Looks like I have... The Linux type for the number of nodes is int, so I'd
rather keep it this way.
On Tue, Sep 17, 2013 at 2:44 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
> Hi guys
> Thank you for reviewing; I will clean these up.
>
> George, after talking to Dario, I think the max number of physical
> nodes will not exceed 256. Dario's automatic NUMA placement works with
> this number, and I think it can easily be u8. Unless anyone has other
> thoughts.
>
> Elena
>
> On Mon, Sep 16, 2013 at 11:46 AM, George Dunlap
> <George.Dunlap@eu.citrix.com> wrote:
>> On Fri, Sep 13, 2013 at 9:49 AM, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
>>> +void domain_vnuma_destroy(struct domain_vnuma_info *v)
>>
>> As long as you're respinning, I think I'd prefer this be named
>> something different -- 'v' is almost universally of type "struct vcpu"
>> in the Xen code, so this code is a little confusing. I might use
>> "vinfo", or even just take a domain pointer and dereference it
>> yourself.
>>
>>> +{
>>> + if (v->vnuma_memblks) {
>>> + xfree(v->vnuma_memblks);
>>> + v->vnuma_memblks = NULL;
>>> + }
>>> + if (v->vcpu_to_vnode) {
>>> + xfree(v->vcpu_to_vnode);
>>> + v->vcpu_to_vnode = NULL;
>>> + }
>>> + if (v->vdistance) {
>>> + xfree(v->vdistance);
>>> + v->vdistance = NULL;
>>> + }
>>> + if (v->vnode_to_pnode) {
>>> + xfree(v->vnode_to_pnode);
>>> + v->vnode_to_pnode = NULL;
>>> + }
>>> + v->nr_vnodes = 0;
>>> +}
>>> +
>>> /*
>>> * Local variables:
>>> * mode: C
>>> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
>>> index 9760d50..e5d05c7 100644
>>> --- a/xen/common/domctl.c
>>> +++ b/xen/common/domctl.c
>>> @@ -29,6 +29,7 @@
>>> #include <asm/page.h>
>>> #include <public/domctl.h>
>>> #include <xsm/xsm.h>
>>> +#include <xen/vnuma.h>
>>>
>>> static DEFINE_SPINLOCK(domctl_lock);
>>> DEFINE_SPINLOCK(vcpu_alloc_lock);
>>> @@ -862,7 +863,72 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>>> ret = set_global_virq_handler(d, virq);
>>> }
>>> break;
>>> -
>>> + case XEN_DOMCTL_setvnumainfo:
>>> + {
>>> + unsigned int i, j, nr_vnodes, dist_size;
>>> + unsigned int dist, vmap, vntop;
>>> + vnuma_memblk_t vmemblk;
>>> +
>>> + ret = -EINVAL;
>>> + dist = i = j = 0;
>>
>> I think the zeroing of these is unnecessary -- you zero them /
>> initialize them before you use them below.
>>
>> You need to break this code into "paragraphs" of related changes, with
>> a space in between. For example, maybe here...
>>
>>> + nr_vnodes = op->u.vnuma.nr_vnodes;
>>> + if (nr_vnodes <= 0 || nr_vnodes > NR_CPUS)
>>> + break;
>>
>> here...
>>
>>> + d->vnuma.nr_vnodes = nr_vnodes;
>>> + dist_size = nr_vnodes * nr_vnodes;
>>
>> here...
>>
>>> + if (
>>> + (d->vnuma.vdistance = xmalloc_bytes(
>>> + sizeof(*d->vnuma.vdistance) * dist_size)) == NULL ||
>>> + (d->vnuma.vnuma_memblks = xmalloc_bytes(
>>> + sizeof(*d->vnuma.vnuma_memblks) * nr_vnodes)) == NULL ||
>>> + (d->vnuma.vcpu_to_vnode = xmalloc_bytes(
>>> + sizeof(*d->vnuma.vcpu_to_vnode) * d->max_vcpus)) == NULL ||
>>> + (d->vnuma.vnode_to_pnode = xmalloc_bytes(
>>> + sizeof(*d->vnuma.vnode_to_pnode) * nr_vnodes)) == NULL )
>>> + goto err_dom;
>>
>> here...
>>
>> Also, for an idiomatic way to do this kind of multiple allocation w/
>> error checking, take a look at
>> xen/arch/x86/hvm/hvm.c:hvm_domain_initialise() (to pick a random
>> function I've worked with recently). Look at the error handling there.
>>
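The pattern being referred to is, roughly: do all allocations up front, check them in one place, and unwind through a single error label. A minimal sketch using the patch's own fields follows (illustrative only -- not the actual hvm_domain_initialise() code, and the -ENOMEM choice is an assumption):

    case XEN_DOMCTL_setvnumainfo:
    {
        unsigned int nr_vnodes = op->u.vnuma.nr_vnodes;

        ret = -EINVAL;
        if ( nr_vnodes == 0 || nr_vnodes > NR_CPUS )
            break;

        ret = -ENOMEM;
        /* Allocate everything first; a single check decides success or failure. */
        d->vnuma.vdistance      = xmalloc_array(unsigned int, nr_vnodes * nr_vnodes);
        d->vnuma.vnuma_memblks  = xmalloc_array(vnuma_memblk_t, nr_vnodes);
        d->vnuma.vcpu_to_vnode  = xmalloc_array(unsigned int, d->max_vcpus);
        d->vnuma.vnode_to_pnode = xmalloc_array(unsigned int, nr_vnodes);
        if ( !d->vnuma.vdistance || !d->vnuma.vnuma_memblks ||
             !d->vnuma.vcpu_to_vnode || !d->vnuma.vnode_to_pnode )
            goto setvnuma_fail;

        /* ... copy_from_guest the distance/vcpu/pnode arrays here,
         *     jumping to setvnuma_fail on any failure ... */

        d->vnuma.nr_vnodes = nr_vnodes;
        ret = 0;
        break;

    setvnuma_fail:
        /* Frees and NULLs all four arrays, so nothing is leaked. */
        domain_vnuma_destroy(&d->vnuma);
        break;
    }

The key property is that every failure path funnels through the same label, so the leaks mentioned further down disappear, and any array added later only needs one more xfree() in domain_vnuma_destroy().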
>>> + for ( i = 0; i < nr_vnodes; i++ )
>>> + for ( j = 0; j < nr_vnodes; j++ )
>>> + {
>>> + if ( unlikely(__copy_from_guest_offset(&dist,
>>> + op->u.vnuma.vdistance,
>>> + __vnode_distance_offset(d, i, j), 1)) )
>>> + goto err_dom;
>>> + __vnode_distance_set(d, i, j, dist);
>>> + }
>>
>> here...
>>
>> Also, in each of these error paths, you leak the structures allocated above.
>>
>>> + for ( i = 0; i < nr_vnodes; i++ )
>>> + {
>>> + if ( unlikely(__copy_from_guest_offset(&vmemblk,
>>> + op->u.vnuma.vnuma_memblks, i, 1)) )
>>> + goto err_dom;
>>> + d->vnuma.vnuma_memblks[i].start = vmemblk.start;
>>> + d->vnuma.vnuma_memblks[i].end = vmemblk.end;
>>> + }
>>
>> here...
>>
>>> + for ( i = 0; i < d->max_vcpus; i++ )
>>> + {
>>> + if ( unlikely(__copy_from_guest_offset(&vmap,
>>> + op->u.vnuma.vcpu_to_vnode, i, 1)) )
>>> + goto err_dom;
>>> + d->vnuma.vcpu_to_vnode[i] = vmap;
>>> + }
>>
>> here...
>>
>>> + if ( !guest_handle_is_null(op->u.vnuma.vnode_to_pnode) )
>>> + {
>>> + for ( i = 0; i < nr_vnodes; i++ )
>>> + {
>>> + if ( unlikely(__copy_from_guest_offset(&vntop,
>>> + op->u.vnuma.vnode_to_pnode, i, 1)) )
>>> + goto err_dom;
>>> + d->vnuma.vnode_to_pnode[i] = vntop;
>>> + }
>>> + }
>>> + else
>>> + for(i = 0; i < nr_vnodes; i++)
>>> + d->vnuma.vnode_to_pnode[i] = NUMA_NO_NODE;
>>
>> here...
>>
>>> + ret = 0;
>>
>> and maybe here.
>>
>>> + break;
>>> +err_dom:
>>> + ret = -EINVAL;
>>> + }
>>> + break;
>>> default:
>>> ret = arch_do_domctl(op, d, u_domctl);
>>> break;
>>> diff --git a/xen/common/memory.c b/xen/common/memory.c
>>> index 50b740f..6dc2452 100644
>>> --- a/xen/common/memory.c
>>> +++ b/xen/common/memory.c
>>> @@ -28,6 +28,7 @@
>>> #include <public/memory.h>
>>> #include <xsm/xsm.h>
>>> #include <xen/trace.h>
>>> +#include <xen/vnuma.h>
>>>
>>> struct memop_args {
>>> /* INPUT */
>>> @@ -732,7 +733,62 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
>>> rcu_unlock_domain(d);
>>>
>>> break;
>>> + case XENMEM_get_vnuma_info:
>>> + {
>>> + int i, j;
>>> + struct vnuma_topology_info mtopology;
>>> + struct vnuma_topology_info touser_topo;
>>> + struct domain *d;
>>> + unsigned int max_pages, max_vcpus, nr_vnodes;
>>> + vnuma_memblk_t *vblks;
>>>
>>> + rc = -EINVAL;
>>
>> here...
>>
>>> + if ( guest_handle_is_null(arg) )
>>> + return rc;
>>
>> here...
>>
>>> + if( copy_from_guest(&mtopology, arg, 1) )
>>> + return -EINVAL;
>>
>> here... (and so on)
>>
>> This should be -EFAULT
>>
>>> + if ( (d = rcu_lock_domain_by_any_id(mtopology.domid)) == NULL )
>>> + return -EINVAL;
>>> + touser_topo.nr_vnodes = d->vnuma.nr_vnodes;
>>> + max_pages = d->max_pages;
>>> + max_vcpus = d->max_vcpus;
>>> + rcu_unlock_domain(d);
>>> +
>>> + nr_vnodes = touser_topo.nr_vnodes;
>>> + rc = copy_to_guest(arg, &touser_topo, 1);
>>> + if ( rc )
>>> + return -EFAULT;
>>> + if ( nr_vnodes == 0 || nr_vnodes > max_vcpus )
>>> + return -EFAULT;
>>> + vblks = xmalloc_array(struct vnuma_memblk, nr_vnodes);
>>> + if ( vblks == NULL )
>>> + return -EFAULT;
>>
>> Uum, what is this for? It doesn't appear to be used -- if there's a
>> copy failure it's freed again, if things succeed it's leaked.
>>
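Folding those points together (drop the unused vblks allocation, return -EFAULT on copy failures), the copy-out tail of the handler could look roughly like the sketch below, reusing the handler's existing locals. It also keeps the domain locked until the copies are done, which is an assumption about how the locking should be arranged rather than something taken from the posted patch:

        for ( i = 0; i < nr_vnodes; i++ )
            if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
                                      &d->vnuma.vnuma_memblks[i], 1) )
                goto vnumainfo_fault;

        for ( i = 0; i < nr_vnodes; i++ )
            for ( j = 0; j < nr_vnodes; j++ )
                if ( copy_to_guest_offset(mtopology.vdistance,
                                          __vnode_distance_offset(d, i, j),
                                          &__vnode_distance(d, i, j), 1) )
                    goto vnumainfo_fault;

        for ( i = 0; i < d->max_vcpus; i++ )
            if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
                                      &d->vnuma.vcpu_to_vnode[i], 1) )
                goto vnumainfo_fault;

        rcu_unlock_domain(d);
        return 0;

    vnumainfo_fault:
        rcu_unlock_domain(d);
        return -EFAULT;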
>>> + for ( i = 0; i < nr_vnodes; i++ )
>>> + {
>>> + if ( copy_to_guest_offset(mtopology.vnuma_memblks, i,
>>> + &d->vnuma.vnuma_memblks[i], 1) < 0 )
>>> + goto out;
>>
>> 4 spaces for indentation, please.
>>
>>> + }
>>> + for ( i = 0; i < touser_topo.nr_vnodes; i++ )
>>> + for ( j = 0; j < touser_topo.nr_vnodes; j++ )
>>> + {
>>> + if ( copy_to_guest_offset(mtopology.vdistance,
>>> + __vnode_distance_offset(d, i, j),
>>> + &__vnode_distance(d, i, j), 1) )
>>> + goto out;
>>> + }
>>> + for ( i = 0; i < d->max_vcpus ; i++ )
>>> + {
>>> + if ( copy_to_guest_offset(mtopology.vcpu_to_vnode, i,
>>> + &d->vnuma.vcpu_to_vnode[i], 1) )
>>> + goto out;
>>> + }
>>> + return rc;
>>> +out:
>>> + if ( vblks ) xfree(vblks);
>>> + return rc;
>>> + break;
>>> + }
>>> default:
>>> rc = arch_memory_op(op, arg);
>>> break;
>>> diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
>>> index 4c5b2bb..3574d0a 100644
>>> --- a/xen/include/public/domctl.h
>>> +++ b/xen/include/public/domctl.h
>>> @@ -35,6 +35,7 @@
>>> #include "xen.h"
>>> #include "grant_table.h"
>>> #include "hvm/save.h"
>>> +#include "memory.h"
>>>
>>> #define XEN_DOMCTL_INTERFACE_VERSION 0x00000009
>>>
>>> @@ -852,6 +853,17 @@ struct xen_domctl_set_broken_page_p2m {
>>> typedef struct xen_domctl_set_broken_page_p2m xen_domctl_set_broken_page_p2m_t;
>>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_broken_page_p2m_t);
>>>
>>> +struct xen_domctl_vnuma {
>>> + uint16_t nr_vnodes;
>>> + XEN_GUEST_HANDLE_64(uint) vdistance;
>>> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
>>> + XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
>>> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
>>> +};
>>> +
>>> +typedef struct xen_domctl_vnuma xen_domctl_vnuma_t;
>>> +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vnuma_t);
>>> +
>>> struct xen_domctl {
>>> uint32_t cmd;
>>> #define XEN_DOMCTL_createdomain 1
>>> @@ -920,6 +932,7 @@ struct xen_domctl {
>>> #define XEN_DOMCTL_set_broken_page_p2m 67
>>> #define XEN_DOMCTL_setnodeaffinity 68
>>> #define XEN_DOMCTL_getnodeaffinity 69
>>> +#define XEN_DOMCTL_setvnumainfo 70
>>> #define XEN_DOMCTL_gdbsx_guestmemio 1000
>>> #define XEN_DOMCTL_gdbsx_pausevcpu 1001
>>> #define XEN_DOMCTL_gdbsx_unpausevcpu 1002
>>> @@ -979,6 +992,7 @@ struct xen_domctl {
>>> struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
>>> struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
>>> struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
>>> + struct xen_domctl_vnuma vnuma;
>>> uint8_t pad[128];
>>> } u;
>>> };
>>> @@ -986,7 +1000,6 @@ typedef struct xen_domctl xen_domctl_t;
>>> DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
>>>
>>> #endif /* __XEN_PUBLIC_DOMCTL_H__ */
>>> -
>>> /*
>>> * Local variables:
>>> * mode: C
>>> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
>>> index 7a26dee..28f6aaf 100644
>>> --- a/xen/include/public/memory.h
>>> +++ b/xen/include/public/memory.h
>>> @@ -453,12 +453,19 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
>>> * Caller must be privileged or the hypercall fails.
>>> */
>>> #define XENMEM_claim_pages 24
>>> -
>>> /*
>>> * XENMEM_claim_pages flags - the are no flags at this time.
>>> * The zero value is appropiate.
>>> */
>>>
>>> +struct vnuma_memblk {
>>> + uint64_t start, end;
>>> +};
>>> +typedef struct vnuma_memblk vnuma_memblk_t;
>>> +DEFINE_XEN_GUEST_HANDLE(vnuma_memblk_t);
>>> +
>>> +#define XENMEM_get_vnuma_info 25
>>> +
>>> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>>>
>>> #endif /* __XEN_PUBLIC_MEMORY_H__ */
>>> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
>>> index a057069..c9d53e3 100644
>>> --- a/xen/include/xen/domain.h
>>> +++ b/xen/include/xen/domain.h
>>> @@ -4,6 +4,7 @@
>>>
>>> #include <public/xen.h>
>>> #include <asm/domain.h>
>>> +#include <public/memory.h>
>>>
>>> typedef union {
>>> struct vcpu_guest_context *nat;
>>> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>>>
>>> extern bool_t opt_dom0_vcpus_pin;
>>>
>>> +struct domain_vnuma_info {
>>> + uint16_t nr_vnodes;
>>> + uint *vdistance;
>>> + uint *vcpu_to_vnode;
>>> + uint *vnode_to_pnode;
>>> + vnuma_memblk_t *vnuma_memblks;
>>> +};
>>
>> Are these the right sizes for things to allocate? Do we really need
>> 32 bits to represent distance, vnode and pnode? Can we use u16 or u8?
>>
>> -George
>
>
>
> --
> Elena
--
Elena
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-17 6:44 ` Elena Ufimtseva
2013-09-17 6:59 ` Elena Ufimtseva
@ 2013-09-17 7:05 ` Jan Beulich
2013-09-17 7:11 ` Dario Faggioli
1 sibling, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2013-09-17 7:05 UTC (permalink / raw)
To: George Dunlap, Elena Ufimtseva
Cc: Keir Fraser, Li Yechen, Matt Wilson, Dario Faggioli,
Stefano Stabellini, xen-devel@lists.xen.org
>>> On 17.09.13 at 08:44, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
Please don't top post.
> George, after talking to Dario, I think the max number of physical
> nodes will not exceed 256. Dario's automatic NUMA
> placement work with this number and I think it can be easily u8.
> Unless anyone has other thoughts.
With nr_vnodes being uint16_t, the vnode numbers should be
too. Limiting them to u8 would possibly be even better, but then
nr_vnodes would better be unsigned int (perhaps that was the
case from the beginning, regardless of the types used for the
arrays).
The pnode array surely can also be uint8_t for the time being,
considering that there are other places where node IDs are
limited to 8 bits.
And with struct acpi_table_slit having just 8-bit distances, there's
no apparent reason why the virtual distances can't be 8 bits too.
But - all this is only for the internal representations. Anything in
the public interface should be wide enough to allow future
extension.
Jan
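A sketch of the split described above -- compact types for Xen's internal bookkeeping, wider fixed-size types in the public interface. The public struct is the one from the patch; the narrowed internal widths are only the suggestion under discussion, not a settled ABI:

    /* Internal representation: can be packed tightly and changed at will. */
    struct domain_vnuma_info {
        unsigned int nr_vnodes;
        uint8_t *vdistance;         /* SLIT distances are 8-bit anyway */
        uint8_t *vcpu_to_vnode;     /* <= 256 vnodes assumed for now */
        uint8_t *vnode_to_pnode;    /* node IDs are limited to 8 bits elsewhere */
        vnuma_memblk_t *vnuma_memblks;
    };

    /* Public interface: leave room for future extension, so the guest
     * handles stay at full width (cf. uint32 node_to_node_distance in sysctl.h). */
    struct xen_domctl_vnuma {
        uint16_t nr_vnodes;
        XEN_GUEST_HANDLE_64(uint) vdistance;
        XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
        XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;
        XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
    };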
>>> @@ -89,4 +90,14 @@ extern unsigned int xen_processor_pmbits;
>>>
>>> extern bool_t opt_dom0_vcpus_pin;
>>>
>>> +struct domain_vnuma_info {
>>> + uint16_t nr_vnodes;
>>> + uint *vdistance;
>>> + uint *vcpu_to_vnode;
>>> + uint *vnode_to_pnode;
>>> + vnuma_memblk_t *vnuma_memblks;
>>> +};
>>
>> Are these the right sizes for things to allocate? Do we really need
>> 32 bits to represent distance, vnode and pnode? Can we use u16 or u8?
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-17 7:05 ` Jan Beulich
@ 2013-09-17 7:11 ` Dario Faggioli
2013-09-17 7:19 ` Elena Ufimtseva
0 siblings, 1 reply; 14+ messages in thread
From: Dario Faggioli @ 2013-09-17 7:11 UTC (permalink / raw)
To: Jan Beulich
Cc: Keir Fraser, Li Yechen, George Dunlap, Matt Wilson,
Stefano Stabellini, xen-devel@lists.xen.org, Elena Ufimtseva
On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
> >>> On 17.09.13 at 08:44, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
>
> Please don't top post.
>
> > George, after talking to Dario, I think the max number of physical
> > nodes will not exceed 256. Dario's automatic NUMA
> > placement work with this number and I think it can be easily u8.
> > Unless anyone has other thoughts.
>
> With nr_vnodes being uint16_t, the vnode numbers should be
> too. Limiting them to u8 would possibly be even better, but then
> nr_vnodes would better be unsigned int (perhaps that was the
> case from the beginning, regardless of the types used for the
> arrays).
>
> The pnode array surely can also be uint8_t for the time being,
> considering that there are other places where node IDs are
> limited to 8 bits.
>
All agreed.
> And with struct acpi_table_slit having just 8-bit distances, there's
> no apparent reason why the virtual distances can't be 8 bits too.
>
> But - all this is only for the internal representations. Anything in
> the public interface should be wide enough to allow future
> extension.
>
And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
(http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
is uint32.
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-17 7:11 ` Dario Faggioli
@ 2013-09-17 7:19 ` Elena Ufimtseva
2013-09-17 9:04 ` Dario Faggioli
0 siblings, 1 reply; 14+ messages in thread
From: Elena Ufimtseva @ 2013-09-17 7:19 UTC (permalink / raw)
To: Dario Faggioli
Cc: Keir Fraser, Li Yechen, George Dunlap, Matt Wilson,
Stefano Stabellini, xen-devel@lists.xen.org, Jan Beulich
On Tue, Sep 17, 2013 at 3:11 AM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
>> >>> On 17.09.13 at 08:44, Elena Ufimtseva <ufimtseva@gmail.com> wrote:
>>
>> Please don't top post.
>>
>> > George, after talking to Dario, I think the max number of physical
>> > nodes will not exceed 256. Dario's automatic NUMA
>> > placement work with this number and I think it can be easily u8.
>> > Unless anyone has other thoughts.
>>
>> With nr_vnodes being uint16_t, the vnode numbers should be
>> too. Limiting them to u8 would possibly be even better, but then
>> nr_vnodes would better be unsigned int (perhaps that was the
>> case from the beginning, regardless of the types used for the
>> arrays).
>>
>> The pnode array surely can also be uint8_t for the time being,
>> considering that there are other places where node IDs are
>> limited to 8 bits.
>>
> All agreed.
>
>> And with struct acpi_table_slit having just 8-bit distances, there's
>> no apparent reason why the virtual distances can't be 8 bits too.
>>
>> But - all this is only for the internal representations. Anything in
>> the public interface should be wide enough to allow future
>> extension.
>>
> And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
> is uint32.
>
Linux has u8 for distance. Ok, thank you for pointing that out.
Elena
> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>
--
Elena
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-17 7:19 ` Elena Ufimtseva
@ 2013-09-17 9:04 ` Dario Faggioli
2013-10-03 12:27 ` Li Yechen
0 siblings, 1 reply; 14+ messages in thread
From: Dario Faggioli @ 2013-09-17 9:04 UTC (permalink / raw)
To: Elena Ufimtseva
Cc: Keir Fraser, Li Yechen, George Dunlap, Matt Wilson,
Stefano Stabellini, xen-devel@lists.xen.org, Jan Beulich
On mar, 2013-09-17 at 03:19 -0400, Elena Ufimtseva wrote:
> On Tue, Sep 17, 2013 at 3:11 AM, Dario Faggioli
> <dario.faggioli@citrix.com> wrote:
> > On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
> >> But - all this is only for the internal representations. Anything in
> >> the public interface should be wide enough to allow future
> >> extension.
> >>
> > And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
> > (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
> > is uint32.
> >
>
> Linux has u8 for distance. Ok, thank you for pointing that out.
>
EhEh... So, very hard to be consistent with every actor in the
play! :-)
Anyway, I think the point here is, as Jan was saying, to distinguish
internal representation from interface. Within Xen, we should store
everything in the smallest and nicest possible way (which, BTW, is what
Linux does by using u8 for distances).
OTOH, when it comes to exported interfaces, we should be much more
cautious, since changing the way Xen stores distances internally is
trivial, while changing the interface (either API or ABI) could be a
nightmare!
In this case, what we (Xen) tell Linux is the interface; it is up to it
(Linux) to convert that into a form that suits its own internals. In fact,
despite Linux being the only OS using this interface for now, it's not
wise to design the interface itself specifically for Linux, since many
OSes may want to support it in the future.
Regards,
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-09-17 9:04 ` Dario Faggioli
@ 2013-10-03 12:27 ` Li Yechen
2013-10-03 15:17 ` Dario Faggioli
0 siblings, 1 reply; 14+ messages in thread
From: Li Yechen @ 2013-10-03 12:27 UTC (permalink / raw)
To: Dario Faggioli
Cc: Keir Fraser, Elena Ufimtseva, Stefano Stabellini, George Dunlap,
Matt Wilson, xen-devel@lists.xen.org, Jan Beulich
Hi Elena,
Thank you for your great work first :)
In your patch, a guest can use a hypercall named "XENMEM_get_vnuma_info" to
get the structure below:
+struct vnuma_topology_info {
+ domid_t domid;
+ uint16_t nr_vnodes;
+ uint32_t _pad;
+ XEN_GUEST_HANDLE_64(uint) vdistance;
+ XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
+ XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
+};
I see that Xen saves a vnode_to_pnode[] array for the mapping.
However, this hypercall won't give this mapping to the guest VM.
Is that right?
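For reference, the guest-side usage implied by this interface looks roughly like the sketch below (Linux-flavoured code; the buffer sizing and error handling are assumptions, since the guest has to pick upper bounds before the call). As noted above, there is no vnode_to_pnode handle in the structure, so the guest never sees the physical mapping:

    /* Sketch of a PV guest querying its vNUMA topology. */
    static int xen_fetch_vnuma_topology(unsigned int max_vnodes, unsigned int max_vcpus)
    {
        struct vnuma_topology_info topo = { .domid = DOMID_SELF };
        unsigned int *vdistance, *vcpu_to_vnode;
        vnuma_memblk_t *memblks;
        int rc = -ENOMEM;

        vdistance     = kcalloc(max_vnodes * max_vnodes, sizeof(*vdistance), GFP_KERNEL);
        vcpu_to_vnode = kcalloc(max_vcpus, sizeof(*vcpu_to_vnode), GFP_KERNEL);
        memblks       = kcalloc(max_vnodes, sizeof(*memblks), GFP_KERNEL);
        if (!vdistance || !vcpu_to_vnode || !memblks)
            goto out;

        set_xen_guest_handle(topo.vdistance, vdistance);
        set_xen_guest_handle(topo.vcpu_to_vnode, vcpu_to_vnode);
        set_xen_guest_handle(topo.vnuma_memblks, memblks);

        rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &topo);
        if (rc == 0)
            pr_info("vNUMA: %u virtual nodes reported\n", topo.nr_vnodes);
        /* Note: no vnode_to_pnode here -- the virtual-to-physical mapping stays in Xen. */
    out:
        kfree(memblks);
        kfree(vcpu_to_vnode);
        kfree(vdistance);
        return rc;
    }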
On Tue, Sep 17, 2013 at 5:04 PM, Dario Faggioli
<dario.faggioli@citrix.com>wrote:
> On mar, 2013-09-17 at 03:19 -0400, Elena Ufimtseva wrote:
> > On Tue, Sep 17, 2013 at 3:11 AM, Dario Faggioli
> > <dario.faggioli@citrix.com> wrote:
> > > On mar, 2013-09-17 at 08:05 +0100, Jan Beulich wrote:
> > >> But - all this is only for the internal representations. Anything in
> > >> the public interface should be wide enough to allow future
> > >> extension.
> > >>
> > > And, in fact, 'node_to_node_distance' in xen/include/public/sysctl.h
> > > (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/sysctl.h)
> > > is uint32.
> > >
> >
> > Linux has u8 for distance. Ok, thank you for pointing that out.
> >
> EhEh... So, very hard to be consistent with every actor in the
> play! :-)
>
> Anyway, I think the point here is, as Jan was saying, to distinguish
> internal representation from interface. Within Xen, we should store
> everything in the smallest and nicest possible way (which, BTW, is what
> Linux does by using u8 for distances).
>
> OTOH, when it comes to exported interfaces, we should be much more
> cautious, since changing the way Xen stores distances internally is
> trivial, changing the interface (either API or ABI) could be a
> nightmare!
>
> In this case, what we (Xen) tell Linux is the interface, it is up to him
> (Linux) to convert that in a way that suits its own internals. In fact,
> despite Linux being the only one OS using this interface for now, it's
> not wise to design the interface itself specifically for Linux, since
> many OSes may want to support it in the future.
>
> Regards,
> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>
>
--
Yechen Li
Team of System Virtualization and Cloud Computing
School of Electronic Engineering and Computer Science,
Peking University, China
Nothing is impossible because impossible itself says: " I'm possible "
lccycc From PKU
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-10-03 12:27 ` Li Yechen
@ 2013-10-03 15:17 ` Dario Faggioli
2013-10-03 15:33 ` Elena Ufimtseva
0 siblings, 1 reply; 14+ messages in thread
From: Dario Faggioli @ 2013-10-03 15:17 UTC (permalink / raw)
To: Li Yechen
Cc: Keir Fraser, Elena Ufimtseva, Stefano Stabellini, George Dunlap,
Matt Wilson, xen-devel@lists.xen.org, Jan Beulich
On gio, 2013-10-03 at 20:27 +0800, Li Yechen wrote:
> Hi Elena,
> Thank you for your great work first :)
>
> In your patch, guest can use a hypercall name: "XENMEM_get_vnuma_info"
> to get the structure below:
> +struct vnuma_topology_info {
> + domid_t domid;
> + uint16_t nr_vnodes;
> + uint32_t _pad;
> + XEN_GUEST_HANDLE_64(uint) vdistance;
> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
> +};
>
>
> I see that Xen have save a vnode_to_pnode[] array for mapping
>
Yes it does. We spoke about it quite a bit, and Xen seems to be the best
place to store such info, considering how easy it is to keep it there,
and that it may turn out useful in the future (ballooning is a clear
example of that).
> However, this hypercall won't give this mapping to Guest VM.
>
Not with this hypercall, no. Actually, did most of the people, when
reviewing your RFC patches, say that they would prefer the mapping not
be exposed to the guest at all?
Dario
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-----------------------------------------------------------------
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
* Re: [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests.
2013-10-03 15:17 ` Dario Faggioli
@ 2013-10-03 15:33 ` Elena Ufimtseva
0 siblings, 0 replies; 14+ messages in thread
From: Elena Ufimtseva @ 2013-10-03 15:33 UTC (permalink / raw)
To: Dario Faggioli
Cc: Keir Fraser, Stefano Stabellini, George Dunlap, Matt Wilson,
Li Yechen, xen-devel@lists.xen.org, Jan Beulich
Hello Li, Dario! :)
On Thu, Oct 3, 2013 at 11:17 AM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On gio, 2013-10-03 at 20:27 +0800, Li Yechen wrote:
>> Hi Elena,
>> Thank you for your great work first :)
>>
>> In your patch, guest can use a hypercall name: "XENMEM_get_vnuma_info"
>> to get the structure below:
>> +struct vnuma_topology_info {
>> + domid_t domid;
>> + uint16_t nr_vnodes;
>> + uint32_t _pad;
>> + XEN_GUEST_HANDLE_64(uint) vdistance;
>> + XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
>> + XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
>> +};
>>
>>
>> I see that Xen have save a vnode_to_pnode[] array for mapping
>>
> Yes it does. We spoke about it quite a bit, and Xen seems to be the best
> place where to store such info, considering how easy it is to keep it
> there, and that it may turn out useful in future (and ballooning is a
> lcear example of that).
>
>> However, this hypercall won't give this mapping to Guest VM.
>>
> Not with this hypercall, no. Actually, did most of the people, when
> reviewing your RFC patches, said that they prefer the mapping not
> exposed at all to the guest?
Yes Dario, correct. I think someone, possibly Konrad, was saying that
it would be an interesting possibility,
but I don't think there has been any change in this.
So for Li, I think I can provide a hypercall that will return the mappings.
Li, do you think that would work for you?
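If that extension materialises, it would presumably amount to one more guest handle in the topology structure -- a purely hypothetical sketch of what is being offered here, not part of the posted patch:

    struct vnuma_topology_info {
        domid_t domid;
        uint16_t nr_vnodes;
        uint32_t _pad;
        XEN_GUEST_HANDLE_64(uint) vdistance;
        XEN_GUEST_HANDLE_64(uint) vcpu_to_vnode;
        XEN_GUEST_HANDLE_64(uint) vnode_to_pnode;  /* hypothetical addition */
        XEN_GUEST_HANDLE_64(vnuma_memblk_t) vnuma_memblks;
    };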
>
> Dario
>
> --
> <<This happens because I choose it to happen!>> (Raistlin Majere)
> -----------------------------------------------------------------
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
>
--
Elena
End of thread. Thread overview: 14+ messages:
2013-09-13 8:49 [PATCH RFC v2 1/7] xen/vNUMA: vNUMA support for PV guests Elena Ufimtseva
2013-09-13 9:31 ` Andrew Cooper
2013-09-13 11:53 ` Jan Beulich
2013-09-13 11:00 ` Jan Beulich
2013-09-16 15:46 ` George Dunlap
2013-09-17 6:44 ` Elena Ufimtseva
2013-09-17 6:59 ` Elena Ufimtseva
2013-09-17 7:05 ` Jan Beulich
2013-09-17 7:11 ` Dario Faggioli
2013-09-17 7:19 ` Elena Ufimtseva
2013-09-17 9:04 ` Dario Faggioli
2013-10-03 12:27 ` Li Yechen
2013-10-03 15:17 ` Dario Faggioli
2013-10-03 15:33 ` Elena Ufimtseva