From mboxrd@z Thu Jan 1 00:00:00 1970 From: Boris Ostrovsky Subject: Re: [PATCH v4 3/9] sysctl: Make XEN_SYSCTL_topologyinfo sysctl a little more efficient Date: Tue, 10 Mar 2015 11:22:48 -0400 Message-ID: <54FF0C48.9090400@oracle.com> References: <1425954475-4913-1-git-send-email-boris.ostrovsky@oracle.com> <1425954475-4913-4-git-send-email-boris.ostrovsky@oracle.com> <54FEFFDA.7090903@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <54FEFFDA.7090903@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Andrew Cooper , jbeulich@suse.com, keir@xen.org, ian.jackson@eu.citrix.com, stefano.stabellini@eu.citrix.com, ian.campbell@citrix.com, wei.liu2@citrix.com Cc: elena.ufimtseva@oracle.com, dario.faggioli@citrix.com, xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On 03/10/2015 10:29 AM, Andrew Cooper wrote: > On 10/03/15 02:27, Boris Ostrovsky wrote: >> Instead of copying data for each field in xen_sysctl_topologyinfo separately >> put cpu/socket/node into a single structure and do a single copy for each >> processor. >> >> Do not use max_cpu_index, which is almost always used for calculating number >> CPUs (thus requiring adding or subtracting one), replace it with num_cpus. >> >> There is no need to copy whole op in sysctl to user at the end, we only need >> num_cpus. >> >> Rename xen_sysctl_topologyinfo and XEN_SYSCTL_topologyinfo to reflect the fact >> that these are used for CPU topology. Subsequent patch will add support for >> PCI topology sysctl. >> >> Replace INVALID_TOPOLOGY_ID with "XEN_"-prefixed macros for each invalid type >> (core, socket, node). >> >> Signed-off-by: Boris Ostrovsky > In principle, a good improvement, but I have some specific issues. > >> --- >> >> diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c >> index 2aa0dc7..2fd93e0 100644 >> --- a/tools/python/xen/lowlevel/xc/xc.c >> +++ b/tools/python/xen/lowlevel/xc/xc.c >> @@ -1375,7 +1365,7 @@ static PyObject *pyxc_numainfo(XcObject *self) >> for ( j = 0; j <= max_node_index; j++ ) >> { >> uint32_t dist = nodes_dist[i*(max_node_index+1) + j]; >> - if ( dist == INVALID_TOPOLOGY_ID ) >> + if ( dist == ~0u ) >> { >> PyList_Append(node_to_node_dist_obj, Py_None); >> } > This looks like a spurious hunk (perhaps from patch 9?) No, this is sort of an intermediate step: INVALID_TOPOLOGY_ID is no longer defined and a new macro for distance will show up in the next patch. And, in fact, it's wrong anyway since the sysctl never sets distance to INVALID_TOPOLOGY_ID, it sets it explicitly to ~0u. It just so happens that INVALID_TOPOLOGY_ID is also ~0U. I thought about moving macro definition into this patch but then logically it wouldn't belong here so I figured I'd do what the hypervisor does, which is use a constant. Until the next patch. > >> diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c >> index 0cb6ee1..fe48ee8 100644 >> --- a/xen/common/sysctl.c >> +++ b/xen/common/sysctl.c >> @@ -320,39 +320,61 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) >> } >> break; >> >> - case XEN_SYSCTL_topologyinfo: >> + case XEN_SYSCTL_cputopoinfo: >> { >> - uint32_t i, max_cpu_index, last_online_cpu; >> - xen_sysctl_topologyinfo_t *ti = &op->u.topologyinfo; >> + uint32_t i, num_cpus; >> + xen_sysctl_cputopoinfo_t *ti = &op->u.cputopoinfo; >> >> - last_online_cpu = cpumask_last(&cpu_online_map); >> - max_cpu_index = min_t(uint32_t, ti->max_cpu_index, last_online_cpu); >> - ti->max_cpu_index = last_online_cpu; >> + if ( guest_handle_is_null(ti->cputopo) ) >> + { >> + ret = -EINVAL; >> + break; >> + } > The prevailing hypervisor style is to use a NULL guest handle as an > explicit request for size. i.e. write back num_cpus in ti and return > success. Yes, this would look better from the interface POV. -boris > >> >> - for ( i = 0; i <= max_cpu_index; i++ ) >> + num_cpus = cpumask_last(&cpu_online_map) + 1; >> + if ( ti->num_cpus != num_cpus ) >> { >> - if ( !guest_handle_is_null(ti->cpu_to_core) ) >> + uint32_t array_sz = ti->num_cpus; >> + >> + ti->num_cpus = num_cpus; >> + if ( __copy_field_to_guest(u_sysctl, op, >> + u.cputopoinfo.num_cpus) ) >> { >> - uint32_t core = cpu_online(i) ? cpu_to_core(i) : ~0u; >> - if ( copy_to_guest_offset(ti->cpu_to_core, i, &core, 1) ) >> - break; >> + ret = -EFAULT; >> + break; >> + } >> + num_cpus = min_t(uint32_t, array_sz, num_cpus); >> + } >> + >> + for ( i = 0; i < num_cpus; i++ ) >> + { >> + xen_sysctl_cputopo_t cputopo; >> + >> + if ( cpu_present(i) ) >> + { >> + cputopo.core = cpu_to_core(i); >> + if ( cputopo.core == BAD_APICID ) >> + cputopo.core = XEN_INVALID_CORE_ID; >> + cputopo.socket = cpu_to_socket(i); >> + if ( cputopo.socket == BAD_APICID ) >> + cputopo.socket = XEN_INVALID_SOCKET_ID; >> + cputopo.node = cpu_to_node(i); >> + if ( cputopo.node == NUMA_NO_NODE ) >> + cputopo.node = XEN_INVALID_NODE_ID; >> } >> - if ( !guest_handle_is_null(ti->cpu_to_socket) ) >> + else >> { >> - uint32_t socket = cpu_online(i) ? cpu_to_socket(i) : ~0u; >> - if ( copy_to_guest_offset(ti->cpu_to_socket, i, &socket, 1) ) >> - break; >> + cputopo.core = XEN_INVALID_CORE_ID; >> + cputopo.socket = XEN_INVALID_SOCKET_ID; >> + cputopo.node = XEN_INVALID_NODE_ID; >> } >> - if ( !guest_handle_is_null(ti->cpu_to_node) ) >> + >> + if ( copy_to_guest_offset(ti->cputopo, i, &cputopo, 1) ) >> { >> - uint32_t node = cpu_online(i) ? cpu_to_node(i) : ~0u; >> - if ( copy_to_guest_offset(ti->cpu_to_node, i, &node, 1) ) >> - break; >> + ret = -EFAULT; >> + break; >> } >> } >> - >> - ret = ((i <= max_cpu_index) || copy_to_guest(u_sysctl, op, 1)) >> - ? -EFAULT : 0; >> } >> break; >> >> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h >> index 8552dc6..f20da69 100644 >> --- a/xen/include/public/sysctl.h >> +++ b/xen/include/public/sysctl.h >> @@ -462,31 +462,37 @@ struct xen_sysctl_lockprof_op { >> typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; >> DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); >> >> -/* XEN_SYSCTL_topologyinfo */ >> -#define INVALID_TOPOLOGY_ID (~0U) >> -struct xen_sysctl_topologyinfo { >> +/* XEN_SYSCTL_cputopoinfo */ >> +#define XEN_INVALID_CORE_ID (~0U) >> +#define XEN_INVALID_SOCKET_ID (~0U) >> +#define XEN_INVALID_NODE_ID ((uint8_t)~0) >> + >> +struct xen_sysctl_cputopo { >> + uint32_t core; >> + uint32_t socket; >> + uint8_t node; >> +}; >> +typedef struct xen_sysctl_cputopo xen_sysctl_cputopo_t; >> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopo_t); >> + >> +struct xen_sysctl_cputopoinfo { >> /* >> - * IN: maximum addressable entry in the caller-provided arrays. >> - * OUT: largest cpu identifier in the system. >> - * If OUT is greater than IN then the arrays are truncated! >> - * If OUT is leass than IN then the array tails are not written by sysctl. >> + * IN: size of caller-provided cputopo array. >> + * OUT: Number of CPUs in the system. >> */ >> - uint32_t max_cpu_index; >> + uint32_t num_cpus; >> >> /* >> - * If not NULL, these arrays are filled with core/socket/node identifier >> - * for each cpu. >> - * If a cpu has no core/socket/node information (e.g., cpu not present) >> - * then the sentinel value ~0u is written to each array. >> - * The number of array elements written by the sysctl is: >> - * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1 >> + * If not NULL, filled with core/socket/node identifier for each cpu >> + * If information for a particular entry is not avalable it is set to >> + * XEN_INVALID__ID. >> + * The number of array elements for CPU topology written by the sysctl is: >> + * min(@num_cpus_IN,@num_cpus_OUT)+1. > This is also a problematic interface as it clobbers the actual length of > the valid array. strace/valgrind have no way of validating the state > after the hypercall, and the user of the interface has to remember the > number they passed in to start with. > > It is also prevailing style to return the number of entries actually > written to, and fail the hypercall with -ENOBUFS if there is > insufficient space in the destination array. > > See the implementation of XEN_DOMCTL_get_vcpu_msrs as an example which > handles each of these corner cases. > > ~Andrew > >> */ >> - XEN_GUEST_HANDLE_64(uint32) cpu_to_core; >> - XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; >> - XEN_GUEST_HANDLE_64(uint32) cpu_to_node; >> + XEN_GUEST_HANDLE_64(xen_sysctl_cputopo_t) cputopo; >> }; >> -typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; >> -DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); >> +typedef struct xen_sysctl_cputopoinfo xen_sysctl_cputopoinfo_t; >> +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t); >> >> /* XEN_SYSCTL_numainfo */ >> #define INVALID_NUMAINFO_ID (~0U) >> @@ -672,7 +678,7 @@ struct xen_sysctl { >> #define XEN_SYSCTL_pm_op 12 >> #define XEN_SYSCTL_page_offline_op 14 >> #define XEN_SYSCTL_lockprof_op 15 >> -#define XEN_SYSCTL_topologyinfo 16 >> +#define XEN_SYSCTL_cputopoinfo 16 >> #define XEN_SYSCTL_numainfo 17 >> #define XEN_SYSCTL_cpupool_op 18 >> #define XEN_SYSCTL_scheduler_op 19 >> @@ -683,7 +689,7 @@ struct xen_sysctl { >> struct xen_sysctl_readconsole readconsole; >> struct xen_sysctl_tbuf_op tbuf_op; >> struct xen_sysctl_physinfo physinfo; >> - struct xen_sysctl_topologyinfo topologyinfo; >> + struct xen_sysctl_cputopoinfo cputopoinfo; >> struct xen_sysctl_numainfo numainfo; >> struct xen_sysctl_sched_id sched_id; >> struct xen_sysctl_perfc_op perfc_op; >