From mboxrd@z Thu Jan 1 00:00:00 1970 From: Juergen Gross Subject: Re: [PATCH 1/4] xen: report how much memory a domain has on each NUMA node Date: Wed, 05 Mar 2014 15:50:05 +0100 Message-ID: <5317399D.1000001@ts.fujitsu.com> References: <20140305143357.6984.7729.stgit@Solace> <20140305143625.6984.3763.stgit@Solace> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <20140305143625.6984.3763.stgit@Solace> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Dario Faggioli Cc: Ian Campbell , Andrew Cooper , Ian Jackson , xen-devel , Jan Beulich , Daniel De Graaf List-Id: xen-devel@lists.xenproject.org On 05.03.2014 15:36, Dario Faggioli wrote: > by means of a new hypercal, XEN_DOMCTL_numainfo, doing something > similar to what XEN_SYSCTL_numainfo does, but on a per domain basis. > > Signed-off-by: Dario Faggioli > --- > xen/common/domctl.c | 45 +++++++++++++++++++++++++++++++++++ > xen/include/public/domctl.h | 22 +++++++++++++++++ > xen/xsm/flask/hooks.c | 3 ++ > xen/xsm/flask/policy/access_vectors | 2 ++ > 4 files changed, 72 insertions(+) > > diff --git a/xen/common/domctl.c b/xen/common/domctl.c > index 7cf610a..96bf326 100644 > --- a/xen/common/domctl.c > +++ b/xen/common/domctl.c > @@ -574,6 +574,51 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) > } > break; > > + case XEN_DOMCTL_numainfo: > + { > + uint32_t node, max_node_index, last_online_node; > + xen_domctl_numainfo_t *ni = &op->u.numainfo; > + uint64_t *memkb_on_node; > + struct page_info *page; > + > + /* > + * We report back info about the min number of nodes between how > + * much of them the caller can handle and the number of them that > + * are actually online. 
> + */ > + last_online_node = last_node(node_online_map); > + max_node_index = min_t(uint32_t, ni->max_node_index, last_online_node); > + ni->max_node_index = max_node_index; > + > + ret = -ENOMEM; > + memkb_on_node = xzalloc_array(uint64_t, max_node_index); > + if ( !memkb_on_node ) > + break; > + > + spin_lock(&d->page_alloc_lock); > + page_list_for_each(page, &d->page_list) > + { > + node = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT); > + /* For nodes that are offline, don't touch the counter */ > + if ( node <= max_node_index && node_online(node) ) > + memkb_on_node[node]++; > + } This loop will run for quite a long time for huge domains, and it does so with d->page_alloc_lock held. Wouldn't it be better to do the accounting during page allocation? > + spin_unlock(&d->page_alloc_lock); > + > + for ( node = 0; node <= max_node_index; node++ ) > + { > + memkb_on_node[node] <<= (PAGE_SHIFT-10); Since you already use a 64-bit element, you could use bytes as the unit. > + if ( copy_to_guest_offset(ni->memkb_on_node, node, > + &memkb_on_node[node], 1) ) > + break; > + } > + > + ret = ((node <= max_node_index) || copy_to_guest(u_domctl, op, 1)) > + ? -EFAULT : 0; > + xfree(memkb_on_node); > + } > + break; > + > case XEN_DOMCTL_destroydomain: > { > ret = domain_kill(d); > diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h > index f22fe2e..a455d78 100644 > --- a/xen/include/public/domctl.h > +++ b/xen/include/public/domctl.h > @@ -315,6 +315,26 @@ typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t; > DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); > > > +/* XEN_DOMCTL_numainfo */ > +struct xen_domctl_numainfo { > + /* > + * IN: maximum addressable entry in the caller-provided arrays. > + * OUT: minimum between the maximum addressable entry in the > + * caller-provided arrays and largest online node identifier > + * in the system. > + */ > + uint32_t max_node_index; Add explicit padding after this 32-bit field, so that the 64-bit guest handle that follows is explicitly aligned? > + > + /* > + * OUT: memory, in Kb, on each node. 
i-eth element equal to 0 means > + * either "no memory on node i" or "node i offline". > + */ > + XEN_GUEST_HANDLE_64(uint64) memkb_on_node; > +}; > +typedef struct xen_domctl_numainfo xen_domctl_numainfo_t; > +DEFINE_XEN_GUEST_HANDLE(xen_domctl_numainfo_t); > + > + > /* XEN_DOMCTL_scheduler_op */ > /* Scheduler types. */ > #define XEN_SCHEDULER_SEDF 4 > @@ -966,6 +986,7 @@ struct xen_domctl { > #define XEN_DOMCTL_getnodeaffinity 69 > #define XEN_DOMCTL_set_max_evtchn 70 > #define XEN_DOMCTL_cacheflush 71 > +#define XEN_DOMCTL_numainfo 72 > #define XEN_DOMCTL_gdbsx_guestmemio 1000 > #define XEN_DOMCTL_gdbsx_pausevcpu 1001 > #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 > @@ -986,6 +1007,7 @@ struct xen_domctl { > struct xen_domctl_vcpucontext vcpucontext; > struct xen_domctl_getvcpuinfo getvcpuinfo; > struct xen_domctl_max_vcpus max_vcpus; > + struct xen_domctl_numainfo numainfo; > struct xen_domctl_scheduler_op scheduler_op; > struct xen_domctl_setdomainhandle setdomainhandle; > struct xen_domctl_setdebugging setdebugging; > diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c > index 96276ac..edc1d34 100644 > --- a/xen/xsm/flask/hooks.c > +++ b/xen/xsm/flask/hooks.c > @@ -727,6 +727,9 @@ static int flask_domctl(struct domain *d, int cmd) > case XEN_DOMCTL_cacheflush: > return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__CACHEFLUSH); > > + case XEN_DOMCTL_numainfo: > + return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__NUMAINFO); > + > default: > printk("flask_domctl: Unknown op %d\n", cmd); > return -EPERM; > diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors > index a0ed13d..e218992 100644 > --- a/xen/xsm/flask/policy/access_vectors > +++ b/xen/xsm/flask/policy/access_vectors > @@ -198,6 +198,8 @@ class domain2 > set_max_evtchn > # XEN_DOMCTL_cacheflush > cacheflush > +# XEN_DOMCTL_numainfo > + numainfo > } > > # Similar to class domain, but primarily contains domctls related to HVM domains Juergen -- Juergen Gross 
Principal Developer Operating Systems PBG PDG ES&S SWE OS6 Telephone: +49 (0) 89 62060 2932 Fujitsu e-mail: juergen.gross@ts.fujitsu.com Mies-van-der-Rohe-Str. 8 Internet: ts.fujitsu.com D-80807 Muenchen Company details: ts.fujitsu.com/imprint.html