From: Juergen Gross
Subject: Re: [PATCH 05 of 11 v3] xen: allow for explicitly specifying node-affinity
Date: Fri, 01 Feb 2013 15:20:17 +0100
To: Dario Faggioli
Cc: Marcus Granado, Dan Magenheimer, Ian Campbell, Anil Madhavapeddy, George Dunlap, Andrew Cooper, Ian Jackson, xen-devel@lists.xen.org, Jan Beulich, Daniel De Graaf, Matt Wilson

On 01.02.2013 12:01, Dario Faggioli wrote:
> Make it possible to pass the node-affinity of a domain to the hypervisor
> from the upper layers, instead of always being computed automatically.
>
> Note that this also required generalizing the Flask hooks for setting
> and getting the affinity, so that they now deal with both vcpu and
> node affinity.
>
> Signed-off-by: Dario Faggioli
> Acked-by: Daniel De Graaf
> Acked-by: George Dunlap

Except for one minor note (see below):

Acked-by: Juergen Gross

> ---
> Changes from v2:
>  * reworked as needed after the merge of Daniel's IS_PRIV work;
>  * xen.te taken care of, as requested during review.
>
> Changes from v1:
>  * added the missing dummy hook for nodeaffinity;
>  * let the permission renaming affect flask policies too.
>
> diff --git a/tools/flask/policy/policy/mls b/tools/flask/policy/policy/mls
> --- a/tools/flask/policy/policy/mls
> +++ b/tools/flask/policy/policy/mls
> @@ -70,11 +70,11 @@ mlsconstrain domain transition
>      (( h1 dom h2 ) and (( l1 eq l2 ) or (t1 == mls_priv)));
>
>  # all the domain "read" ops
> -mlsconstrain domain { getvcpuaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
> +mlsconstrain domain { getaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
>      ((l1 dom l2) or (t1 == mls_priv));
>
>  # all the domain "write" ops
> -mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setvcpuaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
> +mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
>      ((l1 eq l2) or (t1 == mls_priv));
>
>  # This is incomplete - similar constraints must be written for all classes
> diff --git a/tools/flask/policy/policy/modules/xen/xen.if b/tools/flask/policy/policy/modules/xen/xen.if
> --- a/tools/flask/policy/policy/modules/xen/xen.if
> +++ b/tools/flask/policy/policy/modules/xen/xen.if
> @@ -48,7 +48,7 @@ define(`create_domain_common', `
>  	allow $1 $2:domain { create max_vcpus setdomainmaxmem setaddrsize
>  			getdomaininfo hypercall setvcpucontext setextvcpucontext
>  			getscheduler getvcpuinfo getvcpuextstate getaddrsize
> -			getvcpuaffinity setvcpuaffinity };
> +			getaffinity setaffinity };
>  	allow $1 $2:domain2 { set_cpuid settsc setscheduler };
>  	allow $1 $2:security check_context;
>  	allow $1 $2:shadow enable;
> @@ -77,9 +77,9 @@ define(`create_domain_build_label', `
>  # manage_domain(priv, target)
>  #   Allow managing a running domain
>  define(`manage_domain', `
> -	allow $1 $2:domain { getdomaininfo getvcpuinfo getvcpuaffinity
> +	allow $1 $2:domain { getdomaininfo getvcpuinfo getaffinity
>  			getaddrsize pause unpause trigger shutdown destroy
> -			setvcpuaffinity setdomainmaxmem getscheduler };
> +			setaffinity setdomainmaxmem getscheduler };
>  ')
>
>  # migrate_domain_out(priv, target)
> diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
> --- a/tools/flask/policy/policy/modules/xen/xen.te
> +++ b/tools/flask/policy/policy/modules/xen/xen.te
> @@ -62,7 +62,7 @@ allow dom0_t security_t:security { check
>  	compute_member load_policy compute_relabel compute_user setenforce
>  	setbool setsecparam add_ocontext del_ocontext };
>
> -allow dom0_t dom0_t:domain { getdomaininfo getvcpuinfo getvcpuaffinity };
> +allow dom0_t dom0_t:domain { getdomaininfo getvcpuinfo getaffinity };
>  allow dom0_t dom0_t:resource { add remove };
>
>  admin_device(dom0_t, device_t)
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -222,6 +222,7 @@ struct domain *domain_create(
>
>      spin_lock_init(&d->node_affinity_lock);
>      d->node_affinity = NODE_MASK_ALL;
> +    d->auto_node_affinity = 1;
>
>      spin_lock_init(&d->shutdown_lock);
>      d->shutdown_code = -1;
> @@ -362,11 +363,26 @@ void domain_update_node_affinity(struct
>          cpumask_or(cpumask, cpumask, online_affinity);
>      }
>
> -    for_each_online_node ( node )
> -        if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
> -            node_set(node, nodemask);
> +    if ( d->auto_node_affinity )
> +    {
> +        /* Node-affinity is automatically computed from all vcpu-affinities */
> +        for_each_online_node ( node )
> +            if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
> +                node_set(node, nodemask);
>
> -    d->node_affinity = nodemask;
> +        d->node_affinity = nodemask;
> +    }
> +    else
> +    {
> +        /* Node-affinity is provided by someone else, just filter out cpus
> +         * that are either offline or not in the affinity of any vcpus. */
> +        for_each_node_mask ( node, d->node_affinity )
> +            if ( !cpumask_intersects(&node_to_cpumask(node), cpumask) )
> +                node_clear(node, d->node_affinity);
> +    }
> +
> +    sched_set_node_affinity(d, &d->node_affinity);
> +
>      spin_unlock(&d->node_affinity_lock);
>
>      free_cpumask_var(online_affinity);
> @@ -374,6 +390,36 @@ void domain_update_node_affinity(struct
>  }
>
>
> +int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
> +{
> +    /* Being affine with no nodes is just wrong */
> +    if ( nodes_empty(*affinity) )
> +        return -EINVAL;
> +
> +    spin_lock(&d->node_affinity_lock);
> +
> +    /*
> +     * Being/becoming explicitly affine to all nodes is not particularly
> +     * useful. Let's take it as the `reset node affinity` command.
> +     */
> +    if ( nodes_full(*affinity) )
> +    {
> +        d->auto_node_affinity = 1;
> +        goto out;
> +    }
> +
> +    d->auto_node_affinity = 0;
> +    d->node_affinity = *affinity;
> +
> +out:
> +    spin_unlock(&d->node_affinity_lock);
> +
> +    domain_update_node_affinity(d);
> +
> +    return 0;
> +}
> +
> +
>  struct domain *get_domain_by_id(domid_t dom)
>  {
>      struct domain *d;
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -559,6 +559,26 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
>      }
>      break;
>
> +    case XEN_DOMCTL_setnodeaffinity:
> +    case XEN_DOMCTL_getnodeaffinity:
> +    {
> +        if ( op->cmd == XEN_DOMCTL_setnodeaffinity )
> +        {
> +            nodemask_t new_affinity;
> +
> +            ret = xenctl_bitmap_to_nodemask(&new_affinity,
> +                                            &op->u.nodeaffinity.nodemap);
> +            if ( !ret )
> +                ret = domain_set_node_affinity(d, &new_affinity);
> +        }
> +        else
> +        {
> +            ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
> +                                            &d->node_affinity);
> +        }
> +    }
> +    break;
> +

The two cases shouldn't be handled in one block, I think. Only the break
statement seems to be common here.
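For illustration, an untested sketch of the split I have in mind; it only
rearranges the calls already present in the quoted hunk, nothing new:

    case XEN_DOMCTL_setnodeaffinity:
    {
        nodemask_t new_affinity;

        /* Copy the node bitmap in from the toolstack... */
        ret = xenctl_bitmap_to_nodemask(&new_affinity,
                                        &op->u.nodeaffinity.nodemap);
        if ( !ret )
            /* ...and let the common code validate and apply it. */
            ret = domain_set_node_affinity(d, &new_affinity);
    }
    break;

    case XEN_DOMCTL_getnodeaffinity:
        /* Read-only sub-op: just copy the current node-affinity out. */
        ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
                                        &d->node_affinity);
        break;

That way each sub-op reads on its own, and the shared block body goes away.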

Jürgen

-- 
Juergen Gross                 Principal Developer Operating Systems
PBG PDG ES&S SWE OS6                   Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions           e-mail: juergen.gross@ts.fujitsu.com
Domagkstr. 28                          Internet: ts.fujitsu.com
D-80807 Muenchen                       Company details: ts.fujitsu.com/imprint.html