From mboxrd@z Thu Jan 1 00:00:00 1970 From: George Dunlap Subject: Re: [PATCH 08 of 10 v2] libxl: automatic placement deals with node-affinity Date: Fri, 21 Dec 2012 16:22:32 +0000 Message-ID: <50D48CC8.6080801@eu.citrix.com> References: Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Dario Faggioli Cc: Marcus Granado , Dan Magenheimer , Ian Campbell , Anil Madhavapeddy , Andrew Cooper , Juergen Gross , Ian Jackson , "xen-devel@lists.xen.org" , Jan Beulich , Daniel De Graaf , Matt Wilson List-Id: xen-devel@lists.xenproject.org On 19/12/12 19:07, Dario Faggioli wrote: > Which basically means the following two things: > 1) during domain creation, it is the node-affinity of > the domain --rather than the vcpu-affinities of its > VCPUs-- that is affected by automatic placement; > 2) during automatic placement, when counting how many > VCPUs are already "bound" to a placement candidate > (as part of the process of choosing the best > candidate), both vcpu-affinity and node-affinity > are considered. > > Signed-off-by: Dario Faggioli > Acked-by: George Dunlap > Acked-by: Juergen Gross Re-confirming Ack. -George > > diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c > --- a/tools/libxl/libxl_dom.c > +++ b/tools/libxl/libxl_dom.c > @@ -133,13 +133,13 @@ static int numa_place_domain(libxl__gc * > { > int found; > libxl__numa_candidate candidate; > - libxl_bitmap candidate_nodemap; > + libxl_bitmap cpupool_nodemap; > libxl_cpupoolinfo cpupool_info; > int i, cpupool, rc = 0; > uint32_t memkb; > > libxl__numa_candidate_init(&candidate); > - libxl_bitmap_init(&candidate_nodemap); > + libxl_bitmap_init(&cpupool_nodemap); > > /* > * Extract the cpumap from the cpupool the domain belong to. In fact, > @@ -156,7 +156,7 @@ static int numa_place_domain(libxl__gc * > rc = libxl_domain_need_memory(CTX, info, &memkb); > if (rc) > goto out; > - if (libxl_node_bitmap_alloc(CTX, &candidate_nodemap, 0)) { > + if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) { > rc = ERROR_FAIL; > goto out; > } > @@ -174,17 +174,19 @@ static int numa_place_domain(libxl__gc * > if (found == 0) > goto out; > > - /* Map the candidate's node map to the domain's info->cpumap */ > - libxl__numa_candidate_get_nodemap(gc, &candidate, &candidate_nodemap); > - rc = libxl_nodemap_to_cpumap(CTX, &candidate_nodemap, &info->cpumap); > + /* Map the candidate's node map to the domain's info->nodemap */ > + libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap); > + > + /* Avoid trying to set the affinity to nodes that might be in the > + * candidate's nodemap but out of our cpupool. */ > + rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap, > + &cpupool_nodemap); > if (rc) > goto out; > > - /* Avoid trying to set the affinity to cpus that might be in the > - * nodemap but not in our cpupool. */ > - libxl_for_each_set_bit(i, info->cpumap) { > - if (!libxl_bitmap_test(&cpupool_info.cpumap, i)) > - libxl_bitmap_reset(&info->cpumap, i); > + libxl_for_each_set_bit(i, info->nodemap) { > + if (!libxl_bitmap_test(&cpupool_nodemap, i)) > + libxl_bitmap_reset(&info->nodemap, i); > } > > LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and " > @@ -193,7 +195,7 @@ static int numa_place_domain(libxl__gc * > > out: > libxl__numa_candidate_dispose(&candidate); > - libxl_bitmap_dispose(&candidate_nodemap); > + libxl_bitmap_dispose(&cpupool_nodemap); > libxl_cpupoolinfo_dispose(&cpupool_info); > return rc; > } > @@ -211,10 +213,10 @@ int libxl__build_pre(libxl__gc *gc, uint > /* > * Check if the domain has any CPU affinity. If not, try to build > * up one. In case numa_place_domain() find at least a suitable > - * candidate, it will affect info->cpumap accordingly; if it > + * candidate, it will affect info->nodemap accordingly; if it > * does not, it just leaves it as it is. This means (unless > * some weird error manifests) the subsequent call to > - * libxl_set_vcpuaffinity_all() will do the actual placement, > + * libxl_domain_set_nodeaffinity() will do the actual placement, > * whatever that turns out to be. > */ > if (libxl_defbool_val(info->numa_placement)) { > diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c > --- a/tools/libxl/libxl_numa.c > +++ b/tools/libxl/libxl_numa.c > @@ -184,7 +184,7 @@ static int nr_vcpus_on_nodes(libxl__gc * > int vcpus_on_node[]) > { > libxl_dominfo *dinfo = NULL; > - libxl_bitmap vcpu_nodemap; > + libxl_bitmap dom_nodemap, vcpu_nodemap; > int nr_doms, nr_cpus; > int i, j, k; > > @@ -197,6 +197,12 @@ static int nr_vcpus_on_nodes(libxl__gc * > return ERROR_FAIL; > } > > + if (libxl_node_bitmap_alloc(CTX, &dom_nodemap, 0) < 0) { > + libxl_bitmap_dispose(&vcpu_nodemap); > + libxl_dominfo_list_free(dinfo, nr_doms); > + return ERROR_FAIL; > + } > + > for (i = 0; i < nr_doms; i++) { > libxl_vcpuinfo *vinfo; > int nr_dom_vcpus; > @@ -205,14 +211,21 @@ static int nr_vcpus_on_nodes(libxl__gc * > if (vinfo == NULL) > continue; > > + /* Retrieve the domain's node-affinity map */ > + libxl_domain_get_nodeaffinity(CTX, dinfo[i].domid, &dom_nodemap); > + > for (j = 0; j < nr_dom_vcpus; j++) { > - /* For each vcpu of each domain, increment the elements of > - * the array corresponding to the nodes where the vcpu runs */ > + /* > + * For each vcpu of each domain, it must have both vcpu-affinity > + * and node-affinity to (a pcpu belonging to) a certain node to > + * cause an increment in the corresponding element of the array. > + */ > libxl_bitmap_set_none(&vcpu_nodemap); > libxl_for_each_set_bit(k, vinfo[j].cpumap) { > int node = tinfo[k].node; > > if (libxl_bitmap_test(suitable_cpumap, k) && > + libxl_bitmap_test(&dom_nodemap, node) && > !libxl_bitmap_test(&vcpu_nodemap, node)) { > libxl_bitmap_set(&vcpu_nodemap, node); > vcpus_on_node[node]++; > @@ -223,6 +236,7 @@ static int nr_vcpus_on_nodes(libxl__gc * > libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus); > } > > + libxl_bitmap_dispose(&dom_nodemap); > libxl_bitmap_dispose(&vcpu_nodemap); > libxl_dominfo_list_free(dinfo, nr_doms); > return 0;