From: Dario Faggioli <dario.faggioli@citrix.com>
To: xen-devel@lists.xen.org
Cc: Andre Przywara <andre.przywara@amd.com>,
Ian Campbell <Ian.Campbell@citrix.com>,
Anil Madhavapeddy <anil@recoil.org>,
George Dunlap <george.dunlap@eu.citrix.com>,
Andrew Cooper <Andrew.Cooper3@citrix.com>,
Juergen Gross <juergen.gross@ts.fujitsu.com>,
Ian Jackson <Ian.Jackson@eu.citrix.com>,
Jan Beulich <JBeulich@suse.com>,
Marcus Granado <Marcus.Granado@eu.citrix.com>,
Daniel De Graaf <dgdegra@tycho.nsa.gov>,
Matt Wilson <msw@amazon.com>
Subject: [PATCH 7 of 8] libxl: automatic placement deals with node-affinity
Date: Fri, 05 Oct 2012 16:08:25 +0200 [thread overview]
Message-ID: <aa8255447a30ef25ff12.1349446105@Solace> (raw)
In-Reply-To: <patchbomb.1349446098@Solace>
This basically means the following two things:
1) during domain creation, it is the node-affinity of
the domain --rather than the vcpu-affinities of its
vcpus-- that is affected by automatic placement;
2) during automatic placement, when counting how many
vcpus are already "bound" to a placement candidate
(as part of the process of choosing the best
candidate), node-affinity is also considered,
together with vcpu-affinity.
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -133,13 +133,13 @@ static int numa_place_domain(libxl__gc *
{
int found;
libxl__numa_candidate candidate;
- libxl_bitmap candidate_nodemap;
+ libxl_bitmap cpupool_nodemap;
libxl_cpupoolinfo cpupool_info;
int i, cpupool, rc = 0;
uint32_t memkb;
libxl__numa_candidate_init(&candidate);
- libxl_bitmap_init(&candidate_nodemap);
+ libxl_bitmap_init(&cpupool_nodemap);
/*
* Extract the cpumap from the cpupool the domain belong to. In fact,
@@ -156,7 +156,7 @@ static int numa_place_domain(libxl__gc *
rc = libxl_domain_need_memory(CTX, info, &memkb);
if (rc)
goto out;
- if (libxl_node_bitmap_alloc(CTX, &candidate_nodemap, 0)) {
+ if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
rc = ERROR_FAIL;
goto out;
}
@@ -174,17 +174,19 @@ static int numa_place_domain(libxl__gc *
if (found == 0)
goto out;
- /* Map the candidate's node map to the domain's info->cpumap */
- libxl__numa_candidate_get_nodemap(gc, &candidate, &candidate_nodemap);
- rc = libxl_nodemap_to_cpumap(CTX, &candidate_nodemap, &info->cpumap);
+ /* Map the candidate's node map to the domain's info->nodemap */
+ libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
+
+ /* Avoid trying to set the affinity to nodes that might be in the
+ * candidate's nodemap but out of our cpupool. */
+ rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
+ &cpupool_nodemap);
if (rc)
goto out;
- /* Avoid trying to set the affinity to cpus that might be in the
- * nodemap but not in our cpupool. */
- libxl_for_each_set_bit(i, info->cpumap) {
- if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
- libxl_bitmap_reset(&info->cpumap, i);
+ libxl_for_each_set_bit(i, info->nodemap) {
+ if (!libxl_bitmap_test(&cpupool_nodemap, i))
+ libxl_bitmap_reset(&info->nodemap, i);
}
LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
@@ -193,7 +195,7 @@ static int numa_place_domain(libxl__gc *
out:
libxl__numa_candidate_dispose(&candidate);
- libxl_bitmap_dispose(&candidate_nodemap);
+ libxl_bitmap_dispose(&cpupool_nodemap);
libxl_cpupoolinfo_dispose(&cpupool_info);
return rc;
}
@@ -211,10 +213,10 @@ int libxl__build_pre(libxl__gc *gc, uint
/*
* Check if the domain has any CPU affinity. If not, try to build
* up one. In case numa_place_domain() find at least a suitable
- * candidate, it will affect info->cpumap accordingly; if it
+ * candidate, it will affect info->nodemap accordingly; if it
* does not, it just leaves it as it is. This means (unless
* some weird error manifests) the subsequent call to
- * libxl_set_vcpuaffinity_all() will do the actual placement,
+ * libxl_domain_set_nodeaffinity() will do the actual placement,
* whatever that turns out to be.
*/
if (libxl_defbool_val(info->numa_placement)) {
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -171,7 +171,7 @@ static int nodemap_to_nr_vcpus(libxl__gc
const libxl_bitmap *nodemap)
{
libxl_dominfo *dinfo = NULL;
- libxl_bitmap vcpu_nodemap;
+ libxl_bitmap vcpu_nodemap, dom_nodemap;
int nr_doms, nr_cpus;
int nr_vcpus = 0;
int i, j, k;
@@ -185,6 +185,12 @@ static int nodemap_to_nr_vcpus(libxl__gc
return ERROR_FAIL;
}
+ if (libxl_node_bitmap_alloc(CTX, &dom_nodemap, 0) < 0) {
+ libxl_dominfo_list_free(dinfo, nr_doms);
+ libxl_bitmap_dispose(&vcpu_nodemap);
+ return ERROR_FAIL;
+ }
+
for (i = 0; i < nr_doms; i++) {
libxl_vcpuinfo *vinfo;
int nr_dom_vcpus;
@@ -193,6 +199,9 @@ static int nodemap_to_nr_vcpus(libxl__gc
if (vinfo == NULL)
continue;
+ /* Retrieve the domain's node-affinity map (see below) */
+ libxl_domain_get_nodeaffinity(CTX, dinfo[i].domid, &dom_nodemap);
+
/* For each vcpu of each domain ... */
for (j = 0; j < nr_dom_vcpus; j++) {
@@ -201,9 +210,17 @@ static int nodemap_to_nr_vcpus(libxl__gc
libxl_for_each_set_bit(k, vinfo[j].cpumap)
libxl_bitmap_set(&vcpu_nodemap, tinfo[k].node);
- /* And check if that map has any intersection with our nodemap */
+ /*
+ * We now check whether the && of the vcpu's nodemap and the
+ * domain's nodemap has any intersection with the nodemap of our
* candidate.
+ * Using both (vcpu's and domain's) nodemaps allows us to take
+ * both vcpu-affinity and node-affinity into account when counting
+ * the number of vcpus bound to the candidate.
+ */
libxl_for_each_set_bit(k, vcpu_nodemap) {
- if (libxl_bitmap_test(nodemap, k)) {
+ if (libxl_bitmap_test(&dom_nodemap, k) &&
+ libxl_bitmap_test(nodemap, k)) {
nr_vcpus++;
break;
}
@@ -213,6 +230,7 @@ static int nodemap_to_nr_vcpus(libxl__gc
libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus);
}
+ libxl_bitmap_dispose(&dom_nodemap);
libxl_bitmap_dispose(&vcpu_nodemap);
libxl_dominfo_list_free(dinfo, nr_doms);
return nr_vcpus;
next prev parent reply other threads:[~2012-10-05 14:08 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-10-05 14:08 [PATCH 0 of 8] NUMA Awareness for the Credit Scheduler Dario Faggioli
2012-10-05 14:08 ` [PATCH 1 of 8] xen, libxc: rename xenctl_cpumap to xenctl_bitmap Dario Faggioli
2012-10-09 15:59 ` George Dunlap
2012-10-05 14:08 ` [PATCH 2 of 8] xen, libxc: introduce node maps and masks Dario Faggioli
2012-10-09 15:59 ` George Dunlap
2012-10-05 14:08 ` [PATCH 3 of 8] xen: let the (credit) scheduler know about `node affinity` Dario Faggioli
2012-10-05 14:25 ` Jan Beulich
2012-10-09 10:29 ` Dario Faggioli
2012-10-09 11:10 ` Keir Fraser
2012-10-09 9:53 ` Juergen Gross
2012-10-09 10:21 ` Dario Faggioli
2012-10-09 16:29 ` George Dunlap
2012-10-05 14:08 ` [PATCH 4 of 8] xen: allow for explicitly specifying node-affinity Dario Faggioli
2012-10-09 16:47 ` George Dunlap
2012-10-09 16:52 ` Ian Campbell
2012-10-09 18:31 ` [PATCH RFC] flask: move policy header sources into hypervisor Daniel De Graaf
2012-10-10 8:38 ` Ian Campbell
2012-10-10 8:44 ` Dario Faggioli
2012-10-10 14:03 ` Daniel De Graaf
2012-10-10 14:39 ` Dario Faggioli
2012-10-10 15:32 ` Daniel De Graaf
2012-10-09 17:17 ` [PATCH 4 of 8] xen: allow for explicitly specifying node-affinity Dario Faggioli
2012-10-05 14:08 ` [PATCH 5 of 8] libxc: " Dario Faggioli
2012-10-05 14:08 ` [PATCH 6 of 8] libxl: " Dario Faggioli
2012-10-05 14:08 ` Dario Faggioli [this message]
2012-10-10 10:55 ` [PATCH 7 of 8] libxl: automatic placement deals with node-affinity George Dunlap
2012-10-05 14:08 ` [PATCH 8 of 8] xl: add node-affinity to the output of `xl list` Dario Faggioli
2012-10-05 16:36 ` Ian Jackson
2012-10-09 11:07 ` Dario Faggioli
2012-10-09 15:03 ` Ian Jackson
2012-10-10 8:46 ` Dario Faggioli
2012-10-08 19:43 ` [PATCH 0 of 8] NUMA Awareness for the Credit Scheduler Dan Magenheimer
2012-10-09 10:45 ` Dario Faggioli
2012-10-09 20:20 ` Matt Wilson
2012-10-10 16:18 ` Dario Faggioli
2012-10-09 10:02 ` Juergen Gross
2012-10-10 11:00 ` George Dunlap
2012-10-10 12:28 ` Dario Faggioli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aa8255447a30ef25ff12.1349446105@Solace \
--to=dario.faggioli@citrix.com \
--cc=Andrew.Cooper3@citrix.com \
--cc=Ian.Campbell@citrix.com \
--cc=Ian.Jackson@eu.citrix.com \
--cc=JBeulich@suse.com \
--cc=Marcus.Granado@eu.citrix.com \
--cc=andre.przywara@amd.com \
--cc=anil@recoil.org \
--cc=dgdegra@tycho.nsa.gov \
--cc=george.dunlap@eu.citrix.com \
--cc=juergen.gross@ts.fujitsu.com \
--cc=msw@amazon.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).