All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dario Faggioli <dario.faggioli@citrix.com>
To: xen-devel@lists.xen.org
Cc: Marcus Granado <Marcus.Granado@eu.citrix.com>,
	Dan Magenheimer <dan.magenheimer@oracle.com>,
	Ian Campbell <Ian.Campbell@citrix.com>,
	Anil Madhavapeddy <anil@recoil.org>,
	George Dunlap <george.dunlap@eu.citrix.com>,
	Andrew Cooper <Andrew.Cooper3@citrix.com>,
	Juergen Gross <juergen.gross@ts.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jan Beulich <JBeulich@suse.com>,
	Daniel De Graaf <dgdegra@tycho.nsa.gov>,
	Matt Wilson <msw@amazon.com>
Subject: [PATCH 08 of 10 v2] libxl: automatic placement deals with node-affinity
Date: Wed, 19 Dec 2012 20:07:24 +0100	[thread overview]
Message-ID: <ff98e6bcc0dd18f6b97a.1355944044@Solace> (raw)
In-Reply-To: <patchbomb.1355944036@Solace>

Making automatic placement deal with node-affinity basically means the
following two things:
 1) during domain creation, it is the node-affinity of
    the domain --rather than the vcpu-affinities of its
    VCPUs-- that is affected by automatic placement;
 2) during automatic placement, when counting how many
    VCPUs are already "bound" to a placement candidate
    (as part of the process of choosing the best
    candidate), both vcpu-affinity and node-affinity
    are considered.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Juergen Gross <juergen.gross@ts.fujitsu.com>

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -133,13 +133,13 @@ static int numa_place_domain(libxl__gc *
 {
     int found;
     libxl__numa_candidate candidate;
-    libxl_bitmap candidate_nodemap;
+    libxl_bitmap cpupool_nodemap;
     libxl_cpupoolinfo cpupool_info;
     int i, cpupool, rc = 0;
     uint32_t memkb;
 
     libxl__numa_candidate_init(&candidate);
-    libxl_bitmap_init(&candidate_nodemap);
+    libxl_bitmap_init(&cpupool_nodemap);
 
     /*
      * Extract the cpumap from the cpupool the domain belong to. In fact,
@@ -156,7 +156,7 @@ static int numa_place_domain(libxl__gc *
     rc = libxl_domain_need_memory(CTX, info, &memkb);
     if (rc)
         goto out;
-    if (libxl_node_bitmap_alloc(CTX, &candidate_nodemap, 0)) {
+    if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
         rc = ERROR_FAIL;
         goto out;
     }
@@ -174,17 +174,19 @@ static int numa_place_domain(libxl__gc *
     if (found == 0)
         goto out;
 
-    /* Map the candidate's node map to the domain's info->cpumap */
-    libxl__numa_candidate_get_nodemap(gc, &candidate, &candidate_nodemap);
-    rc = libxl_nodemap_to_cpumap(CTX, &candidate_nodemap, &info->cpumap);
+    /* Map the candidate's node map to the domain's info->nodemap */
+    libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
+
+    /* Avoid trying to set the affinity to nodes that might be in the
+     * candidate's nodemap but out of our cpupool. */
+    rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
+                                 &cpupool_nodemap);
     if (rc)
         goto out;
 
-    /* Avoid trying to set the affinity to cpus that might be in the
-     * nodemap but not in our cpupool. */
-    libxl_for_each_set_bit(i, info->cpumap) {
-        if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
-            libxl_bitmap_reset(&info->cpumap, i);
+    libxl_for_each_set_bit(i, info->nodemap) {
+        if (!libxl_bitmap_test(&cpupool_nodemap, i))
+            libxl_bitmap_reset(&info->nodemap, i);
     }
 
     LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
@@ -193,7 +195,7 @@ static int numa_place_domain(libxl__gc *
 
  out:
     libxl__numa_candidate_dispose(&candidate);
-    libxl_bitmap_dispose(&candidate_nodemap);
+    libxl_bitmap_dispose(&cpupool_nodemap);
     libxl_cpupoolinfo_dispose(&cpupool_info);
     return rc;
 }
@@ -211,10 +213,10 @@ int libxl__build_pre(libxl__gc *gc, uint
     /*
      * Check if the domain has any CPU affinity. If not, try to build
      * up one. In case numa_place_domain() find at least a suitable
-     * candidate, it will affect info->cpumap accordingly; if it
+     * candidate, it will affect info->nodemap accordingly; if it
      * does not, it just leaves it as it is. This means (unless
      * some weird error manifests) the subsequent call to
-     * libxl_set_vcpuaffinity_all() will do the actual placement,
+     * libxl_domain_set_nodeaffinity() will do the actual placement,
      * whatever that turns out to be.
      */
     if (libxl_defbool_val(info->numa_placement)) {
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -184,7 +184,7 @@ static int nr_vcpus_on_nodes(libxl__gc *
                              int vcpus_on_node[])
 {
     libxl_dominfo *dinfo = NULL;
-    libxl_bitmap vcpu_nodemap;
+    libxl_bitmap dom_nodemap, vcpu_nodemap;
     int nr_doms, nr_cpus;
     int i, j, k;
 
@@ -197,6 +197,12 @@ static int nr_vcpus_on_nodes(libxl__gc *
         return ERROR_FAIL;
     }
 
+    if (libxl_node_bitmap_alloc(CTX, &dom_nodemap, 0) < 0) {
+        libxl_bitmap_dispose(&vcpu_nodemap);
+        libxl_dominfo_list_free(dinfo, nr_doms);
+        return ERROR_FAIL;
+    }
+
     for (i = 0; i < nr_doms; i++) {
         libxl_vcpuinfo *vinfo;
         int nr_dom_vcpus;
@@ -205,14 +211,21 @@ static int nr_vcpus_on_nodes(libxl__gc *
         if (vinfo == NULL)
             continue;
 
+        /* Retrieve the domain's node-affinity map */
+        libxl_domain_get_nodeaffinity(CTX, dinfo[i].domid, &dom_nodemap);
+
         for (j = 0; j < nr_dom_vcpus; j++) {
-            /* For each vcpu of each domain, increment the elements of
-             * the array corresponding to the nodes where the vcpu runs */
+            /*
+             * For each vcpu of each domain, it must have both vcpu-affinity
+             * and node-affinity to (a pcpu belonging to) a certain node to
+             * cause an increment in the corresponding element of the array.
+             */
             libxl_bitmap_set_none(&vcpu_nodemap);
             libxl_for_each_set_bit(k, vinfo[j].cpumap) {
                 int node = tinfo[k].node;
 
                 if (libxl_bitmap_test(suitable_cpumap, k) &&
+                    libxl_bitmap_test(&dom_nodemap, node) &&
                     !libxl_bitmap_test(&vcpu_nodemap, node)) {
                     libxl_bitmap_set(&vcpu_nodemap, node);
                     vcpus_on_node[node]++;
@@ -223,6 +236,7 @@ static int nr_vcpus_on_nodes(libxl__gc *
         libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus);
     }
 
+    libxl_bitmap_dispose(&dom_nodemap);
     libxl_bitmap_dispose(&vcpu_nodemap);
     libxl_dominfo_list_free(dinfo, nr_doms);
     return 0;

  parent reply	other threads:[~2012-12-19 19:07 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-12-19 19:07 [PATCH 00 of 10 v2] NUMA aware credit scheduling Dario Faggioli
2012-12-19 19:07 ` [PATCH 01 of 10 v2] xen, libxc: rename xenctl_cpumap to xenctl_bitmap Dario Faggioli
2012-12-20  9:17   ` Jan Beulich
2012-12-20  9:35     ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 02 of 10 v2] xen, libxc: introduce node maps and masks Dario Faggioli
2012-12-20  9:18   ` Jan Beulich
2012-12-20  9:55     ` Dario Faggioli
2012-12-20 14:33     ` George Dunlap
2012-12-20 14:52       ` Jan Beulich
2012-12-20 15:13         ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 03 of 10 v2] xen: sched_credit: let the scheduler know about node-affinity Dario Faggioli
2012-12-20  6:44   ` Juergen Gross
2012-12-20  8:16     ` Dario Faggioli
2012-12-20  8:25       ` Juergen Gross
2012-12-20  8:33         ` Dario Faggioli
2012-12-20  8:39           ` Juergen Gross
2012-12-20  8:58             ` Dario Faggioli
2012-12-20 15:28             ` George Dunlap
2012-12-20 16:00               ` Dario Faggioli
2012-12-20  9:22           ` Jan Beulich
2012-12-20 15:56   ` George Dunlap
2012-12-20 17:12     ` Dario Faggioli
2012-12-20 16:48   ` George Dunlap
2012-12-20 18:18     ` Dario Faggioli
2012-12-21 14:29       ` George Dunlap
2012-12-21 16:07         ` Dario Faggioli
2012-12-20 20:21   ` George Dunlap
2012-12-21  0:18     ` Dario Faggioli
2012-12-21 14:56       ` George Dunlap
2012-12-21 16:13         ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 04 of 10 v2] xen: allow for explicitly specifying node-affinity Dario Faggioli
2012-12-21 15:17   ` George Dunlap
2012-12-21 16:17     ` Dario Faggioli
2013-01-03 16:05     ` Daniel De Graaf
2012-12-19 19:07 ` [PATCH 05 of 10 v2] libxc: " Dario Faggioli
2012-12-21 15:19   ` George Dunlap
2012-12-21 16:27     ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 06 of 10 v2] libxl: " Dario Faggioli
2012-12-21 15:30   ` George Dunlap
2012-12-21 16:18     ` Dario Faggioli
2012-12-21 17:02       ` Ian Jackson
2012-12-21 17:09         ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 07 of 10 v2] libxl: optimize the calculation of how many VCPUs can run on a candidate Dario Faggioli
2012-12-20  8:41   ` Ian Campbell
2012-12-20  9:24     ` Dario Faggioli
2012-12-21 16:00   ` George Dunlap
2012-12-21 16:23     ` Dario Faggioli
2012-12-19 19:07 ` Dario Faggioli [this message]
2012-12-21 16:22   ` [PATCH 08 of 10 v2] libxl: automatic placement deals with node-affinity George Dunlap
2012-12-19 19:07 ` [PATCH 09 of 10 v2] xl: add node-affinity to the output of `xl list` Dario Faggioli
2012-12-21 16:34   ` George Dunlap
2012-12-21 16:54     ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 10 of 10 v2] docs: rearrange and update NUMA placement documentation Dario Faggioli
2012-12-19 23:16 ` [PATCH 00 of 10 v2] NUMA aware credit scheduling Dario Faggioli
2013-01-11 12:19 ` Ian Campbell
2013-01-11 13:57   ` Dario Faggioli
2013-01-11 14:09     ` Ian Campbell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ff98e6bcc0dd18f6b97a.1355944044@Solace \
    --to=dario.faggioli@citrix.com \
    --cc=Andrew.Cooper3@citrix.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=Marcus.Granado@eu.citrix.com \
    --cc=anil@recoil.org \
    --cc=dan.magenheimer@oracle.com \
    --cc=dgdegra@tycho.nsa.gov \
    --cc=george.dunlap@eu.citrix.com \
    --cc=juergen.gross@ts.fujitsu.com \
    --cc=msw@amazon.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.