[PATCH 07 of 10 v2] libxl: optimize the calculation of how many VCPUs can run on a candidate

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Dario Faggioli <dario.faggioli@citrix.com>
To: xen-devel@lists.xen.org
Cc: Marcus Granado <Marcus.Granado@eu.citrix.com>,
	Dan Magenheimer <dan.magenheimer@oracle.com>,
	Ian Campbell <Ian.Campbell@citrix.com>,
	Anil Madhavapeddy <anil@recoil.org>,
	George Dunlap <george.dunlap@eu.citrix.com>,
	Andrew Cooper <Andrew.Cooper3@citrix.com>,
	Juergen Gross <juergen.gross@ts.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jan Beulich <JBeulich@suse.com>,
	Daniel De Graaf <dgdegra@tycho.nsa.gov>,
	Matt Wilson <msw@amazon.com>
Subject: [PATCH 07 of 10 v2] libxl: optimize the calculation of how many VCPUs can run on a candidate
Date: Wed, 19 Dec 2012 20:07:23 +0100	[thread overview]
Message-ID: <5dc2571ae5faef87977c.1355944043@Solace> (raw)
In-Reply-To: <patchbomb.1355944036@Solace>

For choosing the best NUMA placement candidate, we need to figure out
how many VCPUs are runnable on each of them. That requires going through
all the VCPUs of all the domains and check their affinities.

With this change, instead of doing the above for each candidate, we
do it once for all, populating an array while counting. This way, when
we later are evaluating candidates, all we need is summing up the right
elements of the array itself.

This reduces the complexity of the overall algorithm, as it moves a
potentially expensive operation (for_each_vcpu_of_each_domain {})
outside from the core placement loop, so that it is performed only
once instead of (potentially) tens or hundreds of times.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -165,15 +165,27 @@ static uint32_t nodemap_to_free_memkb(li
     return free_memkb;
 }
 
-/* Retrieve the number of vcpus able to run on the cpus of the nodes
- * that are part of the nodemap. */
-static int nodemap_to_nr_vcpus(libxl__gc *gc, libxl_cputopology *tinfo,
+/* Retrieve the number of vcpus able to run on the nodes in nodemap */
+static int nodemap_to_nr_vcpus(libxl__gc *gc, int vcpus_on_node[],
                                const libxl_bitmap *nodemap)
 {
+    int i, nr_vcpus = 0;
+
+    libxl_for_each_set_bit(i, *nodemap)
+        nr_vcpus += vcpus_on_node[i];
+
+    return nr_vcpus;
+}
+
+/* Number of vcpus able to run on the cpus of the various nodes
+ * (reported by filling the array vcpus_on_node[]). */
+static int nr_vcpus_on_nodes(libxl__gc *gc, libxl_cputopology *tinfo,
+                             const libxl_bitmap *suitable_cpumap,
+                             int vcpus_on_node[])
+{
     libxl_dominfo *dinfo = NULL;
     libxl_bitmap vcpu_nodemap;
     int nr_doms, nr_cpus;
-    int nr_vcpus = 0;
     int i, j, k;
 
     dinfo = libxl_list_domain(CTX, &nr_doms);
@@ -193,19 +205,17 @@ static int nodemap_to_nr_vcpus(libxl__gc
         if (vinfo == NULL)
             continue;
 
-        /* For each vcpu of each domain ... */
         for (j = 0; j < nr_dom_vcpus; j++) {
+            /* For each vcpu of each domain, increment the elements of
+             * the array corresponding to the nodes where the vcpu runs */
+            libxl_bitmap_set_none(&vcpu_nodemap);
+            libxl_for_each_set_bit(k, vinfo[j].cpumap) {
+                int node = tinfo[k].node;
 
-            /* Build up a map telling on which nodes the vcpu is runnable on */
-            libxl_bitmap_set_none(&vcpu_nodemap);
-            libxl_for_each_set_bit(k, vinfo[j].cpumap)
-                libxl_bitmap_set(&vcpu_nodemap, tinfo[k].node);
-
-            /* And check if that map has any intersection with our nodemap */
-            libxl_for_each_set_bit(k, vcpu_nodemap) {
-                if (libxl_bitmap_test(nodemap, k)) {
-                    nr_vcpus++;
-                    break;
+                if (libxl_bitmap_test(suitable_cpumap, k) &&
+                    !libxl_bitmap_test(&vcpu_nodemap, node)) {
+                    libxl_bitmap_set(&vcpu_nodemap, node);
+                    vcpus_on_node[node]++;
                 }
             }
         }
@@ -215,7 +225,7 @@ static int nodemap_to_nr_vcpus(libxl__gc
 
     libxl_bitmap_dispose(&vcpu_nodemap);
     libxl_dominfo_list_free(dinfo, nr_doms);
-    return nr_vcpus;
+    return 0;
 }
 
 /*
@@ -270,7 +280,7 @@ int libxl__get_numa_candidate(libxl__gc 
     libxl_numainfo *ninfo = NULL;
     int nr_nodes = 0, nr_suit_nodes, nr_cpus = 0;
     libxl_bitmap suitable_nodemap, nodemap;
-    int rc = 0;
+    int *vcpus_on_node, rc = 0;
 
     libxl_bitmap_init(&nodemap);
     libxl_bitmap_init(&suitable_nodemap);
@@ -281,6 +291,8 @@ int libxl__get_numa_candidate(libxl__gc 
     if (ninfo == NULL)
         return ERROR_FAIL;
 
+    GCNEW_ARRAY(vcpus_on_node, nr_nodes);
+
     /*
      * The good thing about this solution is that it is based on heuristics
      * (implemented in numa_cmpf() ), but we at least can evaluate it on
@@ -330,6 +342,19 @@ int libxl__get_numa_candidate(libxl__gc 
         goto out;
 
     /*
+     * Later on, we will try to figure out how many vcpus are runnable on
+     * each candidate (as a part of choosing the best one of them). That
+     * requires going through all the vcpus of all the domains and check
+     * their affinities. So, instead of doing that for each candidate,
+     * let's count here the number of vcpus runnable on each node, so that
+     * all we have to do later is summing up the right elements of the
+     * vcpus_on_node array.
+     */
+    rc = nr_vcpus_on_nodes(gc, tinfo, suitable_cpumap, vcpus_on_node);
+    if (rc)
+        goto out;
+
+    /*
      * If the minimum number of NUMA nodes is not explicitly specified
      * (i.e., min_nodes == 0), we try to figure out a sensible number of nodes
      * from where to start generating candidates, if possible (or just start
@@ -414,7 +439,8 @@ int libxl__get_numa_candidate(libxl__gc 
              * current best one (if any).
              */
             libxl__numa_candidate_put_nodemap(gc, &new_cndt, &nodemap);
-            new_cndt.nr_vcpus = nodemap_to_nr_vcpus(gc, tinfo, &nodemap);
+            new_cndt.nr_vcpus = nodemap_to_nr_vcpus(gc, vcpus_on_node,
+                                                    &nodemap);
             new_cndt.free_memkb = nodes_free_memkb;
             new_cndt.nr_nodes = libxl_bitmap_count_set(&nodemap);
             new_cndt.nr_cpus = nodes_cpus;

next prev parent reply	other threads:[~2012-12-19 19:07 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-12-19 19:07 [PATCH 00 of 10 v2] NUMA aware credit scheduling Dario Faggioli
2012-12-19 19:07 ` [PATCH 01 of 10 v2] xen, libxc: rename xenctl_cpumap to xenctl_bitmap Dario Faggioli
2012-12-20  9:17   ` Jan Beulich
2012-12-20  9:35     ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 02 of 10 v2] xen, libxc: introduce node maps and masks Dario Faggioli
2012-12-20  9:18   ` Jan Beulich
2012-12-20  9:55     ` Dario Faggioli
2012-12-20 14:33     ` George Dunlap
2012-12-20 14:52       ` Jan Beulich
2012-12-20 15:13         ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 03 of 10 v2] xen: sched_credit: let the scheduler know about node-affinity Dario Faggioli
2012-12-20  6:44   ` Juergen Gross
2012-12-20  8:16     ` Dario Faggioli
2012-12-20  8:25       ` Juergen Gross
2012-12-20  8:33         ` Dario Faggioli
2012-12-20  8:39           ` Juergen Gross
2012-12-20  8:58             ` Dario Faggioli
2012-12-20 15:28             ` George Dunlap
2012-12-20 16:00               ` Dario Faggioli
2012-12-20  9:22           ` Jan Beulich
2012-12-20 15:56   ` George Dunlap
2012-12-20 17:12     ` Dario Faggioli
2012-12-20 16:48   ` George Dunlap
2012-12-20 18:18     ` Dario Faggioli
2012-12-21 14:29       ` George Dunlap
2012-12-21 16:07         ` Dario Faggioli
2012-12-20 20:21   ` George Dunlap
2012-12-21  0:18     ` Dario Faggioli
2012-12-21 14:56       ` George Dunlap
2012-12-21 16:13         ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 04 of 10 v2] xen: allow for explicitly specifying node-affinity Dario Faggioli
2012-12-21 15:17   ` George Dunlap
2012-12-21 16:17     ` Dario Faggioli
2013-01-03 16:05     ` Daniel De Graaf
2012-12-19 19:07 ` [PATCH 05 of 10 v2] libxc: " Dario Faggioli
2012-12-21 15:19   ` George Dunlap
2012-12-21 16:27     ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 06 of 10 v2] libxl: " Dario Faggioli
2012-12-21 15:30   ` George Dunlap
2012-12-21 16:18     ` Dario Faggioli
2012-12-21 17:02       ` Ian Jackson
2012-12-21 17:09         ` Dario Faggioli
2012-12-19 19:07 ` Dario Faggioli [this message]
2012-12-20  8:41   ` [PATCH 07 of 10 v2] libxl: optimize the calculation of how many VCPUs can run on a candidate Ian Campbell
2012-12-20  9:24     ` Dario Faggioli
2012-12-21 16:00   ` George Dunlap
2012-12-21 16:23     ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 08 of 10 v2] libxl: automatic placement deals with node-affinity Dario Faggioli
2012-12-21 16:22   ` George Dunlap
2012-12-19 19:07 ` [PATCH 09 of 10 v2] xl: add node-affinity to the output of `xl list` Dario Faggioli
2012-12-21 16:34   ` George Dunlap
2012-12-21 16:54     ` Dario Faggioli
2012-12-19 19:07 ` [PATCH 10 of 10 v2] docs: rearrange and update NUMA placement documentation Dario Faggioli
2012-12-19 23:16 ` [PATCH 00 of 10 v2] NUMA aware credit scheduling Dario Faggioli
2013-01-11 12:19 ` Ian Campbell
2013-01-11 13:57   ` Dario Faggioli
2013-01-11 14:09     ` Ian Campbell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5dc2571ae5faef87977c.1355944043@Solace \
    --to=dario.faggioli@citrix.com \
    --cc=Andrew.Cooper3@citrix.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=Marcus.Granado@eu.citrix.com \
    --cc=anil@recoil.org \
    --cc=dan.magenheimer@oracle.com \
    --cc=dgdegra@tycho.nsa.gov \
    --cc=george.dunlap@eu.citrix.com \
    --cc=juergen.gross@ts.fujitsu.com \
    --cc=msw@amazon.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).