# HG changeset patch
# User Andre Przywara <andre.przywara@amd.com>
# Date 1259594006 -3600
# Node ID bdf4109edffbcc0cbac605a19d2fd7a7459f1117
# Parent  abc6183f486e66b5721dbf0313ee0d3460613a99
allocate enough NUMA nodes for all VCPUs

If numa=on, we constrain a guest to one node to keep its memory
accesses local. This will hurt performance if the number of VCPUs
is greater than the number of cores per node. We detect this case
now and allocate further NUMA nodes to allow all VCPUs to run
simultaneously.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

diff -r abc6183f486e -r bdf4109edffb tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py	Mon Nov 30 10:58:23 2009 +0000
+++ b/tools/python/xen/xend/XendDomainInfo.py	Mon Nov 30 16:13:26 2009 +0100
@@ -2637,8 +2637,7 @@
                         nodeload[i] = int(nodeload[i] * 16 / len(info['node_to_cpu'][i]))
                     else:
                         nodeload[i] = sys.maxint
-                index = nodeload.index( min(nodeload) )    
-                return index
+                return map(lambda x: x[0], sorted(enumerate(nodeload), key=lambda x:x[1]))
 
             info = xc.physinfo()
             if info['nr_nodes'] > 1:
@@ -2648,8 +2647,15 @@
                 for i in range(0, info['nr_nodes']):
                     if node_memory_list[i] >= needmem and len(info['node_to_cpu'][i]) > 0:
                         candidate_node_list.append(i)
-                index = find_relaxed_node(candidate_node_list)
-                cpumask = info['node_to_cpu'][index]
+                best_node = find_relaxed_node(candidate_node_list)[0]
+                cpumask = info['node_to_cpu'][best_node]
+                cores_per_node = info['nr_cpus'] / info['nr_nodes']
+                nodes_required = (self.info['VCPUs_max'] + cores_per_node - 1) / cores_per_node
+                if nodes_required > 1:
+                    log.debug("allocating %d NUMA nodes", nodes_required)
+                    best_nodes = find_relaxed_node(filter(lambda x: x != best_node, range(0,info['nr_nodes'])))
+                    for i in best_nodes[:nodes_required - 1]:
+                        cpumask = cpumask + info['node_to_cpu'][i]
                 for v in range(0, self.info['VCPUs_max']):
                     xc.vcpu_setaffinity(self.domid, v, cpumask)
         return index