All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xl: add memory allocation logic for numa platform
@ 2011-08-09  4:55 Zhang, Yang Z
  2011-08-09  5:37 ` Juergen Gross
  2011-08-09 11:46 ` Ian Campbell
  0 siblings, 2 replies; 3+ messages in thread
From: Zhang, Yang Z @ 2011-08-09  4:55 UTC (permalink / raw)
  To: xen-devel@lists.xensource.com
  Cc: 'Stefano Stabellini (stefano.stabellini@eu.citrix.com)'

On NUMA platforms, a guest's memory should be allocated on the node where the guest's vcpus run.
This patch adds that to xl: a simple algorithm picks the least loaded node with enough free memory and pins the guest's vcpus to it.

diff -r 0f36c2eec2e1 tools/libxl/libxl.c
--- a/tools/libxl/libxl.c       Thu Jul 28 15:40:54 2011 +0100
+++ b/tools/libxl/libxl.c       Tue Aug 09 12:46:18 2011 +0800
@@ -2236,6 +2236,109 @@
     return ERROR_FAIL;
 }

+/* Set bit nr in the bitmap at addr, addressed byte by byte (bit 0 is the LSB
+ * of byte 0); the map is later passed to xc_vcpu_setaffinity() as uint8_t *.
+ * Plain C replaces the x86-only "btsl" asm so other targets build; not atomic. */
+static inline void set_bit(int nr, volatile void *addr)
+{
+    ((volatile uint8_t *)addr)[nr / 8] |= (uint8_t)(1u << (nr % 8));
+}
+
+/*
+ * Fill *numainfo with the host NUMA layout: per-node total/free memory in KB,
+ * a per-node bitmap of the CPUs on that node, and a cpu -> node table.
+ * Returns 0 on success or ERROR_FAIL; on failure nothing stays allocated and
+ * the pointers in *numainfo are NULL. Release with libxl_free_numainfo().
+ */
+int libxl_get_numainfo(libxl_ctx *ctx, libxl_numainfo_t *numainfo)
+{
+    xc_numainfo_t ninfo = { 0 };
+    libxl_physinfo physinfo = { 0 };
+    libxl_topologyinfo topoinfo;
+    int i, max_nodes = 0, max_cpus, node;
+    libxl_nodeinfo_t *nodeinfo = NULL;
+    DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, node_memsize);
+    DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, node_memfree);
+
+    /* Known state first, so the error path never frees stale pointers. */
+    numainfo->cpu_to_node = numainfo->nodeinfo = NULL;
+    if (libxl_get_physinfo(ctx, &physinfo))
+        goto out;
+    max_cpus = physinfo.max_cpu_id + 1;
+    max_nodes = NUMA_NO_NODE + 1; /* array bound until Xen reports the count */
+    numainfo->max_cpus = max_cpus;
+
+    numainfo->cpu_to_node = calloc(max_cpus, sizeof(*numainfo->cpu_to_node));
+    numainfo->nodeinfo = calloc(max_nodes, sizeof(libxl_nodeinfo_t));
+    if (numainfo->cpu_to_node == NULL || numainfo->nodeinfo == NULL)
+        goto out;
+    nodeinfo = (libxl_nodeinfo_t *)numainfo->nodeinfo;
+
+    node_memsize = xc_hypercall_buffer_alloc(ctx->xch, node_memsize, sizeof(*node_memsize) * max_nodes);
+    if (node_memsize == NULL)
+        goto out;
+    node_memfree = xc_hypercall_buffer_alloc(ctx->xch, node_memfree, sizeof(*node_memfree) * max_nodes);
+    if (node_memfree == NULL)
+        goto out;
+
+    set_xen_guest_handle(ninfo.node_to_memsize, node_memsize);
+    set_xen_guest_handle(ninfo.node_to_memfree, node_memfree);
+    ninfo.max_node_index = max_nodes - 1;
+    if (xc_numainfo(ctx->xch, &ninfo) != 0)
+        goto out;
+    numainfo->max_nodes = max_nodes = ninfo.max_node_index + 1;
+
+    /* Strictly '<': index max_nodes is one past the last node Xen reported. */
+    for (i = 0; i < max_nodes; i++) {
+        nodeinfo[i].online = (node_memsize[i] != INVALID_MEM_NODE);
+        if (!nodeinfo[i].online)
+            continue;
+        nodeinfo[i].cpumap = calloc(BITS_TO_LONGS(max_cpus), sizeof(unsigned long));
+        if (nodeinfo[i].cpumap == NULL)
+            goto out;
+        nodeinfo[i].total_memkb = node_memsize[i] >> 10; /* KB */
+        nodeinfo[i].free_memkb = node_memfree[i] >> 10;  /* KB */
+    }
+
+    if (libxl_get_topologyinfo(ctx, &topoinfo))
+        goto out;
+    for (i = 0; i < max_cpus; i++) {
+        node = topoinfo.nodemap.array[i];
+        /* Skip absent CPUs and nodes that are out of range or have no map. */
+        if (topoinfo.coremap.array[i] == LIBXL_CPUARRAY_INVALID_ENTRY ||
+            node < 0 || node >= max_nodes || nodeinfo[node].cpumap == NULL)
+            continue;
+        set_bit(i, nodeinfo[node].cpumap);
+        numainfo->cpu_to_node[i] = node;
+    }
+    libxl_topologyinfo_destroy(&topoinfo);
+    xc_hypercall_buffer_free(ctx->xch, node_memsize);
+    xc_hypercall_buffer_free(ctx->xch, node_memfree);
+    return 0;
+out:
+    for (i = 0; nodeinfo != NULL && i < max_nodes; i++)
+        free(nodeinfo[i].cpumap);
+    free(numainfo->cpu_to_node);
+    free(numainfo->nodeinfo);
+    numainfo->cpu_to_node = numainfo->nodeinfo = NULL;
+    numainfo->max_nodes = 0;
+    xc_hypercall_buffer_free(ctx->xch, node_memsize);
+    xc_hypercall_buffer_free(ctx->xch, node_memfree);
+    return ERROR_FAIL;
+}
+
+/* Free all memory held by *numainfo; safe on a zeroed or already-freed struct. */
+void libxl_free_numainfo(libxl_numainfo_t *numainfo)
+{
+    libxl_nodeinfo_t *nodeinfo = (libxl_nodeinfo_t *)numainfo->nodeinfo;
+    int i;
+    for (i = 0; nodeinfo != NULL && i < numainfo->max_nodes; i++)
+        free(nodeinfo[i].cpumap); /* free(NULL) is a no-op for offline nodes */
+    free(numainfo->cpu_to_node);
+    free(numainfo->nodeinfo);
+    numainfo->cpu_to_node = numainfo->nodeinfo = NULL; /* no dangling pointers */
+}
+
 const libxl_version_info* libxl_get_version_info(libxl_ctx *ctx)
 {
     union {
diff -r 0f36c2eec2e1 tools/libxl/libxl.h
--- a/tools/libxl/libxl.h       Thu Jul 28 15:40:54 2011 +0100
+++ b/tools/libxl/libxl.h       Tue Aug 09 12:46:18 2011 +0800
@@ -459,6 +459,16 @@

 int libxl_get_physinfo(libxl_ctx *ctx, libxl_physinfo *physinfo);
 int libxl_get_topologyinfo(libxl_ctx *ctx, libxl_topologyinfo *info);
+
+#define NUMA_NO_NODE    0xFF /* libxl_get_numainfo() sizes its node arrays as NUMA_NO_NODE + 1 */
+#define INVALID_MEM_NODE     0ul /* node memsize value that libxl_get_numainfo() treats as "offline" */
+#define BITS_PER_LONG (sizeof(unsigned long) * 8) /* assumes 8-bit bytes */
+#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) /* longs needed to hold 'bits' bits */
+#define ADDR (*(volatile long *) addr) /* expands against a local variable named 'addr'; used by set_bit() */
+
+int libxl_get_numainfo(libxl_ctx *ctx, libxl_numainfo_t *numainfo); /* returns 0, or ERROR_FAIL on failure */
+void libxl_free_numainfo(libxl_numainfo_t *numainfo); /* frees the per-node cpumaps, cpu_to_node and nodeinfo */
+
 libxl_vcpuinfo *libxl_list_vcpu(libxl_ctx *ctx, uint32_t domid,
                                        int *nb_vcpu, int *nrcpus);
 int libxl_set_vcpuaffinity(libxl_ctx *ctx, uint32_t domid, uint32_t vcpuid,
diff -r 0f36c2eec2e1 tools/libxl/libxl.idl
--- a/tools/libxl/libxl.idl     Thu Jul 28 15:40:54 2011 +0100
+++ b/tools/libxl/libxl.idl     Tue Aug 09 12:46:18 2011 +0800
@@ -366,6 +366,19 @@
     ("socketmap", libxl_cpuarray, False, "cpu to socket map"),
     ("nodemap", libxl_cpuarray,   False, "cpu to node map"),
     ])
+libxl_nodeinfo = Struct("nodeinfo_t", [  # one record per NUMA node, filled by libxl_get_numainfo()
+    ("free_memkb", uint32),   # node_memfree[i] >> 10
+    ("total_memkb", uint32),  # node_memsize[i] >> 10
+    ("candidate", uint32),    # set to 1 by libxl_node_select() when free_memkb > the guest's max_memkb
+    ("online", uint32),       # 1 unless the node's memsize equals INVALID_MEM_NODE
+    ("cpumap", string),       # heap-allocated bitmap with a bit set for each CPU on this node
+    ])
+libxl_numainfo = Struct("numainfo_t", [  # host NUMA summary filled by libxl_get_numainfo()
+    ("nodeinfo", string),     # really an array of libxl_nodeinfo_t; the C code casts this pointer
+    ("max_nodes", uint32),    # max_node_index reported by xc_numainfo(), plus one
+    ("cpu_to_node", string),  # indexed by physical CPU number; holds that CPU's node
+    ("max_cpus", uint32),     # physinfo.max_cpu_id + 1
+    ])

 libxl_sched_credit = Struct("sched_credit", [
     ("weight", integer),
diff -r 0f36c2eec2e1 tools/libxl/libxl_create.c
--- a/tools/libxl/libxl_create.c        Thu Jul 28 15:40:54 2011 +0100
+++ b/tools/libxl/libxl_create.c        Tue Aug 09 12:46:18 2011 +0800
@@ -159,6 +159,86 @@
     return 0;
 }

+/*
+ * Return the least loaded node among those that are online and flagged as
+ * candidate, or -1 on error or when no node qualifies. A node's load is the
+ * number of online vcpus (of any domain) currently placed on its CPUs; loads
+ * of non-candidate nodes are never compared, so they need no extra weighting.
+ */
+static int find_best_node(libxl_ctx *ctx, libxl_numainfo_t *numainfo)
+{
+    int nr_doms = 0, i, j, nr_vcpu, nrcpus, best_node = -1, pcpu, node_id;
+    int max_nodes = numainfo->max_nodes;
+    unsigned long *nodeload;
+    libxl_dominfo *dominfo = NULL;
+    libxl_vcpuinfo *vcpuinfo;
+    libxl_nodeinfo_t *nodeinfo = (libxl_nodeinfo_t *)numainfo->nodeinfo;
+
+    nodeload = calloc(max_nodes, sizeof(*nodeload));
+    if (!nodeload || !(dominfo = libxl_list_domain(ctx, &nr_doms)))
+        goto out;
+
+    for (i = 0; i < nr_doms; i++) {
+        vcpuinfo = libxl_list_vcpu(ctx, dominfo[i].domid, &nr_vcpu, &nrcpus);
+        if (!vcpuinfo)
+            goto out;
+        for (j = 0; j < nr_vcpu; j++) {
+            pcpu = vcpuinfo[j].cpu;
+            if (!vcpuinfo[j].online || pcpu >= numainfo->max_cpus)
+                continue;
+            node_id = numainfo->cpu_to_node[pcpu];
+            if (node_id >= 0 && node_id < max_nodes)
+                nodeload[node_id]++;
+        }
+        free(vcpuinfo);
+    }
+    /* Only online candidates compete, so node 0 is no longer an implicit default. */
+    for (i = 0; i < max_nodes; i++)
+        if (nodeinfo[i].candidate && nodeinfo[i].online
+            && (best_node < 0 || nodeload[i] < nodeload[best_node]))
+            best_node = i;
+out:
+    free(dominfo);
+    free(nodeload);
+    return best_node;
+}
+
+/*
+ * Pin every vcpu of domain domid to the CPUs of one NUMA node that has more
+ * than b_info->max_memkb KB free (see find_best_node()). Returns 0 on success
+ * or when the host has fewer than two nodes, -1 otherwise.
+ */
+static int libxl_node_select(libxl_ctx *ctx, libxl_domain_build_info *b_info, uint32_t domid)
+{
+    unsigned long i, needmem = b_info->max_memkb;
+    int best_node, rc = -1;
+    libxl_numainfo_t numainfo = { 0 };
+    libxl_nodeinfo_t *nodeinfo;
+
+    if (libxl_get_numainfo(ctx, &numainfo)) {
+        fprintf(stderr, "libxl_get_numainfo failed.\n");
+        return -1;
+    }
+    if (numainfo.max_nodes < 2) {
+        rc = 0; /* nothing to choose between, but numainfo must still be freed */
+        goto out;
+    }
+    nodeinfo = (libxl_nodeinfo_t *)numainfo.nodeinfo;
+    for (i = 0; i < numainfo.max_nodes; i++)
+        if (nodeinfo[i].free_memkb > needmem)
+            nodeinfo[i].candidate = 1;
+    best_node = find_best_node(ctx, &numainfo);
+    if (best_node < 0)
+        goto out;
+    rc = 0;
+    for (i = 0; i < b_info->max_vcpus; i++)
+        if (xc_vcpu_setaffinity(ctx->xch, domid, i, (uint8_t *)nodeinfo[best_node].cpumap))
+            rc = -1; /* report the failure instead of silently ignoring it */
+out:
+    libxl_free_numainfo(&numainfo); /* frees the per-node cpumaps as well */
+    return rc;
+}
+
 int libxl__domain_build(libxl__gc *gc,
                         libxl_domain_build_info *info,
                         libxl_device_model_info *dm_info,
@@ -168,11 +248,15 @@
     char **vments = NULL, **localents = NULL;
     struct timeval start_time;
     int i, ret;
+    libxl_ctx *ctx = libxl__gc_owner(gc);

     ret = libxl__build_pre(gc, domid, info, state);
     if (ret)
         goto out;

+   if (libxl_node_select(ctx, info, domid))
+       printf("Cannot find best node, using defaul algorithm\n");
+
     gettimeofday(&start_time, NULL);

     switch (info->type) {

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-08-09 11:46 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2011-08-09  4:55 [PATCH] xl: add memory allocation logic for numa platform Zhang, Yang Z
2011-08-09  5:37 ` Juergen Gross
2011-08-09 11:46 ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.