All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Andre Przywara" <andre.przywara@amd.com>
To: xen-devel@lists.xensource.com
Subject: [PATCH 3/4] [HVM] allocate HVM guest memory with NUMA in mind
Date: Mon, 13 Aug 2007 12:02:59 +0200	[thread overview]
Message-ID: <46C02C53.9060306@amd.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 55 bytes --]

Signed-off-by: Andre Przywara <andre.przywara@amd.com>

[-- Attachment #2: numa_hvm_guest3.patch --]
[-- Type: text/plain, Size: 8592 bytes --]

# HG changeset patch
# User andre.przywara@amd.com
# Date 1186563732 -7200
# Node ID f5e9f20109d9dc3c82bfadcedd4af77a35e8c5fb
# Parent  e730c1207604414f6f2779cc6adb213e3c1362eb
allocate HVM guest memory according to NUMA setup

diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c	Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xc_hvm_build.c	Wed Aug 08 11:02:12 2007 +0200
@@ -152,8 +152,173 @@ static int loadelfimage(
     return rc;
 }
 
+#define MAX_CPU_ID 255
+
+/*
+ * Query the host topology through xc_physinfo().
+ *
+ * On success, returns a heap-allocated CPU-to-node table that the caller
+ * must free(), fills *physinfo, and stores in *nrcpus the number of table
+ * entries that may be read.  Returns NULL if the allocation or the
+ * xc_physinfo() call fails.
+ */
+static xc_cpu_to_node_t *get_numa_cpumap (int xc_handle,
+                                          xc_physinfo_t *physinfo,
+                                          int *nrcpus)
+{
+    xc_cpu_to_node_t *cpumap;
+
+    cpumap = calloc(MAX_CPU_ID, sizeof(*cpumap));
+    if ( cpumap == NULL )
+        return NULL;
+
+    memset(physinfo, 0, sizeof(*physinfo));
+    set_xen_guest_handle(physinfo->cpu_to_node, cpumap);
+
+    if ( xc_physinfo(xc_handle, physinfo) != 0 )
+    {
+        free(cpumap);
+        return NULL;
+    }
+
+    *nrcpus = physinfo->threads_per_core * physinfo->cores_per_socket *
+        physinfo->sockets_per_node * physinfo->nr_nodes;
+    /* The table holds MAX_CPU_ID entries; never read beyond them. */
+    if ( *nrcpus > MAX_CPU_ID )
+        *nrcpus = MAX_CPU_ID;
+
+    return cpumap;
+}
+
+/*
+ * Restrict each VCPU of domain dom to the physical CPUs of one NUMA node,
+ * distributing the VCPUs evenly across numanodes nodes.
+ *
+ * Returns 0 on success, or -1 if host or domain information could not be
+ * obtained or memory could not be allocated.
+ */
+static int setup_numa_affinity (int xc_handle, uint32_t dom, int numanodes)
+{
+    xc_physinfo_t physinfo;
+    xc_dominfo_t dominfo;
+    xc_cpu_to_node_t *cpumap;
+    uint64_t *nodemasks = NULL;
+    int nrcpus, i, node, rc = -1;
+
+    cpumap = get_numa_cpumap(xc_handle, &physinfo, &nrcpus);
+    if ( cpumap == NULL )
+        return -1;
+    if ( physinfo.nr_nodes == 0 )
+        goto out;
+
+    nodemasks = calloc(physinfo.nr_nodes, sizeof(*nodemasks));
+    if ( nodemasks == NULL )
+        goto out;
+
+    /* Build one CPU bitmask per node; a uint64_t holds CPUs 0..63 only. */
+    for ( i = 0; i < nrcpus && i < 64; i++ )
+    {
+        if ( cpumap[i] < physinfo.nr_nodes )
+            nodemasks[cpumap[i]] |= (uint64_t)1 << i;
+    }
+
+    if ( xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1 )
+    {
+        ERROR("Unable to get domain info.");
+        goto out;
+    }
+
+    for ( i = 0; i <= dominfo.max_vcpu_id; i++ )
+    {
+        /*
+         * The modulo keeps the index inside nodemasks[] when more nodes
+         * are requested than the host reports; otherwise it is a no-op.
+         */
+        node = ((i * numanodes) / (dominfo.max_vcpu_id + 1)) %
+            physinfo.nr_nodes;
+        xc_vcpu_setaffinity(xc_handle, dom, i, nodemasks[node]);
+    }
+    rc = 0;
+
+ out:
+    free(nodemasks);
+    free(cpumap);
+    return rc;
+}
+
+/*
+ * Populate the guest physmap from pfn 0xc0 upwards, splitting the nr_pages
+ * guest pages into numanodes contiguous slices.  Slice i is allocated with
+ * the first physical CPU found on node i passed as the CPU argument of
+ * xc_domain_memory_populate_physmap() (CPU 0 if no CPU maps to that node).
+ *
+ * Returns 0 on success, -1 on invalid arguments or if the host topology
+ * is unavailable, otherwise the non-zero result of the failing populate
+ * call.
+ */
+static int setup_numa_mem ( int xc_handle, uint32_t dom, int nr_pages,
+			xen_pfn_t *page_array, int numanodes )
+{
+    xc_physinfo_t physinfo;
+    xc_cpu_to_node_t *cpumap;
+    int nrcpus, i, j, rc = 0;
+    uint32_t firstcpu;
+    unsigned long total, start, end, pages_per_node;
+
+    if ( nr_pages <= 0 || numanodes <= 0 )
+        return -1;
+    total = nr_pages;
+
+    cpumap = get_numa_cpumap(xc_handle, &physinfo, &nrcpus);
+    if ( cpumap == NULL )
+        return -1;
+
+    /* Slice size: the page count rounded up to a multiple of 256, divided
+     * evenly between the nodes. */
+    pages_per_node = ((total + 0xFF) & ~0xFFUL) / numanodes;
+
+    for ( i = 0; i < numanodes; i++ )
+    {
+        /* Use the first CPU belonging to node i, or CPU 0 if none. */
+        firstcpu = 0;
+        for ( j = 0; j < nrcpus; j++ )
+        {
+            if ( cpumap[j] == i )
+            {
+                firstcpu = j;
+                break;
+            }
+        }
+
+        /* Slice i covers pfns [start, end); the last takes the rest. */
+        start = i * pages_per_node;
+        end = ( i == numanodes - 1 ) ? total : start + pages_per_node;
+
+        /*
+         * Pfns below 0xc0 are not populated by this function.  Clamping
+         * both ends keeps the unsigned page count from wrapping around
+         * when a slice is smaller than 0xc0 pages or ends past nr_pages.
+         */
+        if ( start < 0xc0 )
+            start = 0xc0;
+        if ( end > total )
+            end = total;
+        if ( start >= end )
+            continue;
+
+        rc = xc_domain_memory_populate_physmap(
+            xc_handle, dom, end - start, 0, 0, firstcpu,
+            &page_array[start]);
+        if ( rc != 0 )
+            break;
+    }
+
+    free(cpumap);
+    return rc;
+}
+
 static int setup_guest(int xc_handle,
-                       uint32_t dom, int memsize,
+                       uint32_t dom, int memsize, int numanodes,
                        char *image, unsigned long image_size,
                        vcpu_guest_context_either_t *ctxt)
 {
@@ -213,13 +306,24 @@ static int setup_guest(int xc_handle,
     rc = xc_domain_memory_populate_physmap(
         xc_handle, dom, 0xa0, 0, 0, XENMEM_DEFAULT_CPU, &page_array[0x00]);
     if ( rc == 0 )
-        rc = xc_domain_memory_populate_physmap(
-            xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
-            &page_array[0xc0]);
+    {
+        if ( numanodes > 0 )
+            rc = setup_numa_mem (xc_handle, dom, nr_pages, page_array,
+            numanodes);
+        else
+            rc = xc_domain_memory_populate_physmap (
+                xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
+                &page_array[0xc0] );
+    }
     if ( rc != 0 )
     {
         PERROR("Could not allocate memory for HVM guest.\n");
         goto error_out;
+    }
+
+    if ( numanodes > 0 )
+    {
+        setup_numa_affinity (xc_handle, dom, numanodes);
     }
 
     if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
@@ -288,6 +392,7 @@ static int xc_hvm_build_internal(int xc_
 static int xc_hvm_build_internal(int xc_handle,
                                  uint32_t domid,
                                  int memsize,
+                                 int numanodes,
                                  char *image,
                                  unsigned long image_size)
 {
@@ -303,7 +408,8 @@ static int xc_hvm_build_internal(int xc_
 
     memset(&ctxt, 0, sizeof(ctxt));
 
-    if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
+    if ( setup_guest(xc_handle, domid, memsize, numanodes,
+        image, image_size, &ctxt) < 0 )
     {
         goto error_out;
     }
@@ -341,6 +447,7 @@ int xc_hvm_build(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 int numanodes,
                  const char *image_name)
 {
     char *image;
@@ -351,7 +458,8 @@ int xc_hvm_build(int xc_handle,
          ((image = xc_read_image(image_name, &image_size)) == NULL) )
         return -1;
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
+        image, image_size);
 
     free(image);
 
@@ -364,6 +472,7 @@ int xc_hvm_build_mem(int xc_handle,
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_buffer,
                      unsigned long image_size)
 {
@@ -386,7 +495,7 @@ int xc_hvm_build_mem(int xc_handle,
         return -1;
     }
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
                                 img, img_len);
 
     /* xc_inflate_buffer may return the original buffer pointer (for
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h	Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xenguest.h	Wed Aug 08 11:02:12 2007 +0200
@@ -128,11 +128,13 @@ int xc_hvm_build(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 int numanodes,
                  const char *image_name);
 
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_buffer,
                      unsigned long image_size);
 
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c	Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xg_private.c	Wed Aug 08 11:02:12 2007 +0200
@@ -192,6 +192,7 @@ __attribute__((weak))
     int xc_hvm_build(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_name)
 {
     errno = ENOSYS;
diff -r e730c1207604 -r f5e9f20109d9 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c	Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/python/xen/lowlevel/xc/xc.c	Wed Aug 08 11:02:12 2007 +0200
@@ -549,7 +549,7 @@ static PyObject *pyxc_hvm_build(XcObject
                                       &numanodes) )
         return NULL;
 
-    if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
+    if ( xc_hvm_build(self->xc_handle, dom, memsize, numanodes, image) != 0 )
         return pyxc_error_to_exception();
 
 #if !defined(__ia64__)
diff -r e730c1207604 -r f5e9f20109d9 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c	Tue Aug 07 15:11:00 2007 +0200
+++ b/xen/common/page_alloc.c	Wed Aug 08 11:02:12 2007 +0200
@@ -806,8 +806,12 @@ struct page_info *__alloc_domheap_pages(
 
     if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
     {
-        pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);
-
+        if (avail_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+            cpu_to_node (cpu)) >= ( 1UL << order ))
+        {
+            pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+                cpu, order);
+        }
         /* Failure? Then check if we can fall back to the DMA pool. */
         if ( unlikely(pg == NULL) &&
              ((order > MAX_ORDER) ||

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

                 reply	other threads:[~2007-08-13 10:02 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=46C02C53.9060306@amd.com \
    --to=andre.przywara@amd.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.