* [XEN][vNUMA][PATCH 7/9] Build NUMA HVM
From: Dulloor @ 2010-07-02 23:55 UTC
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 135 bytes --]
Allocate memory for the HVM guest based on the chosen allocation scheme and the selected nodes.
-dulloor
Signed-off-by: Dulloor <dulloor@gmail.com>
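For reference, a minimal sketch of how the changed entry point is driven
(the signature is taken from this patch; the firmware path and sizes are
illustrative only, and passing numa_config == NULL keeps the existing
non-NUMA behaviour, as the python bindings do):

    #include "xenguest.h"      /* xc_hvm_build_target_mem() */
    #include "xc_dom_numa.h"   /* xc_domain_numa_config_t */

    static int build_hvm_with_numa(xc_interface *xch, uint32_t domid,
                                   xc_domain_numa_config_t *numa_config)
    {
        int memsize = 4096, target = 4096;  /* MB; illustrative values */

        /* numa_config == NULL selects the plain (non-NUMA) path */
        return xc_hvm_build_target_mem(xch, domid, memsize, target,
                                       numa_config,
                                       "/usr/lib/xen/boot/hvmloader");
    }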
[-- Attachment #2: xen-07-build-numa-hvm.patch --]
[-- Type: text/x-patch, Size: 19912 bytes --]
diff --git a/tools/libxc/ia64/xc_ia64_hvm_build.c b/tools/libxc/ia64/xc_ia64_hvm_build.c
--- a/tools/libxc/ia64/xc_ia64_hvm_build.c
+++ b/tools/libxc/ia64/xc_ia64_hvm_build.c
@@ -1109,6 +1109,7 @@ int xc_hvm_build_target_mem(xc_interface
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
const char *image_name)
{
/* XXX:PoD isn't supported yet */
diff --git a/tools/libxc/xc_dom_numa.c b/tools/libxc/xc_dom_numa.c
--- a/tools/libxc/xc_dom_numa.c
+++ b/tools/libxc/xc_dom_numa.c
@@ -123,7 +123,7 @@ xc_dump_dom_numa_layout(xc_interface *xc
for (i = 0; i < layout->nr_vnodes; i++)
{
xc_vnode_data_t *vnode_data = &layout->vnode_data[i];
- dumpstr += sprintf(dumpstr, "vnode[%u]:mnode(%u), node_nr_pages(%lx)",
+ dumpstr += sprintf(dumpstr, "vnode[%u]:mnode(%u), node_nr_pages(%x)",
vnode_data->vnode_id, vnode_data->mnode_id,
vnode_data->nr_pages);
if (layout->type == XEN_DOM_NUMA_SPLIT)
diff --git a/tools/libxc/xc_dom_numa.h b/tools/libxc/xc_dom_numa.h
--- a/tools/libxc/xc_dom_numa.h
+++ b/tools/libxc/xc_dom_numa.h
@@ -19,7 +19,7 @@ struct xc_domain_numa_layout {
uint32_t nr_pages;
/* Only (nr_vnodes) entries are filled */
- xc_vnode_data_t vnode_data[XC_MAX_VNODES];
+ struct xen_vnode_info vnode_data[XC_MAX_VNODES];
/* Only (nr_vnodes*nr_vnodes) entries are filled */
uint8_t vnode_distance[XC_MAX_VNODES*XC_MAX_VNODES];
diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c
+++ b/tools/libxc/xc_hvm_build.c
@@ -10,6 +10,7 @@
#include "xg_private.h"
#include "xc_private.h"
+#include "xc_dom_numa.h"
#include <xen/foreign/x86_32.h>
#include <xen/foreign/x86_64.h>
@@ -143,29 +144,16 @@ static long populate_physmap(xc_interfac
return xc_memory_op(xch, XENMEM_populate_physmap, &reservation);
}
-static int setup_guest_memory(xc_interface *xch, uint32_t dom,
- unsigned long nr_pages, unsigned long target_pages,
- struct elf_binary *elf)
+#define INVALID_NODE (~0)
+static int __setup_guest_memory(xc_interface *xch, uint32_t dom,
+ unsigned long nr_pages, unsigned long target_pages,
+ unsigned long cur_pages, xen_pfn_t *page_array,
+ int vga_hole, int node, int exact_node)
{
- xen_pfn_t *page_array = NULL;
- unsigned long pod_pages = 0, cur_pages, i;
unsigned long stat_normal_pages = 0, stat_2mb_pages = 0, stat_1gb_pages = 0;
+ unsigned long pod_pages = 0;
+ unsigned int mem_flags = 0;
int pod_mode = 0;
-
- if ( nr_pages > target_pages )
- pod_mode = 1;
-
- if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
- {
- PERROR("Could not allocate memory.");
- goto error_out;
- }
-
- for ( i = 0; i < nr_pages; i++ )
- page_array[i] = i;
- for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
- page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
-
/*
* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
*
@@ -176,13 +164,26 @@ static int setup_guest_memory(xc_interfa
* Under 2MB mode, we allocate pages in batches of no more than 8MB to
* ensure that we can be preempted and hence dom0 remains responsive.
*/
- if (populate_physmap(xch, dom, 0xa0, 0, 0, page_array, 0x00) != 0xa0 )
+ if (nr_pages > target_pages)
{
- PERROR("Could not allocate memory.");
- goto error_out;
+ pod_mode = 1;
+ mem_flags |= XENMEMF_populate_on_demand;
+ IPRINTF("I SHOULDN'T BE HERE !!\n");
}
- cur_pages = 0xc0;
- stat_normal_pages = 0xc0;
+ if (node != INVALID_NODE)
+ mem_flags |= exact_node?XENMEMF_exact_node(node):XENMEMF_node(node);
+
+ if (vga_hole)
+ {
+ if (populate_physmap(xch, dom, 0xa0, 0, mem_flags, page_array, 0x00)
+ != 0xa0)
+ {
+ PERROR("Could not allocate memory.");
+ goto error_out;
+ }
+ cur_pages = 0xc0;
+ stat_normal_pages = 0xc0;
+ }
#define ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, max_pages) \
do{ \
@@ -194,7 +195,6 @@ do{
(count > max_pages) ) \
count &= ~(max_pages-1); \
}while(0)
-
while ( nr_pages > cur_pages )
{
/* Clip count to maximum 1GB extent. */
@@ -204,42 +204,46 @@ do{
if ( count > SUPERPAGE_1GB_NR_PFNS )
count = SUPERPAGE_1GB_NR_PFNS;
- /* Attempt to allocate 1GB super page. Because in each pass we only
- * allocate at most 1GB, we don't have to clip super page boundaries.
+ /* Attempt to allocate 1GB super page. Because in each pass we
+ * allocate at most 1GB, we don't have to clip super page boundaries.
*/
ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, SUPERPAGE_1GB_NR_PFNS);
if ( ((count | cur_pages) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
- /* Check if there exists MMIO hole in the 1GB memory range */
- !check_mmio_hole(cur_pages << PAGE_SHIFT,
- SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT) )
+ /* Check if there exists MMIO hole in the 1GB memory range */
+ !check_mmio_hole(cur_pages << PAGE_SHIFT,
+ SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT) )
{
done = populate_physmap(xch, dom, count, SUPERPAGE_1GB_SHIFT,
- (pod_mode)?XENMEMF_populate_on_demand:0,
- page_array, cur_pages);
- stat_1gb_pages += done;
- done <<= SUPERPAGE_1GB_SHIFT;
- if ( pod_mode && target_pages > cur_pages )
+ mem_flags, page_array, cur_pages);
+ if ( done > 0 )
{
- int d = target_pages - cur_pages;
- pod_pages += ( done < d ) ? done : d;
+ stat_1gb_pages += done;
+ done <<= SUPERPAGE_1GB_SHIFT;
+ if ( pod_mode && target_pages > cur_pages )
+ {
+ int d = target_pages - cur_pages;
+ pod_pages += ( done < d ) ? done : d;
+ }
+ cur_pages += done;
+ count -= done;
}
- cur_pages += done;
- count -= done;
}
- if ( count != 0 )
+ if ( count == 0 )
+ continue;
+ /* Clip count to maximum 8MB extent. */
+ if ( count > SUPERPAGE_2MB_NR_PFNS*4 )
+ count = SUPERPAGE_2MB_NR_PFNS*4;
+
+ /* Attempt to allocate superpage extents. */
+ ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, SUPERPAGE_2MB_NR_PFNS);
+ if ( ((count | cur_pages) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
{
- /* Clip count to maximum 8MB extent. */
- if ( count > SUPERPAGE_2MB_NR_PFNS*4 )
- count = SUPERPAGE_2MB_NR_PFNS*4;
-
- /* Attempt to allocate superpage extents. */
- ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, SUPERPAGE_2MB_NR_PFNS);
- if ( ((count | cur_pages) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
+ done = populate_physmap(xch, dom, count, SUPERPAGE_2MB_SHIFT,
+ (pod_mode)?XENMEMF_populate_on_demand:0,
+ page_array, cur_pages);
+ if ( done > 0 )
{
- done = populate_physmap(xch, dom, count, SUPERPAGE_2MB_SHIFT,
- (pod_mode)?XENMEMF_populate_on_demand:0,
- page_array, cur_pages);
stat_2mb_pages += done;
done <<= SUPERPAGE_2MB_SHIFT;
if ( pod_mode && target_pages > cur_pages )
@@ -252,49 +256,159 @@ do{
}
}
+ if ( count == 0 )
+ continue;
/* Fall back to 4kB extents. */
- if ( count != 0 )
+ done = populate_physmap(xch, dom, count, 0, 0,
+ page_array, cur_pages);
+ if ( done != count )
{
- done = populate_physmap(xch, dom, count, 0, 0,
- page_array, cur_pages);
- if ( done != count )
- {
- PERROR("Could not allocate memory for HVM guest.");
- goto error_out;
- }
- stat_normal_pages += count;
- cur_pages += count;
+ PERROR("Could not allocate memory for HVM guest.");
if ( pod_mode )
- pod_pages -= count;
+ break;
+ goto error_out;
}
+ stat_normal_pages += count;
+ cur_pages += count;
+ if ( pod_mode )
+ pod_pages -= count;
}
#undef ALIGN_COUNT_TO_MAX_PAGES
-
if ( pod_mode )
{
+ IPRINTF("OR HERE !!\n");
if ( xc_domain_memory_set_pod_target(xch, dom, pod_pages,
- NULL, NULL, NULL) )
+ NULL, NULL, NULL) )
{
PERROR("Could not set POD target for HVM guest.");
goto error_out;
}
}
- IPRINTF("PHYSICAL MEMORY ALLOCATION:\n"
+ IPRINTF("PHYSICAL MEMORY ALLOCATION (NODE %d):\n"
" 4KB PAGES: 0x%016lx\n"
" 2MB PAGES: 0x%016lx\n"
" 1GB PAGES: 0x%016lx\n",
- stat_normal_pages, stat_2mb_pages, stat_1gb_pages);
-
- if ( loadelfimage(xch, elf, dom, page_array) )
- goto error_out;
- free(page_array);
+ node, stat_normal_pages, stat_2mb_pages, stat_1gb_pages);
return 0;
+error_out:
+ return -1;
+}
-error_out:
+static int setup_guest_numa_stripe(xc_interface *xch,
+ xc_domain_numa_layout_t *dom_layout, xen_pfn_t *page_array)
+{
+ int vnode, rc = 0;
+ unsigned long cur_pages, nr_pages;
+ /* Make a private copy for stripe iterations */
+ xc_domain_numa_layout_t *layout;
+ if (!(layout = malloc(sizeof(*layout))))
+ {
+ PERROR("%s : Failed malloc.", __FUNCTION__);
+ return -1;
+ }
+ memcpy(layout, dom_layout, sizeof(*layout));
+
+ for (vnode=0, cur_pages=0, nr_pages=0;
+ cur_pages<layout->nr_pages && !rc; vnode++)
+ {
+ unsigned long allocsz;
+ xc_vnode_data_t *vnode_data;
+ while (!layout->vnode_data[vnode].nr_pages)
+ {
+ vnode++;
+ if (vnode >= layout->nr_vnodes)
+ vnode = 0;
+ }
+ vnode_data = &layout->vnode_data[vnode];
+ allocsz = layout->stripe_size;
+ if (allocsz > vnode_data->nr_pages)
+ allocsz = vnode_data->nr_pages;
+
+ nr_pages = cur_pages + allocsz;
+ rc = __setup_guest_memory(xch, layout->domid, nr_pages, nr_pages,
+ cur_pages, page_array, !cur_pages, vnode_data->mnode_id, 1);
+ vnode_data->nr_pages -= allocsz;
+ cur_pages = nr_pages;
+ }
+ free(layout);
+ return rc;
+}
+
+static int setup_guest_numa_memory(xc_interface *xch,
+ xc_domain_numa_layout_t *dom_layout, xen_pfn_t *page_array)
+{
+ int vnode, rc;
+ unsigned long cur_pages, nr_pages;
+
+ if ((rc = xc_setup_numa_domain(xch, dom_layout)))
+ goto setup_done;
+
+ if (dom_layout->type == XEN_DOM_NUMA_STRIPE)
+ {
+ rc = setup_guest_numa_stripe(xch, dom_layout, page_array);
+ goto setup_done;
+ }
+
+ /* XXX: pod is turned off with NUMA allocation for now */
+ for (vnode=0, cur_pages=0, nr_pages=0;
+ vnode<dom_layout->nr_vnodes && !rc; vnode++)
+ {
+ xc_vnode_data_t *vnode_data = &dom_layout->vnode_data[vnode];
+
+ nr_pages = cur_pages + vnode_data->nr_pages;
+ rc = __setup_guest_memory(xch, dom_layout->domid, nr_pages, nr_pages,
+ cur_pages, page_array, (vnode == 0), vnode_data->mnode_id,
+ (dom_layout->type != XEN_DOM_NUMA_DONTCARE));
+ cur_pages = nr_pages;
+ }
+setup_done:
+ if (!rc)
+ rc = xc_domain_numa_pinvcpus(xch, dom_layout);
+ return rc;
+}
+
+static int setup_guest_nonnuma_memory(xc_interface *xch, uint32_t domid,
+ unsigned long nr_pages, unsigned long target_pages,
+ xen_pfn_t *page_array)
+{
+ return __setup_guest_memory(xch, domid, nr_pages, target_pages, 0,
+ page_array, 1, INVALID_NODE, 0);
+}
+
+static int setup_guest_memory(xc_interface *xch, uint32_t dom,
+ xc_domain_numa_layout_t *dom_layout,
+ unsigned long nr_pages, unsigned long target_pages,
+ struct elf_binary *elf)
+{
+ xen_pfn_t *page_array = NULL;
+ unsigned long i;
+ int rc;
+
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
+ {
+ rc = -1;
+ PERROR("Could not allocate memory.");
+ goto out;
+ }
+
+ for ( i = 0; i < nr_pages; i++ )
+ page_array[i] = i;
+ for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
+ page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+ if ( dom_layout )
+ rc = setup_guest_numa_memory(xch, dom_layout, page_array);
+ else
+ rc = setup_guest_nonnuma_memory(xch, dom,
+ nr_pages, target_pages, page_array);
+ if ( rc )
+ goto out;
+ rc = loadelfimage(xch, elf, dom, page_array);
+out:
if ( page_array )
free(page_array);
- return -1;
+ return rc;
}
static int
@@ -373,6 +487,7 @@ error_out:
static int setup_guest(xc_interface *xch,
uint32_t dom, int memsize, int target,
+ xc_domain_numa_layout_t *dom_layout,
char *image, unsigned long image_size)
{
unsigned long entry_eip;
@@ -411,7 +526,7 @@ static int setup_guest(xc_interface *xch
v_start, v_end,
elf_uval(&elf, elf.ehdr, e_entry));
- rc = setup_guest_memory(xch, dom,
+ rc = setup_guest_memory(xch, dom, dom_layout,
(unsigned long)memsize << (20 - PAGE_SHIFT),
(unsigned long)target << (20 - PAGE_SHIFT), &elf);
if ( rc < 0 )
@@ -444,16 +559,26 @@ static int xc_hvm_build_internal(xc_inte
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
char *image,
unsigned long image_size)
{
+ int rc;
+ xc_domain_numa_layout_t *dom_layout = 0;
+
if ( (image == NULL) || (image_size == 0) )
{
ERROR("Image required");
return -1;
}
-
- return setup_guest(xch, domid, memsize, target, image, image_size);
+ if ( numa_config )
+ dom_layout = xc_dom_alloc_numa_layout(xch, domid,
+ (uint64_t)memsize << (20 - PAGE_SHIFT), numa_config);
+ rc = setup_guest(xch, domid, memsize, target, dom_layout,
+ image, image_size);
+ if ( dom_layout )
+ xc_dom_free_numa_layout(xch, dom_layout);
+ return rc;
}
/* xc_hvm_build:
@@ -471,11 +596,9 @@ int xc_hvm_build(xc_interface *xch,
if ( (image_name == NULL) ||
((image = xc_read_image(xch, image_name, &image_size)) == NULL) )
return -1;
-
- sts = xc_hvm_build_internal(xch, domid, memsize, memsize, image, image_size);
-
+ sts = xc_hvm_build_internal(xch, domid, memsize, memsize, NULL,
+ image, image_size);
free(image);
-
return sts;
}
@@ -489,6 +612,7 @@ int xc_hvm_build_target_mem(xc_interface
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
const char *image_name)
{
char *image;
@@ -498,11 +622,9 @@ int xc_hvm_build_target_mem(xc_interface
if ( (image_name == NULL) ||
((image = xc_read_image(xch, image_name, &image_size)) == NULL) )
return -1;
-
- sts = xc_hvm_build_internal(xch, domid, memsize, target, image, image_size);
-
+ sts = xc_hvm_build_internal(xch, domid, memsize, target, numa_config,
+ image, image_size);
free(image);
-
return sts;
}
@@ -520,29 +642,23 @@ int xc_hvm_build_mem(xc_interface *xch,
char *img;
/* Validate that there is a kernel buffer */
-
if ( (image_buffer == NULL) || (image_size == 0) )
{
ERROR("kernel image buffer not present");
return -1;
}
-
img = xc_inflate_buffer(xch, image_buffer, image_size, &img_len);
if ( img == NULL )
{
ERROR("unable to inflate ram disk buffer");
return -1;
}
-
- sts = xc_hvm_build_internal(xch, domid, memsize, memsize,
- img, img_len);
-
+ sts = xc_hvm_build_internal(xch, domid, memsize, memsize, NULL,
+ img, img_len);
/* xc_inflate_buffer may return the original buffer pointer (for
already inflated buffers), so exercise some care in freeing */
-
if ( (img != NULL) && (img != image_buffer) )
free(img);
-
return sts;
}
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -149,6 +149,7 @@ int xc_hvm_build_target_mem(xc_interface
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
const char *image_name);
int xc_hvm_build_mem(xc_interface *xch,
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -176,8 +176,9 @@ int build_hvm(struct libxl_ctx *ctx, uin
domid,
(info->max_memkb - info->video_memkb) / 1024,
(info->target_memkb - info->video_memkb) / 1024,
- libxl_abs_path(ctx, (char *)info->kernel,
- libxl_xenfirmwaredir_path()));
+ &info->numa_config,
+ libxl_abs_path(ctx, (char *)info->kernel, libxl_xenfirmwaredir_path()));
+
if (ret) {
XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, ret, "hvm building failed");
return ERROR_FAIL;
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -995,7 +995,7 @@ static PyObject *pyxc_hvm_build(XcObject
target = memsize;
if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
- target, image) != 0 )
+ target, NULL, image) != 0 )
return pyxc_error_to_exception(self->xc_handle);
#if !defined(__ia64__)
* Re: [XEN][vNUMA][PATCH 7/9] Build NUMA HVM
From: George Dunlap @ 2010-07-05 9:55 UTC
To: Dulloor; +Cc: xen-devel
What's this line for:
>+ if (nr_pages > target_pages)
> {
>- PERROR("Could not allocate memory.");
>- goto error_out;
>+ pod_mode = 1;
>+ mem_flags |= XENMEMF_populate_on_demand;
>+ IPRINTF("I SHOULDN'T BE HERE !!\n");
It's not clear what this patch does to the PoD logic... does it still
need some work, or should I try harder to grok it? Have you tested it
in PoD mode?
-George
* Re: [XEN][vNUMA][PATCH 7/9] Build NUMA HVM
From: Dulloor @ 2010-07-06 6:07 UTC
To: George Dunlap; +Cc: xen-devel
With the NUMA allocator, PoD is simply disabled for now. This debug
statement slipped through while testing that; I'll take care of it :)
However, I did test PoD for regressions.
-dulloor
On Mon, Jul 5, 2010 at 2:55 AM, George Dunlap
<George.Dunlap@eu.citrix.com> wrote:
> What's this line for:
>
>>+ if (nr_pages > target_pages)
>> {
>>- PERROR("Could not allocate memory.");
>>- goto error_out;
>>+ pod_mode = 1;
>>+ mem_flags |= XENMEMF_populate_on_demand;
>>+ IPRINTF("I SHOULDN'T BE HERE !!\n");
>
> It's not clear what this patch does to the PoD logic... does it still
> need some work, or should I try harder to grok it? Have you tested it
> in PoD mode?
>
> -George
>
* Re: [XEN][vNUMA][PATCH 7/9] Build NUMA HVM
From: George Dunlap @ 2010-07-06 10:09 UTC
To: Dulloor; +Cc: xen-devel
You mean, if NUMA is on, then PoD is disabled, but if NUMA is off, PoD
still works?
Or do you mean, this patch will break PoD functionality if accepted?
-George
On Tue, Jul 6, 2010 at 7:07 AM, Dulloor <dulloor@gmail.com> wrote:
> With the NUMA allocator, PoD is simply disabled for now. This debug
> statement slipped through while testing that; I'll take care of it :)
> However, I did test PoD for regressions.
>
> -dulloor
>
> On Mon, Jul 5, 2010 at 2:55 AM, George Dunlap
> <George.Dunlap@eu.citrix.com> wrote:
>> What's this line for:
>>
>>>+ if (nr_pages > target_pages)
>>> {
>>>- PERROR("Could not allocate memory.");
>>>- goto error_out;
>>>+ pod_mode = 1;
>>>+ mem_flags |= XENMEMF_populate_on_demand;
>>>+ IPRINTF("I SHOULDN'T BE HERE !!\n");
>>
>> It's not clear what this patch does to the PoD logic... does it still
>> need some work, or should I try harder to grok it? Have you tested it
>> in PoD mode?
>>
>> -George
>>
>
* Re: [XEN][vNUMA][PATCH 7/9] Build NUMA HVM
From: Dulloor @ 2010-07-06 16:10 UTC
To: George Dunlap; +Cc: xen-devel
On Tue, Jul 6, 2010 at 3:09 AM, George Dunlap
<George.Dunlap@eu.citrix.com> wrote:
> You mean, if NUMA is on, then PoD is disabled, but if NUMA is off, PoD
> still works?
Yes. PoD is disabled only if we choose a NUMA allocation strategy for
the guest; otherwise things stay exactly as they are now. I plan to add
PoD support to the NUMA allocation paths once this series is checked in.
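Concretely, the NUMA paths always invoke __setup_guest_memory() with
target_pages equal to nr_pages, so the (nr_pages > target_pages) check
that turns on pod_mode can never fire there. Roughly, from
setup_guest_numa_memory() in the patch:

    /* nr_pages is passed as both the allocation size and the target,
     * so pod_mode stays 0 on this path */
    nr_pages = cur_pages + vnode_data->nr_pages;
    rc = __setup_guest_memory(xch, dom_layout->domid, nr_pages, nr_pages,
                              cur_pages, page_array, (vnode == 0),
                              vnode_data->mnode_id,
                              (dom_layout->type != XEN_DOM_NUMA_DONTCARE));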
>
> Or do you mean, this patch will break PoD functionality if accepted?
>
> -George
>
> On Tue, Jul 6, 2010 at 7:07 AM, Dulloor <dulloor@gmail.com> wrote:
>> With the NUMA allocator, PoD is simply disabled for now. This debug
>> statement slipped through while testing that; I'll take care of it :)
>> However, I did test PoD for regressions.
>>
>> -dulloor
>>
>> On Mon, Jul 5, 2010 at 2:55 AM, George Dunlap
>> <George.Dunlap@eu.citrix.com> wrote:
>>> What's this line for:
>>>
>>>>+ if (nr_pages > target_pages)
>>>> {
>>>>- PERROR("Could not allocate memory.");
>>>>- goto error_out;
>>>>+ pod_mode = 1;
>>>>+ mem_flags |= XENMEMF_populate_on_demand;
>>>>+ IPRINTF("I SHOULDN'T BE HERE !!\n");
>>>
>>> It's not clear what this patch does to the PoD logic... does it still
>>> need some work, or should I try harder to grok it? Have you tested it
>>> in PoD mode?
>>>
>>> -George
>>>
>>
* [vNUMA v2][PATCH 6/8] Build NUMA HVM
From: Dulloor @ 2010-08-01 22:05 UTC
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 182 bytes --]
Allocate memory for the HVM guest based on the chosen allocation scheme
and the selected nodes. Also, disable PoD for the NUMA allocation schemes.
-dulloor
Signed-off-by: Dulloor <dulloor@gmail.com>
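For the XEN_DOM_NUMA_STRIPE scheme, the core of the allocation loop looks
roughly like this (a simplified restatement of setup_guest_numa_stripe()
from the attached patch, with error handling trimmed):

    int vnode, rc = 0;
    unsigned long cur_pages;

    /* Round-robin over the virtual nodes, taking at most stripe_size
     * pages from each node's remaining quota per pass, until the whole
     * guest (layout->nr_pages) is populated. */
    for (vnode = 0, cur_pages = 0; cur_pages < layout->nr_pages; vnode++)
    {
        xc_vnode_data_t *vd;
        unsigned long allocsz;

        vnode %= layout->nr_vnodes;     /* wrap around the node list */
        vd = &layout->vnode_data[vnode];
        if (!vd->nr_pages)              /* this node's quota is used up */
            continue;

        allocsz = layout->stripe_size;
        if (allocsz > vd->nr_pages)
            allocsz = vd->nr_pages;

        /* Populate guest pfns [cur_pages, cur_pages+allocsz) strictly on
         * the machine node backing this vnode (exact_node == 1); the VGA
         * hole is only punched for the very first chunk. */
        rc = __setup_guest_memory(xch, layout->domid,
                                  cur_pages + allocsz, cur_pages + allocsz,
                                  cur_pages, page_array,
                                  !cur_pages, vd->mnode_id, 1);
        if (rc)
            break;
        vd->nr_pages -= allocsz;
        cur_pages += allocsz;
    }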
[-- Attachment #2: xen-06-build-numa-hvm.patch --]
[-- Type: text/x-patch, Size: 18605 bytes --]
vNUMA: Build domain NUMA layout for HVMs
diff --git a/tools/libxc/ia64/xc_ia64_hvm_build.c b/tools/libxc/ia64/xc_ia64_hvm_build.c
--- a/tools/libxc/ia64/xc_ia64_hvm_build.c
+++ b/tools/libxc/ia64/xc_ia64_hvm_build.c
@@ -1110,6 +1110,7 @@ int xc_hvm_build_target_mem(xc_interface
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
const char *image_name)
{
/* XXX:PoD isn't supported yet */
diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c
+++ b/tools/libxc/xc_hvm_build.c
@@ -10,6 +10,7 @@
#include "xg_private.h"
#include "xc_private.h"
+#include "xc_dom_numa.h"
#include <xen/foreign/x86_32.h>
#include <xen/foreign/x86_64.h>
@@ -142,29 +143,16 @@ static long populate_physmap(xc_interfac
return xc_memory_op(xch, XENMEM_populate_physmap, &reservation);
}
-static int setup_guest_memory(xc_interface *xch, uint32_t dom,
- unsigned long nr_pages, unsigned long target_pages,
- struct elf_binary *elf)
+#define INVALID_NODE (~0)
+static int __setup_guest_memory(xc_interface *xch, uint32_t dom,
+ unsigned long nr_pages, unsigned long target_pages,
+ unsigned long cur_pages, xen_pfn_t *page_array,
+ int vga_hole, int node, int exact_node)
{
- xen_pfn_t *page_array = NULL;
- unsigned long pod_pages = 0, cur_pages, i;
unsigned long stat_normal_pages = 0, stat_2mb_pages = 0, stat_1gb_pages = 0;
+ unsigned long pod_pages = 0;
+ unsigned int mem_flags = 0;
int pod_mode = 0;
-
- if ( nr_pages > target_pages )
- pod_mode = 1;
-
- if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
- {
- PERROR("Could not allocate memory.");
- goto error_out;
- }
-
- for ( i = 0; i < nr_pages; i++ )
- page_array[i] = i;
- for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
- page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
-
/*
* Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
*
@@ -175,13 +163,25 @@ static int setup_guest_memory(xc_interfa
* Under 2MB mode, we allocate pages in batches of no more than 8MB to
* ensure that we can be preempted and hence dom0 remains responsive.
*/
- if (populate_physmap(xch, dom, 0xa0, 0, 0, page_array, 0x00) != 0xa0 )
+ if (nr_pages > target_pages)
{
- PERROR("Could not allocate memory.");
- goto error_out;
+ pod_mode = 1;
+ mem_flags |= XENMEMF_populate_on_demand;
}
- cur_pages = 0xc0;
- stat_normal_pages = 0xc0;
+ if (node != INVALID_NODE)
+ mem_flags |= exact_node?XENMEMF_exact_node(node):XENMEMF_node(node);
+
+ if (vga_hole)
+ {
+ if (populate_physmap(xch, dom, 0xa0, 0, mem_flags, page_array, 0x00)
+ != 0xa0)
+ {
+ PERROR("Could not allocate memory.");
+ goto error_out;
+ }
+ cur_pages = 0xc0;
+ stat_normal_pages = 0xc0;
+ }
#define ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, max_pages) \
do{ \
@@ -193,7 +193,6 @@ do{
(count > max_pages) ) \
count &= ~(max_pages-1); \
}while(0)
-
while ( nr_pages > cur_pages )
{
/* Clip count to maximum 1GB extent. */
@@ -203,42 +202,46 @@ do{
if ( count > SUPERPAGE_1GB_NR_PFNS )
count = SUPERPAGE_1GB_NR_PFNS;
- /* Attempt to allocate 1GB super page. Because in each pass we only
- * allocate at most 1GB, we don't have to clip super page boundaries.
+ /* Attempt to allocate 1GB super page. Because in each pass we
+ * allocate at most 1GB, we don't have to clip super page boundaries.
*/
ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, SUPERPAGE_1GB_NR_PFNS);
if ( ((count | cur_pages) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
- /* Check if there exists MMIO hole in the 1GB memory range */
- !check_mmio_hole(cur_pages << PAGE_SHIFT,
- SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT) )
+ /* Check if there exists MMIO hole in the 1GB memory range */
+ !check_mmio_hole(cur_pages << PAGE_SHIFT,
+ SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT) )
{
done = populate_physmap(xch, dom, count, SUPERPAGE_1GB_SHIFT,
- (pod_mode)?XENMEMF_populate_on_demand:0,
- page_array, cur_pages);
- stat_1gb_pages += done;
- done <<= SUPERPAGE_1GB_SHIFT;
- if ( pod_mode && target_pages > cur_pages )
+ mem_flags, page_array, cur_pages);
+ if ( done > 0 )
{
- int d = target_pages - cur_pages;
- pod_pages += ( done < d ) ? done : d;
+ stat_1gb_pages += done;
+ done <<= SUPERPAGE_1GB_SHIFT;
+ if ( pod_mode && target_pages > cur_pages )
+ {
+ int d = target_pages - cur_pages;
+ pod_pages += ( done < d ) ? done : d;
+ }
+ cur_pages += done;
+ count -= done;
}
- cur_pages += done;
- count -= done;
}
- if ( count != 0 )
+ if ( count == 0 )
+ continue;
+ /* Clip count to maximum 8MB extent. */
+ if ( count > SUPERPAGE_2MB_NR_PFNS*4 )
+ count = SUPERPAGE_2MB_NR_PFNS*4;
+
+ /* Attempt to allocate superpage extents. */
+ ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, SUPERPAGE_2MB_NR_PFNS);
+ if ( ((count | cur_pages) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
{
- /* Clip count to maximum 8MB extent. */
- if ( count > SUPERPAGE_2MB_NR_PFNS*4 )
- count = SUPERPAGE_2MB_NR_PFNS*4;
-
- /* Attempt to allocate superpage extents. */
- ALIGN_COUNT_TO_MAX_PAGES(count, cur_pages, SUPERPAGE_2MB_NR_PFNS);
- if ( ((count | cur_pages) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
+ done = populate_physmap(xch, dom, count, SUPERPAGE_2MB_SHIFT,
+ (pod_mode)?XENMEMF_populate_on_demand:0,
+ page_array, cur_pages);
+ if ( done > 0 )
{
- done = populate_physmap(xch, dom, count, SUPERPAGE_2MB_SHIFT,
- (pod_mode)?XENMEMF_populate_on_demand:0,
- page_array, cur_pages);
stat_2mb_pages += done;
done <<= SUPERPAGE_2MB_SHIFT;
if ( pod_mode && target_pages > cur_pages )
@@ -251,49 +254,158 @@ do{
}
}
+ if ( count == 0 )
+ continue;
/* Fall back to 4kB extents. */
- if ( count != 0 )
+ done = populate_physmap(xch, dom, count, 0, 0,
+ page_array, cur_pages);
+ if ( done != count )
{
- done = populate_physmap(xch, dom, count, 0, 0,
- page_array, cur_pages);
- if ( done != count )
- {
- PERROR("Could not allocate memory for HVM guest.");
- goto error_out;
- }
- stat_normal_pages += count;
- cur_pages += count;
+ PERROR("Could not allocate memory for HVM guest.");
if ( pod_mode )
- pod_pages -= count;
+ break;
+ goto error_out;
}
+ stat_normal_pages += count;
+ cur_pages += count;
+ if ( pod_mode )
+ pod_pages -= count;
}
#undef ALIGN_COUNT_TO_MAX_PAGES
-
if ( pod_mode )
{
if ( xc_domain_memory_set_pod_target(xch, dom, pod_pages,
- NULL, NULL, NULL) )
+ NULL, NULL, NULL) )
{
PERROR("Could not set POD target for HVM guest.");
goto error_out;
}
}
- IPRINTF("PHYSICAL MEMORY ALLOCATION:\n"
+ IPRINTF("PHYSICAL MEMORY ALLOCATION (NODE %d):\n"
" 4KB PAGES: 0x%016lx\n"
" 2MB PAGES: 0x%016lx\n"
" 1GB PAGES: 0x%016lx\n",
- stat_normal_pages, stat_2mb_pages, stat_1gb_pages);
-
- if ( loadelfimage(xch, elf, dom, page_array) )
- goto error_out;
- free(page_array);
+ node, stat_normal_pages, stat_2mb_pages, stat_1gb_pages);
return 0;
+error_out:
+ return -1;
+}
-error_out:
+static int setup_guest_numa_stripe(xc_interface *xch,
+ xc_domain_numa_layout_t *dom_layout, xen_pfn_t *page_array)
+{
+ int vnode, rc = 0;
+ unsigned long cur_pages, nr_pages;
+ /* Make a private copy for stripe iterations */
+ xc_domain_numa_layout_t *layout;
+ if (!(layout = malloc(sizeof(*layout))))
+ {
+ PERROR("%s : Failed malloc.", __FUNCTION__);
+ return -1;
+ }
+ memcpy(layout, dom_layout, sizeof(*layout));
+
+ for (vnode=0, cur_pages=0, nr_pages=0;
+ cur_pages<layout->nr_pages && !rc; vnode++)
+ {
+ unsigned long allocsz;
+ xc_vnode_data_t *vnode_data;
+ while (!layout->vnode_data[vnode].nr_pages)
+ {
+ vnode++;
+ if (vnode >= layout->nr_vnodes)
+ vnode = 0;
+ }
+ vnode_data = &layout->vnode_data[vnode];
+ allocsz = layout->stripe_size;
+ if (allocsz > vnode_data->nr_pages)
+ allocsz = vnode_data->nr_pages;
+
+ nr_pages = cur_pages + allocsz;
+ rc = __setup_guest_memory(xch, layout->domid, nr_pages, nr_pages,
+ cur_pages, page_array, !cur_pages, vnode_data->mnode_id, 1);
+ vnode_data->nr_pages -= allocsz;
+ cur_pages = nr_pages;
+ }
+ free(layout);
+ return rc;
+}
+
+static int setup_guest_numa_memory(xc_interface *xch,
+ xc_domain_numa_layout_t *dom_layout, xen_pfn_t *page_array)
+{
+ int vnode, rc;
+ unsigned long cur_pages, nr_pages;
+
+ if ((rc = xc_setup_numa_domain(xch, dom_layout)))
+ goto setup_done;
+
+ if (dom_layout->type == XEN_DOM_NUMA_STRIPE)
+ {
+ rc = setup_guest_numa_stripe(xch, dom_layout, page_array);
+ goto setup_done;
+ }
+
+ /* XXX: pod is turned off with NUMA allocation for now */
+ for (vnode=0, cur_pages=0, nr_pages=0;
+ vnode<dom_layout->nr_vnodes && !rc; vnode++)
+ {
+ xc_vnode_data_t *vnode_data = &dom_layout->vnode_data[vnode];
+
+ nr_pages = cur_pages + vnode_data->nr_pages;
+ rc = __setup_guest_memory(xch, dom_layout->domid, nr_pages, nr_pages,
+ cur_pages, page_array, (vnode == 0), vnode_data->mnode_id,
+ (dom_layout->type != XEN_DOM_NUMA_DONTCARE));
+ cur_pages = nr_pages;
+ }
+setup_done:
+ if (!rc)
+ rc = xc_domain_numa_pinvcpus(xch, dom_layout);
+ return rc;
+}
+
+static int setup_guest_nonnuma_memory(xc_interface *xch, uint32_t domid,
+ unsigned long nr_pages, unsigned long target_pages,
+ xen_pfn_t *page_array)
+{
+ return __setup_guest_memory(xch, domid, nr_pages, target_pages, 0,
+ page_array, 1, INVALID_NODE, 0);
+}
+
+static int setup_guest_memory(xc_interface *xch, uint32_t dom,
+ xc_domain_numa_layout_t *dom_layout,
+ unsigned long nr_pages, unsigned long target_pages,
+ struct elf_binary *elf)
+{
+ xen_pfn_t *page_array = NULL;
+ unsigned long i;
+ int rc;
+
+ if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
+ {
+ rc = -1;
+ PERROR("Could not allocate memory.");
+ goto out;
+ }
+
+ for ( i = 0; i < nr_pages; i++ )
+ page_array[i] = i;
+ for ( i = HVM_BELOW_4G_RAM_END >> PAGE_SHIFT; i < nr_pages; i++ )
+ page_array[i] += HVM_BELOW_4G_MMIO_LENGTH >> PAGE_SHIFT;
+
+ if ( dom_layout )
+ rc = setup_guest_numa_memory(xch, dom_layout, page_array);
+ else
+ rc = setup_guest_nonnuma_memory(xch, dom,
+ nr_pages, target_pages, page_array);
+ if ( rc )
+ goto out;
+ rc = loadelfimage(xch, elf, dom, page_array);
+out:
if ( page_array )
free(page_array);
- return -1;
+ return rc;
}
static int
@@ -352,6 +464,7 @@ error_out:
static int setup_guest(xc_interface *xch,
uint32_t dom, int memsize, int target,
+ xc_domain_numa_layout_t *dom_layout,
char *image, unsigned long image_size)
{
unsigned long entry_eip;
@@ -390,7 +503,7 @@ static int setup_guest(xc_interface *xch
v_start, v_end,
elf_uval(&elf, elf.ehdr, e_entry));
- rc = setup_guest_memory(xch, dom,
+ rc = setup_guest_memory(xch, dom, dom_layout,
(unsigned long)memsize << (20 - PAGE_SHIFT),
(unsigned long)target << (20 - PAGE_SHIFT), &elf);
if ( rc < 0 )
@@ -423,16 +536,26 @@ static int xc_hvm_build_internal(xc_inte
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
char *image,
unsigned long image_size)
{
+ int rc;
+ xc_domain_numa_layout_t *dom_layout = 0;
+
if ( (image == NULL) || (image_size == 0) )
{
ERROR("Image required");
return -1;
}
-
- return setup_guest(xch, domid, memsize, target, image, image_size);
+ if ( numa_config )
+ dom_layout = xc_dom_alloc_numa_layout(xch, domid,
+ (uint64_t)memsize << (20 - PAGE_SHIFT), numa_config);
+ rc = setup_guest(xch, domid, memsize, target, dom_layout,
+ image, image_size);
+ if ( dom_layout )
+ xc_dom_free_numa_layout(xch, dom_layout);
+ return rc;
}
/* xc_hvm_build:
@@ -450,11 +573,9 @@ int xc_hvm_build(xc_interface *xch,
if ( (image_name == NULL) ||
((image = xc_read_image(xch, image_name, &image_size)) == NULL) )
return -1;
-
- sts = xc_hvm_build_internal(xch, domid, memsize, memsize, image, image_size);
-
+ sts = xc_hvm_build_internal(xch, domid, memsize, memsize, NULL,
+ image, image_size);
free(image);
-
return sts;
}
@@ -468,6 +589,7 @@ int xc_hvm_build_target_mem(xc_interface
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
const char *image_name)
{
char *image;
@@ -477,11 +599,9 @@ int xc_hvm_build_target_mem(xc_interface
if ( (image_name == NULL) ||
((image = xc_read_image(xch, image_name, &image_size)) == NULL) )
return -1;
-
- sts = xc_hvm_build_internal(xch, domid, memsize, target, image, image_size);
-
+ sts = xc_hvm_build_internal(xch, domid, memsize, target, numa_config,
+ image, image_size);
free(image);
-
return sts;
}
@@ -499,29 +619,23 @@ int xc_hvm_build_mem(xc_interface *xch,
char *img;
/* Validate that there is a kernel buffer */
-
if ( (image_buffer == NULL) || (image_size == 0) )
{
ERROR("kernel image buffer not present");
return -1;
}
-
img = xc_inflate_buffer(xch, image_buffer, image_size, &img_len);
if ( img == NULL )
{
ERROR("unable to inflate ram disk buffer");
return -1;
}
-
- sts = xc_hvm_build_internal(xch, domid, memsize, memsize,
- img, img_len);
-
+ sts = xc_hvm_build_internal(xch, domid, memsize, memsize, NULL,
+ img, img_len);
/* xc_inflate_buffer may return the original buffer pointer (for
already inflated buffers), so exercise some care in freeing */
-
if ( (img != NULL) && (img != image_buffer) )
free(img);
-
return sts;
}
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -149,6 +149,7 @@ int xc_hvm_build_target_mem(xc_interface
uint32_t domid,
int memsize,
int target,
+ xc_domain_numa_config_t *numa_config,
const char *image_name);
int xc_hvm_build_mem(xc_interface *xch,
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -230,6 +230,7 @@ int build_hvm(libxl_ctx *ctx, uint32_t d
domid,
(info->max_memkb - info->video_memkb) / 1024,
(info->target_memkb - info->video_memkb) / 1024,
+ &info->numa_config,
libxl_abs_path(ctx, (char *)info->kernel.path,
libxl_xenfirmwaredir_path()));
if (ret) {
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -997,7 +997,7 @@ static PyObject *pyxc_hvm_build(XcObject
target = memsize;
if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
- target, image) != 0 )
+ target, NULL, image) != 0 )
return pyxc_error_to_exception(self->xc_handle);
#if !defined(__ia64__)
* Re: [vNUMA v2][PATCH 6/8] Build NUMA HVM
From: Andre Przywara @ 2010-08-13 15:24 UTC
To: Dulloor; +Cc: xen-devel@lists.xensource.com
Dulloor wrote:
> Allocate memory for the HVM guest based on the chosen allocation scheme
> and the selected nodes. Also, disable PoD for the NUMA allocation schemes.
>
Sorry for the delay; I finally found some time to play a bit with the code.
To me it looks quite mature, so sometimes it is hard to see why things
were done a certain way, although it mostly becomes clearer later.
Some general comments:
1. I didn't manage to get striping to work. I tried several settings;
they all ended up in an almost endless loop of:
xc: info: PHYSICAL MEMORY ALLOCATION (NODE {7,6,4,5}):
4KB PAGES: 0x00000000000000c0
2MB PAGES: 0x0000000000000000
1GB PAGES: 0x0000000000000000
and then stopped creating the guest. I didn't investigate, though.
2. I don't like the limitation imposed on the guest's NUMA layout.
Requiring the number of nodes and the number of VCPUs to be a power of 2
is too restrictive in my eyes. My older code could cope with wild
combinations of memory, nodes, and VCPUs; I remember testing a rather big
matrix, including things like 3.5 GB of memory over 3 nodes and 5 VCPUs.
As your patches 6 and 7 touch my work anyway, I'd also volunteer to fix this
by basically rebasing my code onto your foundation. I left out the SLIT
part in the first round, but I suppose it could easily be added at
the end.
I have already started to hack on this and moved the "hole-punching" (VGA
hole and PCI hole) from libxc into hvmloader. I then removed the
limitation check and tried some setups, although there still seems to be
an issue with the memory layout, as the guest Linux kernel crashes early
(the same guest setup works with QEMU, though).
3. Is it really necessary to clutter hvm_info_table with so much
information? Until now it has been really small and static. I'd prefer to
enter only the values that are really needed: the vCPU->vnode mapping, the
per-vnode memory sizes, and the SLIT information.
AFAIK there is no compatibility promise for this interface between
hvmloader and the Xen tools, so we could even declare the arrays here
statically at compile time.
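Something along these lines would be enough; this is only a hypothetical
sketch (the struct name, field names, and bounds are mine, purely for
illustration):

    /* Hypothetical minimal vNUMA block for hvm_info_table: just the
     * vCPU placement, the per-vnode memory sizes and the SLIT
     * distances, with compile-time array bounds. */
    #define HVM_MAX_VNODES  8
    #define HVM_MAX_VCPUS   128

    struct hvm_numa_info {
        uint8_t  nr_vnodes;
        uint8_t  vcpu_to_vnode[HVM_MAX_VCPUS];    /* vCPU -> vnode map */
        uint64_t vnode_pages[HVM_MAX_VNODES];     /* memory per vnode */
        uint8_t  vnode_distance[HVM_MAX_VNODES * HVM_MAX_VNODES]; /* SLIT */
    };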
Regards,
Andre.
--
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 448-3567-12