All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 2/2] Add hcall to probe Xen heap
@ 2007-04-10  1:09 Ryan Harper
  2007-06-06 16:07 ` Ryan Harper
  0 siblings, 1 reply; 10+ messages in thread
From: Ryan Harper @ 2007-04-10  1:09 UTC (permalink / raw)
  To: xen-devel

For post-3.0.5 inclusion:

Add new domctl hypercall to expose current heap values.  This functionality is
needed for probing how much memory is available in a given node prior to VM
creation.

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com


diffstat output:
 tools/libxc/xc_domain.c     |   28 ++++++++++++++++++++++++++++
 tools/libxc/xenctrl.h       |   16 ++++++++++++++++
 xen/common/domctl.c         |   35 +++++++++++++++++++++++++++++++++++
 xen/common/page_alloc.c     |    9 +--------
 xen/include/public/domctl.h |   16 ++++++++++++++++
 xen/include/xen/mm.h        |   10 ++++++++++
 6 files changed, 106 insertions(+), 8 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
Add new domctl hypercall to expose current heap values.  This functionality is
needed for probing how much memory is available in a given node prior to VM
creation.

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>

diff -r 48cbb32df526 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Thu Mar 29 21:55:42 2007 -0500
+++ b/tools/libxc/xc_domain.c	Fri Mar 30 09:41:40 2007 -0500
@@ -584,6 +584,34 @@ int xc_domain_ioport_permission(int xc_h
     domctl.u.ioport_permission.allow_access = allow_access;
 
     return do_domctl(xc_handle, &domctl);
+}
+
+int xc_availheap(int xc_handle,
+                 int zone_lo,
+                 int zone_hi,
+                 int node,
+                 uint32_t *nr_zones,
+                 uint32_t *nr_nodes,
+                 uint64_t *pages)
+{
+    DECLARE_DOMCTL;
+    int rc = 0;
+
+    domctl.cmd = XEN_DOMCTL_availheap;
+    domctl.u.availheap.zone_lo = zone_lo;
+    domctl.u.availheap.zone_hi = zone_hi;
+    domctl.u.availheap.node = node;
+
+    rc = do_domctl(xc_handle, &domctl);
+    if ( rc >= 0 ) {
+        if (nr_zones)
+            *nr_zones = domctl.u.availheap.nr_zones;
+        if (nr_nodes)
+            *nr_nodes = domctl.u.availheap.nr_nodes;
+        *pages = domctl.u.availheap.pages;
+    }
+
+    return rc;
 }
 
 int xc_vcpu_setcontext(int xc_handle,
diff -r 48cbb32df526 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Thu Mar 29 21:55:42 2007 -0500
+++ b/tools/libxc/xenctrl.h	Fri Mar 30 09:44:28 2007 -0500
@@ -611,6 +611,22 @@ int xc_get_pfn_type_batch(int xc_handle,
 /* Get current total pages allocated to a domain. */
 long xc_get_tot_pages(int xc_handle, uint32_t domid);
 
+/**
+ * This function retrieves the number of pages available
+ * in the heap in a specific range of zones and nodes.
+ * 
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to query
+ * @parm zone_lo the starting zone to query
+ * @parm zone_hi the last zone to query
+ * @parm node the node to query
+ * @parm *nr_zones caller variable to put number of zones queried
+ * @parm *nr_nodes caller variable to put number of nodes queried
+ * @parm *pages caller variable to put total pages counted
+ * @return 0 on success, <0 on failure.
+ */
+int xc_availheap(int xc_handle, int zone_lo, int zone_hi, int node,
+                 uint32_t *nr_zones, uint32_t *nr_nodes, uint64_t *pages);
 
 /*
  * Trace Buffer Operations
diff -r 48cbb32df526 xen/common/domctl.c
--- a/xen/common/domctl.c	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/common/domctl.c	Fri Mar 30 10:02:01 2007 -0500
@@ -24,6 +24,8 @@
 #include <asm/current.h>
 #include <public/domctl.h>
 #include <acm/acm_hooks.h>
+#include <asm/numa.h>
+#include <xen/nodemask.h>
 
 extern long arch_do_domctl(
     struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
@@ -711,6 +713,39 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
     }
     break;
 
+    case XEN_DOMCTL_availheap:
+    { 
+        int zone_lo = 0, zone_hi = NR_ZONES-1;
+
+        ret = -EINVAL;
+        if ( op->u.availheap.node >= num_online_nodes() )
+            break;
+        if ( op->u.availheap.zone_lo >= NR_ZONES )
+            break;
+        if ( op->u.availheap.zone_lo > op->u.availheap.zone_hi )
+            break;
+
+        if ( op->u.availheap.zone_lo > 0 )
+           zone_lo = op->u.availheap.zone_lo;
+        if ( op->u.availheap.zone_hi >= 0 && op->u.availheap.zone_hi < NR_ZONES )
+           zone_hi = op->u.availheap.zone_hi;
+
+        op->u.availheap.nr_zones = zone_hi - zone_lo + 1;
+        
+        ( op->u.availheap.node < 0 ) ?
+            (op->u.availheap.nr_nodes=num_online_nodes()) :
+            (op->u.availheap.nr_nodes=1);
+
+        op->u.availheap.pages =
+            avail_heap_pages(zone_lo, zone_hi, op->u.availheap.node);
+
+        if ( copy_to_guest(u_domctl, op, 1) )
+            ret = -EFAULT;
+        else
+            ret = 0;
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, u_domctl);
         break;
diff -r 48cbb32df526 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/common/page_alloc.c	Fri Mar 30 10:00:25 2007 -0500
@@ -310,13 +310,6 @@ unsigned long alloc_boot_pages(
  * BINARY BUDDY ALLOCATOR
  */
 
-#define MEMZONE_XEN 0
-#ifdef PADDR_BITS
-#define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)
-#else
-#define NR_ZONES    (BITS_PER_LONG - PAGE_SHIFT)
-#endif
-
 #define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
 
 typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
@@ -544,7 +537,7 @@ void init_heap_pages(
     }
 }
 
-static unsigned long avail_heap_pages(
+unsigned long avail_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
 {
     unsigned int i, zone, num_nodes = num_online_nodes();
diff -r 48cbb32df526 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/include/public/domctl.h	Thu Mar 29 22:29:43 2007 -0500
@@ -389,6 +389,21 @@ typedef struct xen_domctl_settimeoffset 
 typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
 
+
+#define XEN_DOMCTL_availheap        37 
+struct xen_domctl_availheap {
+    /* in  */
+    int zone_lo;             /* starting zone */
+    int zone_hi;             /* ending zone, -1 for zone_lo to NR_ZONES */
+    int node;                /* query available pages in node, -1 for all */
+    /* out */
+    uint32_t nr_zones;    /* number of zones queried */
+    uint32_t nr_nodes;    /* number of nodes queried */
+    uint64_t pages;
+};
+typedef struct xen_domctl_availheap xen_domctl_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_availheap_t);
+
  
 #define XEN_DOMCTL_gethvmcontext     33
 #define XEN_DOMCTL_sethvmcontext     34
@@ -457,6 +472,7 @@ struct xen_domctl {
         struct xen_domctl_hvmcontext        hvmcontext;
         struct xen_domctl_address_size      address_size;
         struct xen_domctl_sendtrigger       sendtrigger;
+        struct xen_domctl_availheap         availheap;
         uint8_t                             pad[128];
     } u;
 };
diff -r 48cbb32df526 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/include/xen/mm.h	Fri Mar 30 10:01:02 2007 -0500
@@ -33,6 +33,13 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 
+#define MEMZONE_XEN 0
+#ifdef PADDR_BITS
+#define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)
+#else
+#define NR_ZONES    (BITS_PER_LONG - PAGE_SHIFT)
+#endif
+
 struct domain;
 struct page_info;
 
@@ -64,6 +71,9 @@ unsigned long avail_domheap_pages(void);
 unsigned long avail_domheap_pages(void);
 #define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
 #define free_domheap_page(p)  (free_domheap_pages(p,0))
+
+unsigned long avail_heap_pages(
+    unsigned int zone_lo, unsigned int zone_hi, unsigned int node);
 
 void scrub_heap_pages(void);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-04-10  1:09 [RFC][PATCH 2/2] Add hcall to probe Xen heap Ryan Harper
@ 2007-06-06 16:07 ` Ryan Harper
  2007-06-17 16:44   ` Subrahmanian, Raj
  2007-07-06 15:15   ` Keir Fraser
  0 siblings, 2 replies; 10+ messages in thread
From: Ryan Harper @ 2007-06-06 16:07 UTC (permalink / raw)
  To: xen-devel

* Ryan Harper <ryanh@us.ibm.com> [2007-04-09 20:10]:
> For post-3.0.5 inclusion:
> 
> Add new domctl hypercall to expose current heap values.  This functionality is
> needed for probing how much memory is available in a given node prior to VM
> creation.

Refreshed to changeset:   15200:bd3d6b4c52ec

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com


diffstat output:
 tools/libxc/xc_domain.c     |   28 ++++++++++++++++++++++++++++
 tools/libxc/xenctrl.h       |   16 ++++++++++++++++
 xen/common/domctl.c         |   35 +++++++++++++++++++++++++++++++++++
 xen/common/page_alloc.c     |    9 +--------
 xen/include/public/domctl.h |   16 ++++++++++++++++
 xen/include/xen/mm.h        |   10 ++++++++++
 6 files changed, 106 insertions(+), 8 deletions(-)

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
---
Add new domctl hypercall to expose current heap values.  This functionality is
needed for probing how much memory is available in a given node prior to VM
creation.

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>

diff -r 48cbb32df526 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Thu Mar 29 21:55:42 2007 -0500
+++ b/tools/libxc/xc_domain.c	Fri Mar 30 09:41:40 2007 -0500
@@ -584,6 +584,34 @@ int xc_domain_ioport_permission(int xc_h
     domctl.u.ioport_permission.allow_access = allow_access;
 
     return do_domctl(xc_handle, &domctl);
+}
+
+int xc_availheap(int xc_handle,
+                 int zone_lo,
+                 int zone_hi,
+                 int node,
+                 uint32_t *nr_zones,
+                 uint32_t *nr_nodes,
+                 uint64_t *pages)
+{
+    DECLARE_DOMCTL;
+    int rc = 0;
+
+    domctl.cmd = XEN_DOMCTL_availheap;
+    domctl.u.availheap.zone_lo = zone_lo;
+    domctl.u.availheap.zone_hi = zone_hi;
+    domctl.u.availheap.node = node;
+
+    rc = do_domctl(xc_handle, &domctl);
+    if ( rc >= 0 ) {
+        if (nr_zones)
+            *nr_zones = domctl.u.availheap.nr_zones;
+        if (nr_nodes)
+            *nr_nodes = domctl.u.availheap.nr_nodes;
+        *pages = domctl.u.availheap.pages;
+    }
+
+    return rc;
 }
 
 int xc_vcpu_setcontext(int xc_handle,
diff -r 48cbb32df526 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Thu Mar 29 21:55:42 2007 -0500
+++ b/tools/libxc/xenctrl.h	Fri Mar 30 09:44:28 2007 -0500
@@ -611,6 +611,22 @@ int xc_get_pfn_type_batch(int xc_handle,
 /* Get current total pages allocated to a domain. */
 long xc_get_tot_pages(int xc_handle, uint32_t domid);
 
+/**
+ * This function retrieves the number of pages available
+ * in the heap in a specific range of zones and nodes.
+ * 
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to query
+ * @parm zone_lo the starting zone to query
+ * @parm zone_hi the last zone to query
+ * @parm node the node to query
+ * @parm *nr_zones caller variable to put number of zones queried
+ * @parm *nr_nodes caller variable to put number of nodes queried
+ * @parm *pages caller variable to put total pages counted
+ * @return 0 on success, <0 on failure.
+ */
+int xc_availheap(int xc_handle, int zone_lo, int zone_hi, int node,
+                 uint32_t *nr_zones, uint32_t *nr_nodes, uint64_t *pages);
 
 /*
  * Trace Buffer Operations
diff -r 48cbb32df526 xen/common/domctl.c
--- a/xen/common/domctl.c	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/common/domctl.c	Fri Mar 30 10:02:01 2007 -0500
@@ -24,6 +24,8 @@
 #include <asm/current.h>
 #include <public/domctl.h>
 #include <acm/acm_hooks.h>
+#include <asm/numa.h>
+#include <xen/nodemask.h>
 
 extern long arch_do_domctl(
     struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
@@ -711,6 +713,39 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
     }
     break;
 
+    case XEN_DOMCTL_availheap:
+    { 
+        int zone_lo = 0, zone_hi = NR_ZONES-1;
+
+        ret = -EINVAL;
+        if ( op->u.availheap.node >= num_online_nodes() )
+            break;
+        if ( op->u.availheap.zone_lo >= NR_ZONES )
+            break;
+        if ( op->u.availheap.zone_lo > op->u.availheap.zone_hi )
+            break;
+
+        if ( op->u.availheap.zone_lo > 0 )
+           zone_lo = op->u.availheap.zone_lo;
+        if ( op->u.availheap.zone_hi >= 0 && op->u.availheap.zone_hi < NR_ZONES )
+           zone_hi = op->u.availheap.zone_hi;
+
+        op->u.availheap.nr_zones = zone_hi - zone_lo + 1;
+        
+        ( op->u.availheap.node < 0 ) ?
+            (op->u.availheap.nr_nodes=num_online_nodes()) :
+            (op->u.availheap.nr_nodes=1);
+
+        op->u.availheap.pages =
+            avail_heap_pages(zone_lo, zone_hi, op->u.availheap.node);
+
+        if ( copy_to_guest(u_domctl, op, 1) )
+            ret = -EFAULT;
+        else
+            ret = 0;
+    }
+    break;
+
     default:
         ret = arch_do_domctl(op, u_domctl);
         break;
diff -r 48cbb32df526 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/common/page_alloc.c	Fri Mar 30 10:00:25 2007 -0500
@@ -310,13 +310,6 @@ unsigned long alloc_boot_pages(
  * BINARY BUDDY ALLOCATOR
  */
 
-#define MEMZONE_XEN 0
-#ifdef PADDR_BITS
-#define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)
-#else
-#define NR_ZONES    (BITS_PER_LONG - PAGE_SHIFT)
-#endif
-
 #define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
 
 typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
@@ -544,7 +537,7 @@ void init_heap_pages(
     }
 }
 
-static unsigned long avail_heap_pages(
+unsigned long avail_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
 {
     unsigned int i, zone, num_nodes = num_online_nodes();
diff -r 48cbb32df526 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/include/public/domctl.h	Thu Mar 29 22:29:43 2007 -0500
@@ -389,6 +389,21 @@ typedef struct xen_domctl_settimeoffset 
 typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
 
+
+#define XEN_DOMCTL_availheap        37 
+struct xen_domctl_availheap {
+    /* in  */
+    int zone_lo;             /* starting zone */
+    int zone_hi;             /* ending zone, -1 for zone_lo to NR_ZONES */
+    int node;                /* query available pages in node, -1 for all */
+    /* out */
+    uint32_t nr_zones;    /* number of zones queried */
+    uint32_t nr_nodes;    /* number of nodes queried */
+    uint64_t pages;
+};
+typedef struct xen_domctl_availheap xen_domctl_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_availheap_t);
+
  
 #define XEN_DOMCTL_gethvmcontext     33
 #define XEN_DOMCTL_sethvmcontext     34
@@ -457,6 +472,7 @@ struct xen_domctl {
         struct xen_domctl_hvmcontext        hvmcontext;
         struct xen_domctl_address_size      address_size;
         struct xen_domctl_sendtrigger       sendtrigger;
+        struct xen_domctl_availheap         availheap;
         uint8_t                             pad[128];
     } u;
 };
diff -r 48cbb32df526 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h	Thu Mar 29 21:55:42 2007 -0500
+++ b/xen/include/xen/mm.h	Fri Mar 30 10:01:02 2007 -0500
@@ -33,6 +33,13 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 
+#define MEMZONE_XEN 0
+#ifdef PADDR_BITS
+#define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)
+#else
+#define NR_ZONES    (BITS_PER_LONG - PAGE_SHIFT)
+#endif
+
 struct domain;
 struct page_info;
 
@@ -64,6 +71,9 @@ unsigned long avail_domheap_pages(void);
 unsigned long avail_domheap_pages(void);
 #define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
 #define free_domheap_page(p)  (free_domheap_pages(p,0))
+
+unsigned long avail_heap_pages(
+    unsigned int zone_lo, unsigned int zone_hi, unsigned int node);
 
 void scrub_heap_pages(void);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-06-06 16:07 ` Ryan Harper
@ 2007-06-17 16:44   ` Subrahmanian, Raj
  2007-06-17 17:00     ` Keir Fraser
  2007-06-18 15:51     ` Ryan Harper
  2007-07-06 15:15   ` Keir Fraser
  1 sibling, 2 replies; 10+ messages in thread
From: Subrahmanian, Raj @ 2007-06-17 16:44 UTC (permalink / raw)
  To: Ryan Harper, xen-devel

Ryan,
I applied these patches against 15200 and was able to boot on a 4-cell
ES7000 with numa=on.
Boots fine. I brought up a few VMs with some loads on them. That seems
to work fine as well.
Were there any other data points that you would like me to check out?

I was able to see the node info from xm info
node_to_cpu            : node0:24-31
                         node1:16-23
                         node2:8-15
                         node3:0-7

If you could give me an updated set of patches against the tip, we can
run our weekly tests with numa=on.

Thanks
Raj

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-06-17 16:44   ` Subrahmanian, Raj
@ 2007-06-17 17:00     ` Keir Fraser
  2007-06-18 13:27       ` Subrahmanian, Raj
  2007-06-18 15:51     ` Ryan Harper
  1 sibling, 1 reply; 10+ messages in thread
From: Keir Fraser @ 2007-06-17 17:00 UTC (permalink / raw)
  To: Subrahmanian, Raj, Ryan Harper, xen-devel

The patches are only to provide information via the toolstack, so there
should be no difference in behaviour with or without them. You should be
able to run numa=on with unstable tip just fine.

 -- Keir

On 17/6/07 17:44, "Subrahmanian, Raj" <raj.subrahmanian@unisys.com> wrote:

> Ryan,
> I applied these patches against 15200 and was able to boot on a 4-cell
> ES7000 with numa=on.
> Boots fine. I brought up a few VMs with some loads on them. That seems
> to work fine as well.
> Were there any other data points that you would like me to check out?
> 
> I was able to see the node info from xm info
> node_to_cpu            : node0:24-31
>                          node1:16-23
>                          node2:8-15
>                          node3:0-7
> 
> If you could give me an updated set of patches against the tip, we can
> run our weekly tests with numa=on.
> 
> Thanks
> Raj
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-06-17 17:00     ` Keir Fraser
@ 2007-06-18 13:27       ` Subrahmanian, Raj
  0 siblings, 0 replies; 10+ messages in thread
From: Subrahmanian, Raj @ 2007-06-18 13:27 UTC (permalink / raw)
  To: Keir Fraser, Ryan Harper, xen-devel

>The patches are only to provide information via the toolstack, 
>so there should be no difference in behaviour with or without 
>them. You should be able to run numa=on with unstable tip just fine.
Yes, of course. My apologies. I don't know what I was thinking :-)

Raj

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-06-17 16:44   ` Subrahmanian, Raj
  2007-06-17 17:00     ` Keir Fraser
@ 2007-06-18 15:51     ` Ryan Harper
  2007-06-19 22:30       ` Subrahmanian, Raj
  1 sibling, 1 reply; 10+ messages in thread
From: Ryan Harper @ 2007-06-18 15:51 UTC (permalink / raw)
  To: Subrahmanian, Raj; +Cc: Ryan Harper, xen-devel

* Subrahmanian, Raj <raj.subrahmanian@unisys.com> [2007-06-17 11:46]:
> Ryan,
> I applied these patches against 15200 and was able to boot on a 4-cell
> ES7000 with numa=on.
> Boots fine. I brought up a few VMs with some loads on them. That seems
> to work fine as well.

Great! Thanks for testing the patches out.

> Were there any other data points that you would like me to check out?
> 
> I was able to see the node info from xm info
> node_to_cpu            : node0:24-31
>                          node1:16-23
>                          node2:8-15
>                          node3:0-7

Yeah, in xm info, I'd like to see:

nr_cpus
nr_nodes
sockets_per_node
cores_per_socket
threads_per_core

I believe I've fixed up the sockets_per_node calculation, but the more
machines we test on the sooner we know for sure.

> 
> If you could give me an updated set of patches against the tip, we can
> run our weekly tests with numa=on.

As Keir mentioned, you can still test with numa=on without the patches.
The new patches cover how the topology is exposed to userspace.  In any
case, the latest versions are posted [1]here and [2]here.


1. http://lists.xensource.com/archives/html/xen-devel/2007-06/msg00298.html
2. http://lists.xensource.com/archives/html/xen-devel/2007-06/msg00299.html

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-06-18 15:51     ` Ryan Harper
@ 2007-06-19 22:30       ` Subrahmanian, Raj
  2007-06-19 22:39         ` Ryan Harper
  0 siblings, 1 reply; 10+ messages in thread
From: Subrahmanian, Raj @ 2007-06-19 22:30 UTC (permalink / raw)
  To: Ryan Harper; +Cc: xen-devel

Ryan
>> I was able to see the node info from xm info
>> node_to_cpu            : node0:24-31
>>                          node1:16-23
>>                          node2:8-15
>>                          node3:0-7
>
>Yeah, in xm info, I'd like to see:
>
>nr_cpus
>nr_nodes
>sockets_per_node
>cores_per_socket
>threads_per_core
>
nr_cpus                : 32
nr_nodes               : 4
sockets_per_node       : 8
cores_per_socket       : 1
threads_per_core       : 1
cpu_mhz                : 3400
hw_caps                : bfebfbff:20100800:00000000:00000180:0000e5bd:00000000:00000001
total_memory           : 130943
free_memory            : 128740
node_to_cpu            : node0:24-31
                         node1:16-23
                         node2:8-15
                         node3:0-7

The cores per socket and sockets per node are off.
These are dual-core, 4 socket nodes.

Raj

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-06-19 22:30       ` Subrahmanian, Raj
@ 2007-06-19 22:39         ` Ryan Harper
  0 siblings, 0 replies; 10+ messages in thread
From: Ryan Harper @ 2007-06-19 22:39 UTC (permalink / raw)
  To: Subrahmanian, Raj; +Cc: Ryan Harper, xen-devel

* Subrahmanian, Raj <raj.subrahmanian@unisys.com> [2007-06-19 17:32]:
> Ryan
> >> I was able to see the node info from xm info
> >> node_to_cpu            : node0:24-31
> >>                          node1:16-23
> >>                          node2:8-15
> >>                          node3:0-7
> >
> >Yeah, in xm info, I'd like to see:
> >
> >nr_cpus
> >nr_nodes
> >sockets_per_node
> >cores_per_socket
> >threads_per_core
> >
> nr_cpus                : 32
> nr_nodes               : 4
> sockets_per_node       : 8
> cores_per_socket       : 1
> threads_per_core       : 1
> cpu_mhz                : 3400
> hw_caps                : bfebfbff:20100800:00000000:00000180:0000e5bd:00000000:00000001
> total_memory           : 130943
> free_memory            : 128740
> node_to_cpu            : node0:24-31
>                          node1:16-23
>                          node2:8-15
>                          node3:0-7
> 
> The cores per socket and sockets per node are off.
> These are dual-core, 4 socket nodes.

Hrm, ok.  I think if the cores_per_socket were fixed, then the
sockets_per_node calc would fix itself.  I've not touched the
cores_per_socket calculation; that will need some attention, but I
assume that value has always been incorrect, ie, not a NUMA issue.

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-06-06 16:07 ` Ryan Harper
  2007-06-17 16:44   ` Subrahmanian, Raj
@ 2007-07-06 15:15   ` Keir Fraser
  2007-07-10 20:34     ` Ryan Harper
  1 sibling, 1 reply; 10+ messages in thread
From: Keir Fraser @ 2007-07-06 15:15 UTC (permalink / raw)
  To: Ryan Harper, xen-devel

First, it should be a sysctl, not a domctl. Second, please add the zone
range checks into the function you added to page_alloc.c. This will avoid
making use of allocator-private defines outside of page_alloc.c (so you can
avoid moving NR_ZONES into mm.h).

By the way, the first patch of your pair is now checked in, after minor
tidying.

 -- Keir

On 6/6/07 17:07, "Ryan Harper" <ryanh@us.ibm.com> wrote:

> * Ryan Harper <ryanh@us.ibm.com> [2007-04-09 20:10]:
>> For post-3.0.5 inclusion:
>> 
>> Add new domctl hypercall to expose current heap values.  This functionality
>> is
>> needed for probing how much memory is available in a given node prior to VM
>> creation.
> 
> Refreshed to changeset:   15200:bd3d6b4c52ec

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH 2/2] Add hcall to probe Xen heap
  2007-07-06 15:15   ` Keir Fraser
@ 2007-07-10 20:34     ` Ryan Harper
  0 siblings, 0 replies; 10+ messages in thread
From: Ryan Harper @ 2007-07-10 20:34 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, ryanh

[-- Attachment #1: Type: text/plain, Size: 834 bytes --]

* Keir Fraser <keir@xensource.com> [2007-07-06 10:17]:
> First, it should be a sysctl, not a domctl. Second, please add the zone
> range checks into the function you added to page_alloc.c. This will avoid
> making use of allocator-private defines outside of page_alloc.c (so you can
> avoid moving NR_ZONES into mm.h).

We need access to NR_ZONES so we can calculate zone_hi limit as the user
might not have put in a value for zone_hi, and NR_ZONES would truncate
that.  I've moved the calculation into avail_heap_pages(), but had to
change the signature to take a pointer to the nr_zone value that will be
calculated.  I don't really like it that much so I'm open to suggestions
on a different approach.

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

[-- Attachment #2: add_availheap_hcall.patch --]
[-- Type: text/plain, Size: 8408 bytes --]

Add new sysctl hypercall to expose current heap values.  This functionality is
needed for probing how much memory is available in a given node prior to VM
creation.

Signed-off-by: Ryan Harper <ryanh@us.ibm.com>

diff -r 27e993c80ceb tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c	Mon Jul 09 09:47:20 2007 +0100
+++ b/tools/libxc/xc_domain.c	Tue Jul 10 14:02:53 2007 -0500
@@ -586,6 +586,34 @@ int xc_domain_ioport_permission(int xc_h
     domctl.u.ioport_permission.allow_access = allow_access;
 
     return do_domctl(xc_handle, &domctl);
+}
+
+int xc_availheap(int xc_handle,
+                 int zone_lo,
+                 int zone_hi,
+                 int node,
+                 uint32_t *nr_zones,
+                 uint32_t *nr_nodes,
+                 uint64_t *pages)
+{
+    DECLARE_SYSCTL;
+    int rc = 0;
+
+    sysctl.cmd = XEN_SYSCTL_availheap;
+    sysctl.u.availheap.zone_lo = zone_lo;
+    sysctl.u.availheap.zone_hi = zone_hi;
+    sysctl.u.availheap.node = node;
+
+    rc = xc_sysctl(xc_handle, &sysctl);
+    if ( rc >= 0 ) {
+        if (nr_zones)
+            *nr_zones = sysctl.u.availheap.nr_zones;
+        if (nr_nodes)
+            *nr_nodes = sysctl.u.availheap.nr_nodes;
+        *pages = sysctl.u.availheap.pages;
+    }
+
+    return rc;
 }
 
 int xc_vcpu_setcontext(int xc_handle,
diff -r 27e993c80ceb tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h	Mon Jul 09 09:47:20 2007 +0100
+++ b/tools/libxc/xenctrl.h	Tue Jul 10 14:02:53 2007 -0500
@@ -616,6 +616,22 @@ int xc_get_pfn_type_batch(int xc_handle,
 /* Get current total pages allocated to a domain. */
 long xc_get_tot_pages(int xc_handle, uint32_t domid);
 
+/**
+ * This function retrieves the number of pages available
+ * in the heap in a specific range of zones and nodes.
+ * 
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to query
+ * @parm zone_lo the starting zone to query
+ * @parm zone_hi the last zone to query
+ * @parm node the node to query
+ * @parm *nr_zones caller variable to put number of zones queried
+ * @parm *nr_nodes caller variable to put number of nodes queried
+ * @parm *pages caller variable to put total pages counted
+ * @return 0 on success, <0 on failure.
+ */
+int xc_availheap(int xc_handle, int zone_lo, int zone_hi, int node,
+                 uint32_t *nr_zones, uint32_t *nr_nodes, uint64_t *pages);
 
 /*
  * Trace Buffer Operations
diff -r 27e993c80ceb xen/common/page_alloc.c
--- a/xen/common/page_alloc.c	Mon Jul 09 09:47:20 2007 +0100
+++ b/xen/common/page_alloc.c	Tue Jul 10 14:09:28 2007 -0500
@@ -569,8 +569,9 @@ void init_heap_pages(
     }
 }
 
-static unsigned long avail_heap_pages(
-    unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
+unsigned long avail_heap_pages(
+    unsigned int zone_lo, unsigned int zone_hi, unsigned int node,
+    unsigned int *nr_zones)
 {
     unsigned int i, zone, num_nodes = num_online_nodes();
     unsigned long free_pages = 0;
@@ -586,6 +587,9 @@ static unsigned long avail_heap_pages(
             if ( (node == -1) || (node == i) )
                 free_pages += avail[i][zone];
     }
+
+    if (nr_zones)
+       *nr_zones = zone_hi - zone_lo + 1;
 
     return free_pages;
 }
@@ -834,7 +838,7 @@ struct page_info *__alloc_domheap_pages(
              ((order > MAX_ORDER) ||
               (avail_heap_pages(MEMZONE_XEN + 1,
                                 dma_bitsize - PAGE_SHIFT - 1,
-                                -1) <
+                                -1, NULL) <
                (dma_emergency_pool_pages + (1UL << order)))) )
             return NULL;
     }
@@ -934,11 +938,11 @@ unsigned long avail_domheap_pages(void)
     
     avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT,
                                  NR_ZONES - 1,
-                                 -1);
+                                 -1, NULL);
 
     avail_dma = avail_heap_pages(MEMZONE_XEN + 1,
                                  dma_bitsize - PAGE_SHIFT - 1,
-                                 -1);
+                                 -1, NULL);
 
     if ( avail_dma > dma_emergency_pool_pages )
         avail_dma -= dma_emergency_pool_pages;
@@ -950,7 +954,7 @@ unsigned long avail_domheap_pages(void)
 
 unsigned long avail_nodeheap_pages(int node)
 {
-    return avail_heap_pages(0, NR_ZONES - 1, node);
+    return avail_heap_pages(0, NR_ZONES - 1, node, NULL);
 }
 
 static void pagealloc_keyhandler(unsigned char key)
@@ -960,7 +964,7 @@ static void pagealloc_keyhandler(unsigne
 
     printk("Physical memory information:\n");
     printk("    Xen heap: %lukB free\n",
-           avail_heap_pages(zone, zone, -1) << (PAGE_SHIFT-10));
+           avail_heap_pages(zone, zone, -1, NULL) << (PAGE_SHIFT-10));
 
     while ( ++zone < NR_ZONES )
     {
@@ -972,7 +976,7 @@ static void pagealloc_keyhandler(unsigne
             total = 0;
         }
 
-        if ( (n = avail_heap_pages(zone, zone, -1)) != 0 )
+        if ( (n = avail_heap_pages(zone, zone, -1, NULL)) != 0 )
         {
             total += n;
             printk("    heap[%02u]: %lukB free\n", zone, n << (PAGE_SHIFT-10));
diff -r 27e993c80ceb xen/common/sysctl.c
--- a/xen/common/sysctl.c	Mon Jul 09 09:47:20 2007 +0100
+++ b/xen/common/sysctl.c	Tue Jul 10 14:09:59 2007 -0500
@@ -21,6 +21,8 @@
 #include <xen/keyhandler.h>
 #include <asm/current.h>
 #include <public/sysctl.h>
+#include <asm/numa.h>
+#include <xen/nodemask.h>
 
 extern long arch_do_sysctl(
     struct xen_sysctl *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl);
@@ -166,6 +168,39 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
 
         if ( copy_to_guest(u_sysctl, op, 1) )
             ret = -EFAULT;
+    }
+    break;
+
+    /*
+     * XEN_SYSCTL_availheap: count the heap pages available in the zone
+     * range zone_lo..zone_hi of one node, or of every online node when
+     * node is negative.  Also reports how many nodes and zones were queried.
+     * Fails with -EINVAL on a bad node or zone range and with -EFAULT if
+     * the result cannot be copied back to the caller.
+     */
+    case XEN_SYSCTL_availheap:
+    {
+        struct xen_sysctl_availheap *ah = &op->u.availheap;
+
+        ret = -EINVAL;
+
+        if ( ah->node >= num_online_nodes() )
+            break;
+
+        /*
+         * zone_hi == -1 is the documented "through the last zone" wildcard
+         * and must not be rejected as an inverted range.
+         */
+        if ( (ah->zone_lo < 0) ||
+             ((ah->zone_hi != -1) && (ah->zone_lo > ah->zone_hi)) )
+            break;
+
+        ah->nr_nodes = (ah->node < 0) ? num_online_nodes() : 1;
+
+        ah->pages = avail_heap_pages(ah->zone_lo, ah->zone_hi,
+                                     ah->node, &ah->nr_zones);
+
+        ret = copy_to_guest(u_sysctl, op, 1) ? -EFAULT : 0;
     }
     break;
 
diff -r 27e993c80ceb xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h	Mon Jul 09 09:47:20 2007 +0100
+++ b/xen/include/public/sysctl.h	Tue Jul 10 14:02:53 2007 -0500
@@ -185,6 +185,20 @@ typedef struct xen_sysctl_getcpuinfo xen
 typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); 
 
+#define XEN_SYSCTL_availheap        9
+struct xen_sysctl_availheap {
+    /* IN: which zones and node to query */
+    int zone_lo;             /* starting zone */
+    int zone_hi;             /* ending zone, -1 for zone_lo to NR_ZONES */
+    int node;                /* query available pages in node, -1 for all */
+    /* OUT: filled in by the hypervisor */
+    uint32_t nr_zones;    /* number of zones queried */
+    uint32_t nr_nodes;    /* number of nodes queried: 1, or all online nodes */
+    uint64_t pages;       /* available heap pages found by the query */
+};
+typedef struct xen_sysctl_availheap xen_sysctl_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t);
+ 
 struct xen_sysctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -197,6 +211,7 @@ struct xen_sysctl {
         struct xen_sysctl_getdomaininfolist getdomaininfolist;
         struct xen_sysctl_debug_keys        debug_keys;
         struct xen_sysctl_getcpuinfo        getcpuinfo;
+        struct xen_sysctl_availheap         availheap;
         uint8_t                             pad[128];
     } u;
 };
diff -r 27e993c80ceb xen/include/xen/mm.h
--- a/xen/include/xen/mm.h	Mon Jul 09 09:47:20 2007 +0100
+++ b/xen/include/xen/mm.h	Tue Jul 10 14:08:03 2007 -0500
@@ -65,6 +65,10 @@ unsigned long avail_domheap_pages(void);
 #define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
 #define free_domheap_page(p)  (free_domheap_pages(p,0))
 
+unsigned long avail_heap_pages(
+    unsigned int zone_lo, unsigned int zone_hi, unsigned int node,
+    unsigned int *nr_zones);
+
 void scrub_heap_pages(void);
 
 int assign_pages(

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2007-07-10 20:34 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-10  1:09 [RFC][PATCH 2/2] Add hcall to probe Xen heap Ryan Harper
2007-06-06 16:07 ` Ryan Harper
2007-06-17 16:44   ` Subrahmanian, Raj
2007-06-17 17:00     ` Keir Fraser
2007-06-18 13:27       ` Subrahmanian, Raj
2007-06-18 15:51     ` Ryan Harper
2007-06-19 22:30       ` Subrahmanian, Raj
2007-06-19 22:39         ` Ryan Harper
2007-07-06 15:15   ` Keir Fraser
2007-07-10 20:34     ` Ryan Harper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.