All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ryan Harper <ryanh@us.ibm.com>
To: xen-devel@lists.xensource.com
Subject: [PATCH 0/6] xen,xend,tools: NUMA support for Xen
Date: Tue, 11 Jul 2006 10:35:47 -0500	[thread overview]
Message-ID: <20060711153547.GF1694@us.ibm.com> (raw)

Reposting the latest patches, no significant changes since May when I
last received feedback.  I've done some simple overhead and performance
numbers for these patches.

Measuring NUMA allocator patch overhead via balloon driver and DOM0
involved starting DOM0 with all memory (4G) and ballooning down (256M)
as a starting point, [1]tracking the time it takes to balloon back up
to full memory.  The test was done with and without NUMA patches.

With NUMA:

Try1: 911ms
Try2: 907ms
Try3: 910ms

Without NUMA:

Try1: 606ms
Try2: 604ms
Try3: 608ms


To measure NUMA allocator patch overhead via the increase_reservation
memory op during domain creation, we [2]modified the increase_reservation
op to track the time using get_s_time() from start to finish and sampled
the times for various memory sizes.

With NUMA:

MemSize  128M 512M 1G   2G    3G    
-------------------------------------
Try1:    6ms  26ms 53ms 221ms 390ms
Try2:    6ms  26ms 48ms 212ms 390ms
Try3:    6ms  26ms 48ms 212ms 390ms

Without NUMA:
MemSize  128M 512M 1G   2G    3G    
-------------------------------------
Try1:    4ms  16ms 25ms 70ms  100ms
Try2:    3ms  14ms 28ms 56ms  109ms
Try3:    3ms  14ms 23ms 56ms   95ms


Using a microbenchmark which mallocs memory and touches each byte, we
can observe the effects of local memory versus remote.  The domain is
created with 1GB memory, and 1 VCPU coming from the same node.  The
microbenchmark forks off one child per VCPU, and each child
malloc/memsets a 512M buffer.  We then compare worst-case (all non-local
memory) and best-case (all-local).

Machine Topology:
node_to_cpu            : node0:0
                         node1:1

Domain's memory placement
(bebop) tmp # xen_numastat -d 8
DOM8: NODE0: PAGES: 5
DOM8: NODE1: PAGES: 262144

Domain's vcpu placement
(bebop) tmp # xm vcpu-list 8
Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity
hungerforce                        8     0    0   -b-      12.0  0

All-remote memory:
root@amd64-domU:/usr/src # while true; do ./memwrite -j1 -m512M; sleep 1; done
Time to write '0' to 512.000 MiB 1168711 usecs.  Throughput: 438.090 MiB/sec
Time to write '0' to 512.000 MiB 1175179 usecs.  Throughput: 435.678 MiB/sec
Time to write '0' to 512.000 MiB 1172454 usecs.  Throughput: 436.691 MiB/sec
Time to write '0' to 512.000 MiB 1170378 usecs.  Throughput: 437.466 MiB/sec

Domain's vcpu placement
(bebop) tmp # xm vcpu-list 8
Name                              ID  VCPU  CPU  State  Time(s)  CPU Affinity
hungerforce                        8     0    0   -b-      15.9  1

All-local memory:
root@amd64-domU:/usr/src # while true; do ./memwrite -j1 -m512M; sleep 1; done
Time to write '0' to 512.000 MiB 759186 usecs.  Throughput: 674.406 MiB/sec
Time to write '0' to 512.000 MiB 765143 usecs.  Throughput: 669.156 MiB/sec
Time to write '0' to 512.000 MiB 768462 usecs.  Throughput: 666.266 MiB/sec
Time to write '0' to 512.000 MiB 763406 usecs.  Throughput: 670.679 MiB/sec


1.  diff -r ae245d35457b linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Wed Jun 28 12:59:29 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Mon Jun 26 14:54:10 2006
@@ -44,6 +44,7 @@
 #include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
+#include <linux/time.h>
 #include <xen/xen_proc.h>
 #include <asm/hypervisor.h>
 #include <xen/balloon.h>
@@ -63,6 +64,9 @@
 #endif
 
 static DECLARE_MUTEX(balloon_mutex);
+
+static struct timeval start, stop, delay;
+static int calc_delay = 0;
 
 /*
  * Protects atomic reservation decrease/increase against concurrent increases.
@@ -337,6 +341,14 @@
 	return need_sleep;
 }
 
+/** Convert to milliseconds */
+static inline __u64 tv_to_ms(const struct timeval* tv)
+{
+        __u64 ms = tv->tv_usec / 1000;
+        ms += (__u64)tv->tv_sec * (__u64)1000;
+        return ms;
+}
+
 /*
  * We avoid multiple worker processes conflicting via the balloon mutex.
  * We may of course race updates of the target counts (which are protected
@@ -350,6 +362,11 @@
 
 	down(&balloon_mutex);
 
+	if (calc_delay) {
+		do_gettimeofday(&delay);
+		calc_delay = 0;
+	}
+ 
 	do {
 		credit = current_target() - current_pages;
 		if (credit > 0)
@@ -366,6 +383,13 @@
 	/* Schedule more work if there is some still to be done. */
 	if (current_target() != current_pages)
 		mod_timer(&balloon_timer, jiffies + HZ);
+	else {
+		/* We've hit target, notify completion */
+		do_gettimeofday(&stop);
+		printk(KERN_WARNING "Ballooning complete.  startup delay: %lums", 
+					tv_to_ms(&delay)-tv_to_ms(&start));
+		printk(" total time: %lums\n", tv_to_ms(&stop)-tv_to_ms(&start));
+	}
 
 	up(&balloon_mutex);
 }
@@ -376,6 +400,11 @@
 	/* No need for lock. Not read-modify-write updates. */
 	hard_limit   = ~0UL;
 	target_pages = target;
+
+	/* note start time of balloon process */
+	do_gettimeofday(&start);
+	calc_delay = 1;
+
 	schedule_work(&balloon_worker);
 }

2.  diff -r c257ac74b5c7 xen/common/memory.c
--- a/xen/common/memory.c	Tue Jul  4 16:31:13 2006
+++ b/xen/common/memory.c	Wed Jul  5 12:12:43 2006
@@ -27,6 +27,10 @@
  * high-order bits of the @cmd parameter, which are otherwise unused and zero.
  */
 #define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+
+static int calc_start = 1;
+static unsigned long initial_extent;
+static s_time_t start, stop;
 
 static long
 increase_reservation(
@@ -574,6 +578,13 @@
         switch ( op )
         {
         case XENMEM_increase_reservation:
+            if ( calc_start ) {
+                 printk("WARK: calcing start time on IR of %lu pages\n",
+                         reservation.nr_extents);
+                 initial_extent = reservation.nr_extents;
+                 start = get_s_time();
+                 calc_start = 0;
+            }
             rc = increase_reservation(
                 d,
                 reservation.extent_start,
@@ -612,6 +623,12 @@
                 __HYPERVISOR_memory_op, "lh",
                 op | (rc << START_EXTENT_SHIFT), arg);
 
+        if ( op == XENMEM_increase_reservation ) {
+            stop = get_s_time();
+            printk("WARK: increase_reservation of %lu pages took->%lums\n", 
+                   initial_extent, ((u64)stop-(u64)start)/1000000 );
+            calc_start = 1;
+        }
         break;
 
     case XENMEM_exchange:
 
-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@us.ibm.com

             reply	other threads:[~2006-07-11 15:35 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-11 15:35 Ryan Harper [this message]
2006-07-11 15:57 ` [PATCH 0/6] xen,xend,tools: NUMA support for Xen Keir Fraser
2006-07-11 17:47   ` Ryan Harper
  -- strict thread matches above, loose matches on Subject: below --
2006-07-11 16:40 Lu, Yinghai
2006-07-11 21:28 Ian Pratt
2006-07-12  1:23 ` Ryan Harper
2006-07-12 20:30   ` Ryan Harper

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060711153547.GF1694@us.ibm.com \
    --to=ryanh@us.ibm.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.