public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 7/7] Simple Performance Counters: SLUB instrumentation
@ 2007-07-31 23:25 Christoph Lameter
  2007-08-17 16:21 ` Mathieu Desnoyers
  0 siblings, 1 reply; 5+ messages in thread
From: Christoph Lameter @ 2007-07-31 23:25 UTC (permalink / raw)
  To: linux-kernel; +Cc: Christoph Lameter

With this patch SLUB will run performance tests on bootup and display the
results when SLUB_PERFORMANCE_TEST is defined in mm/slub.c.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 mm/slub.c |   97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 6c6d74f..568b16a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -20,6 +20,7 @@
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
 #include <linux/kallsyms.h>
+#include <linux/perf.h>
 
 /*
  * Lock order:
@@ -152,6 +153,7 @@ static inline void ClearSlabDebug(struct page *page)
 
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
+#undef SLUB_PERFORMANCE_TEST
 
 #if PAGE_SHIFT <= 12
 
@@ -2870,9 +2872,97 @@ static long validate_slab_cache(struct kmem_cache *s)
 	return count;
 }
 
-#ifdef SLUB_RESILIENCY_TEST
 static void resiliency_test(void)
 {
+#ifdef SLUB_PERFORMANCE_TEST
+#define TEST_COUNT 10000
+	int size, i;
+	struct pc x;
+	void **v = kmalloc(TEST_COUNT * sizeof(void *), GFP_KERNEL);
+
+	printk(KERN_INFO "SLUB Performance testing\n");
+	printk(KERN_INFO "========================\n");
+	printk(KERN_INFO "1. Kmalloc: Repeatedly allocate then free test\n");
+	for (size = 8; size <= PAGE_SIZE << 2; size <<= 1) {
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++) {
+			v[i] = kmalloc(size, GFP_KERNEL);
+		}
+		printk(KERN_INFO "%i times kmalloc(%d) = ", i, size);
+		pc_stop_printk(&x);
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++)
+			kfree(v[i]);
+		printk(" kfree() = ");
+		pc_stop_printk(&x);
+		printk("\n");
+	}
+
+	printk(KERN_INFO "2. Kmalloc: alloc/free test\n");
+	for (size = 8; size <= PAGE_SIZE << 2; size <<= 1) {
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++)
+			kfree(kmalloc(size, GFP_KERNEL));
+		printk(KERN_INFO "%i times kmalloc(%d)/kfree = ", i, size);
+		pc_stop_printk(&x);
+		printk("\n");
+	}
+	printk(KERN_INFO "3. kmem_cache_alloc: Repeatedly allocate then free test\n");
+	for (size = 3; size <= PAGE_SHIFT; size ++) {
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++) {
+			v[i] = kmem_cache_alloc(kmalloc_caches + size, GFP_KERNEL);
+		}
+		printk(KERN_INFO "%d times kmem_cache_alloc(%d) = ", i, 1 << size);
+		pc_stop_printk(&x);
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++)
+			kmem_cache_free(kmalloc_caches + size, v[i]);
+		printk(" kmem_cache_free() = ");
+		pc_stop_printk(&x);
+		printk("\n");
+	}
+
+	printk(KERN_INFO "4. kmem_cache_alloc: alloc/free test\n");
+	for (size = 3; size <= PAGE_SHIFT; size++) {
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++)
+			kmem_cache_free(kmalloc_caches + size,
+				kmem_cache_alloc(kmalloc_caches + size,
+							GFP_KERNEL));
+		printk(KERN_INFO "%d times kmem_cache_alloc(%d)/kmem_cache_free = ", i, 1 << size);
+		pc_stop_printk(&x);
+		printk("\n");
+	}
+	printk(KERN_INFO "5. kmem_cache_zalloc: Repeatedly allocate then free test\n");
+	for (size = 3; size <= PAGE_SHIFT; size ++) {
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++) {
+			v[i] = kmem_cache_zalloc(kmalloc_caches + size, GFP_KERNEL);
+		}
+		printk(KERN_INFO "%d times kmem_cache_zalloc(%d) = ", i, 1 << size);
+		pc_stop_printk(&x);
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++)
+			kmem_cache_free(kmalloc_caches + size, v[i]);
+		printk(" kmem_cache_free() = ");
+		pc_stop_printk(&x);
+		printk("\n");
+	}
+
+	printk(KERN_INFO "6. kmem_cache_zalloc: alloc/free test\n");
+	for (size = 3; size <= PAGE_SHIFT; size++) {
+		pc_start(&x);
+		for(i = 0; i < TEST_COUNT; i++)
+			kmem_cache_free(kmalloc_caches + size,
+				kmem_cache_zalloc(kmalloc_caches + size,
+							GFP_KERNEL));
+		printk(KERN_INFO "%d times kmem_cache_zalloc(%d)/kmem_cache_free = ", i, 1 << size);
+		pc_stop_printk(&x);
+		printk("\n");
+	}
+#endif
+#ifdef SLUB_RESILIENCY_TEST
 	u8 *p;
 
 	printk(KERN_ERR "SLUB resiliency testing\n");
@@ -2920,11 +3010,8 @@ static void resiliency_test(void)
 	p[512] = 0xab;
 	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
 	validate_slab_cache(kmalloc_caches + 9);
-}
-#else
-static void resiliency_test(void) {};
 #endif
-
+}
 /*
  * Generate lists of code addresses where slabcache objects are allocated
  * and freed.
-- 
1.5.2.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 7/7] Simple Performance Counters: SLUB instrumentation
  2007-07-31 23:25 [PATCH 7/7] Simple Performance Counters: SLUB instrumentation Christoph Lameter
@ 2007-08-17 16:21 ` Mathieu Desnoyers
  2007-08-17 20:39   ` Christoph Lameter
  0 siblings, 1 reply; 5+ messages in thread
From: Mathieu Desnoyers @ 2007-08-17 16:21 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel

Hi Christoph,

A few remarks on these tests:

Why do you printk inside the timing period? Filling the printk buffers
or outputting to something such as a serial console could really hurt
your results.

I hope you run your system with idle=poll and without frequency scaling
at all, because otherwise your cycle count would be completely off on
many AMD and Intel CPUs. You can have a look at this (very rough)
document on the topic: 

http://ltt.polymtl.ca/ > "Notes on AMD and Intel asynchronous TSC
                          architectures (with workarounds)"
http://ltt.polymtl.ca/svn/ltt/branches/poly/doc/developer/tsc.txt

I would be tempted to try running these tests with interrupts disabled,
just to make sure that the timings are not affected too much by
the system load. Especially since you are comparing an algorithm that
disables interrupts with one that doesn't, it would be unfair to say
that the second one is slower just because fewer interrupts have been
serviced during its execution.

Mathieu

* Christoph Lameter (clameter@sgi.com) wrote:
> With this patch SLUB will perform tests on bootup and display results.
> 
> Signed-off-by: Christoph Lameter <clameter@sgi.com>
> ---
>  mm/slub.c |   97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 files changed, 92 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/slub.c b/mm/slub.c
> index 6c6d74f..568b16a 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -20,6 +20,7 @@
>  #include <linux/mempolicy.h>
>  #include <linux/ctype.h>
>  #include <linux/kallsyms.h>
> +#include <linux/perf.h>
>  
>  /*
>   * Lock order:
> @@ -152,6 +153,7 @@ static inline void ClearSlabDebug(struct page *page)
>  
>  /* Enable to test recovery from slab corruption on boot */
>  #undef SLUB_RESILIENCY_TEST
> +#undef SLUB_PERFORMANCE_TEST
>  
>  #if PAGE_SHIFT <= 12
>  
> @@ -2870,9 +2872,97 @@ static long validate_slab_cache(struct kmem_cache *s)
>  	return count;
>  }
>  
> -#ifdef SLUB_RESILIENCY_TEST
>  static void resiliency_test(void)
>  {
> +#ifdef SLUB_PERFORMANCE_TEST
> +#define TEST_COUNT 10000
> +	int size, i;
> +	struct pc x;
> +	void **v = kmalloc(TEST_COUNT * sizeof(void *), GFP_KERNEL);
> +
> +	printk(KERN_INFO "SLUB Performance testing\n");
> +	printk(KERN_INFO "========================\n");
> +	printk(KERN_INFO "1. Kmalloc: Repeatedly allocate then free test\n");
> +	for (size = 8; size <= PAGE_SIZE << 2; size <<= 1) {
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++) {
> +			v[i] = kmalloc(size, GFP_KERNEL);
> +		}
> +		printk(KERN_INFO "%i times kmalloc(%d) = ", i, size);
> +		pc_stop_printk(&x);
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++)
> +			kfree(v[i]);
> +		printk(" kfree() = ");
> +		pc_stop_printk(&x);
> +		printk("\n");
> +	}
> +
> +	printk(KERN_INFO "2. Kmalloc: alloc/free test\n");
> +	for (size = 8; size <= PAGE_SIZE << 2; size <<= 1) {
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++)
> +			kfree(kmalloc(size, GFP_KERNEL));
> +		printk(KERN_INFO "%i times kmalloc(%d)/kfree = ", i, size);
> +		pc_stop_printk(&x);
> +		printk("\n");
> +	}
> +	printk(KERN_INFO "3. kmem_cache_alloc: Repeatedly allocate then free test\n");
> +	for (size = 3; size <= PAGE_SHIFT; size ++) {
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++) {
> +			v[i] = kmem_cache_alloc(kmalloc_caches + size, GFP_KERNEL);
> +		}
> +		printk(KERN_INFO "%d times kmem_cache_alloc(%d) = ", i, 1 << size);
> +		pc_stop_printk(&x);
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++)
> +			kmem_cache_free(kmalloc_caches + size, v[i]);
> +		printk(" kmem_cache_free() = ");
> +		pc_stop_printk(&x);
> +		printk("\n");
> +	}
> +
> +	printk(KERN_INFO "4. kmem_cache_alloc: alloc/free test\n");
> +	for (size = 3; size <= PAGE_SHIFT; size++) {
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++)
> +			kmem_cache_free(kmalloc_caches + size,
> +				kmem_cache_alloc(kmalloc_caches + size,
> +							GFP_KERNEL));
> +		printk(KERN_INFO "%d times kmem_cache_alloc(%d)/kmem_cache_free = ", i, 1 << size);
> +		pc_stop_printk(&x);
> +		printk("\n");
> +	}
> +	printk(KERN_INFO "5. kmem_cache_zalloc: Repeatedly allocate then free test\n");
> +	for (size = 3; size <= PAGE_SHIFT; size ++) {
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++) {
> +			v[i] = kmem_cache_zalloc(kmalloc_caches + size, GFP_KERNEL);
> +		}
> +		printk(KERN_INFO "%d times kmem_cache_zalloc(%d) = ", i, 1 << size);
> +		pc_stop_printk(&x);
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++)
> +			kmem_cache_free(kmalloc_caches + size, v[i]);
> +		printk(" kmem_cache_free() = ");
> +		pc_stop_printk(&x);
> +		printk("\n");
> +	}
> +
> +	printk(KERN_INFO "6. kmem_cache_zalloc: alloc/free test\n");
> +	for (size = 3; size <= PAGE_SHIFT; size++) {
> +		pc_start(&x);
> +		for(i = 0; i < TEST_COUNT; i++)
> +			kmem_cache_free(kmalloc_caches + size,
> +				kmem_cache_zalloc(kmalloc_caches + size,
> +							GFP_KERNEL));
> +		printk(KERN_INFO "%d times kmem_cache_zalloc(%d)/kmem_cache_free = ", i, 1 << size);
> +		pc_stop_printk(&x);
> +		printk("\n");
> +	}
> +#endif
> +#ifdef SLUB_RESILIENCY_TEST
>  	u8 *p;
>  
>  	printk(KERN_ERR "SLUB resiliency testing\n");
> @@ -2920,11 +3010,8 @@ static void resiliency_test(void)
>  	p[512] = 0xab;
>  	printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
>  	validate_slab_cache(kmalloc_caches + 9);
> -}
> -#else
> -static void resiliency_test(void) {};
>  #endif
> -
> +}
>  /*
>   * Generate lists of code addresses where slabcache objects are allocated
>   * and freed.
> -- 
> 1.5.2.4
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 7/7] Simple Performance Counters: SLUB instrumentation
  2007-08-17 16:21 ` Mathieu Desnoyers
@ 2007-08-17 20:39   ` Christoph Lameter
  2007-08-18  3:20     ` Mathieu Desnoyers
  0 siblings, 1 reply; 5+ messages in thread
From: Christoph Lameter @ 2007-08-17 20:39 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Fri, 17 Aug 2007, Mathieu Desnoyers wrote:

> Why do you printk inside the timing period ? Filling the printk buffers
> or outputting on things such as serial console could really hurt your
> results.

It was easier to code?

> I hope you run your system with idle=poll and without frequency scaling
> at all, because otherwise your cycle count would be completely off on
> many AMD and Intel CPUs. You can have a look at this (very rough)
> document on the topic: 

The cpu will definitely not be idle during these measurements and no 
frequency scaling is active.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 7/7] Simple Performance Counters: SLUB instrumentation
  2007-08-17 20:39   ` Christoph Lameter
@ 2007-08-18  3:20     ` Mathieu Desnoyers
  2007-08-20 19:41       ` Christoph Lameter
  0 siblings, 1 reply; 5+ messages in thread
From: Mathieu Desnoyers @ 2007-08-18  3:20 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: linux-kernel

* Christoph Lameter (clameter@sgi.com) wrote:
> On Fri, 17 Aug 2007, Mathieu Desnoyers wrote:
> 
> > Why do you printk inside the timing period ? Filling the printk buffers
> > or outputting on things such as serial console could really hurt your
> > results.
> 
> It was easier to code?
> 
> > I hope you run your system with idle=poll and without frequency scaling
> > at all, because otherwise your cycle count would be completely off on
> > many AMD and Intel CPUs. You can have a look at this (very rough)
> > document on the topic: 
> 
> The cpu will definitely not be idle during these measurements and no 
> frequency scaling is active.

The problem is that if the cpu is idle _before_ the measurements, the
frequency will change differently from one cpu to another. Therefore,
the cycle counters may have large offsets when you start your tests. So,
if get_cycles() is executed on different CPUs (thread being migrated)
between the beginning and the end of the test, the results would be
skewed.

Mathieu


-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 7/7] Simple Performance Counters: SLUB instrumentation
  2007-08-18  3:20     ` Mathieu Desnoyers
@ 2007-08-20 19:41       ` Christoph Lameter
  0 siblings, 0 replies; 5+ messages in thread
From: Christoph Lameter @ 2007-08-20 19:41 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: linux-kernel

On Fri, 17 Aug 2007, Mathieu Desnoyers wrote:

> > The cpu will definitely not be idle during these measurements and no 
> > frequency scaling is active.
> 
> The problem is that if the cpu is idle _before_ the measurements, the
> frequency will change differently from one cpu to another. Therefore,
> the cycle counters may have large offsets when you start your tests. So,
> if get_cycles() is executed on different CPUs (thread being migrated)
> between the beginning and the end of the test, the results would be
> skewed.

TSC measurements as done by this patch are associated with cpus. The
skew is irrelevant. Development of this patchset was done on a
system whose TSCs are never synchronized.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-08-20 19:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-07-31 23:25 [PATCH 7/7] Simple Performance Counters: SLUB instrumentation Christoph Lameter
2007-08-17 16:21 ` Mathieu Desnoyers
2007-08-17 20:39   ` Christoph Lameter
2007-08-18  3:20     ` Mathieu Desnoyers
2007-08-20 19:41       ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox