* [PATCH][2.6] Completely out of line spinlocks / x86_64
@ 2004-08-08 4:50 Zwane Mwaikambo
2004-08-08 6:08 ` Zwane Mwaikambo
0 siblings, 1 reply; 7+ messages in thread
From: Zwane Mwaikambo @ 2004-08-08 4:50 UTC (permalink / raw)
To: Linux Kernel; +Cc: Andrew Morton, Andi Kleen, Matt Mackall
Pulled from the -tiny tree, the focus of this patch is for reduced kernel
image size, the shared spinlock text may be more of a benefit later on
when multicore packages with shared caches arrive but should benefit
em64t systems with HT enabled. I've set the config option to y so
that it gets a bit of runtime, but it can be left as a config option.
Tested on 4x logical em64t.
text data bss dec hex filename
3870057 1350301 508048 5728406 576896 vmlinux-after
3885801 1352134 508048 5745983 57ad3f vmlinux-before
arch/x86_64/Kconfig | 10 ++++++
arch/x86_64/kernel/x8664_ksyms.c | 11 +++++++
arch/x86_64/lib/Makefile | 1
arch/x86_64/lib/spinlock.c | 57 +++++++++++++++++++++++++++++++++++++++
include/asm-x86_64/spinlock.h | 22 +++++++++++++--
5 files changed, 99 insertions(+), 2 deletions(-)
Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig
===================================================================
RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/Kconfig,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 Kconfig
--- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 5 Aug 2004 16:37:48 -0000 1.1.1.1
+++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 7 Aug 2004 22:47:30 -0000
@@ -438,6 +438,16 @@ config DEBUG_SPINLOCK
best used in conjunction with the NMI watchdog so that spinlock
deadlocks are also debuggable.
+config COOL_SPINLOCK
+ bool "Completely out of line spinlocks"
+ depends on SMP
+ default y
+ help
+ Say Y here to build spinlocks which have common text for contended
+ and uncontended paths. This reduces kernel text size by at least
+ 50k on most configurations, plus there is the additional benefit
+ of better cache utilisation.
+
# !SMP for now because the context switch early causes GPF in segment reloading
# and the GS base checking does the wrong thing then, causing a hang.
config CHECKING
Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/kernel/x8664_ksyms.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/kernel/x8664_ksyms.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 x8664_ksyms.c
--- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/kernel/x8664_ksyms.c 5 Aug 2004 16:37:48 -0000 1.1.1.1
+++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/kernel/x8664_ksyms.c 7 Aug 2004 22:41:34 -0000
@@ -220,3 +220,14 @@ EXPORT_SYMBOL(flush_tlb_page);
EXPORT_SYMBOL_GPL(flush_tlb_all);
#endif
+#ifdef CONFIG_COOL_SPINLOCK
+extern void asmlinkage __spin_lock_failed(spinlock_t *);
+extern void asmlinkage __spin_lock_failed_flags(spinlock_t *, unsigned long);
+extern void asmlinkage __spin_lock_loop(spinlock_t *);
+extern void asmlinkage __spin_lock_loop_flags(spinlock_t *, unsigned long);
+EXPORT_SYMBOL(__spin_lock_failed);
+EXPORT_SYMBOL(__spin_lock_failed_flags);
+EXPORT_SYMBOL(__spin_lock_loop);
+EXPORT_SYMBOL(__spin_lock_loop_flags);
+#endif
+
Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/Makefile
===================================================================
RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/lib/Makefile,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 Makefile
--- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/Makefile 5 Aug 2004 16:37:48 -0000 1.1.1.1
+++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/Makefile 7 Aug 2004 22:32:44 -0000
@@ -13,3 +13,4 @@ lib-y += memcpy.o memmove.o memset.o cop
lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
lib-$(CONFIG_KGDB) += kgdb_serial.o
+lib-$(CONFIG_COOL_SPINLOCK) += spinlock.o
Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
===================================================================
RCS file: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
diff -N linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c 7 Aug 2004 23:00:34 -0000
@@ -0,0 +1,57 @@
+#define PROC(name) \
+ ".align 4\n" \
+ ".globl " #name"\n" \
+ #name":\n"
+
+asm (PROC(__spin_lock_failed_flags)
+ "test $0x200, %rbx\n"
+ "jz 1f\n"
+ "sti\n"
+ "1:\n\t"
+ "rep; nop\n"
+ "cmpb $0, (%rax)\n"
+ "jle 1b\n"
+ "cli\n"
+ "lock; decb (%rax)\n\t"
+ "js __spin_lock_failed_flags\n\t"
+ "nop\n"
+ "ret\n"
+);
+
+asm (PROC(__spin_lock_loop_flags)
+ "lock; decb (%rax)\n\t"
+ "js 1f\n\t"
+ "nop\n\t"
+ "ret\n\t"
+ "1:\n\t"
+ "test $0x200, %rbx\n\t"
+ "jz 1f\n\t"
+ "sti\n\t"
+ "2: rep; nop\n\t"
+ "cmpb $0, (%rax)\n\t"
+ "jle 2b\n\t"
+ "cli\n\t"
+ "jmp __spin_lock_loop_flags\n\t"
+);
+
+asm (PROC(__spin_lock_failed)
+ "rep; nop\n\t"
+ "cmpb $0, (%rax)\n\t"
+ "jle __spin_lock_failed\n\t"
+ "lock; decb (%rax)\n\t"
+ "js __spin_lock_failed\n\t"
+ "nop\n\t"
+ "ret\n\t"
+);
+
+asm (PROC(__spin_lock_loop)
+ "lock; decb (%rax)\n\t"
+ "js 1f\n\t"
+ "nop\n\t"
+ "ret\n\t"
+ "1: rep; nop\n\t"
+ "cmpb $0, (%rax)\n\t"
+ "jle 1b\n\t"
+ "jmp __spin_lock_loop\n\t"
+);
+
Index: linux-2.6.8-rc3-mm1-amd64/include/asm-x86_64/spinlock.h
===================================================================
RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/include/asm-x86_64/spinlock.h,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 spinlock.h
--- linux-2.6.8-rc3-mm1-amd64/include/asm-x86_64/spinlock.h 5 Aug 2004 16:37:48 -0000 1.1.1.1
+++ linux-2.6.8-rc3-mm1-amd64/include/asm-x86_64/spinlock.h 7 Aug 2004 22:48:13 -0000
@@ -42,6 +42,13 @@ typedef struct {
#define spin_is_locked(x) (*(volatile signed char *)(&(x)->lock) <= 0)
#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+#ifdef CONFIG_COOL_SPINLOCK
+#define spin_lock_string \
+ "call __spin_lock_loop\n\t"
+
+#define spin_lock_string_flags \
+ "call __spin_lock_loop_flags\n\t"
+#else
#define spin_lock_string \
"\n1:\t" \
"lock ; decb %0\n\t" \
@@ -70,6 +77,7 @@ typedef struct {
"cli\n\t" \
"jmp 1b\n" \
LOCK_SECTION_END
+#endif
/*
* This works. Despite all the confusion.
@@ -138,7 +146,12 @@ printk("eip: %p\n", &&here);
#endif
__asm__ __volatile__(
spin_lock_string
- :"=m" (lock->lock) : : "memory");
+#ifdef CONFIG_COOL_SPINLOCK
+ : : "a" (&lock->lock) : "memory"
+#else
+ :"=m" (lock->lock) : : "memory"
+#endif
+ );
}
static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
@@ -152,7 +165,12 @@ here:
}
#endif
__asm__ __volatile__(spin_lock_string_flags
- :"=m" (lock->lock) : "r" (flags) : "memory");
+#ifdef CONFIG_COOL_SPINLOCK
+ : : "a" (&lock->lock), "b" (flags) : "memory"
+#else
+ :"=m" (lock->lock) : "r" (flags) : "memory"
+#endif
+ );
}
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH][2.6] Completely out of line spinlocks / x86_64
2004-08-08 4:50 [PATCH][2.6] Completely out of line spinlocks / x86_64 Zwane Mwaikambo
@ 2004-08-08 6:08 ` Zwane Mwaikambo
2004-08-09 11:23 ` Andi Kleen
0 siblings, 1 reply; 7+ messages in thread
From: Zwane Mwaikambo @ 2004-08-08 6:08 UTC (permalink / raw)
To: Linux Kernel; +Cc: Andrew Morton, Andi Kleen, Matt Mackall
On Sun, 8 Aug 2004, Zwane Mwaikambo wrote:
> Pulled from the -tiny tree, the focus of this patch is for reduced kernel
> image size, the shared spinlock text may be more of a benefit later on
> when multicore packages with shared caches arrive but should benefit
> em64t systems with HT enabled. I've set the config option to y so
> that it gets a bit of runtime, but it can be left as a config option.
>
> Tested on 4x logical em64t.
>
> text data bss dec hex filename
> 3870057 1350301 508048 5728406 576896 vmlinux-after
> 3885801 1352134 508048 5745983 57ad3f vmlinux-before
A few changes from the i386 thread.
arch/x86_64/Kconfig | 10 ++++++++++
arch/x86_64/lib/Makefile | 1 +
arch/x86_64/lib/spinlock.c | 38 ++++++++++++++++++++++++++++++++++++++
include/asm-x86_64/spinlock.h | 22 ++++++++++++++++++++--
4 files changed, 69 insertions(+), 2 deletions(-)
Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig
===================================================================
RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/Kconfig,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 Kconfig
--- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 5 Aug 2004 16:37:48 -0000 1.1.1.1
+++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 7 Aug 2004 22:47:30 -0000
@@ -438,6 +438,16 @@ config DEBUG_SPINLOCK
best used in conjunction with the NMI watchdog so that spinlock
deadlocks are also debuggable.
+config COOL_SPINLOCK
+ bool "Completely out of line spinlocks"
+ depends on SMP
+ default y
+ help
+ Say Y here to build spinlocks which have common text for contended
+ and uncontended paths. This reduces kernel text size by at least
+ 50k on most configurations, plus there is the additional benefit
+ of better cache utilisation.
+
# !SMP for now because the context switch early causes GPF in segment reloading
# and the GS base checking does the wrong thing then, causing a hang.
config CHECKING
Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/Makefile
===================================================================
RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/lib/Makefile,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 Makefile
--- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/Makefile 5 Aug 2004 16:37:48 -0000 1.1.1.1
+++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/Makefile 7 Aug 2004 22:32:44 -0000
@@ -13,3 +13,4 @@ lib-y += memcpy.o memmove.o memset.o cop
lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
lib-$(CONFIG_KGDB) += kgdb_serial.o
+lib-$(CONFIG_COOL_SPINLOCK) += spinlock.o
Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
===================================================================
RCS file: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
diff -N linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c 8 Aug 2004 05:39:04 -0000
@@ -0,0 +1,38 @@
+#include <linux/module.h>
+
+#define PROC(name) \
+ ".align 4\n" \
+ ".globl " #name"\n" \
+ #name":\n"
+
+asm (PROC(__spin_lock_loop_flags)
+ "lock; decb (%rax)\n\t"
+ "js 1f\n\t"
+ "nop\n\t"
+ "ret\n\t"
+ "1:\n\t"
+ "test $0x200, %rbx\n\t"
+ "jz 1f\n\t"
+ "sti\n\t"
+ "2: rep; nop\n\t"
+ "cmpb $0, (%rax)\n\t"
+ "jle 2b\n\t"
+ "cli\n\t"
+ "jmp __spin_lock_loop_flags\n\t"
+);
+
+asm (PROC(__spin_lock_loop)
+ "lock; decb (%rax)\n\t"
+ "js 1f\n\t"
+ "nop\n\t"
+ "ret\n\t"
+ "1: rep; nop\n\t"
+ "cmpb $0, (%rax)\n\t"
+ "jle 1b\n\t"
+ "jmp __spin_lock_loop\n\t"
+);
+
+void __spin_lock_loop_flags(void);
+void __spin_lock_loop(void);
+EXPORT_SYMBOL(__spin_lock_loop_flags);
+EXPORT_SYMBOL(__spin_lock_loop);
Index: linux-2.6.8-rc3-mm1-amd64/include/asm-x86_64/spinlock.h
===================================================================
RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/include/asm-x86_64/spinlock.h,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 spinlock.h
--- linux-2.6.8-rc3-mm1-amd64/include/asm-x86_64/spinlock.h 5 Aug 2004 16:37:48 -0000 1.1.1.1
+++ linux-2.6.8-rc3-mm1-amd64/include/asm-x86_64/spinlock.h 8 Aug 2004 05:19:41 -0000
@@ -42,6 +42,13 @@ typedef struct {
#define spin_is_locked(x) (*(volatile signed char *)(&(x)->lock) <= 0)
#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+#ifdef CONFIG_COOL_SPINLOCK
+#define spin_lock_string \
+ "call __spin_lock_loop\n\t"
+
+#define spin_lock_string_flags \
+ "call __spin_lock_loop_flags\n\t"
+#else
#define spin_lock_string \
"\n1:\t" \
"lock ; decb %0\n\t" \
@@ -70,6 +77,7 @@ typedef struct {
"cli\n\t" \
"jmp 1b\n" \
LOCK_SECTION_END
+#endif
/*
* This works. Despite all the confusion.
@@ -138,7 +146,12 @@ printk("eip: %p\n", &&here);
#endif
__asm__ __volatile__(
spin_lock_string
- :"=m" (lock->lock) : : "memory");
+#ifdef CONFIG_COOL_SPINLOCK
+ : : "a" (&lock->lock) : "memory"
+#else
+ :"=m" (lock->lock) : : "memory"
+#endif
+ );
}
static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
@@ -152,7 +165,12 @@ here:
}
#endif
__asm__ __volatile__(spin_lock_string_flags
- :"=m" (lock->lock) : "r" (flags) : "memory");
+#ifdef CONFIG_COOL_SPINLOCK
+ : : "a" (&lock->lock), "d" (flags) : "memory"
+#else
+ :"=m" (lock->lock) : "r" (flags) : "memory"
+#endif
+ );
}
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH][2.6] Completely out of line spinlocks / x86_64
2004-08-08 6:08 ` Zwane Mwaikambo
@ 2004-08-09 11:23 ` Andi Kleen
2004-08-09 11:41 ` Marcelo Tosatti
2004-08-09 21:31 ` Zwane Mwaikambo
0 siblings, 2 replies; 7+ messages in thread
From: Andi Kleen @ 2004-08-09 11:23 UTC (permalink / raw)
To: Zwane Mwaikambo; +Cc: linux-kernel, akpm, mpm
On Sun, 8 Aug 2004 02:08:30 -0400 (EDT)
Zwane Mwaikambo <zwane@linuxpower.ca> wrote:
> arch/x86_64/Kconfig | 10 ++++++++++
> arch/x86_64/lib/Makefile | 1 +
> arch/x86_64/lib/spinlock.c | 38 ++++++++++++++++++++++++++++++++++++++
> include/asm-x86_64/spinlock.h | 22 ++++++++++++++++++++--
> 4 files changed, 69 insertions(+), 2 deletions(-)
>
> Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig
> ===================================================================
> RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/Kconfig,v
> retrieving revision 1.1.1.1
> diff -u -p -B -r1.1.1.1 Kconfig
> --- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 5 Aug 2004 16:37:48 -0000 1.1.1.1
> +++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 7 Aug 2004 22:47:30 -0000
> @@ -438,6 +438,16 @@ config DEBUG_SPINLOCK
> best used in conjunction with the NMI watchdog so that spinlock
> deadlocks are also debuggable.
>
> +config COOL_SPINLOCK
> + bool "Completely out of line spinlocks"
> + depends on SMP
> + default y
> + help
> + Say Y here to build spinlocks which have common text for contended
> + and uncontended paths. This reduces kernel text size by at least
> + 50k on most configurations, plus there is the additional benefit
> + of better cache utilisation.
I think the 50k number is wrong. I took a look at it and the big
difference is only seen when you enable interrupts during spinning, which
we didn't do before. If you compare it to the old implementation the
difference is much less.
I don't really like the config option. Either it's a good idea
then it should be done by default without option or it should not be done at all.
Did you do any lock intensive benchmarks that could show a slowdown?
> Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
> ===================================================================
> RCS file: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
> diff -N linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c 8 Aug 2004 05:39:04 -0000
> @@ -0,0 +1,38 @@
> +#include <linux/module.h>
You should make this file assembly only.
-Andi
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][2.6] Completely out of line spinlocks / x86_64
2004-08-09 11:23 ` Andi Kleen
@ 2004-08-09 11:41 ` Marcelo Tosatti
2004-08-09 11:49 ` Marcelo Tosatti
2004-08-09 21:31 ` Zwane Mwaikambo
1 sibling, 1 reply; 7+ messages in thread
From: Marcelo Tosatti @ 2004-08-09 11:41 UTC (permalink / raw)
To: Andi Kleen; +Cc: Zwane Mwaikambo, linux-kernel, akpm, mpm
On Mon, Aug 09, 2004 at 01:23:08PM +0200, Andi Kleen wrote:
> On Sun, 8 Aug 2004 02:08:30 -0400 (EDT)
> Zwane Mwaikambo <zwane@linuxpower.ca> wrote:
>
> > arch/x86_64/Kconfig | 10 ++++++++++
> > arch/x86_64/lib/Makefile | 1 +
> > arch/x86_64/lib/spinlock.c | 38 ++++++++++++++++++++++++++++++++++++++
> > include/asm-x86_64/spinlock.h | 22 ++++++++++++++++++++--
> > 4 files changed, 69 insertions(+), 2 deletions(-)
> >
> > Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig
> > ===================================================================
> > RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/Kconfig,v
> > retrieving revision 1.1.1.1
> > diff -u -p -B -r1.1.1.1 Kconfig
> > --- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 5 Aug 2004 16:37:48 -0000 1.1.1.1
> > +++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 7 Aug 2004 22:47:30 -0000
> > @@ -438,6 +438,16 @@ config DEBUG_SPINLOCK
> > best used in conjunction with the NMI watchdog so that spinlock
> > deadlocks are also debuggable.
> >
> > +config COOL_SPINLOCK
> > + bool "Completely out of line spinlocks"
> > + depends on SMP
> > + default y
> > + help
> > + Say Y here to build spinlocks which have common text for contended
> > + and uncontended paths. This reduces kernel text size by at least
> > + 50k on most configurations, plus there is the additional benefit
> > + of better cache utilisation.
>
> I think the 50k number is wrong. I took a look at it and the big
> difference is only seen when you enable interrupts during spinning, which
> we didn't do before. If you compare it to the old implementation the
> difference is much less.
>
> I don't really like the config option. Either it's a good idea
> then it should be done by default without option or it should not be done at all.
>
> Did you do any lock intensive benchmarks that could show a slowdown?
Out of curiosity, also, have you ran any lock intensive benchmarks to get some
numbers out of the increased cacheline hits due to uninlining?
I think you can measure the hits/misses precisely with Mikael's perfcounters.
>
> > Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
> > ===================================================================
> > RCS file: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
> > diff -N linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c
> > --- /dev/null 1 Jan 1970 00:00:00 -0000
> > +++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/lib/spinlock.c 8 Aug 2004 05:39:04 -0000
> > @@ -0,0 +1,38 @@
> > +#include <linux/module.h>
>
> You should make this file assembly only.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][2.6] Completely out of line spinlocks / x86_64
2004-08-09 11:41 ` Marcelo Tosatti
@ 2004-08-09 11:49 ` Marcelo Tosatti
2004-08-09 14:38 ` Matt Mackall
0 siblings, 1 reply; 7+ messages in thread
From: Marcelo Tosatti @ 2004-08-09 11:49 UTC (permalink / raw)
To: Andi Kleen; +Cc: Zwane Mwaikambo, linux-kernel, akpm, mpm
On Mon, Aug 09, 2004 at 08:41:38AM -0300, Marcelo Tosatti wrote:
> On Mon, Aug 09, 2004 at 01:23:08PM +0200, Andi Kleen wrote:
> > On Sun, 8 Aug 2004 02:08:30 -0400 (EDT)
> > Zwane Mwaikambo <zwane@linuxpower.ca> wrote:
> >
> > > arch/x86_64/Kconfig | 10 ++++++++++
> > > arch/x86_64/lib/Makefile | 1 +
> > > arch/x86_64/lib/spinlock.c | 38 ++++++++++++++++++++++++++++++++++++++
> > > include/asm-x86_64/spinlock.h | 22 ++++++++++++++++++++--
> > > 4 files changed, 69 insertions(+), 2 deletions(-)
> > >
> > > Index: linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig
> > > ===================================================================
> > > RCS file: /home/cvsroot/linux-2.6.8-rc3-mm1/arch/x86_64/Kconfig,v
> > > retrieving revision 1.1.1.1
> > > diff -u -p -B -r1.1.1.1 Kconfig
> > > --- linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 5 Aug 2004 16:37:48 -0000 1.1.1.1
> > > +++ linux-2.6.8-rc3-mm1-amd64/arch/x86_64/Kconfig 7 Aug 2004 22:47:30 -0000
> > > @@ -438,6 +438,16 @@ config DEBUG_SPINLOCK
> > > best used in conjunction with the NMI watchdog so that spinlock
> > > deadlocks are also debuggable.
> > >
> > > +config COOL_SPINLOCK
> > > + bool "Completely out of line spinlocks"
> > > + depends on SMP
> > > + default y
> > > + help
> > > + Say Y here to build spinlocks which have common text for contended
> > > + and uncontended paths. This reduces kernel text size by at least
> > > + 50k on most configurations, plus there is the additional benefit
> > > + of better cache utilisation.
> >
> > I think the 50k number is wrong. I took a look at it and the big
> > difference is only seen when you enable interrupts during spinning, which
> > we didn't do before. If you compare it to the old implementation the
> > difference is much less.
> >
> > I don't really like the config option. Either it's a good idea
> > then it should be done by default without option or it should not be done at all.
> >
> > Did you do any lock intensive benchmarks that could show a slowdown?
>
> Out of curiosity, also, have you ran any lock intensive benchmarks to get some
> numbers out of the increased cacheline hits due to uninlining?
>
> I think you can measure the hits/misses precisely with Mikael's perfcounters.
Hi Zwane,
Just seen your bonnie++ results (should have the whole thread before replying),
looks great, except a slight reduction in sequential output:
out-of-line spinlocks:
Version @version@ ------Sequential Output------ --Sequential Input- --Random-
-Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
stp2-000 2G 7018 99 64560 36 21694 16 6789 97 43729 14 340.6 1
stp2-000 2G 7055 99 64836 39 21899 16 6752 97 44827 17 330.8 2
stp2-000 2G 7023 99 64525 38 22987 17 6704 96 44777 14 337.3 1
mainline:
Version @version@ ------Sequential Output------ --Sequential Input- --Random-
-Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
stp2-000 2G 7048 99 64912 38 22510 17 6732 96 43900 14 332.0 1
stp2-000 2G 7018 99 63821 39 21732 16 6787 97 44889 17 326.7 2
stp2-000 2G 7063 99 63834 38 22361 17 6738 97 43310 14 338.3 1
------Sequential Create------ --------Random Create--------
Probably just noise, still I think its worth mentioning.
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH][2.6] Completely out of line spinlocks / x86_64
2004-08-09 11:49 ` Marcelo Tosatti
@ 2004-08-09 14:38 ` Matt Mackall
0 siblings, 0 replies; 7+ messages in thread
From: Matt Mackall @ 2004-08-09 14:38 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Andi Kleen, Zwane Mwaikambo, linux-kernel, akpm
On Mon, Aug 09, 2004 at 08:49:12AM -0300, Marcelo Tosatti wrote:
>
> Hi Zwane,
>
> Just seen your bonnie++ results (should have the whole thread before replying),
> looks great, except a slight reduction in sequential output:
>
> out-of-line spinlocks:
> Version @version@ ------Sequential Output------ --Sequential Input- --Random-
> -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
> Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
> stp2-000 2G 7018 99 64560 36 21694 16 6789 97 43729 14 340.6 1
> stp2-000 2G 7055 99 64836 39 21899 16 6752 97 44827 17 330.8 2
> stp2-000 2G 7023 99 64525 38 22987 17 6704 96 44777 14 337.3 1
>
> mainline:
> Version @version@ ------Sequential Output------ --Sequential Input- --Random-
> -Per Chr- --Block-- -Rewrite- -Per Chr- --Block-- --Seeks--
> Machine Size K/sec %CP K/sec %CP K/sec %CP K/sec %CP K/sec %CP /sec %CP
> stp2-000 2G 7048 99 64912 38 22510 17 6732 96 43900 14 332.0 1
> stp2-000 2G 7018 99 63821 39 21732 16 6787 97 44889 17 326.7 2
> stp2-000 2G 7063 99 63834 38 22361 17 6738 97 43310 14 338.3 1
> ------Sequential Create------ --------Random Create--------
>
> Probably just noise, still I think its worth mentioning.
That really does look like noise here, though this is probably not the
ideal benchmark test. My hope is that we can stick this in -mm for a
bit and get some wider benchmarking of it (hence the config option).
--
Mathematics is the supreme nostalgia of our time.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH][2.6] Completely out of line spinlocks / x86_64
2004-08-09 11:23 ` Andi Kleen
2004-08-09 11:41 ` Marcelo Tosatti
@ 2004-08-09 21:31 ` Zwane Mwaikambo
1 sibling, 0 replies; 7+ messages in thread
From: Zwane Mwaikambo @ 2004-08-09 21:31 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, akpm, mpm
On Mon, 9 Aug 2004, Andi Kleen wrote:
> I think the 50k number is wrong. I took a look at it and the big
> difference is only seen when you enable interrupts during spinning, which
> we didn't do before. If you compare it to the old implementation the
> difference is much less.
Yes agreed the increase wouldn't be of as high a magnitude if compared to
the original code, but it's still a decent saving.
> I don't really like the config option. Either it's a good idea
> then it should be done by default without option or it should not be done at all.
>
> Did you do any lock intensive benchmarks that could show a slowdown?
I went for a file IO type benchmark, the differences looked like
statistical noise, possibly the best bet would be to check for cache
hits/misses.
> You should make this file assembly only.
Ok you're the second person to mention that, i don't have a problem with
switching to assembly only and dumping the exports in x8664_ksyms.c
Thanks,
Zwane
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2004-08-09 21:29 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-08-08 4:50 [PATCH][2.6] Completely out of line spinlocks / x86_64 Zwane Mwaikambo
2004-08-08 6:08 ` Zwane Mwaikambo
2004-08-09 11:23 ` Andi Kleen
2004-08-09 11:41 ` Marcelo Tosatti
2004-08-09 11:49 ` Marcelo Tosatti
2004-08-09 14:38 ` Matt Mackall
2004-08-09 21:31 ` Zwane Mwaikambo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox