* [PATCH][1/4] Completely out of line spinlocks / i386
@ 2004-08-21 18:28 Zwane Mwaikambo
2004-08-21 18:34 ` Andrew Morton
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Zwane Mwaikambo @ 2004-08-21 18:28 UTC (permalink / raw)
To: Linux Kernel; +Cc: Andrew Morton, Andi Kleen, Linus Torvalds, Keith Owens
-- from previous announce --
Pulled from the -tiny tree, the focus of this patch is for reduced kernel
image size but in the process we benefit from improved cache performance
since it's possible for the common text to be present in cache. This is
probably more of a win on shared cache multiprocessor systems like
P4/Xeon HT. It's been benchmarked with bonnie++ on 2x and 4x PIII (my
ideal target would be a 4x+ logical cpu Xeon).
--
Changes have been made based on feedback from various people, most notably
the addition of profiling support for readprofile and oprofile.
arch/i386/Kconfig.debug | 10 ++++++++++
arch/i386/kernel/i386_ksyms.c | 5 +++++
arch/i386/lib/Makefile | 1 +
arch/i386/lib/spinlock.S | 33 +++++++++++++++++++++++++++++++++
include/asm-i386/spinlock.h | 27 +++++++++++++++++++++++++--
5 files changed, 74 insertions(+), 2 deletions(-)
Signed-off-by: Zwane Mwaikambo <zwane@fsmlabs.com>
Index: linux-2.6.8.1-mm3/include/asm-i386/spinlock.h
===================================================================
RCS file: /home/cvsroot/linux-2.6.8.1-mm3/include/asm-i386/spinlock.h,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 spinlock.h
--- linux-2.6.8.1-mm3/include/asm-i386/spinlock.h 21 Aug 2004 13:15:01 -0000 1.1.1.1
+++ linux-2.6.8.1-mm3/include/asm-i386/spinlock.h 21 Aug 2004 16:14:44 -0000
@@ -43,6 +43,18 @@ typedef struct {
#define spin_is_locked(x) (*(volatile signed char *)(&(x)->lock) <= 0)
#define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x))
+#ifdef CONFIG_COOL_SPINLOCK
+extern void __spin_lock_loop(void);
+extern void __spin_lock_loop_flags(void);
+extern unsigned long start_spin_lock_text;
+extern unsigned long end_spin_lock_text;
+
+ #define spin_lock_string \
+ "call __spin_lock_loop\n\t"
+
+ #define spin_lock_string_flags \
+ "call __spin_lock_loop_flags\n\t"
+#else
#define spin_lock_string \
"\n1:\t" \
"lock ; decb %0\n\t" \
@@ -71,6 +83,7 @@ typedef struct {
"cli\n\t" \
"jmp 1b\n" \
LOCK_SECTION_END
+#endif
/*
* This works. Despite all the confusion.
@@ -139,7 +152,12 @@ here:
#endif
__asm__ __volatile__(
spin_lock_string
- :"=m" (lock->lock) : : "memory");
+#ifdef CONFIG_COOL_SPINLOCK
+ : : "a" (&lock->lock) : "memory"
+#else
+ :"=m" (lock->lock) : : "memory"
+#endif
+ );
}
static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
@@ -154,7 +172,12 @@ here:
#endif
__asm__ __volatile__(
spin_lock_string_flags
- :"=m" (lock->lock) : "r" (flags) : "memory");
+#ifdef CONFIG_COOL_SPINLOCK
+ : : "a" (&lock->lock), "d" (flags) : "memory"
+#else
+ :"=m" (lock->lock) : "r" (flags) : "memory"
+#endif
+ );
}
/*
Index: linux-2.6.8.1-mm3/arch/i386/Kconfig.debug
===================================================================
RCS file: /home/cvsroot/linux-2.6.8.1-mm3/arch/i386/Kconfig.debug,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 Kconfig.debug
--- linux-2.6.8.1-mm3/arch/i386/Kconfig.debug 21 Aug 2004 13:14:49 -0000 1.1.1.1
+++ linux-2.6.8.1-mm3/arch/i386/Kconfig.debug 21 Aug 2004 16:01:47 -0000
@@ -74,6 +74,16 @@ config SCHEDSTATS
application, you can say N to avoid the very slight overhead
this adds.
+config COOL_SPINLOCK
+ bool "Completely out of line spinlocks"
+ depends on SMP
+ default y
+ help
+ Say Y here to build spinlocks which have common text for contended
+ and uncontended paths. This reduces kernel text size by at least
+ 50k on most configurations, plus there is the additional benefit
+ of better cache utilisation.
+
config X86_FIND_SMP_CONFIG
bool
depends on X86_LOCAL_APIC || X86_VOYAGER
Index: linux-2.6.8.1-mm3/arch/i386/kernel/i386_ksyms.c
===================================================================
RCS file: /home/cvsroot/linux-2.6.8.1-mm3/arch/i386/kernel/i386_ksyms.c,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 i386_ksyms.c
--- linux-2.6.8.1-mm3/arch/i386/kernel/i386_ksyms.c 21 Aug 2004 13:14:49 -0000 1.1.1.1
+++ linux-2.6.8.1-mm3/arch/i386/kernel/i386_ksyms.c 21 Aug 2004 16:17:00 -0000
@@ -57,6 +57,11 @@ extern struct drive_info_struct drive_in
EXPORT_SYMBOL(drive_info);
#endif
+#ifdef CONFIG_COOL_SPINLOCK
+EXPORT_SYMBOL(__spin_lock_loop);
+EXPORT_SYMBOL(__spin_lock_loop_flags);
+#endif
+
extern unsigned long cpu_khz;
extern unsigned long get_cmos_time(void);
Index: linux-2.6.8.1-mm3/arch/i386/lib/Makefile
===================================================================
RCS file: /home/cvsroot/linux-2.6.8.1-mm3/arch/i386/lib/Makefile,v
retrieving revision 1.1.1.1
diff -u -p -B -r1.1.1.1 Makefile
--- linux-2.6.8.1-mm3/arch/i386/lib/Makefile 21 Aug 2004 13:14:49 -0000 1.1.1.1
+++ linux-2.6.8.1-mm3/arch/i386/lib/Makefile 21 Aug 2004 15:43:31 -0000
@@ -6,6 +6,7 @@
lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
bitops.o
+lib-$(CONFIG_COOL_SPINLOCK) += spinlock.o
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
lib-$(CONFIG_KGDB) += kgdb_serial.o
Index: linux-2.6.8.1-mm3/arch/i386/lib/spinlock.S
===================================================================
RCS file: linux-2.6.8.1-mm3/arch/i386/lib/spinlock.S
diff -N linux-2.6.8.1-mm3/arch/i386/lib/spinlock.S
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ linux-2.6.8.1-mm3/arch/i386/lib/spinlock.S 21 Aug 2004 15:52:13 -0000
@@ -0,0 +1,33 @@
+#include <linux/config.h>
+#include <linux/linkage.h>
+
+.globl start_spin_lock_text
+start_spin_lock_text:
+
+ENTRY(__spin_lock_loop_flags)
+ lock; decb (%eax)
+ js 1f
+ nop
+ ret
+ 1:
+ testl $0x200, %edx
+ jz 1f
+ sti
+ 2: rep; nop
+ cmpb $0, (%eax)
+ jle 2b
+ cli
+ jmp __spin_lock_loop_flags
+
+ENTRY(__spin_lock_loop)
+ lock; decb (%eax)
+ js 1f
+ nop
+ ret
+ 1: rep; nop
+ cmpb $0, (%eax)
+ jle 1b
+ jmp __spin_lock_loop
+
+.globl end_spin_lock_text
+end_spin_lock_text:
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-21 18:28 [PATCH][1/4] Completely out of line spinlocks / i386 Zwane Mwaikambo
@ 2004-08-21 18:34 ` Andrew Morton
2004-08-21 18:42 ` Zwane Mwaikambo
2004-08-21 18:35 ` Andrew Morton
2004-08-22 20:21 ` Linus Torvalds
2 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2004-08-21 18:34 UTC (permalink / raw)
To: Zwane Mwaikambo; +Cc: linux-kernel, ak, torvalds, kaos
Zwane Mwaikambo <zwane@fsmlabs.com> wrote:
>
> It's been benchmarked with bonnie++ on 2x and 4x PIII
What were the results?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-21 18:28 [PATCH][1/4] Completely out of line spinlocks / i386 Zwane Mwaikambo
2004-08-21 18:34 ` Andrew Morton
@ 2004-08-21 18:35 ` Andrew Morton
2004-08-22 5:41 ` Zwane Mwaikambo
2004-08-22 20:21 ` Linus Torvalds
2 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2004-08-21 18:35 UTC (permalink / raw)
To: Zwane Mwaikambo; +Cc: linux-kernel, ak, torvalds, kaos
Zwane Mwaikambo <zwane@fsmlabs.com> wrote:
>
> the focus of this patch is for reduced kernel image size
By how much does it reduce kernel image size?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-21 18:35 ` Andrew Morton
@ 2004-08-22 5:41 ` Zwane Mwaikambo
0 siblings, 0 replies; 10+ messages in thread
From: Zwane Mwaikambo @ 2004-08-22 5:41 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, ak, torvalds, kaos
On Sat, 21 Aug 2004, Andrew Morton wrote:
> Zwane Mwaikambo <zwane@fsmlabs.com> wrote:
> >
> > the focus of this patch is for reduced kernel image size
>
> By how much does it reduce kernel image size?
Something like;
text data bss dec hex filename
5527214 873510 321872 6722596 669424 vmlinux-before
5480308 867964 321872 6670144 65c740 vmlinux-after
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-21 18:28 [PATCH][1/4] Completely out of line spinlocks / i386 Zwane Mwaikambo
2004-08-21 18:34 ` Andrew Morton
2004-08-21 18:35 ` Andrew Morton
@ 2004-08-22 20:21 ` Linus Torvalds
2004-08-22 21:53 ` Zwane Mwaikambo
2 siblings, 1 reply; 10+ messages in thread
From: Linus Torvalds @ 2004-08-22 20:21 UTC (permalink / raw)
To: Zwane Mwaikambo; +Cc: Linux Kernel, Andrew Morton, Andi Kleen, Keith Owens
On Sat, 21 Aug 2004, Zwane Mwaikambo wrote:
>
> Pulled from the -tiny tree, the focus of this patch is for reduced kernel
> image size but in the process we benefit from improved cache performance
> since it's possible for the common text to be present in cache. This is
> probably more of a win on shared cache multiprocessor systems like
> P4/Xeon HT. It's been benchmarked with bonnie++ on 2x and 4x PIII (my
> ideal target would be a 4x+ logical cpu Xeon).
I _really_ think that if we're going to make spinlocks be out-of-line,
then we need to out-of-line the preemption code too.
And at that point I'm more than happy to just make it unconditional,
assuming the profiling thing (which was my only worry) has been verified.
And I suspect that the all-C version is pretty much equivalent to the
assembler one, if you use FASTCALL() to make gcc at least use register
argument passing conventions. The advantage is much clearer code, I'd say.
Linus
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-22 20:21 ` Linus Torvalds
@ 2004-08-22 21:53 ` Zwane Mwaikambo
2004-08-23 0:29 ` Keith Owens
0 siblings, 1 reply; 10+ messages in thread
From: Zwane Mwaikambo @ 2004-08-22 21:53 UTC (permalink / raw)
To: Linus Torvalds
Cc: Linux Kernel, Andrew Morton, Andi Kleen, Keith Owens,
William Lee Irwin III
On Sun, 22 Aug 2004, Linus Torvalds wrote:
> On Sat, 21 Aug 2004, Zwane Mwaikambo wrote:
> >
> > Pulled from the -tiny tree, the focus of this patch is for reduced kernel
> > image size but in the process we benefit from improved cache performance
> > since it's possible for the common text to be present in cache. This is
> > probably more of a win on shared cache multiprocessor systems like
> > P4/Xeon HT. It's been benchmarked with bonnie++ on 2x and 4x PIII (my
> > ideal target would be a 4x+ logical cpu Xeon).
>
> I _really_ think that if we're going to make spinlocks be out-of-line,
> then we need to out-of-line the preemption code too.
Good point, Bill saw a lot of extra saving by moving the preemption code
out of line too.
> And at that point I'm more than happy to just make it unconditional,
> assuming the profiling thing (which was my only worry) has been verified.
With the readprofile and oprofile changes it's still not that easy to
determine which locks are being contended as the samples generally are
being charged to the function the lock is being contended in. So some
investigation has to be done when looking at profiles. This could be
remedied by making the valid PC range include data or, preferably, moving
spinlock variables into a special section. That way we can simply
report back the lock word during sampling.
> And I suspect that the all-C version is pretty much equivalent to the
> assembler one, if you use FASTCALL() to make gcc at least use register
> argument passing conventions. The advantage is much clearer code, I'd say.
Yes, I agree there, and it would probably allow for better optimisation by
gcc during call setup.
Thanks,
Zwane
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-22 21:53 ` Zwane Mwaikambo
@ 2004-08-23 0:29 ` Keith Owens
2004-08-23 4:40 ` David S. Miller
0 siblings, 1 reply; 10+ messages in thread
From: Keith Owens @ 2004-08-23 0:29 UTC (permalink / raw)
To: Zwane Mwaikambo
Cc: Linus Torvalds, Linux Kernel, Andrew Morton, Andi Kleen,
William Lee Irwin III
On Sun, 22 Aug 2004 17:53:29 -0400 (EDT),
Zwane Mwaikambo <zwane@linuxpower.ca> wrote:
>With the readprofile and oprofile changes it's still not that easy to
>determine which locks are being contended as the samples generally are
>being charged to the function the lock is being contended in. So some
>investigation has to be done when looking at profiles. This could be
>remedied by making the valid PC range include data or, preferably, moving
>spinlock variables into a special section. That way we can simply
>report back the lock word during sampling.
kdb attempts to decode the lock address on ia64. A lot of the time,
the lock is dynamically allocated (think inodes) so symbol lookup is no
good. I find that decoding the lock is useful but not required,
the function that contended on the lock is more interesting.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-23 0:29 ` Keith Owens
@ 2004-08-23 4:40 ` David S. Miller
2004-08-23 8:20 ` Zwane Mwaikambo
0 siblings, 1 reply; 10+ messages in thread
From: David S. Miller @ 2004-08-23 4:40 UTC (permalink / raw)
To: Keith Owens; +Cc: zwane, torvalds, linux-kernel, akpm, ak, wli
On Mon, 23 Aug 2004 10:29:48 +1000
Keith Owens <kaos@ocs.com.au> wrote:
> I find that decoding the lock is useful but not required,
> the function that contended on the lock is more interesting.
This is my belief as well.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH][1/4] Completely out of line spinlocks / i386
2004-08-23 4:40 ` David S. Miller
@ 2004-08-23 8:20 ` Zwane Mwaikambo
0 siblings, 0 replies; 10+ messages in thread
From: Zwane Mwaikambo @ 2004-08-23 8:20 UTC (permalink / raw)
To: David S. Miller; +Cc: Keith Owens, torvalds, linux-kernel, akpm, ak, wli
On Sun, 22 Aug 2004, David S. Miller wrote:
> On Mon, 23 Aug 2004 10:29:48 +1000
> Keith Owens <kaos@ocs.com.au> wrote:
>
> > I find that decoding the lock is useful but not required,
> > the function that contended on the lock is more interesting.
>
> This is my belief as well.
That's great then, we have that covered =)
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2004-08-23 8:16 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-08-21 18:28 [PATCH][1/4] Completely out of line spinlocks / i386 Zwane Mwaikambo
2004-08-21 18:34 ` Andrew Morton
2004-08-21 18:42 ` Zwane Mwaikambo
2004-08-21 18:35 ` Andrew Morton
2004-08-22 5:41 ` Zwane Mwaikambo
2004-08-22 20:21 ` Linus Torvalds
2004-08-22 21:53 ` Zwane Mwaikambo
2004-08-23 0:29 ` Keith Owens
2004-08-23 4:40 ` David S. Miller
2004-08-23 8:20 ` Zwane Mwaikambo
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.