* [PATCH] rw_semaphores, optimisations
@ 2001-04-22 0:27 D.W.Howells
2001-04-22 19:07 ` Andrea Arcangeli
0 siblings, 1 reply; 6+ messages in thread
From: D.W.Howells @ 2001-04-22 0:27 UTC (permalink / raw)
To: torvalds; +Cc: linux-kernel, dhowells, andrea, davem
[-- Attachment #1: Type: text/plain, Size: 868 bytes --]
This patch (made against linux-2.4.4-pre6) makes a number of changes to the
rwsem implementation:
(1) Fixes a subtle contention bug between up_write and the down_* functions.
(2) Optimises the i386 fastpath implementation and changes the slowpath
implementation to aid it.
- The arch/i386/lib/rwsem.S is now gone.
- Better inline asm constraints have been applied.
(3) Changed the sparc64 fastpath implementation to use revised slowpath
interface.
[Dave Miller: can you check this please]
(4) Makes the generic spinlock implementation non-inline.
- lib/rwsem.c has been duplicated to lib/rwsem-spinlock.c and a
slightly different algorithm has been created. This one is simpler
since it does not have to use atomic operations on the counters as
all accesses to them are governed by a blanket spinlock.
David
[-- Attachment #2: rw-semaphore optimisations patch --]
[-- Type: text/plain, Size: 29950 bytes --]
diff -uNr linux-2.4.4-pre6/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c
--- linux-2.4.4-pre6/arch/i386/kernel/i386_ksyms.c Sat Apr 21 21:24:25 2001
+++ linux/arch/i386/kernel/i386_ksyms.c Sat Apr 21 22:52:50 2001
@@ -80,11 +80,6 @@
EXPORT_SYMBOL_NOVERS(__down_failed_interruptible);
EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
EXPORT_SYMBOL_NOVERS(__up_wakeup);
-#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
-EXPORT_SYMBOL_NOVERS(__rwsem_down_write_failed);
-EXPORT_SYMBOL_NOVERS(__rwsem_down_read_failed);
-EXPORT_SYMBOL_NOVERS(__rwsem_wake);
-#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
/* Delay loops */
diff -uNr linux-2.4.4-pre6/arch/i386/lib/Makefile linux/arch/i386/lib/Makefile
--- linux-2.4.4-pre6/arch/i386/lib/Makefile Sat Apr 21 21:24:25 2001
+++ linux/arch/i386/lib/Makefile Sat Apr 21 22:52:50 2001
@@ -9,7 +9,7 @@
obj-y = checksum.o old-checksum.o delay.o \
usercopy.o getuser.o putuser.o \
- memcpy.o strstr.o rwsem.o
+ memcpy.o strstr.o
obj-$(CONFIG_X86_USE_3DNOW) += mmx.o
obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
diff -uNr linux-2.4.4-pre6/arch/i386/lib/rwsem.S linux/arch/i386/lib/rwsem.S
--- linux-2.4.4-pre6/arch/i386/lib/rwsem.S Sat Apr 21 21:24:25 2001
+++ linux/arch/i386/lib/rwsem.S Thu Jan 1 01:00:00 1970
@@ -1,36 +0,0 @@
-/* rwsem.S: R/W semaphores, register saving wrapper function stubs
- *
- * Written by David Howells (dhowells@redhat.com).
- * Derived from arch/i386/kernel/semaphore.c
- */
-
-.text
-.align 4
-.globl __rwsem_down_read_failed
-__rwsem_down_read_failed:
- pushl %edx
- pushl %ecx
- call rwsem_down_read_failed
- popl %ecx
- popl %edx
- ret
-
-.align 4
-.globl __rwsem_down_write_failed
-__rwsem_down_write_failed:
- pushl %edx
- pushl %ecx
- call rwsem_down_write_failed
- popl %ecx
- popl %edx
- ret
-
-.align 4
-.globl __rwsem_wake
-__rwsem_wake:
- pushl %edx
- pushl %ecx
- call rwsem_wake
- popl %ecx
- popl %edx
- ret
diff -uNr linux-2.4.4-pre6/include/asm-i386/rwsem.h linux/include/asm-i386/rwsem.h
--- linux-2.4.4-pre6/include/asm-i386/rwsem.h Sat Apr 21 21:24:32 2001
+++ linux/include/asm-i386/rwsem.h Sun Apr 22 00:54:15 2001
@@ -17,11 +17,6 @@
#include <linux/list.h>
#include <linux/spinlock.h>
-/* we use FASTCALL convention for the helpers */
-extern struct rw_semaphore *FASTCALL(__rwsem_down_read_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(__rwsem_down_write_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(__rwsem_wake(struct rw_semaphore *sem));
-
struct rwsem_waiter;
/*
@@ -41,11 +36,6 @@
#if RWSEM_DEBUG
int debug;
#endif
-#if RWSEM_DEBUG_MAGIC
- long __magic;
- atomic_t readers;
- atomic_t writers;
-#endif
};
/*
@@ -56,15 +46,10 @@
#else
#define __RWSEM_DEBUG_INIT /* */
#endif
-#if RWSEM_DEBUG_MAGIC
-#define __RWSEM_DEBUG_MINIT(name) , (int)&(name).__magic, ATOMIC_INIT(0), ATOMIC_INIT(0)
-#else
-#define __RWSEM_DEBUG_MINIT(name) /* */
-#endif
#define __RWSEM_INITIALIZER(name) \
{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, NULL, &(name).wait_front \
- __RWSEM_DEBUG_INIT __RWSEM_DEBUG_MINIT(name) }
+ __RWSEM_DEBUG_INIT }
#define DECLARE_RWSEM(name) \
struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -78,11 +63,6 @@
#if RWSEM_DEBUG
sem->debug = 0;
#endif
-#if RWSEM_DEBUG_MAGIC
- sem->__magic = (long)&sem->__magic;
- atomic_set(&sem->readers, 0);
- atomic_set(&sem->writers, 0);
-#endif
}
/*
@@ -97,7 +77,11 @@
"1:\n\t"
".section .text.lock,\"ax\"\n"
"2:\n\t"
- " call __rwsem_down_read_failed\n\t"
+ " pushl %%ecx\n\t"
+ " pushl %%edx\n\t"
+ " call rwsem_down_read_failed\n\t"
+ " popl %%edx\n\t"
+ " popl %%ecx\n\t"
" jmp 1b\n"
".previous"
"# ending down_read\n\t"
@@ -116,17 +100,19 @@
tmp = RWSEM_ACTIVE_WRITE_BIAS;
__asm__ __volatile__(
"# beginning down_write\n\t"
-LOCK_PREFIX " xadd %0,(%%eax)\n\t" /* subtract 0x00010001, returns the old value */
+LOCK_PREFIX " xadd %0,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
" testl %0,%0\n\t" /* was the count 0 before? */
" jnz 2f\n\t" /* jump if we weren't granted the lock */
"1:\n\t"
".section .text.lock,\"ax\"\n"
"2:\n\t"
- " call __rwsem_down_write_failed\n\t"
+ " pushl %%ecx\n\t"
+ " call rwsem_down_write_failed\n\t"
+ " popl %%ecx\n\t"
" jmp 1b\n"
".previous\n"
"# ending down_write"
- : "+r"(tmp), "=m"(sem->count)
+ : "+d"(tmp), "=m"(sem->count)
: "a"(sem), "m"(sem->count)
: "memory");
}
@@ -136,26 +122,23 @@
*/
static inline void __up_read(struct rw_semaphore *sem)
{
- int tmp;
-
- tmp = -RWSEM_ACTIVE_READ_BIAS;
__asm__ __volatile__(
"# beginning __up_read\n\t"
-LOCK_PREFIX " xadd %0,(%%eax)\n\t" /* subtracts 1, returns the old value */
+LOCK_PREFIX " xadd %%eax,(%%edx)\n\t" /* subtracts 1, returns the old value */
" js 2f\n\t" /* jump if the lock is being waited upon */
"1:\n\t"
".section .text.lock,\"ax\"\n"
"2:\n\t"
- " decl %0\n\t" /* xadd gave us the old count */
- " testl %3,%0\n\t" /* do nothing if still outstanding active readers */
+ " decl %%eax\n\t" /* xadd gave us the old count */
+ " testl %3,%%eax\n\t" /* do nothing if still outstanding active readers */
" jnz 1b\n\t"
- " call __rwsem_wake\n\t"
+ " call rwsem_up_read_wake\n\t"
" jmp 1b\n"
".previous\n"
"# ending __up_read\n"
- : "+r"(tmp), "=m"(sem->count)
- : "a"(sem), "i"(RWSEM_ACTIVE_MASK), "m"(sem->count)
- : "memory");
+ : "=m"(sem->count)
+ : "d"(sem), "a"(-RWSEM_ACTIVE_READ_BIAS), "i"(RWSEM_ACTIVE_MASK), "m"(sem->count)
+ : "memory", "ecx");
}
/*
@@ -165,21 +148,32 @@
{
__asm__ __volatile__(
"# beginning __up_write\n\t"
-LOCK_PREFIX " addl %2,(%%eax)\n\t" /* adds 0x0000ffff */
- " js 2f\n\t" /* jump if the lock is being waited upon */
+LOCK_PREFIX " cmpxchgl %%ecx,(%%edx)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
+ " jnz 2f\n\t" /* jump if the lock is being waited upon */
"1:\n\t"
".section .text.lock,\"ax\"\n"
"2:\n\t"
- " call __rwsem_wake\n\t"
+ " call rwsem_up_write_wake\n\t"
" jmp 1b\n"
".previous\n"
"# ending __up_write\n"
: "=m"(sem->count)
- : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS), "m"(sem->count)
+ : "d"(sem), "a"(RWSEM_ACTIVE_WRITE_BIAS), "c"(0), "m"(sem->count)
: "memory");
}
/*
+ * implement atomic add functionality
+ */
+static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+{
+ __asm__ __volatile__(
+LOCK_PREFIX "addl %1,%0"
+ :"=m"(sem->count)
+ :"ir"(delta), "m"(sem->count));
+}
+
+/*
* implement exchange and add functionality
*/
static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
@@ -187,9 +181,9 @@
int tmp = delta;
__asm__ __volatile__(
- LOCK_PREFIX "xadd %0,(%1)"
- : "+r"(tmp)
- : "r"(sem)
+LOCK_PREFIX "xadd %0,(%2)"
+ : "+r"(tmp), "=m"(sem->count)
+ : "r"(sem), "m"(sem->count)
: "memory");
return tmp+delta;
@@ -200,7 +194,31 @@
*/
static inline __u16 rwsem_cmpxchgw(struct rw_semaphore *sem, __u16 old, __u16 new)
{
- return cmpxchg((__u16*)&sem->count,0,RWSEM_ACTIVE_BIAS);
+ __u16 tmp = old;
+
+ __asm__ __volatile__(
+LOCK_PREFIX "cmpxchgw %w2,%3"
+ : "=a"(tmp), "=m"(sem->count)
+ : "r"(new), "m1"(sem->count), "a"(tmp)
+ : "memory");
+
+ return tmp;
+}
+
+/*
+ * implement compare and exchange functionality on the rw-semaphore count
+ */
+static inline signed long rwsem_cmpxchg(struct rw_semaphore *sem, signed long old, signed long new)
+{
+ signed long tmp = old;
+
+ __asm__ __volatile__(
+LOCK_PREFIX "cmpxchgl %2,%3"
+ : "=a"(tmp), "=m"(sem->count)
+ : "r"(new), "m1"(sem->count), "a"(tmp)
+ : "memory");
+
+ return tmp;
}
#endif /* __KERNEL__ */
diff -uNr linux-2.4.4-pre6/include/asm-sparc64/rwsem.h linux/include/asm-sparc64/rwsem.h
--- linux-2.4.4-pre6/include/asm-sparc64/rwsem.h Sat Apr 21 21:24:33 2001
+++ linux/include/asm-sparc64/rwsem.h Sat Apr 21 23:12:22 2001
@@ -2,7 +2,7 @@
* rwsem.h: R/W semaphores implemented using CAS
*
* Written by David S. Miller (davem@redhat.com), 2001.
- * Derived from asm-i386/rwsem-xadd.h
+ * Derived from asm-i386/rwsem.h
*/
#ifndef _SPARC64_RWSEM_H
#define _SPARC64_RWSEM_H
@@ -127,14 +127,15 @@
"save %%sp, -160, %%sp\n\t"
"mov %%g2, %%l2\n\t"
"mov %%g3, %%l3\n\t"
+ " mov %%g7, %%o0\n\t"
"call %1\n\t"
- " mov %%g5, %%o0\n\t"
+ " mov %%g5, %%o1\n\t"
"mov %%l2, %%g2\n\t"
"ba,pt %%xcc, 2b\n\t"
" restore %%l3, %%g0, %%g3\n\t"
".previous\n\t"
"! ending __up_read"
- : : "r" (sem), "i" (rwsem_wake),
+ : : "r" (sem), "i" (rwsem_up_read_wake),
"i" (RWSEM_ACTIVE_MASK)
: "g1", "g5", "g7", "memory", "cc");
}
@@ -145,31 +146,28 @@
"! beginning __up_write\n\t"
"sethi %%hi(%2), %%g1\n\t"
"or %%g1, %%lo(%2), %%g1\n"
- "1:\tlduw [%0], %%g5\n\t"
- "sub %%g5, %%g1, %%g7\n\t"
- "cas [%0], %%g5, %%g7\n\t"
- "cmp %%g5, %%g7\n\t"
- "bne,pn %%icc, 1b\n\t"
- " sub %%g7, %%g1, %%g7\n\t"
- "cmp %%g7, 0\n\t"
- "bl,pn %%icc, 3f\n\t"
+ "sub %%g5, %%g5, %%g5\n\t"
+ "cas [%0], %%g1, %%g5\n\t"
+ "cmp %%g1, %%g5\n\t"
+ "bne,pn %%icc, 1f\n\t"
" membar #StoreStore\n"
"2:\n\t"
".subsection 2\n"
- "3:\tmov %0, %%g5\n\t"
+ "3:\tmov %0, %%g1\n\t"
"save %%sp, -160, %%sp\n\t"
"mov %%g2, %%l2\n\t"
"mov %%g3, %%l3\n\t"
+ "mov %%g1, %%o0\n\t"
"call %1\n\t"
- " mov %%g5, %%o0\n\t"
+ " mov %%g5, %%o1\n\t"
"mov %%l2, %%g2\n\t"
"ba,pt %%xcc, 2b\n\t"
" restore %%l3, %%g0, %%g3\n\t"
".previous\n\t"
"! ending __up_write"
- : : "r" (sem), "i" (rwsem_wake),
+ : : "r" (sem), "i" (rwsem_up_write_wake),
"i" (RWSEM_ACTIVE_WRITE_BIAS)
- : "g1", "g5", "g7", "memory", "cc");
+ : "g1", "g5", "memory", "cc");
}
static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
@@ -191,6 +189,8 @@
return tmp + delta;
}
+#define rwsem_atomic_add rwsem_atomic_update
+
static inline __u16 rwsem_cmpxchgw(struct rw_semaphore *sem, __u16 __old, __u16 __new)
{
u32 old = (sem->count & 0xffff0000) | (u32) __old;
@@ -212,6 +212,11 @@
goto again;
return prev & 0xffff;
+}
+
+static inline signed long rwsem_cmpxchg(struct rw_semaphore *sem, signed long old, signed long new)
+{
+ return cmpxchg(&sem->count,old,new);
}
#endif /* __KERNEL__ */
diff -uNr linux-2.4.4-pre6/include/linux/rwsem-spinlock.h linux/include/linux/rwsem-spinlock.h
--- linux-2.4.4-pre6/include/linux/rwsem-spinlock.h Sat Apr 21 21:24:33 2001
+++ linux/include/linux/rwsem-spinlock.h Sun Apr 22 00:54:15 2001
@@ -22,24 +22,14 @@
* the semaphore definition
*/
struct rw_semaphore {
- signed long count;
-#define RWSEM_UNLOCKED_VALUE 0x00000000
-#define RWSEM_ACTIVE_BIAS 0x00000001
-#define RWSEM_ACTIVE_MASK 0x0000ffff
-#define RWSEM_WAITING_BIAS (-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+ __u32 active;
+ __u32 waiting;
spinlock_t wait_lock;
struct rwsem_waiter *wait_front;
struct rwsem_waiter **wait_back;
#if RWSEM_DEBUG
int debug;
#endif
-#if RWSEM_DEBUG_MAGIC
- long __magic;
- atomic_t readers;
- atomic_t writers;
-#endif
};
/*
@@ -50,119 +40,18 @@
#else
#define __RWSEM_DEBUG_INIT /* */
#endif
-#if RWSEM_DEBUG_MAGIC
-#define __RWSEM_DEBUG_MINIT(name) , (int)&(name).__magic, ATOMIC_INIT(0), ATOMIC_INIT(0)
-#else
-#define __RWSEM_DEBUG_MINIT(name) /* */
-#endif
#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, NULL, &(name).wait_front \
- __RWSEM_DEBUG_INIT __RWSEM_DEBUG_MINIT(name) }
+{ 0, 0, SPIN_LOCK_UNLOCKED, NULL, &(name).wait_front __RWSEM_DEBUG_INIT }
#define DECLARE_RWSEM(name) \
struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- sem->wait_front = NULL;
- sem->wait_back = &sem->wait_front;
-#if RWSEM_DEBUG
- sem->debug = 0;
-#endif
-#if RWSEM_DEBUG_MAGIC
- sem->__magic = (long)&sem->__magic;
- atomic_set(&sem->readers, 0);
- atomic_set(&sem->writers, 0);
-#endif
-}
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- int count;
- spin_lock(&sem->wait_lock);
- sem->count += RWSEM_ACTIVE_READ_BIAS;
- count = sem->count;
- spin_unlock(&sem->wait_lock);
- if (count<0)
- rwsem_down_read_failed(sem);
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
- int count;
- spin_lock(&sem->wait_lock);
- count = sem->count;
- sem->count += RWSEM_ACTIVE_WRITE_BIAS;
- spin_unlock(&sem->wait_lock);
- if (count)
- rwsem_down_write_failed(sem);
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- int count;
- spin_lock(&sem->wait_lock);
- count = sem->count;
- sem->count -= RWSEM_ACTIVE_READ_BIAS;
- spin_unlock(&sem->wait_lock);
- if (count<0 && !((count-RWSEM_ACTIVE_READ_BIAS)&RWSEM_ACTIVE_MASK))
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- int count;
- spin_lock(&sem->wait_lock);
- sem->count -= RWSEM_ACTIVE_WRITE_BIAS;
- count = sem->count;
- spin_unlock(&sem->wait_lock);
- if (count<0)
- rwsem_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- * - only called when spinlock is already held
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
- int count;
-
- sem->count += delta;
- count = sem->count;
-
- return count;
-}
-
-/*
- * implement compare and exchange functionality on the rw-semaphore count LSW
- * - only called by __rwsem_do_wake(), so spinlock is already held when called
- */
-static inline __u16 rwsem_cmpxchgw(struct rw_semaphore *sem, __u16 old, __u16 new)
-{
- __u16 prev;
-
- prev = sem->count & RWSEM_ACTIVE_MASK;
- if (prev==old)
- sem->count = (sem->count & ~RWSEM_ACTIVE_MASK) | new;
-
- return prev;
-}
+extern void FASTCALL(init_rwsem(struct rw_semaphore *sem));
+extern void FASTCALL(__down_read(struct rw_semaphore *sem));
+extern void FASTCALL(__down_write(struct rw_semaphore *sem));
+extern void FASTCALL(__up_read(struct rw_semaphore *sem));
+extern void FASTCALL(__up_write(struct rw_semaphore *sem));
#endif /* __KERNEL__ */
#endif /* _LINUX_RWSEM_SPINLOCK_H */
diff -uNr linux-2.4.4-pre6/include/linux/rwsem.h linux/include/linux/rwsem.h
--- linux-2.4.4-pre6/include/linux/rwsem.h Sat Apr 21 21:24:33 2001
+++ linux/include/linux/rwsem.h Sun Apr 22 00:54:15 2001
@@ -34,7 +34,6 @@
#include <linux/linkage.h>
#define RWSEM_DEBUG 0
-#define RWSEM_DEBUG_MAGIC 0
#ifdef __KERNEL__
@@ -47,11 +46,12 @@
/* defined contention handler functions for the generic case
* - these are also used for the exchange-and-add based algorithm
*/
-#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK) || defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
/* we use FASTCALL convention for the helpers */
extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *sem));
+extern void FASTCALL(rwsem_up_read_wake(signed long, struct rw_semaphore *));
+extern void FASTCALL(rwsem_up_write_wake(signed long, struct rw_semaphore *));
#endif
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
@@ -74,20 +74,7 @@
static inline void down_read(struct rw_semaphore *sem)
{
rwsemtrace(sem,"Entering down_read");
-
-#if RWSEM_DEBUG_MAGIC
- if (sem->__magic != (long)&sem->__magic)
- BUG();
-#endif
-
__down_read(sem);
-
-#if RWSEM_DEBUG_MAGIC
- if (atomic_read(&sem->writers))
- BUG();
- atomic_inc(&sem->readers);
-#endif
-
rwsemtrace(sem,"Leaving down_read");
}
@@ -97,22 +84,7 @@
static inline void down_write(struct rw_semaphore *sem)
{
rwsemtrace(sem,"Entering down_write");
-
-#if RWSEM_DEBUG_MAGIC
- if (sem->__magic != (long)&sem->__magic)
- BUG();
-#endif
-
__down_write(sem);
-
-#if RWSEM_DEBUG_MAGIC
- if (atomic_read(&sem->writers))
- BUG();
- if (atomic_read(&sem->readers))
- BUG();
- atomic_inc(&sem->writers);
-#endif
-
rwsemtrace(sem,"Leaving down_write");
}
@@ -122,14 +94,7 @@
static inline void up_read(struct rw_semaphore *sem)
{
rwsemtrace(sem,"Entering up_read");
-
-#if RWSEM_DEBUG_MAGIC
- if (atomic_read(&sem->writers))
- BUG();
- atomic_dec(&sem->readers);
-#endif
__up_read(sem);
-
rwsemtrace(sem,"Leaving up_read");
}
@@ -139,16 +104,7 @@
static inline void up_write(struct rw_semaphore *sem)
{
rwsemtrace(sem,"Entering up_write");
-
-#if RWSEM_DEBUG_MAGIC
- if (atomic_read(&sem->readers))
- BUG();
- if (atomic_read(&sem->writers) != 1)
- BUG();
- atomic_dec(&sem->writers);
-#endif
__up_write(sem);
-
rwsemtrace(sem,"Leaving up_write");
}
diff -uNr linux-2.4.4-pre6/lib/Makefile linux/lib/Makefile
--- linux-2.4.4-pre6/lib/Makefile Sat Apr 21 21:24:33 2001
+++ linux/lib/Makefile Sun Apr 22 00:07:33 2001
@@ -8,14 +8,12 @@
L_TARGET := lib.a
-export-objs := cmdline.o
+export-objs := cmdline.o rwsem-spinlock.o rwsem.o
obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o
-ifneq ($(CONFIG_RWSEM_GENERIC_SPINLOCK)$(CONFIG_RWSEM_XCHGADD_ALGORITHM),nn)
-export-objs += rwsem.o
-obj-y += rwsem.o
-endif
+obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
obj-y += dec_and_lock.o
diff -uNr linux-2.4.4-pre6/lib/rwsem-spinlock.c linux/lib/rwsem-spinlock.c
--- linux-2.4.4-pre6/lib/rwsem-spinlock.c Thu Jan 1 01:00:00 1970
+++ linux/lib/rwsem-spinlock.c Sun Apr 22 00:58:45 2001
@@ -0,0 +1,245 @@
+/* rwsem-spinlock.c: R/W semaphores: contention handling functions for generic spinlock
+ * implementation
+ *
+ * Copyright (c) 2001 David Howells (dhowells@redhat.com).
+ */
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+struct rwsem_waiter {
+ struct rwsem_waiter *next;
+ struct task_struct *task;
+ unsigned int flags;
+#define RWSEM_WAITING_FOR_READ 0x00000001
+#define RWSEM_WAITING_FOR_WRITE 0x00000002
+};
+
+#if RWSEM_DEBUG
+void rwsemtrace(struct rw_semaphore *sem, const char *str)
+{
+ if (sem->debug)
+ printk("[%d] %s({%d,%d})\n",current->pid,str,sem->active,sem->waiting);
+}
+#endif
+
+/*
+ * initialise the semaphore
+ */
+void init_rwsem(struct rw_semaphore *sem)
+{
+ sem->active = 0;
+ sem->waiting = 0;
+ spin_lock_init(&sem->wait_lock);
+ sem->wait_front = NULL;
+ sem->wait_back = &sem->wait_front;
+#if RWSEM_DEBUG
+ sem->debug = 0;
+#endif
+}
+
+/*
+ * handle the lock being released whilst there are processes blocked on it that can now run
+ * - if we come here, then:
+ * - the 'active count' _reached_ zero
+ * - the 'waiting count' is non-zero
+ * - the spinlock must be held by the caller
+ * - woken process blocks are discarded from the list after having flags zeroised
+ */
+static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem)
+{
+ struct rwsem_waiter *waiter, *next;
+ int woken, loop;
+
+ rwsemtrace(sem,"Entering __rwsem_do_wake");
+
+ waiter = sem->wait_front;
+
+ if (__builtin_expect(!waiter,0))
+ goto list_unexpectedly_empty;
+
+ next = NULL;
+
+ /* try to grant a single write lock if there's a writer at the front of the queue
+ * - we leave the 'waiting count' incremented to signify potential contention
+ */
+ if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
+ sem->active++;
+ next = waiter->next;
+ waiter->flags = 0;
+ wake_up_process(waiter->task);
+ goto discard_woken_processes;
+ }
+
+ /* grant an infinite number of read locks to the readers at the front of the queue */
+ woken = 0;
+ do {
+ woken++;
+ waiter = waiter->next;
+ } while (waiter && waiter->flags&RWSEM_WAITING_FOR_READ);
+
+ sem->active += woken;
+ sem->waiting -= woken;
+
+ waiter = sem->wait_front;
+ for (loop=woken; loop>0; loop--) {
+ next = waiter->next;
+ waiter->flags = 0;
+ wake_up_process(waiter->task);
+ waiter = next;
+ }
+
+ discard_woken_processes:
+ sem->wait_front = next;
+ if (!next) sem->wait_back = &sem->wait_front;
+
+ out:
+ rwsemtrace(sem,"Leaving __rwsem_do_wake");
+ return sem;
+
+ list_unexpectedly_empty:
+ printk("__rwsem_do_wake(): wait_list unexpectedly empty\n");
+ printk("[%d] %p = { %d, %d })\n",current->pid,sem,sem->active,sem->waiting);
+ BUG();
+ goto out;
+}
+
+/*
+ * get a read lock on the semaphore
+ */
+void __down_read(struct rw_semaphore *sem)
+{
+ struct rwsem_waiter waiter;
+ struct task_struct *tsk = current;
+
+ rwsemtrace(sem,"Entering __down_read");
+
+ spin_lock(&sem->wait_lock);
+
+ if (!sem->waiting) {
+ /* granted */
+ sem->active++;
+ spin_unlock(&sem->wait_lock);
+ goto out;
+ }
+ sem->waiting++;
+
+ set_task_state(tsk,TASK_UNINTERRUPTIBLE);
+
+ /* set up my own style of waitqueue */
+ waiter.next = NULL;
+ waiter.task = tsk;
+ waiter.flags = RWSEM_WAITING_FOR_READ;
+
+ *sem->wait_back = &waiter; /* add to back of queue */
+ sem->wait_back = &waiter.next;
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock(&sem->wait_lock);
+
+ /* wait to be given the lock */
+ for (;;) {
+ if (!waiter.flags)
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+
+ out:
+ rwsemtrace(sem,"Leaving __down_read");
+}
+
+/*
+ * get a write lock on the semaphore
+ * - note that we increment the waiting count anyway to indicate an exclusive lock
+ */
+void __down_write(struct rw_semaphore *sem)
+{
+ struct rwsem_waiter waiter;
+ struct task_struct *tsk = current;
+
+ rwsemtrace(sem,"Entering __down_write");
+
+ spin_lock(&sem->wait_lock);
+
+ if (!sem->waiting && !sem->active) {
+ /* granted */
+ sem->active++;
+ sem->waiting++;
+ spin_unlock(&sem->wait_lock);
+ goto out;
+ }
+ sem->waiting++;
+
+ set_task_state(tsk,TASK_UNINTERRUPTIBLE);
+
+ /* set up my own style of waitqueue */
+ waiter.next = NULL;
+ waiter.task = tsk;
+ waiter.flags = RWSEM_WAITING_FOR_WRITE;
+
+ *sem->wait_back = &waiter; /* add to back of queue */
+ sem->wait_back = &waiter.next;
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock(&sem->wait_lock);
+
+ /* wait to be given the lock */
+ for (;;) {
+ if (!waiter.flags)
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+
+ out:
+ rwsemtrace(sem,"Leaving __down_write");
+}
+
+/*
+ * release a read lock on the semaphore
+ */
+void __up_read(struct rw_semaphore *sem)
+{
+ rwsemtrace(sem,"Entering __up_read");
+
+ spin_lock(&sem->wait_lock);
+
+ if (--sem->active==0 && sem->waiting)
+ __rwsem_do_wake(sem);
+
+ spin_unlock(&sem->wait_lock);
+
+ rwsemtrace(sem,"Leaving __up_read");
+}
+
+/*
+ * release a write lock on the semaphore
+ */
+void __up_write(struct rw_semaphore *sem)
+{
+ rwsemtrace(sem,"Entering __up_write");
+
+ spin_lock(&sem->wait_lock);
+
+ sem->waiting--;
+ if (--sem->active==0 && sem->waiting)
+ __rwsem_do_wake(sem);
+
+ spin_unlock(&sem->wait_lock);
+
+ rwsemtrace(sem,"Leaving __up_write");
+}
+
+EXPORT_SYMBOL(init_rwsem);
+EXPORT_SYMBOL(__down_read);
+EXPORT_SYMBOL(__down_write);
+EXPORT_SYMBOL(__up_read);
+EXPORT_SYMBOL(__up_write);
+#if RWSEM_DEBUG
+EXPORT_SYMBOL(rwsemtrace);
+#endif
diff -uNr linux-2.4.4-pre6/lib/rwsem.c linux/lib/rwsem.c
--- linux-2.4.4-pre6/lib/rwsem.c Sat Apr 21 21:24:33 2001
+++ linux/lib/rwsem.c Sun Apr 22 00:32:26 2001
@@ -14,57 +14,36 @@
#define RWSEM_WAITING_FOR_READ 0x00000001
#define RWSEM_WAITING_FOR_WRITE 0x00000002
};
-#define RWSEM_WAITER_MAGIC 0x52575345
-
-static struct rw_semaphore *FASTCALL(__rwsem_do_wake(struct rw_semaphore *sem));
#if RWSEM_DEBUG
void rwsemtrace(struct rw_semaphore *sem, const char *str)
{
if (sem->debug)
- printk("[%d] %s(count=%08lx)\n",current->pid,str,sem->count);
+ printk("[%d] %s({%08lx})\n",current->pid,str,sem->count);
}
#endif
/*
* handle the lock being released whilst there are processes blocked on it that can now run
+ * - the caller can specify an adjustment that will need to be made to the semaphore count to
+ * help reduce the number of atomic operations invoked
* - if we come here, then:
- * - the 'active part' of the count (&0x0000ffff) reached zero (but may no longer be zero)
+ * - the 'active part' of the count (&0x0000ffff) reached zero but has been re-incremented
* - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so)
- * - the spinlock must be held before entry
- * - woken process blocks are discarded from the list after having flags zeroised
+ * - the spinlock must be held by the caller
+ * - woken process blocks are discarded from the list after having flags zeroised
*/
-static struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem)
+static inline struct rw_semaphore *__rwsem_do_wake(int adjustment, struct rw_semaphore *sem)
{
struct rwsem_waiter *waiter, *next;
int woken, loop;
rwsemtrace(sem,"Entering __rwsem_do_wake");
- /* try to grab an 'activity' marker
- * - need to make sure two copies of rwsem_wake() don't do this for two separate processes
- * simultaneously
- * - be horribly naughty, and only deal with the LSW of the atomic counter
- */
- if (rwsem_cmpxchgw(sem,0,RWSEM_ACTIVE_BIAS)!=0) {
- rwsemtrace(sem,"__rwsem_do_wake: abort wakeup due to renewed activity");
- goto out;
- }
-
- /* check the wait queue is populated */
waiter = sem->wait_front;
- if (__builtin_expect(!waiter,0)) {
- printk("__rwsem_do_wake(): wait_list unexpectedly empty\n");
- BUG();
- goto out;
- }
-
- if (__builtin_expect(!waiter->flags,0)) {
- printk("__rwsem_do_wake(): wait_list front apparently not waiting\n");
- BUG();
- goto out;
- }
+ if (__builtin_expect(!waiter,0))
+ goto list_unexpectedly_empty;
next = NULL;
@@ -73,6 +52,8 @@
* incremented by 0x00010000
*/
if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
+ if (adjustment)
+ rwsem_atomic_add(adjustment,sem);
next = waiter->next;
waiter->flags = 0;
wake_up_process(waiter->task);
@@ -92,7 +73,8 @@
loop = woken;
woken *= RWSEM_ACTIVE_BIAS-RWSEM_WAITING_BIAS;
woken -= RWSEM_ACTIVE_BIAS;
- rwsem_atomic_update(woken,sem);
+ woken += adjustment;
+ rwsem_atomic_add(woken,sem);
waiter = sem->wait_front;
for (; loop>0; loop--) {
@@ -109,6 +91,12 @@
out:
rwsemtrace(sem,"Leaving __rwsem_do_wake");
return sem;
+
+ list_unexpectedly_empty:
+ printk("__rwsem_do_wake(): wait_list unexpectedly empty\n");
+ printk("[%d] %p = { %08lx })\n",current->pid,sem,sem->count);
+ BUG();
+ goto out;
}
/*
@@ -123,7 +111,7 @@
signed long count;
rwsemtrace(sem,"Entering rwsem_down_read_failed");
-
+
set_task_state(tsk,TASK_UNINTERRUPTIBLE);
/* set up my own style of waitqueue */
@@ -141,9 +129,11 @@
/* if there are no longer active locks, wake the front queued process(es) up
* - it might even be this process, since the waker takes a more active part
+ * - should enter __rwsem_do_wake() only on a transition 0->1 in the LSW
*/
if (!(count & RWSEM_ACTIVE_MASK))
- __rwsem_do_wake(sem);
+ if (rwsem_cmpxchgw(sem,0,RWSEM_ACTIVE_BIAS)==0)
+ __rwsem_do_wake(0,sem);
spin_unlock(&sem->wait_lock);
@@ -189,9 +179,11 @@
/* if there are no longer active locks, wake the front queued process(es) up
* - it might even be this process, since the waker takes a more active part
+ * - should enter __rwsem_do_wake() only on a transition 0->1 in the LSW
*/
if (!(count & RWSEM_ACTIVE_MASK))
- __rwsem_do_wake(sem);
+ if (rwsem_cmpxchgw(sem,0,RWSEM_ACTIVE_BIAS)==0)
+ __rwsem_do_wake(0,sem);
spin_unlock(&sem->wait_lock);
@@ -210,25 +202,64 @@
}
/*
- * spinlock grabbing wrapper for __rwsem_do_wake()
+ * handle up_read() finding a waiter on the semaphore
+ * - up_read has decremented the active part of the count if we come here
*/
-struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+void rwsem_up_read_wake(signed long count, struct rw_semaphore *sem)
{
- rwsemtrace(sem,"Entering rwsem_wake");
+ rwsemtrace(sem,"Entering rwsem_up_read_wake");
spin_lock(&sem->wait_lock);
- sem = __rwsem_do_wake(sem);
+ /* need to wake up a waiter unless the semaphore has gone active again
+ * - should enter __rwsem_do_wake() only on a transition 0->1 in the LSW
+ */
+ if (rwsem_cmpxchgw(sem,0,RWSEM_ACTIVE_BIAS)==0)
+ sem = __rwsem_do_wake(0,sem);
spin_unlock(&sem->wait_lock);
- rwsemtrace(sem,"Leaving rwsem_wake");
- return sem;
+ rwsemtrace(sem,"Leaving rwsem_up_read_wake");
+}
+
+/*
+ * handle up_write() finding a waiter on the semaphore
+ * - up_write has not modified the count if we come here
+ */
+void rwsem_up_write_wake(signed long count, struct rw_semaphore *sem)
+{
+ signed long new;
+
+ rwsemtrace(sem,"Entering rwsem_up_write_wake");
+
+ spin_lock(&sem->wait_lock);
+
+ try_again:
+ /* if the active part of the count is 1, we should perform a wake-up, else we should
+ * decrement the count and return
+ */
+ if ((count&RWSEM_ACTIVE_MASK)==RWSEM_ACTIVE_BIAS) {
+ sem = __rwsem_do_wake(-RWSEM_WAITING_BIAS,sem);
+ }
+ else {
+ /* tricky - we mustn't return the active part of the count to 0 */
+ new = count - RWSEM_ACTIVE_WRITE_BIAS;
+ new = rwsem_cmpxchg(sem,count,new);
+ if (count!=new) {
+ count = new;
+ goto try_again;
+ }
+ }
+
+ spin_unlock(&sem->wait_lock);
+
+ rwsemtrace(sem,"Leaving rwsem_up_write_wake");
}
EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
-EXPORT_SYMBOL(rwsem_wake);
+EXPORT_SYMBOL(rwsem_up_read_wake);
+EXPORT_SYMBOL(rwsem_up_write_wake);
#if RWSEM_DEBUG
EXPORT_SYMBOL(rwsemtrace);
#endif
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] rw_semaphores, optimisations
2001-04-22 0:27 [PATCH] rw_semaphores, optimisations D.W.Howells
@ 2001-04-22 19:07 ` Andrea Arcangeli
2001-04-22 19:16 ` Andrea Arcangeli
0 siblings, 1 reply; 6+ messages in thread
From: Andrea Arcangeli @ 2001-04-22 19:07 UTC (permalink / raw)
To: D . W . Howells; +Cc: torvalds, linux-kernel, dhowells, davem
[-- Attachment #1: Type: text/plain, Size: 4826 bytes --]
On Sun, Apr 22, 2001 at 01:27:20AM +0100, D . W . Howells wrote:
> This patch (made against linux-2.4.4-pre6) makes a number of changes to the
> rwsem implementation:
>
> (1) Fixes a subtle contention bug between up_write and the down_* functions.
>
> (2) Optimises the i386 fastpath implementation and changed the slowpath
> implementation to aid it.
> - The arch/i386/lib/rwsem.c is now gone.
> - Better inline asm constraints have been applied.
>
> (3) Changed the sparc64 fastpath implementation to use revised slowpath
> interface.
> [Dave Miller: can you check this please]
>
> (4) Makes the generic spinlock implementation non-inline.
> - lib/rwsem.c has been duplicated to lib/rwsem-spinlock.c and a
> slightly different algorithm has been created. This one is simpler
> since it does not have to use atomic operations on the counters as
> all accesses to them are governed by a blanket spinlock.
I finally finished dropping the list_empty() check from my generic rwsemaphores.
The new behaviour of my rwsemaphores are:
- max sleepers and max simultaneous readers both downgraded to 2^(BITS_PER_LONG>>1) for
higher performance in the fast path
- up_* cannot be recalled from irq/softirq context anymore
I'm using this your latest patch on top of vanilla 2.4.4-pre6 to benchmark my
new generic rwsemaphores against yours.
To benchmark I'm using your tar.gz but I left my 50 seconds run of the rwtest
with many more readers and writers to stress it a bit more (I posted the
changes to the rwsem-rw.c test in the message where I reported the race in your
semaphores before pre6).
Here the results (I did two tries for every bench and btw the numbers are
quite stable as you can see).
rwsem-2.4.4-pre6 + my new generic rwsem (fast path in C inlined)
rw
reads taken: 6499121
writes taken: 3355701
reads taken: 6413447
writes taken: 3311328
r1
reads taken: 15218540
reads taken: 15203915
r2
reads taken: 5087253
reads taken: 5099084
ro
reads taken: 4274607
reads taken: 4280280
w1
writes taken: 14723159
writes taken: 14708296
wo
writes taken: 1778713
writes taken: 1776248
----------------------------------------------
rwsem-2.4.4-pre6 + my new generic rwsem with fast path out of line (fast path in C out of line)
rw
reads taken: 6116063
writes taken: 3157816
reads taken: 6248542
writes taken: 3226122
r1
reads taken: 14092045
reads taken: 14112771
r2
reads taken: 4002635
reads taken: 4006940
ro
reads taken: 4150747
reads taken: 4150279
w1
writes taken: 13655019
writes taken: 13639011
wo
writes taken: 1757065
writes taken: 1758623
----------------------------------------------
RWSEM_GENERIC_SPINLOCK y in rwsem-2.4.4-pre6 + your latest #try2 (fast path in C out of line)
rw
reads taken: 5872682
writes taken: 3032179
reads taken: 5892582
writes taken: 3042346
r1
reads taken: 13079190
reads taken: 13104405
r2
reads taken: 3907702
reads taken: 3906729
ro
reads taken: 3005924
reads taken: 3006690
w1
writes taken: 12581209
writes taken: 12570627
wo
writes taken: 1376782
writes taken: 1328468
----------------------------------------------
RWSEM_XCHGADD y in rwsem-2.4.4-pre6 + your latest #try2 (fast path in asm in line)
rw
reads taken: 5789650
writes taken: 2989604
reads taken: 5776594
writes taken: 2982812
r1
reads taken: 16935488
reads taken: 16930738
r2
reads taken: 5646446
reads taken: 5651600
ro
reads taken: 4952654
reads taken: 4956992
w1
writes taken: 15432874
writes taken: 15408684
wo
writes taken: 814131
writes taken: 815551
To make it more readable I plotted the numbers in a graph (attached).
So my new C based rw semaphores are faster than your C based semaphores in
all tests and they're even faster than your x86 asm inlined semaphores when
there's contention (however it's probably normal that with contention the asm
semaphores are slower so I'm not sure I can make the asm inlined semaphore
faster than yours). I will try to boost my new rwsem with asm in the fast path
now (it won't be easy but possibly by only changing a few lines of the slow
path inside an #ifdef CONFIG_...) to see where can I go. Once I will have an
asm inlined fast path for >=486 integrated I will upload a patch but if you
want a patch now with only the new generic C implementation to verify the above
numbers let me know of course (ah and btw the machine is a 2-way PII 450mhz).
I think it's interesting also the comparison of my generic rwsem when inlined
and not inlined (first four columns). BTW, I also tried to move my only lock in
a separated cacheline (growing the size of the rwsem over 32bytes) and that
hurt a lot.
Note also that your wo results with your asm version (the one in pre6 + your
latest try#2) are scoring more than two times worse than my generic semaphores.
Andrea
[-- Attachment #2: rwsem.png --]
[-- Type: image/png, Size: 6137 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] rw_semaphores, optimisations
2001-04-22 19:07 ` Andrea Arcangeli
@ 2001-04-22 19:16 ` Andrea Arcangeli
0 siblings, 0 replies; 6+ messages in thread
From: Andrea Arcangeli @ 2001-04-22 19:16 UTC (permalink / raw)
To: D . W . Howells; +Cc: torvalds, linux-kernel, dhowells, davem
On Sun, Apr 22, 2001 at 09:07:03PM +0200, Andrea Arcangeli wrote:
> On Sun, Apr 22, 2001 at 01:27:20AM +0100, D . W . Howells wrote:
btw, I noticed I answered your previous email but for my benchmarks I really
used your latest #try2 posted today at 13 (not last night a 1am), just to avoid
mistakes this is the the md5sum of the patch I applied on top of pre6:
510c05d168c2b60e0d9c804381579b51 rwsem.diff
Andrea
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] rw_semaphores, optimisations
@ 2001-04-22 22:52 D.W.Howells
2001-04-23 1:04 ` Andrea Arcangeli
0 siblings, 1 reply; 6+ messages in thread
From: D.W.Howells @ 2001-04-22 22:52 UTC (permalink / raw)
To: andrea; +Cc: torvalds, linux-kernel, dhowells
[-- Attachment #1: Type: text/plain, Size: 2995 bytes --]
Hello Andrea,
Interesting benchmarks... did you compile the test programs with "make
SCHED=yes" by any chance? Also what other software are you running?
The reason I ask is that running a full blown KDE setup running in the
background, I get the following numbers on the rwsem-ro test (XADD optimised
kernel):
SCHED: 4615646, 4530769, 4534453 and 4628365
no SCHED: 6311620, 6312776, 6327772 and 6325508
Also quite stable as you can see.
> (ah and btw the machine is a 2-way PII 450mhz).
Your numbers were "4274607" and "4280280" for this kernel and test. This I
find a little surprising. I'd expect them to be about 10% higher than I get on
my machine given your faster CPUs.
What compiler are you using? I'm using the following:
Reading specs from /usr/lib/gcc-lib/i386-redhat-linux/2.96/specs
gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-80)
Something else that I noticed: Playing a music CD appears to improve the
benchmarks all round:-) Must be some interrupt effect of some sort, or maybe
they just like the music...
> rwsem-2.4.4-pre6 + my new generic rwsem (fast path in C inlined)
Linus wants out of line generic code only, I believe. Hence why I made my
generic code out of line.
I have noticed one glaring potential slowdown in my generic code's down
functions. I've got the following in _both_ fastpaths!:
struct task_struct *tsk = current;
It shouldn't hurt _too_ much (its only reg->reg anyway), but it will have an
effect. I'll have to move it and post another patch tomorrow.
I've also been comparing the assembly from the two generic spinlock
implementations (having out-of-lined yours in what I think is the way you'd
have done it). I've noticed a number of things:
(1) My fastpaths have slightly fewer instructions in them
(2) gcc-2.96-20000731 produces what looks like much less efficient code
than gcc-snapshot-20010409 (to be expected, I suppose).
(3) Both compilers do insane things to registers (like in one instruction
moving %eax to %edx and then moving it back again in the next).
(4) If _any_ inline assembly is used, the compiler grabs extra chunks of
stack which it does not then use. It will then pop these into registers
under some circumstances. It'll also save random registers it doesn't
clobber under others.
(Basically, I had a lot of frustrating fun playing around with the spinlock
asm constraints trying to avoid the compiler clobbering registers
unnecessarily because of them).
I've attached the source file I've been playing with and an example
disassembly dump for your amusement. I used the snapshot gcc to do this (it
emits conditional chunks of code out of line more intelligently than the
older one).
It's also interesting that your generic out-of-line semaphores are faster
given the fact that you muck around with EFLAGS and CLI/STI, and I don't.
Maybe I'm getting hit by an interrupt. I'll have to play around with it and
benchmark it again.
David
[-- Attachment #2: rwsem implementations slowpath comparison C source code --]
[-- Type: text/x-plain, Size: 1989 bytes --]
/* slowpath.c: description
*
* Copyright (c) 2001 David Howells (dhowells@redhat.com).
*/
#define __KERNEL__
#include <asm/types.h>
#include <asm/current.h>
#include <asm/system.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
/*
 * David Howells' rwsem layout for this comparison harness: two separate
 * counters, both manipulated only while wait_lock is held
 */
struct rw_semaphore_dwh {
__u32 active;		/* number of holders currently inside the lock */
__u32 waiting;		/* number of tasks queued waiting for the lock */
spinlock_t wait_lock;	/* blanket lock guarding both counters */
};
extern void FASTCALL(dwh_down_read(struct rw_semaphore_dwh *));
extern void FASTCALL(dwh_up_read(struct rw_semaphore_dwh *));
extern void FASTCALL(dwh_down_read_failed(struct rw_semaphore_dwh *,
struct task_struct *));
extern void FASTCALL(dwh__rwsem_do_wake(struct rw_semaphore_dwh *));
/*
 * Andrea Arcangeli's rwsem layout for this comparison harness: a single
 * count plus a wait list, all guarded by an IRQ-disabling spinlock
 */
struct rw_semaphore_aa {
spinlock_t lock;	/* blanket lock over count and wait */
int count;		/* lock state; negative presumably blocks readers (see aa_down_read) — TODO confirm exact encoding */
struct list_head wait;	/* queue of sleeping tasks */
};
#define RWSEM_WAITQUEUE_READ 0
extern void FASTCALL(aa_down_read(struct rw_semaphore_aa *));
extern void FASTCALL(aa_up_read(struct rw_semaphore_aa *));
extern void FASTCALL(aa_down_failed(struct rw_semaphore_aa *, int));
extern void FASTCALL(aa_rwsem_wake(struct rw_semaphore_aa *));
/*
 * grab a read lock on the DWH-style semaphore
 * - both paths run entirely under sem->wait_lock
 * - FIX: the original ended with a bare "out:" label directly before the
 *   closing brace; a label must precede a statement (prior to C23), so the
 *   function did not compile — restructured with an early return instead
 * - NOTE(review): the fast path is taken when sem->waiting is non-zero,
 *   which looks inverted relative to the usual rwsem rule (grant only when
 *   no waiters are queued) — confirm this is intended for the codegen test
 */
void dwh_down_read(struct rw_semaphore_dwh *sem)
{
	struct task_struct *tsk = current;

	spin_lock(&sem->wait_lock);

	if (sem->waiting) {
		/* fast path: bump the active count and leave */
		sem->active++;
		spin_unlock(&sem->wait_lock);
		return;
	}

	/* slow path: record a waiter and let the out-of-line helper block */
	sem->waiting++;
	dwh_down_read_failed(sem, tsk);
	spin_unlock(&sem->wait_lock);
}
/*
 * grab a read lock on the AA-style semaphore
 * - the whole operation runs under the IRQ-disabling spinlock
 * - the lock is granted inline when no writer blocks us (count >= 0) and
 *   the wait queue is empty; otherwise aa_down_failed() handles contention
 */
void aa_down_read(struct rw_semaphore_aa *sem)
{
	spin_lock_irq(&sem->lock);
	if (sem->count >= 0 && list_empty(&sem->wait))
		sem->count++;
	else
		aa_down_failed(sem, RWSEM_WAITQUEUE_READ);
	spin_unlock_irq(&sem->lock);
}
/*
 * release a read lock on the DWH-style semaphore
 * - drops the active count under wait_lock and invokes the wakeup helper
 *   when the last active holder leaves while waiters are queued
 */
void dwh_up_read(struct rw_semaphore_dwh *sem)
{
	spin_lock(&sem->wait_lock);
	sem->active--;
	if (sem->active == 0 && sem->waiting)
		dwh__rwsem_do_wake(sem);
	spin_unlock(&sem->wait_lock);
}
/*
 * release a read lock on the AA-style semaphore
 * - uses the irqsave/irqrestore lock variants, so the caller's interrupt
 *   state is preserved
 * - wakes sleepers when the count hits zero while the queue is non-empty
 */
void aa_up_read(struct rw_semaphore_aa *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	sem->count--;
	if (sem->count == 0 && !list_empty(&sem->wait))
		aa_rwsem_wake(sem);
	spin_unlock_irqrestore(&sem->lock, flags);
}
[-- Attachment #3: disassembly of compiled slowpath.c --]
[-- Type: text/plain, Size: 5480 bytes --]
slowpath.o: file format elf32-i386
Disassembly of section .text:
00000000 <dwh_down_read>:
0: 53 push %ebx
1: 83 ec 08 sub $0x8,%esp
4: 89 c3 mov %eax,%ebx
6: ba 00 e0 ff ff mov $0xffffe000,%edx
b: 21 e2 and %esp,%edx
d: f0 fe 4b 08 lock decb 0x8(%ebx)
11: 0f 88 fc ff ff ff js 13 <dwh_down_read+0x13>
17: 8b 4b 04 mov 0x4(%ebx),%ecx
1a: 85 c9 test %ecx,%ecx
1c: 74 0a je 28 <dwh_down_read+0x28>
1e: ff 03 incl (%ebx)
20: c6 43 08 01 movb $0x1,0x8(%ebx)
24: 58 pop %eax
25: 5a pop %edx
26: 5b pop %ebx
27: c3 ret
28: c7 43 04 01 00 00 00 movl $0x1,0x4(%ebx)
2f: 89 d8 mov %ebx,%eax
31: e8 fc ff ff ff call 32 <dwh_down_read+0x32>
36: eb e8 jmp 20 <dwh_down_read+0x20>
00000038 <aa_down_read>:
38: 53 push %ebx
39: 83 ec 08 sub $0x8,%esp
3c: 89 c3 mov %eax,%ebx
3e: fa cli
3f: f0 fe 0b lock decb (%ebx)
42: 0f 88 09 00 00 00 js 51 <aa_down_read+0x19>
48: 8b 53 04 mov 0x4(%ebx),%edx
4b: 85 d2 test %edx,%edx
4d: 78 19 js 68 <aa_down_read+0x30>
4f: 8d 43 08 lea 0x8(%ebx),%eax
52: 39 43 08 cmp %eax,0x8(%ebx)
55: 75 11 jne 68 <aa_down_read+0x30>
57: 8d 42 01 lea 0x1(%edx),%eax
5a: 89 43 04 mov %eax,0x4(%ebx)
5d: c6 03 01 movb $0x1,(%ebx)
60: fb sti
61: 5b pop %ebx
62: 58 pop %eax
63: 5b pop %ebx
64: c3 ret
65: 8d 76 00 lea 0x0(%esi),%esi
68: 31 d2 xor %edx,%edx
6a: 89 d8 mov %ebx,%eax
6c: e8 fc ff ff ff call 6d <aa_down_read+0x35>
71: eb ea jmp 5d <aa_down_read+0x25>
73: 90 nop
00000074 <dwh_up_read>:
74: 53 push %ebx
75: 83 ec 08 sub $0x8,%esp
78: 89 c3 mov %eax,%ebx
7a: f0 fe 4b 08 lock decb 0x8(%ebx)
7e: 0f 88 15 00 00 00 js 99 <dwh_up_read+0x25>
84: 8b 03 mov (%ebx),%eax
86: 48 dec %eax
87: 89 03 mov %eax,(%ebx)
89: 85 c0 test %eax,%eax
8b: 74 0b je 98 <dwh_up_read+0x24>
8d: c6 43 08 01 movb $0x1,0x8(%ebx)
91: 5a pop %edx
92: 59 pop %ecx
93: 5b pop %ebx
94: c3 ret
95: 8d 76 00 lea 0x0(%esi),%esi
98: 8b 43 04 mov 0x4(%ebx),%eax
9b: 85 c0 test %eax,%eax
9d: 74 ee je 8d <dwh_up_read+0x19>
9f: 89 d8 mov %ebx,%eax
a1: e8 fc ff ff ff call a2 <dwh_up_read+0x2e>
a6: eb e5 jmp 8d <dwh_up_read+0x19>
000000a8 <aa_up_read>:
a8: 56 push %esi
a9: 53 push %ebx
aa: 51 push %ecx
ab: 89 c3 mov %eax,%ebx
ad: 9c pushf
ae: 5e pop %esi
af: fa cli
b0: f0 fe 0b lock decb (%ebx)
b3: 0f 88 22 00 00 00 js db <aa_up_read+0x33>
b9: 8b 43 04 mov 0x4(%ebx),%eax
bc: 48 dec %eax
bd: 89 43 04 mov %eax,0x4(%ebx)
c0: 85 c0 test %eax,%eax
c2: 74 0c je d0 <aa_up_read+0x28>
c4: c6 03 01 movb $0x1,(%ebx)
c7: 56 push %esi
c8: 9d popf
c9: 5a pop %edx
ca: 5b pop %ebx
cb: 5e pop %esi
cc: c3 ret
cd: 8d 76 00 lea 0x0(%esi),%esi
d0: 8d 43 08 lea 0x8(%ebx),%eax
d3: 39 43 08 cmp %eax,0x8(%ebx)
d6: 74 ec je c4 <aa_up_read+0x1c>
d8: 89 d8 mov %ebx,%eax
da: e8 fc ff ff ff call db <aa_up_read+0x33>
df: eb e3 jmp c4 <aa_up_read+0x1c>
Disassembly of section .text.lock:
00000000 <.text.lock>:
0: 80 7b 08 00 cmpb $0x0,0x8(%ebx)
4: f3 90 repz nop
6: 7e f8 jle 0 <.text.lock>
8: e9 09 00 00 00 jmp 16 <.text.lock+0x16>
d: 80 3b 00 cmpb $0x0,(%ebx)
10: f3 90 repz nop
12: 7e f9 jle d <.text.lock+0xd>
14: e9 3b 00 00 00 jmp 54 <aa_down_read+0x1c>
19: 80 7b 08 00 cmpb $0x0,0x8(%ebx)
1d: f3 90 repz nop
1f: 7e f8 jle 19 <.text.lock+0x19>
21: e9 76 00 00 00 jmp 9c <dwh_up_read+0x28>
26: 80 3b 00 cmpb $0x0,(%ebx)
29: f3 90 repz nop
2b: 7e f9 jle 26 <.text.lock+0x26>
2d: e9 ac 00 00 00 jmp de <aa_up_read+0x36>
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] rw_semaphores, optimisations
2001-04-22 22:52 D.W.Howells
@ 2001-04-23 1:04 ` Andrea Arcangeli
2001-04-23 1:12 ` Andrea Arcangeli
0 siblings, 1 reply; 6+ messages in thread
From: Andrea Arcangeli @ 2001-04-23 1:04 UTC (permalink / raw)
To: D . W . Howells; +Cc: torvalds, linux-kernel, dhowells
On Sun, Apr 22, 2001 at 11:52:29PM +0100, D . W . Howells wrote:
> Hello Andrea,
>
> Interesting benchmarks... did you compile the test programs with "make
> SCHED=yes" by any chance? Also what other software are you running?
No I never tried the SCHED=yes. However in my modification of the rwsem-rw bench
I dropped the #ifdef SCHED completly and I schedule the right way (first
checking need_resched) in a more interesting place (in the middle of the
critical section).
> The reason I ask is that running a full blown KDE setup running in the
> background, I get the following numbers on the rwsem-ro test (XADD optimised
> kernel):
>
> SCHED: 4615646, 4530769, 4534453 and 4628365
> no SCHED: 6311620, 6312776, 6327772 and 6325508
No absolutely not, that machine has nearly only the kernel daemons running
in background (even cron is disabled to make sure it doesn't screwup
the benchmarks). This is how the machine looks like before running the
bench.
andrea@laser:~ > ps xa
PID TTY STAT TIME COMMAND
1 ? S 0:03 init [2]
2 ? SW 0:00 [keventd]
3 ? SW 0:00 [kswapd]
4 ? SW 0:00 [kreclaimd]
5 ? SW 0:00 [bdflush]
6 ? SW 0:00 [kupdated]
7 ? SW< 0:00 [mdrecoveryd]
123 ? S 0:00 /sbin/dhcpcd -d eth0
150 ? S 0:00 /sbin/portmap
168 ? S 0:00 /usr/sbin/syslogd -m 1000
172 ? S 0:00 /usr/sbin/klogd -c 5
220 ? S 0:00 /usr/sbin/sshd
254 ? S 0:00 /usr/sbin/automount /misc file /etc/auto.misc
256 ? S 0:00 /usr/sbin/automount /net program /etc/auto.net
271 ? S 0:00 /usr/sbin/rpc.kstatd
276 ? S 0:00 /usr/sbin/rpc.kmountd
278 ? SW 0:00 [nfsd]
279 ? SW 0:00 [nfsd]
280 ? SW 0:00 [nfsd]
281 ? SW 0:00 [nfsd]
282 ? SW 0:00 [lockd]
283 ? SW 0:00 [rpciod]
459 ? S 0:00 /usr/sbin/inetd
461 tty1 S 0:00 /sbin/mingetty --noclear tty1
462 tty2 S 0:00 /sbin/mingetty tty2
463 tty3 S 0:00 /sbin/mingetty tty3
464 tty4 S 0:00 /sbin/mingetty tty4
465 tty5 S 0:00 /sbin/mingetty tty5
466 tty6 S 0:00 /sbin/mingetty tty6
1177 ? S 0:00 in.rlogind
1178 pts/0 S 0:00 login -- andrea
1179 pts/0 S 0:00 -bash
1186 pts/0 R 0:00 ps xa
andrea@laser:~ >
> > (ah and btw the machine is a 2-way PII 450mhz).
>
> Your numbers were "4274607" and "4280280" for this kernel and test This I
> find a little suprising. I'd expect them to be about 10% higher than I get on
> my machine given your faster CPUs.
>
> What compiler are you using? I'm using the following:
>
> Reading specs from /usr/lib/gcc-lib/i386-redhat-linux/2.96/specs
> gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-80)
andrea@athlon:~ > gcc -v
Reading specs from /home/andrea/bin/i686/gcc-2_95-branch-20010325/lib/gcc-lib/i686-pc-linux-gnu/2.95.4/specs
gcc version 2.95.4 20010319 (prerelease)
andrea@athlon:~ >
ah and btw, I also used the builtin expect in all the fast path but they were
compiled out by the preprocessor because I'm compiling with <96.
> Something else that I noticed: Playing a music CD appears to improve the
> benchmarks all round:-) Must be some interrupt effect of some sort, or maybe
> they just like the music...
The machine is a test box without soundcard, disk was idle.
> > rwsem-2.4.4-pre6 + my new generic rwsem (fast path in C inlined)
>
> Linus wants out of line generic code only, I believe. Hence why I made my
> generic code out of line.
I also did a run with my code out of line of course and as you can see
it's not a relevant penality.
> I have noticed one glaring potential slowdown in my generic code's down
> functions. I've got the following in _both_ fastpaths!:
>
> struct task_struct *tsk = current;
that is supposed to be a performance optimization, I do the same too in my code.
> It's also interesting that your generic out-of-line semaphores are faster
> given the fact that you muck around with EFLAGS and CLI/STI, and I don't.
as said in my last email I changed the semantics and you cannot call up_* from
irq context anymore, so in short I'm not mucking with cli/sti/eflags anymore.
Note that I didn't released anything but the bench yet, I am finishing to
plugin an asm fast path on top of my slow path and then I will run new
benchmark and post some code.
But my generic semaphore is also smaller, it's 16 byte in size even in SMP both
the asm optimized rwsem and the C generic one (of course on 32bit archs, for
64bit archs is slightly bigger than 16 bytes).
Andrea
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] rw_semaphores, optimisations
2001-04-23 1:04 ` Andrea Arcangeli
@ 2001-04-23 1:12 ` Andrea Arcangeli
0 siblings, 0 replies; 6+ messages in thread
From: Andrea Arcangeli @ 2001-04-23 1:12 UTC (permalink / raw)
To: D . W . Howells; +Cc: torvalds, linux-kernel, dhowells
On Mon, Apr 23, 2001 at 03:04:41AM +0200, Andrea Arcangeli wrote:
> that is supposed to be a performance optimization, I do the same too in my code.
ah no I see what you mean, yes you are hurt by that. I'm awaiting your #try 3
against pre6, by that time I hope to be able to make a run of the benchmark of
my asm version too (I will grow the graph).
Andrea
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2001-04-23 1:12 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2001-04-22 0:27 [PATCH] rw_semaphores, optimisations D.W.Howells
2001-04-22 19:07 ` Andrea Arcangeli
2001-04-22 19:16 ` Andrea Arcangeli
-- strict thread matches above, loose matches on Subject: below --
2001-04-22 22:52 D.W.Howells
2001-04-23 1:04 ` Andrea Arcangeli
2001-04-23 1:12 ` Andrea Arcangeli
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox