public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* x86-32: clean up rwsem inline asm statements
@ 2010-01-13  0:21 Linus Torvalds
  2010-01-13  0:45 ` Andreas Schwab
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Linus Torvalds @ 2010-01-13  0:21 UTC (permalink / raw)
  To: Ingo Molnar, Peter Anvin, Thomas Gleixner; +Cc: Linux Kernel Mailing List


This makes gcc use the right register names and instruction operand sizes 
automatically for the rwsem inline asm statements.

So instead of using "(%%eax)" to specify the memory address that is the 
semaphore, we use "(%1)" or similar. And instead of forcing the operation 
to always be 32-bit, we use "%z0", taking the size from the actual 
semaphore data structure itself.

This doesn't actually matter on x86-32, but if we want to use the same 
inline asm for x86-64, we'll need to have the compiler generate the proper 
64-bit names for the registers (%rax instead of %eax), and if we want to 
use a 64-bit counter too (in order to avoid the 15-bit limit on the 
write counter that limits concurrent users to 32767 threads), we'll need 
to be able to generate instructions with "q" accesses rather than "l".

Since this header currently isn't enabled on x86-64, none of that matters, 
but we do want to use the xadd version of the semaphores rather than have 
to take spinlocks to do a rwsem. The mm->mmap_sem can be heavily contended 
when you have lots of threads all taking page faults, and the fallback 
rwsem code that uses a spinlock performs abysmally badly in that case.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---

This really should be a no-op, but it does seem to do something to gcc so 
that when I do a "cmp" on the whole kernel before and after, it doesn't 
match. The differences _seem_ to be some code size issue that change all 
the offsets, it's a bit hard to figure it out from the binary.

Anyway, this is purely preparatory, but I think it cleans up the asm too, 
so...

		Linus

 arch/x86/include/asm/rwsem.h |   30 +++++++++++++++---------------
 1 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ca7517d..4a884e0 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -105,7 +105,7 @@ do {								\
 static inline void __down_read(struct rw_semaphore *sem)
 {
 	asm volatile("# beginning down_read\n\t"
-		     LOCK_PREFIX "  incl      (%%eax)\n\t"
+		     LOCK_PREFIX "  inc%z0      (%1)\n\t"
 		     /* adds 0x00000001, returns the old value */
 		     "  jns        1f\n"
 		     "  call call_rwsem_down_read_failed\n"
@@ -123,10 +123,10 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 {
 	__s32 result, tmp;
 	asm volatile("# beginning __down_read_trylock\n\t"
-		     "  movl      %0,%1\n\t"
+		     "  mov%z0      %0,%1\n\t"
 		     "1:\n\t"
-		     "  movl	     %1,%2\n\t"
-		     "  addl      %3,%2\n\t"
+		     "  mov%z0	    %1,%2\n\t"
+		     "  add%z0      %3,%2\n\t"
 		     "  jle	     2f\n\t"
 		     LOCK_PREFIX "  cmpxchgl  %2,%0\n\t"
 		     "  jnz	     1b\n\t"
@@ -147,9 +147,9 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 
 	tmp = RWSEM_ACTIVE_WRITE_BIAS;
 	asm volatile("# beginning down_write\n\t"
-		     LOCK_PREFIX "  xadd      %%edx,(%%eax)\n\t"
+		     LOCK_PREFIX "  xadd%z0     %1,(%2)\n\t"
 		     /* subtract 0x0000ffff, returns the old value */
-		     "  testl     %%edx,%%edx\n\t"
+		     "  test%z0     %1,%1\n\t"
 		     /* was the count 0 before? */
 		     "  jz        1f\n"
 		     "  call call_rwsem_down_write_failed\n"
@@ -185,7 +185,7 @@ static inline void __up_read(struct rw_semaphore *sem)
 {
 	__s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
 	asm volatile("# beginning __up_read\n\t"
-		     LOCK_PREFIX "  xadd      %%edx,(%%eax)\n\t"
+		     LOCK_PREFIX "  xadd%z0     %1,(%2)\n\t"
 		     /* subtracts 1, returns the old value */
 		     "  jns        1f\n\t"
 		     "  call call_rwsem_wake\n"
@@ -201,18 +201,18 @@ static inline void __up_read(struct rw_semaphore *sem)
  */
 static inline void __up_write(struct rw_semaphore *sem)
 {
+	unsigned long tmp;
 	asm volatile("# beginning __up_write\n\t"
-		     "  movl      %2,%%edx\n\t"
-		     LOCK_PREFIX "  xaddl     %%edx,(%%eax)\n\t"
+		     LOCK_PREFIX "  xadd%z0     %1,(%2)\n\t"
 		     /* tries to transition
 			0xffff0001 -> 0x00000000 */
 		     "  jz       1f\n"
 		     "  call call_rwsem_wake\n"
 		     "1:\n\t"
 		     "# ending __up_write\n"
-		     : "+m" (sem->count)
-		     : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS)
-		     : "memory", "cc", "edx");
+		     : "+m" (sem->count),"=d" (tmp)
+		     : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS)
+		     : "memory", "cc");
 }
 
 /*
@@ -221,7 +221,7 @@ static inline void __up_write(struct rw_semaphore *sem)
 static inline void __downgrade_write(struct rw_semaphore *sem)
 {
 	asm volatile("# beginning __downgrade_write\n\t"
-		     LOCK_PREFIX "  addl      %2,(%%eax)\n\t"
+		     LOCK_PREFIX "  add%z0     %2,(%1)\n\t"
 		     /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
 		     "  jns       1f\n\t"
 		     "  call call_rwsem_downgrade_wake\n"
@@ -237,7 +237,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
  */
 static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
 {
-	asm volatile(LOCK_PREFIX "addl %1,%0"
+	asm volatile(LOCK_PREFIX "add%z0 %1,%0"
 		     : "+m" (sem->count)
 		     : "ir" (delta));
 }
@@ -249,7 +249,7 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 {
 	int tmp = delta;
 
-	asm volatile(LOCK_PREFIX "xadd %0,%1"
+	asm volatile(LOCK_PREFIX "xadd%z0 %0,%1"
 		     : "+r" (tmp), "+m" (sem->count)
 		     : : "memory");
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-01-21 17:14 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-01-13  0:21 x86-32: clean up rwsem inline asm statements Linus Torvalds
2010-01-13  0:45 ` Andreas Schwab
2010-01-13  1:26   ` Linus Torvalds
2010-01-13  0:48 ` H. Peter Anvin
2010-01-13  0:59   ` Linus Torvalds
2010-01-13  1:24     ` x86: avoid read-cycle on down_read_trylock Linus Torvalds
2010-01-13  1:57       ` x86: clean up rwsem type system Linus Torvalds
2010-01-13  2:16         ` Linus Torvalds
2010-01-14  7:03           ` H. Peter Anvin
2010-01-17  6:05             ` Ingo Molnar
2010-01-17 18:24               ` Linus Torvalds
2010-01-21 17:14                 ` Ingo Molnar
2010-01-13  5:03 ` [tip:x86/asm] x86-32: clean up rwsem inline asm statements tip-bot for Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox