public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3)
@ 2009-12-18 16:01 Jan Beulich
  2009-12-18 16:36 ` Linus Torvalds
  0 siblings, 1 reply; 3+ messages in thread
From: Jan Beulich @ 2009-12-18 16:01 UTC (permalink / raw)
  To: mingo, tglx, hpa
  Cc: Peter Zijlstra, Linus Torvalds, Nick Piggin, linux-kernel

Since the callers generally expect a boolean value, there's no need to
zero-extend the outcome of the comparison. It just requires that all
of x86' trylock implementations have their return type changed
accordingly.

v2: Don't use bool for the return type though - this is being frowned
on and presently doesn't work with the pv-ops patching macros.

v3: Keep the return value in %eax (or really, %al).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>

---
 arch/x86/include/asm/paravirt.h       |    4 ++--
 arch/x86/include/asm/paravirt_types.h |    2 +-
 arch/x86/include/asm/spinlock.h       |   14 ++++++--------
 arch/x86/xen/spinlock.c               |    2 +-
 4 files changed, 10 insertions(+), 12 deletions(-)

--- linux-2.6.33-rc1/arch/x86/include/asm/paravirt.h	2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/include/asm/paravirt.h	2009-12-03 09:43:42.000000000 +0100
@@ -753,9 +753,9 @@ static __always_inline void arch_spin_lo
 	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
 }
 
-static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
+static __always_inline u8 arch_spin_trylock(struct arch_spinlock *lock)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
+	return PVOP_CALL1(u8, pv_lock_ops.spin_trylock, lock);
 }
 
 static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
--- linux-2.6.33-rc1/arch/x86/include/asm/paravirt_types.h	2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/include/asm/paravirt_types.h	2009-12-03 09:43:50.000000000 +0100
@@ -324,7 +324,7 @@ struct pv_lock_ops {
 	int (*spin_is_contended)(struct arch_spinlock *lock);
 	void (*spin_lock)(struct arch_spinlock *lock);
 	void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
-	int (*spin_trylock)(struct arch_spinlock *lock);
+	u8 (*spin_trylock)(struct arch_spinlock *lock);
 	void (*spin_unlock)(struct arch_spinlock *lock);
 };
 
--- linux-2.6.33-rc1/arch/x86/include/asm/spinlock.h	2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/include/asm/spinlock.h	2009-12-10 15:30:52.000000000 +0100
@@ -77,7 +77,7 @@ static __always_inline void __ticket_spi
 		: "memory", "cc");
 }
 
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
+static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp, new;
 
@@ -87,8 +87,7 @@ static __always_inline int __ticket_spin
 		     "jne 1f\n\t"
 		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
 		     "1:"
-		     "sete %b1\n\t"
-		     "movzbl %b1,%0\n\t"
+		     "sete %b0\n\t"
 		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
 		     :
 		     : "memory", "cc");
@@ -127,7 +126,7 @@ static __always_inline void __ticket_spi
 		     : "memory", "cc");
 }
 
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
+static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
 {
 	int tmp;
 	int new;
@@ -140,9 +139,8 @@ static __always_inline int __ticket_spin
 		     "jne 1f\n\t"
 		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
 		     "1:"
-		     "sete %b1\n\t"
-		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
+		     "sete %b0\n\t"
+		     : "=&a" (tmp), "=&r" (new), "+m" (lock->slock)
 		     :
 		     : "memory", "cc");
 
@@ -190,7 +188,7 @@ static __always_inline void arch_spin_lo
 	__ticket_spin_lock(lock);
 }
 
-static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
+static __always_inline u8 arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return __ticket_spin_trylock(lock);
 }
--- linux-2.6.33-rc1/arch/x86/xen/spinlock.c	2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/xen/spinlock.c	2009-12-03 09:44:33.000000000 +0100
@@ -136,7 +136,7 @@ static int xen_spin_is_contended(struct 
 	return xl->spinners != 0;
 }
 
-static int xen_spin_trylock(struct arch_spinlock *lock)
+static u8 xen_spin_trylock(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
 	u8 old = 1;



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3)
  2009-12-18 16:01 [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3) Jan Beulich
@ 2009-12-18 16:36 ` Linus Torvalds
  2009-12-28  8:48   ` Ingo Molnar
  0 siblings, 1 reply; 3+ messages in thread
From: Linus Torvalds @ 2009-12-18 16:36 UTC (permalink / raw)
  To: Jan Beulich; +Cc: mingo, tglx, hpa, Peter Zijlstra, Nick Piggin, linux-kernel



On Fri, 18 Dec 2009, Jan Beulich wrote:
>
> -static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> +static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
>  {
>  	int tmp, new;
>  
> @@ -87,8 +87,7 @@ static __always_inline int __ticket_spin
>  		     "jne 1f\n\t"
>  		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
>  		     "1:"
> -		     "sete %b1\n\t"
> -		     "movzbl %b1,%0\n\t"
> +		     "sete %b0\n\t"
>  		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
>  		     :
>  		     : "memory", "cc");
> @@ -127,7 +126,7 @@ static __always_inline void __ticket_spi

Btw, I looked at that earlier, and it's still pretty sub-optimal.

There's two problems - nobody actually uses the low-level code directly, 
it goes through a 

	if (__ticket_spinlock()) {
		..
		return 1;
	}
	return 0;

logic. Which means that regardless of what the asm does, the end result 
will still be something like this _after_ the asm:

        1:sete %al      # tmp

# 0 "" 2
#NO_APP
        testb   %al, %al        # tmp
        leave
        setne   %al     #, tmp66

ie you have that "sete" (in the asm) being then followed by testb/setne 
again (from the C code wrappers).

The other problem is that the compiler could actually generate better code 
if you leave it to it, so doing

        int tmp, new;

        tmp = ACCESS_ONCE(lock->slock);
        new = tmp + 0x100;
        asm volatile("cmpb %h0,%b0\n\t"
                     "jne 1f\n\t"
                     LOCK_PREFIX "cmpxchgw %w2,%1\n\t"
                     "1:"
                     "sete %b0\n\t"
                     : "=a" (tmp), "+m" (lock->slock)
                     : "r" (new), "0" (tmp)
                     : "memory", "cc");

        return tmp;

actually seems to result in better code:

	_raw_spin_trylock:
	        pushq   %rbp    #
	        movl    (%rdi), %eax    #* lock, D.17152
	        movq    %rsp, %rbp      #,
	        leal    256(%rax), %edx #, new
	#APP
	# 86 "/home/torvalds/v2.6/linux/arch/x86/include/asm/spinlock.h" 1
	        cmpb %ah,%al    # tmp
	        jne 1f
	        .section .smp_locks,"a"
	 .balign 8
	 .quad 661f
	.previous
	661:
	        lock; cmpxchgw %dx,(%rdi)       # new,* lock
	        1:sete %al      # tmp
	
	# 0 "" 2
	#NO_APP

Look - no "movzwl" at all at the beginning, because it turns out that the 
spinlock is a "unsigned int" and the "movl" to load the value pairs just 
fine with the "leal" that the compiler can do too.  And we didn't 
artificially constrain the second register to a byte register either (but 
the compiler still picked %dx, of course).

I dunno. I still don't get the feeling that any of this actually 
_matters_.

(Btw, the above isn't actually tested - I just edited the inline asm and 
looked at what gcc generated, I didn't _run_ any of it).

			Linus

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3)
  2009-12-18 16:36 ` Linus Torvalds
@ 2009-12-28  8:48   ` Ingo Molnar
  0 siblings, 0 replies; 3+ messages in thread
From: Ingo Molnar @ 2009-12-28  8:48 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Jan Beulich, tglx, hpa, Peter Zijlstra, Nick Piggin, linux-kernel


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Fri, 18 Dec 2009, Jan Beulich wrote:
> >
> > -static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> > +static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
>
[...]
>
> I dunno. I still don't get the feeling that any of this actually 
> _matters_.

Jan, to apply this patch it would be nice to see a before/after 
comparison/analysis of a defconfig vmlinux image with a reasonably uptodate 
GCC - to see how much this all matters in practice.

	Ingo

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-12-28  8:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-12-18 16:01 [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3) Jan Beulich
2009-12-18 16:36 ` Linus Torvalds
2009-12-28  8:48   ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox