* [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3)
@ 2009-12-18 16:01 Jan Beulich
2009-12-18 16:36 ` Linus Torvalds
0 siblings, 1 reply; 3+ messages in thread
From: Jan Beulich @ 2009-12-18 16:01 UTC (permalink / raw)
To: mingo, tglx, hpa
Cc: Peter Zijlstra, Linus Torvalds, Nick Piggin, linux-kernel
Since the callers generally expect a boolean value, there's no need to
zero-extend the outcome of the comparison. It just requires that all
of x86's trylock implementations have their return type changed
accordingly.
v2: Don't use bool for the return type though - this is being frowned
on and presently doesn't work with the pv-ops patching macros.
v3: Keep the return value in %eax (or really, %al).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
arch/x86/include/asm/paravirt.h | 4 ++--
arch/x86/include/asm/paravirt_types.h | 2 +-
arch/x86/include/asm/spinlock.h | 14 ++++++--------
arch/x86/xen/spinlock.c | 2 +-
4 files changed, 10 insertions(+), 12 deletions(-)
--- linux-2.6.33-rc1/arch/x86/include/asm/paravirt.h 2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/include/asm/paravirt.h 2009-12-03 09:43:42.000000000 +0100
@@ -753,9 +753,9 @@ static __always_inline void arch_spin_lo
PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
}
-static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
+static __always_inline u8 arch_spin_trylock(struct arch_spinlock *lock)
{
- return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
+ return PVOP_CALL1(u8, pv_lock_ops.spin_trylock, lock);
}
static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
--- linux-2.6.33-rc1/arch/x86/include/asm/paravirt_types.h 2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/include/asm/paravirt_types.h 2009-12-03 09:43:50.000000000 +0100
@@ -324,7 +324,7 @@ struct pv_lock_ops {
int (*spin_is_contended)(struct arch_spinlock *lock);
void (*spin_lock)(struct arch_spinlock *lock);
void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
- int (*spin_trylock)(struct arch_spinlock *lock);
+ u8 (*spin_trylock)(struct arch_spinlock *lock);
void (*spin_unlock)(struct arch_spinlock *lock);
};
--- linux-2.6.33-rc1/arch/x86/include/asm/spinlock.h 2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/include/asm/spinlock.h 2009-12-10 15:30:52.000000000 +0100
@@ -77,7 +77,7 @@ static __always_inline void __ticket_spi
: "memory", "cc");
}
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
+static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
{
int tmp, new;
@@ -87,8 +87,7 @@ static __always_inline int __ticket_spin
"jne 1f\n\t"
LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
"1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
+ "sete %b0\n\t"
: "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
:
: "memory", "cc");
@@ -127,7 +126,7 @@ static __always_inline void __ticket_spi
: "memory", "cc");
}
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
+static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
{
int tmp;
int new;
@@ -140,9 +139,8 @@ static __always_inline int __ticket_spin
"jne 1f\n\t"
LOCK_PREFIX "cmpxchgl %1,%2\n\t"
"1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
- : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
+ "sete %b0\n\t"
+ : "=&a" (tmp), "=&r" (new), "+m" (lock->slock)
:
: "memory", "cc");
@@ -190,7 +188,7 @@ static __always_inline void arch_spin_lo
__ticket_spin_lock(lock);
}
-static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
+static __always_inline u8 arch_spin_trylock(arch_spinlock_t *lock)
{
return __ticket_spin_trylock(lock);
}
--- linux-2.6.33-rc1/arch/x86/xen/spinlock.c 2009-12-18 16:05:40.000000000 +0100
+++ 2.6.33-rc1-x86-spin-trylock-simplify/arch/x86/xen/spinlock.c 2009-12-03 09:44:33.000000000 +0100
@@ -136,7 +136,7 @@ static int xen_spin_is_contended(struct
return xl->spinners != 0;
}
-static int xen_spin_trylock(struct arch_spinlock *lock)
+static u8 xen_spin_trylock(struct arch_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
u8 old = 1;
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3)
2009-12-18 16:01 [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3) Jan Beulich
@ 2009-12-18 16:36 ` Linus Torvalds
2009-12-28 8:48 ` Ingo Molnar
0 siblings, 1 reply; 3+ messages in thread
From: Linus Torvalds @ 2009-12-18 16:36 UTC (permalink / raw)
To: Jan Beulich; +Cc: mingo, tglx, hpa, Peter Zijlstra, Nick Piggin, linux-kernel
On Fri, 18 Dec 2009, Jan Beulich wrote:
>
> -static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> +static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
> {
> int tmp, new;
>
> @@ -87,8 +87,7 @@ static __always_inline int __ticket_spin
> "jne 1f\n\t"
> LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
> "1:"
> - "sete %b1\n\t"
> - "movzbl %b1,%0\n\t"
> + "sete %b0\n\t"
> : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
> :
> : "memory", "cc");
> @@ -127,7 +126,7 @@ static __always_inline void __ticket_spi
Btw, I looked at that earlier, and it's still pretty sub-optimal.
There's two problems - nobody actually uses the low-level code directly,
it goes through a
if (__ticket_spinlock()) {
..
return 1;
}
return 0;
logic. Which means that regardless of what the asm does, the end result
will still be something like this _after_ the asm:
1:sete %al # tmp
# 0 "" 2
#NO_APP
testb %al, %al # tmp
leave
setne %al #, tmp66
ie you have that "sete" (in the asm) being then followed by testb/setne
again (from the C code wrappers).
The other problem is that the compiler could actually generate better code
if you leave it to it, so doing
int tmp, new;
tmp = ACCESS_ONCE(lock->slock);
new = tmp + 0x100;
asm volatile("cmpb %h0,%b0\n\t"
"jne 1f\n\t"
LOCK_PREFIX "cmpxchgw %w2,%1\n\t"
"1:"
"sete %b0\n\t"
: "=a" (tmp), "+m" (lock->slock)
: "r" (new), "0" (tmp)
: "memory", "cc");
return tmp;
actually seems to result in better code:
_raw_spin_trylock:
pushq %rbp #
movl (%rdi), %eax #* lock, D.17152
movq %rsp, %rbp #,
leal 256(%rax), %edx #, new
#APP
# 86 "/home/torvalds/v2.6/linux/arch/x86/include/asm/spinlock.h" 1
cmpb %ah,%al # tmp
jne 1f
.section .smp_locks,"a"
.balign 8
.quad 661f
.previous
661:
lock; cmpxchgw %dx,(%rdi) # new,* lock
1:sete %al # tmp
# 0 "" 2
#NO_APP
Look - no "movzwl" at all at the beginning, because it turns out that the
spinlock is a "unsigned int" and the "movl" to load the value pairs just
fine with the "leal" that the compiler can do too. And we didn't
artificially constrain the second register to a byte register either (but
the compiler still picked %dx, of course).
I dunno. I still don't get the feeling that any of this actually
_matters_.
(Btw, the above isn't actually tested - I just edited the inline asm and
looked at what gcc generated, I didn't _run_ any of it).
Linus
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3)
2009-12-18 16:36 ` Linus Torvalds
@ 2009-12-28 8:48 ` Ingo Molnar
0 siblings, 0 replies; 3+ messages in thread
From: Ingo Molnar @ 2009-12-28 8:48 UTC (permalink / raw)
To: Linus Torvalds
Cc: Jan Beulich, tglx, hpa, Peter Zijlstra, Nick Piggin, linux-kernel
* Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Fri, 18 Dec 2009, Jan Beulich wrote:
> >
> > -static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> > +static __always_inline u8 __ticket_spin_trylock(arch_spinlock_t *lock)
>
[...]
>
> I dunno. I still don't get the feeling that any of this actually
> _matters_.
Jan, to apply this patch it would be nice to see a before/after
comparison/analysis of a defconfig vmlinux image with a reasonably uptodate
GCC - to see how much this all matters in practice.
Ingo
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-12-28 8:48 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-12-18 16:01 [PATCH] x86: slightly shorten __ticket_spin_trylock() (v3) Jan Beulich
2009-12-18 16:36 ` Linus Torvalds
2009-12-28 8:48 ` Ingo Molnar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox