From: Peter Zijlstra <peterz@infradead.org>
To: Waiman Long <longman@redhat.com>
Cc: linux-arch@vger.kernel.org, linux-xtensa@linux-xtensa.org,
Davidlohr Bueso <dave@stgolabs.net>,
linux-ia64@vger.kernel.org, Tim Chen <tim.c.chen@linux.intel.com>,
Arnd Bergmann <arnd@arndb.de>,
linux-sh@vger.kernel.org, linux-hexagon@vger.kernel.org,
x86@kernel.org, Will Deacon <will.deacon@arm.com>,
linux-kernel@vger.kernel.org,
Linus Torvalds <torvalds@linux-foundation.org>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
"H. Peter Anvin" <hpa@zytor.com>,
linux-alpha@vger.kernel.org, sparclinux@vger.kernel.org,
Thomas Gleixner <tglx@linutronix.de>,
linuxppc-dev@lists.ozlabs.org,
Andrew Morton <akpm@linux-foundation.org>,
linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH] locking/rwsem: Remove arch specific rwsem files
Date: Mon, 11 Feb 2019 12:58:34 +0100 [thread overview]
Message-ID: <20190211115833.GY32511@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <1549850450-10171-1-git-send-email-longman@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 3269 bytes --]
On Sun, Feb 10, 2019 at 09:00:50PM -0500, Waiman Long wrote:
> +static inline int __down_read_trylock(struct rw_semaphore *sem)
> +{
> + long tmp;
> +
> + while ((tmp = atomic_long_read(&sem->count)) >= 0) {
> + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
> + tmp + RWSEM_ACTIVE_READ_BIAS)) {
> + return 1;
> + }
> + }
> + return 0;
> +}
So the orignal x86 implementation reads:
static inline bool __down_read_trylock(struct rw_semaphore *sem)
{
long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %[count],%[result]\n\t"
"1:\n\t"
" mov %[result],%[tmp]\n\t"
" add %[inc],%[tmp]\n\t"
" jle 2f\n\t"
LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
: [count] "+m" (sem->count), [result] "=&a" (result),
[tmp] "=&r" (tmp)
: [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
: "memory", "cc");
return result >= 0;
}
you replace that with:
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000000 <__down_read_trylock1>:
0: eb 17 jmp 19 <__down_read_trylock1+0x19>
2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
8: 48 8d 4a 01 lea 0x1(%rdx),%rcx
c: 48 89 d0 mov %rdx,%rax
f: f0 48 0f b1 0f lock cmpxchg %rcx,(%rdi)
14: 48 39 c2 cmp %rax,%rdx
17: 74 0f je 28 <__down_read_trylock1+0x28>
19: 48 8b 17 mov (%rdi),%rdx
1c: 48 85 d2 test %rdx,%rdx
1f: 79 e7 jns 8 <__down_read_trylock1+0x8>
21: 31 c0 xor %eax,%eax
23: c3 retq
24: 0f 1f 40 00 nopl 0x0(%rax)
28: b8 01 00 00 00 mov $0x1,%eax
2d: c3 retq
Which is clearly worse. Now we can write that as:
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000030 <__down_read_trylock2>:
30: 48 8b 07 mov (%rdi),%rax
33: 48 85 c0 test %rax,%rax
36: 78 18 js 50 <__down_read_trylock2+0x20>
38: 48 8d 50 01 lea 0x1(%rax),%rdx
3c: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi)
41: 75 f0 jne 33 <__down_read_trylock2+0x3>
43: b8 01 00 00 00 mov $0x1,%eax
48: c3 retq
49: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
50: 31 c0 xor %eax,%eax
52: c3 retq
Which is a lot better; but not quite there yet.
I've tried quite a bit, but I can't seem to get GCC to generate the:
add $1,%rdx
jle
required; stuff like:
new = old + 1;
if (new <= 0)
generates:
lea 0x1(%rax),%rdx
test %rdx, %rdx
jle
Ah well, have fun :-)
[-- Attachment #2: dtl.c --]
[-- Type: text/x-csrc, Size: 4530 bytes --]
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef signed char s8;
typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
typedef _Bool bool;
# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
# define CC_OUT(c) "=@cc" #c
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
extern void __cmpxchg_wrong_size(void);
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
if (unlikely(!success)) \
*_old = __old; \
likely(success); \
})
#define LOCK_PREFIX "lock; "
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
#define READ_ONCE(x) (*(volatile typeof(x) *)(&x))
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock3(unsigned long *l)
{
long new, old = READ_ONCE(*l);
for (;;) {
new = old + 1;
if (new <= 0)
return 0;
if (try_cmpxchg(l, &old, new))
return 1;
}
}
[-- Attachment #3: Type: text/plain, Size: 176 bytes --]
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <peterz@infradead.org>
To: Waiman Long <longman@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>, Will Deacon <will.deacon@arm.com>,
Thomas Gleixner <tglx@linutronix.de>,
linux-kernel@vger.kernel.org, linux-alpha@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-hexagon@vger.kernel.org, linux-ia64@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, linux-sh@vger.kernel.org,
sparclinux@vger.kernel.org, linux-xtensa@linux-xtensa.org,
linux-arch@vger.kernel.org, x86@kernel.org,
Arnd Bergmann <arnd@arndb.de>, Borislav Petkov <bp@alien8.de>,
"H. Peter Anvin" <hpa@zytor.com>,
Davidlohr Bueso <dave@stgolabs.net>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Tim Chen <tim.c.chen@linux.intel.com>
Subject: Re: [PATCH] locking/rwsem: Remove arch specific rwsem files
Date: Mon, 11 Feb 2019 12:58:34 +0100 [thread overview]
Message-ID: <20190211115833.GY32511@hirez.programming.kicks-ass.net> (raw)
Message-ID: <20190211115834.Yg0p267MUKJlKGSE7xOXpnRfPkBIPYVX9lG2frSBxSY@z> (raw)
In-Reply-To: <1549850450-10171-1-git-send-email-longman@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 3269 bytes --]
On Sun, Feb 10, 2019 at 09:00:50PM -0500, Waiman Long wrote:
> +static inline int __down_read_trylock(struct rw_semaphore *sem)
> +{
> + long tmp;
> +
> + while ((tmp = atomic_long_read(&sem->count)) >= 0) {
> + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
> + tmp + RWSEM_ACTIVE_READ_BIAS)) {
> + return 1;
> + }
> + }
> + return 0;
> +}
So the orignal x86 implementation reads:
static inline bool __down_read_trylock(struct rw_semaphore *sem)
{
long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %[count],%[result]\n\t"
"1:\n\t"
" mov %[result],%[tmp]\n\t"
" add %[inc],%[tmp]\n\t"
" jle 2f\n\t"
LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
: [count] "+m" (sem->count), [result] "=&a" (result),
[tmp] "=&r" (tmp)
: [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
: "memory", "cc");
return result >= 0;
}
you replace that with:
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000000 <__down_read_trylock1>:
0: eb 17 jmp 19 <__down_read_trylock1+0x19>
2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
8: 48 8d 4a 01 lea 0x1(%rdx),%rcx
c: 48 89 d0 mov %rdx,%rax
f: f0 48 0f b1 0f lock cmpxchg %rcx,(%rdi)
14: 48 39 c2 cmp %rax,%rdx
17: 74 0f je 28 <__down_read_trylock1+0x28>
19: 48 8b 17 mov (%rdi),%rdx
1c: 48 85 d2 test %rdx,%rdx
1f: 79 e7 jns 8 <__down_read_trylock1+0x8>
21: 31 c0 xor %eax,%eax
23: c3 retq
24: 0f 1f 40 00 nopl 0x0(%rax)
28: b8 01 00 00 00 mov $0x1,%eax
2d: c3 retq
Which is clearly worse. Now we can write that as:
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000030 <__down_read_trylock2>:
30: 48 8b 07 mov (%rdi),%rax
33: 48 85 c0 test %rax,%rax
36: 78 18 js 50 <__down_read_trylock2+0x20>
38: 48 8d 50 01 lea 0x1(%rax),%rdx
3c: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi)
41: 75 f0 jne 33 <__down_read_trylock2+0x3>
43: b8 01 00 00 00 mov $0x1,%eax
48: c3 retq
49: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
50: 31 c0 xor %eax,%eax
52: c3 retq
Which is a lot better; but not quite there yet.
I've tried quite a bit, but I can't seem to get GCC to generate the:
add $1,%rdx
jle
required; stuff like:
new = old + 1;
if (new <= 0)
generates:
lea 0x1(%rax),%rdx
test %rdx, %rdx
jle
Ah well, have fun :-)
[-- Attachment #2: dtl.c --]
[-- Type: text/x-csrc, Size: 4530 bytes --]
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef signed char s8;
typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
typedef _Bool bool;
# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
# define CC_OUT(c) "=@cc" #c
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
extern void __cmpxchg_wrong_size(void);
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
if (unlikely(!success)) \
*_old = __old; \
likely(success); \
})
#define LOCK_PREFIX "lock; "
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
#define READ_ONCE(x) (*(volatile typeof(x) *)(&x))
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock3(unsigned long *l)
{
long new, old = READ_ONCE(*l);
for (;;) {
new = old + 1;
if (new <= 0)
return 0;
if (try_cmpxchg(l, &old, new))
return 1;
}
}
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <peterz@infradead.org>
To: Waiman Long <longman@redhat.com>
Cc: linux-arch@vger.kernel.org, linux-xtensa@linux-xtensa.org,
Davidlohr Bueso <dave@stgolabs.net>,
linux-ia64@vger.kernel.org, Tim Chen <tim.c.chen@linux.intel.com>,
Arnd Bergmann <arnd@arndb.de>,
linux-sh@vger.kernel.org, linux-hexagon@vger.kernel.org,
x86@kernel.org, Will Deacon <will.deacon@arm.com>,
linux-kernel@vger.kernel.org,
Linus Torvalds <torvalds@linux-foundation.org>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
"H. Peter Anvin" <hpa@zytor.com>,
linux-alpha@vger.kernel.org, sparclinux@vger.kernel.org,
Thomas Gleixner <tglx@linutronix.de>,
linuxppc-dev@lists.ozlabs.org,
Andrew Morton <akpm@linux-foundation.org>,
linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH] locking/rwsem: Remove arch specific rwsem files
Date: Mon, 11 Feb 2019 11:58:34 +0000 [thread overview]
Message-ID: <20190211115833.GY32511@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <1549850450-10171-1-git-send-email-longman@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 3269 bytes --]
On Sun, Feb 10, 2019 at 09:00:50PM -0500, Waiman Long wrote:
> +static inline int __down_read_trylock(struct rw_semaphore *sem)
> +{
> + long tmp;
> +
> + while ((tmp = atomic_long_read(&sem->count)) >= 0) {
> + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
> + tmp + RWSEM_ACTIVE_READ_BIAS)) {
> + return 1;
> + }
> + }
> + return 0;
> +}
So the orignal x86 implementation reads:
static inline bool __down_read_trylock(struct rw_semaphore *sem)
{
long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %[count],%[result]\n\t"
"1:\n\t"
" mov %[result],%[tmp]\n\t"
" add %[inc],%[tmp]\n\t"
" jle 2f\n\t"
LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
: [count] "+m" (sem->count), [result] "=&a" (result),
[tmp] "=&r" (tmp)
: [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
: "memory", "cc");
return result >= 0;
}
you replace that with:
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000000 <__down_read_trylock1>:
0: eb 17 jmp 19 <__down_read_trylock1+0x19>
2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
8: 48 8d 4a 01 lea 0x1(%rdx),%rcx
c: 48 89 d0 mov %rdx,%rax
f: f0 48 0f b1 0f lock cmpxchg %rcx,(%rdi)
14: 48 39 c2 cmp %rax,%rdx
17: 74 0f je 28 <__down_read_trylock1+0x28>
19: 48 8b 17 mov (%rdi),%rdx
1c: 48 85 d2 test %rdx,%rdx
1f: 79 e7 jns 8 <__down_read_trylock1+0x8>
21: 31 c0 xor %eax,%eax
23: c3 retq
24: 0f 1f 40 00 nopl 0x0(%rax)
28: b8 01 00 00 00 mov $0x1,%eax
2d: c3 retq
Which is clearly worse. Now we can write that as:
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000030 <__down_read_trylock2>:
30: 48 8b 07 mov (%rdi),%rax
33: 48 85 c0 test %rax,%rax
36: 78 18 js 50 <__down_read_trylock2+0x20>
38: 48 8d 50 01 lea 0x1(%rax),%rdx
3c: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi)
41: 75 f0 jne 33 <__down_read_trylock2+0x3>
43: b8 01 00 00 00 mov $0x1,%eax
48: c3 retq
49: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
50: 31 c0 xor %eax,%eax
52: c3 retq
Which is a lot better; but not quite there yet.
I've tried quite a bit, but I can't seem to get GCC to generate the:
add $1,%rdx
jle
required; stuff like:
new = old + 1;
if (new <= 0)
generates:
lea 0x1(%rax),%rdx
test %rdx, %rdx
jle
Ah well, have fun :-)
[-- Attachment #2: dtl.c --]
[-- Type: text/x-csrc, Size: 4530 bytes --]
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef signed char s8;
typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
typedef _Bool bool;
# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
# define CC_OUT(c) "=@cc" #c
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
extern void __cmpxchg_wrong_size(void);
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
if (unlikely(!success)) \
*_old = __old; \
likely(success); \
})
#define LOCK_PREFIX "lock; "
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
#define READ_ONCE(x) (*(volatile typeof(x) *)(&x))
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock3(unsigned long *l)
{
long new, old = READ_ONCE(*l);
for (;;) {
new = old + 1;
if (new <= 0)
return 0;
if (try_cmpxchg(l, &old, new))
return 1;
}
}
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <peterz@infradead.org>
To: Waiman Long <longman@redhat.com>
Cc: linux-arch@vger.kernel.org, linux-xtensa@linux-xtensa.org,
Davidlohr Bueso <dave@stgolabs.net>,
linux-ia64@vger.kernel.org, Tim Chen <tim.c.chen@linux.intel.com>,
Arnd Bergmann <arnd@arndb.de>,
linux-sh@vger.kernel.org, linux-hexagon@vger.kernel.org,
x86@kernel.org, Will Deacon <will.deacon@arm.com>,
linux-kernel@vger.kernel.org,
Linus Torvalds <torvalds@linux-foundation.org>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
"H. Peter Anvin" <hpa@zytor.com>,
linux-alpha@vger.kernel.org, sparclinux@vger.kernel.org,
Thomas Gleixner <tglx@linutronix.de>,
linuxppc-dev@lists.ozlabs.org,
Andrew Morton <akpm@linux-foundation.org>,
linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH] locking/rwsem: Remove arch specific rwsem files
Date: Mon, 11 Feb 2019 12:58:34 +0100 [thread overview]
Message-ID: <20190211115833.GY32511@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <1549850450-10171-1-git-send-email-longman@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 3269 bytes --]
On Sun, Feb 10, 2019 at 09:00:50PM -0500, Waiman Long wrote:
> +static inline int __down_read_trylock(struct rw_semaphore *sem)
> +{
> + long tmp;
> +
> + while ((tmp = atomic_long_read(&sem->count)) >= 0) {
> + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
> + tmp + RWSEM_ACTIVE_READ_BIAS)) {
> + return 1;
> + }
> + }
> + return 0;
> +}
So the orignal x86 implementation reads:
static inline bool __down_read_trylock(struct rw_semaphore *sem)
{
long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %[count],%[result]\n\t"
"1:\n\t"
" mov %[result],%[tmp]\n\t"
" add %[inc],%[tmp]\n\t"
" jle 2f\n\t"
LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
: [count] "+m" (sem->count), [result] "=&a" (result),
[tmp] "=&r" (tmp)
: [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
: "memory", "cc");
return result >= 0;
}
you replace that with:
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000000 <__down_read_trylock1>:
0: eb 17 jmp 19 <__down_read_trylock1+0x19>
2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
8: 48 8d 4a 01 lea 0x1(%rdx),%rcx
c: 48 89 d0 mov %rdx,%rax
f: f0 48 0f b1 0f lock cmpxchg %rcx,(%rdi)
14: 48 39 c2 cmp %rax,%rdx
17: 74 0f je 28 <__down_read_trylock1+0x28>
19: 48 8b 17 mov (%rdi),%rdx
1c: 48 85 d2 test %rdx,%rdx
1f: 79 e7 jns 8 <__down_read_trylock1+0x8>
21: 31 c0 xor %eax,%eax
23: c3 retq
24: 0f 1f 40 00 nopl 0x0(%rax)
28: b8 01 00 00 00 mov $0x1,%eax
2d: c3 retq
Which is clearly worse. Now we can write that as:
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
which generates:
0000000000000030 <__down_read_trylock2>:
30: 48 8b 07 mov (%rdi),%rax
33: 48 85 c0 test %rax,%rax
36: 78 18 js 50 <__down_read_trylock2+0x20>
38: 48 8d 50 01 lea 0x1(%rax),%rdx
3c: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi)
41: 75 f0 jne 33 <__down_read_trylock2+0x3>
43: b8 01 00 00 00 mov $0x1,%eax
48: c3 retq
49: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
50: 31 c0 xor %eax,%eax
52: c3 retq
Which is a lot better; but not quite there yet.
I've tried quite a bit, but I can't seem to get GCC to generate the:
add $1,%rdx
jle
required; stuff like:
new = old + 1;
if (new <= 0)
generates:
lea 0x1(%rax),%rdx
test %rdx, %rdx
jle
Ah well, have fun :-)
[-- Attachment #2: dtl.c --]
[-- Type: text/x-csrc, Size: 4530 bytes --]
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef signed char s8;
typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
typedef _Bool bool;
# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
# define CC_OUT(c) "=@cc" #c
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
extern void __cmpxchg_wrong_size(void);
#define __raw_cmpxchg(ptr, old, new, size, lock) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
asm volatile(lock "cmpxchgb %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
asm volatile(lock "cmpxchgw %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
asm volatile(lock "cmpxchgl %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
asm volatile(lock "cmpxchgq %2,%1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
__ret; \
})
#define __cmpxchg(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
case 1: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
asm volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "q" (__new) \
: "memory"); \
break; \
} \
case 2: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
asm volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 4: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
asm volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
case 8: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
asm volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
[old] "+a" (__old) \
: [new] "r" (__new) \
: "memory"); \
break; \
} \
default: \
__cmpxchg_wrong_size(); \
} \
if (unlikely(!success)) \
*_old = __old; \
likely(success); \
})
#define LOCK_PREFIX "lock; "
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
#define READ_ONCE(x) (*(volatile typeof(x) *)(&x))
int __down_read_trylock1(unsigned long *l)
{
long tmp;
while ((tmp = READ_ONCE(*l)) >= 0) {
if (tmp == cmpxchg(l, tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock2(unsigned long *l)
{
long tmp = READ_ONCE(*l);
while (tmp >= 0) {
if (try_cmpxchg(l, &tmp, tmp + 1))
return 1;
}
return 0;
}
int __down_read_trylock3(unsigned long *l)
{
long new, old = READ_ONCE(*l);
for (;;) {
new = old + 1;
if (new <= 0)
return 0;
if (try_cmpxchg(l, &old, new))
return 1;
}
}
next prev parent reply other threads:[~2019-02-11 11:58 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-11 2:00 [PATCH] locking/rwsem: Remove arch specific rwsem files Waiman Long
2019-02-11 2:00 ` Waiman Long
2019-02-11 2:00 ` Waiman Long
2019-02-11 2:00 ` Waiman Long
2019-02-11 2:08 ` Waiman Long
2019-02-11 2:08 ` Waiman Long
2019-02-11 2:08 ` Waiman Long
2019-02-11 2:08 ` Waiman Long
2019-02-11 7:11 ` Ingo Molnar
2019-02-11 7:11 ` Ingo Molnar
2019-02-11 7:11 ` Ingo Molnar
2019-02-11 7:11 ` Ingo Molnar
2019-02-11 10:39 ` Ingo Molnar
2019-02-11 10:39 ` Ingo Molnar
2019-02-11 10:39 ` Ingo Molnar
2019-02-11 10:39 ` Ingo Molnar
2019-02-11 10:52 ` Will Deacon
2019-02-11 10:52 ` Will Deacon
2019-02-11 10:52 ` Will Deacon
2019-02-11 10:55 ` Ingo Molnar
2019-02-11 10:55 ` Ingo Molnar
2019-02-11 10:55 ` Ingo Molnar
2019-02-11 10:55 ` Ingo Molnar
2019-02-11 13:32 ` Waiman Long
2019-02-11 13:32 ` Waiman Long
2019-02-11 13:32 ` Waiman Long
2019-02-11 13:32 ` Waiman Long
2019-02-11 9:36 ` Peter Zijlstra
2019-02-11 9:36 ` Peter Zijlstra
2019-02-11 9:36 ` Peter Zijlstra
2019-02-11 9:36 ` Peter Zijlstra
2019-02-11 9:40 ` Peter Zijlstra
2019-02-11 9:40 ` Peter Zijlstra
2019-02-11 9:40 ` Peter Zijlstra
2019-02-11 9:40 ` Peter Zijlstra
2019-02-11 10:57 ` Peter Zijlstra
2019-02-11 10:57 ` Peter Zijlstra
2019-02-11 10:57 ` Peter Zijlstra
2019-02-11 10:57 ` Peter Zijlstra
2019-02-11 11:58 ` Peter Zijlstra [this message]
2019-02-11 11:58 ` Peter Zijlstra
2019-02-11 11:58 ` Peter Zijlstra
2019-02-11 11:58 ` Peter Zijlstra
2019-02-11 16:35 ` Waiman Long
2019-02-11 16:35 ` Waiman Long
2019-02-11 16:35 ` Waiman Long
2019-02-11 16:35 ` Waiman Long
2019-02-11 17:04 ` Peter Zijlstra
2019-02-11 17:04 ` Peter Zijlstra
2019-02-11 17:04 ` Peter Zijlstra
2019-02-11 17:04 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190211115833.GY32511@hirez.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=bp@alien8.de \
--cc=dave@stgolabs.net \
--cc=hpa@zytor.com \
--cc=linux-alpha@vger.kernel.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-hexagon@vger.kernel.org \
--cc=linux-ia64@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-sh@vger.kernel.org \
--cc=linux-xtensa@linux-xtensa.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=longman@redhat.com \
--cc=mingo@redhat.com \
--cc=sparclinux@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=tim.c.chen@linux.intel.com \
--cc=torvalds@linux-foundation.org \
--cc=will.deacon@arm.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.