* [PATCH] x86: fix and improve cmpxchg_double{,_local}()
@ 2012-01-02 17:02 Jan Beulich
2012-01-03 15:00 ` Eric Dumazet
2012-01-04 15:31 ` [tip:x86/asm] x86: Fix " tip-bot for Jan Beulich
0 siblings, 2 replies; 13+ messages in thread
From: Jan Beulich @ 2012-01-02 17:02 UTC (permalink / raw)
To: mingo, tglx, hpa; +Cc: Christoph Lameter, linux-kernel
Just like the per-CPU ones they had several problems/shortcomings:
Only the first memory operand was mentioned in the asm() operands, and
the 2x64-bit version didn't have a memory clobber while the 2x32-bit
one did. The former allowed the compiler to not recognize the need to
re-load the data in case it had it cached in some register, while the
latter was overly destructive.
The types of the local copies of the old and new values were incorrect
(the types of the pointed-to variables should be used here, to make
sure the respective old/new variable types are compatible).
The __dummy/__junk variables were pointless, given that local copies
of the inputs already existed (and can hence be used for discarded
outputs).
The 32-bit variant of cmpxchg_double_local() referenced
cmpxchg16b_local().
At once also
- change the return value type to what it really is: 'bool'
- unify 32- and 64-bit variants
- abstract out the common part of the 'normal' and 'local' variants
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
arch/x86/include/asm/cmpxchg.h | 23 +++++++++++++++++++
arch/x86/include/asm/cmpxchg_32.h | 46 --------------------------------------
arch/x86/include/asm/cmpxchg_64.h | 43 -----------------------------------
mm/slub.c | 4 +--
4 files changed, 25 insertions(+), 91 deletions(-)
--- 3.2-rc7/arch/x86/include/asm/cmpxchg.h
+++ 3.2-rc7-x86-cmpxchg-double/arch/x86/include/asm/cmpxchg.h
@@ -207,4 +207,27 @@ extern void __xadd_wrong_size(void)
#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ")
#define xadd_local(ptr, inc) __xadd((ptr), (inc), "")
+#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
+({ \
+ bool __ret; \
+ __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
+ __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
+ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
+ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
+ VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
+ VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
+ asm volatile(pfx "cmpxchg%c4b %2; sete %0" \
+ : "=a" (__ret), "+d" (__old2), \
+ "+m" (*(p1)), "+m" (*(p2)) \
+ : "i" (2 * sizeof(long)), "a" (__old1), \
+ "b" (__new1), "c" (__new2)); \
+ __ret; \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
+
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
+
#endif /* ASM_X86_CMPXCHG_H */
--- 3.2-rc7/arch/x86/include/asm/cmpxchg_32.h
+++ 3.2-rc7-x86-cmpxchg-double/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(
#endif
-#define cmpxchg8b(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __dummy; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \
- : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
- : "a" (__old1), "d"(__old2), \
- "b" (__new1), "c" (__new2) \
- : "memory"); \
- __ret; })
-
-
-#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __dummy; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile("cmpxchg8b %2; setz %1" \
- : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
- : "a" (__old), "d"(__old2), \
- "b" (__new1), "c" (__new2), \
- : "memory"); \
- __ret; })
-
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- VM_BUG_ON((unsigned long)(ptr) % 8); \
- cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- VM_BUG_ON((unsigned long)(ptr) % 8); \
- cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
-})
-
#define system_has_cmpxchg_double() cpu_has_cx8
#endif /* _ASM_X86_CMPXCHG_32_H */
--- 3.2-rc7/arch/x86/include/asm/cmpxchg_64.h
+++ 3.2-rc7-x86-cmpxchg-double/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u6
cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg16b(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __junk; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \
- : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
- : "b"(__new1), "c"(__new2), \
- "a"(__old1), "d"(__old2)); \
- __ret; })
-
-
-#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __junk; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile("cmpxchg16b %2;setz %1" \
- : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
- : "b"(__new1), "c"(__new2), \
- "a"(__old1), "d"(__old2)); \
- __ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- VM_BUG_ON((unsigned long)(ptr) % 16); \
- cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- VM_BUG_ON((unsigned long)(ptr) % 16); \
- cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
-})
-
#define system_has_cmpxchg_double() cpu_has_cx16
#endif /* _ASM_X86_CMPXCHG_64_H */
--- 3.2-rc7/mm/slub.c
+++ 3.2-rc7-x86-cmpxchg-double/mm/slub.c
@@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab
VM_BUG_ON(!irqs_disabled());
#ifdef CONFIG_CMPXCHG_DOUBLE
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
@@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(s
{
#ifdef CONFIG_CMPXCHG_DOUBLE
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-02 17:02 [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
@ 2012-01-03 15:00 ` Eric Dumazet
2012-01-03 15:15 ` Eric Dumazet
2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
2012-01-04 15:31 ` [tip:x86/asm] x86: Fix " tip-bot for Jan Beulich
1 sibling, 2 replies; 13+ messages in thread
From: Eric Dumazet @ 2012-01-03 15:00 UTC (permalink / raw)
To: Jan Beulich; +Cc: mingo, tglx, hpa, Christoph Lameter, linux-kernel, netdev
Le lundi 02 janvier 2012 à 17:02 +0000, Jan Beulich a écrit :
> Just like the per-CPU ones they had several problems/shortcomings:
>
> Only the first memory operand was mentioned in the asm() operands, and
> the 2x64-bit version didn't have a memory clobber while the 2x32-bit
> one did. The former allowed the compiler to not recognize the need to
> re-load the data in case it had it cached in some register, while the
> latter was overly destructive.
>
> The types of the local copies of the old and new values were incorrect
> (the types of the pointed-to variables should be used here, to make
> sure the respective old/new variable types are compatible).
>
> The __dummy/__junk variables were pointless, given that local copies
> of the inputs already existed (and can hence be used for discarded
> outputs).
>
> The 32-bit variant of cmpxchg_double_local() referenced
> cmpxchg16b_local().
>
> At once also
> - change the return value type to what it really is: 'bool'
> - unify 32- and 64-bit variants
> - abstract out the common part of the 'normal' and 'local' variants
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
While looking at your patch, I discovered that atomic64_add() /
atomic64_inc() on 32bit are completely buggy. Oh well...
Generated code :
c03bc00c <atomic64_add_return_cx8>:
c03bc00c: 55 push %ebp
c03bc00d: 53 push %ebx
c03bc00e: 56 push %esi
c03bc00f: 57 push %edi
c03bc010: 89 c6 mov %eax,%esi
c03bc012: 89 d7 mov %edx,%edi
c03bc014: 89 cd mov %ecx,%ebp
c03bc016: 89 d8 mov %ebx,%eax
c03bc018: 89 ca mov %ecx,%edx
c03bc01a: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp)
c03bc01f: 89 c3 mov %eax,%ebx
c03bc021: 89 d1 mov %edx,%ecx
c03bc023: 01 f3 add %esi,%ebx
c03bc025: 11 f9 adc %edi,%ecx
c03bc027: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp)
c03bc02c: 75 f9 jne c03bc027 <atomic64_add_return_cx8+0x1b>
c03bc02e: 89 d8 mov %ebx,%eax
c03bc030: 89 ca mov %ecx,%edx
c03bc032: 5f pop %edi
c03bc033: 5e pop %esi
c03bc034: 5b pop %ebx
c03bc035: 5d pop %ebp
c03bc036: c3 ret
The ' jne c03bc027' should really be 'jne c03bc01f'
No idea how old is this bug.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-03 15:00 ` Eric Dumazet
@ 2012-01-03 15:15 ` Eric Dumazet
2012-01-03 15:41 ` Eric Dumazet
2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
1 sibling, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-01-03 15:15 UTC (permalink / raw)
To: Jan Beulich; +Cc: mingo, tglx, hpa, Christoph Lameter, linux-kernel, netdev
Le mardi 03 janvier 2012 à 16:00 +0100, Eric Dumazet a écrit :
> While looking at your patch, I discovered that atomic64_add() /
> atomic64_inc() on 32bit are completely buggy. Oh well...
>
> Generated code :
>
> c03bc00c <atomic64_add_return_cx8>:
> c03bc00c: 55 push %ebp
> c03bc00d: 53 push %ebx
> c03bc00e: 56 push %esi
> c03bc00f: 57 push %edi
> c03bc010: 89 c6 mov %eax,%esi
> c03bc012: 89 d7 mov %edx,%edi
> c03bc014: 89 cd mov %ecx,%ebp
> c03bc016: 89 d8 mov %ebx,%eax
> c03bc018: 89 ca mov %ecx,%edx
> c03bc01a: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp)
> c03bc01f: 89 c3 mov %eax,%ebx
> c03bc021: 89 d1 mov %edx,%ecx
> c03bc023: 01 f3 add %esi,%ebx
> c03bc025: 11 f9 adc %edi,%ecx
> c03bc027: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp)
> c03bc02c: 75 f9 jne c03bc027 <atomic64_add_return_cx8+0x1b>
> c03bc02e: 89 d8 mov %ebx,%eax
> c03bc030: 89 ca mov %ecx,%edx
> c03bc032: 5f pop %edi
> c03bc033: 5e pop %esi
> c03bc034: 5b pop %ebx
> c03bc035: 5d pop %ebp
> c03bc036: c3 ret
>
> The ' jne c03bc027' should really be 'jne c03bc01f'
>
> No idea how old is this bug.
>
Very old it seems...
arch/x86/lib/atomic64_cx8_32.S
all "jxx 1b" are wrong if a LOCK_PREFIX is included after the 1: label
1:
inst1
LOCK_PREFIX
cmpxchg8b (%ebp)
jne 1b / jumps to beginning of LOCK_PREFIX, inst1 is not replayed
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-03 15:00 ` Eric Dumazet
2012-01-03 15:15 ` Eric Dumazet
@ 2012-01-03 15:35 ` Jan Beulich
2012-01-03 17:31 ` Eric Dumazet
1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2012-01-03 15:35 UTC (permalink / raw)
To: Eric Dumazet; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa
>>> On 03.01.12 at 16:00, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le lundi 02 janvier 2012 à 17:02 +0000, Jan Beulich a écrit :
>> Just like the per-CPU ones they had several problems/shortcomings:
>>
>> Only the first memory operand was mentioned in the asm() operands, and
>> the 2x64-bit version didn't have a memory clobber while the 2x32-bit
>> one did. The former allowed the compiler to not recognize the need to
>> re-load the data in case it had it cached in some register, while the
>> latter was overly destructive.
>>
>> The types of the local copies of the old and new values were incorrect
>> (the types of the pointed-to variables should be used here, to make
>> sure the respective old/new variable types are compatible).
>>
>> The __dummy/__junk variables were pointless, given that local copies
>> of the inputs already existed (and can hence be used for discarded
>> outputs).
>>
>> The 32-bit variant of cmpxchg_double_local() referenced
>> cmpxchg16b_local().
>>
>> At once also
>> - change the return value type to what it really is: 'bool'
>> - unify 32- and 64-bit variants
>> - abstract out the common part of the 'normal' and 'local' variants
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> While looking at your patch, I discovered that atomic64_add() /
> atomic64_inc() on 32bit are completely buggy. Oh well...
>
> Generated code :
>
> c03bc00c <atomic64_add_return_cx8>:
> c03bc00c: 55 push %ebp
> c03bc00d: 53 push %ebx
> c03bc00e: 56 push %esi
> c03bc00f: 57 push %edi
> c03bc010: 89 c6 mov %eax,%esi
> c03bc012: 89 d7 mov %edx,%edi
> c03bc014: 89 cd mov %ecx,%ebp
> c03bc016: 89 d8 mov %ebx,%eax
> c03bc018: 89 ca mov %ecx,%edx
> c03bc01a: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp)
> c03bc01f: 89 c3 mov %eax,%ebx
> c03bc021: 89 d1 mov %edx,%ecx
> c03bc023: 01 f3 add %esi,%ebx
> c03bc025: 11 f9 adc %edi,%ecx
> c03bc027: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp)
> c03bc02c: 75 f9 jne c03bc027
> <atomic64_add_return_cx8+0x1b>
> c03bc02e: 89 d8 mov %ebx,%eax
> c03bc030: 89 ca mov %ecx,%edx
> c03bc032: 5f pop %edi
> c03bc033: 5e pop %esi
> c03bc034: 5b pop %ebx
> c03bc035: 5d pop %ebp
> c03bc036: c3 ret
>
> The ' jne c03bc027' should really be 'jne c03bc01f'
Indeed, and that's the same for all other routines in this file that
incorrectly use 1: together with LOCK_PREFIX between the label and
an intended jump to that label.
> No idea how old is this bug.
The file (and with it the bug) was introduced in 2.6.35.
While looking at this I also noticed this comment in read64: "we need
LOCK_PREFIX since otherwise cmpxchg8b always does the write",
which is saying quite the opposite of the Intel manual: "This instruction
can be used with a LOCK prefix to allow the instruction to be executed
atomically. To simplify the interface to the processor’s bus, the
destination operand receives a write cycle without regard to the result
of the comparison. The destination operand is written back if the
comparison fails; otherwise, the source operand is written into the
destination. (The processor never produces a locked read without
also producing a locked write.)" - I would conclude the LOCK prefix
actually hurts there.
And in atomic64_set_cx8 it's the other way around: The comment
explains why supposedly no LOCK prefix is needed, but that's again
in conflict with above quoted paragraph from the manual.
Jan
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-03 15:15 ` Eric Dumazet
@ 2012-01-03 15:41 ` Eric Dumazet
2012-01-03 16:08 ` Jan Beulich
0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-01-03 15:41 UTC (permalink / raw)
To: Jan Beulich; +Cc: mingo, tglx, hpa, Christoph Lameter, linux-kernel, netdev
Le mardi 03 janvier 2012 à 16:15 +0100, Eric Dumazet a écrit :
> Very old it seems...
>
> arch/x86/lib/atomic64_cx8_32.S
>
> all "jxx 1b" are wrong if a LOCK_PREFIX is included after the 1: label
>
> 1:
> inst1
> LOCK_PREFIX
> cmpxchg8b (%ebp)
> jne 1b / jumps to beginning of LOCK_PREFIX, inst1 is not replayed
>
>
>
>
A possible fix would be to not use "1" label in LOCK_PREFIX macro,
but 672 magic value.
Not sure if we can use a local label in a macro ?
arch/x86/include/asm/alternative-asm.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b..952bd01 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
#ifdef CONFIG_SMP
.macro LOCK_PREFIX
-1: lock
+672: lock
.section .smp_locks,"a"
.balign 4
- .long 1b - .
+ .long 672b - .
.previous
.endm
#else
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-03 15:41 ` Eric Dumazet
@ 2012-01-03 16:08 ` Jan Beulich
2012-01-03 16:13 ` Eric Dumazet
0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2012-01-03 16:08 UTC (permalink / raw)
To: Eric Dumazet; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa
>>> On 03.01.12 at 16:41, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le mardi 03 janvier 2012 à 16:15 +0100, Eric Dumazet a écrit :
>
>> Very old it seems...
>>
>> arch/x86/lib/atomic64_cx8_32.S
>>
>> all "jxx 1b" are wrong if a LOCK_PREFIX is included after the 1: label
>>
>> 1:
>> inst1
>> LOCK_PREFIX
>> cmpxchg8b (%ebp)
>> jne 1b / jumps to beginning of LOCK_PREFIX, inst1 is not replayed
>>
>>
>>
>>
>
> A possible fix would be to not use "1" label in LOCK_PREFIX macro,
> but 672 magic value.
>
> Not sure if we can use a local label in a macro ?
"1" and "672" are both local labels, so both are okay. As long as there's
no other (colliding) use of 672 anywhere, that would seem to be the
preferred fix (feel free to put my ack on the patch when you formally
submit it).
Jan
> arch/x86/include/asm/alternative-asm.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/alternative-asm.h
> b/arch/x86/include/asm/alternative-asm.h
> index 091508b..952bd01 100644
> --- a/arch/x86/include/asm/alternative-asm.h
> +++ b/arch/x86/include/asm/alternative-asm.h
> @@ -4,10 +4,10 @@
>
> #ifdef CONFIG_SMP
> .macro LOCK_PREFIX
> -1: lock
> +672: lock
> .section .smp_locks,"a"
> .balign 4
> - .long 1b - .
> + .long 672b - .
> .previous
> .endm
> #else
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-03 16:08 ` Jan Beulich
@ 2012-01-03 16:13 ` Eric Dumazet
2012-01-03 16:35 ` [PATCH] x86: fix atomic64_xxx_cx8() functions Eric Dumazet
0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-01-03 16:13 UTC (permalink / raw)
To: Jan Beulich; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa
Le mardi 03 janvier 2012 à 16:08 +0000, Jan Beulich a écrit :
> "1" and "672" are both local labels, so both are okay. As long as there's
> no other (colliding) use of 672 anywhere, that would seem to be the
> preferred fix (feel free to put my ack on the patch when you formally
> submit it).
I was referring to the use of a label local to the macro itself, with
restricted scope.
The following pseudo code would trigger an asm error:
.macro FOO
.local_label 1
1: lock
.section .smp_locks,"a"
.balign 4
.long 1b - .
.previous
.endm
FOO
jne 1b
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH] x86: fix atomic64_xxx_cx8() functions
2012-01-03 16:13 ` Eric Dumazet
@ 2012-01-03 16:35 ` Eric Dumazet
2012-01-04 15:32 ` [tip:x86/asm] x86: Fix " tip-bot for Eric Dumazet
0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-01-03 16:35 UTC (permalink / raw)
To: Jan Beulich; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa
It appears about all functions in arch/x86/lib/atomic64_cx8_32.S are
wrong in case cmpxchg8b must be restarted, because LOCK_PREFIX macro
defines a label "1" clashing with other local labels :
1:
some_instructions
LOCK_PREFIX
cmpxchg8b (%ebp)
jne 1b / jumps to beginning of LOCK_PREFIX !
A possible fix is to use a magic label "672" in LOCK_PREFIX asm
definition, similar to the "671" one we defined in LOCK_PREFIX_HERE.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jan Beulich <JBeulich@suse.com>
CC: Christoph Lameter <cl@linux.com>
CC: Ingo Molnar <mingo@elte.hu>
CC: H. Peter Anvin <hpa@zytor.com>
Cc: stable@vger.kernel.org [2.6.35+]
---
arch/x86/include/asm/alternative-asm.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b..952bd01 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
#ifdef CONFIG_SMP
.macro LOCK_PREFIX
-1: lock
+672: lock
.section .smp_locks,"a"
.balign 4
- .long 1b - .
+ .long 672b - .
.previous
.endm
#else
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
@ 2012-01-03 17:31 ` Eric Dumazet
2012-01-04 10:36 ` David Laight
0 siblings, 1 reply; 13+ messages in thread
From: Eric Dumazet @ 2012-01-03 17:31 UTC (permalink / raw)
To: Jan Beulich; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa
Le mardi 03 janvier 2012 à 15:35 +0000, Jan Beulich a écrit :
> And in atomic64_set_cx8 it's the other way around: The comment
> explains why supposedly no LOCK prefix is needed, but that's again
> in conflict with above quoted paragraph from the manual.
BTW atomic64_set() asm() constraints are wrong :
static inline void atomic64_set(atomic64_t *v, long long i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
asm volatile(ATOMIC64_ALTERNATIVE(set)
: "+b" (low), "+c" (high)
: "S" (v)
: "eax", "edx", "memory"
);
}
ebx/ecx registers are not modified by cmpxchg8b (or the atomic64_set_386
emulation). Only eax/edx can be modified.
^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-03 17:31 ` Eric Dumazet
@ 2012-01-04 10:36 ` David Laight
2012-01-04 10:54 ` Jan Beulich
0 siblings, 1 reply; 13+ messages in thread
From: David Laight @ 2012-01-04 10:36 UTC (permalink / raw)
To: Eric Dumazet, Jan Beulich
Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa
> BTW atomic64_set() asm() constraints are wrong :
>
> static inline void atomic64_set(atomic64_t *v, long long i)
> {
> unsigned high = (unsigned)(i >> 32);
> unsigned low = (unsigned)i;
> asm volatile(ATOMIC64_ALTERNATIVE(set)
> : "+b" (low), "+c" (high)
> : "S" (v)
> : "eax", "edx", "memory"
> );
> }
>
>
> ebx/ecx registers are not modified by cmpxchg8b (or the
> atomic64_set_386 emulation). Only eax/edx can be modified.
Isn't it also possible to constrain the "memory"
constraint to only apply to '*v' not all of memory?
I can't remember the syntax off hand though...
David
^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [PATCH] x86: fix and improve cmpxchg_double{,_local}()
2012-01-04 10:36 ` David Laight
@ 2012-01-04 10:54 ` Jan Beulich
0 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2012-01-04 10:54 UTC (permalink / raw)
To: David Laight, Eric Dumazet
Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa
>>> On 04.01.12 at 11:36, "David Laight" <David.Laight@ACULAB.COM> wrote:
>> BTW atomic64_set() asm() constraints are wrong :
>>
>> static inline void atomic64_set(atomic64_t *v, long long i)
>> {
>> unsigned high = (unsigned)(i >> 32);
>> unsigned low = (unsigned)i;
>> asm volatile(ATOMIC64_ALTERNATIVE(set)
>> : "+b" (low), "+c" (high)
>> : "S" (v)
>> : "eax", "edx", "memory"
>> );
>> }
>>
>>
>> ebx/ecx registers are not modified by cmpxchg8b (or the
>> atomic64_set_386 emulation). Only eax/edx can be modified.
Same would be true for atomic64_xchg() and the use of "+c" (v)
in subsequent functions (whether unnecessarily strict or too lax
varies).
> Isn't it also possible to constrain the "memory"
> constraint to only apply to '*v' not all of memory?
> I can't remember the syntax off hand though...
Absolutely - "=m" (v->counter) would be the right way to specify
this.
Jan
^ permalink raw reply [flat|nested] 13+ messages in thread
* [tip:x86/asm] x86: Fix and improve cmpxchg_double{,_local}()
2012-01-02 17:02 [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
2012-01-03 15:00 ` Eric Dumazet
@ 2012-01-04 15:31 ` tip-bot for Jan Beulich
1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Jan Beulich @ 2012-01-04 15:31 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, hpa, mingo, torvalds, jbeulich, cl, akpm, JBeulich,
tglx, mingo
Commit-ID: cdcd629869fabcd38ebd24a03b0a05ec1cbcafb0
Gitweb: http://git.kernel.org/tip/cdcd629869fabcd38ebd24a03b0a05ec1cbcafb0
Author: Jan Beulich <JBeulich@suse.com>
AuthorDate: Mon, 2 Jan 2012 17:02:18 +0000
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 4 Jan 2012 15:01:54 +0100
x86: Fix and improve cmpxchg_double{,_local}()
Just like the per-CPU ones they had several
problems/shortcomings:
Only the first memory operand was mentioned in the asm()
operands, and the 2x64-bit version didn't have a memory clobber
while the 2x32-bit one did. The former allowed the compiler to
not recognize the need to re-load the data in case it had it
cached in some register, while the latter was overly
destructive.
The types of the local copies of the old and new values were
incorrect (the types of the pointed-to variables should be used
here, to make sure the respective old/new variable types are
compatible).
The __dummy/__junk variables were pointless, given that local
copies of the inputs already existed (and can hence be used for
discarded outputs).
The 32-bit variant of cmpxchg_double_local() referenced
cmpxchg16b_local().
At once also:
- change the return value type to what it really is: 'bool'
- unify 32- and 64-bit variants
- abstract out the common part of the 'normal' and 'local' variants
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F01F12A020000780006A19B@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/include/asm/cmpxchg.h | 23 ++++++++++++++++++
arch/x86/include/asm/cmpxchg_32.h | 46 -------------------------------------
arch/x86/include/asm/cmpxchg_64.h | 43 ----------------------------------
mm/slub.c | 4 +-
4 files changed, 25 insertions(+), 91 deletions(-)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5488e10..0c9fa27 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -207,4 +207,27 @@ extern void __add_wrong_size(void)
#define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX)
#define add_sync(ptr, inc) __add((ptr), (inc), "lock; ")
+#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
+({ \
+ bool __ret; \
+ __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
+ __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
+ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
+ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
+ VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
+ VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
+ asm volatile(pfx "cmpxchg%c4b %2; sete %0" \
+ : "=a" (__ret), "+d" (__old2), \
+ "+m" (*(p1)), "+m" (*(p2)) \
+ : "i" (2 * sizeof(long)), "a" (__old1), \
+ "b" (__new1), "c" (__new2)); \
+ __ret; \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
+
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
+
#endif /* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fbebb07..53f4b21 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
#endif
-#define cmpxchg8b(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __dummy; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \
- : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
- : "a" (__old1), "d"(__old2), \
- "b" (__new1), "c" (__new2) \
- : "memory"); \
- __ret; })
-
-
-#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __dummy; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile("cmpxchg8b %2; setz %1" \
- : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
- : "a" (__old), "d"(__old2), \
- "b" (__new1), "c" (__new2), \
- : "memory"); \
- __ret; })
-
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- VM_BUG_ON((unsigned long)(ptr) % 8); \
- cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- VM_BUG_ON((unsigned long)(ptr) % 8); \
- cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
-})
-
#define system_has_cmpxchg_double() cpu_has_cx8
#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 285da02..614be87 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg16b(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __junk; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \
- : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
- : "b"(__new1), "c"(__new2), \
- "a"(__old1), "d"(__old2)); \
- __ret; })
-
-
-#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __junk; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile("cmpxchg16b %2;setz %1" \
- : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
- : "b"(__new1), "c"(__new2), \
- "a"(__old1), "d"(__old2)); \
- __ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- VM_BUG_ON((unsigned long)(ptr) % 16); \
- cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- VM_BUG_ON((unsigned long)(ptr) % 16); \
- cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
-})
-
#define system_has_cmpxchg_double() cpu_has_cx16
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/mm/slub.c b/mm/slub.c
index ed3334d..09ccee8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
VM_BUG_ON(!irqs_disabled());
#ifdef CONFIG_CMPXCHG_DOUBLE
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
@@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
{
#ifdef CONFIG_CMPXCHG_DOUBLE
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [tip:x86/asm] x86: Fix atomic64_xxx_cx8() functions
2012-01-03 16:35 ` [PATCH] x86: fix atomic64_xxx_cx8() functions Eric Dumazet
@ 2012-01-04 15:32 ` tip-bot for Eric Dumazet
0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Eric Dumazet @ 2012-01-04 15:32 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, hpa, mingo, eric.dumazet, torvalds, cl, akpm,
JBeulich, tglx, mingo
Commit-ID: ceb7b40b65539a771d1bfaf47660ac0ee57e0c4f
Gitweb: http://git.kernel.org/tip/ceb7b40b65539a771d1bfaf47660ac0ee57e0c4f
Author: Eric Dumazet <eric.dumazet@gmail.com>
AuthorDate: Tue, 3 Jan 2012 17:35:40 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 4 Jan 2012 15:01:56 +0100
x86: Fix atomic64_xxx_cx8() functions
It appears about all functions in arch/x86/lib/atomic64_cx8_32.S
are wrong in case cmpxchg8b must be restarted, because
LOCK_PREFIX macro defines a label "1" clashing with other local
labels :
1:
some_instructions
LOCK_PREFIX
cmpxchg8b (%ebp)
jne 1b / jumps to beginning of LOCK_PREFIX !
A possible fix is to use a magic label "672" in LOCK_PREFIX asm
definition, similar to the "671" one we defined in
LOCK_PREFIX_HERE.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jan Beulich <JBeulich@suse.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1325608540.2320.103.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/include/asm/alternative-asm.h | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 091508b..952bd01 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -4,10 +4,10 @@
#ifdef CONFIG_SMP
.macro LOCK_PREFIX
-1: lock
+672: lock
.section .smp_locks,"a"
.balign 4
- .long 1b - .
+ .long 672b - .
.previous
.endm
#else
^ permalink raw reply related [flat|nested] 13+ messages in thread
end of thread, other threads:[~2012-01-04 15:33 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-01-02 17:02 [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
2012-01-03 15:00 ` Eric Dumazet
2012-01-03 15:15 ` Eric Dumazet
2012-01-03 15:41 ` Eric Dumazet
2012-01-03 16:08 ` Jan Beulich
2012-01-03 16:13 ` Eric Dumazet
2012-01-03 16:35 ` [PATCH] x86: fix atomic64_xxx_cx8() functions Eric Dumazet
2012-01-04 15:32 ` [tip:x86/asm] x86: Fix " tip-bot for Eric Dumazet
2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
2012-01-03 17:31 ` Eric Dumazet
2012-01-04 10:36 ` David Laight
2012-01-04 10:54 ` Jan Beulich
2012-01-04 15:31 ` [tip:x86/asm] x86: Fix " tip-bot for Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox