* [PATCH] x86: fix and improve cmpxchg_double{,_local}()
@ 2012-01-02 17:02 Jan Beulich
2012-01-03 15:00 ` Eric Dumazet
2012-01-04 15:31 ` [tip:x86/asm] x86: Fix " tip-bot for Jan Beulich
0 siblings, 2 replies; 13+ messages in thread
From: Jan Beulich @ 2012-01-02 17:02 UTC (permalink / raw)
To: mingo, tglx, hpa; +Cc: Christoph Lameter, linux-kernel
Just like the per-CPU ones they had several problems/shortcomings:
Only the first memory operand was mentioned in the asm() operands, and
the 2x64-bit version didn't have a memory clobber while the 2x32-bit
one did. The former allowed the compiler to not recognize the need to
re-load the data in case it had it cached in some register, while the
latter was overly destructive.
The types of the local copies of the old and new values were incorrect
(the types of the pointed-to variables should be used here, to make
sure the respective old/new variable types are compatible).
The __dummy/__junk variables were pointless, given that local copies
of the inputs already existed (and can hence be used for discarded
outputs).
The 32-bit variant of cmpxchg_double_local() referenced
cmpxchg16b_local().
At once also
- change the return value type to what it really is: 'bool'
- unify 32- and 64-bit variants
- abstract out the common part of the 'normal' and 'local' variants
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
arch/x86/include/asm/cmpxchg.h | 23 +++++++++++++++++++
arch/x86/include/asm/cmpxchg_32.h | 46 --------------------------------------
arch/x86/include/asm/cmpxchg_64.h | 43 -----------------------------------
mm/slub.c | 4 +--
4 files changed, 25 insertions(+), 91 deletions(-)
--- 3.2-rc7/arch/x86/include/asm/cmpxchg.h
+++ 3.2-rc7-x86-cmpxchg-double/arch/x86/include/asm/cmpxchg.h
@@ -207,4 +207,27 @@ extern void __xadd_wrong_size(void)
#define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ")
#define xadd_local(ptr, inc) __xadd((ptr), (inc), "")
+#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
+({ \
+ bool __ret; \
+ __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
+ __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
+ BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
+ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
+ VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
+ VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
+ asm volatile(pfx "cmpxchg%c4b %2; sete %0" \
+ : "=a" (__ret), "+d" (__old2), \
+ "+m" (*(p1)), "+m" (*(p2)) \
+ : "i" (2 * sizeof(long)), "a" (__old1), \
+ "b" (__new1), "c" (__new2)); \
+ __ret; \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
+
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+ __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
+
#endif /* ASM_X86_CMPXCHG_H */
--- 3.2-rc7/arch/x86/include/asm/cmpxchg_32.h
+++ 3.2-rc7-x86-cmpxchg-double/arch/x86/include/asm/cmpxchg_32.h
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(
#endif
-#define cmpxchg8b(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __dummy; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \
- : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
- : "a" (__old1), "d"(__old2), \
- "b" (__new1), "c" (__new2) \
- : "memory"); \
- __ret; })
-
-
-#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __dummy; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile("cmpxchg8b %2; setz %1" \
- : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
- : "a" (__old), "d"(__old2), \
- "b" (__new1), "c" (__new2), \
- : "memory"); \
- __ret; })
-
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- VM_BUG_ON((unsigned long)(ptr) % 8); \
- cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
- VM_BUG_ON((unsigned long)(ptr) % 8); \
- cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
-})
-
#define system_has_cmpxchg_double() cpu_has_cx8
#endif /* _ASM_X86_CMPXCHG_32_H */
--- 3.2-rc7/arch/x86/include/asm/cmpxchg_64.h
+++ 3.2-rc7-x86-cmpxchg-double/arch/x86/include/asm/cmpxchg_64.h
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u6
cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg16b(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __junk; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \
- : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
- : "b"(__new1), "c"(__new2), \
- "a"(__old1), "d"(__old2)); \
- __ret; })
-
-
-#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \
-({ \
- char __ret; \
- __typeof__(o2) __junk; \
- __typeof__(*(ptr)) __old1 = (o1); \
- __typeof__(o2) __old2 = (o2); \
- __typeof__(*(ptr)) __new1 = (n1); \
- __typeof__(o2) __new2 = (n2); \
- asm volatile("cmpxchg16b %2;setz %1" \
- : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
- : "b"(__new1), "c"(__new2), \
- "a"(__old1), "d"(__old2)); \
- __ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- VM_BUG_ON((unsigned long)(ptr) % 16); \
- cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \
-({ \
- BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- VM_BUG_ON((unsigned long)(ptr) % 16); \
- cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \
-})
-
#define system_has_cmpxchg_double() cpu_has_cx16
#endif /* _ASM_X86_CMPXCHG_64_H */
--- 3.2-rc7/mm/slub.c
+++ 3.2-rc7-x86-cmpxchg-double/mm/slub.c
@@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab
VM_BUG_ON(!irqs_disabled());
#ifdef CONFIG_CMPXCHG_DOUBLE
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
@@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(s
{
#ifdef CONFIG_CMPXCHG_DOUBLE
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-02 17:02 [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich @ 2012-01-03 15:00 ` Eric Dumazet 2012-01-03 15:15 ` Eric Dumazet 2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich 2012-01-04 15:31 ` [tip:x86/asm] x86: Fix " tip-bot for Jan Beulich 1 sibling, 2 replies; 13+ messages in thread From: Eric Dumazet @ 2012-01-03 15:00 UTC (permalink / raw) To: Jan Beulich; +Cc: mingo, tglx, hpa, Christoph Lameter, linux-kernel, netdev Le lundi 02 janvier 2012 à 17:02 +0000, Jan Beulich a écrit : > Just like the per-CPU ones they had several problems/shortcomings: > > Only the first memory operand was mentioned in the asm() operands, and > the 2x64-bit version didn't have a memory clobber while the 2x32-bit > one did. The former allowed the compiler to not recognize the need to > re-load the data in case it had it cached in some register, while the > latter was overly destructive. > > The types of the local copies of the old and new values were incorrect > (the types of the pointed-to variables should be used here, to make > sure the respective old/new variable types are compatible). > > The __dummy/__junk variables were pointless, given that local copies > of the inputs already existed (and can hence be used for discarded > outputs). > > The 32-bit variant of cmpxchg_double_local() referenced > cmpxchg16b_local(). > > At once also > - change the return value type to what it really is: 'bool' > - unify 32- and 64-bit variants > - abstract out the common part of the 'normal' and 'local' variants > > Signed-off-by: Jan Beulich <jbeulich@suse.com> While looking at your patch, I discovered that atomic64_add() / atomic64_inc() on 32bit are completely buggy. Oh well... 
Generated code : c03bc00c <atomic64_add_return_cx8>: c03bc00c: 55 push %ebp c03bc00d: 53 push %ebx c03bc00e: 56 push %esi c03bc00f: 57 push %edi c03bc010: 89 c6 mov %eax,%esi c03bc012: 89 d7 mov %edx,%edi c03bc014: 89 cd mov %ecx,%ebp c03bc016: 89 d8 mov %ebx,%eax c03bc018: 89 ca mov %ecx,%edx c03bc01a: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp) c03bc01f: 89 c3 mov %eax,%ebx c03bc021: 89 d1 mov %edx,%ecx c03bc023: 01 f3 add %esi,%ebx c03bc025: 11 f9 adc %edi,%ecx c03bc027: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp) c03bc02c: 75 f9 jne c03bc027 <atomic64_add_return_cx8+0x1b> c03bc02e: 89 d8 mov %ebx,%eax c03bc030: 89 ca mov %ecx,%edx c03bc032: 5f pop %edi c03bc033: 5e pop %esi c03bc034: 5b pop %ebx c03bc035: 5d pop %ebp c03bc036: c3 ret The ' jne c03bc027' should really be 'jne c03bc01f' No idea how old is this bug. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-03 15:00 ` Eric Dumazet @ 2012-01-03 15:15 ` Eric Dumazet 2012-01-03 15:41 ` Eric Dumazet 2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich 1 sibling, 1 reply; 13+ messages in thread From: Eric Dumazet @ 2012-01-03 15:15 UTC (permalink / raw) To: Jan Beulich; +Cc: mingo, tglx, hpa, Christoph Lameter, linux-kernel, netdev Le mardi 03 janvier 2012 à 16:00 +0100, Eric Dumazet a écrit : > While looking at your patch, I discovered that atomic64_add() / > atomic64_inc() on 32bit are completely buggy. Oh well... > > Generated code : > > c03bc00c <atomic64_add_return_cx8>: > c03bc00c: 55 push %ebp > c03bc00d: 53 push %ebx > c03bc00e: 56 push %esi > c03bc00f: 57 push %edi > c03bc010: 89 c6 mov %eax,%esi > c03bc012: 89 d7 mov %edx,%edi > c03bc014: 89 cd mov %ecx,%ebp > c03bc016: 89 d8 mov %ebx,%eax > c03bc018: 89 ca mov %ecx,%edx > c03bc01a: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp) > c03bc01f: 89 c3 mov %eax,%ebx > c03bc021: 89 d1 mov %edx,%ecx > c03bc023: 01 f3 add %esi,%ebx > c03bc025: 11 f9 adc %edi,%ecx > c03bc027: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp) > c03bc02c: 75 f9 jne c03bc027 <atomic64_add_return_cx8+0x1b> > c03bc02e: 89 d8 mov %ebx,%eax > c03bc030: 89 ca mov %ecx,%edx > c03bc032: 5f pop %edi > c03bc033: 5e pop %esi > c03bc034: 5b pop %ebx > c03bc035: 5d pop %ebp > c03bc036: c3 ret > > The ' jne c03bc027' should really be 'jne c03bc01f' > > No idea how old is this bug. > Very old it seems... arch/x86/lib/atomic64_cx8_32.S all "jxx 1b" are wrong if a LOCK_PREFIX is included after the 1: label 1: inst1 LOCK_PREFIX cmpxchg8b (%ebp) jne 1b / jumps to beginning of LOCK_PREFIX, inst1 is not replayed ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-03 15:15 ` Eric Dumazet @ 2012-01-03 15:41 ` Eric Dumazet 2012-01-03 16:08 ` Jan Beulich 0 siblings, 1 reply; 13+ messages in thread From: Eric Dumazet @ 2012-01-03 15:41 UTC (permalink / raw) To: Jan Beulich; +Cc: mingo, tglx, hpa, Christoph Lameter, linux-kernel, netdev Le mardi 03 janvier 2012 à 16:15 +0100, Eric Dumazet a écrit : > Very old it seems... > > arch/x86/lib/atomic64_cx8_32.S > > all "jxx 1b" are wrong if a LOCK_PREFIX is included after the 1: label > > 1: > inst1 > LOCK_PREFIX > cmpxchg8b (%ebp) > jne 1b / jumps to beginning of LOCK_PREFIX, inst1 is not replayed > > > > A possible fix would be to not use "1" label in LOCK_PREFIX macro, but 672 magic value. Not sure if we can use a local label in a macro ? arch/x86/include/asm/alternative-asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 091508b..952bd01 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -4,10 +4,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX -1: lock +672: lock .section .smp_locks,"a" .balign 4 - .long 1b - . + .long 672b - . .previous .endm #else ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-03 15:41 ` Eric Dumazet @ 2012-01-03 16:08 ` Jan Beulich 2012-01-03 16:13 ` Eric Dumazet 0 siblings, 1 reply; 13+ messages in thread From: Jan Beulich @ 2012-01-03 16:08 UTC (permalink / raw) To: Eric Dumazet; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa >>> On 03.01.12 at 16:41, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Le mardi 03 janvier 2012 à 16:15 +0100, Eric Dumazet a écrit : > >> Very old it seems... >> >> arch/x86/lib/atomic64_cx8_32.S >> >> all "jxx 1b" are wrong if a LOCK_PREFIX is included after the 1: label >> >> 1: >> inst1 >> LOCK_PREFIX >> cmpxchg8b (%ebp) >> jne 1b / jumps to beginning of LOCK_PREFIX, inst1 is not replayed >> >> >> >> > > A possible fix would be to not use "1" label in LOCK_PREFIX macro, > but 672 magic value. > > Not sure if we can use a local label in a macro ? "1" and "672" are both local labels, so both are okay. As long as there's no other (colliding) use of 672 anywhere, that would seem to be the preferred fix (feel free to put my ack on the patch when you formally submit it). Jan > arch/x86/include/asm/alternative-asm.h | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/include/asm/alternative-asm.h > b/arch/x86/include/asm/alternative-asm.h > index 091508b..952bd01 100644 > --- a/arch/x86/include/asm/alternative-asm.h > +++ b/arch/x86/include/asm/alternative-asm.h > @@ -4,10 +4,10 @@ > > #ifdef CONFIG_SMP > .macro LOCK_PREFIX > -1: lock > +672: lock > .section .smp_locks,"a" > .balign 4 > - .long 1b - . > + .long 672b - . > .previous > .endm > #else ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-03 16:08 ` Jan Beulich @ 2012-01-03 16:13 ` Eric Dumazet 2012-01-03 16:35 ` [PATCH] x86: fix atomic64_xxx_cx8() functions Eric Dumazet 0 siblings, 1 reply; 13+ messages in thread From: Eric Dumazet @ 2012-01-03 16:13 UTC (permalink / raw) To: Jan Beulich; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa Le mardi 03 janvier 2012 à 16:08 +0000, Jan Beulich a écrit : > "1" and "672" are both local labels, so both are okay. As long as there's > no other (colliding) use of 672 anywhere, that would seem to be the > preferred fix (feel free to put my ack on the patch when you formally > submit it). I was referring to the use of a label local to the macro itself, with restricted scope. The following pseudo code would trigger an asm error : .macro FOO .local_label 1 1: lock .section .smp_locks,"a" .balign 4 .long 1b - . .previous .endm FOO jne 1b ^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH] x86: fix atomic64_xxx_cx8() functions 2012-01-03 16:13 ` Eric Dumazet @ 2012-01-03 16:35 ` Eric Dumazet 2012-01-04 15:32 ` [tip:x86/asm] x86: Fix " tip-bot for Eric Dumazet 0 siblings, 1 reply; 13+ messages in thread From: Eric Dumazet @ 2012-01-03 16:35 UTC (permalink / raw) To: Jan Beulich; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa It appears about all functions in arch/x86/lib/atomic64_cx8_32.S are wrong in case cmpxchg8b must be restarted, because LOCK_PREFIX macro defines a label "1" clashing with other local labels : 1: some_instructions LOCK_PREFIX cmpxchg8b (%ebp) jne 1b / jumps to beginning of LOCK_PREFIX ! A possible fix is to use a magic label "672" in LOCK_PREFIX asm definition, similar to the "671" one we defined in LOCK_PREFIX_HERE. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: Jan Beulich <JBeulich@suse.com> CC: Christoph Lameter <cl@linux.com> CC: Ingo Molnar <mingo@elte.hu> CC: H. Peter Anvin <hpa@zytor.com> Cc: stable@vger.kernel.org [2.6.35+] --- arch/x86/include/asm/alternative-asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 091508b..952bd01 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -4,10 +4,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX -1: lock +672: lock .section .smp_locks,"a" .balign 4 - .long 1b - . + .long 672b - . .previous .endm #else ^ permalink raw reply related [flat|nested] 13+ messages in thread
* [tip:x86/asm] x86: Fix atomic64_xxx_cx8() functions 2012-01-03 16:35 ` [PATCH] x86: fix atomic64_xxx_cx8() functions Eric Dumazet @ 2012-01-04 15:32 ` tip-bot for Eric Dumazet 0 siblings, 0 replies; 13+ messages in thread From: tip-bot for Eric Dumazet @ 2012-01-04 15:32 UTC (permalink / raw) To: linux-tip-commits Cc: linux-kernel, hpa, mingo, eric.dumazet, torvalds, cl, akpm, JBeulich, tglx, mingo Commit-ID: ceb7b40b65539a771d1bfaf47660ac0ee57e0c4f Gitweb: http://git.kernel.org/tip/ceb7b40b65539a771d1bfaf47660ac0ee57e0c4f Author: Eric Dumazet <eric.dumazet@gmail.com> AuthorDate: Tue, 3 Jan 2012 17:35:40 +0100 Committer: Ingo Molnar <mingo@elte.hu> CommitDate: Wed, 4 Jan 2012 15:01:56 +0100 x86: Fix atomic64_xxx_cx8() functions It appears about all functions in arch/x86/lib/atomic64_cx8_32.S are wrong in case cmpxchg8b must be restarted, because LOCK_PREFIX macro defines a label "1" clashing with other local labels : 1: some_instructions LOCK_PREFIX cmpxchg8b (%ebp) jne 1b / jumps to beginning of LOCK_PREFIX ! A possible fix is to use a magic label "672" in LOCK_PREFIX asm definition, similar to the "671" one we defined in LOCK_PREFIX_HERE. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: Jan Beulich <JBeulich@suse.com> Cc: Christoph Lameter <cl@linux.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/1325608540.2320.103.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/include/asm/alternative-asm.h | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 091508b..952bd01 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -4,10 +4,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX -1: lock +672: lock .section .smp_locks,"a" .balign 4 - .long 1b - . + .long 672b - . 
.previous .endm #else ^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-03 15:00 ` Eric Dumazet 2012-01-03 15:15 ` Eric Dumazet @ 2012-01-03 15:35 ` Jan Beulich 2012-01-03 17:31 ` Eric Dumazet 1 sibling, 1 reply; 13+ messages in thread From: Jan Beulich @ 2012-01-03 15:35 UTC (permalink / raw) To: Eric Dumazet; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa >>> On 03.01.12 at 16:00, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Le lundi 02 janvier 2012 à 17:02 +0000, Jan Beulich a écrit : >> Just like the per-CPU ones they had several problems/shortcomings: >> >> Only the first memory operand was mentioned in the asm() operands, and >> the 2x64-bit version didn't have a memory clobber while the 2x32-bit >> one did. The former allowed the compiler to not recognize the need to >> re-load the data in case it had it cached in some register, while the >> latter was overly destructive. >> >> The types of the local copies of the old and new values were incorrect >> (the types of the pointed-to variables should be used here, to make >> sure the respective old/new variable types are compatible). >> >> The __dummy/__junk variables were pointless, given that local copies >> of the inputs already existed (and can hence be used for discarded >> outputs). >> >> The 32-bit variant of cmpxchg_double_local() referenced >> cmpxchg16b_local(). >> >> At once also >> - change the return value type to what it really is: 'bool' >> - unify 32- and 64-bit variants >> - abstract out the common part of the 'normal' and 'local' variants >> >> Signed-off-by: Jan Beulich <jbeulich@suse.com> > > While looking at your patch, I discovered that atomic64_add() / > atomic64_inc() on 32bit are completely buggy. Oh well... 
> > Generated code : > > c03bc00c <atomic64_add_return_cx8>: > c03bc00c: 55 push %ebp > c03bc00d: 53 push %ebx > c03bc00e: 56 push %esi > c03bc00f: 57 push %edi > c03bc010: 89 c6 mov %eax,%esi > c03bc012: 89 d7 mov %edx,%edi > c03bc014: 89 cd mov %ecx,%ebp > c03bc016: 89 d8 mov %ebx,%eax > c03bc018: 89 ca mov %ecx,%edx > c03bc01a: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp) > c03bc01f: 89 c3 mov %eax,%ebx > c03bc021: 89 d1 mov %edx,%ecx > c03bc023: 01 f3 add %esi,%ebx > c03bc025: 11 f9 adc %edi,%ecx > c03bc027: f0 0f c7 4d 00 lock cmpxchg8b 0x0(%ebp) > c03bc02c: 75 f9 jne c03bc027 > <atomic64_add_return_cx8+0x1b> > c03bc02e: 89 d8 mov %ebx,%eax > c03bc030: 89 ca mov %ecx,%edx > c03bc032: 5f pop %edi > c03bc033: 5e pop %esi > c03bc034: 5b pop %ebx > c03bc035: 5d pop %ebp > c03bc036: c3 ret > > The ' jne c03bc027' should really be 'jne c03bc01f' Indeed, and that's the same for all other routines in this file that incorrectly use 1: together with LOCK_PREFIX between the label and an intended jump to that label. > No idea how old is this bug. The file (and with it the bug) was introduced in 2.6.35. While looking at this I also noticed this comment in read64: "we need LOCK_PREFIX since otherwise cmpxchg8b always does the write", which is saying quite the opposite of the Intel manual: "This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically. To simplify the interface to the processor’s bus, the destination operand receives a write cycle without regard to the result of the comparison. The destination operand is written back if the comparison fails; otherwise, the source operand is written into the destination. (The processor never produces a locked read without also producing a locked write.)" - I would conclude the LOCK prefix actually hurts there. And in atomic64_set_cx8 it's the other way around: The comment explains why supposedly no LOCK prefix is needed, but that's again in conflict with above quoted paragraph from the manual. 
Jan ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich @ 2012-01-03 17:31 ` Eric Dumazet 2012-01-04 10:36 ` David Laight 0 siblings, 1 reply; 13+ messages in thread From: Eric Dumazet @ 2012-01-03 17:31 UTC (permalink / raw) To: Jan Beulich; +Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa Le mardi 03 janvier 2012 à 15:35 +0000, Jan Beulich a écrit : > And in atomic64_set_cx8 it's the other way around: The comment > explains why supposedly no LOCK prefix is needed, but that's again > in conflict with above quoted paragraph from the manual. BTW atomic64_set() asm() contraints are wrong : static inline void atomic64_set(atomic64_t *v, long long i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; asm volatile(ATOMIC64_ALTERNATIVE(set) : "+b" (low), "+c" (high) : "S" (v) : "eax", "edx", "memory" ); } ebx/ecx registers are not modified by cmpxchg8b (or the atomic64_set_386 emulation). Only eax/edx can be modified. ^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-03 17:31 ` Eric Dumazet @ 2012-01-04 10:36 ` David Laight 2012-01-04 10:54 ` Jan Beulich 0 siblings, 1 reply; 13+ messages in thread From: David Laight @ 2012-01-04 10:36 UTC (permalink / raw) To: Eric Dumazet, Jan Beulich Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa > BTW atomic64_set() asm() contraints are wrong : > > static inline void atomic64_set(atomic64_t *v, long long i) > { > unsigned high = (unsigned)(i >> 32); > unsigned low = (unsigned)i; > asm volatile(ATOMIC64_ALTERNATIVE(set) > : "+b" (low), "+c" (high) > : "S" (v) > : "eax", "edx", "memory" > ); > } > > > ebx/ecx registers are not modified by cmpxchg8b (or the > atomic64_set_386 emulation). Only eax/edx can be modified. Isn't it also possible to constrain the "memory" constraint to only apply to '*v' not all of memory? I can't remember the syntax off hand though... David ^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [PATCH] x86: fix and improve cmpxchg_double{,_local}() 2012-01-04 10:36 ` David Laight @ 2012-01-04 10:54 ` Jan Beulich 0 siblings, 0 replies; 13+ messages in thread From: Jan Beulich @ 2012-01-04 10:54 UTC (permalink / raw) To: David Laight, Eric Dumazet Cc: mingo, tglx, Christoph Lameter, linux-kernel, netdev, hpa >>> On 04.01.12 at 11:36, "David Laight" <David.Laight@ACULAB.COM> wrote: >> BTW atomic64_set() asm() contraints are wrong : >> >> static inline void atomic64_set(atomic64_t *v, long long i) >> { >> unsigned high = (unsigned)(i >> 32); >> unsigned low = (unsigned)i; >> asm volatile(ATOMIC64_ALTERNATIVE(set) >> : "+b" (low), "+c" (high) >> : "S" (v) >> : "eax", "edx", "memory" >> ); >> } >> >> >> ebx/ecx registers are not modified by cmpxchg8b (or the >> atomic64_set_386 emulation). Only eax/edx can be modified. Same would be true for atomic64_xchg() and the use of "+c" (v) in subsequent functions (whether unnecessarily strict or too lax varies). > Isn't it also possible to constrain the "memory" > constraint to only apply to '*v' not all of memory? > I can't remember the syntax off hand though... Absolutely - "=m" (v->counter) would be the right way to specify this. Jan ^ permalink raw reply [flat|nested] 13+ messages in thread
* [tip:x86/asm] x86: Fix and improve cmpxchg_double{,_local}() 2012-01-02 17:02 [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich 2012-01-03 15:00 ` Eric Dumazet @ 2012-01-04 15:31 ` tip-bot for Jan Beulich 1 sibling, 0 replies; 13+ messages in thread From: tip-bot for Jan Beulich @ 2012-01-04 15:31 UTC (permalink / raw) To: linux-tip-commits Cc: linux-kernel, hpa, mingo, torvalds, jbeulich, cl, akpm, JBeulich, tglx, mingo Commit-ID: cdcd629869fabcd38ebd24a03b0a05ec1cbcafb0 Gitweb: http://git.kernel.org/tip/cdcd629869fabcd38ebd24a03b0a05ec1cbcafb0 Author: Jan Beulich <JBeulich@suse.com> AuthorDate: Mon, 2 Jan 2012 17:02:18 +0000 Committer: Ingo Molnar <mingo@elte.hu> CommitDate: Wed, 4 Jan 2012 15:01:54 +0100 x86: Fix and improve cmpxchg_double{,_local}() Just like the per-CPU ones they had several problems/shortcomings: Only the first memory operand was mentioned in the asm() operands, and the 2x64-bit version didn't have a memory clobber while the 2x32-bit one did. The former allowed the compiler to not recognize the need to re-load the data in case it had it cached in some register, while the latter was overly destructive. The types of the local copies of the old and new values were incorrect (the types of the pointed-to variables should be used here, to make sure the respective old/new variable types are compatible). The __dummy/__junk variables were pointless, given that local copies of the inputs already existed (and can hence be used for discarded outputs). The 32-bit variant of cmpxchg_double_local() referenced cmpxchg16b_local(). 
At once also: - change the return value type to what it really is: 'bool' - unify 32- and 64-bit variants - abstract out the common part of the 'normal' and 'local' variants Signed-off-by: Jan Beulich <jbeulich@suse.com> Cc: Christoph Lameter <cl@linux.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/4F01F12A020000780006A19B@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/include/asm/cmpxchg.h | 23 ++++++++++++++++++ arch/x86/include/asm/cmpxchg_32.h | 46 ------------------------------------- arch/x86/include/asm/cmpxchg_64.h | 43 ---------------------------------- mm/slub.c | 4 +- 4 files changed, 25 insertions(+), 91 deletions(-) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 5488e10..0c9fa27 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -207,4 +207,27 @@ extern void __add_wrong_size(void) #define add_smp(ptr, inc) __add((ptr), (inc), LOCK_PREFIX) #define add_sync(ptr, inc) __add((ptr), (inc), "lock; ") +#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \ +({ \ + bool __ret; \ + __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \ + __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \ + BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ + BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ + VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ + VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ + asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ + : "=a" (__ret), "+d" (__old2), \ + "+m" (*(p1)), "+m" (*(p2)) \ + : "i" (2 * sizeof(long)), "a" (__old1), \ + "b" (__new1), "c" (__new2)); \ + __ret; \ +}) + +#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) + +#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ + __cmpxchg_double(, p1, p2, o1, o2, n1, n2) + #endif /* ASM_X86_CMPXCHG_H */ diff --git 
a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index fbebb07..53f4b21 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, #endif -#define cmpxchg8b(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) __dummy; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1" \ - : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\ - : "a" (__old1), "d"(__old2), \ - "b" (__new1), "c" (__new2) \ - : "memory"); \ - __ret; }) - - -#define cmpxchg8b_local(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) __dummy; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile("cmpxchg8b %2; setz %1" \ - : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\ - : "a" (__old), "d"(__old2), \ - "b" (__new1), "c" (__new2), \ - : "memory"); \ - __ret; }) - - -#define cmpxchg_double(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - VM_BUG_ON((unsigned long)(ptr) % 8); \ - cmpxchg8b((ptr), (o1), (o2), (n1), (n2)); \ -}) - -#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 4); \ - VM_BUG_ON((unsigned long)(ptr) % 8); \ - cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ -}) - #define system_has_cmpxchg_double() cpu_has_cx8 #endif /* _ASM_X86_CMPXCHG_32_H */ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 285da02..614be87 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) cmpxchg_local((ptr), (o), (n)); \ }) -#define cmpxchg16b(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) 
__junk; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1" \ - : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ - : "b"(__new1), "c"(__new2), \ - "a"(__old1), "d"(__old2)); \ - __ret; }) - - -#define cmpxchg16b_local(ptr, o1, o2, n1, n2) \ -({ \ - char __ret; \ - __typeof__(o2) __junk; \ - __typeof__(*(ptr)) __old1 = (o1); \ - __typeof__(o2) __old2 = (o2); \ - __typeof__(*(ptr)) __new1 = (n1); \ - __typeof__(o2) __new2 = (n2); \ - asm volatile("cmpxchg16b %2;setz %1" \ - : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \ - : "b"(__new1), "c"(__new2), \ - "a"(__old1), "d"(__old2)); \ - __ret; }) - -#define cmpxchg_double(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - VM_BUG_ON((unsigned long)(ptr) % 16); \ - cmpxchg16b((ptr), (o1), (o2), (n1), (n2)); \ -}) - -#define cmpxchg_double_local(ptr, o1, o2, n1, n2) \ -({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - VM_BUG_ON((unsigned long)(ptr) % 16); \ - cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2)); \ -}) - #define system_has_cmpxchg_double() cpu_has_cx16 #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/mm/slub.c b/mm/slub.c index ed3334d..09ccee8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -368,7 +368,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page VM_BUG_ON(!irqs_disabled()); #ifdef CONFIG_CMPXCHG_DOUBLE if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, + if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, freelist_new, counters_new)) return 1; @@ -402,7 +402,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, { #ifdef CONFIG_CMPXCHG_DOUBLE if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, + if (cmpxchg_double(&page->freelist, &page->counters, freelist_old, counters_old, freelist_new, counters_new)) return 1; ^ permalink raw 
reply related [flat|nested] 13+ messages in thread
end of thread, other threads:[~2012-01-04 15:33 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-02 17:02 [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
2012-01-03 15:00 ` Eric Dumazet
2012-01-03 15:15 ` Eric Dumazet
2012-01-03 15:41 ` Eric Dumazet
2012-01-03 16:08 ` Jan Beulich
2012-01-03 16:13 ` Eric Dumazet
2012-01-03 16:35 ` [PATCH] x86: fix atomic64_xxx_cx8() functions Eric Dumazet
2012-01-04 15:32 ` [tip:x86/asm] x86: Fix " tip-bot for Eric Dumazet
2012-01-03 15:35 ` [PATCH] x86: fix and improve cmpxchg_double{,_local}() Jan Beulich
2012-01-03 17:31 ` Eric Dumazet
2012-01-04 10:36 ` David Laight
2012-01-04 10:54 ` Jan Beulich
2012-01-04 15:31 ` [tip:x86/asm] x86: Fix " tip-bot for Jan Beulich
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox