linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: will.deacon@arm.com (Will Deacon)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v2 12/20] arm64: cmpxchg: patch in lse instructions when supported by the CPU
Date: Fri, 24 Jul 2015 11:42:03 +0100	[thread overview]
Message-ID: <1437734531-10698-13-git-send-email-will.deacon@arm.com> (raw)
In-Reply-To: <1437734531-10698-1-git-send-email-will.deacon@arm.com>

On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of our cmpxchg primitives so that
the LSE cas instruction is used instead.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/atomic.h       |  3 +-
 arch/arm64/include/asm/atomic_ll_sc.h | 38 ++++++++++++++++
 arch/arm64/include/asm/atomic_lse.h   | 39 ++++++++++++++++
 arch/arm64/include/asm/cmpxchg.h      | 84 ++++++++---------------------------
 4 files changed, 98 insertions(+), 66 deletions(-)

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index cb53efa23f62..ee32776d926c 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -24,7 +24,6 @@
 #include <linux/types.h>
 
 #include <asm/barrier.h>
-#include <asm/cmpxchg.h>
 #include <asm/lse.h>
 
 #define ATOMIC_INIT(i)	{ (i) }
@@ -41,6 +40,8 @@
 
 #undef __ARM64_IN_ATOMIC_IMPL
 
+#include <asm/cmpxchg.h>
+
 /*
  * On ARM, ordinary assignment (str instruction) doesn't clear the local
  * strex/ldrex monitor on some implementations. The reason we can use it for
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 9cf298914ac3..b4298f9a898f 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -205,4 +205,42 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 }
 __LL_SC_EXPORT(atomic64_dec_if_positive);
 
+#define __CMPXCHG_CASE(w, sz, name, mb, cl)				\
+__LL_SC_INLINE unsigned long						\
+__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
+				     unsigned long old,			\
+				     unsigned long new))		\
+{									\
+	unsigned long tmp, oldval;					\
+									\
+	asm volatile(							\
+	"	" #mb "\n"						\
+	"1:	ldxr" #sz "\t%" #w "[oldval], %[v]\n"			\
+	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
+	"	cbnz	%" #w "[tmp], 2f\n"				\
+	"	stxr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"		\
+	"	cbnz	%w[tmp], 1b\n"					\
+	"	" #mb "\n"						\
+	"	mov	%" #w "[oldval], %" #w "[old]\n"		\
+	"2:"								\
+	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
+	  [v] "+Q" (*(unsigned long *)ptr)				\
+	: [old] "Lr" (old), [new] "r" (new)				\
+	: cl);								\
+									\
+	return oldval;							\
+}									\
+__LL_SC_EXPORT(__cmpxchg_case_##name);
+
+__CMPXCHG_CASE(w, b,    1,        ,         )
+__CMPXCHG_CASE(w, h,    2,        ,         )
+__CMPXCHG_CASE(w,  ,    4,        ,         )
+__CMPXCHG_CASE( ,  ,    8,        ,         )
+__CMPXCHG_CASE(w, b, mb_1, dmb ish, "memory")
+__CMPXCHG_CASE(w, h, mb_2, dmb ish, "memory")
+__CMPXCHG_CASE(w,  , mb_4, dmb ish, "memory")
+__CMPXCHG_CASE( ,  , mb_8, dmb ish, "memory")
+
+#undef __CMPXCHG_CASE
+
 #endif	/* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index e6e544bce331..5136c3ab48e9 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -243,4 +243,43 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 
 #undef __LL_SC_ATOMIC64
 
+#define __LL_SC_CMPXCHG(op)	__LL_SC_CALL(__cmpxchg_case_##op)
+
+#define __CMPXCHG_CASE(w, sz, name, mb, cl...)				\
+static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
+						  unsigned long old,	\
+						  unsigned long new)	\
+{									\
+	register unsigned long x0 asm ("x0") = (unsigned long)ptr;	\
+	register unsigned long x1 asm ("x1") = old;			\
+	register unsigned long x2 asm ("x2") = new;			\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"nop\n"								\
+	__LL_SC_CMPXCHG(name)						\
+	"nop",								\
+	/* LSE atomics */						\
+	"	mov	" #w "30, %" #w "[old]\n"			\
+	"	cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n"		\
+	"	mov	%" #w "[ret], " #w "30")			\
+	: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)		\
+	: [old] "r" (x1), [new] "r" (x2)				\
+	: "x30" , ##cl);						\
+									\
+	return x0;							\
+}
+
+__CMPXCHG_CASE(w, b,    1,   )
+__CMPXCHG_CASE(w, h,    2,   )
+__CMPXCHG_CASE(w,  ,    4,   )
+__CMPXCHG_CASE(x,  ,    8,   )
+__CMPXCHG_CASE(w, b, mb_1, al, "memory")
+__CMPXCHG_CASE(w, h, mb_2, al, "memory")
+__CMPXCHG_CASE(w,  , mb_4, al, "memory")
+__CMPXCHG_CASE(x,  , mb_8, al, "memory")
+
+#undef __LL_SC_CMPXCHG
+#undef __CMPXCHG_CASE
+
 #endif	/* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index d0cce8068902..60a558127cef 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -21,6 +21,7 @@
 #include <linux/bug.h>
 #include <linux/mmdebug.h>
 
+#include <asm/atomic.h>
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
@@ -111,74 +112,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 				      unsigned long new, int size)
 {
-	unsigned long oldval = 0, res;
-
 	switch (size) {
 	case 1:
-		do {
-			asm volatile("// __cmpxchg1\n"
-			"	ldxrb	%w1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%w1, %w3\n"
-			"	b.ne	1f\n"
-			"	stxrb	%w0, %w4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_1(ptr, old, new);
 	case 2:
-		do {
-			asm volatile("// __cmpxchg2\n"
-			"	ldxrh	%w1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%w1, %w3\n"
-			"	b.ne	1f\n"
-			"	stxrh	%w0, %w4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_2(ptr, old, new);
 	case 4:
-		do {
-			asm volatile("// __cmpxchg4\n"
-			"	ldxr	%w1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%w1, %w3\n"
-			"	b.ne	1f\n"
-			"	stxr	%w0, %w4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_4(ptr, old, new);
 	case 8:
-		do {
-			asm volatile("// __cmpxchg8\n"
-			"	ldxr	%1, %2\n"
-			"	mov	%w0, #0\n"
-			"	cmp	%1, %3\n"
-			"	b.ne	1f\n"
-			"	stxr	%w0, %4, %2\n"
-			"1:\n"
-				: "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr)
-				: "Ir" (old), "r" (new)
-				: "cc");
-		} while (res);
-		break;
-
+		return __cmpxchg_case_8(ptr, old, new);
 	default:
 		BUILD_BUG();
 	}
 
-	return oldval;
+	unreachable();
 }
 
 #define system_has_cmpxchg_double()     1
@@ -229,13 +176,20 @@ static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2,
 static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 					 unsigned long new, int size)
 {
-	unsigned long ret;
-
-	smp_mb();
-	ret = __cmpxchg(ptr, old, new, size);
-	smp_mb();
+	switch (size) {
+	case 1:
+		return __cmpxchg_case_mb_1(ptr, old, new);
+	case 2:
+		return __cmpxchg_case_mb_2(ptr, old, new);
+	case 4:
+		return __cmpxchg_case_mb_4(ptr, old, new);
+	case 8:
+		return __cmpxchg_case_mb_8(ptr, old, new);
+	default:
+		BUILD_BUG();
+	}
 
-	return ret;
+	unreachable();
 }
 
 #define cmpxchg(ptr, o, n) \
-- 
2.1.4

  parent reply	other threads:[~2015-07-24 10:42 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-24 10:41 [PATCH v2 00/20] arm64: support for 8.1 LSE atomic instructions Will Deacon
2015-07-24 10:41 ` [PATCH v2 01/20] arm64: rwlocks: don't fail trylock purely due to contention Will Deacon
2015-07-24 11:14   ` Catalin Marinas
2015-07-24 10:41 ` [PATCH v2 02/20] documentation: Clarify failed cmpxchg memory ordering semantics Will Deacon
2015-07-24 11:15   ` Catalin Marinas
2015-07-27 11:58   ` Will Deacon
2015-07-27 12:02     ` Peter Zijlstra
2015-07-27 13:00       ` Will Deacon
2015-07-24 10:41 ` [PATCH v2 03/20] arm64: cpufeature.h: add missing #include of kernel.h Will Deacon
2015-07-24 11:15   ` Catalin Marinas
2015-07-24 10:41 ` [PATCH v2 04/20] arm64: atomics: move ll/sc atomics into separate header file Will Deacon
2015-07-24 11:19   ` Catalin Marinas
2015-07-24 10:41 ` [PATCH v2 05/20] arm64: elf: advertise 8.1 atomic instructions as new hwcap Will Deacon
2015-07-24 11:24   ` Catalin Marinas
2015-07-24 10:41 ` [PATCH v2 06/20] arm64: alternatives: add cpu feature for lse atomics Will Deacon
2015-07-24 11:26   ` Catalin Marinas
2015-07-24 10:41 ` [PATCH v2 07/20] arm64: introduce CONFIG_ARM64_LSE_ATOMICS as fallback to ll/sc atomics Will Deacon
2015-07-24 11:38   ` Catalin Marinas
2015-07-24 10:41 ` [PATCH v2 08/20] arm64: atomics: patch in lse instructions when supported by the CPU Will Deacon
2015-07-24 14:43   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 09/20] arm64: locks: " Will Deacon
2015-07-24 15:08   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 10/20] arm64: bitops: " Will Deacon
2015-07-24 15:19   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 11/20] arm64: xchg: " Will Deacon
2015-07-24 15:19   ` Catalin Marinas
2015-07-24 10:42 ` Will Deacon [this message]
2015-07-24 15:21   ` [PATCH v2 12/20] arm64: cmpxchg: " Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 13/20] arm64: cmpxchg_dbl: " Will Deacon
2015-07-24 15:29   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 14/20] arm64: cmpxchg: avoid "cc" clobber in ll/sc routines Will Deacon
2015-07-24 15:30   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 15/20] arm64: cmpxchg: avoid memory barrier on comparison failure Will Deacon
2015-07-24 15:32   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 16/20] arm64: atomics: tidy up common atomic{,64}_* macros Will Deacon
2015-07-24 15:40   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 17/20] arm64: atomics: prefetch the destination word for write prior to stxr Will Deacon
2015-07-24 15:42   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 18/20] arm64: atomics: implement atomic{, 64}_cmpxchg using cmpxchg Will Deacon
2015-07-24 15:44   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 19/20] arm64: atomic64_dec_if_positive: fix incorrect branch condition Will Deacon
2015-07-24 15:45   ` Catalin Marinas
2015-07-24 10:42 ` [PATCH v2 20/20] arm64: kconfig: select HAVE_CMPXCHG_LOCAL Will Deacon
2015-07-24 15:45   ` Catalin Marinas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1437734531-10698-13-git-send-email-will.deacon@arm.com \
    --to=will.deacon@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).