From: Will Deacon <will.deacon@arm.com>
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH 07/18] arm64: locks: patch in lse instructions when supported by the CPU
Date: Mon, 13 Jul 2015 10:25:08 +0100
Message-ID: <1436779519-2232-8-git-send-email-will.deacon@arm.com>
In-Reply-To: <1436779519-2232-1-git-send-email-will.deacon@arm.com>

On CPUs that support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to LL/SC sequences.
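
As a rough illustration (a hand-written sketch, not code from this
patch; the variable names are made up, and the LSE form needs an
assembler told about ARMv8.1, e.g. -march=armv8.1-a), an atomic add
built on load/store-exclusives needs a retry loop, whereas LSE can do
it in a single instruction:

	unsigned int newval, tmp;

	/* LL/SC: loop until the exclusive store succeeds. */
	asm volatile(
	"1:	ldxr	%w0, %2\n"
	"	add	%w0, %w0, %w3\n"
	"	stxr	%w1, %w0, %2\n"
	"	cbnz	%w1, 1b"
	: "=&r" (newval), "=&r" (tmp), "+Q" (v->counter)
	: "r" (i));

	/* LSE: a single atomic add, no loop to fail and replay. */
	asm volatile(
	"	stadd	%w1, %0"
	: "+Q" (v->counter)
	: "r" (i));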

This patch introduces runtime patching of our locking functions so
that the LSE atomic instructions are used for spinlocks and rwlocks
whenever the CPU supports them.
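
The patching itself is done via the ARM64_LSE_ATOMIC_INSN() macro
added earlier in this series. Roughly (a paraphrase of <asm/lse.h>,
not a verbatim copy), it is a thin wrapper around the existing
instruction alternatives machinery:

#ifdef CONFIG_ARM64_LSE_ATOMICS
/*
 * Emit the LL/SC sequence inline and record the LSE sequence in the
 * .altinstructions section; apply_alternatives() rewrites the code
 * at boot once the ARM64_HAS_LSE_ATOMICS CPU feature is detected.
 */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
#else
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc
#endif

Because ALTERNATIVE() requires the two sequences to occupy the same
number of bytes, the shorter LSE sequences below are padded out with
nops.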

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
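A note on the flag arithmetic in the LSE read-lock paths below: after
"casa	%w0, %w1, %2", %w0 holds the value the lock actually contained.
The earlier "adds" of #1 leaves the carry flag clear (assuming the
reader count never wraps), so the following "sbc" computes

	%w1 - %w0 - 1

which is zero exactly when the compare-and-swap succeeded (i.e. the
observed value %w0 equals the expected value %w1 - 1), and non-zero
when another CPU got in first, triggering a retry in arch_read_lock()
or a failure return in arch_read_trylock().
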
 arch/arm64/include/asm/spinlock.h | 132 +++++++++++++++++++++++++++++---------
 1 file changed, 103 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cee128732435..7a1e852263be 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -16,6 +16,7 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
+#include <asm/lse.h>
 #include <asm/spinlock_types.h>
 #include <asm/processor.h>
 
@@ -38,11 +39,21 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 
 	asm volatile(
 	/* Atomically increment the next ticket. */
+	ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 "	prfm	pstl1strm, %3\n"
 "1:	ldaxr	%w0, %3\n"
 "	add	%w1, %w0, %w5\n"
 "	stxr	%w2, %w1, %3\n"
-"	cbnz	%w2, 1b\n"
+"	cbnz	%w2, 1b\n",
+	/* LSE atomics */
+"	mov	%w2, %w5\n"
+"	ldadda	%w2, %w0, %3\n"
+"	nop\n"
+"	nop\n"
+"	nop\n"
+	)
+
 	/* Did we get the lock? */
 "	eor	%w1, %w0, %w0, ror #16\n"
 "	cbz	%w1, 3f\n"
@@ -67,15 +78,25 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	unsigned int tmp;
 	arch_spinlock_t lockval;
 
-	asm volatile(
-"	prfm	pstl1strm, %2\n"
-"1:	ldaxr	%w0, %2\n"
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbnz	%w1, 2f\n"
-"	add	%w0, %w0, %3\n"
-"	stxr	%w1, %w0, %2\n"
-"	cbnz	%w1, 1b\n"
-"2:"
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	prfm	pstl1strm, %2\n"
+	"1:	ldaxr	%w0, %2\n"
+	"	eor	%w1, %w0, %w0, ror #16\n"
+	"	cbnz	%w1, 2f\n"
+	"	add	%w0, %w0, %3\n"
+	"	stxr	%w1, %w0, %2\n"
+	"	cbnz	%w1, 1b\n"
+	"2:",
+	/* LSE atomics */
+	"	ldar	%w0, %2\n"
+	"	eor	%w1, %w0, %w0, ror #16\n"
+	"	cbnz	%w1, 1f\n"
+	"	add	%w1, %w0, %3\n"
+	"	casa	%w0, %w1, %2\n"
+	"	and	%w1, %w1, #0xffff\n"
+	"	eor	%w1, %w1, %w0, lsr #16\n"
+	"1:")
 	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
 	: "I" (1 << TICKET_SHIFT)
 	: "memory");
@@ -85,10 +106,19 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-	asm volatile(
-"	stlrh	%w1, %0\n"
-	: "=Q" (lock->owner)
-	: "r" (lock->owner + 1)
+	unsigned long tmp;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	ldr	%w1, %0\n"
+	"	add	%w1, %w1, #1\n"
+	"	stlrh	%w1, %0",
+	/* LSE atomics */
+	"	mov	%w1, #1\n"
+	"	nop\n"
+	"	staddlh	%w1, %0")
+	: "=Q" (lock->owner), "=&r" (tmp)
+	:
 	: "memory");
 }
 
@@ -125,11 +155,19 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 
 	asm volatile(
 	"	sevl\n"
+	ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 	"1:	wfe\n"
 	"2:	ldaxr	%w0, %1\n"
 	"	cbnz	%w0, 1b\n"
 	"	stxr	%w0, %w2, %1\n"
-	"	cbnz	%w0, 2b\n"
+	"	cbnz	%w0, 2b",
+	/* LSE atomics */
+	"1:	wfe\n"
+	"	mov	%w0, wzr\n"
+	"	casa	%w0, %w2, %1\n"
+	"	nop\n"
+	"	cbnz	%w0, 1b")
 	: "=&r" (tmp), "+Q" (rw->lock)
 	: "r" (0x80000000)
 	: "memory");
@@ -139,11 +177,16 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
-	asm volatile(
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 	"	ldaxr	%w0, %1\n"
 	"	cbnz	%w0, 1f\n"
 	"	stxr	%w0, %w2, %1\n"
-	"1:\n"
+	"1:",
+	/* LSE atomics */
+	"	mov	%w0, wzr\n"
+	"	casa	%w0, %w2, %1\n"
+	"	nop")
 	: "=&r" (tmp), "+Q" (rw->lock)
 	: "r" (0x80000000)
 	: "memory");
@@ -153,9 +196,10 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(
-	"	stlr	%w1, %0\n"
-	: "=Q" (rw->lock) : "r" (0) : "memory");
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	"	stlr	wzr, %0",
+	"	swpl	wzr, wzr, %0")
+	: "=Q" (rw->lock) :: "memory");
 }
 
 /* write_can_lock - would write_trylock() succeed? */
@@ -172,6 +216,10 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
  *
  * The memory barriers are implicit with the load-acquire and store-release
  * instructions.
+ *
+ * Note that in UNDEFINED cases, such as unlocking a lock twice, the LL/SC
+ * and LSE implementations may exhibit different behaviour (although this
+ * will have no effect on lockdep).
  */
 static inline void arch_read_lock(arch_rwlock_t *rw)
 {
@@ -179,26 +227,43 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 
 	asm volatile(
 	"	sevl\n"
+	ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 	"1:	wfe\n"
 	"2:	ldaxr	%w0, %2\n"
 	"	add	%w0, %w0, #1\n"
 	"	tbnz	%w0, #31, 1b\n"
 	"	stxr	%w1, %w0, %2\n"
-	"	cbnz	%w1, 2b\n"
+	"	nop\n"
+	"	cbnz	%w1, 2b",
+	/* LSE atomics */
+	"1:	wfe\n"
+	"2:	ldr	%w0, %2\n"
+	"	adds	%w1, %w0, #1\n"
+	"	tbnz	%w1, #31, 1b\n"
+	"	casa	%w0, %w1, %2\n"
+	"	sbc	%w0, %w1, %w0\n"
+	"	cbnz	%w0, 2b")
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
-	: "memory");
+	: "cc", "memory");
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned int tmp, tmp2;
 
-	asm volatile(
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 	"1:	ldxr	%w0, %2\n"
 	"	sub	%w0, %w0, #1\n"
 	"	stlxr	%w1, %w0, %2\n"
-	"	cbnz	%w1, 1b\n"
+	"	cbnz	%w1, 1b",
+	/* LSE atomics */
+	"	movn	%w0, #0\n"
+	"	nop\n"
+	"	nop\n"
+	"	staddl	%w0, %2")
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
 	: "memory");
@@ -206,17 +271,26 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
-	unsigned int tmp, tmp2 = 1;
+	unsigned int tmp, tmp2;
 
-	asm volatile(
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+	"	mov	%w1, #1\n"
 	"	ldaxr	%w0, %2\n"
 	"	add	%w0, %w0, #1\n"
 	"	tbnz	%w0, #31, 1f\n"
 	"	stxr	%w1, %w0, %2\n"
-	"1:\n"
-	: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
+	"1:",
+	/* LSE atomics */
+	"	ldr	%w0, %2\n"
+	"	adds	%w1, %w0, #1\n"
+	"	tbnz	%w1, #31, 1f\n"
+	"	casa	%w0, %w1, %2\n"
+	"	sbc	%w1, %w1, %w0\n"
+	"1:")
+	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
-	: "memory");
+	: "cc", "memory");
 
 	return !tmp2;
 }
-- 
2.1.4
