public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
To: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: lkml <linux-kernel@vger.kernel.org>, <arc-linux-dev@synopsys.com>,
	Vineet Gupta <Vineet.Gupta1@synopsys.com>
Subject: [PATCH 5/6] ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff
Date: Mon, 3 Aug 2015 15:33:07 +0530	[thread overview]
Message-ID: <1438596188-10875-6-git-send-email-vgupta@synopsys.com> (raw)
In-Reply-To: <1438596188-10875-1-git-send-email-vgupta@synopsys.com>

This is to workaround the hardware livelock when multiple cores are trying to
SCOND to same location (see code comments for details)

Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig                |   5 +
 arch/arc/include/asm/atomic.h   |  73 ++++++++++
 arch/arc/include/asm/spinlock.h | 292 ++++++++++++++++++++++++++++++++++++++++
 arch/arc/kernel/setup.c         |   4 +
 4 files changed, 374 insertions(+)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a5fccdfbfc8f..bd4670d1b89b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -365,6 +365,11 @@ config ARC_HAS_LLSC
 	default y
 	depends on !ARC_CANT_LLSC
 
+config ARC_STAR_9000923308
+	bool "Workaround for llock/scond livelock"
+	default y
+	depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
+
 config ARC_HAS_SWAPE
 	bool "Insn: SWAPE (endian-swap)"
 	default y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 3dd36c1efee1..f59d2cfc00fc 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -23,6 +23,8 @@
 
 #define atomic_set(v, i) (((v)->counter) = (i))
 
+#ifndef CONFIG_ARC_STAR_9000923308
+
 #define ATOMIC_OP(op, c_op, asm_op)					\
 static inline void atomic_##op(int i, atomic_t *v)			\
 {									\
@@ -65,6 +67,74 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	return val;							\
 }
 
+#else	/* CONFIG_ARC_STAR_9000923308 */
+
+#define SCOND_FAIL_RETRY_VAR_DEF						\
+	unsigned int delay = 1, tmp;						\
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"	bz	4f			\n"				\
+	"   ; --- scond fail delay ---		\n"				\
+	"	mov	%[tmp], %[delay]	\n"	/* tmp = delay */	\
+	"2: 	brne.d	%[tmp], 0, 2b		\n"	/* while (tmp != 0) */	\
+	"	sub	%[tmp], %[tmp], 1	\n"	/* tmp-- */		\
+	"	asl	%[delay], %[delay], 1	\n"	/* delay *= 2 */	\
+	"	b	1b			\n"	/* start over */	\
+	"4: ; --- success ---			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS							\
+	  ,[delay] "+&r" (delay),[tmp] "=&r"	(tmp)				\
+
+#define ATOMIC_OP(op, c_op, asm_op)					\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	unsigned int val, delay = 1, tmp;				\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock   %[val], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[val], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"						\n"		\
+	SCOND_FAIL_RETRY_ASM						\
+									\
+	: [val]	"=&r"	(val) /* Early clobber to prevent reg reuse */	\
+	  SCOND_FAIL_RETRY_VARS						\
+	: [ctr]	"r"	(&v->counter), /* Not "m": llock only supports reg direct addr mode */	\
+	  [i]	"ir"	(i)						\
+	: "cc");							\
+}									\
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
+static inline int atomic_##op##_return(int i, atomic_t *v)		\
+{									\
+	unsigned int val, delay = 1, tmp;				\
+									\
+	/*								\
+	 * Explicit full memory barrier needed before/after as		\
+	 * LLOCK/SCOND thmeselves don't provide any such semantics	\
+	 */								\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"1:	llock   %[val], [%[ctr]]		\n"		\
+	"	" #asm_op " %[val], %[val], %[i]	\n"		\
+	"	scond   %[val], [%[ctr]]		\n"		\
+	"						\n"		\
+	SCOND_FAIL_RETRY_ASM						\
+									\
+	: [val]	"=&r"	(val)						\
+	  SCOND_FAIL_RETRY_VARS						\
+	: [ctr]	"r"	(&v->counter),					\
+	  [i]	"ir"	(i)						\
+	: "cc");							\
+									\
+	smp_mb();							\
+									\
+	return val;							\
+}
+
+#endif	/* CONFIG_ARC_STAR_9000923308 */
+
 #else	/* !CONFIG_ARC_HAS_LLSC */
 
 #ifndef CONFIG_SMP
@@ -142,6 +212,9 @@ ATOMIC_OP(and, &=, and)
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
 
 /**
  * __atomic_add_unless - add unless the number is a given value
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 9a7a7293f127..297c54484013 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -20,6 +20,11 @@
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
+/*
+ * A normal LLOCK/SCOND based system, w/o need for livelock workaround
+ */
+#ifndef CONFIG_ARC_STAR_9000923308
+
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned int val;
@@ -246,6 +251,293 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	smp_mb();
 }
 
+#else	/* CONFIG_ARC_STAR_9000923308 */
+
+/*
+ * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
+ * coherency transactions in the SCU. The exclusive line state keeps rotating
+ * among contenting cores leading to a never ending cycle. So break the cycle
+ * by deferring the retry of failed exclusive access (SCOND). The actual delay
+ * needed is function of number of contending cores as well as the unrelated
+ * coherency traffic from other cores. To keep the code simple, start off with
+ * small delay of 1 which would suffice most cases and in case of contention
+ * double the delay. Eventually the delay is sufficient such that the coherency
+ * pipeline is drained, thus a subsequent exclusive access would succeed.
+ */
+
+#define SCOND_FAIL_RETRY_VAR_DEF						\
+	unsigned int delay, tmp;						\
+
+#define SCOND_FAIL_RETRY_ASM							\
+	"   ; --- scond fail delay ---		\n"				\
+	"	mov	%[tmp], %[delay]	\n"	/* tmp = delay */	\
+	"2: 	brne.d	%[tmp], 0, 2b		\n"	/* while (tmp != 0) */	\
+	"	sub	%[tmp], %[tmp], 1	\n"	/* tmp-- */		\
+	"	asl	%[delay], %[delay], 1	\n"	/* delay *= 2 */	\
+	"	b	1b			\n"	/* start over */	\
+	"					\n"				\
+	"4: ; --- done ---			\n"				\
+
+#define SCOND_FAIL_RETRY_VARS							\
+	  ,[delay] "=&r" (delay), [tmp] "=&r"	(tmp)				\
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 1b	\n"	/* spin while LOCKED */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bz	4f			\n"	/* done */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[slock]]	\n"
+	"	breq	%[val], %[LOCKED], 4f	\n"	/* already LOCKED, just bail */
+	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [slock]	"r"	(&(lock->slock)),
+	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	smp_mb();
+
+	lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+	smp_mb();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
+ */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	/*
+	 * zero means writer holds the lock exclusively, deny Reader.
+	 * Otherwise grant lock to first/subseq reader
+	 *
+	 * 	if (rw->counter > 0) {
+	 *		rw->counter--;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 1b\n"	/* <= 0: spin while write locked */
+	"	sub	%[val], %[val], 1	\n"	/* reader lock */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz	4f			\n"	/* done */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brls	%[val], %[WR_LOCKED], 4f\n"	/* <= 0: already write locked, bail */
+	"	sub	%[val], %[val], 1	\n"	/* counter-- */
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	/*
+	 * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+	 * deny writer. Otherwise if unlocked grant to writer
+	 * Hence the claim that Linux rwlocks are unfair to writers.
+	 * (can be starved for an indefinite time by readers).
+	 *
+	 *	if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+	 *		rw->counter = 0;
+	 *		ret = 1;
+	 *	}
+	 */
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 1b	\n"	/* while !UNLOCKED spin */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz	4f			\n"
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val, got_it = 0;
+	SCOND_FAIL_RETRY_VAR_DEF;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"0:	mov	%[delay], 1		\n"
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	brne	%[val], %[UNLOCKED], 4f	\n"	/* !UNLOCKED, bail */
+	"	mov	%[val], %[WR_LOCKED]	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bz.d	4f			\n"
+	"	mov.z	%[got_it], 1		\n"	/* got it */
+	"					\n"
+	SCOND_FAIL_RETRY_ASM
+
+	: [val]		"=&r"	(val),
+	  [got_it]	"+&r"	(got_it)
+	  SCOND_FAIL_RETRY_VARS
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
+	  [WR_LOCKED]	"ir"	(0)
+	: "memory", "cc");
+
+	smp_mb();
+
+	return got_it;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * rw->counter++;
+	 */
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	add	%[val], %[val], 1	\n"
+	"	scond	%[val], [%[rwlock]]	\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter))
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	smp_mb();
+
+	/*
+	 * rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+	 */
+	__asm__ __volatile__(
+	"1:	llock	%[val], [%[rwlock]]	\n"
+	"	scond	%[UNLOCKED], [%[rwlock]]\n"
+	"	bnz	1b			\n"
+	"					\n"
+	: [val]		"=&r"	(val)
+	: [rwlock]	"r"	(&(rw->counter)),
+	  [UNLOCKED]	"r"	(__ARCH_RW_LOCK_UNLOCKED__)
+	: "memory", "cc");
+
+	smp_mb();
+}
+
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
+
+#endif	/* CONFIG_ARC_STAR_9000923308 */
+
 #else	/* !CONFIG_ARC_HAS_LLSC */
 
 static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 18cc01591c96..1f3d9b3160fa 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -330,6 +330,10 @@ static void arc_chk_core_config(void)
 		pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
 	else if (!cpu->extn.fpu_dp && fpu_enabled)
 		panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+
+	if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+	    !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
+		panic("llock/scond livelock workaround missing\n");
 }
 
 /*
-- 
1.9.1


  parent reply	other threads:[~2015-08-03 10:04 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-03 10:03 [PATCH 0/6] ARC: spinlocks/atomics rework Vineet Gupta
2015-08-03 10:03 ` [PATCH 1/6] Revert "ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock" Vineet Gupta
2015-08-03 10:03 ` [PATCH 2/6] ARC: refactor atomic inline asm operands with symbolic names Vineet Gupta
2015-08-03 10:03 ` [PATCH 3/6] ARC: LLOCK/SCOND based spin_lock Vineet Gupta
2015-08-03 11:29   ` Peter Zijlstra
2015-08-03 11:44     ` Vineet Gupta
2015-08-03 10:03 ` [PATCH 4/6] ARC: LLOCK/SCOND based rwlock Vineet Gupta
2015-08-03 11:33   ` Peter Zijlstra
2015-08-03 11:51     ` Vineet Gupta
2015-08-03 10:03 ` Vineet Gupta [this message]
2015-08-03 11:41   ` [PATCH 5/6] ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff Peter Zijlstra
2015-08-03 13:01     ` Vineet Gupta
2015-08-03 13:50       ` Vineet Gupta
2015-08-03 14:08         ` Peter Zijlstra
2015-08-03 11:50   ` Peter Zijlstra
2015-08-03 13:02     ` Vineet Gupta
2015-08-03 13:06       ` Peter Zijlstra
2015-08-03 10:03 ` [PATCH 6/6] ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle Vineet Gupta
2015-08-03 11:43   ` Peter Zijlstra
2015-08-03 14:40     ` Vineet Gupta
2015-08-03 14:42       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1438596188-10875-6-git-send-email-vgupta@synopsys.com \
    --to=vineet.gupta1@synopsys.com \
    --cc=arc-linux-dev@synopsys.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox