From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
To: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: lkml <linux-kernel@vger.kernel.org>, <arc-linux-dev@synopsys.com>,
Vineet Gupta <Vineet.Gupta1@synopsys.com>
Subject: [PATCH 5/6] ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff
Date: Mon, 3 Aug 2015 15:33:07 +0530 [thread overview]
Message-ID: <1438596188-10875-6-git-send-email-vgupta@synopsys.com> (raw)
In-Reply-To: <1438596188-10875-1-git-send-email-vgupta@synopsys.com>
This is to workaround the hardware livelock when multiple cores are trying to
SCOND to same location (see code comments for details)
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
arch/arc/Kconfig | 5 +
arch/arc/include/asm/atomic.h | 73 ++++++++++
arch/arc/include/asm/spinlock.h | 292 ++++++++++++++++++++++++++++++++++++++++
arch/arc/kernel/setup.c | 4 +
4 files changed, 374 insertions(+)
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index a5fccdfbfc8f..bd4670d1b89b 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -365,6 +365,11 @@ config ARC_HAS_LLSC
default y
depends on !ARC_CANT_LLSC
+config ARC_STAR_9000923308
+ bool "Workaround for llock/scond livelock"
+ default y
+ depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
+
config ARC_HAS_SWAPE
bool "Insn: SWAPE (endian-swap)"
default y
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 3dd36c1efee1..f59d2cfc00fc 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -23,6 +23,8 @@
#define atomic_set(v, i) (((v)->counter) = (i))
+#ifndef CONFIG_ARC_STAR_9000923308
+
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
@@ -65,6 +67,74 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return val; \
}
+#else /* CONFIG_ARC_STAR_9000923308 */
+
+#define SCOND_FAIL_RETRY_VAR_DEF \
+ unsigned int delay = 1, tmp; \
+
+#define SCOND_FAIL_RETRY_ASM \
+ " bz 4f \n" \
+ " ; --- scond fail delay --- \n" \
+ " mov %[tmp], %[delay] \n" /* tmp = delay */ \
+ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
+ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
+ " asl %[delay], %[delay], 1 \n" /* delay *= 2 */ \
+ " b 1b \n" /* start over */ \
+ "4: ; --- success --- \n" \
+
+#define SCOND_FAIL_RETRY_VARS \
+ ,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \
+
+#define ATOMIC_OP(op, c_op, asm_op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned int val, delay = 1, tmp; \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[val], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[val], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " \n" \
+ SCOND_FAIL_RETRY_ASM \
+ \
+ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
+ SCOND_FAIL_RETRY_VARS \
+ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
+ [i] "ir" (i) \
+ : "cc"); \
+} \
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
+static inline int atomic_##op##_return(int i, atomic_t *v) \
+{ \
+ unsigned int val, delay = 1, tmp; \
+ \
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND thmeselves don't provide any such semantics \
+ */ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[val], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[val], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " \n" \
+ SCOND_FAIL_RETRY_ASM \
+ \
+ : [val] "=&r" (val) \
+ SCOND_FAIL_RETRY_VARS \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+ : "cc"); \
+ \
+ smp_mb(); \
+ \
+ return val; \
+}
+
+#endif /* CONFIG_ARC_STAR_9000923308 */
+
#else /* !CONFIG_ARC_HAS_LLSC */
#ifndef CONFIG_SMP
@@ -142,6 +212,9 @@ ATOMIC_OP(and, &=, and)
#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
/**
* __atomic_add_unless - add unless the number is a given value
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index 9a7a7293f127..297c54484013 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -20,6 +20,11 @@
#ifdef CONFIG_ARC_HAS_LLSC
+/*
+ * A normal LLOCK/SCOND based system, w/o need for livelock workaround
+ */
+#ifndef CONFIG_ARC_STAR_9000923308
+
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val;
@@ -246,6 +251,293 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
smp_mb();
}
+#else /* CONFIG_ARC_STAR_9000923308 */
+
+/*
+ * HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
+ * coherency transactions in the SCU. The exclusive line state keeps rotating
+ * among contenting cores leading to a never ending cycle. So break the cycle
+ * by deferring the retry of failed exclusive access (SCOND). The actual delay
+ * needed is function of number of contending cores as well as the unrelated
+ * coherency traffic from other cores. To keep the code simple, start off with
+ * small delay of 1 which would suffice most cases and in case of contention
+ * double the delay. Eventually the delay is sufficient such that the coherency
+ * pipeline is drained, thus a subsequent exclusive access would succeed.
+ */
+
+#define SCOND_FAIL_RETRY_VAR_DEF \
+ unsigned int delay, tmp; \
+
+#define SCOND_FAIL_RETRY_ASM \
+ " ; --- scond fail delay --- \n" \
+ " mov %[tmp], %[delay] \n" /* tmp = delay */ \
+ "2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
+ " sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
+ " asl %[delay], %[delay], 1 \n" /* delay *= 2 */ \
+ " b 1b \n" /* start over */ \
+ " \n" \
+ "4: ; --- done --- \n" \
+
+#define SCOND_FAIL_RETRY_VARS \
+ ,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ unsigned int val;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[slock]] \n"
+ " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */
+ " scond %[LOCKED], [%[slock]] \n" /* acquire */
+ " bz 4f \n" /* done */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val)
+ SCOND_FAIL_RETRY_VARS
+ : [slock] "r" (&(lock->slock)),
+ [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ unsigned int val, got_it = 0;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[slock]] \n"
+ " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
+ " scond %[LOCKED], [%[slock]] \n" /* acquire */
+ " bz.d 4f \n"
+ " mov.z %[got_it], 1 \n" /* got it */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val),
+ [got_it] "+&r" (got_it)
+ SCOND_FAIL_RETRY_VARS
+ : [slock] "r" (&(lock->slock)),
+ [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
+ : "memory", "cc");
+
+ smp_mb();
+
+ return got_it;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ smp_mb();
+
+ lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
+
+ smp_mb();
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers but only one writer.
+ * Unfair locking as Writers could be starved indefinitely by Reader(s)
+ */
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ /*
+ * zero means writer holds the lock exclusively, deny Reader.
+ * Otherwise grant lock to first/subseq reader
+ *
+ * if (rw->counter > 0) {
+ * rw->counter--;
+ * ret = 1;
+ * }
+ */
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */
+ " sub %[val], %[val], 1 \n" /* reader lock */
+ " scond %[val], [%[rwlock]] \n"
+ " bz 4f \n" /* done */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ unsigned int val, got_it = 0;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
+ " sub %[val], %[val], 1 \n" /* counter-- */
+ " scond %[val], [%[rwlock]] \n"
+ " bz.d 4f \n"
+ " mov.z %[got_it], 1 \n" /* got it */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val),
+ [got_it] "+&r" (got_it)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+
+ return got_it;
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ /*
+ * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
+ * deny writer. Otherwise if unlocked grant to writer
+ * Hence the claim that Linux rwlocks are unfair to writers.
+ * (can be starved for an indefinite time by readers).
+ *
+ * if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
+ * rw->counter = 0;
+ * ret = 1;
+ * }
+ */
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */
+ " mov %[val], %[WR_LOCKED] \n"
+ " scond %[val], [%[rwlock]] \n"
+ " bz 4f \n"
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+/* 1 - lock taken successfully */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ unsigned int val, got_it = 0;
+ SCOND_FAIL_RETRY_VAR_DEF;
+
+ smp_mb();
+
+ __asm__ __volatile__(
+ "0: mov %[delay], 1 \n"
+ "1: llock %[val], [%[rwlock]] \n"
+ " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
+ " mov %[val], %[WR_LOCKED] \n"
+ " scond %[val], [%[rwlock]] \n"
+ " bz.d 4f \n"
+ " mov.z %[got_it], 1 \n" /* got it */
+ " \n"
+ SCOND_FAIL_RETRY_ASM
+
+ : [val] "=&r" (val),
+ [got_it] "+&r" (got_it)
+ SCOND_FAIL_RETRY_VARS
+ : [rwlock] "r" (&(rw->counter)),
+ [UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
+ [WR_LOCKED] "ir" (0)
+ : "memory", "cc");
+
+ smp_mb();
+
+ return got_it;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+
+ smp_mb();
+
+ /*
+ * rw->counter++;
+ */
+ __asm__ __volatile__(
+ "1: llock %[val], [%[rwlock]] \n"
+ " add %[val], %[val], 1 \n"
+ " scond %[val], [%[rwlock]] \n"
+ " bnz 1b \n"
+ " \n"
+ : [val] "=&r" (val)
+ : [rwlock] "r" (&(rw->counter))
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ unsigned int val;
+
+ smp_mb();
+
+ /*
+ * rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
+ */
+ __asm__ __volatile__(
+ "1: llock %[val], [%[rwlock]] \n"
+ " scond %[UNLOCKED], [%[rwlock]]\n"
+ " bnz 1b \n"
+ " \n"
+ : [val] "=&r" (val)
+ : [rwlock] "r" (&(rw->counter)),
+ [UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__)
+ : "memory", "cc");
+
+ smp_mb();
+}
+
+#undef SCOND_FAIL_RETRY_VAR_DEF
+#undef SCOND_FAIL_RETRY_ASM
+#undef SCOND_FAIL_RETRY_VARS
+
+#endif /* CONFIG_ARC_STAR_9000923308 */
+
#else /* !CONFIG_ARC_HAS_LLSC */
static inline void arch_spin_lock(arch_spinlock_t *lock)
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 18cc01591c96..1f3d9b3160fa 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -330,6 +330,10 @@ static void arc_chk_core_config(void)
pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
else if (!cpu->extn.fpu_dp && fpu_enabled)
panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
+
+ if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
+ !IS_ENABLED(CONFIG_ARC_STAR_9000923308))
+ panic("llock/scond livelock workaround missing\n");
}
/*
--
1.9.1
next prev parent reply other threads:[~2015-08-03 10:04 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-08-03 10:03 [PATCH 0/6] ARC: spinlocks/atomics rework Vineet Gupta
2015-08-03 10:03 ` [PATCH 1/6] Revert "ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock" Vineet Gupta
2015-08-03 10:03 ` [PATCH 2/6] ARC: refactor atomic inline asm operands with symbolic names Vineet Gupta
2015-08-03 10:03 ` [PATCH 3/6] ARC: LLOCK/SCOND based spin_lock Vineet Gupta
2015-08-03 11:29 ` Peter Zijlstra
2015-08-03 11:44 ` Vineet Gupta
2015-08-03 10:03 ` [PATCH 4/6] ARC: LLOCK/SCOND based rwlock Vineet Gupta
2015-08-03 11:33 ` Peter Zijlstra
2015-08-03 11:51 ` Vineet Gupta
2015-08-03 10:03 ` Vineet Gupta [this message]
2015-08-03 11:41 ` [PATCH 5/6] ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff Peter Zijlstra
2015-08-03 13:01 ` Vineet Gupta
2015-08-03 13:50 ` Vineet Gupta
2015-08-03 14:08 ` Peter Zijlstra
2015-08-03 11:50 ` Peter Zijlstra
2015-08-03 13:02 ` Vineet Gupta
2015-08-03 13:06 ` Peter Zijlstra
2015-08-03 10:03 ` [PATCH 6/6] ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle Vineet Gupta
2015-08-03 11:43 ` Peter Zijlstra
2015-08-03 14:40 ` Vineet Gupta
2015-08-03 14:42 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1438596188-10875-6-git-send-email-vgupta@synopsys.com \
--to=vineet.gupta1@synopsys.com \
--cc=arc-linux-dev@synopsys.com \
--cc=linux-kernel@vger.kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox