* [PATCH v2] arm64: barrier: implement wfe-based smp_cond_load_acquire
@ 2016-06-27 17:43 Will Deacon
2016-06-30 13:00 ` Will Deacon
2016-07-07 8:32 ` [tip:locking/core] locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() tip-bot for Will Deacon
0 siblings, 2 replies; 4+ messages in thread
From: Will Deacon @ 2016-06-27 17:43 UTC (permalink / raw)
To: linux-arm-kernel
smp_cond_load_acquire is used to spin on a variable until some
expression involving that variable becomes true.
On arm64, we can build this using WFE and LDXR, since clearing of the
exclusive monitor as a result of the variable being changed by another
CPU generates an event, which will wake us up out of WFE.
This patch implements smp_cond_load_acquire using LDXR and WFE, which
themselves are contained in an internal __cmpwait function.
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
Based on Peter's locking/core branch.
v1->v2: Use cmpwait_relaxed
arch/arm64/include/asm/barrier.h | 13 ++++++++++
arch/arm64/include/asm/cmpxchg.h | 51 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index dae5c49618db..4eea7f618dce 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -91,6 +91,19 @@ do { \
__u.__val; \
})
+#define smp_cond_load_acquire(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ VAL; \
+})
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 510c7b404454..bd86a79491bc 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb)
__ret; \
})
+#define __CMPWAIT_CASE(w, sz, name) \
+static inline void __cmpwait_case_##name(volatile void *ptr, \
+ unsigned long val) \
+{ \
+ unsigned long tmp; \
+ \
+ asm volatile( \
+ " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " cbnz %" #w "[tmp], 1f\n" \
+ " wfe\n" \
+ "1:" \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
+ : [val] "r" (val)); \
+}
+
+__CMPWAIT_CASE(w, b, 1);
+__CMPWAIT_CASE(w, h, 2);
+__CMPWAIT_CASE(w, , 4);
+__CMPWAIT_CASE( , , 8);
+
+#undef __CMPWAIT_CASE
+
+#define __CMPWAIT_GEN(sfx) \
+static inline void __cmpwait##sfx(volatile void *ptr, \
+ unsigned long val, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpwait_case##sfx##_1(ptr, (u8)val); \
+ case 2: \
+ return __cmpwait_case##sfx##_2(ptr, (u16)val); \
+ case 4: \
+ return __cmpwait_case##sfx##_4(ptr, val); \
+ case 8: \
+ return __cmpwait_case##sfx##_8(ptr, val); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__CMPWAIT_GEN()
+
+#undef __CMPWAIT_GEN
+
+#define __cmpwait_relaxed(ptr, val) \
+ __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+
#endif /* __ASM_CMPXCHG_H */
--
2.1.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2] arm64: barrier: implement wfe-based smp_cond_load_acquire
2016-06-27 17:43 [PATCH v2] arm64: barrier: implement wfe-based smp_cond_load_acquire Will Deacon
@ 2016-06-30 13:00 ` Will Deacon
2016-06-30 13:10 ` Peter Zijlstra
2016-07-07 8:32 ` [tip:locking/core] locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() tip-bot for Will Deacon
1 sibling, 1 reply; 4+ messages in thread
From: Will Deacon @ 2016-06-30 13:00 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Jun 27, 2016 at 06:43:54PM +0100, Will Deacon wrote:
> smp_cond_load_acquire is used to spin on a variable until some
> expression involving that variable becomes true.
>
> On arm64, we can build this using WFE and LDXR, since clearing of the
> exclusive monitor as a result of the variable being changed by another
> CPU generates an event, which will wake us up out of WFE.
>
> This patch implements smp_cond_load_acquire using LDXR and WFE, which
> themselves are contained in an internal __cmpwait function.
>
> Signed-off-by: Will Deacon <will.deacon@arm.com>
> ---
>
> Based on Peter's locking/core branch.
>
> v1->v2: Use cmpwait_relaxed
>
> arch/arm64/include/asm/barrier.h | 13 ++++++++++
> arch/arm64/include/asm/cmpxchg.h | 51 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 64 insertions(+)
Peter -- could you take this one via locking/core for 4.8, please? I don't
anticipate any conflicts with the arm64 tree and it relies on your other
cond-wait patches anyway.
Cheers,
Will
> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
> index dae5c49618db..4eea7f618dce 100644
> --- a/arch/arm64/include/asm/barrier.h
> +++ b/arch/arm64/include/asm/barrier.h
> @@ -91,6 +91,19 @@ do { \
> __u.__val; \
> })
>
> +#define smp_cond_load_acquire(ptr, cond_expr) \
> +({ \
> + typeof(ptr) __PTR = (ptr); \
> + typeof(*ptr) VAL; \
> + for (;;) { \
> + VAL = smp_load_acquire(__PTR); \
> + if (cond_expr) \
> + break; \
> + __cmpwait_relaxed(__PTR, VAL); \
> + } \
> + VAL; \
> +})
> +
> #include <asm-generic/barrier.h>
>
> #endif /* __ASSEMBLY__ */
> diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> index 510c7b404454..bd86a79491bc 100644
> --- a/arch/arm64/include/asm/cmpxchg.h
> +++ b/arch/arm64/include/asm/cmpxchg.h
> @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb)
> __ret; \
> })
>
> +#define __CMPWAIT_CASE(w, sz, name) \
> +static inline void __cmpwait_case_##name(volatile void *ptr, \
> + unsigned long val) \
> +{ \
> + unsigned long tmp; \
> + \
> + asm volatile( \
> + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
> + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
> + " cbnz %" #w "[tmp], 1f\n" \
> + " wfe\n" \
> + "1:" \
> + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
> + : [val] "r" (val)); \
> +}
> +
> +__CMPWAIT_CASE(w, b, 1);
> +__CMPWAIT_CASE(w, h, 2);
> +__CMPWAIT_CASE(w, , 4);
> +__CMPWAIT_CASE( , , 8);
> +
> +#undef __CMPWAIT_CASE
> +
> +#define __CMPWAIT_GEN(sfx) \
> +static inline void __cmpwait##sfx(volatile void *ptr, \
> + unsigned long val, \
> + int size) \
> +{ \
> + switch (size) { \
> + case 1: \
> + return __cmpwait_case##sfx##_1(ptr, (u8)val); \
> + case 2: \
> + return __cmpwait_case##sfx##_2(ptr, (u16)val); \
> + case 4: \
> + return __cmpwait_case##sfx##_4(ptr, val); \
> + case 8: \
> + return __cmpwait_case##sfx##_8(ptr, val); \
> + default: \
> + BUILD_BUG(); \
> + } \
> + \
> + unreachable(); \
> +}
> +
> +__CMPWAIT_GEN()
> +
> +#undef __CMPWAIT_GEN
> +
> +#define __cmpwait_relaxed(ptr, val) \
> + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
> +
> #endif /* __ASM_CMPXCHG_H */
> --
> 2.1.4
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2] arm64: barrier: implement wfe-based smp_cond_load_acquire
2016-06-30 13:00 ` Will Deacon
@ 2016-06-30 13:10 ` Peter Zijlstra
0 siblings, 0 replies; 4+ messages in thread
From: Peter Zijlstra @ 2016-06-30 13:10 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Jun 30, 2016 at 02:00:17PM +0100, Will Deacon wrote:
> On Mon, Jun 27, 2016 at 06:43:54PM +0100, Will Deacon wrote:
> > smp_cond_load_acquire is used to spin on a variable until some
> > expression involving that variable becomes true.
> >
> > On arm64, we can build this using WFE and LDXR, since clearing of the
> > exclusive monitor as a result of the variable being changed by another
> > CPU generates an event, which will wake us up out of WFE.
> >
> > This patch implements smp_cond_load_acquire using LDXR and WFE, which
> > themselves are contained in an internal __cmpwait function.
> >
> > Signed-off-by: Will Deacon <will.deacon@arm.com>
> > ---
> >
> > Based on Peter's locking/core branch.
> >
> > v1->v2: Use cmpwait_relaxed
> >
> > arch/arm64/include/asm/barrier.h | 13 ++++++++++
> > arch/arm64/include/asm/cmpxchg.h | 51 ++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 64 insertions(+)
>
> Peter -- could you take this one via locking/core for 4.8, please? I don't
> anticipate any conflicts with the arm64 tree and it relies on your other
> cond-wait patches anyway.
OK, queued it. Thanks!
^ permalink raw reply [flat|nested] 4+ messages in thread
* [tip:locking/core] locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire()
2016-06-27 17:43 [PATCH v2] arm64: barrier: implement wfe-based smp_cond_load_acquire Will Deacon
2016-06-30 13:00 ` Will Deacon
@ 2016-07-07 8:32 ` tip-bot for Will Deacon
1 sibling, 0 replies; 4+ messages in thread
From: tip-bot for Will Deacon @ 2016-07-07 8:32 UTC (permalink / raw)
To: linux-tip-commits
Cc: tglx, akpm, torvalds, hpa, peterz, mingo, paulmck, will.deacon,
linux-kernel
Commit-ID: 03e3c2b7edbe1e8758196b2c7843333eb328063d
Gitweb: http://git.kernel.org/tip/03e3c2b7edbe1e8758196b2c7843333eb328063d
Author: Will Deacon <will.deacon@arm.com>
AuthorDate: Mon, 27 Jun 2016 18:43:54 +0100
Committer: Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 7 Jul 2016 09:10:53 +0200
locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire()
smp_cond_load_acquire() is used to spin on a variable until some
expression involving that variable becomes true.
On arm64, we can build this using the LDXR and WFE instructions, since
clearing of the exclusive monitor as a result of the variable being
changed by another CPU generates an event, which will wake us up out of WFE.
This patch implements smp_cond_load_acquire() using LDXR and WFE, which
themselves are contained in an internal __cmpwait() function.
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: catalin.marinas@arm.com
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1467049434-30451-1-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/arm64/include/asm/barrier.h | 13 ++++++++++
arch/arm64/include/asm/cmpxchg.h | 51 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index dae5c49..4eea7f6 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -91,6 +91,19 @@ do { \
__u.__val; \
})
+#define smp_cond_load_acquire(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ VAL; \
+})
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 510c7b4..bd86a79 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb)
__ret; \
})
+#define __CMPWAIT_CASE(w, sz, name) \
+static inline void __cmpwait_case_##name(volatile void *ptr, \
+ unsigned long val) \
+{ \
+ unsigned long tmp; \
+ \
+ asm volatile( \
+ " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " cbnz %" #w "[tmp], 1f\n" \
+ " wfe\n" \
+ "1:" \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
+ : [val] "r" (val)); \
+}
+
+__CMPWAIT_CASE(w, b, 1);
+__CMPWAIT_CASE(w, h, 2);
+__CMPWAIT_CASE(w, , 4);
+__CMPWAIT_CASE( , , 8);
+
+#undef __CMPWAIT_CASE
+
+#define __CMPWAIT_GEN(sfx) \
+static inline void __cmpwait##sfx(volatile void *ptr, \
+ unsigned long val, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpwait_case##sfx##_1(ptr, (u8)val); \
+ case 2: \
+ return __cmpwait_case##sfx##_2(ptr, (u16)val); \
+ case 4: \
+ return __cmpwait_case##sfx##_4(ptr, val); \
+ case 8: \
+ return __cmpwait_case##sfx##_8(ptr, val); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__CMPWAIT_GEN()
+
+#undef __CMPWAIT_GEN
+
+#define __cmpwait_relaxed(ptr, val) \
+ __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+
#endif /* __ASM_CMPXCHG_H */
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-07-07 8:33 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-27 17:43 [PATCH v2] arm64: barrier: implement wfe-based smp_cond_load_acquire Will Deacon
2016-06-30 13:00 ` Will Deacon
2016-06-30 13:10 ` Peter Zijlstra
2016-07-07 8:32 ` [tip:locking/core] locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() tip-bot for Will Deacon
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.