From: Will Deacon <will.deacon@arm.com>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "paulmck@linux.vnet.ibm.com" <paulmck@linux.vnet.ibm.com>,
Michael Ellerman <mpe@ellerman.id.au>,
"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
Peter Zijlstra <peterz@infradead.org>
Subject: Re: [RFC PATCH v2] memory-barriers: remove smp_mb__after_unlock_lock()
Date: Wed, 22 Jul 2015 17:49:44 +0100 [thread overview]
Message-ID: <20150722164943.GQ6650@arm.com> (raw)
In-Reply-To: <1437427094.3775.22.camel@kernel.crashing.org>
On Mon, Jul 20, 2015 at 10:18:14PM +0100, Benjamin Herrenschmidt wrote:
> On Mon, 2015-07-20 at 14:39 +0100, Will Deacon wrote:
> > On Fri, Jul 17, 2015 at 11:14:14PM +0100, Benjamin Herrenschmidt wrote:
> > > On Fri, 2015-07-17 at 10:32 +0100, Will Deacon wrote:
> > > > static inline void arch_spin_unlock(arch_spinlock_t *lock)
> > > > {
> > > > - SYNC_IO;
> > > > - __asm__ __volatile__("# arch_spin_unlock\n\t"
> > > > - PPC_RELEASE_BARRIER: : :"memory");
> > > > + smp_mb();
> > > > lock->slock = 0;
> > > > }
> > >
> > > That probably needs to be mb() in case somebody has the expectation that
> > > it does a barrier vs. DMA on UP.
> >
> > Hmm, but on !SMP doesn't arch_spin_unlock effectively expand to barrier()
> > in the core code?
>
> include/linux/spinlock_up.h:# define arch_spin_unlock(lock) do {
> barrier(); (void)(lock); } while (0)
>
> Indeed...
So, leaving mmiowb() alone, we end up with the patch below.
Will
--->8
From 9373a4226986dd69b6aaf896590ea43abeb1cc8e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 22 Jul 2015 17:37:58 +0100
Subject: [PATCH] powerpc: kill smp_mb__after_unlock_lock
PowerPC is the only architecture defining smp_mb__after_unlock_lock,
but we can remove it by adding an unconditional sync (smp_mb()) to the
spin_unlock code.
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
arch/powerpc/include/asm/io.h | 13 +------------
arch/powerpc/include/asm/spinlock.h | 22 +---------------------
2 files changed, 2 insertions(+), 33 deletions(-)
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index a8d2ef30d473..a3ad51046b04 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -105,12 +105,6 @@ extern bool isa_io_special;
*
*/
-#ifdef CONFIG_PPC64
-#define IO_SET_SYNC_FLAG() do { local_paca->io_sync = 1; } while(0)
-#else
-#define IO_SET_SYNC_FLAG()
-#endif
-
/* gcc 4.0 and older doesn't have 'Z' constraint */
#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
#define DEF_MMIO_IN_X(name, size, insn) \
@@ -127,7 +121,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn" %1,0,%2" \
: "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \
- IO_SET_SYNC_FLAG(); \
}
#else /* newer gcc */
#define DEF_MMIO_IN_X(name, size, insn) \
@@ -144,7 +137,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn" %1,%y0" \
: "=Z" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
}
#endif
@@ -162,7 +154,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \
: "=m" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
}
DEF_MMIO_IN_D(in_8, 8, lbz);
@@ -630,9 +621,7 @@ static inline void name at \
#define mmiowb()
#else
/*
- * Enforce synchronisation of stores vs. spin_unlock
- * (this does it explicitly, though our implementation of spin_unlock
- * does it implicitely too)
+ * Explicitly enforce synchronisation of stores vs. spin_unlock
*/
static inline void mmiowb(void)
{
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4dbe072eecbe..9da89ea4ff31 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,8 +28,6 @@
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
-
#ifdef CONFIG_PPC64
/* use 0x800000yy when locked, where yy == CPU number */
#ifdef __BIG_ENDIAN__
@@ -41,19 +39,6 @@
#define LOCK_TOKEN 1
#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
-#define CLEAR_IO_SYNC (get_paca()->io_sync = 0)
-#define SYNC_IO do { \
- if (unlikely(get_paca()->io_sync)) { \
- mb(); \
- get_paca()->io_sync = 0; \
- } \
- } while (0)
-#else
-#define CLEAR_IO_SYNC
-#define SYNC_IO
-#endif
-
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.slock == 0;
@@ -91,7 +76,6 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
- CLEAR_IO_SYNC;
return __arch_spin_trylock(lock) == 0;
}
@@ -122,7 +106,6 @@ extern void __rw_yield(arch_rwlock_t *lock);
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
- CLEAR_IO_SYNC;
while (1) {
if (likely(__arch_spin_trylock(lock) == 0))
break;
@@ -140,7 +123,6 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
{
unsigned long flags_dis;
- CLEAR_IO_SYNC;
while (1) {
if (likely(__arch_spin_trylock(lock) == 0))
break;
@@ -158,9 +140,7 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
- SYNC_IO;
- __asm__ __volatile__("# arch_spin_unlock\n\t"
- PPC_RELEASE_BARRIER: : :"memory");
+ smp_mb();
lock->slock = 0;
}
--
2.1.4
WARNING: multiple messages have this Message-ID (diff)
From: Will Deacon <will.deacon@arm.com>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "paulmck@linux.vnet.ibm.com" <paulmck@linux.vnet.ibm.com>,
Michael Ellerman <mpe@ellerman.id.au>,
"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
Peter Zijlstra <peterz@infradead.org>
Subject: Re: [RFC PATCH v2] memory-barriers: remove smp_mb__after_unlock_lock()
Date: Wed, 22 Jul 2015 17:49:44 +0100 [thread overview]
Message-ID: <20150722164943.GQ6650@arm.com> (raw)
Message-ID: <20150722164944.Uau5lv6HCU2RlBS7K1wqnHrMzifR1ESyPP8AsVzci-Y@z> (raw)
In-Reply-To: <1437427094.3775.22.camel@kernel.crashing.org>
On Mon, Jul 20, 2015 at 10:18:14PM +0100, Benjamin Herrenschmidt wrote:
> On Mon, 2015-07-20 at 14:39 +0100, Will Deacon wrote:
> > On Fri, Jul 17, 2015 at 11:14:14PM +0100, Benjamin Herrenschmidt wrote:
> > > On Fri, 2015-07-17 at 10:32 +0100, Will Deacon wrote:
> > > > static inline void arch_spin_unlock(arch_spinlock_t *lock)
> > > > {
> > > > - SYNC_IO;
> > > > - __asm__ __volatile__("# arch_spin_unlock\n\t"
> > > > - PPC_RELEASE_BARRIER: : :"memory");
> > > > + smp_mb();
> > > > lock->slock = 0;
> > > > }
> > >
> > > That probably needs to be mb() in case somebody has the expectation that
> > > it does a barrier vs. DMA on UP.
> >
> > Hmm, but on !SMP doesn't arch_spin_unlock effectively expand to barrier()
> > in the core code?
>
> include/linux/spinlock_up.h:# define arch_spin_unlock(lock) do {
> barrier(); (void)(lock); } while (0)
>
> Indeed...
So, leaving mmiowb() alone, we end up with the patch below.
Will
--->8
WARNING: multiple messages have this Message-ID (diff)
From: Will Deacon <will.deacon@arm.com>
To: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: "paulmck@linux.vnet.ibm.com" <paulmck@linux.vnet.ibm.com>,
Michael Ellerman <mpe@ellerman.id.au>,
"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
Peter Zijlstra <peterz@infradead.org>
Subject: Re: [RFC PATCH v2] memory-barriers: remove smp_mb__after_unlock_lock()
Date: Wed, 22 Jul 2015 17:49:44 +0100 [thread overview]
Message-ID: <20150722164943.GQ6650@arm.com> (raw)
In-Reply-To: <1437427094.3775.22.camel@kernel.crashing.org>
On Mon, Jul 20, 2015 at 10:18:14PM +0100, Benjamin Herrenschmidt wrote:
> On Mon, 2015-07-20 at 14:39 +0100, Will Deacon wrote:
> > On Fri, Jul 17, 2015 at 11:14:14PM +0100, Benjamin Herrenschmidt wrote:
> > > On Fri, 2015-07-17 at 10:32 +0100, Will Deacon wrote:
> > > > static inline void arch_spin_unlock(arch_spinlock_t *lock)
> > > > {
> > > > - SYNC_IO;
> > > > - __asm__ __volatile__("# arch_spin_unlock\n\t"
> > > > - PPC_RELEASE_BARRIER: : :"memory");
> > > > + smp_mb();
> > > > lock->slock = 0;
> > > > }
> > >
> > > That probably needs to be mb() in case somebody has the expectation that
> > > it does a barrier vs. DMA on UP.
> >
> > Hmm, but on !SMP doesn't arch_spin_unlock effectively expand to barrier()
> > in the core code?
>
> include/linux/spinlock_up.h:# define arch_spin_unlock(lock) do {
> barrier(); (void)(lock); } while (0)
>
> Indeed...
So, leaving mmiowb() alone, we end up with the patch below.
Will
--->8
>From 9373a4226986dd69b6aaf896590ea43abeb1cc8e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 22 Jul 2015 17:37:58 +0100
Subject: [PATCH] powerpc: kill smp_mb__after_unlock_lock
PowerPC is the only architecture defining smp_mb__after_unlock_lock,
but we can remove it by adding an unconditional sync (smp_mb()) to the
spin_unlock code.
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
arch/powerpc/include/asm/io.h | 13 +------------
arch/powerpc/include/asm/spinlock.h | 22 +---------------------
2 files changed, 2 insertions(+), 33 deletions(-)
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index a8d2ef30d473..a3ad51046b04 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -105,12 +105,6 @@ extern bool isa_io_special;
*
*/
-#ifdef CONFIG_PPC64
-#define IO_SET_SYNC_FLAG() do { local_paca->io_sync = 1; } while(0)
-#else
-#define IO_SET_SYNC_FLAG()
-#endif
-
/* gcc 4.0 and older doesn't have 'Z' constraint */
#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
#define DEF_MMIO_IN_X(name, size, insn) \
@@ -127,7 +121,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn" %1,0,%2" \
: "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \
- IO_SET_SYNC_FLAG(); \
}
#else /* newer gcc */
#define DEF_MMIO_IN_X(name, size, insn) \
@@ -144,7 +137,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn" %1,%y0" \
: "=Z" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
}
#endif
@@ -162,7 +154,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \
: "=m" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
}
DEF_MMIO_IN_D(in_8, 8, lbz);
@@ -630,9 +621,7 @@ static inline void name at \
#define mmiowb()
#else
/*
- * Enforce synchronisation of stores vs. spin_unlock
- * (this does it explicitly, though our implementation of spin_unlock
- * does it implicitely too)
+ * Explicitly enforce synchronisation of stores vs. spin_unlock
*/
static inline void mmiowb(void)
{
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4dbe072eecbe..9da89ea4ff31 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -28,8 +28,6 @@
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
-
#ifdef CONFIG_PPC64
/* use 0x800000yy when locked, where yy == CPU number */
#ifdef __BIG_ENDIAN__
@@ -41,19 +39,6 @@
#define LOCK_TOKEN 1
#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
-#define CLEAR_IO_SYNC (get_paca()->io_sync = 0)
-#define SYNC_IO do { \
- if (unlikely(get_paca()->io_sync)) { \
- mb(); \
- get_paca()->io_sync = 0; \
- } \
- } while (0)
-#else
-#define CLEAR_IO_SYNC
-#define SYNC_IO
-#endif
-
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.slock == 0;
@@ -91,7 +76,6 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
- CLEAR_IO_SYNC;
return __arch_spin_trylock(lock) == 0;
}
@@ -122,7 +106,6 @@ extern void __rw_yield(arch_rwlock_t *lock);
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
- CLEAR_IO_SYNC;
while (1) {
if (likely(__arch_spin_trylock(lock) == 0))
break;
@@ -140,7 +123,6 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
{
unsigned long flags_dis;
- CLEAR_IO_SYNC;
while (1) {
if (likely(__arch_spin_trylock(lock) == 0))
break;
@@ -158,9 +140,7 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
- SYNC_IO;
- __asm__ __volatile__("# arch_spin_unlock\n\t"
- PPC_RELEASE_BARRIER: : :"memory");
+ smp_mb();
lock->slock = 0;
}
--
2.1.4
next prev parent reply other threads:[~2015-07-22 16:49 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-13 12:15 [RFC PATCH v2] memory-barriers: remove smp_mb__after_unlock_lock() Will Deacon
2015-07-13 13:09 ` Peter Hurley
2015-07-13 14:24 ` Will Deacon
2015-07-13 15:56 ` Peter Zijlstra
2015-07-13 13:11 ` Peter Zijlstra
2015-07-13 14:09 ` Will Deacon
2015-07-13 14:21 ` Will Deacon
2015-07-13 15:54 ` Peter Zijlstra
2015-07-13 17:50 ` Will Deacon
2015-07-13 20:20 ` Paul E. McKenney
2015-07-13 22:23 ` Peter Zijlstra
2015-07-13 23:04 ` Paul E. McKenney
2015-07-14 10:04 ` Will Deacon
2015-07-14 12:45 ` Paul E. McKenney
2015-07-14 12:51 ` Will Deacon
2015-07-14 14:00 ` Paul E. McKenney
2015-07-14 14:12 ` Will Deacon
2015-07-14 19:31 ` Paul E. McKenney
2015-07-15 1:38 ` Paul E. McKenney
2015-07-15 10:51 ` Will Deacon
2015-07-15 13:12 ` Paul E. McKenney
2015-07-24 11:31 ` Will Deacon
2015-07-24 15:30 ` Paul E. McKenney
2015-08-12 13:44 ` Will Deacon
2015-08-12 15:43 ` Paul E. McKenney
2015-08-12 17:59 ` Paul E. McKenney
2015-08-13 10:49 ` Will Deacon
2015-08-13 13:10 ` Paul E. McKenney
2015-08-17 4:06 ` Michael Ellerman
2015-08-17 6:15 ` Paul E. McKenney
2015-08-17 8:57 ` Will Deacon
2015-08-18 1:50 ` Michael Ellerman
2015-08-18 8:37 ` Will Deacon
2015-08-20 9:45 ` Michael Ellerman
2015-08-20 15:56 ` Will Deacon
2015-08-26 0:27 ` Paul E. McKenney
2015-08-26 4:06 ` Michael Ellerman
2015-07-13 18:23 ` Paul E. McKenney
2015-07-13 19:41 ` Peter Hurley
2015-07-13 20:16 ` Paul E. McKenney
2015-07-13 22:15 ` Peter Zijlstra
2015-07-13 22:43 ` Benjamin Herrenschmidt
2015-07-14 8:34 ` Peter Zijlstra
2015-07-13 22:53 ` Paul E. McKenney
2015-07-13 22:37 ` Benjamin Herrenschmidt
2015-07-13 22:31 ` Benjamin Herrenschmidt
2015-07-14 10:16 ` Will Deacon
2015-07-15 3:06 ` Michael Ellerman
2015-07-15 10:44 ` Will Deacon
2015-07-16 2:00 ` Michael Ellerman
2015-07-16 5:03 ` Benjamin Herrenschmidt
2015-07-16 5:14 ` Benjamin Herrenschmidt
2015-07-16 15:11 ` Paul E. McKenney
2015-07-16 22:54 ` Benjamin Herrenschmidt
2015-07-17 9:32 ` Will Deacon
2015-07-17 10:15 ` Peter Zijlstra
2015-07-17 12:40 ` Paul E. McKenney
2015-07-17 22:14 ` Benjamin Herrenschmidt
2015-07-20 13:39 ` Will Deacon
2015-07-20 13:48 ` Paul E. McKenney
2015-07-20 13:56 ` Will Deacon
2015-07-20 21:18 ` Benjamin Herrenschmidt
2015-07-22 16:49 ` Will Deacon [this message]
2015-07-22 16:49 ` Will Deacon
2015-07-22 16:49 ` Will Deacon
2015-09-01 2:57 ` Paul Mackerras
2015-07-15 14:18 ` Paul E. McKenney
2015-07-16 1:34 ` Michael Ellerman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150722164943.GQ6650@arm.com \
--to=will.deacon@arm.com \
--cc=benh@kernel.crashing.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mpe@ellerman.id.au \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.