* [PATCH 0/3] powerpc: add ticket spinlock
From: Kevin Hao @ 2015-03-16 11:33 UTC
To: linuxppc-dev; +Cc: Paul Mackerras
Hi,
This patch series converts the simple spinlock to a ticket-based spinlock on
platforms without shared processors. I have run the following command ten times
on a t4240rdb board (12 dual-threaded cpus):
./perf bench sched messaging -g 100
                            Before    After
Averaged total time [sec]:   2.623    2.479
We can see a ~5% performance improvement ((2.623 - 2.479) / 2.623 is about
5.5%). I also ran the following command ten times on a p1020rdb board (2 cpus):
./perf bench sched messaging -g 20
                            Before    After
Averaged total time [sec]:   3.139    3.144
So the overhead on the 2-cpu platform is also trivial.
Kevin Hao (3):
powerpc: introduce PPC_HAS_LOCK_OWNER
powerpc: spinlock: refactor codes wrapped by PPC_HAS_LOCK_OWNER
powerpc: add ticket spinlock
arch/powerpc/Kconfig | 8 ++
arch/powerpc/include/asm/spinlock.h | 144 ++++++++++++++++++++++--------
arch/powerpc/include/asm/spinlock_types.h | 16 ++++
arch/powerpc/lib/locks.c | 4 +-
arch/powerpc/platforms/pseries/Kconfig | 1 +
5 files changed, 133 insertions(+), 40 deletions(-)
--
2.1.0
* [PATCH 1/3] powerpc: introduce PPC_HAS_LOCK_OWNER
From: Kevin Hao @ 2015-03-16 11:33 UTC
To: linuxppc-dev; +Cc: Paul Mackerras
On a system with shared processors (a physical processor is multiplexed
between several virtual processors), we encode the lock owner into the
lock token in order to avoid unnecessarily spinning on a lock if the
lock holder isn't currently scheduled on a physical processor.
In the current kernel, we unconditionally encode the lock owner into
the lock token on all ppc64 platforms, whether or not they run on
shared processors. Introduce a new kernel option to distinguish the
platforms which need this hack.
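For illustration only, here is a hypothetical C sketch of the
owner-encoding scheme described above; the helper names are made up,
and the real kernel simply reads a pre-computed token out of the paca:

#include <stdint.h>

/* use 0x800000yy when locked, where yy == CPU number */
static inline uint32_t lock_token_for(unsigned int cpu)
{
	return 0x80000000u | cpu;
}

static inline unsigned int lock_holder_cpu(uint32_t token)
{
	/* The low bits name the holder, so a waiting virtual processor
	 * can ask the hypervisor to run that CPU instead of spinning. */
	return token & 0xffffu;
}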
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
arch/powerpc/Kconfig | 3 +++
arch/powerpc/include/asm/spinlock.h | 12 ++++++++----
arch/powerpc/lib/locks.c | 4 ++--
arch/powerpc/platforms/pseries/Kconfig | 1 +
4 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9b780e0d2c18..6949d6099d4c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -303,6 +303,9 @@ config PGTABLE_LEVELS
default 3 if PPC_64K_PAGES
default 4
+config PPC_HAS_LOCK_OWNER
+ bool
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4dbe072eecbe..38f40ea63a8c 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -30,7 +30,7 @@
#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_HAS_LOCK_OWNER
/* use 0x800000yy when locked, where yy == CPU number */
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -109,7 +109,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
* value.
*/
-#if defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_PPC_HAS_LOCK_OWNER)
/* We only yield to the hypervisor if we are in shared processor mode */
#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
extern void __spin_yield(arch_spinlock_t *lock);
@@ -164,7 +164,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
lock->slock = 0;
}
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_HAS_LOCK_OWNER
extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
#else
#define arch_spin_unlock_wait(lock) \
@@ -187,9 +187,13 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
#ifdef CONFIG_PPC64
#define __DO_SIGN_EXTEND "extsw %0,%0\n"
-#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
#else
#define __DO_SIGN_EXTEND
+#endif
+
+#ifdef CONFIG_PPC_HAS_LOCK_OWNER
+#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
+#else
#define WRLOCK_TOKEN (-1)
#endif
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 170a0346f756..66513b3e9b0e 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -19,7 +19,7 @@
#include <linux/smp.h>
/* waiting for a spinlock... */
-#if defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_PPC_HAS_LOCK_OWNER)
#include <asm/hvcall.h>
#include <asm/smp.h>
@@ -66,7 +66,6 @@ void __rw_yield(arch_rwlock_t *rw)
plpar_hcall_norets(H_CONFER,
get_hard_smp_processor_id(holder_cpu), yield_count);
}
-#endif
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
@@ -83,3 +82,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
+#endif
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index a758a9c3bbba..5402fcc30c3e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -27,6 +27,7 @@ config PPC_PSERIES
config PPC_SPLPAR
depends on PPC_PSERIES
bool "Support for shared-processor logical partitions"
+ select PPC_HAS_LOCK_OWNER
default n
help
Enabling this option will make the kernel run more efficiently
--
2.1.0
* [PATCH 2/3] powerpc: spinlock: refactor codes wrapped by PPC_HAS_LOCK_OWNER
From: Kevin Hao @ 2015-03-16 11:33 UTC
To: linuxppc-dev; +Cc: Paul Mackerras
Move all of the code wrapped by PPC_HAS_LOCK_OWNER to one place. No
functional change.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
arch/powerpc/include/asm/spinlock.h | 71 ++++++++++++++++---------------------
1 file changed, 30 insertions(+), 41 deletions(-)
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 38f40ea63a8c..cbc9511df409 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -30,6 +30,20 @@
#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
+/*
+ * On a system with shared processors (that is, where a physical
+ * processor is multiplexed between several virtual processors),
+ * there is no point spinning on a lock if the holder of the lock
+ * isn't currently scheduled on a physical processor. Instead
+ * we detect this situation and ask the hypervisor to give the
+ * rest of our timeslice to the lock holder.
+ *
+ * So that we can tell which virtual processor is holding a lock,
+ * we put 0x80000000 | smp_processor_id() in the lock when it is
+ * held. Conveniently, we have a word in the paca that holds this
+ * value.
+ */
+
#ifdef CONFIG_PPC_HAS_LOCK_OWNER
/* use 0x800000yy when locked, where yy == CPU number */
#ifdef __BIG_ENDIAN__
@@ -37,9 +51,22 @@
#else
#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
#endif
-#else
-#define LOCK_TOKEN 1
-#endif
+#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
+
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
+extern void __spin_yield(arch_spinlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
+extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
+#else /* CONFIG_PPC_HAS_LOCK_OWNER */
+#define LOCK_TOKEN 1
+#define WRLOCK_TOKEN (-1)
+#define SHARED_PROCESSOR 0
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define arch_spin_unlock_wait(lock) \
+ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+#endif /* CONFIG_PPC_HAS_LOCK_OWNER */
#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
#define CLEAR_IO_SYNC (get_paca()->io_sync = 0)
@@ -95,31 +122,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
return __arch_spin_trylock(lock) == 0;
}
-/*
- * On a system with shared processors (that is, where a physical
- * processor is multiplexed between several virtual processors),
- * there is no point spinning on a lock if the holder of the lock
- * isn't currently scheduled on a physical processor. Instead
- * we detect this situation and ask the hypervisor to give the
- * rest of our timeslice to the lock holder.
- *
- * So that we can tell which virtual processor is holding a lock,
- * we put 0x80000000 | smp_processor_id() in the lock when it is
- * held. Conveniently, we have a word in the paca that holds this
- * value.
- */
-
-#if defined(CONFIG_PPC_HAS_LOCK_OWNER)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x) barrier()
-#define __rw_yield(x) barrier()
-#define SHARED_PROCESSOR 0
-#endif
-
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
CLEAR_IO_SYNC;
@@ -164,13 +166,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
lock->slock = 0;
}
-#ifdef CONFIG_PPC_HAS_LOCK_OWNER
-extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
-#else
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
-#endif
-
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
@@ -191,12 +186,6 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
#define __DO_SIGN_EXTEND
#endif
-#ifdef CONFIG_PPC_HAS_LOCK_OWNER
-#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
-#else
-#define WRLOCK_TOKEN (-1)
-#endif
-
/*
* This returns the old value in the lock + 1,
* so we got a read lock if the return value is > 0.
--
2.1.0
* [PATCH 3/3] powerpc: add ticket spinlock
From: Kevin Hao @ 2015-03-16 11:33 UTC
To: linuxppc-dev; +Cc: Paul Mackerras
Convert the simple spinlock to a ticket-based one. This is based on
the arm64 code and only covers systems that don't support shared
processors (where a physical processor is multiplexed between several
virtual processors).
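For readers unfamiliar with ticket locks, here is a rough sketch of the
idea in plain C11 atomics; this is not the patch's code (the patch
implements it with lwarx/stwcx. assembly below), just an illustration
of the algorithm:

#include <stdatomic.h>
#include <stdint.h>

struct ticket_lock {
	_Atomic uint16_t next;		/* next ticket to hand out */
	_Atomic uint16_t owner;		/* ticket currently served */
};

static void ticket_lock(struct ticket_lock *l)
{
	/* Take a ticket, then wait until it is our turn. */
	uint16_t me = atomic_fetch_add(&l->next, 1);

	while (atomic_load_explicit(&l->owner, memory_order_acquire) != me)
		;	/* waiters are served in FIFO order */
}

static void ticket_unlock(struct ticket_lock *l)
{
	/* Only the holder writes owner, so an increment suffices. */
	atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
}

The unlock path in the patch matches this: after a release barrier, it
just increments lock->owner.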
Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
arch/powerpc/Kconfig | 5 ++
arch/powerpc/include/asm/spinlock.h | 77 ++++++++++++++++++++++++++++++-
arch/powerpc/include/asm/spinlock_types.h | 16 +++++++
3 files changed, 97 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6949d6099d4c..dff19e522b2d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -306,6 +306,11 @@ config PGTABLE_LEVELS
config PPC_HAS_LOCK_OWNER
bool
+config PPC_TICKET_LOCK
+ bool
+ depends on !PPC_HAS_LOCK_OWNER
+ default y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index cbc9511df409..40035261d2db 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -59,7 +59,6 @@ extern void __spin_yield(arch_spinlock_t *lock);
extern void __rw_yield(arch_rwlock_t *lock);
extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
#else /* CONFIG_PPC_HAS_LOCK_OWNER */
-#define LOCK_TOKEN 1
#define WRLOCK_TOKEN (-1)
#define SHARED_PROCESSOR 0
#define __spin_yield(x) barrier()
@@ -81,6 +80,77 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
#define SYNC_IO
#endif
+#ifdef CONFIG_PPC_TICKET_LOCK
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+ arch_spinlock_t lockval = READ_ONCE(*lock);
+
+ return (lockval.next - lockval.owner) > 1;
+}
+#define arch_spin_is_contended arch_spin_is_contended
+
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.owner == lock.next;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ return !arch_spin_value_unlocked(READ_ONCE(*lock));
+}
+
+static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock)
+{
+ unsigned int tmp;
+ arch_spinlock_t lockval;
+
+ CLEAR_IO_SYNC;
+ __asm__ __volatile__ (
+"1: " PPC_LWARX(%0,0,%2,1) "\n\
+ rotlwi %1,%0,16\n\
+ xor. %1,%1,%0\n\
+ bne- 2f\n\
+ add %0,%0,%3\n\
+ stwcx. %0,0,%2\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:"
+ : "=&r" (lockval), "=&r" (tmp)
+ : "r" (lock), "r" (1 << TICKET_SHIFT)
+ : "cr0", "memory");
+
+ return !tmp;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ unsigned int tmp;
+ arch_spinlock_t lockval;
+
+ CLEAR_IO_SYNC;
+ __asm__ __volatile__ (
+"1: " PPC_LWARX(%0,0,%2,1) "\n\
+ add %1,%0,%4\n\
+ stwcx. %1,0,%2\n\
+ bne- 1b\n\
+ rotlwi %1,%0,16\n\
+ cmpw %1,%0\n\
+ beq 3f\n\
+ rlwinm %0,%0,16,16,31\n\
+2: or 1,1,1 # HMT_low\n\
+ lhz %1,0(%3)\n\
+ cmpw %1,%0\n\
+ bne 2b\n\
+ or 2,2,2 # HMT_medium\n\
+3:"
+ PPC_ACQUIRE_BARRIER
+ : "=&r" (lockval), "=&r" (tmp)
+ : "r"(lock), "r" (&lock->owner), "r" (1 << TICKET_SHIFT)
+ : "cr0", "memory");
+}
+#else
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.slock == 0;
@@ -157,13 +227,18 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
local_irq_restore(flags_dis);
}
}
+#endif
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
SYNC_IO;
__asm__ __volatile__("# arch_spin_unlock\n\t"
PPC_RELEASE_BARRIER: : :"memory");
+#ifdef CONFIG_PPC_TICKET_LOCK
+ lock->owner++;
+#else
lock->slock = 0;
+#endif
}
/*
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc4fdc4..371770f906dc 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -5,11 +5,27 @@
# error "please don't include this file directly"
#endif
+#ifdef CONFIG_PPC_TICKET_LOCK
+#define TICKET_SHIFT 16
+
+typedef struct {
+#ifdef __BIG_ENDIAN__
+ u16 next;
+ u16 owner;
+#else
+ u16 owner;
+ u16 next;
+#endif
+} __aligned(4) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 }
+#else
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#endif /* CONFIG_PPC_TICKET_LOCK */
typedef struct {
volatile signed int lock;
--
2.1.0