* [PATCH 0/3] powerpc: add ticket spinlock
@ 2015-03-16 11:33 Kevin Hao
  2015-03-16 11:33 ` [PATCH 1/3] powerpc: introduce PPC_HAS_LOCK_OWNER Kevin Hao
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Kevin Hao @ 2015-03-16 11:33 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paul Mackerras

Hi,

This patch series converts the simple spinlock to a ticket-based spinlock on
platforms without shared processors. I ran the following command ten times
on a t4240rdb board (12 dual-threaded cpus):
	./perf bench sched messaging -g 100

                                Before      After
    Averaged total time [sec]:  2.623       2.479 

That is roughly a 5% performance improvement. I also ran the following command
ten times on a p1020rdb board (2 cpus):
	./perf bench sched messaging -g 20 

                                Before      After
    Averaged total time [sec]:  3.139       3.144 

So the overhead of the ticket lock on the 2-cpu platform is also negligible.


Kevin Hao (3):
  powerpc: introduce PPC_HAS_LOCK_OWNER
  powerpc: spinlock: refactor code wrapped by PPC_HAS_LOCK_OWNER
  powerpc: add ticket spinlock

 arch/powerpc/Kconfig                      |   8 ++
 arch/powerpc/include/asm/spinlock.h       | 144 ++++++++++++++++++++++--------
 arch/powerpc/include/asm/spinlock_types.h |  16 ++++
 arch/powerpc/lib/locks.c                  |   4 +-
 arch/powerpc/platforms/pseries/Kconfig    |   1 +
 5 files changed, 133 insertions(+), 40 deletions(-)

-- 
2.1.0


* [PATCH 1/3] powerpc: introduce PPC_HAS_LOCK_OWNER
  2015-03-16 11:33 [PATCH 0/3] powerpc: add ticket spinlock Kevin Hao
@ 2015-03-16 11:33 ` Kevin Hao
  2015-03-16 11:33 ` [PATCH 2/3] powerpc: spinlock: refactor code wrapped by PPC_HAS_LOCK_OWNER Kevin Hao
  2015-03-16 11:33 ` [PATCH 3/3] powerpc: add ticket spinlock Kevin Hao
  2 siblings, 0 replies; 4+ messages in thread
From: Kevin Hao @ 2015-03-16 11:33 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paul Mackerras

On a system with shared processors (a physical processor is multiplexed
between several virtual processors), we encode the lock owner into the
lock token in order to avoid unnecessarily spinning on a lock if the
lock holder isn't currently scheduled on a physical processor.

In the current kernel, we unconditionally encode the lock owner into
the lock token on all ppc64 platforms, whether or not they use shared
processors. Introduce a new kernel option to distinguish the platforms
that actually need this.
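
As a rough illustration (not part of this patch, and the helper names
and mask below are made up for the example), the owner-encoded token
works like this:

	static inline u32 owner_lock_token(int cpu)
	{
		/* 0x800000yy, where yy == CPU number */
		return 0x80000000u | (u32)cpu;
	}

	static inline int lock_holder_cpu(u32 slock)
	{
		/* illustrative mask: a waiter recovers the holding CPU
		 * so it can confer its timeslice to it */
		return (int)(slock & 0xffff);
	}

In the real code the token is pre-computed and kept in the paca
(get_paca()->lock_token), so taking a lock does not have to build it.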

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
 arch/powerpc/Kconfig                   |  3 +++
 arch/powerpc/include/asm/spinlock.h    | 12 ++++++++----
 arch/powerpc/lib/locks.c               |  4 ++--
 arch/powerpc/platforms/pseries/Kconfig |  1 +
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9b780e0d2c18..6949d6099d4c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -303,6 +303,9 @@ config PGTABLE_LEVELS
 	default 3 if PPC_64K_PAGES
 	default 4
 
+config PPC_HAS_LOCK_OWNER
+	bool
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4dbe072eecbe..38f40ea63a8c 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -30,7 +30,7 @@
 
 #define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_HAS_LOCK_OWNER
 /* use 0x800000yy when locked, where yy == CPU number */
 #ifdef __BIG_ENDIAN__
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
@@ -109,7 +109,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
  * value.
  */
 
-#if defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_PPC_HAS_LOCK_OWNER)
 /* We only yield to the hypervisor if we are in shared processor mode */
 #define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
 extern void __spin_yield(arch_spinlock_t *lock);
@@ -164,7 +164,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->slock = 0;
 }
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_HAS_LOCK_OWNER
 extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #else
 #define arch_spin_unlock_wait(lock) \
@@ -187,9 +187,13 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 
 #ifdef CONFIG_PPC64
 #define __DO_SIGN_EXTEND	"extsw	%0,%0\n"
-#define WRLOCK_TOKEN		LOCK_TOKEN	/* it's negative */
 #else
 #define __DO_SIGN_EXTEND
+#endif
+
+#ifdef CONFIG_PPC_HAS_LOCK_OWNER
+#define WRLOCK_TOKEN		LOCK_TOKEN	/* it's negative */
+#else
 #define WRLOCK_TOKEN		(-1)
 #endif
 
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 170a0346f756..66513b3e9b0e 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -19,7 +19,7 @@
 #include <linux/smp.h>
 
 /* waiting for a spinlock... */
-#if defined(CONFIG_PPC_SPLPAR)
+#if defined(CONFIG_PPC_HAS_LOCK_OWNER)
 #include <asm/hvcall.h>
 #include <asm/smp.h>
 
@@ -66,7 +66,6 @@ void __rw_yield(arch_rwlock_t *rw)
 	plpar_hcall_norets(H_CONFER,
 		get_hard_smp_processor_id(holder_cpu), yield_count);
 }
-#endif
 
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
@@ -83,3 +82,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(arch_spin_unlock_wait);
+#endif
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index a758a9c3bbba..5402fcc30c3e 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -27,6 +27,7 @@ config PPC_PSERIES
 config PPC_SPLPAR
 	depends on PPC_PSERIES
 	bool "Support for shared-processor logical partitions"
+	select PPC_HAS_LOCK_OWNER
 	default n
 	help
 	  Enabling this option will make the kernel run more efficiently
-- 
2.1.0


* [PATCH 2/3] powerpc: spinlock: refactor code wrapped by PPC_HAS_LOCK_OWNER
  2015-03-16 11:33 [PATCH 0/3] powerpc: add ticket spinlock Kevin Hao
  2015-03-16 11:33 ` [PATCH 1/3] powerpc: introduce PPC_HAS_LOCK_OWNER Kevin Hao
@ 2015-03-16 11:33 ` Kevin Hao
  2015-03-16 11:33 ` [PATCH 3/3] powerpc: add ticket spinlock Kevin Hao
  2 siblings, 0 replies; 4+ messages in thread
From: Kevin Hao @ 2015-03-16 11:33 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paul Mackerras

Move all the code wrapped by PPC_HAS_LOCK_OWNER into one place. No
functional change.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
 arch/powerpc/include/asm/spinlock.h | 71 ++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 38f40ea63a8c..cbc9511df409 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -30,6 +30,20 @@
 
 #define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */
 
+/*
+ * On a system with shared processors (that is, where a physical
+ * processor is multiplexed between several virtual processors),
+ * there is no point spinning on a lock if the holder of the lock
+ * isn't currently scheduled on a physical processor.  Instead
+ * we detect this situation and ask the hypervisor to give the
+ * rest of our timeslice to the lock holder.
+ *
+ * So that we can tell which virtual processor is holding a lock,
+ * we put 0x80000000 | smp_processor_id() in the lock when it is
+ * held.  Conveniently, we have a word in the paca that holds this
+ * value.
+ */
+
 #ifdef CONFIG_PPC_HAS_LOCK_OWNER
 /* use 0x800000yy when locked, where yy == CPU number */
 #ifdef __BIG_ENDIAN__
@@ -37,9 +51,22 @@
 #else
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
 #endif
-#else
-#define LOCK_TOKEN	1
-#endif
+#define WRLOCK_TOKEN	LOCK_TOKEN	/* it's negative */
+
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
+extern void __spin_yield(arch_spinlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
+extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
+#else /* CONFIG_PPC_HAS_LOCK_OWNER */
+#define LOCK_TOKEN		1
+#define WRLOCK_TOKEN		(-1)
+#define SHARED_PROCESSOR	0
+#define __spin_yield(x)		barrier()
+#define __rw_yield(x)		barrier()
+#define arch_spin_unlock_wait(lock) \
+	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+#endif /* CONFIG_PPC_HAS_LOCK_OWNER */
 
 #if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
 #define CLEAR_IO_SYNC	(get_paca()->io_sync = 0)
@@ -95,31 +122,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	return __arch_spin_trylock(lock) == 0;
 }
 
-/*
- * On a system with shared processors (that is, where a physical
- * processor is multiplexed between several virtual processors),
- * there is no point spinning on a lock if the holder of the lock
- * isn't currently scheduled on a physical processor.  Instead
- * we detect this situation and ask the hypervisor to give the
- * rest of our timeslice to the lock holder.
- *
- * So that we can tell which virtual processor is holding a lock,
- * we put 0x80000000 | smp_processor_id() in the lock when it is
- * held.  Conveniently, we have a word in the paca that holds this
- * value.
- */
-
-#if defined(CONFIG_PPC_HAS_LOCK_OWNER)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x)	barrier()
-#define __rw_yield(x)	barrier()
-#define SHARED_PROCESSOR	0
-#endif
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
@@ -164,13 +166,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->slock = 0;
 }
 
-#ifdef CONFIG_PPC_HAS_LOCK_OWNER
-extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
-#else
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
-#endif
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -191,12 +186,6 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #define __DO_SIGN_EXTEND
 #endif
 
-#ifdef CONFIG_PPC_HAS_LOCK_OWNER
-#define WRLOCK_TOKEN		LOCK_TOKEN	/* it's negative */
-#else
-#define WRLOCK_TOKEN		(-1)
-#endif
-
 /*
  * This returns the old value in the lock + 1,
  * so we got a read lock if the return value is > 0.
-- 
2.1.0


* [PATCH 3/3] powerpc: add ticket spinlock
  2015-03-16 11:33 [PATCH 0/3] powerpc: add ticket spinlock Kevin Hao
  2015-03-16 11:33 ` [PATCH 1/3] powerpc: introduce PPC_HAS_LOCK_OWNER Kevin Hao
  2015-03-16 11:33 ` [PATCH 2/3] powerpc: spinlock: refactor code wrapped by PPC_HAS_LOCK_OWNER Kevin Hao
@ 2015-03-16 11:33 ` Kevin Hao
  2 siblings, 0 replies; 4+ messages in thread
From: Kevin Hao @ 2015-03-16 11:33 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paul Mackerras

Convert the simple spinlock to a ticket-based one. This is based on the
arm64 code and only covers systems which don't support shared
processors (where a physical processor is multiplexed between several
virtual processors).
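
For reference, here is a plain-C sketch of the ticket discipline that
the inline assembly below implements (the names are illustrative only;
the real code takes the ticket with lwarx/stwcx. and lowers the SMT
thread priority while spinning):

	typedef struct {
		u16 owner;	/* ticket currently being served */
		u16 next;	/* next ticket to hand out */
	} ticket_lock_sketch_t;

	static inline void ticket_lock_sketch(ticket_lock_sketch_t *lock)
	{
		u16 ticket = lock->next++;	/* done atomically in the real code */

		while (READ_ONCE(lock->owner) != ticket)
			cpu_relax();
	}

	static inline void ticket_unlock_sketch(ticket_lock_sketch_t *lock)
	{
		lock->owner++;	/* hand the lock to the next waiter in line */
	}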

Signed-off-by: Kevin Hao <haokexin@gmail.com>
---
 arch/powerpc/Kconfig                      |  5 ++
 arch/powerpc/include/asm/spinlock.h       | 77 ++++++++++++++++++++++++++++++-
 arch/powerpc/include/asm/spinlock_types.h | 16 +++++++
 3 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6949d6099d4c..dff19e522b2d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -306,6 +306,11 @@ config PGTABLE_LEVELS
 config PPC_HAS_LOCK_OWNER
 	bool
 
+config PPC_TICKET_LOCK
+	bool
+	depends on !PPC_HAS_LOCK_OWNER
+	default y
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index cbc9511df409..40035261d2db 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -59,7 +59,6 @@ extern void __spin_yield(arch_spinlock_t *lock);
 extern void __rw_yield(arch_rwlock_t *lock);
 extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #else /* CONFIG_PPC_HAS_LOCK_OWNER */
-#define LOCK_TOKEN		1
 #define WRLOCK_TOKEN		(-1)
 #define SHARED_PROCESSOR	0
 #define __spin_yield(x)		barrier()
@@ -81,6 +80,77 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #define SYNC_IO
 #endif
 
+#ifdef CONFIG_PPC_TICKET_LOCK
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	arch_spinlock_t lockval = READ_ONCE(*lock);
+
+	return (lockval.next - lockval.owner) > 1;
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.owner == lock.next;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
+}
+
+static inline unsigned long arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval;
+
+	CLEAR_IO_SYNC;
+	__asm__ __volatile__ (
+"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+	rotlwi		%1,%0,16\n\
+	xor.		%1,%1,%0\n\
+	bne-		2f\n\
+	add		%0,%0,%3\n\
+	stwcx.		%0,0,%2\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (lockval), "=&r" (tmp)
+	: "r" (lock), "r" (1 << TICKET_SHIFT)
+	: "cr0", "memory");
+
+	return !tmp;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval;
+
+	CLEAR_IO_SYNC;
+	__asm__ __volatile__ (
+"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+	add		%1,%0,%4\n\
+	stwcx.		%1,0,%2\n\
+	bne-		1b\n\
+	rotlwi		%1,%0,16\n\
+	cmpw		%1,%0\n\
+	beq		3f\n\
+	rlwinm		%0,%0,16,16,31\n\
+2:	or		1,1,1	# HMT_low\n\
+	lhz		%1,0(%3)\n\
+	cmpw		%1,%0\n\
+	bne		2b\n\
+	or		2,2,2	# HMT_medium\n\
+3:"
+	PPC_ACQUIRE_BARRIER
+	: "=&r" (lockval), "=&r" (tmp)
+	: "r"(lock), "r" (&lock->owner), "r" (1 << TICKET_SHIFT)
+	: "cr0", "memory");
+}
+#else
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
 	return lock.slock == 0;
@@ -157,13 +227,18 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 		local_irq_restore(flags_dis);
 	}
 }
+#endif
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	SYNC_IO;
 	__asm__ __volatile__("# arch_spin_unlock\n\t"
 				PPC_RELEASE_BARRIER: : :"memory");
+#ifdef CONFIG_PPC_TICKET_LOCK
+	lock->owner++;
+#else
 	lock->slock = 0;
+#endif
 }
 
 /*
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc4fdc4..371770f906dc 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -5,11 +5,27 @@
 # error "please don't include this file directly"
 #endif
 
+#ifdef CONFIG_PPC_TICKET_LOCK
+#define TICKET_SHIFT	16
+
+typedef struct {
+#ifdef __BIG_ENDIAN__
+	u16 next;
+	u16 owner;
+#else
+	u16 owner;
+	u16 next;
+#endif
+} __aligned(4) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0, 0 }
+#else
 typedef struct {
 	volatile unsigned int slock;
 } arch_spinlock_t;
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#endif /* CONFIG_PPC_TICKET_LOCK */
 
 typedef struct {
 	volatile signed int lock;
-- 
2.1.0
