* [PATCH v5 1/6] qspinlock: powerpc support qspinlock
2016-06-02 9:26 [PATCH v5 0/6] powerPC/pSeries use pv-qspinlock as the default spinlock implementation Pan Xinhui
@ 2016-06-02 9:26 ` Pan Xinhui
0 siblings, 0 replies; 7+ messages in thread
From: Pan Xinhui @ 2016-06-02 9:26 UTC (permalink / raw)
To: linux-kernel, linuxppc-dev, virtualization
Cc: benh, paulus, mpe, peterz, mingo, paulmck, waiman.long,
Pan Xinhui
Base code to enable qspinlock on powerpc. This patch adds some #ifdefs
here and there. Although there is no paravirt related code, we can
successfully build a qspinlock kernel after applying this patch.
Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
---
arch/powerpc/include/asm/qspinlock.h | 26 ++++++++++++++++++++++++++
arch/powerpc/include/asm/spinlock.h | 27 +++++++++++++++------------
arch/powerpc/include/asm/spinlock_types.h | 4 ++++
arch/powerpc/lib/locks.c | 4 ++++
4 files changed, 49 insertions(+), 12 deletions(-)
create mode 100644 arch/powerpc/include/asm/qspinlock.h
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 0000000..fc83cd2
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+#define SPIN_THRESHOLD (1 << 15)
+#define queued_spin_unlock queued_spin_unlock
+
+static inline void native_queued_spin_unlock(struct qspinlock *lock)
+{
+ u8 *locked = (u8 *)lock;
+#ifdef __BIG_ENDIAN
+ locked += 3;
+#endif
+ /* no load/store can be across the unlock()*/
+ smp_store_release(locked, 0);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ native_queued_spin_unlock(lock);
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 523673d..4359ee6 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -52,6 +52,20 @@
#define SYNC_IO
#endif
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
+extern void __spin_yield(arch_spinlock_t *lock);
+extern void __rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
+#endif
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#else
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.slock == 0;
@@ -106,18 +120,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
* held. Conveniently, we have a word in the paca that holds this
* value.
*/
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x) barrier()
-#define __rw_yield(x) barrier()
-#define SHARED_PROCESSOR 0
-#endif
-
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
CLEAR_IO_SYNC;
@@ -169,6 +171,7 @@ extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
#endif
+#endif /* !CONFIG_QUEUED_SPINLOCKS */
/*
* Read-write spinlocks, allowing multiple readers
* but only one writer.
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc..bd7144e 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -5,11 +5,15 @@
# error "please don't include this file directly"
#endif
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
typedef struct {
volatile unsigned int slock;
} arch_spinlock_t;
#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#endif
typedef struct {
volatile signed int lock;
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index f7deebd..a9ebd71 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -23,6 +23,7 @@
#include <asm/hvcall.h>
#include <asm/smp.h>
+#ifndef CONFIG_QUEUED_SPINLOCKS
void __spin_yield(arch_spinlock_t *lock)
{
unsigned int lock_value, holder_cpu, yield_count;
@@ -42,6 +43,7 @@ void __spin_yield(arch_spinlock_t *lock)
get_hard_smp_processor_id(holder_cpu), yield_count);
}
EXPORT_SYMBOL_GPL(__spin_yield);
+#endif
/*
* Waiting for a read lock or a write lock on a rwlock...
@@ -69,6 +71,7 @@ void __rw_yield(arch_rwlock_t *rw)
}
#endif
+#ifndef CONFIG_QUEUED_SPINLOCKS
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
smp_mb();
@@ -84,3 +87,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
+#endif
--
2.4.11
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v5 1/6] qspinlock: powerpc support qspinlock
[not found] ` <1464917548.26773.12.camel@au1.ibm.com>
@ 2016-06-03 4:10 ` xinhui
2016-06-03 4:33 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 7+ messages in thread
From: xinhui @ 2016-06-03 4:10 UTC (permalink / raw)
To: benh, linux-kernel, linuxppc-dev, virtualization
Cc: paulus, mpe, peterz, mingo, paulmck, waiman.long
On 2016年06月03日 09:32, Benjamin Herrenschmidt wrote:
> On Fri, 2016-06-03 at 11:32 +1000, Benjamin Herrenschmidt wrote:
>> On Thu, 2016-06-02 at 17:22 +0800, Pan Xinhui wrote:
>>>
>>> Base code to enable qspinlock on powerpc. this patch add some
>>> #ifdef
>>> here and there. Although there is no paravirt related code, we can
>>> successfully build a qspinlock kernel after apply this patch.
>> This is missing the IO_SYNC stuff ... It means we'll fail to do a
>> full
>> sync to order vs MMIOs.
>>
>> You need to add that back in the unlock path.
>
> Well, and in the lock path as well...
>
Oh, yes. I missed IO_SYNC stuff.
thank you, Ben :)
> Cheers,
> Ben.
>
>>>
>>> Signed-off-by: Pan Xinhui <xinhui.pan@linux.vnet.ibm.com>
>>> ---
>>> arch/powerpc/include/asm/qspinlock.h | 26
>>> ++++++++++++++++++++++++++
>>> arch/powerpc/include/asm/spinlock.h | 27 +++++++++++++++----
>>> --------
>>> arch/powerpc/include/asm/spinlock_types.h | 4 ++++
>>> arch/powerpc/lib/locks.c | 4 ++++
>>> 4 files changed, 49 insertions(+), 12 deletions(-)
>>> create mode 100644 arch/powerpc/include/asm/qspinlock.h
>>>
>>> diff --git a/arch/powerpc/include/asm/qspinlock.h
>>> b/arch/powerpc/include/asm/qspinlock.h
>>> new file mode 100644
>>> index 0000000..fc83cd2
>>> --- /dev/null
>>> +++ b/arch/powerpc/include/asm/qspinlock.h
>>> @@ -0,0 +1,26 @@
>>> +#ifndef _ASM_POWERPC_QSPINLOCK_H
>>> +#define _ASM_POWERPC_QSPINLOCK_H
>>> +
>>> +#include <asm-generic/qspinlock_types.h>
>>> +
>>> +#define SPIN_THRESHOLD (1 << 15)
>>> +#define queued_spin_unlock queued_spin_unlock
>>> +
>>> +static inline void native_queued_spin_unlock(struct qspinlock
>>> *lock)
>>> +{
>>> + u8 *locked = (u8 *)lock;
>>> +#ifdef __BIG_ENDIAN
>>> + locked += 3;
>>> +#endif
>>> + /* no load/store can be across the unlock()*/
>>> + smp_store_release(locked, 0);
>>> +}
>>> +
>>> +static inline void queued_spin_unlock(struct qspinlock *lock)
>>> +{
>>> + native_queued_spin_unlock(lock);
>>> +}
>>> +
>>> +#include <asm-generic/qspinlock.h>
>>> +
>>> +#endif /* _ASM_POWERPC_QSPINLOCK_H */
>>> diff --git a/arch/powerpc/include/asm/spinlock.h
>>> b/arch/powerpc/include/asm/spinlock.h
>>> index 523673d..4359ee6 100644
>>> --- a/arch/powerpc/include/asm/spinlock.h
>>> +++ b/arch/powerpc/include/asm/spinlock.h
>>> @@ -52,6 +52,20 @@
>>> #define SYNC_IO
>>> #endif
>>>
>>> +#if defined(CONFIG_PPC_SPLPAR)
>>> +/* We only yield to the hypervisor if we are in shared processor
>>> mode */
>>> +#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca-
>>>>
>>>> lppaca_ptr))
>>> +extern void __spin_yield(arch_spinlock_t *lock);
>>> +extern void __rw_yield(arch_rwlock_t *lock);
>>> +#else /* SPLPAR */
>>> +#define __spin_yield(x) barrier()
>>> +#define __rw_yield(x) barrier()
>>> +#define SHARED_PROCESSOR 0
>>> +#endif
>>> +
>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>> +#include <asm/qspinlock.h>
>>> +#else
>>> static __always_inline int
>>> arch_spin_value_unlocked(arch_spinlock_t
>>> lock)
>>> {
>>> return lock.slock == 0;
>>> @@ -106,18 +120,6 @@ static inline int
>>> arch_spin_trylock(arch_spinlock_t *lock)
>>> * held. Conveniently, we have a word in the paca that holds this
>>> * value.
>>> */
>>> -
>>> -#if defined(CONFIG_PPC_SPLPAR)
>>> -/* We only yield to the hypervisor if we are in shared processor
>>> mode */
>>> -#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca-
>>>>
>>>> lppaca_ptr))
>>> -extern void __spin_yield(arch_spinlock_t *lock);
>>> -extern void __rw_yield(arch_rwlock_t *lock);
>>> -#else /* SPLPAR */
>>> -#define __spin_yield(x) barrier()
>>> -#define __rw_yield(x) barrier()
>>> -#define SHARED_PROCESSOR 0
>>> -#endif
>>> -
>>> static inline void arch_spin_lock(arch_spinlock_t *lock)
>>> {
>>> CLEAR_IO_SYNC;
>>> @@ -169,6 +171,7 @@ extern void
>>> arch_spin_unlock_wait(arch_spinlock_t
>>> *lock);
>>> do { while (arch_spin_is_locked(lock)) cpu_relax(); }
>>> while
>>> (0)
>>> #endif
>>>
>>> +#endif /* !CONFIG_QUEUED_SPINLOCKS */
>>> /*
>>> * Read-write spinlocks, allowing multiple readers
>>> * but only one writer.
>>> diff --git a/arch/powerpc/include/asm/spinlock_types.h
>>> b/arch/powerpc/include/asm/spinlock_types.h
>>> index 2351adc..bd7144e 100644
>>> --- a/arch/powerpc/include/asm/spinlock_types.h
>>> +++ b/arch/powerpc/include/asm/spinlock_types.h
>>> @@ -5,11 +5,15 @@
>>> # error "please don't include this file directly"
>>> #endif
>>>
>>> +#ifdef CONFIG_QUEUED_SPINLOCKS
>>> +#include <asm-generic/qspinlock_types.h>
>>> +#else
>>> typedef struct {
>>> volatile unsigned int slock;
>>> } arch_spinlock_t;
>>>
>>> #define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
>>> +#endif
>>>
>>> typedef struct {
>>> volatile signed int lock;
>>> diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
>>> index f7deebd..a9ebd71 100644
>>> --- a/arch/powerpc/lib/locks.c
>>> +++ b/arch/powerpc/lib/locks.c
>>> @@ -23,6 +23,7 @@
>>> #include <asm/hvcall.h>
>>> #include <asm/smp.h>
>>>
>>> +#ifndef CONFIG_QUEUED_SPINLOCKS
>>> void __spin_yield(arch_spinlock_t *lock)
>>> {
>>> unsigned int lock_value, holder_cpu, yield_count;
>>> @@ -42,6 +43,7 @@ void __spin_yield(arch_spinlock_t *lock)
>>> get_hard_smp_processor_id(holder_cpu),
>>> yield_count);
>>> }
>>> EXPORT_SYMBOL_GPL(__spin_yield);
>>> +#endif
>>>
>>> /*
>>> * Waiting for a read lock or a write lock on a rwlock...
>>> @@ -69,6 +71,7 @@ void __rw_yield(arch_rwlock_t *rw)
>>> }
>>> #endif
>>>
>>> +#ifndef CONFIG_QUEUED_SPINLOCKS
>>> void arch_spin_unlock_wait(arch_spinlock_t *lock)
>>> {
>>> smp_mb();
>>> @@ -84,3 +87,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
>>> }
>>>
>>> EXPORT_SYMBOL(arch_spin_unlock_wait);
>>> +#endif
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v5 1/6] qspinlock: powerpc support qspinlock
2016-06-03 4:10 ` [PATCH v5 1/6] qspinlock: powerpc support qspinlock xinhui
@ 2016-06-03 4:33 ` Benjamin Herrenschmidt
2016-06-03 7:02 ` xinhui
2016-06-06 15:59 ` Peter Zijlstra
0 siblings, 2 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2016-06-03 4:33 UTC (permalink / raw)
To: xinhui, linux-kernel, linuxppc-dev, virtualization
Cc: paulus, mpe, peterz, mingo, paulmck, waiman.long
On Fri, 2016-06-03 at 12:10 +0800, xinhui wrote:
> On 2016年06月03日 09:32, Benjamin Herrenschmidt wrote:
> > On Fri, 2016-06-03 at 11:32 +1000, Benjamin Herrenschmidt wrote:
> >> On Thu, 2016-06-02 at 17:22 +0800, Pan Xinhui wrote:
> >>>
> >>> Base code to enable qspinlock on powerpc. this patch add some
> >>> #ifdef
> >>> here and there. Although there is no paravirt related code, we
> can
> >>> successfully build a qspinlock kernel after apply this patch.
> >> This is missing the IO_SYNC stuff ... It means we'll fail to do a
> >> full
> >> sync to order vs MMIOs.
> >>
> >> You need to add that back in the unlock path.
> >
> > Well, and in the lock path as well...
> >
> Oh, yes. I missed IO_SYNC stuff.
>
> thank you, Ben :)
Ok couple of other things that would be nice from my perspective (and
Michael's) if you can produce them:
- Some benchmarks of the qspinlock alone, without the PV stuff,
so we understand how much of the overhead is inherent to the
qspinlock and how much is introduced by the PV bits.
- For the above, can you show (or describe) where the qspinlock
improves things compared to our current locks. While there's
theory and to some extent practice on x86, it would be nice to
validate the effects on POWER.
- Comparative benchmark with the PV stuff in on a bare metal system
to understand the overhead there.
- Comparative benchmark with the PV stuff under pHyp and KVM
Spinlocks are fiddly and a critical piece of infrastructure, it's
important we fully understand the performance implications before we
decide to switch to a new model.
Cheers,
Ben.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v5 1/6] qspinlock: powerpc support qspinlock
2016-06-03 4:33 ` Benjamin Herrenschmidt
@ 2016-06-03 7:02 ` xinhui
2016-06-06 15:59 ` Peter Zijlstra
1 sibling, 0 replies; 7+ messages in thread
From: xinhui @ 2016-06-03 7:02 UTC (permalink / raw)
To: Benjamin Herrenschmidt, linux-kernel, linuxppc-dev,
virtualization
Cc: paulus, mpe, peterz, mingo, paulmck, waiman.long
On 2016年06月03日 12:33, Benjamin Herrenschmidt wrote:
> On Fri, 2016-06-03 at 12:10 +0800, xinhui wrote:
>> On 2016年06月03日 09:32, Benjamin Herrenschmidt wrote:
>>> On Fri, 2016-06-03 at 11:32 +1000, Benjamin Herrenschmidt wrote:
>>>> On Thu, 2016-06-02 at 17:22 +0800, Pan Xinhui wrote:
>>>>>
>>>>> Base code to enable qspinlock on powerpc. this patch add some
>>>>> #ifdef
>>>>> here and there. Although there is no paravirt related code, we
>> can
>>>>> successfully build a qspinlock kernel after apply this patch.
>>>> This is missing the IO_SYNC stuff ... It means we'll fail to do a
>>>> full
>>>> sync to order vs MMIOs.
>>>>
>>>> You need to add that back in the unlock path.
>>>
>>> Well, and in the lock path as well...
>>>
>> Oh, yes. I missed IO_SYNC stuff.
>>
>> thank you, Ben :)
>
> Ok couple of other things that would be nice from my perspective (and
> Michael's) if you can produce them:
>
> - Some benchmarks of the qspinlock alone, without the PV stuff,
> so we understand how much of the overhead is inherent to the
> qspinlock and how much is introduced by the PV bits.
>
> - For the above, can you show (or describe) where the qspinlock
> improves things compared to our current locks. While there's
> theory and to some extent practice on x86, it would be nice to
> validate the effects on POWER.
>
> - Comparative benchmark with the PV stuff in on a bare metal system
> to understand the overhead there.
>
> - Comparative benchmark with the PV stuff under pHyp and KVM
>
Will do such benchmark tests in the next few days.
Thanks for your kind suggestions. :)
> Spinlocks are fiddly and a critical piece of infrastructure, it's
> important we fully understand the performance implications before we
> decide to switch to a new model.
>
yes, We really need understand how {pv}qspinlock works in more complex cases.
thanks
xinhui
> Cheers,
> Ben.
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v5 1/6] qspinlock: powerpc support qspinlock
2016-06-03 4:33 ` Benjamin Herrenschmidt
2016-06-03 7:02 ` xinhui
@ 2016-06-06 15:59 ` Peter Zijlstra
2016-06-06 21:41 ` Benjamin Herrenschmidt
1 sibling, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2016-06-06 15:59 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: xinhui, linux-kernel, linuxppc-dev, virtualization, paulus, mpe,
mingo, paulmck, waiman.long
On Fri, Jun 03, 2016 at 02:33:47PM +1000, Benjamin Herrenschmidt wrote:
> - For the above, can you show (or describe) where the qspinlock
> improves things compared to our current locks.
So currently PPC has a fairly straight forward test-and-set spinlock
IIRC. You have this because LPAR/virt muck and lock holder preemption
issues etc..
qspinlock is 1) a fair lock (like ticket locks) and 2) provides
out-of-word spinning, reducing cacheline pressure.
Esp. on multi-socket x86 we saw the out-of-word spinning being a big win
over our ticket locks.
And fairness, brought to us by the ticket locks a long time ago,
eliminated starvation issues we had, where a spinner local to the holder
would 'always' win from a spinner further away. So under heavy enough
local contention, the spinners on 'remote' CPUs would 'never' get to own
the lock.
pv-qspinlock tries to preserve the fairness while allowing limited lock
stealing and explicitly managing which vcpus to wake.
> While there's
> theory and to some extent practice on x86, it would be nice to
> validate the effects on POWER.
Right; so that will have to be from benchmarks which I cannot help you
with ;-)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v5 1/6] qspinlock: powerpc support qspinlock
2016-06-06 15:59 ` Peter Zijlstra
@ 2016-06-06 21:41 ` Benjamin Herrenschmidt
2016-06-21 12:35 ` xinhui
0 siblings, 1 reply; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2016-06-06 21:41 UTC (permalink / raw)
To: Peter Zijlstra
Cc: xinhui, linux-kernel, linuxppc-dev, virtualization, paulus, mpe,
mingo, paulmck, waiman.long
On Mon, 2016-06-06 at 17:59 +0200, Peter Zijlstra wrote:
> On Fri, Jun 03, 2016 at 02:33:47PM +1000, Benjamin Herrenschmidt wrote:
> >
> > - For the above, can you show (or describe) where the qspinlock
> > improves things compared to our current locks.
> So currently PPC has a fairly straight forward test-and-set spinlock
> IIRC. You have this because LPAR/virt muck and lock holder preemption
> issues etc..
> qspinlock is 1) a fair lock (like ticket locks) and 2) provides
> out-of-word spinning, reducing cacheline pressure.
Thanks Peter. I think I understand the theory, but I'd like see it
translate into real numbers.
> Esp. on multi-socket x86 we saw the out-of-word spinning being a big win
> over our ticket locks.
>
> And fairness, brought to us by the ticket locks a long time ago,
> eliminated starvation issues we had, where a spinner local to the holder
> would 'always' win from a spinner further away. So under heavy enough
> local contention, the spinners on 'remote' CPUs would 'never' get to own
> the lock.
I think our HW has tweaks to avoid that from happening with the simple
locks in the underlying ll/sc implementation. In any case, what I'm
asking is actual tests to verify it works as expected for us.
> pv-qspinlock tries to preserve the fairness while allowing limited lock
> stealing and explicitly managing which vcpus to wake.
Right.
> >
> > While there's
> > theory and to some extent practice on x86, it would be nice to
> > validate the effects on POWER.
> Right; so that will have to be from benchmarks which I cannot help you
> with ;-)
Precisely :-) This is what I was asking for ;-)
Cheers,
Ben.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v5 1/6] qspinlock: powerpc support qspinlock
2016-06-06 21:41 ` Benjamin Herrenschmidt
@ 2016-06-21 12:35 ` xinhui
0 siblings, 0 replies; 7+ messages in thread
From: xinhui @ 2016-06-21 12:35 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Peter Zijlstra
Cc: linux-kernel, linuxppc-dev, virtualization, paulus, mpe, mingo,
paulmck, waiman.long
On 2016年06月07日 05:41, Benjamin Herrenschmidt wrote:
> On Mon, 2016-06-06 at 17:59 +0200, Peter Zijlstra wrote:
>> On Fri, Jun 03, 2016 at 02:33:47PM +1000, Benjamin Herrenschmidt wrote:
>>>
>>> - For the above, can you show (or describe) where the qspinlock
>>> improves things compared to our current locks.
>> So currently PPC has a fairly straight forward test-and-set spinlock
>> IIRC. You have this because LPAR/virt muck and lock holder preemption
>> issues etc..
>> qspinlock is 1) a fair lock (like ticket locks) and 2) provides
>> out-of-word spinning, reducing cacheline pressure.
>
> Thanks Peter. I think I understand the theory, but I'd like see it
> translate into real numbers.
>
>> Esp. on multi-socket x86 we saw the out-of-word spinning being a big win
>> over our ticket locks.
>>
>> And fairness, brought to us by the ticket locks a long time ago,
>> eliminated starvation issues we had, where a spinner local to the holder
>> would 'always' win from a spinner further away. So under heavy enough
>> local contention, the spinners on 'remote' CPUs would 'never' get to own
>> the lock.
>
> I think our HW has tweaks to avoid that from happening with the simple
> locks in the underlying ll/sc implementation. In any case, what I'm
> asking is actual tests to verify it works as expected for us.
>
If HW has such tweaks then there must be a performance drop as the total number of cpus grows.
And I got such clues
one simple benchmark test:
it tests how many spin_lock/spin_unlock pairs can be done within 15 seconds on all cpus.
say,
while(!done) {
spin_lock()
this_cpu_inc(loops)
spin_unlock()
}
I do the test on two machines, one is using powerKVM, and the other is using pHyp.
the result below shows what the sum of loops is in the end, with xxxxK form.
cpu count | pv-qspinlock | test-set spinlock|
----------------------------------------------------
8 (powerKVM) | 62830K | 67340K |
------------------------------------------------
8 (pHyp) | 49800K | 59330K |
------------------------------------------------
32 (pHyp) | 87580K | 20990K |
-------------------------------------------------
As the cpu count grows, the lock/unlock pair throughput of the test-set spinlock drops sharply.
This is because of cache-line bouncing between different physical cpus.
So to verify how both spinlock impact the data-cache,
another simple benchmark test.
code looks like:
struct _x {
spinlock_t lk;
unsigned long x;
} x;
while(!this_cpu_read(stop)) {
int i = 0xff
spin_lock(x.lk)
this_cpu_inc(loops)
while(i--)
READ_ONCE(x.x);
spin_unlock(x.lk)
}
the result below shows what the sum of loops is in the end, with xxxxK form.
cpu count | pv-qspinlock | test-set spinlock|
------------------------------------------------
8 (pHyp) | 13240K | 9780K |
------------------------------------------------
32 (pHyp) | 25790K | 9700K |
------------------------------------------------
obviously pv-qspinlock is more cache-friendly, and has better performance than test-set spinlock.
More test is going on, I will send out new patch set with the result.
HOPE *within* this week. unixbench really takes a long time.
thanks
xinhui
>> pv-qspinlock tries to preserve the fairness while allowing limited lock
>> stealing and explicitly managing which vcpus to wake.
>
> Right.
>>>
>>> While there's
>>> theory and to some extent practice on x86, it would be nice to
>>> validate the effects on POWER.
>> Right; so that will have to be from benchmarks which I cannot help you
>> with ;-)
>
> Precisely :-) This is what I was asking for ;-)
>
> Cheers,
> Ben.
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2016-06-21 12:35 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <1464859370-5162-1-git-send-email-xinhui.pan@linux.vnet.ibm.com>
[not found] ` <1464859370-5162-3-git-send-email-xinhui.pan@linux.vnet.ibm.com>
[not found] ` <1464917520.26773.11.camel@kernel.crashing.org>
[not found] ` <1464917548.26773.12.camel@au1.ibm.com>
2016-06-03 4:10 ` [PATCH v5 1/6] qspinlock: powerpc support qspinlock xinhui
2016-06-03 4:33 ` Benjamin Herrenschmidt
2016-06-03 7:02 ` xinhui
2016-06-06 15:59 ` Peter Zijlstra
2016-06-06 21:41 ` Benjamin Herrenschmidt
2016-06-21 12:35 ` xinhui
2016-06-02 9:26 [PATCH v5 0/6] powerPC/pSeries use pv-qspinlock as the default spinlock implementation Pan Xinhui
2016-06-02 9:26 ` [PATCH v5 1/6] qspinlock: powerpc support qspinlock Pan Xinhui
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).