All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV
@ 2015-07-31 10:45 David Vrabel
  2015-07-31 10:47 ` Andrew Cooper
  2015-07-31 10:52 ` David Vrabel
  0 siblings, 2 replies; 7+ messages in thread
From: David Vrabel @ 2015-07-31 10:45 UTC (permalink / raw)
  To: Ian Campbell
  Cc: xen-devel, Andrew Cooper, David Vrabel, Jan Beulich,
	Stefano Stabellini

Instead of cpu_relax() while spinning and observing the ticket head,
introduce spin_relax() which executes a WFE instruction.  After the
ticket head is changed call spin_signal() to execute an SVE
instruction to wake any spinners.

This should improve power consumption when locks are contented and
spinning.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
I've not tested this but it looks straight-forward...
---
 xen/common/spinlock.c          | 5 +++--
 xen/include/asm-arm/spinlock.h | 3 ++-
 xen/include/asm-x86/spinlock.h | 3 +++
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 29149d1..fc3e8e7 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
     while ( tickets.tail != observe_head(&lock->tickets) )
     {
         LOCK_PROFILE_BLOCK;
-        cpu_relax();
+        spin_relax();
     }
     LOCK_PROFILE_GOT;
     preempt_disable();
@@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
     preempt_enable();
     LOCK_PROFILE_REL;
     add_sized(&lock->tickets.head, 1);
+    spin_signal();
 }
 
 void _spin_unlock_irq(spinlock_t *lock)
@@ -228,7 +229,7 @@ void _spin_barrier(spinlock_t *lock)
     if ( sample.head != sample.tail )
     {
         while ( observe_head(&lock->tickets) == sample.head )
-            cpu_relax();
+            spin_relax();
 #ifdef LOCK_PROFILE
         if ( lock->profile )
         {
diff --git a/xen/include/asm-arm/spinlock.h b/xen/include/asm-arm/spinlock.h
index 81955d1..311764b 100644
--- a/xen/include/asm-arm/spinlock.h
+++ b/xen/include/asm-arm/spinlock.h
@@ -1,6 +1,7 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
-/* Nothing ARM specific. */
+#define spin_relax() asm volatile("wfe" ::: "memory");
+#define spin_signal() asm volatile("sev" ::: "memory");
 
 #endif /* __ASM_SPINLOCK_H */
diff --git a/xen/include/asm-x86/spinlock.h b/xen/include/asm-x86/spinlock.h
index 7d69e75..0c04a4e 100644
--- a/xen/include/asm-x86/spinlock.h
+++ b/xen/include/asm-x86/spinlock.h
@@ -4,4 +4,7 @@
 #define _raw_read_unlock(l) \
     asm volatile ( "lock; dec%z0 %0" : "+m" ((l)->lock) :: "memory" )
 
+#define spin_relax() cpu_relax()
+#define spin_signal()
+
 #endif /* __ASM_SPINLOCK_H */
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV
  2015-07-31 10:45 [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV David Vrabel
@ 2015-07-31 10:47 ` Andrew Cooper
  2015-07-31 10:52 ` David Vrabel
  1 sibling, 0 replies; 7+ messages in thread
From: Andrew Cooper @ 2015-07-31 10:47 UTC (permalink / raw)
  To: David Vrabel, Ian Campbell; +Cc: xen-devel, Jan Beulich, Stefano Stabellini

On 31/07/15 11:45, David Vrabel wrote:
> Instead of cpu_relax() while spinning and observing the ticket head,
> introduce spin_relax() which executes a WFE instruction.  After the
> ticket head is changed call spin_signal() to execute an SVE
> instruction to wake any spinners.
>
> This should improve power consumption when locks are contented and
> spinning.
>
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>

x86 bits Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> I've not tested this but it looks straight-forward...
> ---
>  xen/common/spinlock.c          | 5 +++--
>  xen/include/asm-arm/spinlock.h | 3 ++-
>  xen/include/asm-x86/spinlock.h | 3 +++
>  3 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
> index 29149d1..fc3e8e7 100644
> --- a/xen/common/spinlock.c
> +++ b/xen/common/spinlock.c
> @@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
>      while ( tickets.tail != observe_head(&lock->tickets) )
>      {
>          LOCK_PROFILE_BLOCK;
> -        cpu_relax();
> +        spin_relax();
>      }
>      LOCK_PROFILE_GOT;
>      preempt_disable();
> @@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
>      preempt_enable();
>      LOCK_PROFILE_REL;
>      add_sized(&lock->tickets.head, 1);
> +    spin_signal();
>  }
>  
>  void _spin_unlock_irq(spinlock_t *lock)
> @@ -228,7 +229,7 @@ void _spin_barrier(spinlock_t *lock)
>      if ( sample.head != sample.tail )
>      {
>          while ( observe_head(&lock->tickets) == sample.head )
> -            cpu_relax();
> +            spin_relax();
>  #ifdef LOCK_PROFILE
>          if ( lock->profile )
>          {
> diff --git a/xen/include/asm-arm/spinlock.h b/xen/include/asm-arm/spinlock.h
> index 81955d1..311764b 100644
> --- a/xen/include/asm-arm/spinlock.h
> +++ b/xen/include/asm-arm/spinlock.h
> @@ -1,6 +1,7 @@
>  #ifndef __ASM_SPINLOCK_H
>  #define __ASM_SPINLOCK_H
>  
> -/* Nothing ARM specific. */
> +#define spin_relax() asm volatile("wfe" ::: "memory");
> +#define spin_signal() asm volatile("sev" ::: "memory");
>  
>  #endif /* __ASM_SPINLOCK_H */
> diff --git a/xen/include/asm-x86/spinlock.h b/xen/include/asm-x86/spinlock.h
> index 7d69e75..0c04a4e 100644
> --- a/xen/include/asm-x86/spinlock.h
> +++ b/xen/include/asm-x86/spinlock.h
> @@ -4,4 +4,7 @@
>  #define _raw_read_unlock(l) \
>      asm volatile ( "lock; dec%z0 %0" : "+m" ((l)->lock) :: "memory" )
>  
> +#define spin_relax() cpu_relax()
> +#define spin_signal()
> +
>  #endif /* __ASM_SPINLOCK_H */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV
  2015-07-31 10:45 [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV David Vrabel
  2015-07-31 10:47 ` Andrew Cooper
@ 2015-07-31 10:52 ` David Vrabel
  2015-07-31 11:01   ` Julien Grall
  1 sibling, 1 reply; 7+ messages in thread
From: David Vrabel @ 2015-07-31 10:52 UTC (permalink / raw)
  To: Ian Campbell
  Cc: xen-devel, Andrew Cooper, Tim Deegan, Jan Beulich,
	Stefano Stabellini

On 31/07/15 11:45, David Vrabel wrote:
> Instead of cpu_relax() while spinning and observing the ticket head,
> introduce spin_relax() which executes a WFE instruction.  After the
> ticket head is changed call spin_signal() to execute an SVE
> instruction to wake any spinners.
> 
> This should improve power consumption when locks are contented and
> spinning.
> 
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> ---
> I've not tested this but it looks straight-forward...
> ---
>  xen/common/spinlock.c          | 5 +++--
>  xen/include/asm-arm/spinlock.h | 3 ++-
>  xen/include/asm-x86/spinlock.h | 3 +++
>  3 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
> index 29149d1..fc3e8e7 100644
> --- a/xen/common/spinlock.c
> +++ b/xen/common/spinlock.c
> @@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
>      while ( tickets.tail != observe_head(&lock->tickets) )
>      {
>          LOCK_PROFILE_BLOCK;
> -        cpu_relax();
> +        spin_relax();
>      }
>      LOCK_PROFILE_GOT;
>      preempt_disable();
> @@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
>      preempt_enable();
>      LOCK_PROFILE_REL;
>      add_sized(&lock->tickets.head, 1);
> +    spin_signal();

It occurs to me that perhaps there should be a barrier between the
add_sized() and the spin_signal() so the update value is visible before
we signal (otherwise the spinner may be woken and observe the old value
and WFE again).

spin_relax() and spin_signal() might be better named arch_lock_relax()
and arch_lock_signal() to match the naming of existing arch_lock_*() hooks.

I think someone with more arm experience (and the means to test) should
take this patch on.

David

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV
  2015-07-31 10:52 ` David Vrabel
@ 2015-07-31 11:01   ` Julien Grall
  2015-07-31 11:41     ` Stefano Stabellini
  0 siblings, 1 reply; 7+ messages in thread
From: Julien Grall @ 2015-07-31 11:01 UTC (permalink / raw)
  To: David Vrabel, Ian Campbell
  Cc: xen-devel, Stefano Stabellini, Tim Deegan, Jan Beulich,
	Andrew Cooper

Hi David,

On 31/07/15 11:52, David Vrabel wrote:
> On 31/07/15 11:45, David Vrabel wrote:
>> Instead of cpu_relax() while spinning and observing the ticket head,
>> introduce spin_relax() which executes a WFE instruction.  After the
>> ticket head is changed call spin_signal() to execute an SVE
>> instruction to wake any spinners.
>>
>> This should improve power consumption when locks are contented and
>> spinning.
>>
>> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
>> ---
>> I've not tested this but it looks straight-forward...
>> ---
>>  xen/common/spinlock.c          | 5 +++--
>>  xen/include/asm-arm/spinlock.h | 3 ++-
>>  xen/include/asm-x86/spinlock.h | 3 +++
>>  3 files changed, 8 insertions(+), 3 deletions(-)
>>
>> diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
>> index 29149d1..fc3e8e7 100644
>> --- a/xen/common/spinlock.c
>> +++ b/xen/common/spinlock.c
>> @@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
>>      while ( tickets.tail != observe_head(&lock->tickets) )
>>      {
>>          LOCK_PROFILE_BLOCK;
>> -        cpu_relax();
>> +        spin_relax();
>>      }
>>      LOCK_PROFILE_GOT;
>>      preempt_disable();
>> @@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
>>      preempt_enable();
>>      LOCK_PROFILE_REL;
>>      add_sized(&lock->tickets.head, 1);
>> +    spin_signal();
> 
> It occurs to me that perhaps there should be a barrier between the
> add_sized() and the spin_signal() so the update value is visible before
> we signal (otherwise the spinner may be woken and observe the old value
> and WFE again).

sev is usually precede by dsb to ensure that all the instructions before
as completed before executing the sev.

> spin_relax() and spin_signal() might be better named arch_lock_relax()
> and arch_lock_signal() to match the naming of existing arch_lock_*() hooks.
> 
> I think someone with more arm experience (and the means to test) should
> take this patch on.

I will give a look next week.

Regards,

-- 
Julien Grall

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV
  2015-07-31 11:01   ` Julien Grall
@ 2015-07-31 11:41     ` Stefano Stabellini
  2015-07-31 12:39       ` Ian Campbell
  2015-07-31 12:43       ` David Vrabel
  0 siblings, 2 replies; 7+ messages in thread
From: Stefano Stabellini @ 2015-07-31 11:41 UTC (permalink / raw)
  To: Julien Grall
  Cc: Ian Campbell, Stefano Stabellini, Andrew Cooper, Tim Deegan,
	David Vrabel, Jan Beulich, xen-devel

On Fri, 31 Jul 2015, Julien Grall wrote:
> Hi David,
> 
> On 31/07/15 11:52, David Vrabel wrote:
> > On 31/07/15 11:45, David Vrabel wrote:
> >> Instead of cpu_relax() while spinning and observing the ticket head,
> >> introduce spin_relax() which executes a WFE instruction.  After the
> >> ticket head is changed call spin_signal() to execute an SVE
> >> instruction to wake any spinners.
> >>
> >> This should improve power consumption when locks are contented and
> >> spinning.
> >>
> >> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> >> ---
> >> I've not tested this but it looks straight-forward...
> >> ---
> >>  xen/common/spinlock.c          | 5 +++--
> >>  xen/include/asm-arm/spinlock.h | 3 ++-
> >>  xen/include/asm-x86/spinlock.h | 3 +++
> >>  3 files changed, 8 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
> >> index 29149d1..fc3e8e7 100644
> >> --- a/xen/common/spinlock.c
> >> +++ b/xen/common/spinlock.c
> >> @@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
> >>      while ( tickets.tail != observe_head(&lock->tickets) )
> >>      {
> >>          LOCK_PROFILE_BLOCK;
> >> -        cpu_relax();
> >> +        spin_relax();
> >>      }
> >>      LOCK_PROFILE_GOT;
> >>      preempt_disable();
> >> @@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
> >>      preempt_enable();
> >>      LOCK_PROFILE_REL;
> >>      add_sized(&lock->tickets.head, 1);
> >> +    spin_signal();
> > 
> > It occurs to me that perhaps there should be a barrier between the
> > add_sized() and the spin_signal() so the update value is visible before
> > we signal (otherwise the spinner may be woken and observe the old value
> > and WFE again).
> 
> sev is usually precede by dsb to ensure that all the instructions before
> as completed before executing the sev.

Yes, a dsb() is required. This being common code, we could use wmb().

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV
  2015-07-31 11:41     ` Stefano Stabellini
@ 2015-07-31 12:39       ` Ian Campbell
  2015-07-31 12:43       ` David Vrabel
  1 sibling, 0 replies; 7+ messages in thread
From: Ian Campbell @ 2015-07-31 12:39 UTC (permalink / raw)
  To: Stefano Stabellini, Julien Grall
  Cc: xen-devel, Tim Deegan, David Vrabel, Jan Beulich, Andrew Cooper

On Fri, 2015-07-31 at 12:41 +0100, Stefano Stabellini wrote:
> On Fri, 31 Jul 2015, Julien Grall wrote:
> > Hi David,
> > 
> > On 31/07/15 11:52, David Vrabel wrote:
> > > On 31/07/15 11:45, David Vrabel wrote:
> > > > Instead of cpu_relax() while spinning and observing the ticket 
> > > > head,
> > > > introduce spin_relax() which executes a WFE instruction.  After the
> > > > ticket head is changed call spin_signal() to execute an SVE
> > > > instruction to wake any spinners.
> > > > 
> > > > This should improve power consumption when locks are contented and
> > > > spinning.
> > > > 
> > > > Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> > > > ---
> > > > I've not tested this but it looks straight-forward...
> > > > ---
> > > >  xen/common/spinlock.c          | 5 +++--
> > > >  xen/include/asm-arm/spinlock.h | 3 ++-
> > > >  xen/include/asm-x86/spinlock.h | 3 +++
> > > >  3 files changed, 8 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
> > > > index 29149d1..fc3e8e7 100644
> > > > --- a/xen/common/spinlock.c
> > > > +++ b/xen/common/spinlock.c
> > > > @@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
> > > >      while ( tickets.tail != observe_head(&lock->tickets) )
> > > >      {
> > > >          LOCK_PROFILE_BLOCK;
> > > > -        cpu_relax();
> > > > +        spin_relax();
> > > >      }
> > > >      LOCK_PROFILE_GOT;
> > > >      preempt_disable();
> > > > @@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
> > > >      preempt_enable();
> > > >      LOCK_PROFILE_REL;
> > > >      add_sized(&lock->tickets.head, 1);
> > > > +    spin_signal();
> > > 
> > > It occurs to me that perhaps there should be a barrier between the
> > > add_sized() and the spin_signal() so the update value is visible 
> > > before
> > > we signal (otherwise the spinner may be woken and observe the old 
> > > value
> > > and WFE again).
> > 
> > sev is usually precede by dsb to ensure that all the instructions 
> > before
> > as completed before executing the sev.
> 
> Yes, a dsb() is required. This being common code, we could use wmb().

IMHO it should be in spin_{relax,signal} if it is needed.

Ian.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV
  2015-07-31 11:41     ` Stefano Stabellini
  2015-07-31 12:39       ` Ian Campbell
@ 2015-07-31 12:43       ` David Vrabel
  1 sibling, 0 replies; 7+ messages in thread
From: David Vrabel @ 2015-07-31 12:43 UTC (permalink / raw)
  To: Stefano Stabellini, Julien Grall
  Cc: Ian Campbell, Andrew Cooper, Tim Deegan, David Vrabel,
	Jan Beulich, xen-devel

On 31/07/15 12:41, Stefano Stabellini wrote:
> On Fri, 31 Jul 2015, Julien Grall wrote:
>> Hi David,
>>
>> On 31/07/15 11:52, David Vrabel wrote:
>>> On 31/07/15 11:45, David Vrabel wrote:
>>>> Instead of cpu_relax() while spinning and observing the ticket head,
>>>> introduce spin_relax() which executes a WFE instruction.  After the
>>>> ticket head is changed call spin_signal() to execute an SVE
>>>> instruction to wake any spinners.
>>>>
>>>> This should improve power consumption when locks are contented and
>>>> spinning.
>>>>
>>>> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
>>>> ---
>>>> I've not tested this but it looks straight-forward...
>>>> ---
>>>>  xen/common/spinlock.c          | 5 +++--
>>>>  xen/include/asm-arm/spinlock.h | 3 ++-
>>>>  xen/include/asm-x86/spinlock.h | 3 +++
>>>>  3 files changed, 8 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
>>>> index 29149d1..fc3e8e7 100644
>>>> --- a/xen/common/spinlock.c
>>>> +++ b/xen/common/spinlock.c
>>>> @@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
>>>>      while ( tickets.tail != observe_head(&lock->tickets) )
>>>>      {
>>>>          LOCK_PROFILE_BLOCK;
>>>> -        cpu_relax();
>>>> +        spin_relax();
>>>>      }
>>>>      LOCK_PROFILE_GOT;
>>>>      preempt_disable();
>>>> @@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
>>>>      preempt_enable();
>>>>      LOCK_PROFILE_REL;
>>>>      add_sized(&lock->tickets.head, 1);
>>>> +    spin_signal();
>>>
>>> It occurs to me that perhaps there should be a barrier between the
>>> add_sized() and the spin_signal() so the update value is visible before
>>> we signal (otherwise the spinner may be woken and observe the old value
>>> and WFE again).
>>
>> sev is usually precede by dsb to ensure that all the instructions before
>> as completed before executing the sev.
> 
> Yes, a dsb() is required. This being common code, we could use wmb().

You should put the barrier required for the SEV in spin_signal() so an
additional barrier is not required on other archs.

David

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-07-31 12:43 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-07-31 10:45 [PATCHv1] arm: reduce power use by contented spin locks with WFE/SEV David Vrabel
2015-07-31 10:47 ` Andrew Cooper
2015-07-31 10:52 ` David Vrabel
2015-07-31 11:01   ` Julien Grall
2015-07-31 11:41     ` Stefano Stabellini
2015-07-31 12:39       ` Ian Campbell
2015-07-31 12:43       ` David Vrabel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.