[PATCH v2] arm: reduce power use by contented spin locks with WFE/SEV

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Ian Campbell <ian.campbell@citrix.com>
To: stefano.stabellini@eu.citrix.com, julien.grall@citrix.com,
	xen-devel@lists.xen.org, jbeulich@suse.com,
	andrew.cooper3@citrix.com
Cc: wei.liu2@citrix.com, David Vrabel <david.vrabel@citrix.com>,
	Ian Campbell <ian.campbell@citrix.com>
Subject: [PATCH v2] arm: reduce power use by contented spin locks with WFE/SEV
Date: Mon, 3 Aug 2015 12:29:19 +0100	[thread overview]
Message-ID: <1438601359-2576-1-git-send-email-ian.campbell@citrix.com> (raw)

From: David Vrabel <david.vrabel@citrix.com>

Instead of cpu_relax() while spinning and observing the ticket head,
introduce arch_lock_relax() which executes a WFE instruction.  After
the ticket head is changed call arch_lock_signal() to execute an SEV
instruction (with the required DSB first) to wake any spinners.

This should improve power consumption when locks are contented and
spinning.

For consistency also move arch_lock_(acquire|release)_barrier to
asm/spinlock.h.

Booted the result on arm32 (Midway) and arm64 (Mustang). Build test
only on amd64.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
[ijc: add barrier, rename as arch_lock_*, move arch_lock_*_barrier, test]
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
v2 (ijc):
  Add dsb(ishst) to spin_relax.
  s/spin_(relax|signal)/arch_lock_\1/
  Move arch_lock_(acquire|release)_barrier to asm/spinlock.h
      (dropped Andy's Reviewed-by due to this change)

In principal the SEV could be made unnecessary on arm64, but this
requires a new hook before the wait loop as well as changing
observe_head and _spin_unlock to use the Acquire/Release instructions
instead of the non-atomic loads and stores used today, which is a lot
more refactoring of the generic code than I think we can be bothered
with at this stage.

4.6: I'm in two minds about this, the lack of WFE in the ticket
spinlocks is not a regression (the old locks lacked them as well,
oops!). On the otherhand spinning like this isn't good. I think
overall I'm inclined to say this should wait for 4.7 but be a
candidate for backport to 4.6.1.
---
 xen/common/spinlock.c          |  5 +++--
 xen/include/asm-arm/spinlock.h |  9 ++++++++-
 xen/include/asm-arm/system.h   |  3 ---
 xen/include/asm-x86/spinlock.h | 14 ++++++++++++++
 xen/include/asm-x86/system.h   | 11 -----------
 5 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 29149d1..7f89694 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -141,7 +141,7 @@ void _spin_lock(spinlock_t *lock)
     while ( tickets.tail != observe_head(&lock->tickets) )
     {
         LOCK_PROFILE_BLOCK;
-        cpu_relax();
+        arch_lock_relax();
     }
     LOCK_PROFILE_GOT;
     preempt_disable();
@@ -170,6 +170,7 @@ void _spin_unlock(spinlock_t *lock)
     preempt_enable();
     LOCK_PROFILE_REL;
     add_sized(&lock->tickets.head, 1);
+    arch_lock_signal();
 }
 
 void _spin_unlock_irq(spinlock_t *lock)
@@ -228,7 +229,7 @@ void _spin_barrier(spinlock_t *lock)
     if ( sample.head != sample.tail )
     {
         while ( observe_head(&lock->tickets) == sample.head )
-            cpu_relax();
+            arch_lock_relax();
 #ifdef LOCK_PROFILE
         if ( lock->profile )
         {
diff --git a/xen/include/asm-arm/spinlock.h b/xen/include/asm-arm/spinlock.h
index 81955d1..8cdf9e1 100644
--- a/xen/include/asm-arm/spinlock.h
+++ b/xen/include/asm-arm/spinlock.h
@@ -1,6 +1,13 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
-/* Nothing ARM specific. */
+#define arch_lock_acquire_barrier() smp_mb()
+#define arch_lock_release_barrier() smp_mb()
+
+#define arch_lock_relax() wfe()
+#define arch_lock_signal() do { \
+    dsb(ishst);                 \
+    sev();                      \
+} while(0)
 
 #endif /* __ASM_SPINLOCK_H */
diff --git a/xen/include/asm-arm/system.h b/xen/include/asm-arm/system.h
index f0e222f..2eb96e8 100644
--- a/xen/include/asm-arm/system.h
+++ b/xen/include/asm-arm/system.h
@@ -53,9 +53,6 @@
 
 #define arch_fetch_and_add(x, v) __sync_fetch_and_add(x, v)
 
-#define arch_lock_acquire_barrier() smp_mb()
-#define arch_lock_release_barrier() smp_mb()
-
 extern struct vcpu *__context_switch(struct vcpu *prev, struct vcpu *next);
 
 #endif
diff --git a/xen/include/asm-x86/spinlock.h b/xen/include/asm-x86/spinlock.h
index 7d69e75..70a85af 100644
--- a/xen/include/asm-x86/spinlock.h
+++ b/xen/include/asm-x86/spinlock.h
@@ -4,4 +4,18 @@
 #define _raw_read_unlock(l) \
     asm volatile ( "lock; dec%z0 %0" : "+m" ((l)->lock) :: "memory" )
 
+/*
+ * On x86 the only reordering is of reads with older writes.  In the
+ * lock case, the read in observe_head() can only be reordered with
+ * writes that precede it, and moving a write _into_ a locked section
+ * is OK.  In the release case, the write in add_sized() can only be
+ * reordered with reads that follow it, and hoisting a read _into_ a
+ * locked region is OK.
+ */
+#define arch_lock_acquire_barrier() barrier()
+#define arch_lock_release_barrier() barrier()
+
+#define arch_lock_relax() cpu_relax()
+#define arch_lock_signal()
+
 #endif /* __ASM_SPINLOCK_H */
diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h
index 25a6a2a..9fb70f5 100644
--- a/xen/include/asm-x86/system.h
+++ b/xen/include/asm-x86/system.h
@@ -185,17 +185,6 @@ static always_inline unsigned long __xadd(
 #define set_mb(var, value) do { xchg(&var, value); } while (0)
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
-/*
- * On x86 the only reordering is of reads with older writes.  In the
- * lock case, the read in observe_head() can only be reordered with
- * writes that precede it, and moving a write _into_ a locked section
- * is OK.  In the release case, the write in add_sized() can only be
- * reordered with reads that follow it, and hoisting a read _into_ a
- * locked region is OK.
- */
-#define arch_lock_acquire_barrier() barrier()
-#define arch_lock_release_barrier() barrier()
-
 #define local_irq_disable()     asm volatile ( "cli" : : : "memory" )
 #define local_irq_enable()      asm volatile ( "sti" : : : "memory" )
 
-- 
2.1.4

next             reply	other threads:[~2015-08-03 11:29 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-03 11:29 Ian Campbell [this message]
2015-08-03 11:51 ` [PATCH v2] arm: reduce power use by contented spin locks with WFE/SEV Stefano Stabellini
2015-08-03 11:58   ` Ian Campbell
2015-08-03 12:00 ` Andrew Cooper
2015-08-11 15:07 ` Jan Beulich
2015-09-15 11:17   ` Ian Campbell
2015-09-15 10:28 ` Ian Campbell
2015-09-15 10:39   ` Stefano Stabellini
2015-09-15 11:17     ` Ian Campbell

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:29149d1 dfblob:7f89694 dfblob:81955d1 dfblob:8cdf9e1
dfblob:f0e222f dfblob:2eb96e8 dfblob:7d69e75 dfblob:70a85af
dfblob:25a6a2a dfblob:9fb70f5 )
 OR (
bs:"[PATCH v2] arm: reduce power use by contented spin locks with WFE/SEV" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1438601359-2576-1-git-send-email-ian.campbell@citrix.com \
    --to=ian.campbell@citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=david.vrabel@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=julien.grall@citrix.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).