virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
To: "H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@elte.hu>
Cc: the arch/x86 maintainers <x86@kernel.org>,
	KVM <kvm@vger.kernel.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Stefano Stabellini <stefano.stabellini@eu.citrix.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Virtualization <virtualization@lists.linux-foundation.org>,
	Andi Kleen <andi@firstfloor.org>, Avi Kivity <avi@redhat.com>,
	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>,
	Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>,
	Attilio Rao <attilio.rao@citrix.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Xen Devel <xen-devel@lists.xensource.com>,
	Stephan Diestelhorst <stephan.diestelhorst@amd.com>
Subject: [PATCH RFC V6 1/11] x86/spinlock: replace pv spinlocks with pv ticketlocks
Date: Wed, 21 Mar 2012 15:50:52 +0530	[thread overview]
Message-ID: <20120321102052.473.40193.sendpatchset@codeblue.in.ibm.com> (raw)
In-Reply-To: <20120321102041.473.61069.sendpatchset@codeblue.in.ibm.com>

From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

Rather than outright replacing the entire spinlock implementation in
order to paravirtualize it, keep the ticket lock implementation but add
a couple of pvops hooks on the slow patch (long spin on lock, unlocking
a contended lock).

Ticket locks have a number of nice properties, but they also have some
surprising behaviours in virtual environments.  They enforce a strict
FIFO ordering on cpus trying to take a lock; however, if the hypervisor
scheduler does not schedule the cpus in the correct order, the system can
waste a huge amount of time spinning until the next cpu can take the lock.

(See Thomas Friebel's talk "Prevent Guests from Spinning Around"
http://www.xen.org/files/xensummitboston08/LHP.pdf for more details.)

To address this, we add two hooks:
 - __ticket_spin_lock which is called after the cpu has been
   spinning on the lock for a significant number of iterations but has
   failed to take the lock (presumably because the cpu holding the lock
   has been descheduled).  The lock_spinning pvop is expected to block
   the cpu until it has been kicked by the current lock holder.
 - __ticket_spin_unlock, which on releasing a contended lock
   (there are more cpus with tail tickets), it looks to see if the next
   cpu is blocked and wakes it if so.

When compiled with CONFIG_PARAVIRT_SPINLOCKS disabled, a set of stub
functions causes all the extra code to go away.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
---
 arch/x86/include/asm/paravirt.h       |   32 ++++----------------
 arch/x86/include/asm/paravirt_types.h |   10 ++----
 arch/x86/include/asm/spinlock.h       |   53 ++++++++++++++++++++++++++------
 arch/x86/include/asm/spinlock_types.h |    4 --
 arch/x86/kernel/paravirt-spinlocks.c  |   15 +--------
 arch/x86/xen/spinlock.c               |    8 ++++-
 6 files changed, 61 insertions(+), 61 deletions(-)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9..0fa4553 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -750,36 +750,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-static inline int arch_spin_is_locked(struct arch_spinlock *lock)
+static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+	PVOP_VCALL2(pv_lock_ops.lock_spinning, lock, ticket);
 }
 
-static inline int arch_spin_is_contended(struct arch_spinlock *lock)
+static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
-static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
-}
-
-static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
-						  unsigned long flags)
-{
-	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
-}
-
-static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
-{
-	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
-}
-
-static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+	PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
 }
 
 #endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4..005e24d 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,11 @@ struct pv_mmu_ops {
 };
 
 struct arch_spinlock;
+#include <asm/spinlock_types.h>
+
 struct pv_lock_ops {
-	int (*spin_is_locked)(struct arch_spinlock *lock);
-	int (*spin_is_contended)(struct arch_spinlock *lock);
-	void (*spin_lock)(struct arch_spinlock *lock);
-	void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
-	int (*spin_trylock)(struct arch_spinlock *lock);
-	void (*spin_unlock)(struct arch_spinlock *lock);
+	void (*lock_spinning)(struct arch_spinlock *lock, __ticket_t ticket);
+	void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
 };
 
 /* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a82c2bf..7e66b85 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -37,6 +37,35 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+/* How long a lock should spin before we consider blocking */
+#define SPIN_THRESHOLD	(1 << 11)
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+
+static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
+							__ticket_t ticket)
+{
+}
+
+static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
+							 __ticket_t ticket)
+{
+}
+
+#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
+
+
+/*
+ * If a spinlock has someone waiting on it, then kick the appropriate
+ * waiting cpu.
+ */
+static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
+							__ticket_t next)
+{
+	if (unlikely(lock->tickets.tail != next))
+		____ticket_unlock_kick(lock, next);
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
@@ -50,19 +79,24 @@
  * in the high part, because a wide xadd increment of the low part would carry
  * up and contaminate the high part.
  */
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
 {
 	register struct __raw_tickets inc = { .tail = 1 };
 
 	inc = xadd(&lock->tickets, inc);
 
 	for (;;) {
-		if (inc.head == inc.tail)
-			break;
-		cpu_relax();
-		inc.head = ACCESS_ONCE(lock->tickets.head);
+		unsigned count = SPIN_THRESHOLD;
+
+		do {
+			if (inc.head == inc.tail)
+				goto out;
+			cpu_relax();
+			inc.head = ACCESS_ONCE(lock->tickets.head);
+		} while (--count);
+		__ticket_lock_spinning(lock, inc.tail);
 	}
-	barrier();		/* make sure nothing creeps before the lock is taken */
+out:	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
@@ -81,7 +115,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
+	__ticket_t next = lock->tickets.head + 1;
+
 	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
+	__ticket_unlock_kick(lock, next);
 }
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
@@ -98,8 +135,6 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 	return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
 }
 
-#ifndef CONFIG_PARAVIRT_SPINLOCKS
-
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_locked(lock);
@@ -132,8 +167,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 	arch_spin_lock(lock);
 }
 
-#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
-
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (arch_spin_is_locked(lock))
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 8ebd5df..dbe223d 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
 #ifndef _ASM_X86_SPINLOCK_TYPES_H
 #define _ASM_X86_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
 #include <linux/types.h>
 
 #if (CONFIG_NR_CPUS < 256)
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c7..c2e010e 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,21 +7,10 @@
 
 #include <asm/paravirt.h>
 
-static inline void
-default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
-	.spin_is_locked = __ticket_spin_is_locked,
-	.spin_is_contended = __ticket_spin_is_contended,
-
-	.spin_lock = __ticket_spin_lock,
-	.spin_lock_flags = default_spin_lock_flags,
-	.spin_trylock = __ticket_spin_trylock,
-	.spin_unlock = __ticket_spin_unlock,
+	.lock_spinning = paravirt_nop,
+	.unlock_kick = paravirt_nop,
 #endif
 };
 EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d69cc6c..1258514 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -138,6 +138,9 @@ struct xen_spinlock {
 	xen_spinners_t spinners;	/* count of waiting cpus */
 };
 
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+
+#if 0
 static int xen_spin_is_locked(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
@@ -165,7 +168,6 @@ static int xen_spin_trylock(struct arch_spinlock *lock)
 	return old == 0;
 }
 
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
 
 /*
@@ -353,6 +355,7 @@ static void xen_spin_unlock(struct arch_spinlock *lock)
 	if (unlikely(xl->spinners))
 		xen_spin_unlock_slow(xl);
 }
+#endif
 
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
@@ -389,13 +392,14 @@ void xen_uninit_lock_cpu(int cpu)
 void __init xen_init_spinlocks(void)
 {
 	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
-
+#if 0
 	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
 	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
 	pv_lock_ops.spin_lock = xen_spin_lock;
 	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
 	pv_lock_ops.spin_trylock = xen_spin_trylock;
 	pv_lock_ops.spin_unlock = xen_spin_unlock;
+#endif
 }
 
 #ifdef CONFIG_XEN_DEBUG_FS

  reply	other threads:[~2012-03-21 10:20 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-21 10:20 [PATCH RFC V6 0/11] Paravirtualized ticketlocks Raghavendra K T
2012-03-21 10:20 ` Raghavendra K T [this message]
2012-03-21 13:04   ` [PATCH RFC V6 1/11] x86/spinlock: replace pv spinlocks with pv ticketlocks Attilio Rao
     [not found]   ` <4F69D1D9.9080107@citrix.com>
2012-03-21 13:22     ` Stephan Diestelhorst
     [not found]     ` <2425963.NBpIGX9T40@chlor>
2012-03-21 13:49       ` Attilio Rao
     [not found]       ` <4F69DC68.6080200@citrix.com>
2012-03-21 14:25         ` Stephan Diestelhorst
     [not found]         ` <1363312.nixp29LUbv@chlor>
2012-03-21 14:33           ` Attilio Rao
     [not found]           ` <4F69E6BB.508@citrix.com>
2012-03-21 14:49             ` Raghavendra K T
2012-03-21 10:21 ` [PATCH RFC V6 2/11] x86/ticketlock: don't inline _spin_unlock when using paravirt spinlocks Raghavendra K T
2012-03-21 10:21 ` [PATCH RFC V6 3/11] x86/ticketlock: collapse a layer of functions Raghavendra K T
2012-03-21 10:21 ` [PATCH RFC V6 4/11] xen: defer spinlock setup until boot CPU setup Raghavendra K T
2012-03-21 10:21 ` [PATCH RFC V6 5/11] xen/pvticketlock: Xen implementation for PV ticket locks Raghavendra K T
2012-03-21 10:21 ` [PATCH RFC V6 6/11] xen/pvticketlocks: add xen_nopvspin parameter to disable xen pv ticketlocks Raghavendra K T
2012-03-21 10:21 ` [PATCH RFC V6 7/11] x86/pvticketlock: use callee-save for lock_spinning Raghavendra K T
2012-03-21 10:22 ` [PATCH RFC V6 8/11] x86/pvticketlock: when paravirtualizing ticket locks, increment by 2 Raghavendra K T
2012-03-21 10:22 ` [PATCH RFC V6 9/11] x86/ticketlock: add slowpath logic Raghavendra K T
2012-03-21 10:22 ` [PATCH RFC V6 10/11] xen/pvticketlock: allow interrupts to be enabled while blocking Raghavendra K T
2012-03-21 10:22 ` [PATCH RFC V6 11/11] xen: enable PV ticketlocks on HVM Xen Raghavendra K T
     [not found] ` <20120321102107.473.89777.sendpatchset@codeblue.in.ibm.com>
2012-03-21 17:13   ` [PATCH RFC V6 2/11] x86/ticketlock: don't inline _spin_unlock when using paravirt spinlocks Linus Torvalds
2012-03-22 10:06     ` Raghavendra K T
2012-03-26 14:25 ` [PATCH RFC V6 0/11] Paravirtualized ticketlocks Avi Kivity
2012-03-27  7:37   ` Raghavendra K T
     [not found]   ` <4F716E31.3000803@linux.vnet.ibm.com>
2012-03-28 16:09     ` Alan Meadows
2012-03-28 18:21       ` Raghavendra K T
2012-03-29  9:58         ` Avi Kivity
2012-03-29 18:03           ` Raghavendra K T
2012-03-30 10:07             ` Raghavendra K T
     [not found]             ` <4F7585EE.7060203@linux.vnet.ibm.com>
2012-04-01 13:18               ` Avi Kivity
2012-04-01 13:48                 ` Raghavendra K T
2012-04-01 13:53                   ` Avi Kivity
2012-04-01 13:56                     ` Raghavendra K T
2012-04-02  9:51                     ` Raghavendra K T
2012-04-02  9:51                     ` Raghavendra K T
2012-04-05  8:43                     ` Raghavendra K T
     [not found]                     ` <4F7976B6.5050000@linux.vnet.ibm.com>
2012-04-02 12:15                       ` Raghavendra K T
2012-04-05  9:01                       ` Avi Kivity
2012-04-05 10:40                         ` Raghavendra K T
2012-03-30 20:26 ` H. Peter Anvin
2012-03-30 22:07   ` Thomas Gleixner
2012-03-30 22:18     ` Andi Kleen
2012-03-30 23:04       ` Thomas Gleixner
2012-03-31  0:08         ` Andi Kleen
2012-03-31  8:11           ` Ingo Molnar
2012-03-31  4:07     ` Srivatsa Vaddagiri
2012-03-31  4:09       ` Srivatsa Vaddagiri
2012-04-16 15:44       ` Konrad Rzeszutek Wilk
2012-04-16 16:36         ` [Xen-devel] " Ian Campbell
2012-04-16 16:42           ` Jeremy Fitzhardinge
2012-04-17  2:54           ` Srivatsa Vaddagiri
2012-04-01 13:31     ` Avi Kivity
2012-04-02  9:26       ` Thomas Gleixner
2012-04-05  9:15         ` Avi Kivity
2012-04-02  4:36     ` [Xen-devel] " Juergen Gross
2012-04-02  9:42     ` Ian Campbell
2012-04-11  1:29     ` Marcelo Tosatti
2012-03-31  0:51   ` Raghavendra K T

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120321102052.473.40193.sendpatchset@codeblue.in.ibm.com \
    --to=raghavendra.kt@linux.vnet.ibm.com \
    --cc=andi@firstfloor.org \
    --cc=attilio.rao@citrix.com \
    --cc=avi@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jeremy.fitzhardinge@citrix.com \
    --cc=konrad.wilk@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=stephan.diestelhorst@amd.com \
    --cc=torvalds@linux-foundation.org \
    --cc=vatsa@linux.vnet.ibm.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).