xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Cc: Nick Piggin <npiggin@suse.de>,
	Peter Zijlstra <peterz@infradead.org>,
	Jan Beulich <JBeulich@novell.com>, Avi Kivity <avi@redhat.com>,
	Xen-devel <xen-devel@lists.xensource.com>
Subject: [PATCH RFC 09/12] xen/pvticketlock: Xen implementation for PV ticket locks
Date: Fri, 16 Jul 2010 18:03:04 -0700 (PDT)	[thread overview]
Message-ID: <32e63cc978ec4b3f36c7f641ce48b3d86aed22ed.1279328276.git.jeremy.fitzhardinge@citrix.com> (raw)
In-Reply-To: <cover.1279328276.git.jeremy.fitzhardinge@citrix.com>

Replace the old Xen implementation of PV spinlocks with and implementation
of xen_lock_spinning and xen_unlock_kick.

xen_lock_spinning simply registers the cpu in its entry in lock_waiting,
adds itself to the waiting_cpus set, and blocks on an event channel
until the channel becomes pending.

xen_unlock_kick searches the cpus in waiting_cpus looking for the one
which next wants this lock with the next ticket, if any.  If found,
it kicks it by making its event channel pending, which wakes it up.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/xen/spinlock.c |  281 ++++++-----------------------------------------
 1 files changed, 36 insertions(+), 245 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 1cf5705..e60d5f1 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -18,32 +18,21 @@
 #ifdef CONFIG_XEN_DEBUG_FS
 static struct xen_spinlock_stats
 {
-	u64 taken;
 	u32 taken_slow;
-	u32 taken_slow_nested;
 	u32 taken_slow_pickup;
 	u32 taken_slow_spurious;
-	u32 taken_slow_irqenable;
 
-	u64 released;
 	u32 released_slow;
 	u32 released_slow_kicked;
 
 #define HISTO_BUCKETS	30
-	u32 histo_spin_total[HISTO_BUCKETS+1];
-	u32 histo_spin_spinning[HISTO_BUCKETS+1];
 	u32 histo_spin_blocked[HISTO_BUCKETS+1];
 
-	u64 time_total;
-	u64 time_spinning;
 	u64 time_blocked;
 } spinlock_stats;
 
 static u8 zero_stats;
 
-static unsigned lock_timeout = 1 << 10;
-#define TIMEOUT lock_timeout
-
 static inline void check_zero(void)
 {
 	if (unlikely(zero_stats)) {
@@ -72,22 +61,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
 		array[HISTO_BUCKETS]++;
 }
 
-static inline void spin_time_accum_spinning(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
-	spinlock_stats.time_spinning += delta;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
-	spinlock_stats.time_total += delta;
-}
-
 static inline void spin_time_accum_blocked(u64 start)
 {
 	u32 delta = xen_clocksource_read() - start;
@@ -104,214 +77,76 @@ static inline u64 spin_time_start(void)
 	return 0;
 }
 
-static inline void spin_time_accum_total(u64 start)
-{
-}
-static inline void spin_time_accum_spinning(u64 start)
-{
-}
 static inline void spin_time_accum_blocked(u64 start)
 {
 }
 #endif  /* CONFIG_XEN_DEBUG_FS */
 
-struct xen_spinlock {
-	unsigned char lock;		/* 0 -> free; 1 -> locked */
-	unsigned short spinners;	/* count of waiting cpus */
+struct xen_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
 };
 
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
+static cpumask_t waiting_cpus;
 
-#if 0
-static int xen_spin_is_locked(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	/* Not strictly true; this is only the count of contended
-	   lock-takers entering the slow path. */
-	return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	u8 old = 1;
-
-	asm("xchgb %b0,%1"
-	    : "+q" (old), "+m" (xl->lock) : : "memory");
-
-	return old == 0;
-}
-
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
-
-/*
- * Mark a cpu as interested in a lock.  Returns the CPU's previous
- * lock of interest, in case we got preempted by an interrupt.
- */
-static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
-{
-	struct xen_spinlock *prev;
-
-	prev = __get_cpu_var(lock_spinners);
-	__get_cpu_var(lock_spinners) = xl;
-
-	wmb();			/* set lock of interest before count */
-
-	asm(LOCK_PREFIX " incw %0"
-	    : "+m" (xl->spinners) : : "memory");
-
-	return prev;
-}
-
-/*
- * Mark a cpu as no longer interested in a lock.  Restores previous
- * lock of interest (NULL for none).
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
-	asm(LOCK_PREFIX " decw %0"
-	    : "+m" (xl->spinners) : : "memory");
-	wmb();			/* decrement count before restoring lock */
-	__get_cpu_var(lock_spinners) = prev;
-}
-
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
+static void xen_lock_spinning(struct arch_spinlock *lock, unsigned want)
 {
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	struct xen_spinlock *prev;
 	int irq = __get_cpu_var(lock_kicker_irq);
-	int ret;
+	struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+	int cpu = smp_processor_id();
 	u64 start;
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1)
-		return 0;
+		return;
 
 	start = spin_time_start();
 
-	/* announce we're spinning */
-	prev = spinning_lock(xl);
+	w->want = want;
+	w->lock = lock;
+
+	/* This uses set_bit, which atomic and therefore a barrier */
+	cpumask_set_cpu(cpu, &waiting_cpus);
 
 	ADD_STATS(taken_slow, 1);
-	ADD_STATS(taken_slow_nested, prev != NULL);
-
-	do {
-		unsigned long flags;
-
-		/* clear pending */
-		xen_clear_irq_pending(irq);
-
-		/* check again make sure it didn't become free while
-		   we weren't looking  */
-		ret = xen_spin_trylock(lock);
-		if (ret) {
-			ADD_STATS(taken_slow_pickup, 1);
-
-			/*
-			 * If we interrupted another spinlock while it
-			 * was blocking, make sure it doesn't block
-			 * without rechecking the lock.
-			 */
-			if (prev != NULL)
-				xen_set_irq_pending(irq);
-			goto out;
-		}
 
-		flags = __raw_local_save_flags();
-		if (irq_enable) {
-			ADD_STATS(taken_slow_irqenable, 1);
-			raw_local_irq_enable();
-		}
+	/* clear pending */
+	xen_clear_irq_pending(irq);
 
-		/*
-		 * Block until irq becomes pending.  If we're
-		 * interrupted at this point (after the trylock but
-		 * before entering the block), then the nested lock
-		 * handler guarantees that the irq will be left
-		 * pending if there's any chance the lock became free;
-		 * xen_poll_irq() returns immediately if the irq is
-		 * pending.
-		 */
-		xen_poll_irq(irq);
+	/* Only check lock once pending cleared */
+	barrier();
 
-		raw_local_irq_restore(flags);
+	/* check again make sure it didn't become free while
+	   we weren't looking  */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		ADD_STATS(taken_slow_pickup, 1);
+		goto out;
+	}
 
-		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
-	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
+	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
+	xen_poll_irq(irq);
+	ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
 
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 
 out:
-	unspinning_lock(xl, prev);
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
 	spin_time_accum_blocked(start);
-
-	return ret;
 }
 
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-	unsigned timeout;
-	u8 oldval;
-	u64 start_spin;
-
-	ADD_STATS(taken, 1);
-
-	start_spin = spin_time_start();
-
-	do {
-		u64 start_spin_fast = spin_time_start();
-
-		timeout = TIMEOUT;
-
-		asm("1: xchgb %1,%0\n"
-		    "   testb %1,%1\n"
-		    "   jz 3f\n"
-		    "2: rep;nop\n"
-		    "   cmpb $0,%0\n"
-		    "   je 1b\n"
-		    "   dec %2\n"
-		    "   jnz 2b\n"
-		    "3:\n"
-		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-		    : "1" (1)
-		    : "memory");
-
-		spin_time_accum_spinning(start_spin_fast);
-
-	} while (unlikely(oldval != 0 &&
-			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
-	spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
-	__xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
-	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+static void xen_unlock_kick(struct arch_spinlock *lock, unsigned next)
 {
 	int cpu;
 
 	ADD_STATS(released_slow, 1);
 
-	for_each_online_cpu(cpu) {
-		/* XXX should mix up next cpu selection */
-		if (per_cpu(lock_spinners, cpu) == xl) {
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
+
+		if (w->lock == lock && w->want == next) {
 			ADD_STATS(released_slow_kicked, 1);
 			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
 			break;
@@ -319,28 +154,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
 	}
 }
 
-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
-	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-	ADD_STATS(released, 1);
-
-	smp_wmb();		/* make sure no writes get moved after unlock */
-	xl->lock = 0;		/* release lock */
-
-	/*
-	 * Make sure unlock happens before checking for waiting
-	 * spinners.  We need a strong barrier to enforce the
-	 * write-read ordering to different memory locations, as the
-	 * CPU makes no implied guarantees about their ordering.
-	 */
-	mb();
-
-	if (unlikely(xl->spinners))
-		xen_spin_unlock_slow(xl);
-}
-#endif
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -375,14 +188,8 @@ void xen_uninit_lock_cpu(int cpu)
 
 void __init xen_init_spinlocks(void)
 {
-#if 0
-	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-	pv_lock_ops.spin_lock = xen_spin_lock;
-	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
-	pv_lock_ops.spin_trylock = xen_spin_trylock;
-	pv_lock_ops.spin_unlock = xen_spin_unlock;
-#endif
+	pv_lock_ops.lock_spinning = xen_lock_spinning;
+	pv_lock_ops.unlock_kick = xen_unlock_kick;
 }
 
 #ifdef CONFIG_XEN_DEBUG_FS
@@ -400,37 +207,21 @@ static int __init xen_spinlock_debugfs(void)
 
 	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 
-	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
-	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
 	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
 			   &spinlock_stats.taken_slow);
-	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_nested);
 	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
 			   &spinlock_stats.taken_slow_pickup);
 	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
 			   &spinlock_stats.taken_slow_spurious);
-	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
-			   &spinlock_stats.taken_slow_irqenable);
 
-	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
 	debugfs_create_u32("released_slow", 0444, d_spin_debug,
 			   &spinlock_stats.released_slow);
 	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
 			   &spinlock_stats.released_slow_kicked);
 
-	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-			   &spinlock_stats.time_spinning);
 	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 			   &spinlock_stats.time_blocked);
-	debugfs_create_u64("time_total", 0444, d_spin_debug,
-			   &spinlock_stats.time_total);
 
-	xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-				     spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-	xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-				     spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
 	xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 				     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 
-- 
1.7.1.1

  parent reply	other threads:[~2010-07-17  1:03 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-17  1:03 [PATCH RFC 00/12] X86 ticket lock cleanups and improvements Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 08/12] x86/ticketlock: collapse a layer of functions Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 01/12] x86/ticketlock: clean up types and accessors Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 04/12] x86/ticketlock: make large and small ticket versions of spin_lock the same Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 02/12] x86/ticketlock: convert spin loop to C Jeremy Fitzhardinge
2010-08-02 15:07   ` Peter Zijlstra
2010-08-02 15:17     ` Jeremy Fitzhardinge
2010-08-06 12:43       ` Jan Beulich
2010-08-06 14:53         ` Jeremy Fitzhardinge
2010-08-06 20:17           ` H. Peter Anvin
2010-08-06 20:33             ` Jeremy Fitzhardinge
2010-08-06 21:09               ` H. Peter Anvin
2010-08-06 22:03                 ` Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 06/12] x86/ticketlock: make __ticket_spin_trylock common Jeremy Fitzhardinge
2010-07-17  1:03 ` Jeremy Fitzhardinge [this message]
2010-09-26 11:39   ` [PATCH RFC 09/12] xen/pvticketlock: Xen implementation for PV ticket locks Srivatsa Vaddagiri
2010-09-26 22:34     ` Jeremy Fitzhardinge
2011-01-18 16:27       ` Srivatsa Vaddagiri
2011-01-19  1:28         ` Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 05/12] x86/ticketlock: make __ticket_spin_lock common Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 12/12] x86/pvticketlock: use callee-save for unlock_kick as well Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 07/12] x86/spinlocks: replace pv spinlocks with pv ticketlocks Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 03/12] x86/ticketlock: Use C for __ticket_spin_unlock Jeremy Fitzhardinge
2010-07-20 15:38   ` Konrad Rzeszutek Wilk
2010-07-20 16:17     ` Jeremy Fitzhardinge
2010-08-06 17:47       ` H. Peter Anvin
2010-08-06 20:03         ` Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 11/12] x86/pvticketlock: use callee-save for lock_spinning Jeremy Fitzhardinge
2010-07-17  1:03 ` [PATCH RFC 10/12] x86/pvticketlock: keep count of blocked cpus Jeremy Fitzhardinge
2010-08-03  8:32   ` Peter Zijlstra
2010-08-03  9:44     ` Nick Piggin
2010-08-03 15:45     ` Jeremy Fitzhardinge
  -- strict thread matches above, loose matches on Subject: below --
2010-07-03  0:20 [PATCH RFC 09/12] xen/pvticketlock: Xen implementation for PV ticket locks Jeremy Fitzhardinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=32e63cc978ec4b3f36c7f641ce48b3d86aed22ed.1279328276.git.jeremy.fitzhardinge@citrix.com \
    --to=jeremy@goop.org \
    --cc=JBeulich@novell.com \
    --cc=avi@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=npiggin@suse.de \
    --cc=peterz@infradead.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).