All of lore.kernel.org
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@saeurebad.de>
To: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>,
	LKML <linux-kernel@vger.kernel.org>, Ingo Molnar <mingo@elte.hu>,
	Jens Axboe <axboe@kernel.dk>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Christoph Lameter <clameter@linux-foundation.org>,
	Petr Tesarik <ptesarik@suse.cz>,
	Virtualization <virtualization@lists.linux-foundation.org>,
	Xen devel <xen-devel@lists.xensource.com>,
	Thomas Friebel <thomas.friebel@amd.com>,
	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Subject: Re: [PATCH RFC 4/4] xen: implement Xen-specific spinlocks
Date: Tue, 08 Jul 2008 08:37:33 +0200	[thread overview]
Message-ID: <87tzf0q3te.fsf@saeurebad.de> (raw)
In-Reply-To: <20080707190838.710151521@goop.org> (Jeremy Fitzhardinge's message of "Mon, 07 Jul 2008 12:07:53 -0700")

Hi,

Jeremy Fitzhardinge <jeremy@goop.org> writes:

> The standard ticket spinlocks are very expensive in a virtual
> environment, because their performance depends on Xen's scheduler
> giving vcpus time in the order that they're supposed to take the
> spinlock.
>
> This implements a Xen-specific spinlock, which should be much more
> efficient.
>
> The fast-path is essentially the old Linux-x86 locks, using a single
> lock byte.  The locker decrements the byte; if the result is 0, then
> they have the lock.  If the lock is negative, then locker must spin
> until the lock is positive again.
>
> When there's contention, the locker spin for 2^16[*] iterations waiting
> to get the lock.  If it fails to get the lock in that time, it adds
> itself to the contention count in the lock and blocks on a per-cpu
> event channel.
>
> When unlocking the spinlock, the locker looks to see if there's anyone
> blocked waiting for the lock by checking for a non-zero waiter count.
> If there's a waiter, it traverses the per-cpu "lock_spinners"
> variable, which contains which lock each CPU is waiting on.  It picks
> one CPU waiting on the lock and sends it an event to wake it up.
>
> This allows efficient fast-path spinlock operation, while allowing
> spinning vcpus to give up their processor time while waiting for a
> contended lock.
>
> [*] 2^16 iterations is threshold at which 98% locks have been taken
> according to Thomas Friebel's Xen Summit talk "Preventing Guests from
> Spinning Around".  Therefore, we'd expect the lock and unlock slow
> paths will only be entered 2% of the time.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> Cc: Thomas Friebel <thomas.friebel@amd.com>
> ---
>  arch/x86/xen/smp.c           |  172 +++++++++++++++++++++++++++++++++++++++++-
>  drivers/xen/events.c         |   27 ++++++
>  include/asm-x86/xen/events.h |    1 
>  include/xen/events.h         |    7 +
>  4 files changed, 206 insertions(+), 1 deletion(-)
>
> ===================================================================
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -15,6 +15,7 @@
>   * This does not handle HOTPLUG_CPU yet.
>   */
>  #include <linux/sched.h>
> +#include <linux/kernel_stat.h>
>  #include <linux/err.h>
>  #include <linux/smp.h>
>  
> @@ -34,6 +35,8 @@
>  
>  #include "xen-ops.h"
>  #include "mmu.h"
> +
> +static void __cpuinit xen_init_lock_cpu(int cpu);
>  
>  cpumask_t xen_cpu_initialized_map;
>  
> @@ -179,6 +182,8 @@
>  {
>  	unsigned cpu;
>  
> +	xen_init_lock_cpu(0);
> +
>  	smp_store_cpu_info(0);
>  	cpu_data(0).x86_max_cores = 1;
>  	set_cpu_sibling_map(0);
> @@ -301,6 +306,7 @@
>  	clear_tsk_thread_flag(idle, TIF_FORK);
>  #endif
>  	xen_setup_timer(cpu);
> +	xen_init_lock_cpu(cpu);
>  
>  	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
>  
> @@ -413,6 +419,170 @@
>  	return IRQ_HANDLED;
>  }
>  
> +struct xen_spinlock {
> +	unsigned char lock;		/* 0 -> free; 1 -> locked */
> +	unsigned short spinners;	/* count of waiting cpus */
> +};
> +
> +static int xen_spin_is_locked(struct raw_spinlock *lock)
> +{
> +	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
> +
> +	return xl->lock != 0;
> +}
> +
> +static int xen_spin_is_contended(struct raw_spinlock *lock)
> +{
> +	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
> +
> +	/* Not strictly true; this is only the count of contended
> +	   lock-takers entering the slow path. */
> +	return xl->spinners != 0;
> +}
> +
> +static int xen_spin_trylock(struct raw_spinlock *lock)
> +{
> +	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
> +	u8 old = 1;
> +
> +	asm("xchgb %b0,%1"
> +	    : "+q" (old), "+m" (xl->lock) : : "memory");
> +
> +	return old == 0;
> +}
> +
> +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
> +static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);

The plural is a bit misleading, as this is a single pointer per CPU.

> +static inline void spinning_lock(struct xen_spinlock *xl)
> +{
> +	__get_cpu_var(lock_spinners) = xl;
> +	wmb();			/* set lock of interest before count */
> +	asm(LOCK_PREFIX " incw %0"
> +	    : "+m" (xl->spinners) : : "memory");
> +}
> +
> +static inline void unspinning_lock(struct xen_spinlock *xl)
> +{
> +	asm(LOCK_PREFIX " decw %0"
> +	    : "+m" (xl->spinners) : : "memory");
> +	wmb();			/* decrement count before clearing lock */
> +	__get_cpu_var(lock_spinners) = NULL;
> +}
> +
> +static noinline int xen_spin_lock_slow(struct raw_spinlock *lock)
> +{
> +	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
> +	int irq = __get_cpu_var(lock_kicker_irq);
> +	int ret;
> +
> +	/* If kicker interrupts not initialized yet, just spin */
> +	if (irq == -1)
> +		return 0;
> +
> +	/* announce we're spinning */
> +	spinning_lock(xl);
> +
> +	/* clear pending */
> +	xen_clear_irq_pending(irq);
> +
> +	/* check again make sure it didn't become free while
> +	   we weren't looking  */
> +	ret = xen_spin_trylock(lock);
> +	if (ret)
> +		goto out;
> +
> +	/* block until irq becomes pending */
> +	xen_poll_irq(irq);
> +	kstat_this_cpu.irqs[irq]++;
> +
> +out:
> +	unspinning_lock(xl);
> +	return ret;
> +}
> +
> +static void xen_spin_lock(struct raw_spinlock *lock)
> +{
> +	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
> +	int timeout;
> +	u8 oldval;
> +
> +	do {
> +		timeout = 1 << 10;
> +
> +		asm("1: xchgb %1,%0\n"
> +		    "   testb %1,%1\n"
> +		    "   jz 3f\n"
> +		    "2: rep;nop\n"
> +		    "   cmpb $0,%0\n"
> +		    "   je 1b\n"
> +		    "   dec %2\n"
> +		    "   jnz 2b\n"
> +		    "3:\n"
> +		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
> +		    : "1" (1)
> +		    : "memory");
> +
> +	} while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock)));
> +}
> +
> +static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
> +{
> +	int cpu;
> +
> +	for_each_online_cpu(cpu) {

Would it be feasible to have a bitmap for the spinning CPUs in order to
do a for_each_spinning_cpu() here instead?  Or is setting a bit in
spinning_lock() and unsetting it in unspinning_lock() more overhead than
going over all CPUs here?

> +		/* XXX should mix up next cpu selection */
> +		if (per_cpu(lock_spinners, cpu) == xl) {
> +			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
> +			break;
> +		}
> +	}
> +}
> +
> +static void xen_spin_unlock(struct raw_spinlock *lock)
> +{
> +	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
> +
> +	smp_wmb();		/* make sure no writes get moved after unlock */
> +	xl->lock = 0;		/* release lock */
> +
> +	/* make sure unlock happens before kick */
> +	barrier();
> +
> +	if (unlikely(xl->spinners))
> +		xen_spin_unlock_slow(xl);
> +}
> +
> +static __cpuinit void xen_init_lock_cpu(int cpu)
> +{
> +	int irq;
> +	const char *name;
> +
> +	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
> +	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
> +				     cpu,
> +				     xen_reschedule_interrupt,
> +				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
> +				     name,
> +				     NULL);
> +
> +	if (irq >= 0) {
> +		disable_irq(irq); /* make sure it's never delivered */
> +		per_cpu(lock_kicker_irq, cpu) = irq;
> +	}
> +
> +	printk("cpu %d spinlock event irq %d\n", cpu, irq);
> +}
> +
> +static void __init xen_init_spinlocks(void)
> +{
> +	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
> +	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
> +	pv_lock_ops.spin_lock = xen_spin_lock;
> +	pv_lock_ops.spin_trylock = xen_spin_trylock;
> +	pv_lock_ops.spin_unlock = xen_spin_unlock;
> +}
> +
>  static const struct smp_ops xen_smp_ops __initdata = {
>  	.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
>  	.smp_prepare_cpus = xen_smp_prepare_cpus,
> @@ -430,5 +600,5 @@
>  {
>  	smp_ops = xen_smp_ops;
>  	xen_fill_possible_map();
> -	paravirt_use_bytelocks();
> +	xen_init_spinlocks();
>  }
> ===================================================================
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -741,6 +741,33 @@
>  	}
>  }
>  
> +/* Clear an irq's pending state, in preparation for polling on it */
> +void xen_clear_irq_pending(int irq)
> +{
> +	int evtchn = evtchn_from_irq(irq);
> +
> +	if (VALID_EVTCHN(evtchn))
> +		clear_evtchn(evtchn);
> +}
> +
> +/* Poll waiting for an irq to become pending.  In the usual case, the
> +   irq will be disabled so it won't deliver an interrupt. */
> +void xen_poll_irq(int irq)
> +{
> +	evtchn_port_t evtchn = evtchn_from_irq(irq);
> +
> +	if (VALID_EVTCHN(evtchn)) {
> +		struct sched_poll poll;
> +
> +		poll.nr_ports = 1;
> +		poll.timeout = 0;
> +		poll.ports = &evtchn;
> +
> +		if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
> +			BUG();
> +	}
> +}
> +
>  void xen_irq_resume(void)
>  {
>  	unsigned int cpu, irq, evtchn;
> ===================================================================
> --- a/include/asm-x86/xen/events.h
> +++ b/include/asm-x86/xen/events.h
> @@ -5,6 +5,7 @@
>  	XEN_RESCHEDULE_VECTOR,
>  	XEN_CALL_FUNCTION_VECTOR,
>  	XEN_CALL_FUNCTION_SINGLE_VECTOR,
> +	XEN_SPIN_UNLOCK_VECTOR,
>  
>  	XEN_NR_IPIS,
>  };
> ===================================================================
> --- a/include/xen/events.h
> +++ b/include/xen/events.h
> @@ -45,4 +45,11 @@
>  extern void xen_irq_resume(void);
>  extern void xen_evtchn_do_upcall(struct pt_regs *regs);
>  
> +/* Clear an irq's pending state, in preparation for polling on it */
> +void xen_clear_irq_pending(int irq);
> +
> +/* Poll waiting for an irq to become pending.  In the usual case, the
> +   irq will be disabled so it won't deliver an interrupt. */
> +void xen_poll_irq(int irq);
> +
>  #endif	/* _XEN_EVENTS_H */

	Hannes

  reply	other threads:[~2008-07-08  6:37 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-07 19:07 [PATCH RFC 0/4] Paravirtual spinlocks Jeremy Fitzhardinge
2008-07-07 19:07 ` Jeremy Fitzhardinge
2008-07-07 19:07 ` [PATCH RFC 1/4] x86/paravirt: add hooks for spinlock operations Jeremy Fitzhardinge
2008-07-07 19:07   ` Jeremy Fitzhardinge
2008-07-07 19:07 ` Jeremy Fitzhardinge
2008-07-07 19:07 ` [PATCH RFC 2/4] paravirt: introduce a "lock-byte" spinlock implementation Jeremy Fitzhardinge
2008-07-07 19:07   ` Jeremy Fitzhardinge
2008-07-07 19:07 ` Jeremy Fitzhardinge
2008-07-07 19:07 ` [PATCH RFC 3/4] xen: use lock-byte " Jeremy Fitzhardinge
2008-07-07 19:07   ` Jeremy Fitzhardinge
2008-07-07 19:07 ` Jeremy Fitzhardinge
2008-07-07 19:07 ` [PATCH RFC 4/4] xen: implement Xen-specific spinlocks Jeremy Fitzhardinge
2008-07-07 19:07   ` Jeremy Fitzhardinge
2008-07-08  6:37   ` Johannes Weiner [this message]
2008-07-08  7:15     ` Jeremy Fitzhardinge
2008-07-08  7:15     ` Jeremy Fitzhardinge
2008-07-08  7:15       ` Jeremy Fitzhardinge
2008-07-08  7:30       ` Johannes Weiner
2008-07-08  7:30       ` Johannes Weiner
2008-07-08  6:37   ` Johannes Weiner
2008-07-08  0:29 ` [PATCH RFC 0/4] Paravirtual spinlocks Rusty Russell
2008-07-08  0:29 ` Rusty Russell
2008-07-08  0:37   ` Jeremy Fitzhardinge
2008-07-08  0:37     ` Jeremy Fitzhardinge
2008-07-08  1:01     ` Rusty Russell
2008-07-08  1:01     ` Rusty Russell
2008-07-08  0:37   ` Jeremy Fitzhardinge
2008-07-08  4:51   ` Nick Piggin
2008-07-08  4:51     ` Nick Piggin
2008-07-08  5:28     ` Jeremy Fitzhardinge
2008-07-08  5:28       ` Jeremy Fitzhardinge
2008-07-08  5:28     ` Jeremy Fitzhardinge
2008-07-08  4:51   ` Nick Piggin
2008-07-09 12:28 ` Ingo Molnar
2008-07-09 12:28 ` Ingo Molnar
2008-07-09 12:28   ` Ingo Molnar
2008-07-09 12:35   ` [patch] x86: paravirt spinlocks, !CONFIG_SMP build fixes (was: Re: [PATCH RFC 0/4] Paravirtual spinlocks) Ingo Molnar
2008-07-09 12:35     ` Ingo Molnar
2008-07-09 12:35   ` Ingo Molnar
2008-07-09 12:39   ` [patch] x86: paravirt spinlocks, modular build fix " Ingo Molnar
2008-07-09 12:39     ` Ingo Molnar
2008-07-09 12:39   ` Ingo Molnar
2008-07-09 13:33   ` [PATCH RFC 0/4] Paravirtual spinlocks Ingo Molnar
2008-07-09 13:33     ` Ingo Molnar
2008-07-09 13:49     ` [patch] x86, paravirt-spinlocks: fix boot hang (was: Re: [PATCH RFC 0/4] Paravirtual spinlocks) Ingo Molnar
2008-07-09 13:49       ` Ingo Molnar
2008-07-09 15:55       ` [patch] x86, paravirt-spinlocks: fix boot hang Jeremy Fitzhardinge
2008-07-09 19:26         ` Ingo Molnar
2008-07-09 19:26         ` Ingo Molnar
2008-07-09 19:26           ` Ingo Molnar
2008-07-09 15:55       ` Jeremy Fitzhardinge
2008-07-09 13:49     ` [patch] x86, paravirt-spinlocks: fix boot hang (was: Re: [PATCH RFC 0/4] Paravirtual spinlocks) Ingo Molnar
2008-07-09 13:33   ` [PATCH RFC 0/4] Paravirtual spinlocks Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87tzf0q3te.fsf@saeurebad.de \
    --to=hannes@saeurebad.de \
    --cc=a.p.zijlstra@chello.nl \
    --cc=axboe@kernel.dk \
    --cc=clameter@linux-foundation.org \
    --cc=jeremy.fitzhardinge@citrix.com \
    --cc=jeremy@goop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nickpiggin@yahoo.com.au \
    --cc=ptesarik@suse.cz \
    --cc=thomas.friebel@amd.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.