xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
To: gleb@redhat.com, mingo@redhat.com, jeremy@goop.org,
	x86@kernel.org, konrad.wilk@oracle.com, hpa@zytor.com,
	pbonzini@redhat.com
Cc: gregkh@suse.de, kvm@vger.kernel.org, linux-doc@vger.kernel.org,
	peterz@infradead.org, drjones@redhat.com,
	virtualization@lists.linux-foundation.org, andi@firstfloor.org,
	xen-devel@lists.xensource.com,
	Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>,
	habanero@linux.vnet.ibm.com, riel@redhat.com,
	stefano.stabellini@eu.citrix.com, ouyang@cs.pitt.edu,
	avi.kivity@gmail.com, tglx@linutronix.de, chegu_vinod@hp.com,
	linux-kernel@vger.kernel.org, srivatsa.vaddagiri@gmail.com,
	attilio.rao@citrix.com, torvalds@linux-foundation.org
Subject: [PATCH RFC V11 15/18] kvm : Paravirtual ticketlocks support for linux guests running on KVM hypervisor
Date: Mon, 22 Jul 2013 11:50:16 +0530	[thread overview]
Message-ID: <20130722062016.24737.54554.sendpatchset@codeblue> (raw)
In-Reply-To: <20130722061631.24737.75508.sendpatchset@codeblue>

kvm : Paravirtual ticketlocks support for linux guests running on KVM hypervisor

From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>

During smp_boot_cpus  paravirtualied KVM guest detects if the hypervisor has
required feature (KVM_FEATURE_PV_UNHALT) to support pv-ticketlocks. If so,
 support for pv-ticketlocks is registered via pv_lock_ops.

Use KVM_HC_KICK_CPU hypercall to wakeup waiting/halted vcpu.

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Suzuki Poulose <suzuki@in.ibm.com>
[Raghu: check_zero race fix, enum for kvm_contention_stat, jumplabel related changes,
addition of safe_halt for irq enabled case(Gleb)]
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
---
 arch/x86/include/asm/kvm_para.h |   14 ++
 arch/x86/kernel/kvm.c           |  259 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 271 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 695399f..427afcb 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-#else
-#define kvm_guest_init() do { } while (0)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init kvm_spinlock_init(void);
+#else /* !CONFIG_PARAVIRT_SPINLOCKS */
+static inline void kvm_spinlock_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_KVM_GUEST */
+#define kvm_guest_init() do {} while (0)
 #define kvm_async_pf_task_wait(T) do {} while(0)
 #define kvm_async_pf_task_wake(T) do {} while(0)
+
 static inline u32 kvm_read_and_reset_pf_reason(void)
 {
 	return 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index cd6d9a5..b5aa5f4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/kprobes.h>
+#include <linux/debugfs.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 	WARN_ON(kvm_register_clock("primary cpu clock"));
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
+	kvm_spinlock_init();
 }
 
 static void __cpuinit kvm_guest_cpu_online(void *dummy)
@@ -523,3 +525,260 @@ static __init int activate_jump_labels(void)
 	return 0;
 }
 arch_initcall(activate_jump_labels);
+
+/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
+void kvm_kick_cpu(int cpu)
+{
+	int apicid;
+	unsigned long flags = 0;
+
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
+}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+enum kvm_contention_stat {
+	TAKEN_SLOW,
+	TAKEN_SLOW_PICKUP,
+	RELEASED_SLOW,
+	RELEASED_SLOW_KICKED,
+	NR_CONTENTION_STATS
+};
+
+#ifdef CONFIG_KVM_DEBUG_FS
+#define HISTO_BUCKETS	30
+
+static struct kvm_spinlock_stats
+{
+	u32 contention_stats[NR_CONTENTION_STATS];
+	u32 histo_spin_blocked[HISTO_BUCKETS+1];
+	u64 time_blocked;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+	u8 ret;
+	u8 old;
+
+	old = ACCESS_ONCE(zero_stats);
+	if (unlikely(old)) {
+		ret = cmpxchg(&zero_stats, old, 0);
+		/* This ensures only one fellow resets the stat */
+		if (ret == old)
+			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+	}
+}
+
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+	check_zero();
+	spinlock_stats.contention_stats[var] += val;
+}
+
+
+static inline u64 spin_time_start(void)
+{
+	return sched_clock();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+	unsigned index;
+
+	index = ilog2(delta);
+	check_zero();
+
+	if (index < HISTO_BUCKETS)
+		array[index]++;
+	else
+		array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+	u32 delta;
+
+	delta = sched_clock() - start;
+	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+	spinlock_stats.time_blocked += delta;
+}
+
+static struct dentry *d_spin_debug;
+static struct dentry *d_kvm_debug;
+
+struct dentry *kvm_init_debugfs(void)
+{
+	d_kvm_debug = debugfs_create_dir("kvm", NULL);
+	if (!d_kvm_debug)
+		printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
+
+	return d_kvm_debug;
+}
+
+static int __init kvm_spinlock_debugfs(void)
+{
+	struct dentry *d_kvm;
+
+	d_kvm = kvm_init_debugfs();
+	if (d_kvm == NULL)
+		return -ENOMEM;
+
+	d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
+
+	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[TAKEN_SLOW]);
+	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
+
+	debugfs_create_u32("released_slow", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[RELEASED_SLOW]);
+	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+		   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
+
+	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+			   &spinlock_stats.time_blocked);
+
+	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+		     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+
+	return 0;
+}
+fs_initcall(kvm_spinlock_debugfs);
+#else  /* !CONFIG_KVM_DEBUG_FS */
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+}
+
+static inline u64 spin_time_start(void)
+{
+	return 0;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+}
+#endif  /* CONFIG_KVM_DEBUG_FS */
+
+struct kvm_lock_waiting {
+	struct arch_spinlock *lock;
+	__ticket_t want;
+};
+
+/* cpus 'waiting' on a spinlock to become available */
+static cpumask_t waiting_cpus;
+
+/* Track spinlock on which a cpu is waiting */
+static DEFINE_PER_CPU(struct kvm_lock_waiting, lock_waiting);
+
+static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+{
+	struct kvm_lock_waiting *w;
+	int cpu;
+	u64 start;
+	unsigned long flags;
+
+	w = &__get_cpu_var(lock_waiting);
+	cpu = smp_processor_id();
+	start = spin_time_start();
+
+	/*
+	 * Make sure an interrupt handler can't upset things in a
+	 * partially setup state.
+	 */
+	local_irq_save(flags);
+
+	/*
+	 * The ordering protocol on this is that the "lock" pointer
+	 * may only be set non-NULL if the "want" ticket is correct.
+	 * If we're updating "want", we must first clear "lock".
+	 */
+	w->lock = NULL;
+	smp_wmb();
+	w->want = want;
+	smp_wmb();
+	w->lock = lock;
+
+	add_stats(TAKEN_SLOW, 1);
+
+	/*
+	 * This uses set_bit, which is atomic but we should not rely on its
+	 * reordering gurantees. So barrier is needed after this call.
+	 */
+	cpumask_set_cpu(cpu, &waiting_cpus);
+
+	barrier();
+
+	/*
+	 * Mark entry to slowpath before doing the pickup test to make
+	 * sure we don't deadlock with an unlocker.
+	 */
+	__ticket_enter_slowpath(lock);
+
+	/*
+	 * check again make sure it didn't become free while
+	 * we weren't looking.
+	 */
+	if (ACCESS_ONCE(lock->tickets.head) == want) {
+		add_stats(TAKEN_SLOW_PICKUP, 1);
+		goto out;
+	}
+
+	/*
+	 * halt until it's our turn and kicked. Note that we do safe halt
+	 * for irq enabled case to avoid hang when lock info is overwritten
+	 * in irq spinlock slowpath and no spurious interrupt occur to save us.
+	 */
+	if (arch_irqs_disabled_flags(flags))
+		halt();
+	else
+		safe_halt();
+
+out:
+	cpumask_clear_cpu(cpu, &waiting_cpus);
+	w->lock = NULL;
+	local_irq_restore(flags);
+	spin_time_accum_blocked(start);
+}
+PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
+
+/* Kick vcpu waiting on @lock->head to reach value @ticket */
+static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
+{
+	int cpu;
+
+	add_stats(RELEASED_SLOW, 1);
+	for_each_cpu(cpu, &waiting_cpus) {
+		const struct kvm_lock_waiting *w = &per_cpu(lock_waiting, cpu);
+		if (ACCESS_ONCE(w->lock) == lock &&
+		    ACCESS_ONCE(w->want) == ticket) {
+			add_stats(RELEASED_SLOW_KICKED, 1);
+			kvm_kick_cpu(cpu);
+			break;
+		}
+	}
+}
+
+/*
+ * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
+ */
+void __init kvm_spinlock_init(void)
+{
+	if (!kvm_para_available())
+		return;
+	/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
+	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+		return;
+
+	printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+
+	static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+	pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+#endif	/* CONFIG_PARAVIRT_SPINLOCKS */

  parent reply	other threads:[~2013-07-22  6:20 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-22  6:16 [PATCH RFC V11 0/18] Paravirtualized ticket spinlocks Raghavendra K T
2013-07-22  6:16 ` [PATCH RFC V11 1/18] x86/spinlock: Replace pv spinlocks with pv ticketlocks Raghavendra K T
2013-07-22  6:17 ` [PATCH RFC V11 2/18] x86/ticketlock: Don't inline _spin_unlock when using paravirt spinlocks Raghavendra K T
2013-07-22  6:17 ` [PATCH RFC V11 3/18] x86/ticketlock: Collapse a layer of functions Raghavendra K T
2013-07-22  6:17 ` [PATCH RFC V11 4/18] xen: Defer spinlock setup until boot CPU setup Raghavendra K T
2013-07-22  6:17 ` [PATCH RFC V11 5/18] xen/pvticketlock: Xen implementation for PV ticket locks Raghavendra K T
2013-07-22  6:17 ` [PATCH RFC V11 6/18] xen/pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks Raghavendra K T
2013-07-22  6:18 ` [PATCH RFC V11 7/18] x86/pvticketlock: Use callee-save for lock_spinning Raghavendra K T
2013-07-22  6:18 ` [PATCH RFC V11 8/18] x86/pvticketlock: When paravirtualizing ticket locks, increment by 2 Raghavendra K T
2013-07-22  6:18 ` [PATCH RFC V11 9/18] jump_label: Split out rate limiting from jump_label.h Raghavendra K T
2013-07-22  6:18 ` [PATCH RFC V11 10/18] x86/ticketlock: Add slowpath logic Raghavendra K T
2013-07-22  6:19 ` [PATCH RFC V11 11/18] xen/pvticketlock: Allow interrupts to be enabled while blocking Raghavendra K T
2013-07-22  6:19 ` [PATCH RFC V11 12/18] kvm hypervisor : Add a hypercall to KVM hypervisor to support pv-ticketlocks Raghavendra K T
2013-07-22  6:19 ` [PATCH RFC V11 13/18] kvm : Fold pv_unhalt flag into GET_MP_STATE ioctl to aid migration Raghavendra K T
2013-07-22  6:20 ` [PATCH RFC V11 14/18] kvm guest : Add configuration support to enable debug information for KVM Guests Raghavendra K T
2013-07-22  6:20 ` Raghavendra K T [this message]
2013-07-23 15:07   ` [PATCH RFC V11 15/18] kvm : Paravirtual ticketlocks support for linux guests running on KVM hypervisor Gleb Natapov
2013-07-24  9:24     ` [PATCH RESEND " Raghavendra K T
2013-07-24  9:45     ` [PATCH " Raghavendra K T
2013-07-24 10:39       ` Gleb Natapov
2013-07-24 12:00         ` Raghavendra K T
2013-07-24 12:06           ` Gleb Natapov
2013-07-24 12:36             ` Raghavendra K T
2013-07-25  9:17               ` Raghavendra K T
2013-07-25  9:15                 ` Gleb Natapov
2013-07-25  9:38                   ` Raghavendra K T
2013-07-30 16:43                     ` Raghavendra K T
2013-07-31  6:24                       ` Gleb Natapov
2013-08-01  7:38                         ` Raghavendra K T
2013-08-01  7:45                           ` Gleb Natapov
2013-08-01  9:04                             ` Raghavendra K T
2013-08-02  3:22                               ` Raghavendra K T
2013-08-02  9:23                                 ` Ingo Molnar
2013-08-02  9:44                                   ` Raghavendra K T
2013-08-02  9:25                           ` Ingo Molnar
2013-08-02  9:54                             ` Gleb Natapov
2013-08-02 10:57                               ` Raghavendra K T
2013-08-05  9:46                               ` Ingo Molnar
2013-08-05 10:42                                 ` Raghavendra K T
     [not found]                                 ` <20130805095901.GL2258@redhat.com>
2013-08-05 13:52                                   ` Ingo Molnar
2013-08-05 14:05                                     ` Paolo Bonzini
2013-08-05 14:39                                       ` Raghavendra K T
2013-08-05 14:45                                         ` Paolo Bonzini
2013-08-05 15:37                                 ` Konrad Rzeszutek Wilk
2013-07-22  6:20 ` [PATCH RFC V11 16/18] kvm hypervisor : Simplify kvm_for_each_vcpu with kvm_irq_delivery_to_apic Raghavendra K T
2013-07-22  6:20 ` [PATCH RFC V11 17/18] Documentation/kvm : Add documentation on Hypercalls and features used for PV spinlock Raghavendra K T
2013-07-22  6:20 ` [PATCH RFC V11 18/18] kvm hypervisor: Add directed yield in vcpu block path Raghavendra K T
2013-07-22 19:36 ` [PATCH RFC V11 0/18] Paravirtualized ticket spinlocks Konrad Rzeszutek Wilk
2013-07-23  2:50   ` Raghavendra K T
2013-08-05 22:50 ` H. Peter Anvin
2013-08-06  2:50   ` Raghavendra K T

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130722062016.24737.54554.sendpatchset@codeblue \
    --to=raghavendra.kt@linux.vnet.ibm.com \
    --cc=andi@firstfloor.org \
    --cc=attilio.rao@citrix.com \
    --cc=avi.kivity@gmail.com \
    --cc=chegu_vinod@hp.com \
    --cc=drjones@redhat.com \
    --cc=gleb@redhat.com \
    --cc=gregkh@suse.de \
    --cc=habanero@linux.vnet.ibm.com \
    --cc=hpa@zytor.com \
    --cc=jeremy@goop.org \
    --cc=konrad.wilk@oracle.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=ouyang@cs.pitt.edu \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=srivatsa.vaddagiri@gmail.com \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).