From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
To: Jeremy Fitzhardinge <jeremy@goop.org>,
Greg Kroah-Hartman <gregkh@suse.de>,
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
"H. Peter Anvin" <hpa@zytor.com>,
Marcelo Tosatti <mtosatti@redhat.com>, X86 <x86@kernel.org>,
Gleb Natapov <gleb@redhat.com>, Ingo Molnar <mingo@redhat.com>,
Avi Kivity <avi@redhat.com>
Cc: Attilio Rao <attilio.rao@citrix.com>,
Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Virtualization <virtualization@lists.linux-foundation.org>,
Xen Devel <xen-devel@lists.xensource.com>,
linux-doc@vger.kernel.org, KVM <kvm@vger.kernel.org>,
Andi Kleen <andi@firstfloor.org>,
Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>,
Stefano Stabellini <stefano.stabellini@eu.citrix.com>,
Stephan Diestelhorst <stephan.diestelhorst@amd.com>,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH RFC V8 16/17] kvm : Paravirtual ticketlocks support for linux guests running on KVM hypervisor
Date: Wed, 02 May 2012 15:39:36 +0530 [thread overview]
Message-ID: <20120502100936.13206.8094.sendpatchset@codeblue.in.ibm.com> (raw)
In-Reply-To: <20120502100610.13206.40.sendpatchset@codeblue.in.ibm.com>
From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
During smp_boot_cpus, a paravirtualized KVM guest detects if the hypervisor has
the required feature (KVM_FEATURE_PV_UNHALT) to support pv-ticketlocks. If so,
support for pv-ticketlocks is registered via pv_lock_ops.
Use the KVM_HC_KICK_CPU hypercall to wake up a waiting/halted vcpu.
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Suzuki Poulose <suzuki@in.ibm.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
---
arch/x86/include/asm/kvm_para.h | 14 ++-
arch/x86/kernel/kvm.c | 256 +++++++++++++++++++++++++++++++++++++++
2 files changed, 268 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5b647ea..77266d3 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -195,10 +195,20 @@ void kvm_async_pf_task_wait(u32 token);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
-#else
-#define kvm_guest_init() do { } while (0)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init kvm_spinlock_init(void);
+#else /* !CONFIG_PARAVIRT_SPINLOCKS */
+static inline void kvm_spinlock_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_KVM_GUEST */
+#define kvm_guest_init() do {} while (0)
#define kvm_async_pf_task_wait(T) do {} while(0)
#define kvm_async_pf_task_wake(T) do {} while(0)
+
static inline u32 kvm_read_and_reset_pf_reason(void)
{
return 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4..7c46567 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -33,6 +33,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
+#include <linux/debugfs.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -368,6 +369,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
#endif
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
+ kvm_spinlock_init();
}
static void __cpuinit kvm_guest_cpu_online(void *dummy)
@@ -450,3 +452,257 @@ static __init int activate_jump_labels(void)
return 0;
}
arch_initcall(activate_jump_labels);
+
+/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
+void kvm_kick_cpu(int cpu)
+{
+ int apicid;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ kvm_hypercall1(KVM_HC_KICK_CPU, apicid);
+}
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+enum kvm_contention_stat {
+ TAKEN_SLOW,
+ TAKEN_SLOW_PICKUP,
+ RELEASED_SLOW,
+ RELEASED_SLOW_KICKED,
+ NR_CONTENTION_STATS
+};
+
+#ifdef CONFIG_KVM_DEBUG_FS
+#define HISTO_BUCKETS 30
+
+static struct kvm_spinlock_stats
+{
+ u32 contention_stats[NR_CONTENTION_STATS];
+ u32 histo_spin_blocked[HISTO_BUCKETS+1];
+ u64 time_blocked;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+ u8 ret;
+ u8 old;
+
+ old = ACCESS_ONCE(zero_stats);
+ if (unlikely(old)) {
+ ret = cmpxchg(&zero_stats, old, 0);
+ /* This ensures only one fellow resets the stat */
+ if (ret == old)
+ memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+ }
+}
+
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+ check_zero();
+ spinlock_stats.contention_stats[var] += val;
+}
+
+
+static inline u64 spin_time_start(void)
+{
+ return sched_clock();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+ unsigned index;
+
+ index = ilog2(delta);
+ check_zero();
+
+ if (index < HISTO_BUCKETS)
+ array[index]++;
+ else
+ array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+ u32 delta;
+
+ delta = sched_clock() - start;
+ __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+ spinlock_stats.time_blocked += delta;
+}
+
+static struct dentry *d_spin_debug;
+static struct dentry *d_kvm_debug;
+
+struct dentry *kvm_init_debugfs(void)
+{
+ d_kvm_debug = debugfs_create_dir("kvm", NULL);
+ if (!d_kvm_debug)
+ printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
+
+ return d_kvm_debug;
+}
+
+static int __init kvm_spinlock_debugfs(void)
+{
+ struct dentry *d_kvm;
+
+ d_kvm = kvm_init_debugfs();
+ if (d_kvm == NULL)
+ return -ENOMEM;
+
+ d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
+
+ debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+ debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[TAKEN_SLOW]);
+ debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
+
+ debugfs_create_u32("released_slow", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[RELEASED_SLOW]);
+ debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
+
+ debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+ &spinlock_stats.time_blocked);
+
+ debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+
+ return 0;
+}
+fs_initcall(kvm_spinlock_debugfs);
+#else /* !CONFIG_KVM_DEBUG_FS */
+#define TIMEOUT (1 << 10)
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+}
+
+static inline u64 spin_time_start(void)
+{
+ return 0;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+}
+#endif /* CONFIG_KVM_DEBUG_FS */
+
+struct kvm_lock_waiting {
+ struct arch_spinlock *lock;
+ __ticket_t want;
+};
+
+/* cpus 'waiting' on a spinlock to become available */
+static cpumask_t waiting_cpus;
+
+/* Track spinlock on which a cpu is waiting */
+static DEFINE_PER_CPU(struct kvm_lock_waiting, lock_waiting);
+
+static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+{
+ struct kvm_lock_waiting *w;
+ int cpu;
+ u64 start;
+ unsigned long flags;
+
+ w = &__get_cpu_var(lock_waiting);
+ cpu = smp_processor_id();
+ start = spin_time_start();
+
+ /*
+ * Make sure an interrupt handler can't upset things in a
+ * partially setup state.
+ */
+ local_irq_save(flags);
+
+ /*
+ * The ordering protocol on this is that the "lock" pointer
+ * may only be set non-NULL if the "want" ticket is correct.
+ * If we're updating "want", we must first clear "lock".
+ */
+ w->lock = NULL;
+ smp_wmb();
+ w->want = want;
+ smp_wmb();
+ w->lock = lock;
+
+ add_stats(TAKEN_SLOW, 1);
+
+ /*
+ * This uses set_bit, which is atomic but we should not rely on its
+ * reordering guarantees. So barrier is needed after this call.
+ */
+ cpumask_set_cpu(cpu, &waiting_cpus);
+
+ barrier();
+
+ /*
+ * Mark entry to slowpath before doing the pickup test to make
+ * sure we don't deadlock with an unlocker.
+ */
+ __ticket_enter_slowpath(lock);
+
+ /*
+ * Check again to make sure it didn't become free while
+ * we weren't looking.
+ */
+ if (ACCESS_ONCE(lock->tickets.head) == want) {
+ add_stats(TAKEN_SLOW_PICKUP, 1);
+ goto out;
+ }
+
+ /* Allow interrupts while blocked */
+ local_irq_restore(flags);
+
+ /* halt until it's our turn and kicked. */
+ halt();
+
+ local_irq_save(flags);
+out:
+ cpumask_clear_cpu(cpu, &waiting_cpus);
+ w->lock = NULL;
+ local_irq_restore(flags);
+ spin_time_accum_blocked(start);
+}
+PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
+
+/* Kick vcpu waiting on @lock->head to reach value @ticket */
+static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
+{
+ int cpu;
+
+ add_stats(RELEASED_SLOW, 1);
+ for_each_cpu(cpu, &waiting_cpus) {
+ const struct kvm_lock_waiting *w = &per_cpu(lock_waiting, cpu);
+ if (ACCESS_ONCE(w->lock) == lock &&
+ ACCESS_ONCE(w->want) == ticket) {
+ add_stats(RELEASED_SLOW_KICKED, 1);
+ kvm_kick_cpu(cpu);
+ break;
+ }
+ }
+}
+
+/*
+ * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
+ */
+void __init kvm_spinlock_init(void)
+{
+ if (!kvm_para_available())
+ return;
+ /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
+ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+ return;
+
+ printk(KERN_INFO"KVM setup paravirtual spinlock\n");
+
+ static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+ pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+ pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
next prev parent reply other threads:[~2012-05-02 10:09 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-05-02 10:06 [PATCH RFC V8 0/17] Paravirtualized ticket spinlocks Raghavendra K T
2012-05-02 10:06 ` [PATCH RFC V8 1/17] x86/spinlock: Replace pv spinlocks with pv ticketlocks Raghavendra K T
2012-05-02 10:06 ` [PATCH RFC V8 2/17] x86/ticketlock: Don't inline _spin_unlock when using paravirt spinlocks Raghavendra K T
2012-05-02 10:06 ` [PATCH RFC V8 3/17] x86/ticketlock: Collapse a layer of functions Raghavendra K T
2012-05-02 10:07 ` [PATCH RFC V8 4/17] xen: Defer spinlock setup until boot CPU setup Raghavendra K T
2012-05-02 10:07 ` [PATCH RFC V8 5/17] xen/pvticketlock: Xen implementation for PV ticket locks Raghavendra K T
2012-05-02 10:07 ` [PATCH RFC V8 6/17] xen/pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks Raghavendra K T
2012-05-02 10:07 ` [PATCH RFC V8 7/17] x86/pvticketlock: Use callee-save for lock_spinning Raghavendra K T
2012-05-02 10:07 ` [PATCH RFC V8 8/17] x86/pvticketlock: When paravirtualizing ticket locks, increment by 2 Raghavendra K T
2012-05-02 10:08 ` [PATCH RFC V8 9/17] Split out rate limiting from jump_label.h Raghavendra K T
2012-05-02 10:08 ` [PATCH RFC V8 10/17] x86/ticketlock: Add slowpath logic Raghavendra K T
2012-05-02 10:08 ` [PATCH RFC V8 11/17] xen/pvticketlock: Allow interrupts to be enabled while blocking Raghavendra K T
2012-05-02 10:08 ` [PATCH RFC V8 12/17] xen: Enable PV ticketlocks on HVM Xen Raghavendra K T
2012-05-02 10:08 ` [PATCH RFC V8 13/17] kvm hypervisor : Add a hypercall to KVM hypervisor to support pv-ticketlocks Raghavendra K T
2012-05-02 10:09 ` [PATCH RFC V8 14/17] kvm : Fold pv_unhalt flag into GET_MP_STATE ioctl to aid migration Raghavendra K T
2012-05-02 10:09 ` [PATCH RFC V8 15/17] kvm guest : Add configuration support to enable debug information for KVM Guests Raghavendra K T
2012-05-02 10:09 ` Raghavendra K T [this message]
2012-05-02 10:09 ` [PATCH RFC V8 17/17] Documentation/kvm : Add documentation on Hypercalls and features used for PV spinlock Raghavendra K T
2012-05-30 11:54 ` Jan Kiszka
2012-05-30 13:44 ` Raghavendra K T
2012-05-07 8:29 ` [PATCH RFC V8 0/17] Paravirtualized ticket spinlocks Ingo Molnar
2012-05-07 8:32 ` Avi Kivity
2012-05-07 10:58 ` Raghavendra K T
2012-05-07 12:06 ` Avi Kivity
2012-05-07 13:20 ` Raghavendra K T
2012-05-07 13:22 ` Avi Kivity
2012-05-07 13:38 ` Raghavendra K T
2012-05-07 13:46 ` Srivatsa Vaddagiri
2012-05-07 13:49 ` Avi Kivity
2012-05-07 13:53 ` Raghavendra K T
2012-05-07 13:58 ` Avi Kivity
2012-05-07 14:47 ` Raghavendra K T
2012-05-07 14:52 ` Avi Kivity
2012-05-07 14:54 ` Avi Kivity
2012-05-07 17:25 ` Ingo Molnar
2012-05-07 20:42 ` Thomas Gleixner
2012-05-08 6:46 ` Nikunj A Dadhania
2012-05-15 11:26 ` [Xen-devel] " Jan Beulich
2012-05-08 5:25 ` Raghavendra K T
2012-05-13 18:45 ` Raghavendra K T
2012-05-14 4:57 ` Nikunj A Dadhania
2012-05-14 9:01 ` Raghavendra K T
2012-05-14 7:38 ` Jeremy Fitzhardinge
2012-05-14 8:11 ` Raghavendra K T
2012-05-16 3:19 ` Raghavendra K T
2012-05-30 11:26 ` Raghavendra K T
2012-06-14 12:21 ` Raghavendra K T
2012-05-07 13:55 ` Srivatsa Vaddagiri
2012-05-07 23:15 ` Jeremy Fitzhardinge
2012-05-08 1:13 ` Raghavendra K T
2012-05-08 9:08 ` Avi Kivity
2012-05-07 13:56 ` Raghavendra K T
2012-05-13 17:59 ` Raghavendra K T
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120502100936.13206.8094.sendpatchset@codeblue.in.ibm.com \
--to=raghavendra.kt@linux.vnet.ibm.com \
--cc=andi@firstfloor.org \
--cc=attilio.rao@citrix.com \
--cc=avi@redhat.com \
--cc=gleb@redhat.com \
--cc=gregkh@suse.de \
--cc=hpa@zytor.com \
--cc=jeremy@goop.org \
--cc=konrad.wilk@oracle.com \
--cc=kvm@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mtosatti@redhat.com \
--cc=stefano.stabellini@eu.citrix.com \
--cc=stephan.diestelhorst@amd.com \
--cc=torvalds@linux-foundation.org \
--cc=vatsa@linux.vnet.ibm.com \
--cc=virtualization@lists.linux-foundation.org \
--cc=x86@kernel.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).