From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
To: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: gleb@redhat.com, mingo@redhat.com, jeremy@goop.org,
x86@kernel.org, hpa@zytor.com, pbonzini@redhat.com,
linux-doc@vger.kernel.org, habanero@linux.vnet.ibm.com,
xen-devel@lists.xensource.com, peterz@infradead.org,
mtosatti@redhat.com, stefano.stabellini@eu.citrix.com,
andi@firstfloor.org, attilio.rao@citrix.com, ouyang@cs.pitt.edu,
gregkh@suse.de, agraf@suse.de, chegu_vinod@hp.com,
torvalds@linux-foundation.org, avi.kivity@gmail.com,
tglx@linutronix.de, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, riel@redhat.com,
drjones@redhat.com, virtualization@lists.linux-foundation.org,
srivatsa.vaddagiri@gmail.com
Subject: Re: [PATCH RFC V9 16/19] kvm : Paravirtual ticketlocks support for linux guests running on KVM hypervisor
Date: Tue, 04 Jun 2013 12:49:32 +0530 [thread overview]
Message-ID: <51AD9504.60508@linux.vnet.ibm.com> (raw)
In-Reply-To: <20130603160010.GF4224@phenom.dumpdata.com>
On 06/03/2013 09:30 PM, Konrad Rzeszutek Wilk wrote:
> On Sun, Jun 02, 2013 at 12:55:57AM +0530, Raghavendra K T wrote:
>> kvm : Paravirtual ticketlocks support for linux guests running on KVM hypervisor
>>
>> From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
>>
>> During smp_boot_cpus paravirtualized KVM guest detects if the hypervisor has
>> required feature (KVM_FEATURE_PV_UNHALT) to support pv-ticketlocks. If so,
>> support for pv-ticketlocks is registered via pv_lock_ops.
>>
>> Use KVM_HC_KICK_CPU hypercall to wakeup waiting/halted vcpu.
>>
>> Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
>> Signed-off-by: Suzuki Poulose <suzuki@in.ibm.com>
>> [Raghu: check_zero race fix, enum for kvm_contention_stat
>> jumplabel related changes ]
>> Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
>> ---
>> arch/x86/include/asm/kvm_para.h | 14 ++
>> arch/x86/kernel/kvm.c | 256 +++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 268 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
>> index 695399f..427afcb 100644
>> --- a/arch/x86/include/asm/kvm_para.h
>> +++ b/arch/x86/include/asm/kvm_para.h
>> @@ -118,10 +118,20 @@ void kvm_async_pf_task_wait(u32 token);
>> void kvm_async_pf_task_wake(u32 token);
>> u32 kvm_read_and_reset_pf_reason(void);
>> extern void kvm_disable_steal_time(void);
>> -#else
>> -#define kvm_guest_init() do { } while (0)
>> +
>> +#ifdef CONFIG_PARAVIRT_SPINLOCKS
>> +void __init kvm_spinlock_init(void);
>> +#else /* !CONFIG_PARAVIRT_SPINLOCKS */
>> +static inline void kvm_spinlock_init(void)
>> +{
>> +}
>> +#endif /* CONFIG_PARAVIRT_SPINLOCKS */
>> +
>> +#else /* CONFIG_KVM_GUEST */
>> +#define kvm_guest_init() do {} while (0)
>> #define kvm_async_pf_task_wait(T) do {} while(0)
>> #define kvm_async_pf_task_wake(T) do {} while(0)
>> +
>> static inline u32 kvm_read_and_reset_pf_reason(void)
>> {
>> return 0;
>> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
>> index cd6d9a5..2715b92 100644
>> --- a/arch/x86/kernel/kvm.c
>> +++ b/arch/x86/kernel/kvm.c
>> @@ -34,6 +34,7 @@
>> #include <linux/sched.h>
>> #include <linux/slab.h>
>> #include <linux/kprobes.h>
>> +#include <linux/debugfs.h>
>> #include <asm/timer.h>
>> #include <asm/cpu.h>
>> #include <asm/traps.h>
>> @@ -419,6 +420,7 @@ static void __init kvm_smp_prepare_boot_cpu(void)
>> WARN_ON(kvm_register_clock("primary cpu clock"));
>> kvm_guest_cpu_init();
>> native_smp_prepare_boot_cpu();
>> + kvm_spinlock_init();
>> }
>>
>> static void __cpuinit kvm_guest_cpu_online(void *dummy)
>> @@ -523,3 +525,257 @@ static __init int activate_jump_labels(void)
>> return 0;
>> }
>> arch_initcall(activate_jump_labels);
>> +
>> +/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
>> +void kvm_kick_cpu(int cpu)
>> +{
>> + int apicid;
>> +
>> + apicid = per_cpu(x86_cpu_to_apicid, cpu);
>> + kvm_hypercall1(KVM_HC_KICK_CPU, apicid);
>> +}
>> +
>> +#ifdef CONFIG_PARAVIRT_SPINLOCKS
>> +
>> +enum kvm_contention_stat {
>> + TAKEN_SLOW,
>> + TAKEN_SLOW_PICKUP,
>> + RELEASED_SLOW,
>> + RELEASED_SLOW_KICKED,
>> + NR_CONTENTION_STATS
>> +};
>> +
>> +#ifdef CONFIG_KVM_DEBUG_FS
>> +#define HISTO_BUCKETS 30
>> +
>> +static struct kvm_spinlock_stats
>> +{
>> + u32 contention_stats[NR_CONTENTION_STATS];
>> + u32 histo_spin_blocked[HISTO_BUCKETS+1];
>> + u64 time_blocked;
>> +} spinlock_stats;
>> +
>> +static u8 zero_stats;
>> +
>> +static inline void check_zero(void)
>> +{
>> + u8 ret;
>> + u8 old;
>> +
>> + old = ACCESS_ONCE(zero_stats);
>> + if (unlikely(old)) {
>> + ret = cmpxchg(&zero_stats, old, 0);
>> + /* This ensures only one fellow resets the stat */
>> + if (ret == old)
>> + memset(&spinlock_stats, 0, sizeof(spinlock_stats));
>> + }
>> +}
>> +
>> +static inline void add_stats(enum kvm_contention_stat var, u32 val)
>> +{
>> + check_zero();
>> + spinlock_stats.contention_stats[var] += val;
>> +}
>> +
>> +
>> +static inline u64 spin_time_start(void)
>> +{
>> + return sched_clock();
>> +}
>> +
>> +static void __spin_time_accum(u64 delta, u32 *array)
>> +{
>> + unsigned index;
>> +
>> + index = ilog2(delta);
>> + check_zero();
>> +
>> + if (index < HISTO_BUCKETS)
>> + array[index]++;
>> + else
>> + array[HISTO_BUCKETS]++;
>> +}
>> +
>> +static inline void spin_time_accum_blocked(u64 start)
>> +{
>> + u32 delta;
>> +
>> + delta = sched_clock() - start;
>> + __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
>> + spinlock_stats.time_blocked += delta;
>> +}
>> +
>> +static struct dentry *d_spin_debug;
>> +static struct dentry *d_kvm_debug;
>> +
>> +struct dentry *kvm_init_debugfs(void)
>> +{
>> + d_kvm_debug = debugfs_create_dir("kvm", NULL);
>> + if (!d_kvm_debug)
>> + printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
>> +
>> + return d_kvm_debug;
>> +}
>> +
>> +static int __init kvm_spinlock_debugfs(void)
>> +{
>> + struct dentry *d_kvm;
>> +
>> + d_kvm = kvm_init_debugfs();
>> + if (d_kvm == NULL)
>> + return -ENOMEM;
>> +
>> + d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
>> +
>> + debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
>> +
>> + debugfs_create_u32("taken_slow", 0444, d_spin_debug,
>> + &spinlock_stats.contention_stats[TAKEN_SLOW]);
>> + debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
>> + &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
>> +
>> + debugfs_create_u32("released_slow", 0444, d_spin_debug,
>> + &spinlock_stats.contention_stats[RELEASED_SLOW]);
>> + debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
>> + &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
>> +
>> + debugfs_create_u64("time_blocked", 0444, d_spin_debug,
>> + &spinlock_stats.time_blocked);
>> +
>> + debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
>> + spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
>> +
>> + return 0;
>> +}
>> +fs_initcall(kvm_spinlock_debugfs);
>> +#else /* !CONFIG_KVM_DEBUG_FS */
>> +#define TIMEOUT (1 << 10)
>
> What do you use that for?
>
>
Thanks Konrad for the review. Great eyes! .. will remove this in next patch.
>> +static inline void add_stats(enum kvm_contention_stat var, u32 val)
>> +{
>> +}
>> +
>> +static inline u64 spin_time_start(void)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline void spin_time_accum_blocked(u64 start)
>> +{
>> +}
>> +#endif /* CONFIG_KVM_DEBUG_FS */
>> +
>> +struct kvm_lock_waiting {
>> + struct arch_spinlock *lock;
>> + __ticket_t want;
>> +};
>> +
>> +/* cpus 'waiting' on a spinlock to become available */
>> +static cpumask_t waiting_cpus;
>> +
>> +/* Track spinlock on which a cpu is waiting */
>> +static DEFINE_PER_CPU(struct kvm_lock_waiting, lock_waiting);
>> +
>> +static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
>> +{
>> + struct kvm_lock_waiting *w;
>> + int cpu;
>> + u64 start;
>> + unsigned long flags;
>> +
>> + w = &__get_cpu_var(lock_waiting);
>> + cpu = smp_processor_id();
>> + start = spin_time_start();
>> +
>> + /*
>> + * Make sure an interrupt handler can't upset things in a
>> + * partially setup state.
>> + */
>> + local_irq_save(flags);
>> +
>> + /*
>> + * The ordering protocol on this is that the "lock" pointer
>> + * may only be set non-NULL if the "want" ticket is correct.
>> + * If we're updating "want", we must first clear "lock".
>> + */
>> + w->lock = NULL;
>> + smp_wmb();
>> + w->want = want;
>> + smp_wmb();
>> + w->lock = lock;
>> +
>> + add_stats(TAKEN_SLOW, 1);
>> +
>> + /*
>> + * This uses set_bit, which is atomic but we should not rely on its
>> + * reordering guarantees. So barrier is needed after this call.
>> + */
>> + cpumask_set_cpu(cpu, &waiting_cpus);
>> +
>> + barrier();
>> +
>> + /*
>> + * Mark entry to slowpath before doing the pickup test to make
>> + * sure we don't deadlock with an unlocker.
>> + */
>> + __ticket_enter_slowpath(lock);
>> +
>> + /*
>> + * check again make sure it didn't become free while
>> + * we weren't looking.
>> + */
>> + if (ACCESS_ONCE(lock->tickets.head) == want) {
>> + add_stats(TAKEN_SLOW_PICKUP, 1);
>> + goto out;
>> + }
>> +
>> + /* Allow interrupts while blocked */
>> + local_irq_restore(flags);
>> +
>> + /* halt until it's our turn and kicked. */
>> + halt();
>> +
>> + local_irq_save(flags);
>> +out:
>> + cpumask_clear_cpu(cpu, &waiting_cpus);
>> + w->lock = NULL;
>> + local_irq_restore(flags);
>> + spin_time_accum_blocked(start);
>> +}
>> +PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
>> +
>> +/* Kick vcpu waiting on @lock->head to reach value @ticket */
>> +static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
>> +{
>> + int cpu;
>> +
>> + add_stats(RELEASED_SLOW, 1);
>> + for_each_cpu(cpu, &waiting_cpus) {
>> + const struct kvm_lock_waiting *w = &per_cpu(lock_waiting, cpu);
>> + if (ACCESS_ONCE(w->lock) == lock &&
>> + ACCESS_ONCE(w->want) == ticket) {
>> + add_stats(RELEASED_SLOW_KICKED, 1);
>> + kvm_kick_cpu(cpu);
>> + break;
>> + }
>> + }
>> +}
>> +
>> +/*
>> + * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
>> + */
>> +void __init kvm_spinlock_init(void)
>> +{
>> + if (!kvm_para_available())
>> + return;
>> + /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
>> + if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
>> + return;
>> +
>> + printk(KERN_INFO"KVM setup paravirtual spinlock\n");
>
> That spacing is odd.
Yes. Will modify in the next version.
>
>> +
>> + static_key_slow_inc(&paravirt_ticketlocks_enabled);
>> +
>> + pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
>> + pv_lock_ops.unlock_kick = kvm_unlock_kick;
>> +}
>> +#endif /* CONFIG_PARAVIRT_SPINLOCKS */
>>
>
>
next prev parent reply other threads:[~2013-06-04 7:19 UTC|newest]
Thread overview: 78+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-06-01 19:21 [PATCH RFC V9 0/19] Paravirtualized ticket spinlocks Raghavendra K T
2013-06-01 19:21 ` [PATCH RFC V9 1/19] x86/spinlock: Replace pv spinlocks with pv ticketlocks Raghavendra K T
2013-06-01 20:32 ` Jeremy Fitzhardinge
2013-06-02 6:54 ` Raghavendra K T
2013-06-01 19:22 ` [PATCH RFC V9 2/19] x86/ticketlock: Don't inline _spin_unlock when using paravirt spinlocks Raghavendra K T
2013-06-03 15:28 ` Konrad Rzeszutek Wilk
2013-06-01 19:22 ` [PATCH RFC V9 3/19] x86/ticketlock: Collapse a layer of functions Raghavendra K T
2013-06-03 15:28 ` Konrad Rzeszutek Wilk
2013-06-01 19:22 ` [PATCH RFC V9 4/19] xen: Defer spinlock setup until boot CPU setup Raghavendra K T
2013-06-01 19:23 ` [PATCH RFC V9 5/19] xen/pvticketlock: Xen implementation for PV ticket locks Raghavendra K T
2013-06-03 16:03 ` Konrad Rzeszutek Wilk
2013-06-04 7:21 ` Raghavendra K T
2013-06-01 19:23 ` [PATCH RFC V9 6/19] xen/pvticketlocks: Add xen_nopvspin parameter to disable xen pv ticketlocks Raghavendra K T
2013-06-01 19:23 ` [PATCH RFC V9 7/19] x86/pvticketlock: Use callee-save for lock_spinning Raghavendra K T
2013-06-01 19:24 ` [PATCH RFC V9 8/19] x86/pvticketlock: When paravirtualizing ticket locks, increment by 2 Raghavendra K T
2013-06-03 15:53 ` Konrad Rzeszutek Wilk
2013-06-01 19:24 ` [PATCH RFC V9 9/19] Split out rate limiting from jump_label.h Raghavendra K T
2013-06-03 15:56 ` Konrad Rzeszutek Wilk
2013-06-04 7:15 ` Raghavendra K T
2013-06-01 19:24 ` [PATCH RFC V9 10/19] x86/ticketlock: Add slowpath logic Raghavendra K T
2013-06-01 19:24 ` [PATCH RFC V9 11/19] xen/pvticketlock: Allow interrupts to be enabled while blocking Raghavendra K T
2013-06-01 19:25 ` [PATCH RFC V9 12/19] xen: Enable PV ticketlocks on HVM Xen Raghavendra K T
2013-06-03 15:57 ` Konrad Rzeszutek Wilk
2013-06-04 7:16 ` Raghavendra K T
2013-06-04 14:44 ` Konrad Rzeszutek Wilk
2013-06-04 15:00 ` Raghavendra K T
2013-06-01 19:25 ` [PATCH RFC V9 13/19] kvm hypervisor : Add a hypercall to KVM hypervisor to support pv-ticketlocks Raghavendra K T
2013-06-01 19:25 ` [PATCH RFC V9 14/19] kvm : Fold pv_unhalt flag into GET_MP_STATE ioctl to aid migration Raghavendra K T
2013-06-01 19:25 ` [PATCH RFC V9 15/19] kvm guest : Add configuration support to enable debug information for KVM Guests Raghavendra K T
2013-06-01 19:25 ` [PATCH RFC V9 16/19] kvm : Paravirtual ticketlocks support for linux guests running on KVM hypervisor Raghavendra K T
2013-06-03 16:00 ` Konrad Rzeszutek Wilk
2013-06-04 7:19 ` Raghavendra K T [this message]
2013-06-01 19:26 ` [PATCH RFC V9 17/19] kvm hypervisor : Simplify kvm_for_each_vcpu with kvm_irq_delivery_to_apic Raghavendra K T
2013-06-01 19:26 ` [PATCH RFC V9 18/19] Documentation/kvm : Add documentation on Hypercalls and features used for PV spinlock Raghavendra K T
2013-06-03 16:04 ` Konrad Rzeszutek Wilk
2013-06-04 7:22 ` Raghavendra K T
2013-06-01 19:26 ` [PATCH RFC V9 19/19] kvm hypervisor: Add directed yield in vcpu block path Raghavendra K T
2013-06-03 16:05 ` Konrad Rzeszutek Wilk
2013-06-04 7:28 ` Raghavendra K T
2013-06-02 8:07 ` [PATCH RFC V9 0/19] Paravirtualized ticket spinlocks Gleb Natapov
2013-06-02 16:20 ` Jiannan Ouyang
2013-06-03 1:40 ` Raghavendra K T
2013-06-03 6:21 ` Raghavendra K T
2013-06-07 6:15 ` Raghavendra K T
2013-06-07 13:29 ` Andrew Theurer
2013-06-07 23:41 ` Jiannan Ouyang
2013-06-25 14:50 ` Andrew Theurer
2013-06-26 8:45 ` Raghavendra K T
2013-06-26 11:37 ` Andrew Jones
2013-06-26 12:52 ` Gleb Natapov
2013-06-26 13:40 ` Raghavendra K T
2013-06-26 14:39 ` Chegu Vinod
2013-06-26 15:37 ` Raghavendra K T
2013-06-26 16:11 ` Gleb Natapov
2013-06-26 17:54 ` Raghavendra K T
2013-07-09 9:11 ` Raghavendra K T
2013-07-10 10:33 ` Gleb Natapov
2013-07-10 10:40 ` Peter Zijlstra
2013-07-10 10:47 ` Gleb Natapov
2013-07-10 11:28 ` Raghavendra K T
2013-07-10 11:29 ` Gleb Natapov
2013-07-10 11:40 ` Raghavendra K T
2013-07-10 15:03 ` Konrad Rzeszutek Wilk
2013-07-10 15:16 ` Gleb Natapov
2013-07-11 0:12 ` Konrad Rzeszutek Wilk
2013-07-10 11:24 ` Raghavendra K T
2013-07-10 11:41 ` Gleb Natapov
2013-07-10 11:50 ` Raghavendra K T
2013-07-11 9:13 ` Raghavendra K T
2013-07-11 9:48 ` Gleb Natapov
2013-07-11 10:10 ` Raghavendra K T
2013-07-11 10:11 ` Gleb Natapov
2013-07-11 10:53 ` Raghavendra K T
2013-07-11 10:56 ` Gleb Natapov
2013-07-11 11:14 ` Raghavendra K T
2013-06-26 14:13 ` Konrad Rzeszutek Wilk
2013-06-26 15:56 ` Andrew Theurer
2013-07-01 9:30 ` Raghavendra K T
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=51AD9504.60508@linux.vnet.ibm.com \
--to=raghavendra.kt@linux.vnet.ibm.com \
--cc=agraf@suse.de \
--cc=andi@firstfloor.org \
--cc=attilio.rao@citrix.com \
--cc=avi.kivity@gmail.com \
--cc=chegu_vinod@hp.com \
--cc=drjones@redhat.com \
--cc=gleb@redhat.com \
--cc=gregkh@suse.de \
--cc=habanero@linux.vnet.ibm.com \
--cc=hpa@zytor.com \
--cc=jeremy@goop.org \
--cc=konrad.wilk@oracle.com \
--cc=kvm@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mtosatti@redhat.com \
--cc=ouyang@cs.pitt.edu \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=riel@redhat.com \
--cc=srivatsa.vaddagiri@gmail.com \
--cc=stefano.stabellini@eu.citrix.com \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=virtualization@lists.linux-foundation.org \
--cc=x86@kernel.org \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).