From: Marcelo Tosatti <mtosatti@redhat.com>
To: "Nikunj A. Dadhania" <nikunj@linux.vnet.ibm.com>
Cc: peterz@infradead.org, mingo@elte.hu, avi@redhat.com,
raghukt@linux.vnet.ibm.com, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, x86@kernel.org, jeremy@goop.org,
vatsa@linux.vnet.ibm.com, hpa@zytor.com
Subject: Re: [PATCH v2 3/7] KVM: Add paravirt kvm_flush_tlb_others
Date: Tue, 3 Jul 2012 04:55:35 -0300 [thread overview]
Message-ID: <20120703075535.GA13291@amt.cnet> (raw)
In-Reply-To: <20120604050629.4560.85284.stgit@abhimanyu.in.ibm.com>
On Mon, Jun 04, 2012 at 10:37:24AM +0530, Nikunj A. Dadhania wrote:
> flush_tlb_others_ipi depends on lot of statics in tlb.c. Replicated
> the flush_tlb_others_ipi as kvm_flush_tlb_others to further adapt to
> paravirtualization.
>
> Use the vcpu state information inside the kvm_flush_tlb_others to
> avoid sending ipi to pre-empted vcpus.
>
> * Do not send ipi's to offline vcpus and set flush_on_enter flag
> * For online vcpus: Wait for them to clear the flag
>
> The approach was discussed here: https://lkml.org/lkml/2012/2/20/157
>
> Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Signed-off-by: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>
>
> --
> Pseudo Algo:
>
> Write()
> ======
>
> guest_exit()
> flush_on_enter[i]=0;
> running[i] = 0;
>
> guest_enter()
> running[i] = 1;
> smp_mb();
> if(flush_on_enter[i]) {
> tlb_flush()
> flush_on_enter[i]=0;
> }
>
>
> Read()
> ======
>
> GUEST KVM-HV
>
> f->flushcpumask = cpumask - me;
>
> again:
> for_each_cpu(i, f->flushmask) {
>
> if (!running[i]) {
> case 1:
>
> running[n]=1
>
> (cpuN does not see
> flush_on_enter set,
> guest later finds it
> running and sends ipi,
> we are fine here, need
> to clear the flag on
> guest_exit)
>
> flush_on_enter[i] = 1;
> case2:
>
> running[n]=1
> (cpuN - will see flush
> on enter and an IPI as
> well - addressed in patch-4)
>
> if (!running[i])
> cpu_clear(f->flushmask); All is well, vm_enter
> will do the fixup
> }
> case 3:
> running[n] = 0;
>
> (cpuN went to sleep,
> we saw it as awake,
> ipi sent, but wait
> will break without
> zero_mask and goto
> again will take care)
>
> }
> send_ipi(f->flushmask)
>
> wait_a_while_for_zero_mask();
>
> if (!zero_mask)
> goto again;
Can you please measure increased vmentry/vmexit overhead? x86/vmexit.c
of git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git should
help.
> arch/x86/include/asm/kvm_para.h | 3 +-
> arch/x86/include/asm/tlbflush.h | 9 ++++++
> arch/x86/kernel/kvm.c | 1 +
> arch/x86/kvm/x86.c | 14 ++++++++-
> arch/x86/mm/tlb.c | 61 +++++++++++++++++++++++++++++++++++++++
> 5 files changed, 86 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index f57b5cc..684a285 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -55,7 +55,8 @@ struct kvm_steal_time {
>
> struct kvm_vcpu_state {
> __u32 state;
> - __u32 pad[15];
> + __u32 flush_on_enter;
> + __u32 pad[14];
> };
>
> #define KVM_VCPU_STATE_ALIGN_BITS 5
> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index c0e108e..29470bd 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -119,6 +119,12 @@ static inline void native_flush_tlb_others(const struct cpumask *cpumask,
> {
> }
>
> +static inline void kvm_flush_tlb_others(const struct cpumask *cpumask,
> + struct mm_struct *mm,
> + unsigned long va)
> +{
> +}
> +
> static inline void reset_lazy_tlbstate(void)
> {
> }
> @@ -145,6 +151,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
> void native_flush_tlb_others(const struct cpumask *cpumask,
> struct mm_struct *mm, unsigned long va);
>
> +void kvm_flush_tlb_others(const struct cpumask *cpumask,
> + struct mm_struct *mm, unsigned long va);
> +
> #define TLBSTATE_OK 1
> #define TLBSTATE_LAZY 2
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index bb686a6..66db54e 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -465,6 +465,7 @@ void __init kvm_guest_init(void)
> }
>
> has_vcpu_state = 1;
> + pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
>
> #ifdef CONFIG_SMP
> smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 264f172..4714a7b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1548,9 +1548,20 @@ static void kvm_set_vcpu_state(struct kvm_vcpu *vcpu)
> if (!(vcpu->arch.v_state.msr_val & KVM_MSR_ENABLED))
> return;
>
> + /*
> + * Let the guest know that we are online, make sure we do not
> + * overwrite flush_on_enter, just write the vs->state.
> + */
> vs->state = 1;
> - kvm_write_guest_cached(vcpu->kvm, ghc, vs, 2*sizeof(__u32));
> + kvm_write_guest_cached(vcpu->kvm, ghc, vs, 1*sizeof(__u32));
> smp_wmb();
> + /*
> + * Guest might have seen us offline and would have set
> + * flush_on_enter.
> + */
> + kvm_read_guest_cached(vcpu->kvm, ghc, vs, 2*sizeof(__u32));
> + if (vs->flush_on_enter)
> + kvm_x86_ops->tlb_flush(vcpu);
So flush_tlb_page which was an invlpg now flushes the entire TLB. Did
you take that into account?
> static void kvm_clear_vcpu_state(struct kvm_vcpu *vcpu)
> @@ -1561,6 +1572,7 @@ static void kvm_clear_vcpu_state(struct kvm_vcpu *vcpu)
> if (!(vcpu->arch.v_state.msr_val & KVM_MSR_ENABLED))
> return;
>
> + vs->flush_on_enter = 0;
> vs->state = 0;
> kvm_write_guest_cached(vcpu->kvm, ghc, vs, 2*sizeof(__u32));
> smp_wmb();
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index d6c0418..f5dacdd 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -6,6 +6,7 @@
> #include <linux/interrupt.h>
> #include <linux/module.h>
> #include <linux/cpu.h>
> +#include <linux/kvm_para.h>
>
> #include <asm/tlbflush.h>
> #include <asm/mmu_context.h>
> @@ -202,6 +203,66 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
> raw_spin_unlock(&f->tlbstate_lock);
> }
>
> +#ifdef CONFIG_KVM_GUEST
> +
> +DECLARE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64);
> +
> +void kvm_flush_tlb_others(const struct cpumask *cpumask,
> + struct mm_struct *mm, unsigned long va)
> +{
> + unsigned int sender;
> + union smp_flush_state *f;
> + int cpu, loop;
> + struct kvm_vcpu_state *v_state;
> +
> + /* Caller has disabled preemption */
> + sender = this_cpu_read(tlb_vector_offset);
> + f = &flush_state[sender];
> +
> + if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
> + raw_spin_lock(&f->tlbstate_lock);
> +
> + f->flush_mm = mm;
> + f->flush_va = va;
> + if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
> + /*
> + * We have to send the IPI only to online vCPUs
> + * affected. And queue flush_on_enter for pre-empted
> + * vCPUs
> + */
> +again:
> + for_each_cpu(cpu, to_cpumask(f->flush_cpumask)) {
> + v_state = &per_cpu(vcpu_state, cpu);
> +
> + if (!v_state->state) {
Should use ACCESS_ONCE to make sure the value is not register cached.
> + v_state->flush_on_enter = 1;
> + smp_mb();
> + if (!v_state->state)
And here.
> + cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
> + }
> + }
> +
> + if (cpumask_empty(to_cpumask(f->flush_cpumask)))
> + goto out;
> +
> + apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
> + INVALIDATE_TLB_VECTOR_START + sender);
> +
> + loop = 1000;
> + while (!cpumask_empty(to_cpumask(f->flush_cpumask)) && --loop)
> + cpu_relax();
> +
> + if (!cpumask_empty(to_cpumask(f->flush_cpumask)))
> + goto again;
Is this necessary in addition to the in-guest-mode/out-guest-mode
detection? If so, why?
> + }
> +out:
> + f->flush_mm = NULL;
> + f->flush_va = 0;
> + if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
> + raw_spin_unlock(&f->tlbstate_lock);
> +}
> +#endif /* CONFIG_KVM_GUEST */
> +
> void native_flush_tlb_others(const struct cpumask *cpumask,
> struct mm_struct *mm, unsigned long va)
> {
next prev parent reply other threads:[~2012-07-03 7:55 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-04 5:05 [PATCH v2 0/7] KVM paravirt remote flush tlb Nikunj A. Dadhania
2012-06-04 5:06 ` [PATCH v2 1/7] KVM Guest: Add VCPU running/pre-empted state for guest Nikunj A. Dadhania
2012-06-12 22:43 ` Marcelo Tosatti
2012-06-19 6:03 ` Nikunj A Dadhania
2012-06-04 5:06 ` [PATCH v2 2/7] KVM-HV: " Nikunj A. Dadhania
2012-06-04 5:07 ` [PATCH v2 3/7] KVM: Add paravirt kvm_flush_tlb_others Nikunj A. Dadhania
2012-06-12 23:02 ` Marcelo Tosatti
2012-06-19 6:11 ` Nikunj A Dadhania
2012-06-21 12:26 ` Marcelo Tosatti
2012-07-03 7:55 ` Marcelo Tosatti [this message]
2012-07-03 8:19 ` Nikunj A Dadhania
2012-07-05 2:09 ` Marcelo Tosatti
2012-07-05 5:55 ` Nikunj A Dadhania
2012-07-06 9:47 ` Nikunj A Dadhania
2012-07-03 8:11 ` Marcelo Tosatti
2012-07-03 8:27 ` Nikunj A Dadhania
2012-06-04 5:07 ` [PATCH v2 4/7] KVM: export kvm_kick_vcpu for pv_flush Nikunj A. Dadhania
2012-06-04 5:08 ` [PATCH v2 5/7] KVM: Introduce PV kick in flush tlb Nikunj A. Dadhania
2012-07-03 8:07 ` Marcelo Tosatti
2012-07-03 8:25 ` Nikunj A Dadhania
2012-07-05 2:37 ` Marcelo Tosatti
2012-07-05 5:53 ` Nikunj A Dadhania
2012-06-04 5:08 ` [PATCH v2 6/7] kvm,x86: RCU based table free Nikunj A. Dadhania
2012-06-05 10:48 ` Stefano Stabellini
2012-06-05 11:08 ` Nikunj A Dadhania
2012-06-05 11:58 ` Stefano Stabellini
2012-06-05 13:04 ` Nikunj A Dadhania
2012-06-05 13:08 ` Peter Zijlstra
2012-06-05 13:26 ` Stefano Stabellini
2012-06-05 13:31 ` Peter Zijlstra
2012-06-05 13:41 ` Stefano Stabellini
2012-08-01 11:23 ` Stefano Stabellini
2012-08-01 12:12 ` Nikunj A Dadhania
2012-08-01 12:59 ` Stefano Stabellini
2012-06-05 15:29 ` Nikunj A Dadhania
2012-06-05 13:21 ` Stefano Stabellini
2012-06-04 5:08 ` [PATCH v2 7/7] Flush page-table pages before freeing them Nikunj A. Dadhania
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120703075535.GA13291@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=hpa@zytor.com \
--cc=jeremy@goop.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=nikunj@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=raghukt@linux.vnet.ibm.com \
--cc=vatsa@linux.vnet.ibm.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.