Re: [PATCH v2 3/7] KVM: Add paravirt kvm_flush_tlb_others

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Marcelo Tosatti <mtosatti@redhat.com>
To: "Nikunj A. Dadhania" <nikunj@linux.vnet.ibm.com>
Cc: peterz@infradead.org, mingo@elte.hu, avi@redhat.com,
	raghukt@linux.vnet.ibm.com, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, x86@kernel.org, jeremy@goop.org,
	vatsa@linux.vnet.ibm.com, hpa@zytor.com
Subject: Re: [PATCH v2 3/7] KVM: Add paravirt kvm_flush_tlb_others
Date: Tue, 3 Jul 2012 04:55:35 -0300	[thread overview]
Message-ID: <20120703075535.GA13291@amt.cnet> (raw)
In-Reply-To: <20120604050629.4560.85284.stgit@abhimanyu.in.ibm.com>

On Mon, Jun 04, 2012 at 10:37:24AM +0530, Nikunj A. Dadhania wrote:
> flush_tlb_others_ipi depends on lot of statics in tlb.c.  Replicated
> the flush_tlb_others_ipi as kvm_flush_tlb_others to further adapt to
> paravirtualization.
> 
> Use the vcpu state information inside the kvm_flush_tlb_others to
> avoid sending ipi to pre-empted vcpus.
> 
> * Do not send ipi's to offline vcpus and set flush_on_enter flag
> * For online vcpus: Wait for them to clear the flag
> 
> The approach was discussed here: https://lkml.org/lkml/2012/2/20/157
> 
> Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Signed-off-by: Nikunj A. Dadhania <nikunj@linux.vnet.ibm.com>
> 
> --
> Pseudo Algo:
> 
>    Write()
>    ======
> 
> 	   guest_exit()
> 		   flush_on_enter[i]=0;
> 		   running[i] = 0;
> 
> 	   guest_enter()
> 		   running[i] = 1;
> 		   smp_mb();
> 		   if(flush_on_enter[i]) {
> 			   tlb_flush()
> 			   flush_on_enter[i]=0;
> 		   }
> 
> 
>    Read()
>    ======
> 
> 	   GUEST                                                KVM-HV
> 
>    f->flushcpumask = cpumask - me;
> 
> again:
>    for_each_cpu(i, f->flushmask) {
> 
> 	   if (!running[i]) {
> 						   case 1:
> 
> 						   running[n]=1
> 
> 						   (cpuN does not see
> 						   flush_on_enter set,
> 						   guest later finds it
> 						   running and sends ipi,
> 						   we are fine here, need
> 						   to clear the flag on
> 						   guest_exit)
> 
> 		  flush_on_enter[i] = 1;
> 						   case2:
> 
> 						   running[n]=1
> 						   (cpuN - will see flush
> 						   on enter and an IPI as
> 						   well - addressed in patch-4)
> 
> 		  if (!running[i])
> 		     cpu_clear(f->flushmask);      All is well, vm_enter
> 						   will do the fixup
> 	   }
> 						   case 3:
> 						   running[n] = 0;
> 
> 						   (cpuN went to sleep,
> 						   we saw it as awake,
> 						   ipi sent, but wait
> 						   will break without
> 						   zero_mask and goto
> 						   again will take care)
> 
>    }
>    send_ipi(f->flushmask)
> 
>    wait_a_while_for_zero_mask();
> 
>    if (!zero_mask)
> 	   goto again;

Can you please measure increased vmentry/vmexit overhead? x86/vmexit.c 
of git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git should 
help.

>  arch/x86/include/asm/kvm_para.h |    3 +-
>  arch/x86/include/asm/tlbflush.h |    9 ++++++
>  arch/x86/kernel/kvm.c           |    1 +
>  arch/x86/kvm/x86.c              |   14 ++++++++-
>  arch/x86/mm/tlb.c               |   61 +++++++++++++++++++++++++++++++++++++++
>  5 files changed, 86 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index f57b5cc..684a285 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -55,7 +55,8 @@ struct kvm_steal_time {
>  
>  struct kvm_vcpu_state {
>  	__u32 state;
> -	__u32 pad[15];
> +	__u32 flush_on_enter;
> +	__u32 pad[14];
>  };
>  
>  #define KVM_VCPU_STATE_ALIGN_BITS 5
> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index c0e108e..29470bd 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -119,6 +119,12 @@ static inline void native_flush_tlb_others(const struct cpumask *cpumask,
>  {
>  }
>  
> +static inline void kvm_flush_tlb_others(const struct cpumask *cpumask,
> +					struct mm_struct *mm,
> +					unsigned long va)
> +{
> +}
> +
>  static inline void reset_lazy_tlbstate(void)
>  {
>  }
> @@ -145,6 +151,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>  void native_flush_tlb_others(const struct cpumask *cpumask,
>  			     struct mm_struct *mm, unsigned long va);
>  
> +void kvm_flush_tlb_others(const struct cpumask *cpumask,
> +			  struct mm_struct *mm, unsigned long va);
> +
>  #define TLBSTATE_OK	1
>  #define TLBSTATE_LAZY	2
>  
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index bb686a6..66db54e 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -465,6 +465,7 @@ void __init kvm_guest_init(void)
>  	}
>  
>  	has_vcpu_state = 1;
> +	pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
>  
>  #ifdef CONFIG_SMP
>  	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 264f172..4714a7b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1548,9 +1548,20 @@ static void kvm_set_vcpu_state(struct kvm_vcpu *vcpu)
>  	if (!(vcpu->arch.v_state.msr_val & KVM_MSR_ENABLED))
>  		return;
>  
> +	/* 
> +	 * Let the guest know that we are online, make sure we do not
> +	 * overwrite flush_on_enter, just write the vs->state.
> +	 */
>  	vs->state = 1;
> -	kvm_write_guest_cached(vcpu->kvm, ghc, vs, 2*sizeof(__u32));
> +	kvm_write_guest_cached(vcpu->kvm, ghc, vs, 1*sizeof(__u32));
>  	smp_wmb();
> +	/* 
> +	 * Guest might have seen us offline and would have set
> +	 * flush_on_enter. 
> +	 */
> +	kvm_read_guest_cached(vcpu->kvm, ghc, vs, 2*sizeof(__u32));
> +	if (vs->flush_on_enter) 
> +		kvm_x86_ops->tlb_flush(vcpu);


So flush_tlb_page which was an invlpg now flushes the entire TLB. Did
you take that into account?

>  static void kvm_clear_vcpu_state(struct kvm_vcpu *vcpu)
> @@ -1561,6 +1572,7 @@ static void kvm_clear_vcpu_state(struct kvm_vcpu *vcpu)
>  	if (!(vcpu->arch.v_state.msr_val & KVM_MSR_ENABLED))
>  		return;
>  
> +	vs->flush_on_enter = 0;
>  	vs->state = 0;
>  	kvm_write_guest_cached(vcpu->kvm, ghc, vs, 2*sizeof(__u32));
>  	smp_wmb();
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index d6c0418..f5dacdd 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -6,6 +6,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/module.h>
>  #include <linux/cpu.h>
> +#include <linux/kvm_para.h>
>  
>  #include <asm/tlbflush.h>
>  #include <asm/mmu_context.h>
> @@ -202,6 +203,66 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
>  		raw_spin_unlock(&f->tlbstate_lock);
>  }
>  
> +#ifdef CONFIG_KVM_GUEST
> +
> +DECLARE_PER_CPU(struct kvm_vcpu_state, vcpu_state) __aligned(64);
> +
> +void kvm_flush_tlb_others(const struct cpumask *cpumask,
> +			struct mm_struct *mm, unsigned long va)
> +{
> +	unsigned int sender;
> +	union smp_flush_state *f;
> +	int cpu, loop;
> +	struct kvm_vcpu_state *v_state;
> +
> +	/* Caller has disabled preemption */
> +	sender = this_cpu_read(tlb_vector_offset);
> +	f = &flush_state[sender];
> +
> +	if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
> +		raw_spin_lock(&f->tlbstate_lock);
> +
> +	f->flush_mm = mm;
> +	f->flush_va = va;
> +	if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
> +		/*
> +		 * We have to send the IPI only to online vCPUs
> +		 * affected. And queue flush_on_enter for pre-empted
> +		 * vCPUs
> +		 */
> +again:
> +		for_each_cpu(cpu, to_cpumask(f->flush_cpumask)) {
> +			v_state = &per_cpu(vcpu_state, cpu);
> +
> +			if (!v_state->state) {

Should use ACCESS_ONCE to make sure the value is not register cached.

> +				v_state->flush_on_enter = 1;
> +				smp_mb();
> +				if (!v_state->state)

And here.

> +					cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
> +			}
> +		}
> +
> +		if (cpumask_empty(to_cpumask(f->flush_cpumask)))
> +			goto out;
> +
> +		apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
> +				    INVALIDATE_TLB_VECTOR_START + sender);
> +
> +		loop = 1000;
> +		while (!cpumask_empty(to_cpumask(f->flush_cpumask)) && --loop)
> +			cpu_relax();
> +
> +		if (!cpumask_empty(to_cpumask(f->flush_cpumask)))
> +			goto again;

Is this necessary in addition to the in-guest-mode/out-guest-mode
detection? If so, why?

> +	}
> +out:
> +	f->flush_mm = NULL;
> +	f->flush_va = 0;
> +	if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
> +		raw_spin_unlock(&f->tlbstate_lock);
> +}
> +#endif /* CONFIG_KVM_GUEST */
> +
>  void native_flush_tlb_others(const struct cpumask *cpumask,
>  			     struct mm_struct *mm, unsigned long va)
>  {

next prev parent reply	other threads:[~2012-07-03  7:56 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-04  5:05 [PATCH v2 0/7] KVM paravirt remote flush tlb Nikunj A. Dadhania
2012-06-04  5:06 ` [PATCH v2 1/7] KVM Guest: Add VCPU running/pre-empted state for guest Nikunj A. Dadhania
2012-06-12 22:43   ` Marcelo Tosatti
2012-06-19  6:03     ` Nikunj A Dadhania
2012-06-04  5:06 ` [PATCH v2 2/7] KVM-HV: " Nikunj A. Dadhania
2012-06-04  5:07 ` [PATCH v2 3/7] KVM: Add paravirt kvm_flush_tlb_others Nikunj A. Dadhania
2012-06-12 23:02   ` Marcelo Tosatti
2012-06-19  6:11     ` Nikunj A Dadhania
2012-06-21 12:26       ` Marcelo Tosatti
2012-07-03  7:55   ` Marcelo Tosatti [this message]
2012-07-03  8:19     ` Nikunj A Dadhania
2012-07-05  2:09       ` Marcelo Tosatti
2012-07-05  5:55         ` Nikunj A Dadhania
2012-07-06  9:47     ` Nikunj A Dadhania
2012-07-03  8:11   ` Marcelo Tosatti
2012-07-03  8:27     ` Nikunj A Dadhania
2012-06-04  5:07 ` [PATCH v2 4/7] KVM: export kvm_kick_vcpu for pv_flush Nikunj A. Dadhania
2012-06-04  5:08 ` [PATCH v2 5/7] KVM: Introduce PV kick in flush tlb Nikunj A. Dadhania
2012-07-03  8:07   ` Marcelo Tosatti
2012-07-03  8:25     ` Nikunj A Dadhania
2012-07-05  2:37       ` Marcelo Tosatti
2012-07-05  5:53         ` Nikunj A Dadhania
2012-06-04  5:08 ` [PATCH v2 6/7] kvm,x86: RCU based table free Nikunj A. Dadhania
2012-06-05 10:48   ` Stefano Stabellini
2012-06-05 11:08     ` Nikunj A Dadhania
2012-06-05 11:58       ` Stefano Stabellini
2012-06-05 13:04         ` Nikunj A Dadhania
2012-06-05 13:08           ` Peter Zijlstra
2012-06-05 13:26             ` Stefano Stabellini
2012-06-05 13:31               ` Peter Zijlstra
2012-06-05 13:41                 ` Stefano Stabellini
2012-08-01 11:23               ` Stefano Stabellini
2012-08-01 12:12                 ` Nikunj A Dadhania
2012-08-01 12:59                   ` Stefano Stabellini
2012-06-05 15:29             ` Nikunj A Dadhania
2012-06-05 13:21           ` Stefano Stabellini
2012-06-04  5:08 ` [PATCH v2 7/7] Flush page-table pages before freeing them Nikunj A. Dadhania

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120703075535.GA13291@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=avi@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jeremy@goop.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nikunj@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=raghukt@linux.vnet.ibm.com \
    --cc=vatsa@linux.vnet.ibm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox