From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Theurer Subject: Re: [PATCH] KVM: Defer remote tlb flushes on invlpg (v3) Date: Thu, 19 Mar 2009 08:46:04 -0500 Message-ID: <49C24C9C.9080101@linux.vnet.ibm.com> References: <1237468666-30700-1-git-send-email-avi@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: Marcelo Tosatti , Andrea Arcangeli , kvm@vger.kernel.org To: Avi Kivity Return-path: Received: from e31.co.us.ibm.com ([32.97.110.149]:41281 "EHLO e31.co.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756517AbZCSNqd (ORCPT ); Thu, 19 Mar 2009 09:46:33 -0400 Received: from d03relay03.boulder.ibm.com (d03relay03.boulder.ibm.com [9.17.195.228]) by e31.co.us.ibm.com (8.13.1/8.13.1) with ESMTP id n2JDhdBY012018 for ; Thu, 19 Mar 2009 07:43:39 -0600 Received: from d03av02.boulder.ibm.com (d03av02.boulder.ibm.com [9.17.195.168]) by d03relay03.boulder.ibm.com (8.13.8/8.13.8/NCO v9.2) with ESMTP id n2JDkQme114744 for ; Thu, 19 Mar 2009 07:46:26 -0600 Received: from d03av02.boulder.ibm.com (loopback [127.0.0.1]) by d03av02.boulder.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id n2JDkGa7000985 for ; Thu, 19 Mar 2009 07:46:16 -0600 In-Reply-To: <1237468666-30700-1-git-send-email-avi@redhat.com> Sender: kvm-owner@vger.kernel.org List-ID: Avi Kivity wrote: > KVM currently flushes the tlbs on all cpus when emulating invlpg. This > is because at the time of invlpg we lose track of the page, and leaving > stale tlb entries could cause the guest to access the page when it is > later freed (say after being swapped out). > > However we have a second change to flush the tlbs, when an mmu notifier is > called to let us know the host pte has been invalidated. We can safely > defer the flush to this point, which occurs much less frequently. Of course, > we still do a local tlb flush when emulating invlpg. > I should be able to run some performance comparisons with this in the next day or two. -Andrew > Signed-off-by: Avi Kivity > --- > > Changes from v2: > - dropped remote flushes from guest pagetable write protect paths > - fixed up memory barriers > - use existing local tlb flush in invlpg, no need to add another one > > arch/x86/kvm/mmu.c | 3 +-- > arch/x86/kvm/paging_tmpl.h | 5 +---- > include/linux/kvm_host.h | 2 ++ > virt/kvm/kvm_main.c | 17 +++++++++++------ > 4 files changed, 15 insertions(+), 12 deletions(-) > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index 2a36f7f..f0ea56c 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -1184,8 +1184,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, > for_each_sp(pages, sp, parents, i) > protected |= rmap_write_protect(vcpu->kvm, sp->gfn); > > - if (protected) > - kvm_flush_remote_tlbs(vcpu->kvm); > + kvm_flush_remote_tlbs_cond(vcpu->kvm, protected); > > for_each_sp(pages, sp, parents, i) { > kvm_sync_page(vcpu, sp); > diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h > index 855eb71..2273b26 100644 > --- a/arch/x86/kvm/paging_tmpl.h > +++ b/arch/x86/kvm/paging_tmpl.h > @@ -445,7 +445,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) > gpa_t pte_gpa = -1; > int level; > u64 *sptep; > - int need_flush = 0; > > spin_lock(&vcpu->kvm->mmu_lock); > > @@ -465,7 +464,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) > rmap_remove(vcpu->kvm, sptep); > if (is_large_pte(*sptep)) > --vcpu->kvm->stat.lpages; > - need_flush = 1; > + vcpu->kvm->remote_tlbs_dirty = true; > } > set_shadow_pte(sptep, shadow_trap_nonpresent_pte); > break; > @@ -475,8 +474,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) > break; > } > > - if (need_flush) > - kvm_flush_remote_tlbs(vcpu->kvm); > spin_unlock(&vcpu->kvm->mmu_lock); > > if (pte_gpa == -1) > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 11eb702..b779c57 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -125,6 +125,7 @@ struct kvm_kernel_irq_routing_entry { > struct kvm { > struct mutex lock; /* protects the vcpus array and APIC accesses */ > spinlock_t mmu_lock; > + bool remote_tlbs_dirty; > struct rw_semaphore slots_lock; > struct mm_struct *mm; /* userspace tied to this vm */ > int nmemslots; > @@ -235,6 +236,7 @@ void kvm_resched(struct kvm_vcpu *vcpu); > void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); > void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); > void kvm_flush_remote_tlbs(struct kvm *kvm); > +void kvm_flush_remote_tlbs_cond(struct kvm *kvm, bool cond); > void kvm_reload_remote_mmus(struct kvm *kvm); > > long kvm_arch_dev_ioctl(struct file *filp, > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 68b217e..12afa50 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -758,10 +758,18 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) > > void kvm_flush_remote_tlbs(struct kvm *kvm) > { > + kvm->remote_tlbs_dirty = false; > + smp_wmb(); > if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) > ++kvm->stat.remote_tlb_flush; > } > > +void kvm_flush_remote_tlbs_cond(struct kvm *kvm, bool cond) > +{ > + if (cond || kvm->remote_tlbs_dirty) > + kvm_flush_remote_tlbs(kvm); > +} > + > void kvm_reload_remote_mmus(struct kvm *kvm) > { > make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); > @@ -841,8 +849,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, > spin_unlock(&kvm->mmu_lock); > > /* we've to flush the tlb before the pages can be freed */ > - if (need_tlb_flush) > - kvm_flush_remote_tlbs(kvm); > + kvm_flush_remote_tlbs_cond(kvm, need_tlb_flush); > > } > > @@ -866,8 +873,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, > spin_unlock(&kvm->mmu_lock); > > /* we've to flush the tlb before the pages can be freed */ > - if (need_tlb_flush) > - kvm_flush_remote_tlbs(kvm); > + kvm_flush_remote_tlbs_cond(kvm, need_tlb_flush); > } > > static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, > @@ -907,8 +913,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, > young = kvm_age_hva(kvm, address); > spin_unlock(&kvm->mmu_lock); > > - if (young) > - kvm_flush_remote_tlbs(kvm); > + kvm_flush_remote_tlbs_cond(kvm, young); > > return young; > } >