From mboxrd@z Thu Jan 1 00:00:00 1970
From: Marcelo Tosatti
Subject: [patch 3/3] KVM: MMU: prepopulate the shadow on invlpg
Date: Sat, 25 Oct 2008 20:31:14 -0200
Message-ID: <20081025223243.946600413@localhost.localdomain>
References: <20081025223111.498934405@localhost.localdomain>
Cc: kvm@vger.kernel.org, Marcelo Tosatti
To: Avi Kivity
Return-path: 
Received: from mx2.redhat.com ([66.187.237.31]:52582 "EHLO mx2.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752374AbYJYWec (ORCPT ); Sat, 25 Oct 2008 18:34:32 -0400
Content-Disposition: inline; filename=invlpg-opt
Sender: kvm-owner@vger.kernel.org
List-ID: 

If the guest executes invlpg on a non present entry, peek into the
pagetable and attempt to prepopulate the shadow entry.

Also stop dirty fault updates from interfering with the fork detector.

For RHEL3 and 32-bit Vista guests the success rate is high. With Win2003
there is a 1:2 success/fail ratio, but even then compilation benchmarks
are slightly faster. 2000 and XP rarely execute invlpg on non present
entries.

2% improvement on RHEL3/AIM7.

Signed-off-by: Marcelo Tosatti

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -2365,7 +2365,8 @@ static void kvm_mmu_access_page(struct k
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
+		       const u8 *new, int bytes,
+		       bool speculative)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *sp;
@@ -2391,15 +2392,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, "pre pte write");
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
+	if (!speculative) {
+		if (gfn == vcpu->arch.last_pt_write_gfn
+		    && !last_updated_pte_accessed(vcpu)) {
+			++vcpu->arch.last_pt_write_count;
+			if (vcpu->arch.last_pt_write_count >= 3)
+				flooded = 1;
+		} else {
+			vcpu->arch.last_pt_write_gfn = gfn;
+			vcpu->arch.last_pt_write_count = 1;
+			vcpu->arch.last_pte_updated = NULL;
+		}
 	}
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2539,9 +2542,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	spin_lock(&vcpu->kvm->mmu_lock);
 	vcpu->arch.mmu.invlpg(vcpu, gva);
-	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_flush_tlb(vcpu);
 	++vcpu->stat.invlpg;
 }
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -82,6 +82,7 @@ struct shadow_walker {
 	int *ptwrite;
 	pfn_t pfn;
 	u64 *sptep;
+	gpa_t pte_gpa;
 };
 
 static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -222,7 +223,7 @@ walk:
 			if (ret)
 				goto walk;
 			pte |= PT_DIRTY_MASK;
-			kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
+			kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 1);
 			walker->ptes[walker->level - 1] = pte;
 		}
 
@@ -467,10 +468,18 @@ static int FNAME(shadow_invlpg_entry)(st
 				      struct kvm_vcpu *vcpu, u64 addr,
 				      u64 *sptep, int level)
 {
+	struct shadow_walker *sw =
+		container_of(_sw, struct shadow_walker, walker);
 	if (level == PT_PAGE_TABLE_LEVEL) {
-		if (is_shadow_present_pte(*sptep))
+		struct kvm_mmu_page *sp = page_header(__pa(sptep));
+		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
+		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+		if (is_shadow_present_pte(*sptep)) {
 			rmap_remove(vcpu->kvm, sptep);
+			sw->pte_gpa = -1;
+		}
 		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
 		return 1;
 	}
 
@@ -481,11 +490,26 @@ static int FNAME(shadow_invlpg_entry)(st
 
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	pt_element_t gpte;
 	struct shadow_walker walker = {
 		.walker = { .entry = FNAME(shadow_invlpg_entry), },
+		.pte_gpa = -1,
 	};
 
+	spin_lock(&vcpu->kvm->mmu_lock);
 	walk_shadow(&walker.walker, vcpu, gva);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	if (walker.pte_gpa == -1)
+		return;
+	if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+				  sizeof(pt_element_t)))
+		return;
+	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
+		if (mmu_topup_memory_caches(vcpu))
+			return;
+		kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+				  sizeof(pt_element_t), 1);
+	}
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -2044,7 +2044,7 @@ int emulator_write_phys(struct kvm_vcpu
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0)
 		return 0;
-	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 0);
 	return 1;
 }
Index: kvm/include/asm-x86/kvm_host.h
===================================================================
--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -596,7 +596,8 @@ unsigned long segment_base(u16 selector)
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes);
+		       const u8 *new, int bytes,
+		       bool speculative);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
--
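
A side note for readers following the paging_tmpl.h hunks: the pte_gpa
computed in FNAME(shadow_invlpg_entry) is the guest-physical address of the
guest PTE mirrored by the shadow entry being invalidated. In this path each
shadow page tracks one guest page-table frame, so sp->gfn names that frame
and the index of the spte within sp->spt doubles as the index of the guest
PTE within the frame. The stand-alone sketch below is not part of the patch;
it is plain user-space C with made-up values that simply restates that
arithmetic.

	/*
	 * Illustration only: mirrors the pte_gpa computation from
	 * FNAME(shadow_invlpg_entry).  PAGE_SHIFT and the sample gfn/index
	 * values are assumptions for the example, not taken from a guest.
	 */
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12			/* 4 KiB pages */
	typedef uint64_t pt_element_t;		/* 8-byte PTEs (PAE/64-bit) */

	static uint64_t guest_pte_gpa(uint64_t gfn, unsigned int index)
	{
		/* frame base plus byte offset of the PTE inside the frame */
		return (gfn << PAGE_SHIFT) + index * sizeof(pt_element_t);
	}

	int main(void)
	{
		/* e.g. page-table frame 0x1234, entry 5 -> gpa 0x1234028 */
		printf("pte_gpa = %#llx\n",
		       (unsigned long long)guest_pte_gpa(0x1234, 5));
		return 0;
	}

FNAME(invlpg) then reads the guest PTE back at that address with
kvm_read_guest_atomic() and, if it is present and accessed, feeds it to
kvm_mmu_pte_write() with speculative set, so the prepopulation does not
perturb the write-flood (fork detector) bookkeeping.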