From mboxrd@z Thu Jan 1 00:00:00 1970 From: Marcelo Tosatti Subject: [patch 1/3] KVM: MMU: collapse remote TLB flushes on root sync Date: Sat, 25 Oct 2008 20:31:12 -0200 Message-ID: <20081025223243.782692567@localhost.localdomain> References: <20081025223111.498934405@localhost.localdomain> Cc: kvm@vger.kernel.org, Marcelo Tosatti To: Avi Kivity Return-path: Received: from mx2.redhat.com ([66.187.237.31]:52583 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751806AbYJYWec (ORCPT ); Sat, 25 Oct 2008 18:34:32 -0400 Content-Disposition: inline; filename=kvm-oos-collapse-remote-tlb-flush Sender: kvm-owner@vger.kernel.org List-ID: Instead of flushing remote TLBs at every page resync, do an initial pass to write protect the sptes, collapsing the flushes on a single remote TLB invalidation. kernbench is 2.3% faster on 4-way guest. Improvements have been seen with other loads such as AIM7. Avi: feel free to change this if you dislike the style (I do, but can't think of anything nicer). 
Signed-off-by: Marcelo Tosatti Index: kvm/arch/x86/kvm/mmu.c =================================================================== --- kvm.orig/arch/x86/kvm/mmu.c +++ kvm/arch/x86/kvm/mmu.c @@ -152,6 +152,7 @@ struct kvm_shadow_walk { struct kvm_unsync_walk { int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); + bool clear_unsync; }; typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); @@ -615,7 +616,7 @@ static u64 *rmap_next(struct kvm *kvm, u return NULL; } -static void rmap_write_protect(struct kvm *kvm, u64 gfn) +static int __rmap_write_protect(struct kvm *kvm, u64 gfn) { unsigned long *rmapp; u64 *spte; @@ -661,7 +662,12 @@ static void rmap_write_protect(struct kv spte = rmap_next(kvm, rmapp, spte); } - if (write_protected) + return write_protected; +} + +static void rmap_write_protect(struct kvm *kvm, u64 gfn) +{ + if (__rmap_write_protect(kvm, gfn)) kvm_flush_remote_tlbs(kvm); } @@ -985,12 +991,14 @@ static int mmu_unsync_walk(struct kvm_mm ret = mmu_unsync_walk(child, walker); if (ret) return ret; - __clear_bit(i, sp->unsync_child_bitmap); + if (walker->clear_unsync) + __clear_bit(i, sp->unsync_child_bitmap); } if (child->unsync) { ret = walker->entry(child, walker); - __clear_bit(i, sp->unsync_child_bitmap); + if (walker->clear_unsync) + __clear_bit(i, sp->unsync_child_bitmap); if (ret) return ret; } @@ -1053,6 +1061,7 @@ static int kvm_sync_page(struct kvm_vcpu struct sync_walker { struct kvm_vcpu *vcpu; struct kvm_unsync_walk walker; + bool write_protected; }; static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) @@ -1065,13 +1074,35 @@ static int mmu_sync_fn(struct kvm_mmu_pa return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); } +static int mmu_wprotect_fn(struct kvm_mmu_page *sp, + struct kvm_unsync_walk *walk) +{ + struct sync_walker *sync_walk = container_of(walk, struct sync_walker, + walker); + struct kvm_vcpu *vcpu = sync_walk->vcpu; + + if 
(__rmap_write_protect(vcpu->kvm, sp->gfn)) + sync_walk->write_protected = true; + return need_resched(); +} + static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { struct sync_walker walker = { - .walker = { .entry = mmu_sync_fn, }, + .walker = { .entry = mmu_wprotect_fn, + .clear_unsync = false, }, .vcpu = vcpu, + .write_protected = false }; + /* collapse the TLB flushes as an optimization */ + mmu_unsync_walk(sp, &walker.walker); + if (walker.write_protected) + kvm_flush_remote_tlbs(vcpu->kvm); + + walker.walker.entry = mmu_sync_fn; + walker.walker.clear_unsync = true; + while (mmu_unsync_walk(sp, &walker.walker)) cond_resched_lock(&vcpu->kvm->mmu_lock); } @@ -1257,7 +1288,8 @@ static int mmu_zap_fn(struct kvm_mmu_pag static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) { struct zap_walker walker = { - .walker = { .entry = mmu_zap_fn, }, + .walker = { .entry = mmu_zap_fn, + .clear_unsync = true, }, .kvm = kvm, .zapped = 0, }; --