public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: MMU: Fix rmap_remove() race
@ 2008-03-26 15:02 Avi Kivity
  2008-03-26 15:15 ` Avi Kivity
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Avi Kivity @ 2008-03-26 15:02 UTC (permalink / raw)
  To: Marcelo Tosatti, Andrea Arcangeli; +Cc: kvm-devel

Andrea notes that freeing the page before flushing the tlb is a race, as the
guest can sneak in one last write before the tlb is flushed, writing to a
page that may belong to someone else.

Fix by reversing the order of freeing and flushing the tlb.  Since the tlb
flush is expensive, queue the pages to be freed so we need to flush just once.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c         |   66 +++++++++++++++++++++++++++++++++++++++----
 include/asm-x86/kvm_host.h |    8 +++++
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 95c12bc..2033879 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -536,6 +536,52 @@ static void rmap_desc_remove_entry(unsigned long *rmapp,
 	mmu_free_rmap_desc(desc);
 }
 
+static void rmap_remove_complete(struct kvm *kvm)
+{
+	int i;
+	struct page *page;
+
+	if (!kvm->arch.nr_page_release_queue)
+		return;
+	kvm_flush_remote_tlbs(kvm);
+	for (i = 0; i < kvm->arch.nr_page_release_queue; ++i) {
+		page = kvm->arch.page_release_queue[i].page;
+		if (kvm->arch.page_release_queue[i].dirty)
+			kvm_release_page_dirty(page);
+		else
+			kvm_release_page_clean(page);
+	}
+	kvm->arch.nr_page_release_queue = 0;
+}
+
+static void kvm_release_page_deferred(struct kvm *kvm,
+				      struct page *page,
+				      bool dirty)
+{
+	int i;
+
+	i = kvm->arch.nr_page_release_queue;
+	if (i == KVM_MAX_PAGE_RELEASE_QUEUE) {
+		rmap_remove_complete(kvm);
+		i = 0;
+	}
+	kvm->arch.page_release_queue[i].page = page;
+	kvm->arch.page_release_queue[i].dirty = dirty;
+	kvm->arch.nr_page_release_queue = i + 1;
+}
+
+static void kvm_release_page_dirty_deferred(struct kvm *kvm,
+					    struct page *page)
+{
+	kvm_release_page_deferred(kvm, page, true);
+}
+
+static void kvm_release_page_clean_deferred(struct kvm *kvm,
+					    struct page *page)
+{
+	kvm_release_page_deferred(kvm, page, false);
+}
+
 static void rmap_remove(struct kvm *kvm, u64 *spte)
 {
 	struct kvm_rmap_desc *desc;
@@ -551,9 +597,9 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	page = spte_to_page(*spte);
 	mark_page_accessed(page);
 	if (is_writeble_pte(*spte))
-		kvm_release_page_dirty(page);
+		kvm_release_page_dirty_deferred(kvm, page);
 	else
-		kvm_release_page_clean(page);
+		kvm_release_page_clean_deferred(kvm, page);
 	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -585,6 +631,12 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	}
 }
 
+static void rmap_remove_one(struct kvm *kvm, u64 *spte)
+{
+	rmap_remove(kvm, spte);
+	rmap_remove_complete(kvm);
+}
+
 static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
 {
 	struct kvm_rmap_desc *desc;
@@ -650,7 +702,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
 		pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
 		if (is_writeble_pte(*spte)) {
-			rmap_remove(kvm, spte);
+			rmap_remove_one(kvm, spte);
 			--kvm->stat.lpages;
 			set_shadow_pte(spte, shadow_trap_nonpresent_pte);
 			write_protected = 1;
@@ -872,7 +924,7 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 				rmap_remove(kvm, &pt[i]);
 			pt[i] = shadow_trap_nonpresent_pte;
 		}
-		kvm_flush_remote_tlbs(kvm);
+		rmap_remove_complete(kvm);
 		return;
 	}
 
@@ -891,7 +943,7 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 		}
 		pt[i] = shadow_trap_nonpresent_pte;
 	}
-	kvm_flush_remote_tlbs(kvm);
+	rmap_remove_complete(kvm);
 }
 
 static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
@@ -1048,7 +1100,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 	if (is_rmap_pte(*shadow_pte)) {
 		if (page != spte_to_page(*shadow_pte))
-			rmap_remove(vcpu->kvm, shadow_pte);
+			rmap_remove_one(vcpu->kvm, shadow_pte);
 		else
 			was_rmapped = 1;
 	}
@@ -1583,7 +1635,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 	if (is_shadow_present_pte(pte)) {
 		if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
 		    is_large_pte(pte))
-			rmap_remove(vcpu->kvm, spte);
+			rmap_remove_one(vcpu->kvm, spte);
 		else {
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			mmu_page_remove_parent_pte(child, spte);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 4382ca0..8a71616 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -77,6 +77,8 @@
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 40
 
+#define KVM_MAX_PAGE_RELEASE_QUEUE 16
+
 extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
@@ -312,6 +314,12 @@ struct kvm_arch{
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 
+	int nr_page_release_queue;
+	struct {
+		struct page *page;
+		bool dirty;
+	} page_release_queue[KVM_MAX_PAGE_RELEASE_QUEUE];
+
 	int round_robin_prev_vcpu;
 	unsigned int tss_addr;
 	struct page *apic_access_page;
-- 
1.5.4.2


-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2008-03-31  9:25 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-26 15:02 [PATCH] KVM: MMU: Fix rmap_remove() race Avi Kivity
2008-03-26 15:15 ` Avi Kivity
2008-03-26 17:51 ` Marcelo Tosatti
2008-03-26 18:12   ` Andrea Arcangeli
2008-03-26 19:01     ` Marcelo Tosatti
2008-03-27  8:01     ` Avi Kivity
2008-03-26 19:22 ` Andrea Arcangeli
2008-03-26 19:27   ` Andrea Arcangeli
2008-03-27  8:06     ` Avi Kivity
2008-03-27  8:11     ` Avi Kivity
2008-03-27 13:52       ` Andrea Arcangeli
2008-03-27 13:56         ` Avi Kivity
2008-03-27 14:26           ` Andrea Arcangeli
2008-03-27 14:35             ` Avi Kivity
2008-03-27 14:50               ` Andrea Arcangeli
2008-03-27 14:56                 ` Avi Kivity
2008-03-28 14:01                 ` Andrea Arcangeli
2008-03-28 20:07                   ` Andrea Arcangeli
2008-03-31  6:35                   ` Avi Kivity
2008-03-31  9:25                     ` Andrea Arcangeli
2008-03-27 15:26 ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox