public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Avi Kivity <avi@redhat.com>
Cc: kvm@vger.kernel.org, Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 11/13] KVM: MMU: out of sync shadow core v2
Date: Mon, 22 Sep 2008 16:16:38 -0300	[thread overview]
Message-ID: <20080922192215.654700943@localhost.localdomain> (raw)
In-Reply-To: 20080922191627.671519725@localhost.localdomain

[-- Attachment #1: kvm-oos-core --]
[-- Type: text/plain, Size: 10788 bytes --]

Allow guest pagetables to go out of sync.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -147,6 +147,10 @@ struct kvm_shadow_walk {
 		     u64 addr, u64 *spte, int level);
 };
 
+struct kvm_unsync_walk {
+	int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
+};
+
 typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
 
 static struct kmem_cache *pte_chain_cache;
@@ -942,6 +946,39 @@ static void nonpaging_invlpg(struct kvm_
 {
 }
 
+static int mmu_unsync_walk(struct kvm_mmu_page *parent,
+			   struct kvm_unsync_walk *walker)
+{
+	int i, ret;
+	struct kvm_mmu_page *sp = parent;
+
+	while (parent->unsync_children) {
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+			u64 ent = sp->spt[i];
+
+			if (is_shadow_present_pte(ent)) {
+				struct kvm_mmu_page *child;
+				child = page_header(ent & PT64_BASE_ADDR_MASK);
+
+				if (child->unsync_children) {
+					sp = child;
+					break;
+				}
+				if (child->unsync) {
+					ret = walker->entry(child, walker);
+					if (ret)
+						return ret;
+				}
+			}
+		}
+		if (i == PT64_ENT_PER_PAGE) {
+			sp->unsync_children = 0;
+			sp = parent;
+		}
+	}
+	return 0;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned index;
@@ -962,6 +999,59 @@ static struct kvm_mmu_page *kvm_mmu_look
 	return NULL;
 }
 
+static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	WARN_ON(!sp->unsync);
+	sp->unsync = 0;
+	--kvm->stat.mmu_unsync;
+}
+
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	if (sp->role.glevels != vcpu->arch.mmu.root_level) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	kvm_mmu_flush_tlb(vcpu);
+	kvm_unlink_unsync_page(vcpu->kvm, sp);
+	return 0;
+}
+
+struct sync_walker {
+	struct kvm_vcpu *vcpu;
+	struct kvm_unsync_walk walker;
+};
+
+static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+	struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
+						     walker);
+	struct kvm_vcpu *vcpu = sync_walk->vcpu;
+
+	kvm_sync_page(vcpu, sp);
+	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	struct sync_walker walker = {
+		.walker = { .entry = mmu_sync_fn, },
+		.vcpu = vcpu,
+	};
+
+	while (mmu_unsync_walk(sp, &walker.walker))
+		cond_resched_lock(&vcpu->kvm->mmu_lock);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -975,7 +1065,7 @@ static struct kvm_mmu_page *kvm_mmu_get_
 	unsigned quadrant;
 	struct hlist_head *bucket;
 	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
+	struct hlist_node *node, *tmp;
 
 	role.word = 0;
 	role.glevels = vcpu->arch.mmu.root_level;
@@ -991,8 +1081,18 @@ static struct kvm_mmu_page *kvm_mmu_get_
 		 gfn, role.word);
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
-	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && sp->role.word == role.word) {
+	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
+		if (sp->gfn == gfn) {
+			if (sp->unsync)
+				if (kvm_sync_page(vcpu, sp))
+					continue;
+
+			if (sp->role.word != role.word)
+				continue;
+
+			if (sp->unsync_children)
+				set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 			pgprintk("%s: found\n", __func__);
 			return sp;
@@ -1112,14 +1212,47 @@ static void kvm_mmu_unlink_parents(struc
 	}
 }
 
+struct zap_walker {
+	struct kvm_unsync_walk walker;
+	struct kvm *kvm;
+	int zapped;
+};
+
+static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+	struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
+						     walker);
+	kvm_mmu_zap_page(zap_walk->kvm, sp);
+	zap_walk->zapped = 1;
+	return 0;
+}
+
+static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	struct zap_walker walker = {
+		.walker = { .entry = mmu_zap_fn, },
+		.kvm = kvm,
+		.zapped = 0,
+	};
+
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		return 0;
+	mmu_unsync_walk(sp, &walker.walker);
+	return walker.zapped;
+}
+
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
+	int ret;
 	++kvm->stat.mmu_shadow_zapped;
+	ret = mmu_zap_unsync_children(kvm, sp);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
 	kvm_flush_remote_tlbs(kvm);
 	if (!sp->role.invalid && !sp->role.metaphysical)
 		unaccount_shadowed(kvm, sp->gfn);
+	if (sp->unsync)
+		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
@@ -1129,7 +1262,7 @@ static int kvm_mmu_zap_page(struct kvm *
 		kvm_reload_remote_mmus(kvm);
 	}
 	kvm_mmu_reset_last_pte_updated(kvm);
-	return 0;
+	return ret;
 }
 
 /*
@@ -1235,10 +1368,58 @@ struct page *gva_to_page(struct kvm_vcpu
 	return page;
 }
 
+static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	sp->unsync_children = 1;
+	return 1;
+}
+
+static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *s;
+	struct hlist_node *node, *n;
+
+	index = kvm_page_table_hashfn(sp->gfn);
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	/* don't unsync if pagetable is shadowed with multiple roles */
+	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+		if (s->gfn != sp->gfn || s->role.metaphysical)
+			continue;
+		if (s->role.word != sp->role.word)
+			return 1;
+	}
+	mmu_parent_walk(vcpu, sp, unsync_walk_fn);
+	++vcpu->kvm->stat.mmu_unsync;
+	sp->unsync = 1;
+	mmu_convert_notrap(sp);
+	return 0;
+}
+
+static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+				  bool can_unsync)
+{
+	struct kvm_mmu_page *shadow;
+
+	shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
+	if (shadow) {
+		if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+			return 1;
+		if (shadow->unsync)
+			return 0;
+		if (can_unsync)
+			return kvm_unsync_page(vcpu, shadow);
+		return 1;
+	}
+	return 0;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative)
+		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
@@ -1265,7 +1446,6 @@ static int set_spte(struct kvm_vcpu *vcp
 
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-		struct kvm_mmu_page *shadow;
 
 		if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
 			ret = 1;
@@ -1275,8 +1455,7 @@ static int set_spte(struct kvm_vcpu *vcp
 
 		spte |= PT_WRITABLE_MASK;
 
-		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-		if (shadow) {
+		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
 				 __func__, gfn);
 			ret = 1;
@@ -1294,7 +1473,6 @@ set_pte:
 	return ret;
 }
 
-
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
@@ -1332,7 +1510,7 @@ static void mmu_set_spte(struct kvm_vcpu
 		}
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative)) {
+		      dirty, largepage, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1552,10 +1730,6 @@ static void mmu_alloc_roots(struct kvm_v
 	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-}
-
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
Index: kvm/include/asm-x86/kvm_host.h
===================================================================
--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -195,6 +195,8 @@ struct kvm_mmu_page {
 				    */
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
+	bool unsync;
+	bool unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
 		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -371,6 +373,7 @@ struct kvm_vm_stat {
 	u32 mmu_flooded;
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
+	u32 mmu_unsync;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -101,6 +101,7 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "mmu_flooded", VM_STAT(mmu_flooded) },
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
+	{ "mmu_unsync", VM_STAT(mmu_unsync) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
@@ -3120,6 +3121,8 @@ static int vcpu_enter_guest(struct kvm_v
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
+			kvm_mmu_sync_roots(vcpu);
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			kvm_x86_ops->tlb_flush(vcpu);
 		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -580,7 +580,7 @@ static int FNAME(sync_page)(struct kvm_v
 		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 			 is_dirty_pte(gpte), 0, gfn,
-			 spte_to_pfn(sp->spt[i]), true);
+			 spte_to_pfn(sp->spt[i]), true, false);
 	}
 
 	return !nr_present;
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -35,6 +35,7 @@
 #define KVM_REQ_TRIPLE_FAULT       4
 #define KVM_REQ_PENDING_TIMER      5
 #define KVM_REQ_UNHALT             6
+#define KVM_REQ_MMU_SYNC           7
 
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;

-- 


  parent reply	other threads:[~2008-09-22 19:26 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-22 19:16 [patch 00/13] out of sync shadow v3 Marcelo Tosatti
2008-09-22 19:16 ` [patch 01/13] KVM: MMU: flush remote TLBs on large->normal entry overwrite Marcelo Tosatti
2008-09-22 19:16 ` [patch 02/13] KVM: MMU: split mmu_set_spte Marcelo Tosatti
2008-09-22 19:16 ` [patch 03/13] KVM: MMU: move local TLB flush to mmu_set_spte Marcelo Tosatti
2008-09-22 19:16 ` [patch 04/13] KVM: MMU: do not write-protect large mappings Marcelo Tosatti
2008-09-22 19:16 ` [patch 05/13] KVM: MMU: mode specific sync_page Marcelo Tosatti
2008-09-22 19:16 ` [patch 06/13] KVM: MMU: sync roots on mmu reload Marcelo Tosatti
2008-09-22 19:16 ` [patch 07/13] KVM: x86: trap invlpg Marcelo Tosatti
2008-09-22 19:16 ` [patch 08/13] KVM: MMU: mmu_parent_walk Marcelo Tosatti
2008-09-22 19:16 ` [patch 09/13] KVM: MMU: awareness of new kvm_mmu_zap_page behaviour Marcelo Tosatti
2008-09-22 19:16 ` [patch 10/13] KVM: MMU: mmu_convert_notrap helper Marcelo Tosatti
2008-09-22 19:16 ` Marcelo Tosatti [this message]
2008-09-22 19:16 ` [patch 12/13] KVM: MMU: speed up mmu_unsync_walk Marcelo Tosatti
2008-09-22 19:16 ` [patch 13/13] KVM: MMU: add "oos_shadow" parameter to disable oos Marcelo Tosatti
2008-10-14 18:37 ` [patch 00/13] out of sync shadow v3 Alex Williamson
2008-10-14 22:08   ` Alex Williamson
2008-10-15  2:37     ` Marcelo Tosatti
2008-10-15  3:56       ` Alex Williamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080922192215.654700943@localhost.localdomain \
    --to=mtosatti@redhat.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox