All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marcelo Tosatti <mtosatti@redhat.com>
To: Avi Kivity <avi@qumranet.com>
Cc: kvm@vger.kernel.org, "David S. Ahern" <daahern@cisco.com>,
	Marcelo Tosatti <mtosatti@redhat.com>
Subject: [patch 09/10] KVM: MMU: out of sync shadow core v2
Date: Thu, 18 Sep 2008 18:27:58 -0300	[thread overview]
Message-ID: <20080918213337.148804603@localhost.localdomain> (raw)
In-Reply-To: 20080918212749.800177179@localhost.localdomain

[-- Attachment #1: kvm-oos-core --]
[-- Type: text/plain, Size: 9878 bytes --]

Allow guest pagetables to go out of sync.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -148,6 +148,7 @@ struct kvm_shadow_walk {
 };
 
 typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
+typedef int (*mmu_unsync_fn) (struct kvm_mmu_page *sp, void *priv);
 
 static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
@@ -942,6 +943,39 @@ static void nonpaging_invlpg(struct kvm_
 {
 }
 
+static int mmu_unsync_walk(struct kvm_mmu_page *parent, mmu_unsync_fn fn,
+			   void *priv)
+{
+	int i, ret;
+	struct kvm_mmu_page *sp = parent;
+
+	while (parent->unsync_children) {
+		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+			u64 ent = sp->spt[i];
+
+			if (is_shadow_present_pte(ent)) {
+				struct kvm_mmu_page *child;
+				child = page_header(ent & PT64_BASE_ADDR_MASK);
+
+				if (child->unsync_children) {
+					sp = child;
+					break;
+				}
+				if (child->unsync) {
+					ret = fn(child, priv);
+					if (ret)
+						return ret;
+				}
+			}
+		}
+		if (i == PT64_ENT_PER_PAGE) {
+			sp->unsync_children = 0;
+			sp = parent;
+		}
+	}
+	return 0;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned index;
@@ -962,6 +996,47 @@ static struct kvm_mmu_page *kvm_mmu_look
 	return NULL;
 }
 
+static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	WARN_ON(!sp->unsync);
+	sp->unsync = 0;
+	--kvm->stat.mmu_unsync;
+}
+
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	if (sp->role.glevels != vcpu->arch.mmu.root_level) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	kvm_mmu_flush_tlb(vcpu);
+	kvm_unlink_unsync_page(vcpu->kvm, sp);
+	return 0;
+}
+
+static int mmu_sync_fn(struct kvm_mmu_page *sp, void *priv)
+{
+	struct kvm_vcpu *vcpu = priv;
+
+	kvm_sync_page(vcpu, sp);
+	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	while (mmu_unsync_walk(sp, mmu_sync_fn, vcpu))
+		cond_resched_lock(&vcpu->kvm->mmu_lock);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -975,7 +1050,7 @@ static struct kvm_mmu_page *kvm_mmu_get_
 	unsigned quadrant;
 	struct hlist_head *bucket;
 	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
+	struct hlist_node *node, *tmp;
 
 	role.word = 0;
 	role.glevels = vcpu->arch.mmu.root_level;
@@ -991,8 +1066,18 @@ static struct kvm_mmu_page *kvm_mmu_get_
 		 gfn, role.word);
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
-	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && sp->role.word == role.word) {
+	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
+		if (sp->gfn == gfn) {
+			if (sp->unsync)
+				if (kvm_sync_page(vcpu, sp))
+					continue;
+
+			if (sp->role.word != role.word)
+				continue;
+
+			if (sp->unsync_children)
+				vcpu->arch.mmu.need_root_sync = 1;
+
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 			pgprintk("%s: found\n", __func__);
 			return sp;
@@ -1112,14 +1197,45 @@ static void kvm_mmu_unlink_parents(struc
 	}
 }
 
+struct mmu_zap_walk {
+	struct kvm *kvm;
+	int zapped;
+};
+
+static int mmu_zap_fn(struct kvm_mmu_page *sp, void *private)
+{
+	struct mmu_zap_walk *zap_walk = private;
+
+	kvm_mmu_zap_page(zap_walk->kvm, sp);
+	zap_walk->zapped = 1;
+	return 0;
+}
+
+static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	struct mmu_zap_walk mmu_zap_walk = {
+		.kvm = kvm,
+		.zapped = 0,
+	};
+
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		return 0;
+	mmu_unsync_walk(sp, mmu_zap_fn, &mmu_zap_walk);
+	return mmu_zap_walk.zapped;
+}
+
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
+	int ret;
 	++kvm->stat.mmu_shadow_zapped;
+	ret = mmu_zap_unsync_children(kvm, sp);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
 	kvm_flush_remote_tlbs(kvm);
 	if (!sp->role.invalid && !sp->role.metaphysical)
 		unaccount_shadowed(kvm, sp->gfn);
+	if (sp->unsync)
+		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
@@ -1129,7 +1245,7 @@ static int kvm_mmu_zap_page(struct kvm *
 		kvm_reload_remote_mmus(kvm);
 	}
 	kvm_mmu_reset_last_pte_updated(kvm);
-	return 0;
+	return ret;
 }
 
 /*
@@ -1221,10 +1337,57 @@ struct page *gva_to_page(struct kvm_vcpu
 	return page;
 }
 
+static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	sp->unsync_children = 1;
+	return 1;
+}
+
+static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *s;
+	struct hlist_node *node, *n;
+
+	index = kvm_page_table_hashfn(sp->gfn);
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	/* don't unsync if pagetable is shadowed with multiple roles */
+	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+		if (s->gfn != sp->gfn || s->role.metaphysical)
+			continue;
+		if (s->role.word != sp->role.word)
+			return 1;
+	}
+	mmu_parent_walk(vcpu, sp, unsync_walk_fn);
+	++vcpu->kvm->stat.mmu_unsync;
+	sp->unsync = 1;
+	return 0;
+}
+
+static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+				  bool can_unsync)
+{
+	struct kvm_mmu_page *shadow;
+
+	shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
+	if (shadow) {
+		if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+			return 1;
+		if (shadow->unsync)
+			return 0;
+		if (can_unsync)
+			return kvm_unsync_page(vcpu, shadow);
+		return 1;
+	}
+	return 0;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative)
+		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
@@ -1251,7 +1414,6 @@ static int set_spte(struct kvm_vcpu *vcp
 
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-		struct kvm_mmu_page *shadow;
 
 		if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
 			ret = 1;
@@ -1261,8 +1423,7 @@ static int set_spte(struct kvm_vcpu *vcp
 
 		spte |= PT_WRITABLE_MASK;
 
-		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-		if (shadow) {
+		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
 				 __func__, gfn);
 			ret = 1;
@@ -1280,7 +1441,6 @@ set_pte:
 	return ret;
 }
 
-
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
@@ -1318,7 +1478,7 @@ static void mmu_set_spte(struct kvm_vcpu
 		}
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative)) {
+		      dirty, largepage, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		if (was_writeble)
@@ -1539,10 +1699,6 @@ static void mmu_alloc_roots(struct kvm_v
 	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-}
-
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	int i;
Index: kvm/include/asm-x86/kvm_host.h
===================================================================
--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -195,6 +195,8 @@ struct kvm_mmu_page {
 				    */
 	int multimapped;         /* More than one parent_pte? */
 	int root_count;          /* Currently serving as active root */
+	bool unsync;
+	bool unsync_children;
 	union {
 		u64 *parent_pte;               /* !multimapped */
 		struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -226,6 +228,7 @@ struct kvm_mmu {
 	hpa_t root_hpa;
 	int root_level;
 	int shadow_root_level;
+	bool need_root_sync;
 
 	u64 *pae_root;
 };
@@ -371,6 +374,7 @@ struct kvm_vm_stat {
 	u32 mmu_flooded;
 	u32 mmu_recycled;
 	u32 mmu_cache_miss;
+	u32 mmu_unsync;
 	u32 remote_tlb_flush;
 	u32 lpages;
 };
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -101,6 +101,7 @@ struct kvm_stats_debugfs_item debugfs_en
 	{ "mmu_flooded", VM_STAT(mmu_flooded) },
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
+	{ "mmu_unsync", VM_STAT(mmu_unsync) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -449,6 +449,11 @@ static int FNAME(page_fault)(struct kvm_
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
+	if (vcpu->arch.mmu.need_root_sync) {
+		kvm_mmu_sync_roots(vcpu);
+		vcpu->arch.mmu.need_root_sync = 0;
+	}
+
 	return write_pt;
 
 out_unlock:
@@ -576,7 +581,7 @@ static int FNAME(sync_page)(struct kvm_v
 			pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
 			set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
 				 is_dirty_pte(gpte), 0, gfn,
-				 spte_to_pfn(sp->spt[i]), true);
+				 spte_to_pfn(sp->spt[i]), true, false);
 		}
 	}
 

-- 


  parent reply	other threads:[~2008-09-18 21:49 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-18 21:27 [patch 00/10] out of sync shadow v2 Marcelo Tosatti
2008-09-18 21:27 ` [patch 01/10] KVM: MMU: split mmu_set_spte Marcelo Tosatti
2008-09-18 21:27 ` [patch 02/10] KVM: MMU: move local TLB flush to mmu_set_spte Marcelo Tosatti
2008-09-20  0:21   ` Avi Kivity
2008-09-18 21:27 ` [patch 03/10] KVM: MMU: do not write-protect large mappings Marcelo Tosatti
2008-09-20  0:29   ` Avi Kivity
2008-09-21  0:41     ` Marcelo Tosatti
2008-09-18 21:27 ` [patch 04/10] KVM: MMU: mode specific sync_page Marcelo Tosatti
2008-09-20  0:44   ` Avi Kivity
2008-09-18 21:27 ` [patch 05/10] KVM: MMU: sync roots on mmu reload Marcelo Tosatti
2008-09-18 21:27 ` [patch 06/10] KVM: x86: trap invlpg Marcelo Tosatti
2008-09-20  0:53   ` Avi Kivity
2008-09-21  0:43     ` Marcelo Tosatti
2008-09-18 21:27 ` [patch 07/10] KVM: MMU: mmu_parent_walk Marcelo Tosatti
2008-09-20  0:56   ` Avi Kivity
2008-09-21  0:44     ` Marcelo Tosatti
2008-09-22 20:30       ` Avi Kivity
2008-09-22 22:04         ` Marcelo Tosatti
2008-09-18 21:27 ` [patch 08/10] KVM: MMU: awareness of new kvm_mmu_zap_page behaviour Marcelo Tosatti
2008-09-18 21:27 ` Marcelo Tosatti [this message]
2008-09-20  1:22   ` [patch 09/10] KVM: MMU: out of sync shadow core v2 Avi Kivity
2008-09-21  0:45     ` Marcelo Tosatti
2008-09-22 20:41       ` Avi Kivity
2008-09-22 21:55         ` Marcelo Tosatti
2008-09-22 22:51           ` Marcelo Tosatti
2008-09-23 10:46             ` Avi Kivity
2008-09-23 10:46           ` Avi Kivity
2008-09-23 13:17             ` Marcelo Tosatti
2008-09-18 21:27 ` [patch 10/10] KVM: MMU: speed up mmu_unsync_walk Marcelo Tosatti
2008-09-20  1:26   ` Avi Kivity
2008-09-21  0:45     ` Marcelo Tosatti
2008-09-22 20:43       ` Avi Kivity
2008-09-18 22:36 ` [patch 00/10] out of sync shadow v2 Marcelo Tosatti
2008-09-20  1:28   ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080918213337.148804603@localhost.localdomain \
    --to=mtosatti@redhat.com \
    --cc=avi@qumranet.com \
    --cc=daahern@cisco.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.