public inbox for linux-kernel@vger.kernel.org
* [PATCH v3 1/10] KVM MMU: fix for calculating gpa in invlpg code
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
@ 2010-04-28  3:54 ` Xiao Guangrong
  2010-04-28  3:54 ` [PATCH v3 2/10] KVM MMU: convert mmu tracepoints Xiao Guangrong
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

If the guest is 32-bit, we should use 'quadrant' to adjust the gpa offset.

Changelog v3:
- use a smarter way to fix this bug, addressing Avi's suggestion

Changelog v2:
- when the level is PT_DIRECTORY_LEVEL, the 'offset' should be
  'role.quadrant << 8'; thanks to Avi for pointing it out
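
As a worked illustration (an editor's note, not part of the patch): for a
32-bit non-PAE guest, PT_LEVEL_BITS is 10 while the shadow's
PT64_LEVEL_BITS is 9, so one shadow page covers only part of a guest page
table and 'quadrant' selects which part:

	/* Worked example, assuming 32-bit non-PAE paging constants:
	 * PAGE_SHIFT = 12, PT_LEVEL_BITS = 10, PT64_LEVEL_BITS = 9.
	 */
	shift = PAGE_SHIFT - (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
	/* level 1 (pte page): shift = 11, quadrant 0..1 -> offset 0x000/0x800 */
	/* level 2 (pde page): shift = 10, quadrant 0..3 -> offset 0x000..0xc00 */
	/* 64-bit guest: PT_LEVEL_BITS == PT64_LEVEL_BITS, so shift = 12 and
	 * quadrant = 0, leaving pte_gpa unchanged */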

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/paging_tmpl.h |    7 ++++++-
 1 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d0cc07e..3464fdb 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -478,8 +478,13 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
 			struct kvm_mmu_page *sp = page_header(__pa(sptep));
+			int offset, shift;
 
-			pte_gpa = (sp->gfn << PAGE_SHIFT);
+			shift = PAGE_SHIFT -
+				  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
+			offset = sp->role.quadrant << shift;
+
+			pte_gpa = (sp->gfn << PAGE_SHIFT) + offset;
 			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
-- 
1.6.1.2

* [PATCH v3 2/10] KVM MMU: convert mmu tracepoints
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
  2010-04-28  3:54 ` [PATCH v3 1/10] KVM MMU: fix for calculating gpa in invlpg code Xiao Guangrong
@ 2010-04-28  3:54 ` Xiao Guangrong
  2010-04-28  3:55 ` [PATCH v3 3/10] KVM MMU: move unsync/sync tracepoints to proper place Xiao Guangrong
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Convert the mmu tracepoints to use DECLARE_EVENT_CLASS, so events that
share the same layout reuse one event class.
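
The pattern, as a minimal sketch with hypothetical names (the real
conversion is in the diff below): DECLARE_EVENT_CLASS defines the shared
TP_STRUCT__entry/TP_fast_assign/TP_printk once, and each DEFINE_EVENT
instantiates a tracepoint that reuses them:

	DECLARE_EVENT_CLASS(example_class,
		TP_PROTO(int arg),
		TP_ARGS(arg),
		TP_STRUCT__entry(__field(int, arg)),
		TP_fast_assign(__entry->arg = arg;),
		TP_printk("arg %d", __entry->arg)
	);

	/* each event reuses the class's entry/assign/printk definitions */
	DEFINE_EVENT(example_class, example_event,
		TP_PROTO(int arg),
		TP_ARGS(arg)
	);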

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmutrace.h |   69 +++++++++++++++++-----------------------------
 1 files changed, 26 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 40a1786..42f07b1 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -92,15 +92,15 @@ TRACE_EVENT(
 	TP_printk("pte %llx level %u", __entry->pte, __entry->level)
 );
 
-/* We set a pte accessed bit */
-TRACE_EVENT(
-	kvm_mmu_set_accessed_bit,
+DECLARE_EVENT_CLASS(kvm_mmu_set_bit_class,
+
 	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
+
 	TP_ARGS(table_gfn, index, size),
 
 	TP_STRUCT__entry(
 		__field(__u64, gpa)
-		),
+	),
 
 	TP_fast_assign(
 		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
@@ -110,22 +110,20 @@ TRACE_EVENT(
 	TP_printk("gpa %llx", __entry->gpa)
 );
 
-/* We set a pte dirty bit */
-TRACE_EVENT(
-	kvm_mmu_set_dirty_bit,
+/* We set a pte accessed bit */
+DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_accessed_bit,
+
 	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
-	TP_ARGS(table_gfn, index, size),
 
-	TP_STRUCT__entry(
-		__field(__u64, gpa)
-		),
+	TP_ARGS(table_gfn, index, size)
+);
 
-	TP_fast_assign(
-		__entry->gpa = ((u64)table_gfn << PAGE_SHIFT)
-				+ index * size;
-		),
+/* We set a pte dirty bit */
+DEFINE_EVENT(kvm_mmu_set_bit_class, kvm_mmu_set_dirty_bit,
 
-	TP_printk("gpa %llx", __entry->gpa)
+	TP_PROTO(unsigned long table_gfn, unsigned index, unsigned size),
+
+	TP_ARGS(table_gfn, index, size)
 );
 
 TRACE_EVENT(
@@ -164,54 +162,39 @@ TRACE_EVENT(
 		  __entry->created ? "new" : "existing")
 );
 
-TRACE_EVENT(
-	kvm_mmu_sync_page,
+DECLARE_EVENT_CLASS(kvm_mmu_page_class,
+
 	TP_PROTO(struct kvm_mmu_page *sp),
 	TP_ARGS(sp),
 
 	TP_STRUCT__entry(
 		KVM_MMU_PAGE_FIELDS
-		),
+	),
 
 	TP_fast_assign(
 		KVM_MMU_PAGE_ASSIGN(sp)
-		),
+	),
 
 	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
 );
 
-TRACE_EVENT(
-	kvm_mmu_unsync_page,
+DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_sync_page,
 	TP_PROTO(struct kvm_mmu_page *sp),
-	TP_ARGS(sp),
-
-	TP_STRUCT__entry(
-		KVM_MMU_PAGE_FIELDS
-		),
-
-	TP_fast_assign(
-		KVM_MMU_PAGE_ASSIGN(sp)
-		),
 
-	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+	TP_ARGS(sp)
 );
 
-TRACE_EVENT(
-	kvm_mmu_zap_page,
+DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_unsync_page,
 	TP_PROTO(struct kvm_mmu_page *sp),
-	TP_ARGS(sp),
 
-	TP_STRUCT__entry(
-		KVM_MMU_PAGE_FIELDS
-		),
+	TP_ARGS(sp)
+);
 
-	TP_fast_assign(
-		KVM_MMU_PAGE_ASSIGN(sp)
-		),
+DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_zap_page,
+	TP_PROTO(struct kvm_mmu_page *sp),
 
-	TP_printk("%s", KVM_MMU_PAGE_PRINTK())
+	TP_ARGS(sp)
 );
-
 #endif /* _TRACE_KVMMMU_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
1.6.1.2

* [PATCH v3 3/10] KVM MMU: move unsync/sync tracepoints to proper place
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
  2010-04-28  3:54 ` [PATCH v3 1/10] KVM MMU: fix for calculating gpa in invlpg code Xiao Guangrong
  2010-04-28  3:54 ` [PATCH v3 2/10] KVM MMU: convert mmu tracepoints Xiao Guangrong
@ 2010-04-28  3:55 ` Xiao Guangrong
  2010-04-28  3:55 ` [PATCH v3 4/10] KVM MMU: cleanup invlpg code Xiao Guangrong
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Move the unsync/sync tracepoints to their proper place, so that we can
observe an unsync page's lifetime.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9696d65..e1ac325 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1189,6 +1189,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!sp->unsync);
+	trace_kvm_mmu_sync_page(sp);
 	sp->unsync = 0;
 	--kvm->stat.mmu_unsync;
 }
@@ -1202,7 +1203,6 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		return 1;
 	}
 
-	trace_kvm_mmu_sync_page(sp);
 	if (rmap_write_protect(vcpu->kvm, sp->gfn))
 		kvm_flush_remote_tlbs(vcpu->kvm);
 	kvm_unlink_unsync_page(vcpu->kvm, sp);
@@ -1730,7 +1730,6 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	struct kvm_mmu_page *s;
 	struct hlist_node *node, *n;
 
-	trace_kvm_mmu_unsync_page(sp);
 	index = kvm_page_table_hashfn(sp->gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	/* don't unsync if pagetable is shadowed with multiple roles */
@@ -1740,6 +1739,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 		if (s->role.word != sp->role.word)
 			return 1;
 	}
+	trace_kvm_mmu_unsync_page(sp);
 	++vcpu->kvm->stat.mmu_unsync;
 	sp->unsync = 1;
 
-- 
1.6.1.2

* [PATCH v3 4/10] KVM MMU: cleanup invlpg code
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
                   ` (2 preceding siblings ...)
  2010-04-28  3:55 ` [PATCH v3 3/10] KVM MMU: move unsync/sync tracepoints to proper place Xiao Guangrong
@ 2010-04-28  3:55 ` Xiao Guangrong
  2010-04-28  3:55 ` [PATCH v3 5/10] KVM MMU: split kvm_sync_page() function Xiao Guangrong
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Use is_last_spte() to clean up the invlpg code.
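
For reference, the helper already in mmu.c encapsulates exactly the
condition being removed; its logic is roughly (sketch):

	static int is_last_spte(u64 pte, int level)
	{
		if (level == PT_PAGE_TABLE_LEVEL)
			return 1;
		if (is_large_pte(pte))
			return 1;
		return 0;
	}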

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/paging_tmpl.h |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3464fdb..89d66ca 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -474,9 +474,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		level = iterator.level;
 		sptep = iterator.sptep;
 
-		if (level == PT_PAGE_TABLE_LEVEL  ||
-		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
-		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
+		if (is_last_spte(*sptep, level)) {
 			struct kvm_mmu_page *sp = page_header(__pa(sptep));
 			int offset, shift;
 
-- 
1.6.1.2

* [PATCH v3 5/10] KVM MMU: split kvm_sync_page() function
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
                   ` (3 preceding siblings ...)
  2010-04-28  3:55 ` [PATCH v3 4/10] KVM MMU: cleanup invlpg code Xiao Guangrong
@ 2010-04-28  3:55 ` Xiao Guangrong
  2010-04-28  3:55 ` [PATCH v3 6/10] KVM MMU: don't write-protect if we have a new mapping to an unsync page Xiao Guangrong
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Split kvm_sync_page() into kvm_sync_page() and kvm_sync_page_transient()
to clarify the code, addressing Avi's suggestion.

kvm_sync_page_transient() only updates the shadow page, but does not
mark it sync and does not write-protect sp->gfn. It will be used by a
later patch.
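
Usage sketch (hypothetical caller, showing the two entry points):

	/* full sync: clears sp->unsync and write-protects sp->gfn */
	kvm_sync_page(vcpu, sp);

	/* transient sync: refreshes the sptes only; sp stays unsync, its
	 * gfn stays writable, and the notrap state is restored */
	kvm_sync_page_transient(vcpu, sp);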

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |   28 ++++++++++++++++++++++++----
 1 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e1ac325..ec283c3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1196,16 +1196,20 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
-static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+			   bool clear_unsync)
 {
 	if (sp->role.cr4_pae != !!is_pae(vcpu)) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 		return 1;
 	}
 
-	if (rmap_write_protect(vcpu->kvm, sp->gfn))
-		kvm_flush_remote_tlbs(vcpu->kvm);
-	kvm_unlink_unsync_page(vcpu->kvm, sp);
+	if (clear_unsync) {
+		if (rmap_write_protect(vcpu->kvm, sp->gfn))
+			kvm_flush_remote_tlbs(vcpu->kvm);
+		kvm_unlink_unsync_page(vcpu->kvm, sp);
+	}
+
 	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 		return 1;
@@ -1215,6 +1219,22 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	return 0;
 }
 
+static void mmu_convert_notrap(struct kvm_mmu_page *sp);
+static int kvm_sync_page_transient(struct kvm_vcpu *vcpu,
+				   struct kvm_mmu_page *sp)
+{
+	int ret;
+
+	ret = __kvm_sync_page(vcpu, sp, false);
+	mmu_convert_notrap(sp);
+	return ret;
+}
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	return __kvm_sync_page(vcpu, sp, true);
+}
+
 struct mmu_page_path {
 	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
 	unsigned int idx[PT64_ROOT_LEVEL-1];
-- 
1.6.1.2

* [PATCH v3 6/10] KVM MMU: don't write-protect if we have a new mapping to an unsync page
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
                   ` (4 preceding siblings ...)
  2010-04-28  3:55 ` [PATCH v3 5/10] KVM MMU: split kvm_sync_page() function Xiao Guangrong
@ 2010-04-28  3:55 ` Xiao Guangrong
  2010-04-28  3:55 ` [PATCH v3 7/10] KVM MMU: allow more pages to become unsync at gfn mapping time Xiao Guangrong
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Two cases may happen in kvm_mmu_get_page():

- in one case, the wanted sp is already in the cache: if the sp is
  unsync, we only need to update it to keep this mapping valid, but we
  do not mark it sync and do not write-protect sp->gfn, since that does
  not break the unsync rule (one shadow page per gfn)

- in the other case, the wanted sp does not exist: we need to create a
  new sp for the gfn, i.e., the gfn may have another shadow page, so to
  keep the unsync rule we should sync (mark sync and write-protect) the
  gfn's unsync shadow page. After multiple unsync shadows are enabled,
  we sync those shadow pages only when the new sp is not allowed to
  become unsync (also per the unsync rule; the new rule is: allow all
  pte pages to become unsync). See the sketch after this list.
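
In sketch form (an editor's summary; the authoritative change is the
hunk below):

	/* kvm_mmu_get_page(), unsync handling (sketch):
	 * - an existing sp matches the wanted role: any unsync sp for the
	 *   gfn is synced transiently (sptes updated, sp->unsync kept,
	 *   gfn left writable)
	 * - no sp matches the role: any unsync sp for the gfn is fully
	 *   synced (gfn write-protected, sp->unsync cleared) before a new
	 *   sp is created
	 */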

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |   14 +++++++++++---
 1 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ec283c3..fb0c33c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1333,7 +1333,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	unsigned index;
 	unsigned quadrant;
 	struct hlist_head *bucket;
-	struct kvm_mmu_page *sp;
+	struct kvm_mmu_page *sp, *unsync_sp = NULL;
 	struct hlist_node *node, *tmp;
 
 	role = vcpu->arch.mmu.base_role;
@@ -1352,12 +1352,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
 		if (sp->gfn == gfn) {
 			if (sp->unsync)
-				if (kvm_sync_page(vcpu, sp))
-					continue;
+				unsync_sp = sp;
 
 			if (sp->role.word != role.word)
 				continue;
 
+			if (!direct && unsync_sp &&
+			      kvm_sync_page_transient(vcpu, unsync_sp)) {
+				unsync_sp = NULL;
+				break;
+			}
+
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 			if (sp->unsync_children) {
 				set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
@@ -1366,6 +1371,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			trace_kvm_mmu_get_page(sp, false);
 			return sp;
 		}
+	if (!direct && unsync_sp)
+		kvm_sync_page(vcpu, unsync_sp);
+
 	++vcpu->kvm->stat.mmu_cache_miss;
 	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
 	if (!sp)
-- 
1.6.1.2

* [PATCH v3 7/10] KVM MMU: allow more pages to become unsync at gfn mapping time
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
                   ` (5 preceding siblings ...)
  2010-04-28  3:55 ` [PATCH v3 6/10] KVM MMU: don't write-protect if we have a new mapping to an unsync page Xiao Guangrong
@ 2010-04-28  3:55 ` Xiao Guangrong
  2010-04-30 19:33   ` Marcelo Tosatti
  2010-04-28  3:55 ` [PATCH v3 8/10] KVM MMU: allow more pages to become unsync at getting sp time Xiao Guangrong
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

In the current code, a shadow page can become unsync only if it is the
sole shadow page for its gfn. This rule is too strict: in fact, we can
let every last-level mapping page (i.e., a pte page) become unsync, and
sync them at invlpg or TLB-flush time.

This patch allows more pages to become unsync at gfn mapping time.
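
The resulting write-protect policy, in sketch form (an editor's summary
of the mmu_need_write_protect() rewrite below):

	/* for each shadow page of gfn (sketch):
	 * - a non-last-level sp (level > PT_PAGE_TABLE_LEVEL)
	 *   -> must keep gfn write-protected
	 * - only last-level sps, and unsync permitted
	 *   (can_unsync && oos_shadow)
	 *   -> unsync every last-level sp of gfn instead of
	 *      write-protecting it
	 */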

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |   81 +++++++++++++++++++++++----------------------------
 1 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fb0c33c..a60cd51 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1166,26 +1166,6 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 	return __mmu_unsync_walk(sp, pvec);
 }
 
-static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
-{
-	unsigned index;
-	struct hlist_head *bucket;
-	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
-
-	pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
-	index = kvm_page_table_hashfn(gfn);
-	bucket = &kvm->arch.mmu_page_hash[index];
-	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && !sp->role.direct
-		    && !sp->role.invalid) {
-			pgprintk("%s: found role %x\n",
-				 __func__, sp->role.word);
-			return sp;
-		}
-	return NULL;
-}
-
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	WARN_ON(!sp->unsync);
@@ -1751,47 +1731,60 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
 
-static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	trace_kvm_mmu_unsync_page(sp);
+	++vcpu->kvm->stat.mmu_unsync;
+	sp->unsync = 1;
+
+	kvm_mmu_mark_parents_unsync(sp);
+	mmu_convert_notrap(sp);
+}
+
+static void kvm_unsync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn)
 {
-	unsigned index;
 	struct hlist_head *bucket;
 	struct kvm_mmu_page *s;
 	struct hlist_node *node, *n;
+	unsigned index;
 
-	index = kvm_page_table_hashfn(sp->gfn);
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
-	/* don't unsync if pagetable is shadowed with multiple roles */
+
 	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
-		if (s->gfn != sp->gfn || s->role.direct)
+		if (s->gfn != gfn || s->role.direct || s->unsync)
 			continue;
-		if (s->role.word != sp->role.word)
-			return 1;
+		WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
+		__kvm_unsync_page(vcpu, s);
 	}
-	trace_kvm_mmu_unsync_page(sp);
-	++vcpu->kvm->stat.mmu_unsync;
-	sp->unsync = 1;
-
-	kvm_mmu_mark_parents_unsync(sp);
-
-	mmu_convert_notrap(sp);
-	return 0;
 }
 
 static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 				  bool can_unsync)
 {
-	struct kvm_mmu_page *shadow;
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *s;
+	struct hlist_node *node, *n;
+	bool need_unsync = false;
+
+	index = kvm_page_table_hashfn(gfn);
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+		if (s->gfn != gfn || s->role.direct)
+			continue;
 
-	shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-	if (shadow) {
-		if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+		if (s->role.level != PT_PAGE_TABLE_LEVEL)
 			return 1;
-		if (shadow->unsync)
-			return 0;
-		if (can_unsync && oos_shadow)
-			return kvm_unsync_page(vcpu, shadow);
-		return 1;
+
+		if (!need_unsync && !s->unsync) {
+			if (!can_unsync || !oos_shadow)
+				return 1;
+			need_unsync = true;
+		}
 	}
+	if (need_unsync)
+		kvm_unsync_pages(vcpu, gfn);
 	return 0;
 }
 
-- 
1.6.1.2

* [PATCH v3 8/10] KVM MMU: allow more pages to become unsync at getting sp time
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
                   ` (6 preceding siblings ...)
  2010-04-28  3:55 ` [PATCH v3 7/10] KVM MMU: allow more page become unsync at gfn mapping time Xiao Guangrong
@ 2010-04-28  3:55 ` Xiao Guangrong
  2010-04-28  3:56 ` [PATCH v3 9/10] KVM MMU: separate invlpg code from kvm_mmu_pte_write() Xiao Guangrong
  2010-04-28  3:56 ` [PATCH v3 10/10] KVM MMU: optimize sync/update unsync-page Xiao Guangrong
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:55 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Allow more pages to become unsync at kvm_mmu_get_page() time. If we need
to create a new shadow page for a gfn, but the new page is not allowed
to become unsync (level > 1), we should sync all of the gfn's unsync
pages.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c |   47 +++++++++++++++++++++++++++++++++++++----------
 1 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a60cd51..ef32449 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1215,6 +1215,35 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	return __kvm_sync_page(vcpu, sp, true);
 }
 
+/* @gfn should be write-protected at the call site */
+static void kvm_sync_pages(struct kvm_vcpu *vcpu,  gfn_t gfn)
+{
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *s;
+	struct hlist_node *node, *n;
+	unsigned index;
+	bool flush = false;
+
+	index = kvm_page_table_hashfn(gfn);
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+		if (s->gfn != gfn || !s->unsync)
+			continue;
+
+		WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
+		if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
+			(vcpu->arch.mmu.sync_page(vcpu, s))) {
+			kvm_mmu_zap_page(vcpu->kvm, s);
+			continue;
+		}
+		kvm_unlink_unsync_page(vcpu->kvm, s);
+		flush = true;
+	}
+
+	if (flush)
+		kvm_mmu_flush_tlb(vcpu);
+}
+
 struct mmu_page_path {
 	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
 	unsigned int idx[PT64_ROOT_LEVEL-1];
@@ -1313,8 +1342,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	unsigned index;
 	unsigned quadrant;
 	struct hlist_head *bucket;
-	struct kvm_mmu_page *sp, *unsync_sp = NULL;
+	struct kvm_mmu_page *sp;
 	struct hlist_node *node, *tmp;
+	bool need_sync = false;
 
 	role = vcpu->arch.mmu.base_role;
 	role.level = level;
@@ -1331,17 +1361,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
 		if (sp->gfn == gfn) {
-			if (sp->unsync)
-				unsync_sp = sp;
+			if (!need_sync && sp->unsync)
+				need_sync = true;
 
 			if (sp->role.word != role.word)
 				continue;
 
-			if (!direct && unsync_sp &&
-			      kvm_sync_page_transient(vcpu, unsync_sp)) {
-				unsync_sp = NULL;
+			if (sp->unsync && kvm_sync_page_transient(vcpu, sp))
 				break;
-			}
 
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 			if (sp->unsync_children) {
@@ -1351,9 +1378,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			trace_kvm_mmu_get_page(sp, false);
 			return sp;
 		}
-	if (!direct && unsync_sp)
-		kvm_sync_page(vcpu, unsync_sp);
-
 	++vcpu->kvm->stat.mmu_cache_miss;
 	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
 	if (!sp)
@@ -1364,6 +1388,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	if (!direct) {
 		if (rmap_write_protect(vcpu->kvm, gfn))
 			kvm_flush_remote_tlbs(vcpu->kvm);
+		if (level > PT_PAGE_TABLE_LEVEL && need_sync)
+			kvm_sync_pages(vcpu, gfn);
+
 		account_shadowed(vcpu->kvm, gfn);
 	}
 	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
-- 
1.6.1.2

* [PATCH v3 9/10] KVM MMU: separate invlpg code from kvm_mmu_pte_write()
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
                   ` (7 preceding siblings ...)
  2010-04-28  3:55 ` [PATCH v3 8/10] KVM MMU: allow more pages to become unsync at getting sp time Xiao Guangrong
@ 2010-04-28  3:56 ` Xiao Guangrong
  2010-04-28  3:56 ` [PATCH v3 10/10] KVM MMU: optimize sync/update unsync-page Xiao Guangrong
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:56 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

Make invlpg not depend on the kvm_mmu_pte_write() path; a later patch
will need this.
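
The new invlpg tail, in sketch form (an editor's summary mirroring the
hunks below):

	/* 1. record gfn/offset/sptep while zapping the last-level spte
	 * 2. read the guest pte with kvm_read_guest_page()
	 * 3. retake mmu_lock; if no other invlpg ran meanwhile
	 *    (invlpg_counter unchanged), update the spte via
	 *    FNAME(update_pte) instead of going through
	 *    kvm_mmu_pte_write()
	 */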

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c         |   22 +++++++++++++---------
 arch/x86/kvm/paging_tmpl.h |   36 +++++++++++++++++++++++-------------
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef32449..aa6faa7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2308,6 +2308,10 @@ static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
 	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
+static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+					  u64 gpte);
+static void mmu_release_page_from_pte_write(struct kvm_vcpu *vcpu);
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2619,6 +2623,14 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	vcpu->arch.update_pte.pfn = pfn;
 }
 
+static void mmu_release_page_from_pte_write(struct kvm_vcpu *vcpu)
+{
+	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+		kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+		vcpu->arch.update_pte.pfn = bad_pfn;
+	}
+}
+
 static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	u64 *spte = vcpu->arch.last_pte_updated;
@@ -2651,12 +2663,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int flooded = 0;
 	int npte;
 	int r;
-	int invlpg_counter;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
-
 	/*
 	 * Assume that the pte write on a page table of the same type
 	 * as the current vcpu paging mode.  This is nearly always true
@@ -2689,8 +2698,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 	mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
-		gentry = 0;
 	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
@@ -2767,10 +2774,7 @@ restart:
 	}
 	kvm_mmu_audit(vcpu, "post pte write");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
-		kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
-		vcpu->arch.update_pte.pfn = bad_pfn;
-	}
+	mmu_release_page_from_pte_write(vcpu);
 }
 
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 89d66ca..749cb1a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -462,11 +462,11 @@ out_unlock:
 
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	struct kvm_mmu_page *sp = NULL;
 	struct kvm_shadow_walk_iterator iterator;
-	gpa_t pte_gpa = -1;
-	int level;
-	u64 *sptep;
-	int need_flush = 0;
+	gfn_t gfn = -1;
+	u64 *sptep = NULL, gentry;
+	int invlpg_counter, level, offset = 0, need_flush = 0;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
@@ -475,15 +475,14 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		sptep = iterator.sptep;
 
 		if (is_last_spte(*sptep, level)) {
-			struct kvm_mmu_page *sp = page_header(__pa(sptep));
-			int offset, shift;
+			int shift;
 
+			sp = page_header(__pa(sptep));
 			shift = PAGE_SHIFT -
 				  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
+			gfn = sp->gfn;
 			offset = sp->role.quadrant << shift;
-
-			pte_gpa = (sp->gfn << PAGE_SHIFT) + offset;
-			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+			offset += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -502,16 +501,27 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
 
-	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
-
+	invlpg_counter = atomic_add_return(1, &vcpu->kvm->arch.invlpg_counter);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
-	if (pte_gpa == -1)
+	if (gfn == -1)
 		return;
 
 	if (mmu_topup_memory_caches(vcpu))
 		return;
-	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
+	if (kvm_read_guest_page(vcpu->kvm, gfn, &gentry, offset,
+			sizeof(pt_element_t)))
+		return;
+
+	mmu_guess_page_from_pte_write(vcpu, gfn_to_gpa(gfn) + offset, gentry);
+	spin_lock(&vcpu->kvm->mmu_lock);
+	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) == invlpg_counter &&
+			sp->role.level == PT_PAGE_TABLE_LEVEL) {
+		++vcpu->kvm->stat.mmu_pte_updated;
+		FNAME(update_pte)(vcpu, sp, sptep, &gentry);
+	}
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	mmu_release_page_from_pte_write(vcpu);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
-- 
1.6.1.2

* [PATCH v3 10/10] KVM MMU: optimize sync/update unsync-page
       [not found] <4BD7AE34.5000408@cn.fujitsu.com>
                   ` (8 preceding siblings ...)
  2010-04-28  3:56 ` [PATCH v3 9/10] KVM MMU: separate invlpg code from kvm_mmu_pte_write() Xiao Guangrong
@ 2010-04-28  3:56 ` Xiao Guangrong
  9 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-04-28  3:56 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, KVM list, LKML

invlpg only needs to update unsync pages, and sp->unsync and
sp->unsync_children help us find them.

Now a gfn may have many shadow pages. When one sp needs to be synced,
we write-protect sp->gfn and sync that sp, but we keep the other shadow
pages unsync.

So, when a page fault happens on the gfn, it does not touch unsync
pages; unsync pages are only updated at invlpg/TLB-flush time.
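
The invlpg walk policy after this patch, in sketch form (an editor's
summary of the hunks below):

	/* - a last-level spte is updated only if its sp is unsync
	 * - an intermediate level is descended only while the sp still
	 *   has unsync children (sp->unsync_children)
	 */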

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/kvm/mmu.c         |    3 ++-
 arch/x86/kvm/paging_tmpl.h |   11 +++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aa6faa7..c093d69 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2719,7 +2719,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 restart:
 	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
-		if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
+		if (sp->gfn != gfn || sp->role.direct || sp->role.invalid ||
+		      sp->unsync)
 			continue;
 		pte_size = sp->role.cr4_pae ? 8 : 4;
 		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 749cb1a..624b38f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -474,10 +474,14 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		level = iterator.level;
 		sptep = iterator.sptep;
 
+		sp = page_header(__pa(sptep));
 		if (is_last_spte(*sptep, level)) {
 			int shift;
 
-			sp = page_header(__pa(sptep));
+			if (!sp->unsync)
+				break;
+
+			WARN_ON(level != PT_PAGE_TABLE_LEVEL);
 			shift = PAGE_SHIFT -
 				  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
 			gfn = sp->gfn;
@@ -494,7 +498,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 			break;
 		}
 
-		if (!is_shadow_present_pte(*sptep))
+		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
 			break;
 	}
 
@@ -515,8 +519,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 
 	mmu_guess_page_from_pte_write(vcpu, gfn_to_gpa(gfn) + offset, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) == invlpg_counter &&
-			sp->role.level == PT_PAGE_TABLE_LEVEL) {
+	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) == invlpg_counter) {
 		++vcpu->kvm->stat.mmu_pte_updated;
 		FNAME(update_pte)(vcpu, sp, sptep, &gentry);
 	}
-- 
1.6.1.2

* Re: [PATCH v3 7/10] KVM MMU: allow more pages to become unsync at gfn mapping time
  2010-04-28  3:55 ` [PATCH v3 7/10] KVM MMU: allow more pages to become unsync at gfn mapping time Xiao Guangrong
@ 2010-04-30 19:33   ` Marcelo Tosatti
  2010-05-05 12:16     ` Xiao Guangrong
  0 siblings, 1 reply; 12+ messages in thread
From: Marcelo Tosatti @ 2010-04-30 19:33 UTC (permalink / raw)
  To: Xiao Guangrong; +Cc: Avi Kivity, KVM list, LKML

On Wed, Apr 28, 2010 at 11:55:49AM +0800, Xiao Guangrong wrote:
> In the current code, a shadow page can become unsync only if it is the
> sole shadow page for its gfn. This rule is too strict: in fact, we can
> let every last-level mapping page (i.e., a pte page) become unsync, and
> sync them at invlpg or TLB-flush time.
>
> This patch allows more pages to become unsync at gfn mapping time.
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>

Xiao,

This patch breaks Fedora 8 32-bit install. Reverted patches 5-10.

* Re: [PATCH v3 7/10] KVM MMU: allow more pages to become unsync at gfn mapping time
  2010-04-30 19:33   ` Marcelo Tosatti
@ 2010-05-05 12:16     ` Xiao Guangrong
  0 siblings, 0 replies; 12+ messages in thread
From: Xiao Guangrong @ 2010-05-05 12:16 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Avi Kivity, KVM list, LKML



Marcelo Tosatti wrote:
> On Wed, Apr 28, 2010 at 11:55:49AM +0800, Xiao Guangrong wrote:
>> In the current code, a shadow page can become unsync only if it is the
>> sole shadow page for its gfn. This rule is too strict: in fact, we can
>> let every last-level mapping page (i.e., a pte page) become unsync, and
>> sync them at invlpg or TLB-flush time.
>>
>> This patch allows more pages to become unsync at gfn mapping time.
>>
>> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> 
> Xiao,
> 
> This patch breaks Fedora 8 32-bit install. Reverted patches 5-10.

Hi Marcelo,

Sorry for the delayed reply; I'm on holiday.

I have found the cause of this issue; two fix patches will be sent
soon. Could you please try them?

Thanks,
Xiao

Thread overview: 12+ messages
     [not found] <4BD7AE34.5000408@cn.fujitsu.com>
2010-04-28  3:54 ` [PATCH v3 1/10] KVM MMU: fix for calculating gpa in invlpg code Xiao Guangrong
2010-04-28  3:54 ` [PATCH v3 2/10] KVM MMU: convert mmu tracepoints Xiao Guangrong
2010-04-28  3:55 ` [PATCH v3 3/10] KVM MMU: move unsync/sync tracepoints to proper place Xiao Guangrong
2010-04-28  3:55 ` [PATCH v3 4/10] KVM MMU: cleanup invlpg code Xiao Guangrong
2010-04-28  3:55 ` [PATCH v3 5/10] KVM MMU: split kvm_sync_page() function Xiao Guangrong
2010-04-28  3:55 ` [PATCH v3 6/10] KVM MMU: don't write-protect if we have a new mapping to an unsync page Xiao Guangrong
2010-04-28  3:55 ` [PATCH v3 7/10] KVM MMU: allow more pages to become unsync at gfn mapping time Xiao Guangrong
2010-04-30 19:33   ` Marcelo Tosatti
2010-05-05 12:16     ` Xiao Guangrong
2010-04-28  3:55 ` [PATCH v3 8/10] KVM MMU: allow more pages to become unsync at getting sp time Xiao Guangrong
2010-04-28  3:56 ` [PATCH v3 9/10] KVM MMU: separate invlpg code from kvm_mmu_pte_write() Xiao Guangrong
2010-04-28  3:56 ` [PATCH v3 10/10] KVM MMU: optimize sync/update unsync-page Xiao Guangrong
