All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
To: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Gleb Natapov <gleb@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>, KVM <kvm@vger.kernel.org>
Subject: [PATCH v3 3/3] KVM: MMU: retry #PF for softmmu
Date: Tue, 30 Nov 2010 17:37:36 +0800	[thread overview]
Message-ID: <4CF4C5E0.2020301@cn.fujitsu.com> (raw)
In-Reply-To: <4CF4C535.8080405@cn.fujitsu.com>

Retry #PF for softmmu only when the current vcpu has the same root shadow page
as the time when #PF occurs. it means they have same paging environment

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_host.h |    5 +++++
 arch/x86/kvm/mmu.c              |   33 ++++++++++++++++++++++++++++++++-
 arch/x86/kvm/paging_tmpl.h      |   34 +++++++++++++++++++++++-----------
 arch/x86/kvm/x86.c              |   13 ++++++++++++-
 virt/kvm/async_pf.c             |    1 +
 5 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 209da89..5acbcab 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -191,6 +191,7 @@ union kvm_mmu_page_role {
 struct kvm_mmu_page {
 	struct list_head link;
 	struct hlist_node hash_link;
+	struct kref apfs_counter;
 
 	/*
 	 * The following two entries are used to key the shadow page in the
@@ -602,6 +603,7 @@ struct kvm_x86_ops {
 struct kvm_arch_async_pf {
 	u32 token;
 	gfn_t gfn;
+	struct kvm_mmu_page *root_sp;
 	bool direct_map;
 };
 
@@ -701,6 +703,8 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
 int fx_init(struct kvm_vcpu *vcpu);
 
+struct kvm_mmu_page *get_vcpu_root_sp(struct kvm_vcpu *vcpu, gva_t gva);
+void kvm_mmu_release_apf_sp(struct kvm_mmu_page *sp);
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes,
@@ -819,6 +823,7 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
+void kvm_arch_clear_async_pf(struct kvm_async_pf *work);
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 				     struct kvm_async_pf *work);
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f34987d..b9e1681 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -993,6 +993,19 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
 	percpu_counter_add(&kvm_total_used_mmu_pages, nr);
 }
 
+static void free_shadow_page(struct kref *kref)
+{
+	struct kvm_mmu_page *sp;
+
+	sp = container_of(kref, struct kvm_mmu_page, apfs_counter);
+	kmem_cache_free(mmu_page_header_cache, sp);
+}
+
+void kvm_mmu_release_apf_sp(struct kvm_mmu_page *sp)
+{
+	kref_put(&sp->apfs_counter, free_shadow_page);
+}
+
 static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	ASSERT(is_empty_shadow_page(sp->spt));
@@ -1001,7 +1014,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	__free_page(virt_to_page(sp->spt));
 	if (!sp->role.direct)
 		__free_page(virt_to_page(sp->gfns));
-	kmem_cache_free(mmu_page_header_cache, sp);
+	kvm_mmu_release_apf_sp(sp);
 	kvm_mod_used_mmu_pages(kvm, -1);
 }
 
@@ -1025,6 +1038,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 	bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
 	sp->multimapped = 0;
 	sp->parent_pte = parent_pte;
+	kref_init(&sp->apfs_counter);
 	kvm_mod_used_mmu_pages(vcpu->kvm, +1);
 	return sp;
 }
@@ -2603,12 +2617,29 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 			     error_code & PFERR_WRITE_MASK, gfn, prefault);
 }
 
+struct kvm_mmu_page *get_vcpu_root_sp(struct kvm_vcpu *vcpu, gva_t gva)
+{
+	struct kvm_shadow_walk_iterator iterator;
+	bool ret;
+
+	shadow_walk_init(&iterator, vcpu, gva);
+	ret = shadow_walk_okay(&iterator);
+	WARN_ON(!ret);
+
+	return page_header(__pa(iterator.sptep));
+}
+
 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 {
 	struct kvm_arch_async_pf arch;
+
 	arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
 	arch.gfn = gfn;
 	arch.direct_map = vcpu->arch.mmu.direct_map;
+	if (!arch.direct_map) {
+		arch.root_sp = get_vcpu_root_sp(vcpu, gva);
+		kref_get(&arch.root_sp->apfs_counter);
+	}
 
 	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
 }
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index f04162d..2ca0b67 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -116,7 +116,7 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
  */
 static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				    gva_t addr, u32 access)
+				    gva_t addr, u32 access, bool prefault)
 {
 	pt_element_t pte;
 	gfn_t table_gfn;
@@ -194,6 +194,13 @@ walk:
 #endif
 
 		if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {
+			/*
+			 * Don't set gpte accessed bit if it's on
+			 * speculative path.
+			 */
+			if (prefault)
+				goto error;
+
 			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
 						       sizeof(pte));
 			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
@@ -285,10 +292,11 @@ error:
 }
 
 static int FNAME(walk_addr)(struct guest_walker *walker,
-			    struct kvm_vcpu *vcpu, gva_t addr, u32 access)
+			    struct kvm_vcpu *vcpu, gva_t addr,
+			    u32 access, bool prefault)
 {
 	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr,
-					access);
+					access, prefault);
 }
 
 static int FNAME(walk_addr_nested)(struct guest_walker *walker,
@@ -296,7 +304,7 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
 				   u32 access)
 {
 	return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
-					addr, access);
+					addr, access, false);
 }
 
 static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
@@ -436,7 +444,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *gw,
 			 int user_fault, int write_fault, int hlevel,
-			 int *ptwrite, pfn_t pfn, bool map_writable)
+			 int *ptwrite, pfn_t pfn, bool map_writable,
+			 bool prefault)
 {
 	unsigned access = gw->pt_access;
 	struct kvm_mmu_page *sp = NULL;
@@ -510,7 +519,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
 		     user_fault, write_fault, dirty, ptwrite, it.level,
-		     gw->gfn, pfn, false, map_writable);
+		     gw->gfn, pfn, prefault, map_writable);
 	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
 
 	return it.sptep;
@@ -559,15 +568,18 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	/*
 	 * Look up the guest pte for the faulting address.
 	 */
-	r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
+	r = FNAME(walk_addr)(&walker, vcpu, addr, error_code, prefault);
 
 	/*
 	 * The page is not mapped by the guest.  Let the guest handle it.
 	 */
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
-		inject_page_fault(vcpu);
-		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
+		if (!prefault) {
+			inject_page_fault(vcpu);
+			/* reset fork detector */
+			vcpu->arch.last_pt_write_count = 0;
+		}
 		return 0;
 	}
 
@@ -597,7 +609,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
 	kvm_mmu_free_some_pages(vcpu);
 	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-			     level, &write_pt, pfn, map_writable);
+			     level, &write_pt, pfn, map_writable, prefault);
 	(void)sptep;
 	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 		 sptep, *sptep, write_pt);
@@ -683,7 +695,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
 	gpa_t gpa = UNMAPPED_GVA;
 	int r;
 
-	r = FNAME(walk_addr)(&walker, vcpu, vaddr, access);
+	r = FNAME(walk_addr)(&walker, vcpu, vaddr, access, false);
 
 	if (r) {
 		gpa = gfn_to_gpa(walker.gfn);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 83ed55f..33dcbce 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6181,7 +6181,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	if (!shadow_accessed_mask)
 		return;
 
-	if (!vcpu->arch.mmu.direct_map || !work->arch.direct_map ||
+	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
 	      is_error_page(work->page))
 		return;
 
@@ -6189,6 +6189,10 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	if (unlikely(r))
 		return;
 
+	if (!vcpu->arch.mmu.direct_map &&
+	      get_vcpu_root_sp(vcpu, work->gva) != work->arch.root_sp)
+		return;
+
 	vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
 }
 
@@ -6260,6 +6264,12 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
 				      sizeof(val));
 }
 
+void kvm_arch_clear_async_pf(struct kvm_async_pf *work)
+{
+	if (!work->arch.direct_map)
+		kvm_mmu_release_apf_sp(work->arch.root_sp);
+}
+
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 				     struct kvm_async_pf *work)
 {
@@ -6281,6 +6291,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 				 struct kvm_async_pf *work)
 {
 	trace_kvm_async_pf_ready(work->arch.token, work->gva);
+	kvm_arch_clear_async_pf(work);
 	if (is_error_page(work->page))
 		work->arch.token = ~0; /* broadcast wakeup */
 	else
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 74268b4..c3d4788 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -101,6 +101,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 				   typeof(*work), queue);
 		cancel_work_sync(&work->work);
 		list_del(&work->queue);
+		kvm_arch_clear_async_pf(work);
 		if (!work->done) /* work was canceled */
 			kmem_cache_free(async_pf_cache, work);
 	}
-- 
1.7.0.4

  parent reply	other threads:[~2010-11-30  9:33 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-30  9:34 [PATCH v3 1/3] KVM: MMU: rename 'no_apf' to 'prefault' Xiao Guangrong
2010-11-30  9:36 ` [PATCH v3 2/3] KVM: MMU: fix accessed bit set on prefault path Xiao Guangrong
2010-11-30 13:29   ` Gleb Natapov
2010-11-30 16:52     ` Xiao Guangrong
2010-11-30 17:50       ` Gleb Natapov
2010-11-30 18:15         ` Xiao Guangrong
2010-11-30 18:38           ` Gleb Natapov
2010-11-30 19:11             ` Xiao Guangrong
2010-11-30 19:20               ` Gleb Natapov
2010-12-01  2:20                 ` Xiao Guangrong
2010-12-01 20:42                   ` Marcelo Tosatti
2010-11-30  9:37 ` Xiao Guangrong [this message]
2010-12-02  1:19   ` [PATCH v3 3/3] KVM: MMU: retry #PF for softmmu Marcelo Tosatti
2010-12-02  3:44     ` Xiao Guangrong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CF4C5E0.2020301@cn.fujitsu.com \
    --to=xiaoguangrong@cn.fujitsu.com \
    --cc=avi@redhat.com \
    --cc=gleb@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.