All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
To: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>, KVM <kvm@vger.kernel.org>
Subject: [PATCH v4 03/11] KVM: x86: retry non-page-table writing instructions
Date: Thu, 22 Sep 2011 17:02:48 +0800	[thread overview]
Message-ID: <4E7AF9B8.4030500@cn.fujitsu.com> (raw)
In-Reply-To: <4E7AF758.7060900@cn.fujitsu.com>

If the emulation is caused by #PF and it is non-page_table writing instruction,
it means the VM-EXIT is caused by shadow page protected, we can zap the shadow
page and retry this instruction directly

The idea is from Avi

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
---
 arch/x86/include/asm/kvm_emulate.h |    1 +
 arch/x86/include/asm/kvm_host.h    |    5 ++++
 arch/x86/kvm/emulate.c             |    5 ++++
 arch/x86/kvm/mmu.c                 |   25 ++++++++++++++----
 arch/x86/kvm/x86.c                 |   47 ++++++++++++++++++++++++++++++++++++
 5 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index a026507..9a4acf4 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -364,6 +364,7 @@ enum x86_intercept {
 #endif
 
 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ab4241..27a25df 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -443,6 +443,9 @@ struct kvm_vcpu_arch {
 
 	cpumask_var_t wbinvd_dirty_mask;
 
+	unsigned long last_retry_eip;
+	unsigned long last_retry_addr;
+
 	struct {
 		bool halted;
 		gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
@@ -689,6 +692,7 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	    (1 << 0)
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
+#define EMULTYPE_RETRY		    (1 << 3)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -753,6 +757,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes,
 		       bool guest_initiated);
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a10950a..8547958 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3702,6 +3702,11 @@ done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
+bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
+{
+	return ctxt->d & PageTable;
+}
+
 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
 {
 	/* The second termination condition only applies for REPE
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b01afee..4e53d6b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1997,7 +1997,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
 	kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages;
 }
 
-static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
+int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node;
@@ -2006,7 +2006,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
 	pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
 	r = 0;
-
+	spin_lock(&kvm->mmu_lock);
 	for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) {
 		pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
 			 sp->role.word);
@@ -2014,8 +2014,11 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 		kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
 	}
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
+	spin_unlock(&kvm->mmu_lock);
+
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
 
 static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
@@ -3697,9 +3700,8 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 
 	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
-	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	spin_unlock(&vcpu->kvm->mmu_lock);
+
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
@@ -3720,10 +3722,18 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 }
 
+static bool is_mmio_page_fault(struct kvm_vcpu *vcpu, gva_t addr)
+{
+	if (vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu))
+		return vcpu_match_mmio_gpa(vcpu, addr);
+
+	return vcpu_match_mmio_gva(vcpu, addr);
+}
+
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		       void *insn, int insn_len)
 {
-	int r;
+	int r, emulation_type = EMULTYPE_RETRY;
 	enum emulation_result er;
 
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
@@ -3735,7 +3745,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
 		goto out;
 	}
 
-	er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
+	if (is_mmio_page_fault(vcpu, cr2))
+		emulation_type = 0;
+
+	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
 	switch (er) {
 	case EMULATE_DONE:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b37f18..727a6af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4814,6 +4814,50 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	return false;
 }
 
+static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
+			      unsigned long cr2,  int emulation_type)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
+
+	last_retry_eip = vcpu->arch.last_retry_eip;
+	last_retry_addr = vcpu->arch.last_retry_addr;
+
+	/*
+	 * If the emulation is caused by #PF and it is non-page_table
+	 * writing instruction, it means the VM-EXIT is caused by shadow
+	 * page protected, we can zap the shadow page and retry this
+	 * instruction directly.
+	 *
+	 * Note: if the guest uses a non-page-table modifying instruction
+	 * on the PDE that points to the instruction, then we will unmap
+	 * the instruction and go to an infinite loop. So, we cache the
+	 * last retried eip and the last fault address, if we meet the eip
+	 * and the address again, we can break out of the potential infinite
+	 * loop.
+	 */
+	vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
+
+	if (!(emulation_type & EMULTYPE_RETRY))
+		return false;
+
+	if (x86_page_table_writing_insn(ctxt))
+		return false;
+
+	if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
+		return false;
+
+	vcpu->arch.last_retry_eip = ctxt->eip;
+	vcpu->arch.last_retry_addr = cr2;
+
+	if (!vcpu->arch.mmu.direct_map)
+		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
+
+	kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	return true;
+}
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4855,6 +4899,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		return EMULATE_DONE;
 	}
 
+	if (retry_instruction(ctxt, cr2, emulation_type))
+		return EMULATE_DONE;
+
 	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values  during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
-- 
1.7.5.4


  parent reply	other threads:[~2011-09-22  9:00 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-22  8:52 [PATCH v4 00/11] KVM: x86: optimize for writing guest page Xiao Guangrong
2011-09-22  8:53 ` [PATCH v4 01/11] KVM: MMU: avoid pte_list_desc running out in kvm_mmu_pte_write Xiao Guangrong
2011-09-22  8:53 ` [PATCH v4 02/11] KVM: x86: tag the instructions which are used to write page table Xiao Guangrong
2011-09-22  8:55 ` [PATCH v4 04/11] KVM: x86: cleanup port-in/port-out emulated Xiao Guangrong
2011-09-22  8:55 ` [PATCH v4 05/11] KVM: MMU: do not mark accessed bit on pte write path Xiao Guangrong
2011-09-22  8:56 ` [PATCH v4 06/11] KVM: MMU: cleanup FNAME(invlpg) Xiao Guangrong
2011-09-22  8:56 ` [PATCH v4 07/11] KVM: MMU: fast prefetch spte on invlpg path Xiao Guangrong
2011-09-22  8:56 ` [PATCH v4 08/11] KVM: MMU: remove unnecessary kvm_mmu_free_some_pages Xiao Guangrong
2011-09-22  8:57 ` [PATCH v4 09/11] KVM: MMU: split kvm_mmu_pte_write function Xiao Guangrong
2011-09-22  8:57 ` [PATCH v4 10/11] KVM: MMU: fix detecting misaligned accessed Xiao Guangrong
2011-09-22  8:58 ` [PATCH v4 11/11] KVM: MMU: improve write flooding detected Xiao Guangrong
2011-09-22  9:02 ` Xiao Guangrong [this message]
2011-09-23 11:51 ` [PATCH v4 00/11] KVM: x86: optimize for writing guest page Marcelo Tosatti
2011-09-30  3:49   ` Xiao Guangrong
2011-10-05 13:25   ` Avi Kivity
2011-10-06 17:50 ` Marcelo Tosatti
2011-10-06 17:53 ` Marcelo Tosatti
2011-10-08  4:06   ` Xiao Guangrong
2011-10-09 12:24     ` Avi Kivity
2011-10-09 13:37       ` Avi Kivity
2011-10-11  8:36         ` Xiao Guangrong
2011-11-04  9:16           ` Xiao Guangrong
2011-11-06 15:35             ` Avi Kivity
2011-11-10 13:28               ` Xiao Guangrong
2011-11-10 14:05                 ` Avi Kivity
2011-11-11  3:42                   ` Xiao Guangrong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E7AF9B8.4030500@cn.fujitsu.com \
    --to=xiaoguangrong@cn.fujitsu.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.