[PATCH v6 11/15] nEPT: Add nEPT violation/misconfigration support

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

From: Gleb Natapov <gleb@redhat.com>
To: kvm@vger.kernel.org
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>,
	Jun Nakajima <jun.nakajima@intel.com>,
	Yang Zhang <yang.z.zhang@intel.com>,
	pbonzini@redhat.com
Subject: [PATCH v6 11/15] nEPT: Add nEPT violation/misconfigration support
Date: Thu,  1 Aug 2013 17:08:33 +0300	[thread overview]
Message-ID: <1375366117-9014-12-git-send-email-gleb@redhat.com> (raw)
In-Reply-To: <1375366117-9014-1-git-send-email-gleb@redhat.com>

From: Yang Zhang <yang.z.zhang@Intel.com>

Inject nEPT fault to L1 guest. This patch is original from Xinhao.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    4 +++
 arch/x86/kvm/mmu.c              |   61 ++++++++++++++++++++++++++++++++-------
 arch/x86/kvm/paging_tmpl.h      |   25 ++++++++++++++--
 arch/x86/kvm/vmx.c              |   19 ++++++++++++
 4 files changed, 95 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 531f47c..58a17c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -286,6 +286,7 @@ struct kvm_mmu {
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
+	u64 bad_mt_xwr;
 
 	/*
 	 * Bitmap: bit set = last pte in walk
@@ -512,6 +513,9 @@ struct kvm_vcpu_arch {
 	 * instruction.
 	 */
 	bool write_fault_to_shadow_pgtable;
+
+	/* set at EPT violation at this point */
+	unsigned long exit_qualification;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a1f8f7b..81b73bc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3521,6 +3521,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
 
+	context->bad_mt_xwr = 0;
+
 	if (!context->nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
 	switch (context->root_level) {
@@ -3576,7 +3578,40 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *context, bool execonly)
+{
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	int pte;
+
+	context->rsvd_bits_mask[0][3] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7);
+	context->rsvd_bits_mask[0][2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
+	context->rsvd_bits_mask[0][1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+
+	/* large page */
+	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+	context->rsvd_bits_mask[1][2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29);
+	context->rsvd_bits_mask[1][1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20);
+	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+	
+	for (pte = 0; pte < 64; pte++) {
+		int rwx_bits = pte & 7;
+		int mt = pte >> 3;
+		if (mt == 0x2 || mt == 0x3 || mt == 0x7 ||
+				rwx_bits == 0x2 || rwx_bits == 0x6 ||
+				(rwx_bits == 0x4 && !execonly))
+			context->bad_mt_xwr |= (1ull << pte);
+	}
+}
+
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+		struct kvm_mmu *mmu, bool ept)
 {
 	unsigned bit, byte, pfec;
 	u8 map;
@@ -3594,12 +3629,16 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
 			w = bit & ACC_WRITE_MASK;
 			u = bit & ACC_USER_MASK;
 
-			/* Not really needed: !nx will cause pte.nx to fault */
-			x |= !mmu->nx;
-			/* Allow supervisor writes if !cr0.wp */
-			w |= !is_write_protection(vcpu) && !uf;
-			/* Disallow supervisor fetches of user code if cr4.smep */
-			x &= !(smep && u && !uf);
+			if (!ept) {
+				/* Not really needed: !nx will cause pte.nx to fault */
+				x |= !mmu->nx;
+				/* Allow supervisor writes if !cr0.wp */
+				w |= !is_write_protection(vcpu) && !uf;
+				/* Disallow supervisor fetches of user code if cr4.smep */
+				x &= !(smep && u && !uf);
+			} else
+				/* Not really needed: no U/S accesses on ept  */
+				u = 1;
 
 			fault = (ff && !x) || (uf && !u) || (wf && !w);
 			map |= fault << bit;
@@ -3634,7 +3673,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->root_level = level;
 
 	reset_rsvds_bits_mask(vcpu, context);
-	update_permission_bitmask(vcpu, context);
+	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
@@ -3664,7 +3703,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = PT32_ROOT_LEVEL;
 
 	reset_rsvds_bits_mask(vcpu, context);
-	update_permission_bitmask(vcpu, context);
+	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
@@ -3726,7 +3765,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
-	update_permission_bitmask(vcpu, context);
+	update_permission_bitmask(vcpu, context, false);
 	update_last_pte_bitmap(vcpu, context);
 
 	return 0;
@@ -3805,7 +3844,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
 
-	update_permission_bitmask(vcpu, g_context);
+	update_permission_bitmask(vcpu, g_context, false);
 	update_last_pte_bitmap(vcpu, g_context);
 
 	return 0;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index f8e5680..7f6af8e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -129,10 +129,10 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 
 static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
 {
-	int bit7;
+	int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f;
 
-	bit7 = (gpte >> 7) & 1;
-	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) |
+		((mmu->bad_mt_xwr & (1ull << low6)) != 0);
 }
 
 static inline int FNAME(is_present_gpte)(unsigned long pte)
@@ -386,6 +386,25 @@ error:
 	walker->fault.vector = PF_VECTOR;
 	walker->fault.error_code_valid = true;
 	walker->fault.error_code = errcode;
+
+#if PTTYPE == PTTYPE_EPT
+	/*
+	 * Use PFERR_RSVD_MASK in error_code to to tell if EPT
+	 * misconfiguration requires to be injected. The detection is
+	 * done by is_rsvd_bits_set() above.
+	 *
+	 * We set up the value of exit_qualification to inject:
+	 * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
+	 * [5:3] - Calculated by the page walk of the guest EPT page tables
+	 * [7:8] - Derived from [7:8] of real exit_qualification
+	 *
+	 * The other bits are set to 0.
+	 */
+	if (!(errcode & PFERR_RSVD_MASK)) {
+		vcpu->arch.exit_qualification &= 0x187;
+		vcpu->arch.exit_qualification |= ((pt_access & pte) & 0x7) << 3;
+	}
+#endif
 	walker->fault.address = addr;
 	walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a4d9385..2d84875 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5318,9 +5318,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
 	/* It is a write fault? */
 	error_code = exit_qualification & (1U << 1);
+	/* It is a fetch fault? */
+	error_code |= (exit_qualification & (1U << 2)) << 2;
 	/* ept page table is present? */
 	error_code |= (exit_qualification >> 3) & 0x1;
 
+	vcpu->arch.exit_qualification = exit_qualification;
+
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
@@ -7415,6 +7419,21 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 		entry->ecx |= bit(X86_FEATURE_VMX);
 }
 
+static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
+		struct x86_exception *fault)
+{
+	struct vmcs12 *vmcs12;
+	nested_vmx_vmexit(vcpu);
+	vmcs12 = get_vmcs12(vcpu);
+
+	if (fault->error_code & PFERR_RSVD_MASK)
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+	else
+		vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+	vmcs12->exit_qualification = vcpu->arch.exit_qualification;
+	vmcs12->guest_physical_address = fault->address;
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
-- 
1.7.10.4

next prev parent reply	other threads:[~2013-08-01 14:08 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-01 14:08 [PATCH v6 00/15] Nested EPT Gleb Natapov
2013-08-01 14:08 ` [PATCH v6 01/15] nEPT: Support LOAD_IA32_EFER entry/exit controls for L1 Gleb Natapov
2013-08-02  3:04   ` Zhang, Yang Z
2013-08-02  6:35     ` Jan Kiszka
2013-08-02  7:27       ` Zhang, Yang Z
2013-08-02  7:33         ` Jan Kiszka
2013-08-01 14:08 ` [PATCH v6 02/15] nEPT: Fix cr3 handling in nested exit and entry Gleb Natapov
2013-08-02  9:23   ` Xiao Guangrong
2013-08-01 14:08 ` [PATCH v6 03/15] nEPT: Fix wrong test in kvm_set_cr3 Gleb Natapov
2013-08-01 14:08 ` [PATCH v6 04/15] nEPT: Move common code to paging_tmpl.h Gleb Natapov
2013-08-01 14:08 ` [PATCH v6 05/15] nEPT: make guest's A/D bits depends on guest's paging mode Gleb Natapov
2013-08-01 14:08 ` [PATCH v6 06/15] nEPT: Support shadow paging for guest paging without A/D bits Gleb Natapov
2013-08-01 14:08 ` [PATCH v6 07/15] nEPT: Add EPT tables support to paging_tmpl.h Gleb Natapov
2013-08-01 14:08 ` [PATCH v6 08/15] nEPT: Redefine EPT-specific link_shadow_page() Gleb Natapov
2013-08-01 14:08 ` [PATCH v6 09/15] nEPT: correctly check if remote tlb flush is needed for shadowed EPT tables Gleb Natapov
2013-08-02  5:58   ` Xiao Guangrong
2013-08-01 14:08 ` [PATCH v6 10/15] nEPT: Nested INVEPT Gleb Natapov
2013-08-02  8:06   ` Xiao Guangrong
2013-08-02 10:00     ` Gleb Natapov
2013-08-01 14:08 ` Gleb Natapov [this message]
2013-08-02  6:12   ` [PATCH v6 11/15] nEPT: Add nEPT violation/misconfigration support Xiao Guangrong
2013-08-01 14:08 ` [PATCH v6 12/15] nEPT: MMU context for nested EPT Gleb Natapov
2013-08-02  6:13   ` Xiao Guangrong
2013-08-01 14:08 ` [PATCH v6 13/15] nEPT: Advertise EPT to L1 Gleb Natapov
2013-08-02  8:29   ` Xiao Guangrong
2013-08-01 14:08 ` [PATCH v6 14/15] nEPT: Some additional comments Gleb Natapov
2013-08-02  6:26   ` Xiao Guangrong
2013-08-01 14:08 ` [PATCH v6 15/15] nEPT: Miscelleneous cleanups Gleb Natapov
2013-08-02  6:45   ` Xiao Guangrong
2013-08-04  9:24 ` [PATCH v6 00/15] Nested EPT Jan Kiszka
2013-08-04  9:32   ` Gleb Natapov
2013-08-04  9:53     ` Gleb Natapov
2013-08-04 13:44       ` Gleb Natapov
2013-08-04 15:14         ` Jan Kiszka
2013-08-04 16:15           ` Xiao Guangrong
2013-08-04 16:42             ` Jan Kiszka
2013-08-04 16:58               ` Gleb Natapov
2013-08-04 17:19                 ` Xiao Guangrong
2013-08-04 17:24                   ` Gleb Natapov

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:531f47c dfblob:58a17c0 dfblob:a1f8f7b dfblob:81b73bc
dfblob:f8e5680 dfblob:7f6af8e dfblob:a4d9385 dfblob:2d84875 )
 OR (
bs:"[PATCH v6 11/15] nEPT: Add nEPT violation/misconfigration support" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1375366117-9014-12-git-send-email-gleb@redhat.com \
    --to=gleb@redhat.com \
    --cc=jun.nakajima@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=xiaoguangrong@linux.vnet.ibm.com \
    --cc=yang.z.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox