[PATCH 08/11] nEPT: Nested INVEPT

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

From: Jun Nakajima <jun.nakajima@intel.com>
To: kvm@vger.kernel.org
Subject: [PATCH 08/11] nEPT: Nested INVEPT
Date: Thu, 25 Apr 2013 23:43:28 -0700	[thread overview]
Message-ID: <1366958611-6935-8-git-send-email-jun.nakajima@intel.com> (raw)
In-Reply-To: <1366958611-6935-7-git-send-email-jun.nakajima@intel.com>

If we let L1 use EPT, we should probably also support the INVEPT instruction.

In our current nested EPT implementation, when L1 changes its EPT table for
L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in the course
of this modification already calls INVEPT. Therefore, when L1 calls INVEPT,
we don't really need to do anything. In particular we *don't* need to call
the real INVEPT again. All we do in our INVEPT is verify the validity of the
call, and its parameters, and then do nothing.

In KVM Forum 2010, Dong et al. presented "Nested Virtualization Friendly KVM"
and classified our current nested EPT implementation as "shadow-like virtual
EPT". He recommended instead a different approach, which he called "VTLB-like
virtual EPT". If we had taken that alternative approach, INVEPT would have had
a bigger role: L0 would only rebuild the shadow EPT table when L1 calls INVEPT.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
---
 arch/x86/include/asm/vmx.h      |  4 +-
 arch/x86/include/uapi/asm/vmx.h |  1 +
 arch/x86/kvm/vmx.c              | 83 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index b6fbf86..0ce54f3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -376,7 +376,9 @@ enum vmcs_field {
 #define VMX_EPTP_WB_BIT				(1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT			(1ull << 16)
 #define VMX_EPT_1GB_PAGE_BIT			(1ull << 17)
-#define VMX_EPT_AD_BIT				    (1ull << 21)
+#define VMX_EPT_INVEPT_BIT			(1ull << 20)
+#define VMX_EPT_AD_BIT				(1ull << 21)
+#define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
 
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 2871fcc..5662cef 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,7 @@
 #define EXIT_REASON_EOI_INDUCED         45
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_INVEPT		        50
 #define EXIT_REASON_WBINVD              54
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 76df3a8..c67eb06 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5879,6 +5879,87 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+	u32 vmx_instruction_info;
+	unsigned long type;
+	gva_t gva;
+	struct x86_exception e;
+	struct {
+		u64 eptp, gpa;
+	} operand;
+
+	if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
+	    !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	if (!nested_vmx_check_permission(vcpu))
+		return 1;
+
+	if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
+	/* According to the Intel VMX instruction reference, the memory
+	 * operand is read even if it isn't needed (e.g., for type==global)
+	 */
+	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+	if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+			vmx_instruction_info, &gva))
+		return 1;
+	if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+				sizeof(operand), &e)) {
+		kvm_inject_page_fault(vcpu, &e);
+		return 1;
+	}
+
+	type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+	switch (type) {
+	case VMX_EPT_EXTENT_GLOBAL:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			/*
+			 * Do nothing: when L1 changes EPT12, we already
+			 * update EPT02 (the shadow EPT table) and call INVEPT.
+			 * So when L1 calls INVEPT, there's nothing left to do.
+			 */
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_CONTEXT:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			/* Do nothing */
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
+		if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT))
+			nested_vmx_failValid(vcpu,
+				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+		else {
+			/* Do nothing */
+			nested_vmx_succeed(vcpu);
+		}
+		break;
+	default:
+		nested_vmx_failValid(vcpu,
+			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+	}
+
+	skip_emulated_instruction(vcpu);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5923,6 +6004,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
 	[EXIT_REASON_MWAIT_INSTRUCTION]	      = handle_invalid_op,
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+	[EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -6107,6 +6189,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+	case EXIT_REASON_INVEPT:
 		/*
 		 * VMX instructions trap unconditionally. This allows L1 to
 		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
-- 
1.8.2.1.610.g562af5b

next prev parent reply	other threads:[~2013-04-26  6:43 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-26  6:43 [PATCH 01/11] nEPT: Support LOAD_IA32_EFER entry/exit controls for L1 Jun Nakajima
2013-04-26  6:43 ` [PATCH 02/11] nEPT: Add EPT tables support to paging_tmpl.h Jun Nakajima
2013-04-26  6:43   ` [PATCH 03/11] nEPT: MMU context for nested EPT Jun Nakajima
2013-04-26  6:43     ` [PATCH 04/11] nEPT: Fix cr3 handling in nested exit and entry Jun Nakajima
2013-04-26  6:43       ` [PATCH 05/11] nEPT: Fix wrong test in kvm_set_cr3 Jun Nakajima
2013-04-26  6:43         ` [PATCH 06/11] nEPT: Some additional comments Jun Nakajima
2013-04-26  6:43           ` [PATCH 07/11] nEPT: Advertise EPT to L1 Jun Nakajima
2013-04-26  6:43             ` Jun Nakajima [this message]
2013-04-26  6:43               ` [PATCH 09/11] nEPT: Documentation Jun Nakajima
2013-04-26  6:43                 ` [PATCH 10/11] nEPT: Miscelleneous cleanups Jun Nakajima
2013-04-26  6:43                   ` [PATCH 11/11] nEPT: Provide the correct exit qualification upon EPT Jun Nakajima
2013-04-27  6:42                     ` Xu, Xinhao
2013-04-28  9:35                       ` Jan Kiszka
2013-05-02  6:59                         ` Xu, Xinhao
2013-05-02  8:50                           ` Jan Kiszka
2013-04-29 15:37                     ` Paolo Bonzini
2013-04-29 15:50                       ` Nakajima, Jun
2013-04-29 15:12                   ` [PATCH 10/11] nEPT: Miscelleneous cleanups Paolo Bonzini
2013-04-28  9:22             ` [PATCH 07/11] nEPT: Advertise EPT to L1 Jan Kiszka
2013-04-29 15:05   ` [PATCH 02/11] nEPT: Add EPT tables support to paging_tmpl.h Paolo Bonzini
2013-05-02 23:54   ` Marcelo Tosatti
2013-05-03 17:27     ` Nakajima, Jun

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:b6fbf86 dfblob:0ce54f3 dfblob:2871fcc dfblob:5662cef
dfblob:76df3a8 dfblob:c67eb06 )
 OR (
bs:"[PATCH 08/11] nEPT: Nested INVEPT" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1366958611-6935-8-git-send-email-jun.nakajima@intel.com \
    --to=jun.nakajima@intel.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox