* Re: [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR
@ 2018-06-14 23:28 Liran Alon
0 siblings, 0 replies; 2+ messages in thread
From: Liran Alon @ 2018-06-14 23:28 UTC (permalink / raw)
To: vkuznets
Cc: mmorsy, rkagan, jmattson, Michael.H.Kelley, kys, rkrcmar, cavery,
pbonzini, linux-kernel, sthemmin, haiyangz, kvm
----- vkuznets@redhat.com wrote:
> Per Hyper-V TLFS 5.0b:
>
> "The L1 hypervisor may choose to use enlightened VMCSs by writing 1
> to
> the corresponding field in the VP assist page (see section 7.8.7).
> Another field in the VP assist page controls the currently active
> enlightened VMCS. Each enlightened VMCS is exactly one page (4 KB) in
> size and must be initially zeroed. No VMPTRLD instruction must be
> executed to make an enlightened VMCS active or current.
>
> After the L1 hypervisor performs a VM entry with an enlightened VMCS,
> the VMCS is considered active on the processor. An enlightened VMCS
> can only be active on a single processor at the same time. The L1
> hypervisor can execute a VMCLEAR instruction to transition an
> enlightened VMCS from the active to the non-active state. Any VMREAD
> or VMWRITE instructions while an enlightened VMCS is active is
> unsupported and can result in unexpected behavior."
>
> Keep Enlightened VMCS structure for the current L2 guest permanently
> mapped
> from struct nested_vmx instead of mapping it every time.
>
> Suggested-by: Ladi Prosek <lprosek@redhat.com>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
> arch/x86/kvm/vmx.c | 98
> ++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 91 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e7fa9f9c6e36..6802ba91468c 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -20,6 +20,7 @@
> #include "mmu.h"
> #include "cpuid.h"
> #include "lapic.h"
> +#include "hyperv.h"
>
> #include <linux/kvm_host.h>
> #include <linux/module.h>
> @@ -690,6 +691,8 @@ struct nested_vmx {
> bool guest_mode;
> } smm;
>
> + gpa_t hv_evmcs_vmptr;
> + struct page *hv_evmcs_page;
> struct hv_enlightened_vmcs *hv_evmcs;
> };
>
> @@ -7695,7 +7698,9 @@ static void nested_vmx_failInvalid(struct
> kvm_vcpu *vcpu)
> static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
> u32 vm_instruction_error)
> {
> - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) {
> /*
> * failValid writes the error number to the current VMCS, which
> * can't be done if there isn't a current VMCS.
> @@ -8003,6 +8008,18 @@ static void vmx_disable_shadow_vmcs(struct
> vcpu_vmx *vmx)
> vmcs_write64(VMCS_LINK_POINTER, -1ull);
> }
>
> +static inline void nested_release_evmcs(struct vcpu_vmx *vmx)
> +{
> + if (!vmx->nested.hv_evmcs)
> + return;
> +
> + kunmap(vmx->nested.hv_evmcs_page);
> + kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
> + vmx->nested.hv_evmcs_vmptr = -1ull;
> + vmx->nested.hv_evmcs_page = NULL;
> + vmx->nested.hv_evmcs = NULL;
> +}
> +
> static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
> {
> if (vmx->nested.current_vmptr == -1ull)
> @@ -8062,6 +8079,8 @@ static void free_nested(struct vcpu_vmx *vmx)
> vmx->nested.pi_desc = NULL;
> }
>
> + nested_release_evmcs(vmx);
> +
> free_loaded_vmcs(&vmx->nested.vmcs02);
> }
>
> @@ -8098,12 +8117,18 @@ static int handle_vmclear(struct kvm_vcpu
> *vcpu)
> return kvm_skip_emulated_instruction(vcpu);
> }
>
> - if (vmptr == vmx->nested.current_vmptr)
> - nested_release_vmcs12(vmx);
> + if (vmx->nested.hv_evmcs_page) {
> + if (vmptr == vmx->nested.hv_evmcs_vmptr)
> + nested_release_evmcs(vmx);
> + } else {
> + if (vmptr == vmx->nested.current_vmptr)
> + nested_release_vmcs12(vmx);
>
> - kvm_vcpu_write_guest(vcpu,
> - vmptr + offsetof(struct vmcs12, launch_state),
> - &zero, sizeof(zero));
> + kvm_vcpu_write_guest(vcpu,
> + vmptr + offsetof(struct vmcs12,
> + launch_state),
> + &zero, sizeof(zero));
> + }
>
> nested_vmx_succeed(vcpu);
> return kvm_skip_emulated_instruction(vcpu);
> @@ -8814,6 +8839,10 @@ static int handle_vmptrld(struct kvm_vcpu
> *vcpu)
> return kvm_skip_emulated_instruction(vcpu);
> }
>
> + /* Forbid normal VMPTRLD if Enlightened version was used */
> + if (vmx->nested.hv_evmcs)
> + return 1;
> +
> if (vmx->nested.current_vmptr != vmptr) {
> struct vmcs12 *new_vmcs12;
> struct page *page;
> @@ -8847,6 +8876,55 @@ static int handle_vmptrld(struct kvm_vcpu
> *vcpu)
> return kvm_skip_emulated_instruction(vcpu);
> }
>
> +/*
> + * This is an equivalent of the nested hypervisor executing the
> vmptrld
> + * instruction.
> + */
> +static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu
> *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct hv_vp_assist_page assist_page;
> +
> + if (likely(!vmx->nested.enlightened_vmcs_enabled))
> + return 1;
> +
> + if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
> + return 1;
> +
> + if (unlikely(!assist_page.enlighten_vmentry))
> + return 1;
> +
> + if (unlikely(assist_page.current_nested_vmcs !=
> + vmx->nested.hv_evmcs_vmptr)) {
> +
> + if (!vmx->nested.hv_evmcs)
> + vmx->nested.current_vmptr = -1ull;
> +
> + nested_release_evmcs(vmx);
> +
> + vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
> + vcpu, assist_page.current_nested_vmcs);
> +
> + if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
> + return 0;
> +
> + vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
> + vmx->nested.dirty_vmcs12 = true;
> + vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
> +
> + /*
> + * Unlike normal vmcs12, enlightened vmcs12 is not fully
> + * reloaded from guest's memory (read only fields, fields not
> + * present in struct hv_enlightened_vmcs, ...). Make sure there
> + * are no leftovers.
> + */
> + memset(vmx->nested.cached_vmcs12, 0,
> + sizeof(*vmx->nested.cached_vmcs12));
> +
> + }
> + return 1;
> +}
> +
> /* Emulate the VMPTRST instruction */
> static int handle_vmptrst(struct kvm_vcpu *vcpu)
> {
> @@ -8858,6 +8936,9 @@ static int handle_vmptrst(struct kvm_vcpu
> *vcpu)
> if (!nested_vmx_check_permission(vcpu))
> return 1;
>
> + if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
> + return 1;
> +
> if (get_vmx_mem_address(vcpu, exit_qualification,
> vmx_instruction_info, true, &vmcs_gva))
> return 1;
> @@ -12148,7 +12229,10 @@ static int nested_vmx_run(struct kvm_vcpu
> *vcpu, bool launch)
> if (!nested_vmx_check_permission(vcpu))
> return 1;
>
> - if (!nested_vmx_check_vmcs12(vcpu))
> + if (!nested_vmx_handle_enlightened_vmptrld(vcpu))
> + return 1;
> +
> + if (!vmx->nested.hv_evmcs && !nested_vmx_check_vmcs12(vcpu))
> goto out;
>
> vmcs12 = get_vmcs12(vcpu);
> --
> 2.14.4
Reviewed-By: Liran Alon <liran.alon@oracle.com>
^ permalink raw reply [flat|nested] 2+ messages in thread

* [PATCH 0/5] KVM: nVMX: Enlightened VMCS for Hyper-V on KVM
@ 2018-06-14 8:24 Vitaly Kuznetsov
2018-06-14 8:24 ` [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR Vitaly Kuznetsov
0 siblings, 1 reply; 2+ messages in thread
From: Vitaly Kuznetsov @ 2018-06-14 8:24 UTC (permalink / raw)
To: kvm
Cc: Paolo Bonzini, Radim Krčmář, Roman Kagan,
K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
Michael Kelley (EOSG), Mohammed Gamal, Cathy Avery, linux-kernel,
Jim Mattson
This is an initial implementation of Enlightened VMCS for nested Hyper-V on
KVM. Using it helps to spare 1500 cpu cycles for nested vmexit (tight cpuid
loop in WS2016 with Hyper-V role on KVM: 15200 cycles -> 13700 cycles).
Changes since RFCv2:
- Rename sync_shadow_vmcs to need_vmcs12_sync and reuse for eVMCS case
[Paolo Bonzini]
Ladi Prosek (1):
KVM: hyperv: define VP assist page helpers
Vitaly Kuznetsov (4):
KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability
KVM: nVMX: add enlightened VMCS state
KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR
KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case
arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/kvm/hyperv.c | 23 +-
arch/x86/kvm/hyperv.h | 4 +
arch/x86/kvm/lapic.c | 4 +-
arch/x86/kvm/lapic.h | 2 +-
arch/x86/kvm/svm.c | 9 +
arch/x86/kvm/vmx.c | 810 +++++++++++++++++++++++++++++++++-------
arch/x86/kvm/x86.c | 17 +-
include/uapi/linux/kvm.h | 1 +
9 files changed, 724 insertions(+), 149 deletions(-)
--
2.14.4
^ permalink raw reply [flat|nested] 2+ messages in thread

* [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR
2018-06-14 8:24 [PATCH 0/5] KVM: nVMX: Enlightened VMCS for Hyper-V on KVM Vitaly Kuznetsov
@ 2018-06-14 8:24 ` Vitaly Kuznetsov
0 siblings, 0 replies; 2+ messages in thread
From: Vitaly Kuznetsov @ 2018-06-14 8:24 UTC (permalink / raw)
To: kvm
Cc: Paolo Bonzini, Radim Krčmář, Roman Kagan,
K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
Michael Kelley (EOSG), Mohammed Gamal, Cathy Avery, linux-kernel,
Jim Mattson
Per Hyper-V TLFS 5.0b:
"The L1 hypervisor may choose to use enlightened VMCSs by writing 1 to
the corresponding field in the VP assist page (see section 7.8.7).
Another field in the VP assist page controls the currently active
enlightened VMCS. Each enlightened VMCS is exactly one page (4 KB) in
size and must be initially zeroed. No VMPTRLD instruction must be
executed to make an enlightened VMCS active or current.
After the L1 hypervisor performs a VM entry with an enlightened VMCS,
the VMCS is considered active on the processor. An enlightened VMCS
can only be active on a single processor at the same time. The L1
hypervisor can execute a VMCLEAR instruction to transition an
enlightened VMCS from the active to the non-active state. Any VMREAD
or VMWRITE instructions while an enlightened VMCS is active is
unsupported and can result in unexpected behavior."
Keep Enlightened VMCS structure for the current L2 guest permanently mapped
from struct nested_vmx instead of mapping it every time.
Suggested-by: Ladi Prosek <lprosek@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
arch/x86/kvm/vmx.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 91 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e7fa9f9c6e36..6802ba91468c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -20,6 +20,7 @@
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"
+#include "hyperv.h"
#include <linux/kvm_host.h>
#include <linux/module.h>
@@ -690,6 +691,8 @@ struct nested_vmx {
bool guest_mode;
} smm;
+ gpa_t hv_evmcs_vmptr;
+ struct page *hv_evmcs_page;
struct hv_enlightened_vmcs *hv_evmcs;
};
@@ -7695,7 +7698,9 @@ static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
u32 vm_instruction_error)
{
- if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) {
/*
* failValid writes the error number to the current VMCS, which
* can't be done if there isn't a current VMCS.
@@ -8003,6 +8008,18 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
}
+static inline void nested_release_evmcs(struct vcpu_vmx *vmx)
+{
+ if (!vmx->nested.hv_evmcs)
+ return;
+
+ kunmap(vmx->nested.hv_evmcs_page);
+ kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
+ vmx->nested.hv_evmcs_vmptr = -1ull;
+ vmx->nested.hv_evmcs_page = NULL;
+ vmx->nested.hv_evmcs = NULL;
+}
+
static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
{
if (vmx->nested.current_vmptr == -1ull)
@@ -8062,6 +8079,8 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.pi_desc = NULL;
}
+ nested_release_evmcs(vmx);
+
free_loaded_vmcs(&vmx->nested.vmcs02);
}
@@ -8098,12 +8117,18 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
- if (vmptr == vmx->nested.current_vmptr)
- nested_release_vmcs12(vmx);
+ if (vmx->nested.hv_evmcs_page) {
+ if (vmptr == vmx->nested.hv_evmcs_vmptr)
+ nested_release_evmcs(vmx);
+ } else {
+ if (vmptr == vmx->nested.current_vmptr)
+ nested_release_vmcs12(vmx);
- kvm_vcpu_write_guest(vcpu,
- vmptr + offsetof(struct vmcs12, launch_state),
- &zero, sizeof(zero));
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12,
+ launch_state),
+ &zero, sizeof(zero));
+ }
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
@@ -8814,6 +8839,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
+ /* Forbid normal VMPTRLD if Enlightened version was used */
+ if (vmx->nested.hv_evmcs)
+ return 1;
+
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
@@ -8847,6 +8876,55 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
+/*
+ * This is an equivalent of the nested hypervisor executing the vmptrld
+ * instruction.
+ */
+static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct hv_vp_assist_page assist_page;
+
+ if (likely(!vmx->nested.enlightened_vmcs_enabled))
+ return 1;
+
+ if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
+ return 1;
+
+ if (unlikely(!assist_page.enlighten_vmentry))
+ return 1;
+
+ if (unlikely(assist_page.current_nested_vmcs !=
+ vmx->nested.hv_evmcs_vmptr)) {
+
+ if (!vmx->nested.hv_evmcs)
+ vmx->nested.current_vmptr = -1ull;
+
+ nested_release_evmcs(vmx);
+
+ vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
+ vcpu, assist_page.current_nested_vmcs);
+
+ if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
+ return 0;
+
+ vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
+ vmx->nested.dirty_vmcs12 = true;
+ vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
+
+ /*
+ * Unlike normal vmcs12, enlightened vmcs12 is not fully
+ * reloaded from guest's memory (read only fields, fields not
+ * present in struct hv_enlightened_vmcs, ...). Make sure there
+ * are no leftovers.
+ */
+ memset(vmx->nested.cached_vmcs12, 0,
+ sizeof(*vmx->nested.cached_vmcs12));
+
+ }
+ return 1;
+}
+
/* Emulate the VMPTRST instruction */
static int handle_vmptrst(struct kvm_vcpu *vcpu)
{
@@ -8858,6 +8936,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
+ if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
+ return 1;
+
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &vmcs_gva))
return 1;
@@ -12148,7 +12229,10 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_handle_enlightened_vmptrld(vcpu))
+ return 1;
+
+ if (!vmx->nested.hv_evmcs && !nested_vmx_check_vmcs12(vcpu))
goto out;
vmcs12 = get_vmcs12(vcpu);
--
2.14.4
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2018-06-14 23:29 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-14 23:28 [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR Liran Alon
-- strict thread matches above, loose matches on Subject: below --
2018-06-14 8:24 [PATCH 0/5] KVM: nVMX: Enlightened VMCS for Hyper-V on KVM Vitaly Kuznetsov
2018-06-14 8:24 ` [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR Vitaly Kuznetsov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox