public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
@ 2012-05-10  0:32 Mao, Junjie
  2012-05-10 11:48 ` Avi Kivity
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Mao, Junjie @ 2012-05-10  0:32 UTC (permalink / raw)
  To: 'kvm@vger.kernel.org'

This patch handles PCID/INVPCID for guests.

Process-context identifiers (PCIDs) are a facility by which a logical processor may cache information for multiple linear-address spaces so that the processor may retain cached information when software switches to a different linear-address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual Volume 3A for details.

For guests with EPT, the PCID feature is enabled and INVPCID behaves as it does when running natively.
For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.

Signed-off-by: Mao, Junjie <junjie.mao@intel.com>
---
 arch/x86/include/asm/cpufeature.h      |    1 +
 arch/x86/include/asm/kvm_host.h        |    3 +-
 arch/x86/include/asm/processor-flags.h |    2 +
 arch/x86/include/asm/vmx.h             |    2 +
 arch/x86/kvm/cpuid.c                   |    6 ++-
 arch/x86/kvm/cpuid.h                   |    8 ++++
 arch/x86/kvm/svm.c                     |    6 +++
 arch/x86/kvm/vmx.c                     |   63 ++++++++++++++++++++++++++++++--
 8 files changed, 85 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8d67d42..1aedbc0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -203,6 +203,7 @@
 #define X86_FEATURE_SMEP	(9*32+ 7) /* Supervisor Mode Execution Protection */
 #define X86_FEATURE_BMI2	(9*32+ 8) /* 2nd group bit manipulation extensions */
 #define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID	(9*32+10) /* INVPCID instruction */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74c9edf..bb9a707 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -52,7 +52,7 @@
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
-			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
@@ -660,6 +660,7 @@ struct kvm_x86_ops {
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
+	bool (*pcid_supported)(void);
 	void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
 
 	void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index f8ab3ea..aea1d1d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -44,6 +44,7 @@
  */
 #define X86_CR3_PWT	0x00000008 /* Page Write Through */
 #define X86_CR3_PCD	0x00000010 /* Page Cache Disable */
+#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
 
 /*
  * Intel CPU features in CR4
@@ -61,6 +62,7 @@
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
 #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
 #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
+#define X86_CR4_PCIDE	0x00020000 /* enable PCID support */
 #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
 #define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 31f180c..b81525c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -60,6 +60,7 @@
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
+#define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -281,6 +282,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD		54
 #define EXIT_REASON_XSETBV		55
+#define EXIT_REASON_INVPCID		58
 
 /*
  * Interruption-information format
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9fed5be..8d4a361 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	unsigned f_lm = 0;
 #endif
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
+	unsigned f_pcid = kvm_x86_ops->pcid_supported() ? F(PCID) : 0;
+	unsigned f_invpcid = kvm_x86_ops->pcid_supported() ? F(INVPCID) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* DS-CPL, VMX, SMX, EST */ |
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
 		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
-		0 /* Reserved, DCA */ | F(XMM4_1) |
+		f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
 		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
 		F(F16C) | F(RDRAND);
@@ -247,7 +249,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 7.0.ebx */
 	const u32 kvm_supported_word9_x86_features =
-		F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS);
+		F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid;
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 26d1fb4..e531d39 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
 	return best && (best->ecx & bit(X86_FEATURE_OSVW));
 }
 
+static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	return best && (best->ecx & bit(X86_FEATURE_PCID));
+}
+
 #endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0b7690e..42726cf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4012,6 +4012,11 @@ static bool svm_rdtscp_supported(void)
 	return false;
 }
 
+static bool svm_pcid_supported(void)
+{
+	return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
 	return true;
@@ -4280,6 +4285,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.cpuid_update = svm_cpuid_update,
 
 	.rdtscp_supported = svm_rdtscp_supported,
+	.pcid_supported = svm_pcid_supported,
 
 	.set_supported_cpuid = svm_set_supported_cpuid,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d2bd719..ba00789 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -413,6 +413,7 @@ struct vcpu_vmx {
 	u32 exit_reason;
 
 	bool rdtscp_enabled;
+	bool invpcid_enabled;
 
 	/* Support for a guest hypervisor (nested VMX) */
 	struct nested_vmx nested;
@@ -839,6 +840,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
 		SECONDARY_EXEC_RDTSCP;
 }
 
+static inline bool cpu_has_vmx_invpcid(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_INVPCID;
+}
+
 static inline bool cpu_has_virtual_nmis(void)
 {
 	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@@ -1711,6 +1718,12 @@ static bool vmx_rdtscp_supported(void)
 	return cpu_has_vmx_rdtscp();
 }
 
+static bool vmx_pcid_supported(void)
+{
+	/* Enable PCID for non-ept guests may cause performance regression */
+	return enable_ept && (boot_cpu_data.x86_capability[4] & bit(X86_FEATURE_PCID));
+}
+
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
@@ -2425,6 +2438,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
 			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
 			SECONDARY_EXEC_RDTSCP;
+		if (enable_ept)
+			opt2 |= SECONDARY_EXEC_ENABLE_INVPCID;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -4337,8 +4352,14 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 			return 1;
 		vmcs_writel(CR0_READ_SHADOW, val);
 		return 0;
-	} else
+	} else {
+		unsigned long old_cr0 = kvm_read_cr0(vcpu);
+		if ((old_cr0 & X86_CR0_PG) && !(val & X86_CR0_PG) &&
+		    (kvm_read_cr4(vcpu) & X86_CR4_PCIDE))
+			return 1;
+
 		return kvm_set_cr0(vcpu, val);
+	}
 }
 
 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
@@ -4349,8 +4370,26 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 			return 1;
 		vmcs_writel(CR4_READ_SHADOW, val);
 		return 0;
-	} else
-		return kvm_set_cr4(vcpu, val);
+	} else {
+		unsigned long old_cr4 = kvm_read_cr4(vcpu);
+		int ret = 1;
+
+		if ((val & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
+			if (!guest_cpuid_has_pcid(vcpu))
+				return ret;
+
+			/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+			if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
+				return ret;
+		}
+
+		ret = kvm_set_cr4(vcpu, val);
+
+		if (!ret && (!(val & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
+			kvm_mmu_reset_context(vcpu);
+
+		return ret;
+	}
 }
 
 /* called to set cr0 as approriate for clts instruction exit. */
@@ -6420,6 +6459,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 			}
 		}
 	}
+
+	vmx->invpcid_enabled = false;
+	if (vmx_pcid_supported()) {
+		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+		if (exec_control & SECONDARY_EXEC_ENABLE_INVPCID) {
+			best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+			if (best && (best->ecx & bit(X86_FEATURE_PCID)))
+				vmx->invpcid_enabled = true;
+			else {
+				exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+						exec_control);
+				best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+				best->ecx &= ~bit(X86_FEATURE_INVPCID);
+			}
+		}
+	}
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -7154,6 +7210,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.cpuid_update = vmx_cpuid_update,
 
 	.rdtscp_supported = vmx_rdtscp_supported,
+	.pcid_supported = vmx_pcid_supported,
 
 	.set_supported_cpuid = vmx_set_supported_cpuid,
 

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-10  0:32 [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
@ 2012-05-10 11:48 ` Avi Kivity
  2012-05-11  5:58   ` Mao, Junjie
  2012-05-10 11:49 ` Avi Kivity
       [not found] ` <CAG7+5M2XSOoHqqpbp0YbjgNNfa6DwrfP+88TwRUbhBDUDH6q6A@mail.gmail.com>
  2 siblings, 1 reply; 10+ messages in thread
From: Avi Kivity @ 2012-05-10 11:48 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'

On 05/10/2012 03:32 AM, Mao, Junjie wrote:
> This patch handles PCID/INVPCID for guests.
>
> Process-context identifiers (PCIDs) are a facility by which a logical processor may cache information for multiple linear-address spaces so that the processor may retain cached information when software switches to a different linear-address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual Volume 3A for details.
>
> For guests with EPT, the PCID feature is enabled and INVPCID behaves as running natively.
> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
>
>  
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 74c9edf..bb9a707 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -52,7 +52,7 @@
>  #define CR4_RESERVED_BITS                                               \
>  	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
>  			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
> -			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
> +			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
>  			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
>  			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

We should hide cr4.pcide from nested vmx, until we prepare that code to
handle it.

> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index d2bd719..ba00789 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -413,6 +413,7 @@ struct vcpu_vmx {
>  	u32 exit_reason;
>  
>  	bool rdtscp_enabled;
> +	bool invpcid_enabled;
>  
>  	/* Support for a guest hypervisor (nested VMX) */
>  	struct nested_vmx nested;
> @@ -839,6 +840,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
>  		SECONDARY_EXEC_RDTSCP;
>  }
>  
> +static bool vmx_pcid_supported(void)
> +{
> +	/* Enable PCID for non-ept guests may cause performance regression */

Why is that?

> +	return enable_ept && (boot_cpu_data.x86_capability[4] & bit(X86_FEATURE_PCID));
> +}
> +
>  /*
>   * Swap MSR entry in host/guest MSR entry array.
>   */
> @@ -4337,8 +4352,14 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
>  			return 1;
>  		vmcs_writel(CR0_READ_SHADOW, val);
>  		return 0;
> -	} else
> +	} else {
> +		unsigned long old_cr0 = kvm_read_cr0(vcpu);
> +		if ((old_cr0 & X86_CR0_PG) && !(val & X86_CR0_PG) &&
> +		    (kvm_read_cr4(vcpu) & X86_CR4_PCIDE))

Use kvm_read_cr4_bits(), it's slightly faster.  Also move this to x86.c.

> +			return 1;
> +
>  		return kvm_set_cr0(vcpu, val);
> +	}
>  }
>  
>  static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
> @@ -4349,8 +4370,26 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
>  			return 1;
>  		vmcs_writel(CR4_READ_SHADOW, val);
>  		return 0;
> -	} else
> -		return kvm_set_cr4(vcpu, val);
> +	} else {
> +		unsigned long old_cr4 = kvm_read_cr4(vcpu);
> +		int ret = 1;
> +
> +		if ((val & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> +			if (!guest_cpuid_has_pcid(vcpu))
> +				return ret;
> +
> +			/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> +			if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
> +				return ret;
> +		}
> +
> +		ret = kvm_set_cr4(vcpu, val);
> +
> +		if (!ret && (!(val & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> +			kvm_mmu_reset_context(vcpu);
> +
> +		return ret;
> +	}

Move to x86.c please.

>  }
>  
>  /* called to set cr0 as approriate for clts instruction exit. */
> @@ -6420,6 +6459,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>  			}
>  		}
>  	}
> +
> +	vmx->invpcid_enabled = false;
> +	if (vmx_pcid_supported()) {
> +		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +		if (exec_control & SECONDARY_EXEC_ENABLE_INVPCID) {
> +			best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> +			if (best && (best->ecx & bit(X86_FEATURE_PCID)))
> +				vmx->invpcid_enabled = true;
> +			else {
> +				exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> +				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> +						exec_control);
> +				best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> +				best->ecx &= ~bit(X86_FEATURE_INVPCID);
> +			}
> +		}
> +	}
>  }
>  
>

If we enter a nested guest (which is running without PCID), we need
either to handle INVPCID exits (and inject a #UD) or disable INVPCID in
exec controls.  The first is faster since it doesn't involve VMWRITEs. 
If we do that, we don't need this code (since it will work for
non-nested guests as well).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-10  0:32 [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
  2012-05-10 11:48 ` Avi Kivity
@ 2012-05-10 11:49 ` Avi Kivity
       [not found] ` <CAG7+5M2XSOoHqqpbp0YbjgNNfa6DwrfP+88TwRUbhBDUDH6q6A@mail.gmail.com>
  2 siblings, 0 replies; 10+ messages in thread
From: Avi Kivity @ 2012-05-10 11:49 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'

On 05/10/2012 03:32 AM, Mao, Junjie wrote:
> This patch handles PCID/INVPCID for guests.
>
> Process-context identifiers (PCIDs) are a facility by which a logical processor may cache information for multiple linear-address spaces so that the processor may retain cached information when software switches to a different linear-address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual Volume 3A for details.
>
> For guests with EPT, the PCID feature is enabled and INVPCID behaves as running natively.
> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
>

btw, please post a unit test for basic functionality - enabling
cr4.pcide etc.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-10 11:48 ` Avi Kivity
@ 2012-05-11  5:58   ` Mao, Junjie
  2012-05-13 10:02     ` Avi Kivity
  0 siblings, 1 reply; 10+ messages in thread
From: Mao, Junjie @ 2012-05-11  5:58 UTC (permalink / raw)
  To: Avi Kivity; +Cc: 'kvm@vger.kernel.org'

> On 05/10/2012 03:32 AM, Mao, Junjie wrote:
> > This patch handles PCID/INVPCID for guests.
> >
> > Process-context identifiers (PCIDs) are a facility by which a logical processor
> may cache information for multiple linear-address spaces so that the processor
> may retain cached information when software switches to a different
> linear-address space. Refer to section 4.10.1 in IA32 Intel Software Developer's
> Manual Volume 3A for details.
> >
> > For guests with EPT, the PCID feature is enabled and INVPCID behaves as
> running natively.
> > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> #UD.
> >
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index 74c9edf..bb9a707 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -52,7 +52,7 @@
> >  #define CR4_RESERVED_BITS
> \
> >  	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |
> X86_CR4_DE\
> >  			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
> > -			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
> > +			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR |
> X86_CR4_PCIDE \
> >  			  | X86_CR4_OSXSAVE | X86_CR4_SMEP |
> X86_CR4_RDWRGSFS \
> >  			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
> 
> We should hide cr4.pcide from nested vmx, until we prepare that code to
> handle it.

I'll hide it from nested guests.

> 
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > d2bd719..ba00789 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -413,6 +413,7 @@ struct vcpu_vmx {
> >  	u32 exit_reason;
> >
> >  	bool rdtscp_enabled;
> > +	bool invpcid_enabled;
> >
> >  	/* Support for a guest hypervisor (nested VMX) */
> >  	struct nested_vmx nested;
> > @@ -839,6 +840,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
> >  		SECONDARY_EXEC_RDTSCP;
> >  }
> >
> > +static bool vmx_pcid_supported(void)
> > +{
> > +	/* Enable PCID for non-ept guests may cause performance regression
> > +*/
> 
> Why is that?

For guests using shadow page tables, every INVPCID must be intercepted so that changes in the guest page tables can be reflected in the shadow ones, which hurts performance. Without INVPCID, the PCID feature has little benefit. As a result, PCID/INVPCID is not exposed to non-EPT guests. Sorry for being unclear in the comment.

> 
> > +	return enable_ept && (boot_cpu_data.x86_capability[4] &
> > +bit(X86_FEATURE_PCID)); }
> > +
> >  /*
> >   * Swap MSR entry in host/guest MSR entry array.
> >   */
> > @@ -4337,8 +4352,14 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu,
> unsigned long val)
> >  			return 1;
> >  		vmcs_writel(CR0_READ_SHADOW, val);
> >  		return 0;
> > -	} else
> > +	} else {
> > +		unsigned long old_cr0 = kvm_read_cr0(vcpu);
> > +		if ((old_cr0 & X86_CR0_PG) && !(val & X86_CR0_PG) &&
> > +		    (kvm_read_cr4(vcpu) & X86_CR4_PCIDE))
> 
> Use kvm_read_cr4_bits(), it's slightly faster.  Also move this to x86.c.
> 
> > +			return 1;
> > +
> >  		return kvm_set_cr0(vcpu, val);
> > +	}
> >  }
> >
> >  static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
> > @@ -4349,8 +4370,26 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu,
> unsigned long val)
> >  			return 1;
> >  		vmcs_writel(CR4_READ_SHADOW, val);
> >  		return 0;
> > -	} else
> > -		return kvm_set_cr4(vcpu, val);
> > +	} else {
> > +		unsigned long old_cr4 = kvm_read_cr4(vcpu);
> > +		int ret = 1;
> > +
> > +		if ((val & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> > +			if (!guest_cpuid_has_pcid(vcpu))
> > +				return ret;
> > +
> > +			/* PCID can not be enabled when cr3[11:0]!=000H or
> EFER.LMA=0 */
> > +			if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK)
> || !is_long_mode(vcpu))
> > +				return ret;
> > +		}
> > +
> > +		ret = kvm_set_cr4(vcpu, val);
> > +
> > +		if (!ret && (!(val & X86_CR4_PCIDE) && (old_cr4 &
> X86_CR4_PCIDE)))
> > +			kvm_mmu_reset_context(vcpu);
> > +
> > +		return ret;
> > +	}
> 
> Move to x86.c please.
> 
> >  }
> >
> >  /* called to set cr0 as approriate for clts instruction exit. */ @@
> > -6420,6 +6459,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
> >  			}
> >  		}
> >  	}
> > +
> > +	vmx->invpcid_enabled = false;
> > +	if (vmx_pcid_supported()) {
> > +		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> > +		if (exec_control & SECONDARY_EXEC_ENABLE_INVPCID) {
> > +			best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> > +			if (best && (best->ecx & bit(X86_FEATURE_PCID)))
> > +				vmx->invpcid_enabled = true;
> > +			else {
> > +				exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > +				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > +						exec_control);
> > +				best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> > +				best->ecx &= ~bit(X86_FEATURE_INVPCID);
> > +			}
> > +		}
> > +	}
> >  }
> >
> >
> 
> If we enter a nested guest (which is running without PCID), we need either to
> handle INVPCID exits (and inject a #UD) or disable INVPCID in exec controls.
> The first is faster since it doesn't involve VMWRITEs.
> If we do that, we don't need this code (since it will work for non-nested guests
> as well).

I'm not that familiar with how nested guests work, so excuse me for a possibly silly question: if we choose to trigger INVPCID exits and inject #UD for both non-nested and nested guests without INVPCID, that means 'INVLPG exiting' must also be set (it is a prerequisite for triggering INVPCID exits). Can that cause performance problems for non-nested EPT guests?

> 
> --
> error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-11  5:58   ` Mao, Junjie
@ 2012-05-13 10:02     ` Avi Kivity
  2012-05-14  7:18       ` Mao, Junjie
  0 siblings, 1 reply; 10+ messages in thread
From: Avi Kivity @ 2012-05-13 10:02 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'

On 05/11/2012 08:58 AM, Mao, Junjie wrote:
> > >
> > > +static bool vmx_pcid_supported(void)
> > > +{
> > > +	/* Enable PCID for non-ept guests may cause performance regression
> > > +*/
> > 
> > Why is that?
>
> For guests using shadow page tables, every INVPCID must be intercepted so that changes in guest page tables can be reflected on the shadow ones, which brings about performance troubles. Without INVPCID, the PCID feature has little benefits. As a result, PCID/INVPCID is not exposed to non-ept guests. Sorry for being unclear in the comment.

Okay, please update the comment.  btw, are there plans to add PCID
support to Linux in the OS role?

> > > +
> > > +	vmx->invpcid_enabled = false;
> > > +	if (vmx_pcid_supported()) {
> > > +		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> > > +		if (exec_control & SECONDARY_EXEC_ENABLE_INVPCID) {
> > > +			best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> > > +			if (best && (best->ecx & bit(X86_FEATURE_PCID)))
> > > +				vmx->invpcid_enabled = true;
> > > +			else {
> > > +				exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > > +				vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > > +						exec_control);
> > > +				best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> > > +				best->ecx &= ~bit(X86_FEATURE_INVPCID);
> > > +			}
> > > +		}
> > > +	}
> > >  }
> > >
> > >
> > 
> > If we enter a nested guest (which is running without PCID), we need either to
> > handle INVPCID exits (and inject a #UD) or disable INVPCID in exec controls.
> > The first is faster since it doesn't involve VMWRITEs.
> > If we do that, we don't need this code (since it will work for non-nested guests
> > as well).
>
> I'm not that familiar with how nested guests work. So excuse me for a possibly silly question: if we choose to trigger INVPCID exits and inject #UD for both non-nested and nested guests without INVPCID, that means 'INVLPG exiting' should also be set (which is a must for triggering INVPCID exits). Can it cause performance problems for non-nested ept guests?

It can.  It was my comment that was silly, the guest and the nested
guest use different vmcses, so all you need is to make sure the write
here goes to the non-nested vmcs (and "enable INVPCID" is kept as zero
for nested vmcses).


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
       [not found] ` <CAG7+5M2XSOoHqqpbp0YbjgNNfa6DwrfP+88TwRUbhBDUDH6q6A@mail.gmail.com>
@ 2012-05-14  7:15   ` Mao, Junjie
  2012-05-15  2:20     ` Marcelo Tosatti
  0 siblings, 1 reply; 10+ messages in thread
From: Mao, Junjie @ 2012-05-14  7:15 UTC (permalink / raw)
  To: Eric Northup; +Cc: kvm@vger.kernel.org

> On Wed, May 9, 2012 at 5:32 PM, Mao, Junjie <junjie.mao@intel.com> wrote:
> > This patch handles PCID/INVPCID for guests.
> >
> > Process-context identifiers (PCIDs) are a facility by which a logical processor may cache information for multiple linear-address spaces so that the processor may retain cached information when software switches to a different linear-address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual Volume 3A for details.
> >
> > For guests with EPT, the PCID feature is enabled and INVPCID behaves as running natively.
> > For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
> >
> Do I understand correctly that this means it is impossible to migrate a guest which is using PCID from a host with EPT to a host without EPT (by passing enable_ept=0 to the module, for example) ?

I think you are right. Guests using PCID/INVPCID cannot migrate to a host without it, and EPT is a precondition of this feature.

> Does the emulated CR3 load need to learn about the new function of the low bits?

I have not found any restrictions on writing CR3 brought about by PCID.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-13 10:02     ` Avi Kivity
@ 2012-05-14  7:18       ` Mao, Junjie
  0 siblings, 0 replies; 10+ messages in thread
From: Mao, Junjie @ 2012-05-14  7:18 UTC (permalink / raw)
  To: Avi Kivity; +Cc: 'kvm@vger.kernel.org'



Best Regards
Junjie Mao


> -----Original Message-----
> From: Avi Kivity [mailto:avi@redhat.com]
> Sent: Sunday, May 13, 2012 6:03 PM
> To: Mao, Junjie
> Cc: 'kvm@vger.kernel.org'
> Subject: Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
> 
> On 05/11/2012 08:58 AM, Mao, Junjie wrote:
> > > >
> > > > +static bool vmx_pcid_supported(void) {
> > > > +	/* Enable PCID for non-ept guests may cause performance
> > > > +regression */
> > >
> > > Why is that?
> >
> > For guests using shadow page tables, every INVPCID must be intercepted so
> that changes in guest page tables can be reflected on the shadow ones, which
> brings about performance troubles. Without INVPCID, the PCID feature has
> little benefits. As a result, PCID/INVPCID is not exposed to non-ept guests.
> Sorry for being unclear in the comment.
> 
> Okay, please update the comment.  btw, are there plans to add PCID support
> to Linux in the OS role?
> 

As far as I know, there aren't any plans to do it yet.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-14  7:15   ` Mao, Junjie
@ 2012-05-15  2:20     ` Marcelo Tosatti
  2012-05-15  3:28       ` Mao, Junjie
  0 siblings, 1 reply; 10+ messages in thread
From: Marcelo Tosatti @ 2012-05-15  2:20 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: Eric Northup, kvm@vger.kernel.org

On Mon, May 14, 2012 at 07:15:18AM +0000, Mao, Junjie wrote:
> > On Wed, May 9, 2012 at 5:32 PM, Mao, Junjie <junjie.mao@intel.com> wrote:
> > > This patch handles PCID/INVPCID for guests.
> > >
> > > Process-context identifiers (PCIDs) are a facility by which a logical processor may cache information for multiple linear-address spaces so that the processor may retain cached information when software switches to a different linear-address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual Volume 3A for details.
> > >
> > > For guests with EPT, the PCID feature is enabled and INVPCID behaves as running natively.
> > > For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
> > >
> > Do I understand correctly that this means it is impossible to migrate a guest which is using PCID from a host with EPT to a host without EPT (by passing enable_ept=0 to the module, for example) ?
> 
> I think you are right. Guests using PCID/INVPCID cannot migrate to a host without it, and EPT is a precondition of this feature.

Are there processors that support PCID/INVPCID for the host but lack
support for SECONDARY_EXEC_ENABLE_INVPCID?

> > Does the emulated CR3 load need to learn about the new function of the low bits?
> 
> I have not found any restrictions on writing CR3 brings about by PCID.

It would be good for completeness.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-15  3:28       ` Mao, Junjie
@ 2012-05-15  3:27         ` Marcelo Tosatti
  0 siblings, 0 replies; 10+ messages in thread
From: Marcelo Tosatti @ 2012-05-15  3:27 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: Eric Northup, kvm@vger.kernel.org

On Tue, May 15, 2012 at 03:28:13AM +0000, Mao, Junjie wrote:
> > -----Original Message-----
> > From: Marcelo Tosatti [mailto:mtosatti@redhat.com]
> > Sent: Tuesday, May 15, 2012 10:20 AM
> > To: Mao, Junjie
> > Cc: Eric Northup; kvm@vger.kernel.org
> > Subject: Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
> > 
> > On Mon, May 14, 2012 at 07:15:18AM +0000, Mao, Junjie wrote:
> > > > On Wed, May 9, 2012 at 5:32 PM, Mao, Junjie <junjie.mao@intel.com>
> > wrote:
> > > > > This patch handles PCID/INVPCID for guests.
> > > > >
> > > > > Process-context identifiers (PCIDs) are a facility by which a logical
> > processor may cache information for multiple linear-address spaces so that the
> > processor may retain cached information when software switches to a
> > different linear-address space. Refer to section 4.10.1 in IA32 Intel Software
> > Developer's Manual Volume 3A for details.
> > > > >
> > > > > For guests with EPT, the PCID feature is enabled and INVPCID behaves as
> > running natively.
> > > > > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> > #UD.
> > > > >
> > > > Do I understand correctly that this means it is impossible to migrate a
> > guest which is using PCID from a host with EPT to a host without EPT (by
> > passing enable_ept=0 to the module, for example) ?
> > >
> > > I think you are right. Guests using PCID/INVPCID cannot migrate to a host
> > without it, and EPT is a precondition of this feature.
> > 
> > Are there processors that support PCI/INVPCID for the host but lack support
> > for SECONDARY_EXEC_ENABLE_INVPCID?
> 
> There're some with PCID but without INVPCID. For those processors support INVPCID, SECONDARY_EXEC_ENABLE_INVPCID support should be present. As PCID has little benefits without INVPCID, these two features are exposed/hidden as a whole.

That means migration control should check INVPCID host support before deciding
whether to migrate.

Thanks


^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-15  2:20     ` Marcelo Tosatti
@ 2012-05-15  3:28       ` Mao, Junjie
  2012-05-15  3:27         ` Marcelo Tosatti
  0 siblings, 1 reply; 10+ messages in thread
From: Mao, Junjie @ 2012-05-15  3:28 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Eric Northup, kvm@vger.kernel.org

> -----Original Message-----
> From: Marcelo Tosatti [mailto:mtosatti@redhat.com]
> Sent: Tuesday, May 15, 2012 10:20 AM
> To: Mao, Junjie
> Cc: Eric Northup; kvm@vger.kernel.org
> Subject: Re: [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT
> 
> On Mon, May 14, 2012 at 07:15:18AM +0000, Mao, Junjie wrote:
> > > On Wed, May 9, 2012 at 5:32 PM, Mao, Junjie <junjie.mao@intel.com>
> wrote:
> > > > This patch handles PCID/INVPCID for guests.
> > > >
> > > > Process-context identifiers (PCIDs) are a facility by which a logical
> processor may cache information for multiple linear-address spaces so that the
> processor may retain cached information when software switches to a
> different linear-address space. Refer to section 4.10.1 in IA32 Intel Software
> Developer's Manual Volume 3A for details.
> > > >
> > > > For guests with EPT, the PCID feature is enabled and INVPCID behaves as
> running natively.
> > > > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> #UD.
> > > >
> > > Do I understand correctly that this means it is impossible to migrate a
> guest which is using PCID from a host with EPT to a host without EPT (by
> passing enable_ept=0 to the module, for example) ?
> >
> > I think you are right. Guests using PCID/INVPCID cannot migrate to a host
> without it, and EPT is a precondition of this feature.
> 
> Are there processors that support PCI/INVPCID for the host but lack support
> for SECONDARY_EXEC_ENABLE_INVPCID?

There are some with PCID but without INVPCID. For processors that support INVPCID, SECONDARY_EXEC_ENABLE_INVPCID support should be present. As PCID has little benefit without INVPCID, these two features are exposed/hidden as a whole.

> 
> > > Does the emulated CR3 load need to learn about the new function of the
> low bits?
> >
> > I have not found any restrictions on writing CR3 brings about by PCID.
> 
> It would be good for completeness.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2012-05-15  3:32 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-10  0:32 [PATCH] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
2012-05-10 11:48 ` Avi Kivity
2012-05-11  5:58   ` Mao, Junjie
2012-05-13 10:02     ` Avi Kivity
2012-05-14  7:18       ` Mao, Junjie
2012-05-10 11:49 ` Avi Kivity
     [not found] ` <CAG7+5M2XSOoHqqpbp0YbjgNNfa6DwrfP+88TwRUbhBDUDH6q6A@mail.gmail.com>
2012-05-14  7:15   ` Mao, Junjie
2012-05-15  2:20     ` Marcelo Tosatti
2012-05-15  3:28       ` Mao, Junjie
2012-05-15  3:27         ` Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox