public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT
@ 2012-05-14  6:25 Mao, Junjie
  2012-05-15  2:42 ` Marcelo Tosatti
  0 siblings, 1 reply; 7+ messages in thread
From: Mao, Junjie @ 2012-05-14  6:25 UTC (permalink / raw)
  To: 'kvm@vger.kernel.org'

This patch handles PCID/INVPCID for guests.

Process-context identifiers (PCIDs) are a facility by which a logical processor
may cache information for multiple linear-address spaces so that the processor
may retain cached information when software switches to a different linear
address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual
Volume 3A for details.

For guests with EPT, the PCID feature is enabled and INVPCID behaves as running
natively.
For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.

Changes from v1:
	Move cr0/cr4 writing checks to x86.c
	Update comments for the reason why PCID is disabled for non-EPT guests
	Do not support PCID/INVPCID for nested guests at present
	Clean up useless symbols

Signed-off-by: Junjie Mao <junjie.mao@intel.com>
---
 arch/x86/include/asm/cpufeature.h      |    1 +
 arch/x86/include/asm/kvm_host.h        |    3 ++-
 arch/x86/include/asm/processor-flags.h |    2 ++
 arch/x86/include/asm/vmx.h             |    2 ++
 arch/x86/kvm/cpuid.c                   |    6 ++++--
 arch/x86/kvm/cpuid.h                   |    8 ++++++++
 arch/x86/kvm/svm.c                     |    6 ++++++
 arch/x86/kvm/vmx.c                     |   29 +++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c                     |   16 +++++++++++++++-
 9 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8d67d42..1aedbc0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -203,6 +203,7 @@
 #define X86_FEATURE_SMEP	(9*32+ 7) /* Supervisor Mode Execution Protection */
 #define X86_FEATURE_BMI2	(9*32+ 8) /* 2nd group bit manipulation extensions */
 #define X86_FEATURE_ERMS	(9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID	(9*32+10) /* INVPCID instruction */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74c9edf..bb9a707 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -52,7 +52,7 @@
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
-			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
@@ -660,6 +660,7 @@ struct kvm_x86_ops {
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 	int (*get_lpage_level)(void);
 	bool (*rdtscp_supported)(void);
+	bool (*pcid_supported)(void);
 	void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
 
 	void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index f8ab3ea..aea1d1d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -44,6 +44,7 @@
  */
 #define X86_CR3_PWT	0x00000008 /* Page Write Through */
 #define X86_CR3_PCD	0x00000010 /* Page Cache Disable */
+#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
 
 /*
  * Intel CPU features in CR4
@@ -61,6 +62,7 @@
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
 #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
 #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
+#define X86_CR4_PCIDE	0x00020000 /* enable PCID support */
 #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
 #define X86_CR4_SMEP	0x00100000 /* enable SMEP support */
 
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 31f180c..b81525c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -60,6 +60,7 @@
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
+#define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -281,6 +282,7 @@ enum vmcs_field {
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_WBINVD		54
 #define EXIT_REASON_XSETBV		55
+#define EXIT_REASON_INVPCID		58
 
 /*
  * Interruption-information format
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9fed5be..8d4a361 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	unsigned f_lm = 0;
 #endif
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
+	unsigned f_pcid = kvm_x86_ops->pcid_supported() ? F(PCID) : 0;
+	unsigned f_invpcid = kvm_x86_ops->pcid_supported() ? F(INVPCID) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		0 /* DS-CPL, VMX, SMX, EST */ |
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
 		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
-		0 /* Reserved, DCA */ | F(XMM4_1) |
+		f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
 		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
 		F(F16C) | F(RDRAND);
@@ -247,7 +249,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 7.0.ebx */
 	const u32 kvm_supported_word9_x86_features =
-		F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS);
+		F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid;
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 26d1fb4..e531d39 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
 	return best && (best->ecx & bit(X86_FEATURE_OSVW));
 }
 
+static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 1, 0);
+	return best && (best->ecx & bit(X86_FEATURE_PCID));
+}
+
 #endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0b7690e..42726cf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4012,6 +4012,11 @@ static bool svm_rdtscp_supported(void)
 	return false;
 }
 
+static bool svm_pcid_supported(void)
+{
+	return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
 	return true;
@@ -4280,6 +4285,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.cpuid_update = svm_cpuid_update,
 
 	.rdtscp_supported = svm_rdtscp_supported,
+	.pcid_supported = svm_pcid_supported,
 
 	.set_supported_cpuid = svm_set_supported_cpuid,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d2bd719..9c77711 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1711,6 +1711,18 @@ static bool vmx_rdtscp_supported(void)
 	return cpu_has_vmx_rdtscp();
 }
 
+static bool vmx_pcid_supported(void)
+{
+	/*
+	 * Enable INVPCID for non-ept guests may cause performance regression,
+	 * and without INVPCID, PCID has little benefits. So disable them all
+	 * for non-ept guests.
+	 *
+	 * PCID is not supported for nested guests yet.
+	 */
+	return enable_ept && (boot_cpu_data.x86_capability[4] & bit(X86_FEATURE_PCID)) && !cpu_has_hypervisor;
+}
+
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
@@ -2425,6 +2437,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 			SECONDARY_EXEC_UNRESTRICTED_GUEST |
 			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
 			SECONDARY_EXEC_RDTSCP;
+		if (vmx_pcid_supported())
+			opt2 |= SECONDARY_EXEC_ENABLE_INVPCID;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -6420,6 +6434,20 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 			}
 		}
 	}
+
+	if (vmx_pcid_supported()) {
+		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+		if (!best || !(best->ecx & bit(X86_FEATURE_PCID))) {
+			/* Hiding INVPCID when PCID is not exposed. */
+			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+				     exec_control);
+			best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+			if (best)
+				best->ecx &= ~bit(X86_FEATURE_INVPCID);
+		}
+	}
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -7154,6 +7182,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.cpuid_update = vmx_cpuid_update,
 
 	.rdtscp_supported = vmx_rdtscp_supported,
+	.pcid_supported = vmx_pcid_supported,
 
 	.set_supported_cpuid = vmx_set_supported_cpuid,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9d99e5..f930597 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -527,6 +527,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 			return 1;
 	}
 
+	if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+		return 1;
+
 	kvm_x86_ops->set_cr0(vcpu, cr0);
 
 	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
@@ -603,10 +607,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 				   kvm_read_cr3(vcpu)))
 		return 1;
 
+	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
+		if (!guest_cpuid_has_pcid(vcpu))
+			return 1;
+
+		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
+			return 1;
+	}
+
 	if (kvm_x86_ops->set_cr4(vcpu, cr4))
 		return 1;
 
-	if ((cr4 ^ old_cr4) & pdptr_bits)
+	if (((cr4 ^ old_cr4) & pdptr_bits) ||
+	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
 	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-14  6:25 [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
@ 2012-05-15  2:42 ` Marcelo Tosatti
  2012-05-15  5:50   ` Nadav Har'El
  2012-05-17  1:22   ` Mao, Junjie
  0 siblings, 2 replies; 7+ messages in thread
From: Marcelo Tosatti @ 2012-05-15  2:42 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'

On Mon, May 14, 2012 at 06:25:40AM +0000, Mao, Junjie wrote:
> This patch handles PCID/INVPCID for guests.
> 
> Process-context identifiers (PCIDs) are a facility by which a logical processor
> may cache information for multiple linear-address spaces so that the processor
> may retain cached information when software switches to a different linear
> address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual
> Volume 3A for details.
> 
> For guests with EPT, the PCID feature is enabled and INVPCID behaves as running
> natively.
> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
> 
> Changes from v1:
> 	Move cr0/cr4 writing checks to x86.c
> 	Update comments for the reason why PCID is disabled for non-EPT guests
> 	Do not support PCID/INVPCID for nested guests at present
> 	Clean up useless symbols
> 
> Signed-off-by: Junjie Mao <junjie.mao@intel.com>
> +++ b/arch/x86/kvm/vmx.c
> @@ -1711,6 +1711,18 @@ static bool vmx_rdtscp_supported(void)
>  	return cpu_has_vmx_rdtscp();
>  }
>  
> +static bool vmx_pcid_supported(void)
> +{
> +	/*
> +	 * Enable INVPCID for non-ept guests may cause performance regression,
> +	 * and without INVPCID, PCID has little benefits. So disable them all
> +	 * for non-ept guests.
> +	 *
> +	 * PCID is not supported for nested guests yet.
> +	 */
> +	return enable_ept && (boot_cpu_data.x86_capability[4] & bit(X86_FEATURE_PCID)) && !cpu_has_hypervisor;
> +}

The comment Avi made was regarding running a nested guest, not running
_as_ a nested guest (which is what cpu_has_hypervisor is about).

You can disable INVPCID exec control (which #UDs), if its in Level-2
guest mode (see if_guest_mode()), and restore the Level-1 value when
leaving nested mode.

> +
>  /*
>   * Swap MSR entry in host/guest MSR entry array.
>   */
> @@ -2425,6 +2437,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  			SECONDARY_EXEC_UNRESTRICTED_GUEST |
>  			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
>  			SECONDARY_EXEC_RDTSCP;
> +		if (vmx_pcid_supported())
> +			opt2 |= SECONDARY_EXEC_ENABLE_INVPCID;


You should allow ENABLE_INVPCID control to be set unconditionally here, 
and adjusted in vmx_secondary_exec_control().

(note that "enable_ept" might be cleared after setup_vmcs_config).

>  		if (adjust_vmx_controls(min2, opt2,
>  					MSR_IA32_VMX_PROCBASED_CTLS2,
>  					&_cpu_based_2nd_exec_control) < 0)
> @@ -6420,6 +6434,20 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>  			}
>  		}
>  	}
> +
> +	if (vmx_pcid_supported()) {
> +		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> +		if (!best || !(best->ecx & bit(X86_FEATURE_PCID))) {
> +			/* Hiding INVPCID when PCID is not exposed. */
> +			exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> +			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> +				     exec_control);
> +			best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> +			if (best)
> +				best->ecx &= ~bit(X86_FEATURE_INVPCID);
> +		}
> +	}

- If X86_FEATURE_CPUID bit is set by guest, but X86_FEATURE_INVPCID is
  cleared, this allows invpcid to execute (which is wrong, it should
  #UD).
- Must enable vm_exec control bit if cpuid reports the feature enabled,
  not only disable it.

>  }
>  
>  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
> @@ -7154,6 +7182,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  	.cpuid_update = vmx_cpuid_update,
>  
>  	.rdtscp_supported = vmx_rdtscp_supported,
> +	.pcid_supported = vmx_pcid_supported,
>  
>  	.set_supported_cpuid = vmx_set_supported_cpuid,
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c9d99e5..f930597 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -527,6 +527,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>  			return 1;
>  	}
>  
> +	if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> +	    kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> +		return 1;
> +
>  	kvm_x86_ops->set_cr0(vcpu, cr0);

For completeness, kvm_set_cr3 should deal with the CR3 bits. It is used
by nested VMX for example.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-15  2:42 ` Marcelo Tosatti
@ 2012-05-15  5:50   ` Nadav Har'El
  2012-05-17  1:22   ` Mao, Junjie
  1 sibling, 0 replies; 7+ messages in thread
From: Nadav Har'El @ 2012-05-15  5:50 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Mao, Junjie, 'kvm@vger.kernel.org'

On Mon, May 14, 2012, Marcelo Tosatti wrote about "Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT":
> > +	 * Enable INVPCID for non-ept guests may cause performance regression,
> > +	 * and without INVPCID, PCID has little benefits. So disable them all
> > +	 * for non-ept guests.
> > +	 *
> > +	 * PCID is not supported for nested guests yet.
> > +	 */
> > +	return enable_ept && (boot_cpu_data.x86_capability[4] & bit(X86_FEATURE_PCID)) && !cpu_has_hypervisor;
> > +}
> 
> The comment Avi made was regarding running a nested guest, not running
> _as_ a nested guest (which is what cpu_has_hypervisor is about).
> 
> You can disable INVPCID exec control (which #UDs), if its in Level-2
> guest mode (see if_guest_mode()), and restore the Level-1 value when
> leaving nested mode.


I also don't understand the !cpu_has_hypervisor thing. Regarding
running a nested guest - the code in prepare_vmcs02() sets (among other
things) secondary exec controls used to run the L2 guest. It mostly
"or"s the bits requested by KVM (L0) and the guest (L1). So:

 1. If you think L0 mustn't run L2 with a certain bit, despite running L1
    with it (I didn't follow the reasoning why this is the case), you can
    specificially turn it off in that function (like we already do for
    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES).

 2. The other thing that could theoretically happen is that L1 will ask
    to turn on this bit for L2 (I think this is the case you wanted to
    avoid). This *won't* happen, because we tell L1 via MSR which bits
    it is allowed to set on secondary controls (see
    nested_vmx_secondary_ctls_high), and currently this only includes
    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES and
    SECONDARY_EXEC_ENABLE_EPT (the latter only for nested EPT), and enforce
    that L1 doesn't try to turn on more bits. 

-- 
Nadav Har'El                        |                   Tuesday, May 15 2012, 
nyh@math.technion.ac.il             |-----------------------------------------
Phone +972-523-790466, ICQ 13349191 |A bird in the hand is safer than one
http://nadav.harel.org.il           |overhead.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-15  2:42 ` Marcelo Tosatti
  2012-05-15  5:50   ` Nadav Har'El
@ 2012-05-17  1:22   ` Mao, Junjie
  2012-05-17  2:37     ` Mao, Junjie
  2012-05-17  2:54     ` Marcelo Tosatti
  1 sibling, 2 replies; 7+ messages in thread
From: Mao, Junjie @ 2012-05-17  1:22 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: 'kvm@vger.kernel.org'

> -----Original Message-----
> From: Marcelo Tosatti [mailto:mtosatti@redhat.com]
> Sent: Tuesday, May 15, 2012 10:42 AM
> To: Mao, Junjie
> Cc: 'kvm@vger.kernel.org'
> Subject: Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with
> EPT
> 
> On Mon, May 14, 2012 at 06:25:40AM +0000, Mao, Junjie wrote:
> > This patch handles PCID/INVPCID for guests.
> >
> > Process-context identifiers (PCIDs) are a facility by which a logical
> > processor may cache information for multiple linear-address spaces so
> > that the processor may retain cached information when software
> > switches to a different linear address space. Refer to section 4.10.1
> > in IA32 Intel Software Developer's Manual Volume 3A for details.
> >
> > For guests with EPT, the PCID feature is enabled and INVPCID behaves
> > as running natively.
> > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> #UD.
> >
> > Changes from v1:
> > 	Move cr0/cr4 writing checks to x86.c
> > 	Update comments for the reason why PCID is disabled for non-EPT guests
> > 	Do not support PCID/INVPCID for nested guests at present
> > 	Clean up useless symbols
> >
> > Signed-off-by: Junjie Mao <junjie.mao@intel.com>
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -1711,6 +1711,18 @@ static bool vmx_rdtscp_supported(void)
> >  	return cpu_has_vmx_rdtscp();
> >  }
> >
> > +static bool vmx_pcid_supported(void)
> > +{
> > +	/*
> > +	 * Enable INVPCID for non-ept guests may cause performance regression,
> > +	 * and without INVPCID, PCID has little benefits. So disable them all
> > +	 * for non-ept guests.
> > +	 *
> > +	 * PCID is not supported for nested guests yet.
> > +	 */
> > +	return enable_ept && (boot_cpu_data.x86_capability[4] &
> > +bit(X86_FEATURE_PCID)) && !cpu_has_hypervisor; }
> 
> The comment Avi made was regarding running a nested guest, not running
> _as_ a nested guest (which is what cpu_has_hypervisor is about).
> 
> You can disable INVPCID exec control (which #UDs), if its in Level-2 guest mode
> (see if_guest_mode()), and restore the Level-1 value when leaving nested
> mode.

This "!cpu_has_hypervisor " is brought by my ignorance on nested vmx. Sorry for that.

> 
> > +
> >  /*
> >   * Swap MSR entry in host/guest MSR entry array.
> >   */
> > @@ -2425,6 +2437,8 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
> >  			SECONDARY_EXEC_UNRESTRICTED_GUEST |
> >  			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> >  			SECONDARY_EXEC_RDTSCP;
> > +		if (vmx_pcid_supported())
> > +			opt2 |= SECONDARY_EXEC_ENABLE_INVPCID;
> 
> 
> You should allow ENABLE_INVPCID control to be set unconditionally here, and
> adjusted in vmx_secondary_exec_control().
> 
> (note that "enable_ept" might be cleared after setup_vmcs_config).
> 
> >  		if (adjust_vmx_controls(min2, opt2,
> >  					MSR_IA32_VMX_PROCBASED_CTLS2,
> >  					&_cpu_based_2nd_exec_control) < 0) @@ -6420,6
> +6434,20 @@ static
> > void vmx_cpuid_update(struct kvm_vcpu *vcpu)
> >  			}
> >  		}
> >  	}
> > +
> > +	if (vmx_pcid_supported()) {
> > +		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> > +		if (!best || !(best->ecx & bit(X86_FEATURE_PCID))) {
> > +			/* Hiding INVPCID when PCID is not exposed. */
> > +			exec_control =
> vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> > +			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > +			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > +				     exec_control);
> > +			best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> > +			if (best)
> > +				best->ecx &= ~bit(X86_FEATURE_INVPCID);
> > +		}
> > +	}
> 
> - If X86_FEATURE_CPUID bit is set by guest, but X86_FEATURE_INVPCID is
>   cleared, this allows invpcid to execute (which is wrong, it should
>   #UD).

Qemu doesn't support adjusting cpuid leaf 7 at present. The PCID/INVPCID features are both controlled by the 'pcid' bit in cpuid leaf 1, i.e.
	- On platforms supporting both the features, 'cpu xxx,+pcid' exposing both PCID and INVPCID and 'cpu xxx,-pcid' hiding both.
	- On platforms supporting PCID only, INVPCID is always not available to guests and PCID is exposed or hidden according to the command line.
	- On platforms supporting neither, both of them are always hidden.
This is done because PCID brings little (if any) benefits without the INVPCID instruction.
Thus if X86_FEATURE_INVPCID is cleared at this point, this means the host doesn't support it and setup_vmcs_config has already masked SECONDARY_VM_EXEC_CONTROL according to IA32_VMX_PROCBASED_CTLS2.

> - Must enable vm_exec control bit if cpuid reports the feature enabled,
>   not only disable it.

The bit is default to be set if the host supports it. The code here is only used when qemu explicitly disables PCID on platforms supporting the feature.

> 
> >  }
> >
> >  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
> > *entry) @@ -7154,6 +7182,7 @@ static struct kvm_x86_ops vmx_x86_ops =
> {
> >  	.cpuid_update = vmx_cpuid_update,
> >
> >  	.rdtscp_supported = vmx_rdtscp_supported,
> > +	.pcid_supported = vmx_pcid_supported,
> >
> >  	.set_supported_cpuid = vmx_set_supported_cpuid,
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index
> > c9d99e5..f930597 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -527,6 +527,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned
> long cr0)
> >  			return 1;
> >  	}
> >
> > +	if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> > +	    kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> > +		return 1;
> > +
> >  	kvm_x86_ops->set_cr0(vcpu, cr0);
> 
> For completeness, kvm_set_cr3 should deal with the CR3 bits. It is used by
> nested VMX for example.

I think you're talking about the reserved bit checks. I'll update it in the next version.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-17  1:22   ` Mao, Junjie
@ 2012-05-17  2:37     ` Mao, Junjie
  2012-05-17 14:46       ` Marcelo Tosatti
  2012-05-17  2:54     ` Marcelo Tosatti
  1 sibling, 1 reply; 7+ messages in thread
From: Mao, Junjie @ 2012-05-17  2:37 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: 'kvm@vger.kernel.org'

> -----Original Message-----
> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On
> Behalf Of Mao, Junjie
> Sent: Thursday, May 17, 2012 9:22 AM
> To: Marcelo Tosatti
> Cc: 'kvm@vger.kernel.org'
> Subject: RE: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with
> EPT
> 
> > -----Original Message-----
> > From: Marcelo Tosatti [mailto:mtosatti@redhat.com]
> > Sent: Tuesday, May 15, 2012 10:42 AM
> > To: Mao, Junjie
> > Cc: 'kvm@vger.kernel.org'
> > Subject: Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests
> > with EPT
> >
> > On Mon, May 14, 2012 at 06:25:40AM +0000, Mao, Junjie wrote:
> > > This patch handles PCID/INVPCID for guests.
> > >
> > > Process-context identifiers (PCIDs) are a facility by which a
> > > logical processor may cache information for multiple linear-address
> > > spaces so that the processor may retain cached information when
> > > software switches to a different linear address space. Refer to
> > > section 4.10.1 in IA32 Intel Software Developer's Manual Volume 3A for
> details.
> > >
> > > For guests with EPT, the PCID feature is enabled and INVPCID behaves
> > > as running natively.
> > > For guests without EPT, the PCID feature is disabled and INVPCID
> > > triggers
> > #UD.
> > >
> > > Changes from v1:
> > > 	Move cr0/cr4 writing checks to x86.c
> > > 	Update comments for the reason why PCID is disabled for non-EPT guests
> > > 	Do not support PCID/INVPCID for nested guests at present
> > > 	Clean up useless symbols
> > >
> > > Signed-off-by: Junjie Mao <junjie.mao@intel.com>
> > > +++ b/arch/x86/kvm/vmx.c
> > > @@ -1711,6 +1711,18 @@ static bool vmx_rdtscp_supported(void)
> > >  	return cpu_has_vmx_rdtscp();
> > >  }
> > >
> > > +static bool vmx_pcid_supported(void) {
> > > +	/*
> > > +	 * Enable INVPCID for non-ept guests may cause performance
> regression,
> > > +	 * and without INVPCID, PCID has little benefits. So disable them all
> > > +	 * for non-ept guests.
> > > +	 *
> > > +	 * PCID is not supported for nested guests yet.
> > > +	 */
> > > +	return enable_ept && (boot_cpu_data.x86_capability[4] &
> > > +bit(X86_FEATURE_PCID)) && !cpu_has_hypervisor; }
> >
> > The comment Avi made was regarding running a nested guest, not running
> > _as_ a nested guest (which is what cpu_has_hypervisor is about).
> >
> > You can disable INVPCID exec control (which #UDs), if its in Level-2
> > guest mode (see if_guest_mode()), and restore the Level-1 value when
> > leaving nested mode.
> 
> This "!cpu_has_hypervisor " is brought by my ignorance on nested vmx. Sorry
> for that.
> 

BTW, this 'vmx_pcid_supported' is used for determining whether X86_FEATURE_[PCID|INVPCID] should be exposed for KVM_GET_SUPPORTED_CPUID ioctl. These bits are exposed to qemu in L0 if cpuid of L0 has them, but should now always be hidden from qemu in L1 no matter cpuid of L1 has them or not. I think that, for guest hypervisor, 'do_cpuid_ent' is run in L1 which has this hypervisor bit in its cpuid, giving rise to this '!cpu_has_hypervisor'. Do I understand things in the right way?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-17  1:22   ` Mao, Junjie
  2012-05-17  2:37     ` Mao, Junjie
@ 2012-05-17  2:54     ` Marcelo Tosatti
  1 sibling, 0 replies; 7+ messages in thread
From: Marcelo Tosatti @ 2012-05-17  2:54 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'

On Thu, May 17, 2012 at 01:22:05AM +0000, Mao, Junjie wrote:
> > -----Original Message-----
> > From: Marcelo Tosatti [mailto:mtosatti@redhat.com]
> > Sent: Tuesday, May 15, 2012 10:42 AM
> > To: Mao, Junjie
> > Cc: 'kvm@vger.kernel.org'
> > Subject: Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with
> > EPT
> > 
> > On Mon, May 14, 2012 at 06:25:40AM +0000, Mao, Junjie wrote:
> > > This patch handles PCID/INVPCID for guests.
> > >
> > > Process-context identifiers (PCIDs) are a facility by which a logical
> > > processor may cache information for multiple linear-address spaces so
> > > that the processor may retain cached information when software
> > > switches to a different linear address space. Refer to section 4.10.1
> > > in IA32 Intel Software Developer's Manual Volume 3A for details.
> > >
> > > For guests with EPT, the PCID feature is enabled and INVPCID behaves
> > > as running natively.
> > > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> > #UD.
> > >
> > > Changes from v1:
> > > 	Move cr0/cr4 writing checks to x86.c
> > > 	Update comments for the reason why PCID is disabled for non-EPT guests
> > > 	Do not support PCID/INVPCID for nested guests at present
> > > 	Clean up useless symbols
> > >
> > > Signed-off-by: Junjie Mao <junjie.mao@intel.com>
> > > +++ b/arch/x86/kvm/vmx.c
> > > @@ -1711,6 +1711,18 @@ static bool vmx_rdtscp_supported(void)
> > >  	return cpu_has_vmx_rdtscp();
> > >  }
> > >
> > > +static bool vmx_pcid_supported(void)
> > > +{
> > > +	/*
> > > +	 * Enable INVPCID for non-ept guests may cause performance regression,
> > > +	 * and without INVPCID, PCID has little benefits. So disable them all
> > > +	 * for non-ept guests.
> > > +	 *
> > > +	 * PCID is not supported for nested guests yet.
> > > +	 */
> > > +	return enable_ept && (boot_cpu_data.x86_capability[4] &
> > > +bit(X86_FEATURE_PCID)) && !cpu_has_hypervisor; }
> > 
> > The comment Avi made was regarding running a nested guest, not running
> > _as_ a nested guest (which is what cpu_has_hypervisor is about).
> > 
> > You can disable INVPCID exec control (which #UDs), if its in Level-2 guest mode
> > (see if_guest_mode()), and restore the Level-1 value when leaving nested
> > mode.
> 
> This "!cpu_has_hypervisor " is brought by my ignorance on nested vmx. Sorry for that.

That makes two of us. No problem.

> > > +
> > >  /*
> > >   * Swap MSR entry in host/guest MSR entry array.
> > >   */
> > > @@ -2425,6 +2437,8 @@ static __init int setup_vmcs_config(struct
> > vmcs_config *vmcs_conf)
> > >  			SECONDARY_EXEC_UNRESTRICTED_GUEST |
> > >  			SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> > >  			SECONDARY_EXEC_RDTSCP;
> > > +		if (vmx_pcid_supported())
> > > +			opt2 |= SECONDARY_EXEC_ENABLE_INVPCID;
> > 
> > 
> > You should allow ENABLE_INVPCID control to be set unconditionally here, and
> > adjusted in vmx_secondary_exec_control().
> > 
> > (note that "enable_ept" might be cleared after setup_vmcs_config).
> > 
> > >  		if (adjust_vmx_controls(min2, opt2,
> > >  					MSR_IA32_VMX_PROCBASED_CTLS2,
> > >  					&_cpu_based_2nd_exec_control) < 0) @@ -6420,6
> > +6434,20 @@ static
> > > void vmx_cpuid_update(struct kvm_vcpu *vcpu)
> > >  			}
> > >  		}
> > >  	}
> > > +
> > > +	if (vmx_pcid_supported()) {
> > > +		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> > > +		if (!best || !(best->ecx & bit(X86_FEATURE_PCID))) {
> > > +			/* Hiding INVPCID when PCID is not exposed. */
> > > +			exec_control =
> > vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> > > +			exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > > +			vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > > +				     exec_control);
> > > +			best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> > > +			if (best)
> > > +				best->ecx &= ~bit(X86_FEATURE_INVPCID);
> > > +		}
> > > +	}
> > 
> > - If X86_FEATURE_CPUID bit is set by guest, but X86_FEATURE_INVPCID is
> >   cleared, this allows invpcid to execute (which is wrong, it should
> >   #UD).
> 
> Qemu doesn't support adjusting cpuid leaf 7 at present. The PCID/INVPCID features are both controlled by the 'pcid' bit in cpuid leaf 1, i.e.
> 	- On platforms supporting both the features, 'cpu xxx,+pcid' exposing both PCID and INVPCID and 'cpu xxx,-pcid' hiding both.
> 	- On platforms supporting PCID only, INVPCID is always not available to guests and PCID is exposed or hidden according to the command line.
> 	- On platforms supporting neither, both of them are always hidden.
> This is done because PCID brings little (if any) benefits without the INVPCID instruction.
> Thus if X86_FEATURE_INVPCID is cleared at this point, this means the host doesn't support it and setup_vmcs_config has already masked SECONDARY_VM_EXEC_CONTROL according to IA32_VMX_PROCBASED_CTLS2.

QEMU is not the only user of KVM. Just

if (X86_FEATURE_INVPCID OR X86_FEATURE_INVPCID) are unset
    clear SECONDARY_VM_EXEC_CONTROL

Independently of the facts above, this makes the code more robust.

> > - Must enable vm_exec control bit if cpuid reports the feature enabled,
> >   not only disable it.
> 
> The bit is default to be set if the host supports it. The code here is only used when qemu explicitly disables PCID on platforms supporting the feature.

Userspace is free to call ioctl(SET_CPUID) in any order and number of 
times it pleases. Please support both enable and disable (it is easy,
see RDTSCP code in the function).

> > >  }
> > >
> > >  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
> > > *entry) @@ -7154,6 +7182,7 @@ static struct kvm_x86_ops vmx_x86_ops =
> > {
> > >  	.cpuid_update = vmx_cpuid_update,
> > >
> > >  	.rdtscp_supported = vmx_rdtscp_supported,
> > > +	.pcid_supported = vmx_pcid_supported,
> > >
> > >  	.set_supported_cpuid = vmx_set_supported_cpuid,
> > >
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index
> > > c9d99e5..f930597 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -527,6 +527,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned
> > long cr0)
> > >  			return 1;
> > >  	}
> > >
> > > +	if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> > > +	    kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> > > +		return 1;
> > > +
> > >  	kvm_x86_ops->set_cr0(vcpu, cr0);
> > 
> > For completeness, kvm_set_cr3 should deal with the CR3 bits. It is used by
> > nested VMX for example.
> 
> I think you're talking about the reserved bit checks. I'll update it in the next version.

Yes, thanks.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT
  2012-05-17  2:37     ` Mao, Junjie
@ 2012-05-17 14:46       ` Marcelo Tosatti
  0 siblings, 0 replies; 7+ messages in thread
From: Marcelo Tosatti @ 2012-05-17 14:46 UTC (permalink / raw)
  To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'

On Thu, May 17, 2012 at 02:37:21AM +0000, Mao, Junjie wrote:
> > > You can disable INVPCID exec control (which #UDs), if its in Level-2
> > > guest mode (see if_guest_mode()), and restore the Level-1 value when
> > > leaving nested mode.
> > 
> > This "!cpu_has_hypervisor " is brought by my ignorance on nested vmx. Sorry
> > for that.
> > 
> 
> BTW, this 'vmx_pcid_supported' is used for determining whether X86_FEATURE_[PCID|INVPCID] should be exposed for KVM_GET_SUPPORTED_CPUID ioctl. These bits are exposed to qemu in L0 if cpuid of L0 has them, but should now always be hidden from qemu in L1 no matter cpuid of L1 has them or not. I think that, for guest hypervisor, 'do_cpuid_ent' is run in L1 which has this hypervisor bit in its cpuid, giving rise to this '!cpu_has_hypervisor'. Do I understand things in the right way?

The L2 guest should not execute with INVPCID_ENABLE secondary exec
control set (because PCID is not supported for the L2 guest). 

INVPCID in L2 should #UD.

See item 1 in Nadav's message.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-05-17 14:51 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-14  6:25 [PATCH v2] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
2012-05-15  2:42 ` Marcelo Tosatti
2012-05-15  5:50   ` Nadav Har'El
2012-05-17  1:22   ` Mao, Junjie
2012-05-17  2:37     ` Mao, Junjie
2012-05-17 14:46       ` Marcelo Tosatti
2012-05-17  2:54     ` Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox