* [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
@ 2012-06-14 2:04 Mao, Junjie
2012-06-16 2:32 ` Marcelo Tosatti
2012-06-28 15:49 ` Avi Kivity
0 siblings, 2 replies; 9+ messages in thread
From: Mao, Junjie @ 2012-06-14 2:04 UTC (permalink / raw)
To: 'kvm@vger.kernel.org'
This patch handles PCID/INVPCID for guests.
Process-context identifiers (PCIDs) are a facility by which a logical processor
may cache information for multiple linear-address spaces so that the processor
may retain cached information when software switches to a different linear
address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual
Volume 3A for details.
For guests with EPT, the PCID feature is enabled and INVPCID behaves as running
natively.
For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
Changes from v3:
Rebase to the latest tree
Expose PCID to nested guests
Remove the pcid_supported callback
Changes from v2:
Seperate management of PCID and INVPCID
Prevent PCID bit in CPUID from exposing on guest hypervisors
Don't check the lower 12 bits when loading cr3 if cr4.PCIDE is set
Explicitly disable INVPCID for L2 guests
Support both enable and disable INVPCID in vmx_cpuid_update()
Changes from v1:
Move cr0/cr4 writing checks to x86.c
Update comments for the reason why PCID is disabled for non-EPT guests
Do not support PCID/INVPCID for nested guests at present
Clean up useless symbols
Signed-off-by: Junjie Mao <junjie.mao@intel.com>
---
arch/x86/include/asm/kvm_host.h | 4 ++-
arch/x86/include/asm/processor-flags.h | 2 +
arch/x86/include/asm/vmx.h | 2 +
arch/x86/kvm/cpuid.c | 6 +++-
arch/x86/kvm/cpuid.h | 8 +++++++
arch/x86/kvm/svm.c | 6 +++++
arch/x86/kvm/vmx.c | 37 +++++++++++++++++++++++++++++++-
arch/x86/kvm/x86.c | 24 ++++++++++++++++++--
8 files changed, 82 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2..95828a4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -48,12 +48,13 @@
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
+#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
- | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
@@ -661,6 +662,7 @@ struct kvm_x86_ops {
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
+ bool (*invpcid_supported)(void);
void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index f8ab3ea..aea1d1d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -44,6 +44,7 @@
*/
#define X86_CR3_PWT 0x00000008 /* Page Write Through */
#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
+#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
/*
* Intel CPU features in CR4
@@ -61,6 +62,7 @@
#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
+#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 31f180c..b81525c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -60,6 +60,7 @@
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -281,6 +282,7 @@ enum vmcs_field {
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_INVPCID 58
/*
* Interruption-information format
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7df1c6d..d13408a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_lm = 0;
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
+ unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;
+ unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
/* cpuid 1.edx */
const u32 kvm_supported_word0_x86_features =
@@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
0 /* DS-CPL, VMX, SMX, EST */ |
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
- 0 /* Reserved, DCA */ | F(XMM4_1) |
+ f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
F(F16C) | F(RDRAND);
@@ -248,7 +250,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | F(RTM);
+ F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 26d1fb4..e531d39 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_OSVW));
}
+static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ return best && (best->ecx & bit(X86_FEATURE_PCID));
+}
+
#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f75af40..81ed0ba 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4044,6 +4044,11 @@ static bool svm_rdtscp_supported(void)
return false;
}
+static bool svm_invpcid_supported(void)
+{
+ return false;
+}
+
static bool svm_has_wbinvd_exit(void)
{
return true;
@@ -4312,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.cpuid_update = svm_cpuid_update,
.rdtscp_supported = svm_rdtscp_supported,
+ .invpcid_supported = svm_invpcid_supported,
.set_supported_cpuid = svm_set_supported_cpuid,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32eb588..21760b9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -849,6 +849,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
SECONDARY_EXEC_RDTSCP;
}
+static inline bool cpu_has_vmx_invpcid(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_ENABLE_INVPCID;
+}
+
static inline bool cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@@ -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
return cpu_has_vmx_rdtscp();
}
+static bool vmx_invpcid_supported(void)
+{
+ return cpu_has_vmx_invpcid();
+}
+
/*
* Swap MSR entry in host/guest MSR entry array.
*/
@@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
- SECONDARY_EXEC_RDTSCP;
+ SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_ENABLE_INVPCID;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -3731,6 +3743,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_ept) {
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
enable_unrestricted_guest = 0;
+ /* Enable INVPCID for non-ept guests may cause performance regression. */
+ exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
}
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
@@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
}
}
}
+
+ if (vmx_invpcid_supported()) {
+ exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ /* Exposing INVPCID only when PCID is exposed */
+ best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+ if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) {
+ exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+ exec_control);
+ } else {
+ exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+ vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+ exec_control);
+ if (best)
+ best->ecx &= ~bit(X86_FEATURE_INVPCID);
+ }
+ }
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
page_to_phys(vmx->nested.apic_access_page));
}
+ /* Explicitly disable INVPCID until PCID for L2 guest is supported */
+ exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
+
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
@@ -7201,6 +7235,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cpuid_update = vmx_cpuid_update,
.rdtscp_supported = vmx_rdtscp_supported,
+ .invpcid_supported = vmx_invpcid_supported,
.set_supported_cpuid = vmx_set_supported_cpuid,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be6d549..3a66d7b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
return 1;
}
+ if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
+ kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+ return 1;
+
kvm_x86_ops->set_cr0(vcpu, cr0);
if ((cr0 ^ old_cr0) & X86_CR0_PG) {
@@ -604,10 +608,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
kvm_read_cr3(vcpu)))
return 1;
+ if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
+ if (!guest_cpuid_has_pcid(vcpu))
+ return 1;
+
+ /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
+ return 1;
+ }
+
if (kvm_x86_ops->set_cr4(vcpu, cr4))
return 1;
- if ((cr4 ^ old_cr4) & pdptr_bits)
+ if (((cr4 ^ old_cr4) & pdptr_bits) ||
+ (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
@@ -626,8 +640,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
if (is_long_mode(vcpu)) {
- if (cr3 & CR3_L_MODE_RESERVED_BITS)
- return 1;
+ if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
+ if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
+ return 1;
+ } else
+ if (cr3 & CR3_L_MODE_RESERVED_BITS)
+ return 1;
} else {
if (is_pae(vcpu)) {
if (cr3 & CR3_PAE_RESERVED_BITS)
--
1.7.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-06-14 2:04 [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
@ 2012-06-16 2:32 ` Marcelo Tosatti
2012-06-19 8:24 ` Mao, Junjie
2012-06-28 15:49 ` Avi Kivity
1 sibling, 1 reply; 9+ messages in thread
From: Marcelo Tosatti @ 2012-06-16 2:32 UTC (permalink / raw)
To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org', Avi Kivity
On Thu, Jun 14, 2012 at 02:04:25AM +0000, Mao, Junjie wrote:
> This patch handles PCID/INVPCID for guests.
>
> Process-context identifiers (PCIDs) are a facility by which a logical processor
> may cache information for multiple linear-address spaces so that the processor
> may retain cached information when software switches to a different linear
> address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual
> Volume 3A for details.
>
> For guests with EPT, the PCID feature is enabled and INVPCID behaves as running
> natively.
> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
>
> Changes from v3:
> Rebase to the latest tree
> Expose PCID to nested guests
> Remove the pcid_supported callback
>
> Changes from v2:
> Seperate management of PCID and INVPCID
> Prevent PCID bit in CPUID from exposing on guest hypervisors
> Don't check the lower 12 bits when loading cr3 if cr4.PCIDE is set
> Explicitly disable INVPCID for L2 guests
> Support both enable and disable INVPCID in vmx_cpuid_update()
>
> Changes from v1:
> Move cr0/cr4 writing checks to x86.c
> Update comments for the reason why PCID is disabled for non-EPT guests
> Do not support PCID/INVPCID for nested guests at present
> Clean up useless symbols
>
> Signed-off-by: Junjie Mao <junjie.mao@intel.com>
Looks good to me.
> ---
> arch/x86/include/asm/kvm_host.h | 4 ++-
> arch/x86/include/asm/processor-flags.h | 2 +
> arch/x86/include/asm/vmx.h | 2 +
> arch/x86/kvm/cpuid.c | 6 +++-
> arch/x86/kvm/cpuid.h | 8 +++++++
> arch/x86/kvm/svm.c | 6 +++++
> arch/x86/kvm/vmx.c | 37 +++++++++++++++++++++++++++++++-
> arch/x86/kvm/x86.c | 24 ++++++++++++++++++--
> 8 files changed, 82 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index db7c1f2..95828a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -48,12 +48,13 @@
>
> #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
> #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
> +#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL
> #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
> 0xFFFFFF0000000000ULL)
> #define CR4_RESERVED_BITS \
> (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
> | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
> - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
> + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
> | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
> | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
>
> @@ -661,6 +662,7 @@ struct kvm_x86_ops {
> u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> int (*get_lpage_level)(void);
> bool (*rdtscp_supported)(void);
> + bool (*invpcid_supported)(void);
> void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
>
> void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
> diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
> index f8ab3ea..aea1d1d 100644
> --- a/arch/x86/include/asm/processor-flags.h
> +++ b/arch/x86/include/asm/processor-flags.h
> @@ -44,6 +44,7 @@
> */
> #define X86_CR3_PWT 0x00000008 /* Page Write Through */
> #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
> +#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
>
> /*
> * Intel CPU features in CR4
> @@ -61,6 +62,7 @@
> #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
> #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
> #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
> +#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
> #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
> #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 31f180c..b81525c 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -60,6 +60,7 @@
> #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
> #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
> #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
> +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
>
>
> #define PIN_BASED_EXT_INTR_MASK 0x00000001
> @@ -281,6 +282,7 @@ enum vmcs_field {
> #define EXIT_REASON_EPT_MISCONFIG 49
> #define EXIT_REASON_WBINVD 54
> #define EXIT_REASON_XSETBV 55
> +#define EXIT_REASON_INVPCID 58
>
> /*
> * Interruption-information format
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 7df1c6d..d13408a 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> unsigned f_lm = 0;
> #endif
> unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> + unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;
> + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
>
> /* cpuid 1.edx */
> const u32 kvm_supported_word0_x86_features =
> @@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> 0 /* DS-CPL, VMX, SMX, EST */ |
> 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> - 0 /* Reserved, DCA */ | F(XMM4_1) |
> + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
> F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> F(F16C) | F(RDRAND);
> @@ -248,7 +250,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> /* cpuid 7.0.ebx */
> const u32 kvm_supported_word9_x86_features =
> F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
> - F(BMI2) | F(ERMS) | F(RTM);
> + F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
>
> /* all calls to cpuid_count() should be made on the same cpu */
> get_cpu();
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index 26d1fb4..e531d39 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
> return best && (best->ecx & bit(X86_FEATURE_OSVW));
> }
>
> +static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpuid_entry2 *best;
> +
> + best = kvm_find_cpuid_entry(vcpu, 1, 0);
> + return best && (best->ecx & bit(X86_FEATURE_PCID));
> +}
> +
> #endif
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index f75af40..81ed0ba 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4044,6 +4044,11 @@ static bool svm_rdtscp_supported(void)
> return false;
> }
>
> +static bool svm_invpcid_supported(void)
> +{
> + return false;
> +}
> +
> static bool svm_has_wbinvd_exit(void)
> {
> return true;
> @@ -4312,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = {
> .cpuid_update = svm_cpuid_update,
>
> .rdtscp_supported = svm_rdtscp_supported,
> + .invpcid_supported = svm_invpcid_supported,
>
> .set_supported_cpuid = svm_set_supported_cpuid,
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 32eb588..21760b9 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -849,6 +849,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
> SECONDARY_EXEC_RDTSCP;
> }
>
> +static inline bool cpu_has_vmx_invpcid(void)
> +{
> + return vmcs_config.cpu_based_2nd_exec_ctrl &
> + SECONDARY_EXEC_ENABLE_INVPCID;
> +}
> +
> static inline bool cpu_has_virtual_nmis(void)
> {
> return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
> @@ -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
> return cpu_has_vmx_rdtscp();
> }
>
> +static bool vmx_invpcid_supported(void)
> +{
> + return cpu_has_vmx_invpcid();
> +}
> +
> /*
> * Swap MSR entry in host/guest MSR entry array.
> */
> @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
> SECONDARY_EXEC_ENABLE_EPT |
> SECONDARY_EXEC_UNRESTRICTED_GUEST |
> SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> - SECONDARY_EXEC_RDTSCP;
> + SECONDARY_EXEC_RDTSCP |
> + SECONDARY_EXEC_ENABLE_INVPCID;
> if (adjust_vmx_controls(min2, opt2,
> MSR_IA32_VMX_PROCBASED_CTLS2,
> &_cpu_based_2nd_exec_control) < 0)
> @@ -3731,6 +3743,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> if (!enable_ept) {
> exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
> enable_unrestricted_guest = 0;
> + /* Enable INVPCID for non-ept guests may cause performance regression. */
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> }
> if (!enable_unrestricted_guest)
> exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
> @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
> }
> }
> }
> +
> + if (vmx_invpcid_supported()) {
> + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> + /* Exposing INVPCID only when PCID is exposed */
> + best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> + if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) {
> + exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> + exec_control);
> + } else {
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> + exec_control);
> + if (best)
> + best->ecx &= ~bit(X86_FEATURE_INVPCID);
> + }
> + }
> }
>
> static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
> @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> page_to_phys(vmx->nested.apic_access_page));
> }
>
> + /* Explicitly disable INVPCID until PCID for L2 guest is supported */
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> +
> vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> }
>
> @@ -7201,6 +7235,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .cpuid_update = vmx_cpuid_update,
>
> .rdtscp_supported = vmx_rdtscp_supported,
> + .invpcid_supported = vmx_invpcid_supported,
>
> .set_supported_cpuid = vmx_set_supported_cpuid,
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index be6d549..3a66d7b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> return 1;
> }
>
> + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> + return 1;
> +
> kvm_x86_ops->set_cr0(vcpu, cr0);
>
> if ((cr0 ^ old_cr0) & X86_CR0_PG) {
> @@ -604,10 +608,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> kvm_read_cr3(vcpu)))
> return 1;
>
> + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> + if (!guest_cpuid_has_pcid(vcpu))
> + return 1;
> +
> + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
> + return 1;
> + }
> +
> if (kvm_x86_ops->set_cr4(vcpu, cr4))
> return 1;
>
> - if ((cr4 ^ old_cr4) & pdptr_bits)
> + if (((cr4 ^ old_cr4) & pdptr_bits) ||
> + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> kvm_mmu_reset_context(vcpu);
>
> if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
> @@ -626,8 +640,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> }
>
> if (is_long_mode(vcpu)) {
> - if (cr3 & CR3_L_MODE_RESERVED_BITS)
> - return 1;
> + if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
> + if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
> + return 1;
> + } else
> + if (cr3 & CR3_L_MODE_RESERVED_BITS)
> + return 1;
> } else {
> if (is_pae(vcpu)) {
> if (cr3 & CR3_PAE_RESERVED_BITS)
> --
> 1.7.1
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-06-16 2:32 ` Marcelo Tosatti
@ 2012-06-19 8:24 ` Mao, Junjie
0 siblings, 0 replies; 9+ messages in thread
From: Mao, Junjie @ 2012-06-19 8:24 UTC (permalink / raw)
To: Avi Kivity; +Cc: 'kvm@vger.kernel.org'
Hi, Avi
Any comments for the patch?
Best Regards
Junjie Mao
> -----Original Message-----
> From: Marcelo Tosatti [mailto:mtosatti@redhat.com]
> Sent: Saturday, June 16, 2012 10:32 AM
> To: Mao, Junjie
> Cc: 'kvm@vger.kernel.org'; Avi Kivity
> Subject: Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with
> EPT
>
> On Thu, Jun 14, 2012 at 02:04:25AM +0000, Mao, Junjie wrote:
> > This patch handles PCID/INVPCID for guests.
> >
> > Process-context identifiers (PCIDs) are a facility by which a logical
> > processor may cache information for multiple linear-address spaces so
> > that the processor may retain cached information when software
> > switches to a different linear address space. Refer to section 4.10.1
> > in IA32 Intel Software Developer's Manual Volume 3A for details.
> >
> > For guests with EPT, the PCID feature is enabled and INVPCID behaves
> > as running natively.
> > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> #UD.
> >
> > Changes from v3:
> > Rebase to the latest tree
> > Expose PCID to nested guests
> > Remove the pcid_supported callback
> >
> > Changes from v2:
> > Seperate management of PCID and INVPCID
> > Prevent PCID bit in CPUID from exposing on guest hypervisors
> > Don't check the lower 12 bits when loading cr3 if cr4.PCIDE is set
> > Explicitly disable INVPCID for L2 guests
> > Support both enable and disable INVPCID in vmx_cpuid_update()
> >
> > Changes from v1:
> > Move cr0/cr4 writing checks to x86.c
> > Update comments for the reason why PCID is disabled for non-EPT guests
> > Do not support PCID/INVPCID for nested guests at present
> > Clean up useless symbols
> >
> > Signed-off-by: Junjie Mao <junjie.mao@intel.com>
>
> Looks good to me.
>
> > ---
> > arch/x86/include/asm/kvm_host.h | 4 ++-
> > arch/x86/include/asm/processor-flags.h | 2 +
> > arch/x86/include/asm/vmx.h | 2 +
> > arch/x86/kvm/cpuid.c | 6 +++-
> > arch/x86/kvm/cpuid.h | 8 +++++++
> > arch/x86/kvm/svm.c | 6 +++++
> > arch/x86/kvm/vmx.c | 37
> +++++++++++++++++++++++++++++++-
> > arch/x86/kvm/x86.c | 24
> ++++++++++++++++++--
> > 8 files changed, 82 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index db7c1f2..95828a4 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -48,12 +48,13 @@
> >
> > #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
> > #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT |
> > X86_CR3_PCD))
> > +#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL
> > #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |
> \
> > 0xFFFFFF0000000000ULL)
> > #define CR4_RESERVED_BITS
> \
> > (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |
> X86_CR4_DE\
> > | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
> > - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
> > + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR |
> X86_CR4_PCIDE \
> > | X86_CR4_OSXSAVE | X86_CR4_SMEP |
> X86_CR4_RDWRGSFS \
> > | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
> >
> > @@ -661,6 +662,7 @@ struct kvm_x86_ops {
> > u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> > int (*get_lpage_level)(void);
> > bool (*rdtscp_supported)(void);
> > + bool (*invpcid_supported)(void);
> > void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment,
> > bool host);
> >
> > void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); diff
> > --git a/arch/x86/include/asm/processor-flags.h
> > b/arch/x86/include/asm/processor-flags.h
> > index f8ab3ea..aea1d1d 100644
> > --- a/arch/x86/include/asm/processor-flags.h
> > +++ b/arch/x86/include/asm/processor-flags.h
> > @@ -44,6 +44,7 @@
> > */
> > #define X86_CR3_PWT 0x00000008 /* Page Write Through */
> > #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
> > +#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
> >
> > /*
> > * Intel CPU features in CR4
> > @@ -61,6 +62,7 @@
> > #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE
> exceptions */
> > #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
> > #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
> > +#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
> > #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
> > #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
> >
> > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> > index 31f180c..b81525c 100644
> > --- a/arch/x86/include/asm/vmx.h
> > +++ b/arch/x86/include/asm/vmx.h
> > @@ -60,6 +60,7 @@
> > #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
> > #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
> > #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
> > +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
> >
> >
> > #define PIN_BASED_EXT_INTR_MASK 0x00000001
> > @@ -281,6 +282,7 @@ enum vmcs_field {
> > #define EXIT_REASON_EPT_MISCONFIG 49
> > #define EXIT_REASON_WBINVD 54
> > #define EXIT_REASON_XSETBV 55
> > +#define EXIT_REASON_INVPCID 58
> >
> > /*
> > * Interruption-information format
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index
> > 7df1c6d..d13408a 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2
> *entry, u32 function,
> > unsigned f_lm = 0;
> > #endif
> > unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> > + unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;
> > + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) :
> > +0;
> >
> > /* cpuid 1.edx */
> > const u32 kvm_supported_word0_x86_features = @@ -228,7 +230,7 @@
> > static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> > 0 /* DS-CPL, VMX, SMX, EST */ |
> > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > - 0 /* Reserved, DCA */ | F(XMM4_1) |
> > + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
> > F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> > F(F16C) | F(RDRAND);
> > @@ -248,7 +250,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2
> *entry, u32 function,
> > /* cpuid 7.0.ebx */
> > const u32 kvm_supported_word9_x86_features =
> > F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
> > - F(BMI2) | F(ERMS) | F(RTM);
> > + F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
> >
> > /* all calls to cpuid_count() should be made on the same cpu */
> > get_cpu();
> > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index
> > 26d1fb4..e531d39 100644
> > --- a/arch/x86/kvm/cpuid.h
> > +++ b/arch/x86/kvm/cpuid.h
> > @@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct
> kvm_vcpu *vcpu)
> > return best && (best->ecx & bit(X86_FEATURE_OSVW)); }
> >
> > +static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) {
> > + struct kvm_cpuid_entry2 *best;
> > +
> > + best = kvm_find_cpuid_entry(vcpu, 1, 0);
> > + return best && (best->ecx & bit(X86_FEATURE_PCID)); }
> > +
> > #endif
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index
> > f75af40..81ed0ba 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -4044,6 +4044,11 @@ static bool svm_rdtscp_supported(void)
> > return false;
> > }
> >
> > +static bool svm_invpcid_supported(void) {
> > + return false;
> > +}
> > +
> > static bool svm_has_wbinvd_exit(void) {
> > return true;
> > @@ -4312,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = {
> > .cpuid_update = svm_cpuid_update,
> >
> > .rdtscp_supported = svm_rdtscp_supported,
> > + .invpcid_supported = svm_invpcid_supported,
> >
> > .set_supported_cpuid = svm_set_supported_cpuid,
> >
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > 32eb588..21760b9 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -849,6 +849,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
> > SECONDARY_EXEC_RDTSCP;
> > }
> >
> > +static inline bool cpu_has_vmx_invpcid(void) {
> > + return vmcs_config.cpu_based_2nd_exec_ctrl &
> > + SECONDARY_EXEC_ENABLE_INVPCID;
> > +}
> > +
> > static inline bool cpu_has_virtual_nmis(void) {
> > return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
> @@
> > -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
> > return cpu_has_vmx_rdtscp();
> > }
> >
> > +static bool vmx_invpcid_supported(void) {
> > + return cpu_has_vmx_invpcid();
> > +}
> > +
> > /*
> > * Swap MSR entry in host/guest MSR entry array.
> > */
> > @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
> > SECONDARY_EXEC_ENABLE_EPT |
> > SECONDARY_EXEC_UNRESTRICTED_GUEST |
> > SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> > - SECONDARY_EXEC_RDTSCP;
> > + SECONDARY_EXEC_RDTSCP |
> > + SECONDARY_EXEC_ENABLE_INVPCID;
> > if (adjust_vmx_controls(min2, opt2,
> > MSR_IA32_VMX_PROCBASED_CTLS2,
> > &_cpu_based_2nd_exec_control) < 0) @@ -3731,6
> +3743,8 @@ static
> > u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> > if (!enable_ept) {
> > exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
> > enable_unrestricted_guest = 0;
> > + /* Enable INVPCID for non-ept guests may cause performance
> regression. */
> > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > }
> > if (!enable_unrestricted_guest)
> > exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
> > @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu
> *vcpu)
> > }
> > }
> > }
> > +
> > + if (vmx_invpcid_supported()) {
> > + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> > + /* Exposing INVPCID only when PCID is exposed */
> > + best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> > + if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) &&
> guest_cpuid_has_pcid(vcpu)) {
> > + exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
> > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > + exec_control);
> > + } else {
> > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > + exec_control);
> > + if (best)
> > + best->ecx &= ~bit(X86_FEATURE_INVPCID);
> > + }
> > + }
> > }
> >
> > static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
> > *entry) @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct
> kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> > page_to_phys(vmx->nested.apic_access_page));
> > }
> >
> > + /* Explicitly disable INVPCID until PCID for L2 guest is supported */
> > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > +
> > vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> > }
> >
> > @@ -7201,6 +7235,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
> > .cpuid_update = vmx_cpuid_update,
> >
> > .rdtscp_supported = vmx_rdtscp_supported,
> > + .invpcid_supported = vmx_invpcid_supported,
> >
> > .set_supported_cpuid = vmx_set_supported_cpuid,
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index
> > be6d549..3a66d7b 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned
> long cr0)
> > return 1;
> > }
> >
> > + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> > + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> > + return 1;
> > +
> > kvm_x86_ops->set_cr0(vcpu, cr0);
> >
> > if ((cr0 ^ old_cr0) & X86_CR0_PG) {
> > @@ -604,10 +608,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned
> long cr4)
> > kvm_read_cr3(vcpu)))
> > return 1;
> >
> > + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> > + if (!guest_cpuid_has_pcid(vcpu))
> > + return 1;
> > +
> > + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> > + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK)
> || !is_long_mode(vcpu))
> > + return 1;
> > + }
> > +
> > if (kvm_x86_ops->set_cr4(vcpu, cr4))
> > return 1;
> >
> > - if ((cr4 ^ old_cr4) & pdptr_bits)
> > + if (((cr4 ^ old_cr4) & pdptr_bits) ||
> > + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> > kvm_mmu_reset_context(vcpu);
> >
> > if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) @@ -626,8 +640,12 @@ int
> > kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> > }
> >
> > if (is_long_mode(vcpu)) {
> > - if (cr3 & CR3_L_MODE_RESERVED_BITS)
> > - return 1;
> > + if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
> > + if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
> > + return 1;
> > + } else
> > + if (cr3 & CR3_L_MODE_RESERVED_BITS)
> > + return 1;
> > } else {
> > if (is_pae(vcpu)) {
> > if (cr3 & CR3_PAE_RESERVED_BITS)
> > --
> > 1.7.1
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in the
> > body of a message to majordomo@vger.kernel.org More majordomo info at
> > http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-06-14 2:04 [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
2012-06-16 2:32 ` Marcelo Tosatti
@ 2012-06-28 15:49 ` Avi Kivity
2012-06-28 15:49 ` Avi Kivity
2012-06-29 2:37 ` Mao, Junjie
1 sibling, 2 replies; 9+ messages in thread
From: Avi Kivity @ 2012-06-28 15:49 UTC (permalink / raw)
To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'
On 06/14/2012 05:04 AM, Mao, Junjie wrote:
> This patch handles PCID/INVPCID for guests.
>
> Process-context identifiers (PCIDs) are a facility by which a logical processor
> may cache information for multiple linear-address spaces so that the processor
> may retain cached information when software switches to a different linear
> address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual
> Volume 3A for details.
>
> For guests with EPT, the PCID feature is enabled and INVPCID behaves as running
> natively.
> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
>
>
> #endif
> unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> + unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;
Unneeded, just put F(PCID) below.
> + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
>
> /* cpuid 1.edx */
> const u32 kvm_supported_word0_x86_features =
> @@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> 0 /* DS-CPL, VMX, SMX, EST */ |
> 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> - 0 /* Reserved, DCA */ | F(XMM4_1) |
> + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
> @@ -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
> return cpu_has_vmx_rdtscp();
> }
>
> +static bool vmx_invpcid_supported(void)
> +{
> + return cpu_has_vmx_invpcid();
Should that have && ept_enabled? You turn off invpcid below if
!ept_enabled.
> +}
> +
> /*
> * Swap MSR entry in host/guest MSR entry array.
> */
> @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
> SECONDARY_EXEC_ENABLE_EPT |
> SECONDARY_EXEC_UNRESTRICTED_GUEST |
> SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> - SECONDARY_EXEC_RDTSCP;
> + SECONDARY_EXEC_RDTSCP |
> + SECONDARY_EXEC_ENABLE_INVPCID;
> if (adjust_vmx_controls(min2, opt2,
> MSR_IA32_VMX_PROCBASED_CTLS2,
> &_cpu_based_2nd_exec_control) < 0)
> @@ -3731,6 +3743,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> if (!enable_ept) {
> exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
> enable_unrestricted_guest = 0;
> + /* Enable INVPCID for non-ept guests may cause performance regression. */
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> }
> if (!enable_unrestricted_guest)
> exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
This code is unneeded..
> @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
> }
> }
> }
> +
> + if (vmx_invpcid_supported()) {
> + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> + /* Exposing INVPCID only when PCID is exposed */
> + best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> + if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) {
> + exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> + exec_control);
> + } else {
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> + exec_control);
> + if (best)
> + best->ecx &= ~bit(X86_FEATURE_INVPCID);
> + }
> + }
> }
Since you override it here anyway. But maybe it's needed if the host
never calls KVM_SET_CPUID.
>
> static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
> @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> page_to_phys(vmx->nested.apic_access_page));
> }
>
> + /* Explicitly disable INVPCID until PCID for L2 guest is supported */
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> +
We can't just disable it without the guest knowing. If we don't expose
INCPCID through the MSR, then we should fail VMKLAUNCH or VMRESUME is
this bit is set.
> vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> }
>
> @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> return 1;
> }
>
> + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> + return 1;
Why check old_cr0?
>
> + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> + if (!guest_cpuid_has_pcid(vcpu))
> + return 1;
> +
> + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
> + return 1;
> + }
> +
> if (kvm_x86_ops->set_cr4(vcpu, cr4))
> return 1;
>
> - if ((cr4 ^ old_cr4) & pdptr_bits)
> + if (((cr4 ^ old_cr4) & pdptr_bits) ||
> + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> kvm_mmu_reset_context(vcpu);
You can do
if ((cr4 ^ old_cr4) & (pdptr_bits | X86_CR4_PCIDE))
...
> @@ -626,8 +640,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> }
>
> if (is_long_mode(vcpu)) {
> - if (cr3 & CR3_L_MODE_RESERVED_BITS)
> - return 1;
> + if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
> + if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
> + return 1;
> + } else
> + if (cr3 & CR3_L_MODE_RESERVED_BITS)
> + return 1;
> } else {
> if (is_pae(vcpu)) {
> if (cr3 & CR3_PAE_RESERVED_BITS)
I'm worried about the order of writes in
kvm_arch_vcpu_ioctl_set_sregs(). We might end up in a situation where
we we can't load all registers due to all those checks.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-06-28 15:49 ` Avi Kivity
@ 2012-06-28 15:49 ` Avi Kivity
2012-06-29 2:37 ` Mao, Junjie
1 sibling, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2012-06-28 15:49 UTC (permalink / raw)
To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'
On 06/28/2012 06:49 PM, Avi Kivity wrote:
> On 06/14/2012 05:04 AM, Mao, Junjie wrote:
>> This patch handles PCID/INVPCID for guests.
>>
>> Process-context identifiers (PCIDs) are a facility by which a logical processor
>> may cache information for multiple linear-address spaces so that the processor
>> may retain cached information when software switches to a different linear
>> address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual
>> Volume 3A for details.
>>
>> For guests with EPT, the PCID feature is enabled and INVPCID behaves as running
>> natively.
>> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
>>
Oh, and sorry about the late review.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-06-28 15:49 ` Avi Kivity
2012-06-28 15:49 ` Avi Kivity
@ 2012-06-29 2:37 ` Mao, Junjie
2012-06-29 14:51 ` Avi Kivity
1 sibling, 1 reply; 9+ messages in thread
From: Mao, Junjie @ 2012-06-29 2:37 UTC (permalink / raw)
To: Avi Kivity; +Cc: 'kvm@vger.kernel.org'
> -----Original Message-----
> From: Avi Kivity [mailto:avi@redhat.com]
> Sent: Thursday, June 28, 2012 11:49 PM
> To: Mao, Junjie
> Cc: 'kvm@vger.kernel.org'
> Subject: Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with
> EPT
>
> On 06/14/2012 05:04 AM, Mao, Junjie wrote:
> > This patch handles PCID/INVPCID for guests.
> >
> > Process-context identifiers (PCIDs) are a facility by which a logical
> > processor may cache information for multiple linear-address spaces so
> > that the processor may retain cached information when software
> > switches to a different linear address space. Refer to section 4.10.1
> > in IA32 Intel Software Developer's Manual Volume 3A for details.
> >
> > For guests with EPT, the PCID feature is enabled and INVPCID behaves
> > as running natively.
> > For guests without EPT, the PCID feature is disabled and INVPCID triggers
> #UD.
> >
> >
> > #endif
> > unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> > + unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;
>
> Unneeded, just put F(PCID) below.
Understood. Thanks for your suggestion.
>
> > + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) :
> > +0;
> >
> > /* cpuid 1.edx */
> > const u32 kvm_supported_word0_x86_features = @@ -228,7 +230,7 @@
> > static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> > 0 /* DS-CPL, VMX, SMX, EST */ |
> > 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> > F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > - 0 /* Reserved, DCA */ | F(XMM4_1) |
> > + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
>
>
> > @@ -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
> > return cpu_has_vmx_rdtscp();
> > }
> >
> > +static bool vmx_invpcid_supported(void) {
> > + return cpu_has_vmx_invpcid();
>
> Should that have && ept_enabled? You turn off invpcid below if !ept_enabled.
>
> > +}
> > +
> > /*
> > * Swap MSR entry in host/guest MSR entry array.
> > */
> > @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct
> vmcs_config *vmcs_conf)
> > SECONDARY_EXEC_ENABLE_EPT |
> > SECONDARY_EXEC_UNRESTRICTED_GUEST |
> > SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> > - SECONDARY_EXEC_RDTSCP;
> > + SECONDARY_EXEC_RDTSCP |
> > + SECONDARY_EXEC_ENABLE_INVPCID;
> > if (adjust_vmx_controls(min2, opt2,
> > MSR_IA32_VMX_PROCBASED_CTLS2,
> > &_cpu_based_2nd_exec_control) < 0) @@ -3731,6
> +3743,8 @@ static
> > u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> > if (!enable_ept) {
> > exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
> > enable_unrestricted_guest = 0;
> > + /* Enable INVPCID for non-ept guests may cause performance
> regression. */
> > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > }
> > if (!enable_unrestricted_guest)
> > exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
>
> This code is unneeded..
>
> > @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu
> *vcpu)
> > }
> > }
> > }
> > +
> > + if (vmx_invpcid_supported()) {
> > + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> > + /* Exposing INVPCID only when PCID is exposed */
> > + best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> > + if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) &&
> guest_cpuid_has_pcid(vcpu)) {
> > + exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
> > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > + exec_control);
> > + } else {
> > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> > + exec_control);
> > + if (best)
> > + best->ecx &= ~bit(X86_FEATURE_INVPCID);
> > + }
> > + }
> > }
>
> Since you override it here anyway. But maybe it's needed if the host never
> calls KVM_SET_CPUID.
The code in vmx_secondary_exec_control may be a must in the situation you have mentioned. The missing of enable_ept in vmx_invpcid_supported will make the guest think it can enable INVPCID support, though it actually does not, when the platform supports it but enable_ept is not set. I'll fix that in the next version.
>
> >
> > static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
> > *entry) @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct
> kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> > page_to_phys(vmx->nested.apic_access_page));
> > }
> >
> > + /* Explicitly disable INVPCID until PCID for L2 guest is supported */
> > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > +
>
> We can't just disable it without the guest knowing. If we don't expose
> INCPCID through the MSR, then we should fail VMKLAUNCH or VMRESUME is
> this bit is set.
I think this means I can replace the code here with a check in nested_vmx_run. Do I understand correctly?
>
> > vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> > }
> >
> > @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned
> long cr0)
> > return 1;
> > }
> >
> > + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> > + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> > + return 1;
>
> Why check old_cr0?
MOV to CR0 causes a general-protection exception if it would clear CR0.PG to 0 while CR4.PCIDE = 1. This check reflects the restriction.
>
> >
> > + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> > + if (!guest_cpuid_has_pcid(vcpu))
> > + return 1;
> > +
> > + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> > + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK)
> || !is_long_mode(vcpu))
> > + return 1;
> > + }
> > +
> > if (kvm_x86_ops->set_cr4(vcpu, cr4))
> > return 1;
> >
> > - if ((cr4 ^ old_cr4) & pdptr_bits)
> > + if (((cr4 ^ old_cr4) & pdptr_bits) ||
> > + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> > kvm_mmu_reset_context(vcpu);
>
>
> You can do
>
>
> if ((cr4 ^ old_cr4) & (pdptr_bits | X86_CR4_PCIDE))
> ...
TLB is not invalidated when CR4.PCIDE is changed from 0 to 1.
>
> > @@ -626,8 +640,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned
> long cr3)
> > }
> >
> > if (is_long_mode(vcpu)) {
> > - if (cr3 & CR3_L_MODE_RESERVED_BITS)
> > - return 1;
> > + if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
> > + if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
> > + return 1;
> > + } else
> > + if (cr3 & CR3_L_MODE_RESERVED_BITS)
> > + return 1;
> > } else {
> > if (is_pae(vcpu)) {
> > if (cr3 & CR3_PAE_RESERVED_BITS)
>
> I'm worried about the order of writes in kvm_arch_vcpu_ioctl_set_sregs().
> We might end up in a situation where we we can't load all registers due to all
> those checks.
>
The check here requires that cr4 has been updated already. I'm not sure if it is so in the ioctl.
>
> --
> error compiling committee.c: too many arguments to function
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-06-29 2:37 ` Mao, Junjie
@ 2012-06-29 14:51 ` Avi Kivity
2012-07-02 0:32 ` Mao, Junjie
0 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2012-06-29 14:51 UTC (permalink / raw)
To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'
On 06/29/2012 05:37 AM, Mao, Junjie wrote:
> >
> > >
> > > static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
> > > *entry) @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct
> > kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> > > page_to_phys(vmx->nested.apic_access_page));
> > > }
> > >
> > > + /* Explicitly disable INVPCID until PCID for L2 guest is supported */
> > > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > > +
> >
> > We can't just disable it without the guest knowing. If we don't expose
> > INCPCID through the MSR, then we should fail VMKLAUNCH or VMRESUME is
> > this bit is set.
>
> I think this means I can replace the code here with a check in nested_vmx_run. Do I understand correctly?
Correct, but the check already exists:
if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) ||
!vmx_control_verify(vmcs12->secondary_vm_exec_control,
nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
!vmx_control_verify(vmcs12->vm_exit_controls,
nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
!vmx_control_verify(vmcs12->vm_entry_controls,
nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
{
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
}
So all that is needed is to initializr nested_vmx_entry_ctls_high properly.
> >
> > > vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> > > }
> > >
> > > @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned
> > long cr0)
> > > return 1;
> > > }
> > >
> > > + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> > > + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> > > + return 1;
> >
> > Why check old_cr0?
>
> MOV to CR0 causes a general-protection exception if it would clear CR0.PG to 0 while CR4.PCIDE = 1. This check reflects the restriction.
It should not be possible to have cr0.pg=0 and cr4.pcide=1 in the first
place. We are guaranteed that old_cr0.pg=1.
>
> >
> > >
> > > + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> > > + if (!guest_cpuid_has_pcid(vcpu))
> > > + return 1;
> > > +
> > > + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> > > + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK)
> > || !is_long_mode(vcpu))
> > > + return 1;
> > > + }
> > > +
> > > if (kvm_x86_ops->set_cr4(vcpu, cr4))
> > > return 1;
> > >
> > > - if ((cr4 ^ old_cr4) & pdptr_bits)
> > > + if (((cr4 ^ old_cr4) & pdptr_bits) ||
> > > + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> > > kvm_mmu_reset_context(vcpu);
> >
> >
> > You can do
> >
> >
> > if ((cr4 ^ old_cr4) & (pdptr_bits | X86_CR4_PCIDE))
> > ...
>
> TLB is not invalidated when CR4.PCIDE is changed from 0 to 1.
Ok.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-06-29 14:51 ` Avi Kivity
@ 2012-07-02 0:32 ` Mao, Junjie
2012-07-02 8:59 ` Avi Kivity
0 siblings, 1 reply; 9+ messages in thread
From: Mao, Junjie @ 2012-07-02 0:32 UTC (permalink / raw)
To: Avi Kivity; +Cc: 'kvm@vger.kernel.org'
> -----Original Message-----
> From: Avi Kivity [mailto:avi@redhat.com]
> Sent: Friday, June 29, 2012 10:52 PM
> To: Mao, Junjie
> Cc: 'kvm@vger.kernel.org'
> Subject: Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with
> EPT
>
> On 06/29/2012 05:37 AM, Mao, Junjie wrote:
> > >
> > > >
> > > > static void vmx_set_supported_cpuid(u32 func, struct
> > > > kvm_cpuid_entry2
> > > > *entry) @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct
> > > kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> > > > page_to_phys(vmx->nested.apic_access_page));
> > > > }
> > > >
> > > > + /* Explicitly disable INVPCID until PCID for L2 guest is supported
> */
> > > > + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> > > > +
> > >
> > > We can't just disable it without the guest knowing. If we don't
> > > expose INCPCID through the MSR, then we should fail VMKLAUNCH or
> > > VMRESUME is this bit is set.
> >
> > I think this means I can replace the code here with a check in
> nested_vmx_run. Do I understand correctly?
>
> Correct, but the check already exists:
> if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
> nested_vmx_procbased_ctls_low,
> nested_vmx_procbased_ctls_high) ||
> !vmx_control_verify(vmcs12->secondary_vm_exec_control,
> nested_vmx_secondary_ctls_low,
> nested_vmx_secondary_ctls_high) ||
> !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
> nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high)
> ||
> !vmx_control_verify(vmcs12->vm_exit_controls,
> nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
> !vmx_control_verify(vmcs12->vm_entry_controls,
> nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
> {
> nested_vmx_failValid(vcpu,
> VMXERR_ENTRY_INVALID_CONTROL_FIELD);
> return 1;
> }
>
> So all that is needed is to initializr nested_vmx_entry_ctls_high properly.
nested_vmx_entry_ctls_high only contains SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES at present, which means it should be safe to simply remove the code.
>
> > >
> > > > vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> exec_control);
> > > > }
> > > >
> > > > @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu,
> > > > unsigned
> > > long cr0)
> > > > return 1;
> > > > }
> > > >
> > > > + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> > > > + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> > > > + return 1;
> > >
> > > Why check old_cr0?
> >
> > MOV to CR0 causes a general-protection exception if it would clear CR0.PG to
> 0 while CR4.PCIDE = 1. This check reflects the restriction.
>
> It should not be possible to have cr0.pg=0 and cr4.pcide=1 in the first place.
> We are guaranteed that old_cr0.pg=1.
>
I see. Thanks for the suggestion.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
2012-07-02 0:32 ` Mao, Junjie
@ 2012-07-02 8:59 ` Avi Kivity
0 siblings, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2012-07-02 8:59 UTC (permalink / raw)
To: Mao, Junjie; +Cc: 'kvm@vger.kernel.org'
On 07/02/2012 03:32 AM, Mao, Junjie wrote:
>> > I think this means I can replace the code here with a check in
>> nested_vmx_run. Do I understand correctly?
>>
>> Correct, but the check already exists:
>> if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
>> nested_vmx_procbased_ctls_low,
>> nested_vmx_procbased_ctls_high) ||
>> !vmx_control_verify(vmcs12->secondary_vm_exec_control,
>> nested_vmx_secondary_ctls_low,
>> nested_vmx_secondary_ctls_high) ||
>> !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
>> nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high)
>> ||
>> !vmx_control_verify(vmcs12->vm_exit_controls,
>> nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) ||
>> !vmx_control_verify(vmcs12->vm_entry_controls,
>> nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high))
>> {
>> nested_vmx_failValid(vcpu,
>> VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>> return 1;
>> }
>>
>> So all that is needed is to initializr nested_vmx_entry_ctls_high properly.
>
> nested_vmx_entry_ctls_high only contains SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES at present, which means it should be safe to simply remove the code.
Yes, I misread the code as initializing it to what the cpu supports, but
it is correct as is. So just drop this check.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2012-07-02 8:59 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-06-14 2:04 [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
2012-06-16 2:32 ` Marcelo Tosatti
2012-06-19 8:24 ` Mao, Junjie
2012-06-28 15:49 ` Avi Kivity
2012-06-28 15:49 ` Avi Kivity
2012-06-29 2:37 ` Mao, Junjie
2012-06-29 14:51 ` Avi Kivity
2012-07-02 0:32 ` Mao, Junjie
2012-07-02 8:59 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).