From: Marcelo Tosatti <mtosatti@redhat.com>
To: "Mao, Junjie" <junjie.mao@intel.com>
Cc: "'kvm@vger.kernel.org'" <kvm@vger.kernel.org>,
Avi Kivity <avi@redhat.com>
Subject: Re: [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT
Date: Fri, 15 Jun 2012 23:32:11 -0300 [thread overview]
Message-ID: <20120616023210.GA6275@amt.cnet> (raw)
In-Reply-To: <EF5A1D57CFBD5A4BA5EB3ED985B6DC6E080CF0@SHSMSX101.ccr.corp.intel.com>
On Thu, Jun 14, 2012 at 02:04:25AM +0000, Mao, Junjie wrote:
> This patch handles PCID/INVPCID for guests.
>
> Process-context identifiers (PCIDs) are a facility by which a logical processor
> may cache information for multiple linear-address spaces so that the processor
> may retain cached information when software switches to a different linear
> address space. Refer to section 4.10.1 in IA32 Intel Software Developer's Manual
> Volume 3A for details.
>
> For guests with EPT, the PCID feature is enabled and INVPCID behaves as it does
> when running natively.
> For guests without EPT, the PCID feature is disabled and INVPCID triggers #UD.
>
> Changes from v3:
> Rebase to the latest tree
> Expose PCID to nested guests
> Remove the pcid_supported callback
>
> Changes from v2:
> Separate management of PCID and INVPCID
> Prevent the PCID bit in CPUID from being exposed to guest hypervisors
> Don't check the lower 12 bits when loading cr3 if cr4.PCIDE is set
> Explicitly disable INVPCID for L2 guests
> Support both enable and disable INVPCID in vmx_cpuid_update()
>
> Changes from v1:
> Move cr0/cr4 writing checks to x86.c
> Update comments for the reason why PCID is disabled for non-EPT guests
> Do not support PCID/INVPCID for nested guests at present
> Clean up useless symbols
>
> Signed-off-by: Junjie Mao <junjie.mao@intel.com>
Looks good to me.
> ---
> arch/x86/include/asm/kvm_host.h | 4 ++-
> arch/x86/include/asm/processor-flags.h | 2 +
> arch/x86/include/asm/vmx.h | 2 +
> arch/x86/kvm/cpuid.c | 6 +++-
> arch/x86/kvm/cpuid.h | 8 +++++++
> arch/x86/kvm/svm.c | 6 +++++
> arch/x86/kvm/vmx.c | 37 +++++++++++++++++++++++++++++++-
> arch/x86/kvm/x86.c | 24 ++++++++++++++++++--
> 8 files changed, 82 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index db7c1f2..95828a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -48,12 +48,13 @@
>
> #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
> #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
> +#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL
> #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
> 0xFFFFFF0000000000ULL)
> #define CR4_RESERVED_BITS \
> (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
> | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
> - | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
> + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
> | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
> | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
>
> @@ -661,6 +662,7 @@ struct kvm_x86_ops {
> u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
> int (*get_lpage_level)(void);
> bool (*rdtscp_supported)(void);
> + bool (*invpcid_supported)(void);
> void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
>
> void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
> diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
> index f8ab3ea..aea1d1d 100644
> --- a/arch/x86/include/asm/processor-flags.h
> +++ b/arch/x86/include/asm/processor-flags.h
> @@ -44,6 +44,7 @@
> */
> #define X86_CR3_PWT 0x00000008 /* Page Write Through */
> #define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
> +#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
>
> /*
> * Intel CPU features in CR4
> @@ -61,6 +62,7 @@
> #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
> #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
> #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
> +#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
> #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
> #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 31f180c..b81525c 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -60,6 +60,7 @@
> #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
> #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
> #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
> +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
>
>
> #define PIN_BASED_EXT_INTR_MASK 0x00000001
> @@ -281,6 +282,7 @@ enum vmcs_field {
> #define EXIT_REASON_EPT_MISCONFIG 49
> #define EXIT_REASON_WBINVD 54
> #define EXIT_REASON_XSETBV 55
> +#define EXIT_REASON_INVPCID 58
>
> /*
> * Interruption-information format
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 7df1c6d..d13408a 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -201,6 +201,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> unsigned f_lm = 0;
> #endif
> unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> + unsigned f_pcid = boot_cpu_has(X86_FEATURE_PCID) ? F(PCID) : 0;
> + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
>
> /* cpuid 1.edx */
> const u32 kvm_supported_word0_x86_features =
> @@ -228,7 +230,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> 0 /* DS-CPL, VMX, SMX, EST */ |
> 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> - 0 /* Reserved, DCA */ | F(XMM4_1) |
> + f_pcid | 0 /* Reserved, DCA */ | F(XMM4_1) |
> F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> F(F16C) | F(RDRAND);
> @@ -248,7 +250,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
> /* cpuid 7.0.ebx */
> const u32 kvm_supported_word9_x86_features =
> F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
> - F(BMI2) | F(ERMS) | F(RTM);
> + F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
>
> /* all calls to cpuid_count() should be made on the same cpu */
> get_cpu();
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index 26d1fb4..e531d39 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -51,4 +51,12 @@ static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
> return best && (best->ecx & bit(X86_FEATURE_OSVW));
> }
>
> +static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpuid_entry2 *best;
> +
> + best = kvm_find_cpuid_entry(vcpu, 1, 0);
> + return best && (best->ecx & bit(X86_FEATURE_PCID));
> +}
> +
> #endif
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index f75af40..81ed0ba 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -4044,6 +4044,11 @@ static bool svm_rdtscp_supported(void)
> return false;
> }
>
> +static bool svm_invpcid_supported(void)
> +{
> + return false;
> +}
> +
> static bool svm_has_wbinvd_exit(void)
> {
> return true;
> @@ -4312,6 +4317,7 @@ static struct kvm_x86_ops svm_x86_ops = {
> .cpuid_update = svm_cpuid_update,
>
> .rdtscp_supported = svm_rdtscp_supported,
> + .invpcid_supported = svm_invpcid_supported,
>
> .set_supported_cpuid = svm_set_supported_cpuid,
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 32eb588..21760b9 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -849,6 +849,12 @@ static inline bool cpu_has_vmx_rdtscp(void)
> SECONDARY_EXEC_RDTSCP;
> }
>
> +static inline bool cpu_has_vmx_invpcid(void)
> +{
> + return vmcs_config.cpu_based_2nd_exec_ctrl &
> + SECONDARY_EXEC_ENABLE_INVPCID;
> +}
> +
> static inline bool cpu_has_virtual_nmis(void)
> {
> return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
> @@ -1739,6 +1745,11 @@ static bool vmx_rdtscp_supported(void)
> return cpu_has_vmx_rdtscp();
> }
>
> +static bool vmx_invpcid_supported(void)
> +{
> + return cpu_has_vmx_invpcid();
> +}
> +
> /*
> * Swap MSR entry in host/guest MSR entry array.
> */
> @@ -2458,7 +2469,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
> SECONDARY_EXEC_ENABLE_EPT |
> SECONDARY_EXEC_UNRESTRICTED_GUEST |
> SECONDARY_EXEC_PAUSE_LOOP_EXITING |
> - SECONDARY_EXEC_RDTSCP;
> + SECONDARY_EXEC_RDTSCP |
> + SECONDARY_EXEC_ENABLE_INVPCID;
> if (adjust_vmx_controls(min2, opt2,
> MSR_IA32_VMX_PROCBASED_CTLS2,
> &_cpu_based_2nd_exec_control) < 0)
> @@ -3731,6 +3743,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> if (!enable_ept) {
> exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
> enable_unrestricted_guest = 0;
> + /* Enable INVPCID for non-ept guests may cause performance regression. */
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> }
> if (!enable_unrestricted_guest)
> exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
> @@ -6467,6 +6481,23 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
> }
> }
> }
> +
> + if (vmx_invpcid_supported()) {
> + exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> + /* Exposing INVPCID only when PCID is exposed */
> + best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> + if (best && (best->ecx & bit(X86_FEATURE_INVPCID)) && guest_cpuid_has_pcid(vcpu)) {
> + exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> + exec_control);
> + } else {
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> + vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
> + exec_control);
> + if (best)
> + best->ecx &= ~bit(X86_FEATURE_INVPCID);
> + }
> + }
> }
>
> static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
> @@ -6610,6 +6641,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> page_to_phys(vmx->nested.apic_access_page));
> }
>
> + /* Explicitly disable INVPCID until PCID for L2 guest is supported */
> + exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
> +
> vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> }
>
> @@ -7201,6 +7235,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
> .cpuid_update = vmx_cpuid_update,
>
> .rdtscp_supported = vmx_rdtscp_supported,
> + .invpcid_supported = vmx_invpcid_supported,
>
> .set_supported_cpuid = vmx_set_supported_cpuid,
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index be6d549..3a66d7b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -528,6 +528,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> return 1;
> }
>
> + if ((old_cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PG) &&
> + kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
> + return 1;
> +
> kvm_x86_ops->set_cr0(vcpu, cr0);
>
> if ((cr0 ^ old_cr0) & X86_CR0_PG) {
> @@ -604,10 +608,20 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
> kvm_read_cr3(vcpu)))
> return 1;
>
> + if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
> + if (!guest_cpuid_has_pcid(vcpu))
> + return 1;
> +
> + /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
> + if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
> + return 1;
> + }
> +
> if (kvm_x86_ops->set_cr4(vcpu, cr4))
> return 1;
>
> - if ((cr4 ^ old_cr4) & pdptr_bits)
> + if (((cr4 ^ old_cr4) & pdptr_bits) ||
> + (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
> kvm_mmu_reset_context(vcpu);
>
> if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
> @@ -626,8 +640,12 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
> }
>
> if (is_long_mode(vcpu)) {
> - if (cr3 & CR3_L_MODE_RESERVED_BITS)
> - return 1;
> + if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
> + if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
> + return 1;
> + } else
> + if (cr3 & CR3_L_MODE_RESERVED_BITS)
> + return 1;
> } else {
> if (is_pae(vcpu)) {
> if (cr3 & CR3_PAE_RESERVED_BITS)
> --
> 1.7.1
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2012-06-16 2:32 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-14 2:04 [PATCH v4] KVM: x86: Implement PCID/INVPCID for guests with EPT Mao, Junjie
2012-06-16 2:32 ` Marcelo Tosatti [this message]
2012-06-19 8:24 ` Mao, Junjie
2012-06-28 15:49 ` Avi Kivity
2012-06-28 15:49 ` Avi Kivity
2012-06-29 2:37 ` Mao, Junjie
2012-06-29 14:51 ` Avi Kivity
2012-07-02 0:32 ` Mao, Junjie
2012-07-02 8:59 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120616023210.GA6275@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=junjie.mao@intel.com \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).