From: Paolo Bonzini <pbonzini@redhat.com>
To: Tiejun Chen <tiejun.chen@intel.com>
Cc: kvm@vger.kernel.org
Subject: Re: [RFC][PATCH] kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()
Date: Fri, 24 Oct 2014 12:48:50 +0200 [thread overview]
Message-ID: <544A2E92.5090309@redhat.com> (raw)
In-Reply-To: <1414142304-6635-1-git-send-email-tiejun.chen@intel.com>
On 10/24/2014 11:18 AM, Tiejun Chen wrote:
> It makes more sense to do all of the VMX hardware-specific setup in
> vmx_x86_ops->hardware_setup() rather than in vmx_init().
>
> Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
Please split this patch into multiple parts. It is quite hard to review
in its current form.
Paolo
> ---
> arch/x86/kvm/vmx.c | 720 +++++++++++++++++++++++++++--------------------------
> 1 file changed, 361 insertions(+), 359 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 04fa1b8..9270076 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3106,10 +3106,302 @@ static __init int alloc_kvm_area(void)
> return 0;
> }
>
> +#define MSR_TYPE_R 1
> +#define MSR_TYPE_W 2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> + u32 msr, int type)
> +{
> + int f = sizeof(unsigned long);
> +
> + if (!cpu_has_vmx_msr_bitmap())
> + return;
> +
> + /*
> + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> + * have the write-low and read-high bitmap offsets the wrong way round.
> + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> + */
> + if (msr <= 0x1fff) {
> + if (type & MSR_TYPE_R)
> + /* read-low */
> + __clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-low */
> + __clear_bit(msr, msr_bitmap + 0x800 / f);
> +
> + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> + msr &= 0x1fff;
> + if (type & MSR_TYPE_R)
> + /* read-high */
> + __clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-high */
> + __clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
> + }
> +}
> +
> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> + u32 msr, int type)
> +{
> + int f = sizeof(unsigned long);
> +
> + if (!cpu_has_vmx_msr_bitmap())
> + return;
> +
> + /*
> + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> + * have the write-low and read-high bitmap offsets the wrong way round.
> + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> + */
> + if (msr <= 0x1fff) {
> + if (type & MSR_TYPE_R)
> + /* read-low */
> + __set_bit(msr, msr_bitmap + 0x000 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-low */
> + __set_bit(msr, msr_bitmap + 0x800 / f);
> +
> + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> + msr &= 0x1fff;
> + if (type & MSR_TYPE_R)
> + /* read-high */
> + __set_bit(msr, msr_bitmap + 0x400 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-high */
> + __set_bit(msr, msr_bitmap + 0xc00 / f);
> +
> + }
> +}
> +
> +static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> +{
> + if (!longmode_only)
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> + msr, MSR_TYPE_R | MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> + msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> +{
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +{
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +{
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_W);
> +}
> +
> +static int vmx_vm_has_apicv(struct kvm *kvm)
> +{
> + return enable_apicv && irqchip_in_kernel(kvm);
> +}
> +
> +static void ept_set_mmio_spte_mask(void)
> +{
> + /*
> + * EPT Misconfigurations can be generated if the value of bits 2:0
> + * of an EPT paging-structure entry is 110b (write/execute).
> + * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> + * spte.
> + */
> + kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> +}
> +
> +static int __grow_ple_window(int val)
> +{
> + if (ple_window_grow < 1)
> + return ple_window;
> +
> + val = min(val, ple_window_actual_max);
> +
> + if (ple_window_grow < ple_window)
> + val *= ple_window_grow;
> + else
> + val += ple_window_grow;
> +
> + return val;
> +}
> +
> +static int __shrink_ple_window(int val, int modifier, int minimum)
> +{
> + if (modifier < 1)
> + return ple_window;
> +
> + if (modifier < ple_window)
> + val /= modifier;
> + else
> + val -= modifier;
> +
> + return max(val, minimum);
> +}
> +
> +static void grow_ple_window(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + int old = vmx->ple_window;
> +
> + vmx->ple_window = __grow_ple_window(old);
> +
> + if (vmx->ple_window != old)
> + vmx->ple_window_dirty = true;
> +
> + trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +static void shrink_ple_window(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + int old = vmx->ple_window;
> +
> + vmx->ple_window = __shrink_ple_window(old,
> + ple_window_shrink, ple_window);
> +
> + if (vmx->ple_window != old)
> + vmx->ple_window_dirty = true;
> +
> + trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +/*
> + * ple_window_actual_max is computed to be one grow_ple_window() below
> + * ple_window_max. (See __grow_ple_window for the reason.)
> + * This prevents overflows, because ple_window_max is int.
> + * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> + * this process.
> + * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> + */
> +static void update_ple_window_actual_max(void)
> +{
> + ple_window_actual_max =
> + __shrink_ple_window(max(ple_window_max, ple_window),
> + ple_window_grow, INT_MIN);
> +}
> +
> +
> static __init int hardware_setup(void)
> {
> - if (setup_vmcs_config(&vmcs_config) < 0)
> - return -EIO;
> + int r = -ENOMEM, i, msr;
> +
> + rdmsrl_safe(MSR_EFER, &host_efer);
> +
> + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> + kvm_define_shared_msr(i, vmx_msr_index[i]);
> +
> + vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_io_bitmap_a)
> + return r;
> +
> + vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_io_bitmap_b)
> + goto out;
> +
> + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_legacy)
> + goto out1;
> +
> + vmx_msr_bitmap_legacy_x2apic =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_legacy_x2apic)
> + goto out2;
> +
> + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_longmode)
> + goto out3;
> +
> + vmx_msr_bitmap_longmode_x2apic =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_longmode_x2apic)
> + goto out4;
> + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_vmread_bitmap)
> + goto out5;
> +
> + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_vmwrite_bitmap)
> + goto out6;
> +
> + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> +
> + /*
> + * Allow direct access to the PC debug port (it is often used for I/O
> + * delays, but the vmexits simply slow things down).
> + */
> + memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> + clear_bit(0x80, vmx_io_bitmap_a);
> +
> + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> +
> + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> +
> + vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> + vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> +
> + memcpy(vmx_msr_bitmap_legacy_x2apic,
> + vmx_msr_bitmap_legacy, PAGE_SIZE);
> + memcpy(vmx_msr_bitmap_longmode_x2apic,
> + vmx_msr_bitmap_longmode, PAGE_SIZE);
> +
> + if (enable_apicv) {
> + for (msr = 0x800; msr <= 0x8ff; msr++)
> + vmx_disable_intercept_msr_read_x2apic(msr);
> +
> + /* According SDM, in x2apic mode, the whole id reg is used.
> + * But in KVM, it only use the highest eight bits. Need to
> + * intercept it */
> + vmx_enable_intercept_msr_read_x2apic(0x802);
> + /* TMCCT */
> + vmx_enable_intercept_msr_read_x2apic(0x839);
> + /* TPR */
> + vmx_disable_intercept_msr_write_x2apic(0x808);
> + /* EOI */
> + vmx_disable_intercept_msr_write_x2apic(0x80b);
> + /* SELF-IPI */
> + vmx_disable_intercept_msr_write_x2apic(0x83f);
> + }
> +
> + if (enable_ept) {
> + kvm_mmu_set_mask_ptes(0ull,
> + (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> + (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> + 0ull, VMX_EPT_EXECUTABLE_MASK);
> + ept_set_mmio_spte_mask();
> + kvm_enable_tdp();
> + } else
> + kvm_disable_tdp();
> +
> + update_ple_window_actual_max();
> +
> + if (setup_vmcs_config(&vmcs_config) < 0) {
> + r = -EIO;
> + goto out7;
> + }
>
> if (boot_cpu_has(X86_FEATURE_NX))
> kvm_enable_efer_bits(EFER_NX);
> @@ -3169,10 +3461,38 @@ static __init int hardware_setup(void)
> nested_vmx_setup_ctls_msrs();
>
> return alloc_kvm_area();
> +
> +out7:
> + free_page((unsigned long)vmx_vmwrite_bitmap);
> +out6:
> + free_page((unsigned long)vmx_vmread_bitmap);
> +out5:
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> +out4:
> + free_page((unsigned long)vmx_msr_bitmap_longmode);
> +out3:
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +out2:
> + free_page((unsigned long)vmx_msr_bitmap_legacy);
> +out1:
> + free_page((unsigned long)vmx_io_bitmap_b);
> +out:
> + free_page((unsigned long)vmx_io_bitmap_a);
> +
> + return r;
> }
>
> static __exit void hardware_unsetup(void)
> {
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_legacy);
> + free_page((unsigned long)vmx_msr_bitmap_longmode);
> + free_page((unsigned long)vmx_io_bitmap_b);
> + free_page((unsigned long)vmx_io_bitmap_a);
> + free_page((unsigned long)vmx_vmwrite_bitmap);
> + free_page((unsigned long)vmx_vmread_bitmap);
> +
> free_kvm_area();
> }
>
> @@ -4057,162 +4377,52 @@ static int alloc_apic_access_page(struct kvm *kvm)
> kvm->arch.apic_access_page_done = true;
> out:
> mutex_unlock(&kvm->slots_lock);
> - return r;
> -}
> -
> -static int alloc_identity_pagetable(struct kvm *kvm)
> -{
> - /* Called with kvm->slots_lock held. */
> -
> - struct kvm_userspace_memory_region kvm_userspace_mem;
> - int r = 0;
> -
> - BUG_ON(kvm->arch.ept_identity_pagetable_done);
> -
> - kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> - kvm_userspace_mem.flags = 0;
> - kvm_userspace_mem.guest_phys_addr =
> - kvm->arch.ept_identity_map_addr;
> - kvm_userspace_mem.memory_size = PAGE_SIZE;
> - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> -
> - return r;
> -}
> -
> -static void allocate_vpid(struct vcpu_vmx *vmx)
> -{
> - int vpid;
> -
> - vmx->vpid = 0;
> - if (!enable_vpid)
> - return;
> - spin_lock(&vmx_vpid_lock);
> - vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> - if (vpid < VMX_NR_VPIDS) {
> - vmx->vpid = vpid;
> - __set_bit(vpid, vmx_vpid_bitmap);
> - }
> - spin_unlock(&vmx_vpid_lock);
> -}
> -
> -static void free_vpid(struct vcpu_vmx *vmx)
> -{
> - if (!enable_vpid)
> - return;
> - spin_lock(&vmx_vpid_lock);
> - if (vmx->vpid != 0)
> - __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> - spin_unlock(&vmx_vpid_lock);
> -}
> -
> -#define MSR_TYPE_R 1
> -#define MSR_TYPE_W 2
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> - u32 msr, int type)
> -{
> - int f = sizeof(unsigned long);
> -
> - if (!cpu_has_vmx_msr_bitmap())
> - return;
> -
> - /*
> - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> - * have the write-low and read-high bitmap offsets the wrong way round.
> - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> - */
> - if (msr <= 0x1fff) {
> - if (type & MSR_TYPE_R)
> - /* read-low */
> - __clear_bit(msr, msr_bitmap + 0x000 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-low */
> - __clear_bit(msr, msr_bitmap + 0x800 / f);
> -
> - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> - msr &= 0x1fff;
> - if (type & MSR_TYPE_R)
> - /* read-high */
> - __clear_bit(msr, msr_bitmap + 0x400 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-high */
> - __clear_bit(msr, msr_bitmap + 0xc00 / f);
> -
> - }
> -}
> -
> -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> - u32 msr, int type)
> -{
> - int f = sizeof(unsigned long);
> -
> - if (!cpu_has_vmx_msr_bitmap())
> - return;
> -
> - /*
> - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> - * have the write-low and read-high bitmap offsets the wrong way round.
> - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> - */
> - if (msr <= 0x1fff) {
> - if (type & MSR_TYPE_R)
> - /* read-low */
> - __set_bit(msr, msr_bitmap + 0x000 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-low */
> - __set_bit(msr, msr_bitmap + 0x800 / f);
> -
> - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> - msr &= 0x1fff;
> - if (type & MSR_TYPE_R)
> - /* read-high */
> - __set_bit(msr, msr_bitmap + 0x400 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-high */
> - __set_bit(msr, msr_bitmap + 0xc00 / f);
> -
> - }
> -}
> -
> -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> -{
> - if (!longmode_only)
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> - msr, MSR_TYPE_R | MSR_TYPE_W);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> - msr, MSR_TYPE_R | MSR_TYPE_W);
> -}
> -
> -static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> -{
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + return r;
> }
>
> -static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +static int alloc_identity_pagetable(struct kvm *kvm)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + /* Called with kvm->slots_lock held. */
> +
> + struct kvm_userspace_memory_region kvm_userspace_mem;
> + int r = 0;
> +
> + BUG_ON(kvm->arch.ept_identity_pagetable_done);
> +
> + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> + kvm_userspace_mem.flags = 0;
> + kvm_userspace_mem.guest_phys_addr =
> + kvm->arch.ept_identity_map_addr;
> + kvm_userspace_mem.memory_size = PAGE_SIZE;
> + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> +
> + return r;
> }
>
> -static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +static void allocate_vpid(struct vcpu_vmx *vmx)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_W);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_W);
> + int vpid;
> +
> + vmx->vpid = 0;
> + if (!enable_vpid)
> + return;
> + spin_lock(&vmx_vpid_lock);
> + vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> + if (vpid < VMX_NR_VPIDS) {
> + vmx->vpid = vpid;
> + __set_bit(vpid, vmx_vpid_bitmap);
> + }
> + spin_unlock(&vmx_vpid_lock);
> }
>
> -static int vmx_vm_has_apicv(struct kvm *kvm)
> +static void free_vpid(struct vcpu_vmx *vmx)
> {
> - return enable_apicv && irqchip_in_kernel(kvm);
> + if (!enable_vpid)
> + return;
> + spin_lock(&vmx_vpid_lock);
> + if (vmx->vpid != 0)
> + __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> + spin_unlock(&vmx_vpid_lock);
> }
>
> /*
> @@ -4376,17 +4586,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> return exec_control;
> }
>
> -static void ept_set_mmio_spte_mask(void)
> -{
> - /*
> - * EPT Misconfigurations can be generated if the value of bits 2:0
> - * of an EPT paging-structure entry is 110b (write/execute).
> - * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> - * spte.
> - */
> - kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> -}
> -
> /*
> * Sets up the vmcs for emulated real mode.
> */
> @@ -5706,76 +5905,6 @@ out:
> return ret;
> }
>
> -static int __grow_ple_window(int val)
> -{
> - if (ple_window_grow < 1)
> - return ple_window;
> -
> - val = min(val, ple_window_actual_max);
> -
> - if (ple_window_grow < ple_window)
> - val *= ple_window_grow;
> - else
> - val += ple_window_grow;
> -
> - return val;
> -}
> -
> -static int __shrink_ple_window(int val, int modifier, int minimum)
> -{
> - if (modifier < 1)
> - return ple_window;
> -
> - if (modifier < ple_window)
> - val /= modifier;
> - else
> - val -= modifier;
> -
> - return max(val, minimum);
> -}
> -
> -static void grow_ple_window(struct kvm_vcpu *vcpu)
> -{
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> - int old = vmx->ple_window;
> -
> - vmx->ple_window = __grow_ple_window(old);
> -
> - if (vmx->ple_window != old)
> - vmx->ple_window_dirty = true;
> -
> - trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -static void shrink_ple_window(struct kvm_vcpu *vcpu)
> -{
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> - int old = vmx->ple_window;
> -
> - vmx->ple_window = __shrink_ple_window(old,
> - ple_window_shrink, ple_window);
> -
> - if (vmx->ple_window != old)
> - vmx->ple_window_dirty = true;
> -
> - trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -/*
> - * ple_window_actual_max is computed to be one grow_ple_window() below
> - * ple_window_max. (See __grow_ple_window for the reason.)
> - * This prevents overflows, because ple_window_max is int.
> - * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> - * this process.
> - * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> - */
> -static void update_ple_window_actual_max(void)
> -{
> - ple_window_actual_max =
> - __shrink_ple_window(max(ple_window_max, ple_window),
> - ple_window_grow, INT_MIN);
> -}
> -
> /*
> * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
> * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
> @@ -9158,150 +9287,23 @@ static struct kvm_x86_ops vmx_x86_ops = {
>
> static int __init vmx_init(void)
> {
> - int r, i, msr;
> -
> - rdmsrl_safe(MSR_EFER, &host_efer);
> -
> - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> - kvm_define_shared_msr(i, vmx_msr_index[i]);
> -
> - vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_io_bitmap_a)
> - return -ENOMEM;
> -
> - r = -ENOMEM;
> -
> - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_io_bitmap_b)
> - goto out;
> -
> - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_legacy)
> - goto out1;
> -
> - vmx_msr_bitmap_legacy_x2apic =
> - (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_legacy_x2apic)
> - goto out2;
> -
> - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_longmode)
> - goto out3;
> -
> - vmx_msr_bitmap_longmode_x2apic =
> - (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_longmode_x2apic)
> - goto out4;
> - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_vmread_bitmap)
> - goto out5;
> -
> - vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_vmwrite_bitmap)
> - goto out6;
> -
> - memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> - memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> -
> - /*
> - * Allow direct access to the PC debug port (it is often used for I/O
> - * delays, but the vmexits simply slow things down).
> - */
> - memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> - clear_bit(0x80, vmx_io_bitmap_a);
> -
> - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> -
> - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> -
> - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> + int r = -ENOMEM;
>
> r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
> __alignof__(struct vcpu_vmx), THIS_MODULE);
> if (r)
> - goto out7;
> + return r;
>
> #ifdef CONFIG_KEXEC
> rcu_assign_pointer(crash_vmclear_loaded_vmcss,
> crash_vmclear_local_loaded_vmcss);
> #endif
>
> - vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> - vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> -
> - memcpy(vmx_msr_bitmap_legacy_x2apic,
> - vmx_msr_bitmap_legacy, PAGE_SIZE);
> - memcpy(vmx_msr_bitmap_longmode_x2apic,
> - vmx_msr_bitmap_longmode, PAGE_SIZE);
> -
> - if (enable_apicv) {
> - for (msr = 0x800; msr <= 0x8ff; msr++)
> - vmx_disable_intercept_msr_read_x2apic(msr);
> -
> - /* According SDM, in x2apic mode, the whole id reg is used.
> - * But in KVM, it only use the highest eight bits. Need to
> - * intercept it */
> - vmx_enable_intercept_msr_read_x2apic(0x802);
> - /* TMCCT */
> - vmx_enable_intercept_msr_read_x2apic(0x839);
> - /* TPR */
> - vmx_disable_intercept_msr_write_x2apic(0x808);
> - /* EOI */
> - vmx_disable_intercept_msr_write_x2apic(0x80b);
> - /* SELF-IPI */
> - vmx_disable_intercept_msr_write_x2apic(0x83f);
> - }
> -
> - if (enable_ept) {
> - kvm_mmu_set_mask_ptes(0ull,
> - (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> - (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> - 0ull, VMX_EPT_EXECUTABLE_MASK);
> - ept_set_mmio_spte_mask();
> - kvm_enable_tdp();
> - } else
> - kvm_disable_tdp();
> -
> - update_ple_window_actual_max();
> -
> return 0;
> -
> -out7:
> - free_page((unsigned long)vmx_vmwrite_bitmap);
> -out6:
> - free_page((unsigned long)vmx_vmread_bitmap);
> -out5:
> - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -out4:
> - free_page((unsigned long)vmx_msr_bitmap_longmode);
> -out3:
> - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> -out2:
> - free_page((unsigned long)vmx_msr_bitmap_legacy);
> -out1:
> - free_page((unsigned long)vmx_io_bitmap_b);
> -out:
> - free_page((unsigned long)vmx_io_bitmap_a);
> - return r;
> }
>
> static void __exit vmx_exit(void)
> {
> - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> - free_page((unsigned long)vmx_msr_bitmap_legacy);
> - free_page((unsigned long)vmx_msr_bitmap_longmode);
> - free_page((unsigned long)vmx_io_bitmap_b);
> - free_page((unsigned long)vmx_io_bitmap_a);
> - free_page((unsigned long)vmx_vmwrite_bitmap);
> - free_page((unsigned long)vmx_vmread_bitmap);
> -
> #ifdef CONFIG_KEXEC
> RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
> synchronize_rcu();
>
prev parent reply other threads:[~2014-10-24 10:48 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-24 9:18 [RFC][PATCH] kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup() Tiejun Chen
2014-10-24 10:48 ` Paolo Bonzini [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=544A2E92.5090309@redhat.com \
--to=pbonzini@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=tiejun.chen@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.