* [RFC][PATCH] kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()
@ 2014-10-24 9:18 Tiejun Chen
2014-10-24 10:48 ` Paolo Bonzini
0 siblings, 1 reply; 2+ messages in thread
From: Tiejun Chen @ 2014-10-24 9:18 UTC (permalink / raw)
To: pbonzini; +Cc: kvm
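Setup that is specific to the VMX hardware belongs in
vmx_x86_ops->hardware_setup() rather than in vmx_init(). Move the host EFER
read, the shared MSR definitions, the I/O, MSR and VMREAD/VMWRITE bitmap
allocation and initialization, the MSR intercept setup, the EPT/TDP
configuration and the PLE window update into hardware_setup(), and free the
bitmaps in hardware_unsetup() instead of vmx_exit(). The helper functions
used by that code are moved above hardware_setup() accordingly.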
Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
---
arch/x86/kvm/vmx.c | 720 +++++++++++++++++++++++++++--------------------------
1 file changed, 361 insertions(+), 359 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 04fa1b8..9270076 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3106,10 +3106,302 @@ static __init int alloc_kvm_area(void)
return 0;
}
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
+{
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+ /*
+ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+ * have the write-low and read-high bitmap offsets the wrong way round.
+ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+ */
+ if (msr <= 0x1fff) {
+ if (type & MSR_TYPE_R)
+ /* read-low */
+ __clear_bit(msr, msr_bitmap + 0x000 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-low */
+ __clear_bit(msr, msr_bitmap + 0x800 / f);
+
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ if (type & MSR_TYPE_R)
+ /* read-high */
+ __clear_bit(msr, msr_bitmap + 0x400 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-high */
+ __clear_bit(msr, msr_bitmap + 0xc00 / f);
+
+ }
+}
+
+static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+ u32 msr, int type)
+{
+ int f = sizeof(unsigned long);
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+ /*
+ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+ * have the write-low and read-high bitmap offsets the wrong way round.
+ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+ */
+ if (msr <= 0x1fff) {
+ if (type & MSR_TYPE_R)
+ /* read-low */
+ __set_bit(msr, msr_bitmap + 0x000 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-low */
+ __set_bit(msr, msr_bitmap + 0x800 / f);
+
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ if (type & MSR_TYPE_R)
+ /* read-high */
+ __set_bit(msr, msr_bitmap + 0x400 / f);
+
+ if (type & MSR_TYPE_W)
+ /* write-high */
+ __set_bit(msr, msr_bitmap + 0xc00 / f);
+
+ }
+}
+
+static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
+{
+ if (!longmode_only)
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
+ msr, MSR_TYPE_R | MSR_TYPE_W);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
+ msr, MSR_TYPE_R | MSR_TYPE_W);
+}
+
+static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+{
+ __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+ msr, MSR_TYPE_R);
+ __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+ msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+{
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+ msr, MSR_TYPE_R);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+ msr, MSR_TYPE_R);
+}
+
+static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+{
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+ msr, MSR_TYPE_W);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+ msr, MSR_TYPE_W);
+}
+
+static int vmx_vm_has_apicv(struct kvm *kvm)
+{
+ return enable_apicv && irqchip_in_kernel(kvm);
+}
+
+static void ept_set_mmio_spte_mask(void)
+{
+ /*
+ * EPT Misconfigurations can be generated if the value of bits 2:0
+ * of an EPT paging-structure entry is 110b (write/execute).
+ * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
+ * spte.
+ */
+ kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
+}
+
+static int __grow_ple_window(int val)
+{
+ if (ple_window_grow < 1)
+ return ple_window;
+
+ val = min(val, ple_window_actual_max);
+
+ if (ple_window_grow < ple_window)
+ val *= ple_window_grow;
+ else
+ val += ple_window_grow;
+
+ return val;
+}
+
+static int __shrink_ple_window(int val, int modifier, int minimum)
+{
+ if (modifier < 1)
+ return ple_window;
+
+ if (modifier < ple_window)
+ val /= modifier;
+ else
+ val -= modifier;
+
+ return max(val, minimum);
+}
+
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __grow_ple_window(old);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __shrink_ple_window(old,
+ ple_window_shrink, ple_window);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+/*
+ * ple_window_actual_max is computed to be one grow_ple_window() below
+ * ple_window_max. (See __grow_ple_window for the reason.)
+ * This prevents overflows, because ple_window_max is int.
+ * ple_window_max effectively rounded down to a multiple of ple_window_grow in
+ * this process.
+ * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
+ */
+static void update_ple_window_actual_max(void)
+{
+ ple_window_actual_max =
+ __shrink_ple_window(max(ple_window_max, ple_window),
+ ple_window_grow, INT_MIN);
+}
+
+
static __init int hardware_setup(void)
{
- if (setup_vmcs_config(&vmcs_config) < 0)
- return -EIO;
+ int r = -ENOMEM, i, msr;
+
+ rdmsrl_safe(MSR_EFER, &host_efer);
+
+ for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+ kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+ vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_io_bitmap_a)
+ return r;
+
+ vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_io_bitmap_b)
+ goto out;
+
+ vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_msr_bitmap_legacy)
+ goto out1;
+
+ vmx_msr_bitmap_legacy_x2apic =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_msr_bitmap_legacy_x2apic)
+ goto out2;
+
+ vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_msr_bitmap_longmode)
+ goto out3;
+
+ vmx_msr_bitmap_longmode_x2apic =
+ (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_msr_bitmap_longmode_x2apic)
+ goto out4;
+ vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmread_bitmap)
+ goto out5;
+
+ vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_vmwrite_bitmap)
+ goto out6;
+
+ memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+ memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+ /*
+ * Allow direct access to the PC debug port (it is often used for I/O
+ * delays, but the vmexits simply slow things down).
+ */
+ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+ clear_bit(0x80, vmx_io_bitmap_a);
+
+ memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+ memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+ memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+ vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+ vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+ vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+ memcpy(vmx_msr_bitmap_legacy_x2apic,
+ vmx_msr_bitmap_legacy, PAGE_SIZE);
+ memcpy(vmx_msr_bitmap_longmode_x2apic,
+ vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+ if (enable_apicv) {
+ for (msr = 0x800; msr <= 0x8ff; msr++)
+ vmx_disable_intercept_msr_read_x2apic(msr);
+
+ /* According SDM, in x2apic mode, the whole id reg is used.
+ * But in KVM, it only use the highest eight bits. Need to
+ * intercept it */
+ vmx_enable_intercept_msr_read_x2apic(0x802);
+ /* TMCCT */
+ vmx_enable_intercept_msr_read_x2apic(0x839);
+ /* TPR */
+ vmx_disable_intercept_msr_write_x2apic(0x808);
+ /* EOI */
+ vmx_disable_intercept_msr_write_x2apic(0x80b);
+ /* SELF-IPI */
+ vmx_disable_intercept_msr_write_x2apic(0x83f);
+ }
+
+ if (enable_ept) {
+ kvm_mmu_set_mask_ptes(0ull,
+ (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+ (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+ 0ull, VMX_EPT_EXECUTABLE_MASK);
+ ept_set_mmio_spte_mask();
+ kvm_enable_tdp();
+ } else
+ kvm_disable_tdp();
+
+ update_ple_window_actual_max();
+
+ if (setup_vmcs_config(&vmcs_config) < 0) {
+ r = -EIO;
+ goto out7;
+ }
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
@@ -3169,10 +3461,38 @@ static __init int hardware_setup(void)
nested_vmx_setup_ctls_msrs();
return alloc_kvm_area();
+
+out7:
+ free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+ free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+ free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+ free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+ free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+ free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+ free_page((unsigned long)vmx_io_bitmap_b);
+out:
+ free_page((unsigned long)vmx_io_bitmap_a);
+
+ return r;
}
static __exit void hardware_unsetup(void)
{
+ free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+ free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+ free_page((unsigned long)vmx_msr_bitmap_legacy);
+ free_page((unsigned long)vmx_msr_bitmap_longmode);
+ free_page((unsigned long)vmx_io_bitmap_b);
+ free_page((unsigned long)vmx_io_bitmap_a);
+ free_page((unsigned long)vmx_vmwrite_bitmap);
+ free_page((unsigned long)vmx_vmread_bitmap);
+
free_kvm_area();
}
@@ -4057,162 +4377,52 @@ static int alloc_apic_access_page(struct kvm *kvm)
kvm->arch.apic_access_page_done = true;
out:
mutex_unlock(&kvm->slots_lock);
- return r;
-}
-
-static int alloc_identity_pagetable(struct kvm *kvm)
-{
- /* Called with kvm->slots_lock held. */
-
- struct kvm_userspace_memory_region kvm_userspace_mem;
- int r = 0;
-
- BUG_ON(kvm->arch.ept_identity_pagetable_done);
-
- kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr =
- kvm->arch.ept_identity_map_addr;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
-
- return r;
-}
-
-static void allocate_vpid(struct vcpu_vmx *vmx)
-{
- int vpid;
-
- vmx->vpid = 0;
- if (!enable_vpid)
- return;
- spin_lock(&vmx_vpid_lock);
- vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
- if (vpid < VMX_NR_VPIDS) {
- vmx->vpid = vpid;
- __set_bit(vpid, vmx_vpid_bitmap);
- }
- spin_unlock(&vmx_vpid_lock);
-}
-
-static void free_vpid(struct vcpu_vmx *vmx)
-{
- if (!enable_vpid)
- return;
- spin_lock(&vmx_vpid_lock);
- if (vmx->vpid != 0)
- __clear_bit(vmx->vpid, vmx_vpid_bitmap);
- spin_unlock(&vmx_vpid_lock);
-}
-
-#define MSR_TYPE_R 1
-#define MSR_TYPE_W 2
-static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
- return;
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __clear_bit(msr, msr_bitmap + 0x000 / f);
-
- if (type & MSR_TYPE_W)
- /* write-low */
- __clear_bit(msr, msr_bitmap + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __clear_bit(msr, msr_bitmap + 0x400 / f);
-
- if (type & MSR_TYPE_W)
- /* write-high */
- __clear_bit(msr, msr_bitmap + 0xc00 / f);
-
- }
-}
-
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
- return;
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __set_bit(msr, msr_bitmap + 0x000 / f);
-
- if (type & MSR_TYPE_W)
- /* write-low */
- __set_bit(msr, msr_bitmap + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __set_bit(msr, msr_bitmap + 0x400 / f);
-
- if (type & MSR_TYPE_W)
- /* write-high */
- __set_bit(msr, msr_bitmap + 0xc00 / f);
-
- }
-}
-
-static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
-{
- if (!longmode_only)
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
- msr, MSR_TYPE_R | MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
- msr, MSR_TYPE_R | MSR_TYPE_W);
-}
-
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
-{
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
+ return r;
}
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+static int alloc_identity_pagetable(struct kvm *kvm)
{
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
+ /* Called with kvm->slots_lock held. */
+
+ struct kvm_userspace_memory_region kvm_userspace_mem;
+ int r = 0;
+
+ BUG_ON(kvm->arch.ept_identity_pagetable_done);
+
+ kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
+ kvm_userspace_mem.flags = 0;
+ kvm_userspace_mem.guest_phys_addr =
+ kvm->arch.ept_identity_map_addr;
+ kvm_userspace_mem.memory_size = PAGE_SIZE;
+ r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
+
+ return r;
}
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+static void allocate_vpid(struct vcpu_vmx *vmx)
{
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_W);
+ int vpid;
+
+ vmx->vpid = 0;
+ if (!enable_vpid)
+ return;
+ spin_lock(&vmx_vpid_lock);
+ vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
+ if (vpid < VMX_NR_VPIDS) {
+ vmx->vpid = vpid;
+ __set_bit(vpid, vmx_vpid_bitmap);
+ }
+ spin_unlock(&vmx_vpid_lock);
}
-static int vmx_vm_has_apicv(struct kvm *kvm)
+static void free_vpid(struct vcpu_vmx *vmx)
{
- return enable_apicv && irqchip_in_kernel(kvm);
+ if (!enable_vpid)
+ return;
+ spin_lock(&vmx_vpid_lock);
+ if (vmx->vpid != 0)
+ __clear_bit(vmx->vpid, vmx_vpid_bitmap);
+ spin_unlock(&vmx_vpid_lock);
}
/*
@@ -4376,17 +4586,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}
-static void ept_set_mmio_spte_mask(void)
-{
- /*
- * EPT Misconfigurations can be generated if the value of bits 2:0
- * of an EPT paging-structure entry is 110b (write/execute).
- * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
- * spte.
- */
- kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
-}
-
/*
* Sets up the vmcs for emulated real mode.
*/
@@ -5706,76 +5905,6 @@ out:
return ret;
}
-static int __grow_ple_window(int val)
-{
- if (ple_window_grow < 1)
- return ple_window;
-
- val = min(val, ple_window_actual_max);
-
- if (ple_window_grow < ple_window)
- val *= ple_window_grow;
- else
- val += ple_window_grow;
-
- return val;
-}
-
-static int __shrink_ple_window(int val, int modifier, int minimum)
-{
- if (modifier < 1)
- return ple_window;
-
- if (modifier < ple_window)
- val /= modifier;
- else
- val -= modifier;
-
- return max(val, minimum);
-}
-
-static void grow_ple_window(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int old = vmx->ple_window;
-
- vmx->ple_window = __grow_ple_window(old);
-
- if (vmx->ple_window != old)
- vmx->ple_window_dirty = true;
-
- trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
-}
-
-static void shrink_ple_window(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- int old = vmx->ple_window;
-
- vmx->ple_window = __shrink_ple_window(old,
- ple_window_shrink, ple_window);
-
- if (vmx->ple_window != old)
- vmx->ple_window_dirty = true;
-
- trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
-}
-
-/*
- * ple_window_actual_max is computed to be one grow_ple_window() below
- * ple_window_max. (See __grow_ple_window for the reason.)
- * This prevents overflows, because ple_window_max is int.
- * ple_window_max effectively rounded down to a multiple of ple_window_grow in
- * this process.
- * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
- */
-static void update_ple_window_actual_max(void)
-{
- ple_window_actual_max =
- __shrink_ple_window(max(ple_window_max, ple_window),
- ple_window_grow, INT_MIN);
-}
-
/*
* Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
* exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -9158,150 +9287,23 @@ static struct kvm_x86_ops vmx_x86_ops = {
static int __init vmx_init(void)
{
- int r, i, msr;
-
- rdmsrl_safe(MSR_EFER, &host_efer);
-
- for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
- kvm_define_shared_msr(i, vmx_msr_index[i]);
-
- vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_a)
- return -ENOMEM;
-
- r = -ENOMEM;
-
- vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_b)
- goto out;
-
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy)
- goto out1;
-
- vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic)
- goto out2;
-
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode)
- goto out3;
-
- vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic)
- goto out4;
- vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmread_bitmap)
- goto out5;
-
- vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmwrite_bitmap)
- goto out6;
-
- memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
- memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
- memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
-
- memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
- memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
- memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
- set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+ int r = -ENOMEM;
r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
- goto out7;
+ return r;
#ifdef CONFIG_KEXEC
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
crash_vmclear_local_loaded_vmcss);
#endif
- vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
- vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
- memcpy(vmx_msr_bitmap_legacy_x2apic,
- vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
- vmx_msr_bitmap_longmode, PAGE_SIZE);
-
- if (enable_apicv) {
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr);
-
- /* According SDM, in x2apic mode, the whole id reg is used.
- * But in KVM, it only use the highest eight bits. Need to
- * intercept it */
- vmx_enable_intercept_msr_read_x2apic(0x802);
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808);
- /* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b);
- /* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f);
- }
-
- if (enable_ept) {
- kvm_mmu_set_mask_ptes(0ull,
- (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
- (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
- 0ull, VMX_EPT_EXECUTABLE_MASK);
- ept_set_mmio_spte_mask();
- kvm_enable_tdp();
- } else
- kvm_disable_tdp();
-
- update_ple_window_actual_max();
-
return 0;
-
-out7:
- free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
- free_page((unsigned long)vmx_vmread_bitmap);
-out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
- free_page((unsigned long)vmx_io_bitmap_b);
-out:
- free_page((unsigned long)vmx_io_bitmap_a);
- return r;
}
static void __exit vmx_exit(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
- free_page((unsigned long)vmx_io_bitmap_b);
- free_page((unsigned long)vmx_io_bitmap_a);
- free_page((unsigned long)vmx_vmwrite_bitmap);
- free_page((unsigned long)vmx_vmread_bitmap);
-
#ifdef CONFIG_KEXEC
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
--
1.9.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [RFC][PATCH] kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup()
2014-10-24 9:18 [RFC][PATCH] kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup() Tiejun Chen
@ 2014-10-24 10:48 ` Paolo Bonzini
0 siblings, 0 replies; 2+ messages in thread
From: Paolo Bonzini @ 2014-10-24 10:48 UTC (permalink / raw)
To: Tiejun Chen; +Cc: kvm
On 10/24/2014 11:18 AM, Tiejun Chen wrote:
> Instead of vmx_init(), actually it would make reasonable sense to do
> anything specific to vmx hardware setting in vmx_x86_ops->hardware_setup().
>
> Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
Please split this patch into multiple parts. As a single patch it is quite
hard to review.
Paolo
> ---
> arch/x86/kvm/vmx.c | 720 +++++++++++++++++++++++++++--------------------------
> 1 file changed, 361 insertions(+), 359 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 04fa1b8..9270076 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3106,10 +3106,302 @@ static __init int alloc_kvm_area(void)
> return 0;
> }
>
> +#define MSR_TYPE_R 1
> +#define MSR_TYPE_W 2
> +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> + u32 msr, int type)
> +{
> + int f = sizeof(unsigned long);
> +
> + if (!cpu_has_vmx_msr_bitmap())
> + return;
> +
> + /*
> + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> + * have the write-low and read-high bitmap offsets the wrong way round.
> + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> + */
> + if (msr <= 0x1fff) {
> + if (type & MSR_TYPE_R)
> + /* read-low */
> + __clear_bit(msr, msr_bitmap + 0x000 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-low */
> + __clear_bit(msr, msr_bitmap + 0x800 / f);
> +
> + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> + msr &= 0x1fff;
> + if (type & MSR_TYPE_R)
> + /* read-high */
> + __clear_bit(msr, msr_bitmap + 0x400 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-high */
> + __clear_bit(msr, msr_bitmap + 0xc00 / f);
> +
> + }
> +}
> +
> +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> + u32 msr, int type)
> +{
> + int f = sizeof(unsigned long);
> +
> + if (!cpu_has_vmx_msr_bitmap())
> + return;
> +
> + /*
> + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> + * have the write-low and read-high bitmap offsets the wrong way round.
> + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> + */
> + if (msr <= 0x1fff) {
> + if (type & MSR_TYPE_R)
> + /* read-low */
> + __set_bit(msr, msr_bitmap + 0x000 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-low */
> + __set_bit(msr, msr_bitmap + 0x800 / f);
> +
> + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> + msr &= 0x1fff;
> + if (type & MSR_TYPE_R)
> + /* read-high */
> + __set_bit(msr, msr_bitmap + 0x400 / f);
> +
> + if (type & MSR_TYPE_W)
> + /* write-high */
> + __set_bit(msr, msr_bitmap + 0xc00 / f);
> +
> + }
> +}
> +
> +static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> +{
> + if (!longmode_only)
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> + msr, MSR_TYPE_R | MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> + msr, MSR_TYPE_R | MSR_TYPE_W);
> +}
> +
> +static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> +{
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +{
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_R);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_R);
> +}
> +
> +static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +{
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> + msr, MSR_TYPE_W);
> + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> + msr, MSR_TYPE_W);
> +}
> +
> +static int vmx_vm_has_apicv(struct kvm *kvm)
> +{
> + return enable_apicv && irqchip_in_kernel(kvm);
> +}
> +
> +static void ept_set_mmio_spte_mask(void)
> +{
> + /*
> + * EPT Misconfigurations can be generated if the value of bits 2:0
> + * of an EPT paging-structure entry is 110b (write/execute).
> + * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> + * spte.
> + */
> + kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> +}
> +
> +static int __grow_ple_window(int val)
> +{
> + if (ple_window_grow < 1)
> + return ple_window;
> +
> + val = min(val, ple_window_actual_max);
> +
> + if (ple_window_grow < ple_window)
> + val *= ple_window_grow;
> + else
> + val += ple_window_grow;
> +
> + return val;
> +}
> +
> +static int __shrink_ple_window(int val, int modifier, int minimum)
> +{
> + if (modifier < 1)
> + return ple_window;
> +
> + if (modifier < ple_window)
> + val /= modifier;
> + else
> + val -= modifier;
> +
> + return max(val, minimum);
> +}
> +
> +static void grow_ple_window(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + int old = vmx->ple_window;
> +
> + vmx->ple_window = __grow_ple_window(old);
> +
> + if (vmx->ple_window != old)
> + vmx->ple_window_dirty = true;
> +
> + trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +static void shrink_ple_window(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + int old = vmx->ple_window;
> +
> + vmx->ple_window = __shrink_ple_window(old,
> + ple_window_shrink, ple_window);
> +
> + if (vmx->ple_window != old)
> + vmx->ple_window_dirty = true;
> +
> + trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> +}
> +
> +/*
> + * ple_window_actual_max is computed to be one grow_ple_window() below
> + * ple_window_max. (See __grow_ple_window for the reason.)
> + * This prevents overflows, because ple_window_max is int.
> + * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> + * this process.
> + * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> + */
> +static void update_ple_window_actual_max(void)
> +{
> + ple_window_actual_max =
> + __shrink_ple_window(max(ple_window_max, ple_window),
> + ple_window_grow, INT_MIN);
> +}
> +
> +
> static __init int hardware_setup(void)
> {
> - if (setup_vmcs_config(&vmcs_config) < 0)
> - return -EIO;
> + int r = -ENOMEM, i, msr;
> +
> + rdmsrl_safe(MSR_EFER, &host_efer);
> +
> + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> + kvm_define_shared_msr(i, vmx_msr_index[i]);
> +
> + vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_io_bitmap_a)
> + return r;
> +
> + vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_io_bitmap_b)
> + goto out;
> +
> + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_legacy)
> + goto out1;
> +
> + vmx_msr_bitmap_legacy_x2apic =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_legacy_x2apic)
> + goto out2;
> +
> + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_longmode)
> + goto out3;
> +
> + vmx_msr_bitmap_longmode_x2apic =
> + (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_msr_bitmap_longmode_x2apic)
> + goto out4;
> + vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_vmread_bitmap)
> + goto out5;
> +
> + vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> + if (!vmx_vmwrite_bitmap)
> + goto out6;
> +
> + memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> + memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> +
> + /*
> + * Allow direct access to the PC debug port (it is often used for I/O
> + * delays, but the vmexits simply slow things down).
> + */
> + memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> + clear_bit(0x80, vmx_io_bitmap_a);
> +
> + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> +
> + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> +
> + vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> + vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> +
> + memcpy(vmx_msr_bitmap_legacy_x2apic,
> + vmx_msr_bitmap_legacy, PAGE_SIZE);
> + memcpy(vmx_msr_bitmap_longmode_x2apic,
> + vmx_msr_bitmap_longmode, PAGE_SIZE);
> +
> + if (enable_apicv) {
> + for (msr = 0x800; msr <= 0x8ff; msr++)
> + vmx_disable_intercept_msr_read_x2apic(msr);
> +
> + /* According SDM, in x2apic mode, the whole id reg is used.
> + * But in KVM, it only use the highest eight bits. Need to
> + * intercept it */
> + vmx_enable_intercept_msr_read_x2apic(0x802);
> + /* TMCCT */
> + vmx_enable_intercept_msr_read_x2apic(0x839);
> + /* TPR */
> + vmx_disable_intercept_msr_write_x2apic(0x808);
> + /* EOI */
> + vmx_disable_intercept_msr_write_x2apic(0x80b);
> + /* SELF-IPI */
> + vmx_disable_intercept_msr_write_x2apic(0x83f);
> + }
> +
> + if (enable_ept) {
> + kvm_mmu_set_mask_ptes(0ull,
> + (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> + (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> + 0ull, VMX_EPT_EXECUTABLE_MASK);
> + ept_set_mmio_spte_mask();
> + kvm_enable_tdp();
> + } else
> + kvm_disable_tdp();
> +
> + update_ple_window_actual_max();
> +
> + if (setup_vmcs_config(&vmcs_config) < 0) {
> + r = -EIO;
> + goto out7;
> + }
>
> if (boot_cpu_has(X86_FEATURE_NX))
> kvm_enable_efer_bits(EFER_NX);
> @@ -3169,10 +3461,38 @@ static __init int hardware_setup(void)
> nested_vmx_setup_ctls_msrs();
>
> return alloc_kvm_area();
> +
> +out7:
> + free_page((unsigned long)vmx_vmwrite_bitmap);
> +out6:
> + free_page((unsigned long)vmx_vmread_bitmap);
> +out5:
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> +out4:
> + free_page((unsigned long)vmx_msr_bitmap_longmode);
> +out3:
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> +out2:
> + free_page((unsigned long)vmx_msr_bitmap_legacy);
> +out1:
> + free_page((unsigned long)vmx_io_bitmap_b);
> +out:
> + free_page((unsigned long)vmx_io_bitmap_a);
> +
> + return r;
> }
>
> static __exit void hardware_unsetup(void)
> {
> + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> + free_page((unsigned long)vmx_msr_bitmap_legacy);
> + free_page((unsigned long)vmx_msr_bitmap_longmode);
> + free_page((unsigned long)vmx_io_bitmap_b);
> + free_page((unsigned long)vmx_io_bitmap_a);
> + free_page((unsigned long)vmx_vmwrite_bitmap);
> + free_page((unsigned long)vmx_vmread_bitmap);
> +
> free_kvm_area();
> }
>
> @@ -4057,162 +4377,52 @@ static int alloc_apic_access_page(struct kvm *kvm)
> kvm->arch.apic_access_page_done = true;
> out:
> mutex_unlock(&kvm->slots_lock);
> - return r;
> -}
> -
> -static int alloc_identity_pagetable(struct kvm *kvm)
> -{
> - /* Called with kvm->slots_lock held. */
> -
> - struct kvm_userspace_memory_region kvm_userspace_mem;
> - int r = 0;
> -
> - BUG_ON(kvm->arch.ept_identity_pagetable_done);
> -
> - kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> - kvm_userspace_mem.flags = 0;
> - kvm_userspace_mem.guest_phys_addr =
> - kvm->arch.ept_identity_map_addr;
> - kvm_userspace_mem.memory_size = PAGE_SIZE;
> - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> -
> - return r;
> -}
> -
> -static void allocate_vpid(struct vcpu_vmx *vmx)
> -{
> - int vpid;
> -
> - vmx->vpid = 0;
> - if (!enable_vpid)
> - return;
> - spin_lock(&vmx_vpid_lock);
> - vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> - if (vpid < VMX_NR_VPIDS) {
> - vmx->vpid = vpid;
> - __set_bit(vpid, vmx_vpid_bitmap);
> - }
> - spin_unlock(&vmx_vpid_lock);
> -}
> -
> -static void free_vpid(struct vcpu_vmx *vmx)
> -{
> - if (!enable_vpid)
> - return;
> - spin_lock(&vmx_vpid_lock);
> - if (vmx->vpid != 0)
> - __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> - spin_unlock(&vmx_vpid_lock);
> -}
> -
> -#define MSR_TYPE_R 1
> -#define MSR_TYPE_W 2
> -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
> - u32 msr, int type)
> -{
> - int f = sizeof(unsigned long);
> -
> - if (!cpu_has_vmx_msr_bitmap())
> - return;
> -
> - /*
> - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> - * have the write-low and read-high bitmap offsets the wrong way round.
> - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> - */
> - if (msr <= 0x1fff) {
> - if (type & MSR_TYPE_R)
> - /* read-low */
> - __clear_bit(msr, msr_bitmap + 0x000 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-low */
> - __clear_bit(msr, msr_bitmap + 0x800 / f);
> -
> - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> - msr &= 0x1fff;
> - if (type & MSR_TYPE_R)
> - /* read-high */
> - __clear_bit(msr, msr_bitmap + 0x400 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-high */
> - __clear_bit(msr, msr_bitmap + 0xc00 / f);
> -
> - }
> -}
> -
> -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
> - u32 msr, int type)
> -{
> - int f = sizeof(unsigned long);
> -
> - if (!cpu_has_vmx_msr_bitmap())
> - return;
> -
> - /*
> - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
> - * have the write-low and read-high bitmap offsets the wrong way round.
> - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
> - */
> - if (msr <= 0x1fff) {
> - if (type & MSR_TYPE_R)
> - /* read-low */
> - __set_bit(msr, msr_bitmap + 0x000 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-low */
> - __set_bit(msr, msr_bitmap + 0x800 / f);
> -
> - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
> - msr &= 0x1fff;
> - if (type & MSR_TYPE_R)
> - /* read-high */
> - __set_bit(msr, msr_bitmap + 0x400 / f);
> -
> - if (type & MSR_TYPE_W)
> - /* write-high */
> - __set_bit(msr, msr_bitmap + 0xc00 / f);
> -
> - }
> -}
> -
> -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
> -{
> - if (!longmode_only)
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
> - msr, MSR_TYPE_R | MSR_TYPE_W);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
> - msr, MSR_TYPE_R | MSR_TYPE_W);
> -}
> -
> -static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
> -{
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + return r;
> }
>
> -static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
> +static int alloc_identity_pagetable(struct kvm *kvm)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_R);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_R);
> + /* Called with kvm->slots_lock held. */
> +
> + struct kvm_userspace_memory_region kvm_userspace_mem;
> + int r = 0;
> +
> + BUG_ON(kvm->arch.ept_identity_pagetable_done);
> +
> + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
> + kvm_userspace_mem.flags = 0;
> + kvm_userspace_mem.guest_phys_addr =
> + kvm->arch.ept_identity_map_addr;
> + kvm_userspace_mem.memory_size = PAGE_SIZE;
> + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
> +
> + return r;
> }
>
> -static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
> +static void allocate_vpid(struct vcpu_vmx *vmx)
> {
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
> - msr, MSR_TYPE_W);
> - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
> - msr, MSR_TYPE_W);
> + int vpid;
> +
> + vmx->vpid = 0;
> + if (!enable_vpid)
> + return;
> + spin_lock(&vmx_vpid_lock);
> + vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
> + if (vpid < VMX_NR_VPIDS) {
> + vmx->vpid = vpid;
> + __set_bit(vpid, vmx_vpid_bitmap);
> + }
> + spin_unlock(&vmx_vpid_lock);
> }
>
> -static int vmx_vm_has_apicv(struct kvm *kvm)
> +static void free_vpid(struct vcpu_vmx *vmx)
> {
> - return enable_apicv && irqchip_in_kernel(kvm);
> + if (!enable_vpid)
> + return;
> + spin_lock(&vmx_vpid_lock);
> + if (vmx->vpid != 0)
> + __clear_bit(vmx->vpid, vmx_vpid_bitmap);
> + spin_unlock(&vmx_vpid_lock);
> }
>
> /*
> @@ -4376,17 +4586,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
> return exec_control;
> }
>
> -static void ept_set_mmio_spte_mask(void)
> -{
> - /*
> - * EPT Misconfigurations can be generated if the value of bits 2:0
> - * of an EPT paging-structure entry is 110b (write/execute).
> - * Also, magic bits (0x3ull << 62) is set to quickly identify mmio
> - * spte.
> - */
> - kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
> -}
> -
> /*
> * Sets up the vmcs for emulated real mode.
> */
> @@ -5706,76 +5905,6 @@ out:
> return ret;
> }
>
> -static int __grow_ple_window(int val)
> -{
> - if (ple_window_grow < 1)
> - return ple_window;
> -
> - val = min(val, ple_window_actual_max);
> -
> - if (ple_window_grow < ple_window)
> - val *= ple_window_grow;
> - else
> - val += ple_window_grow;
> -
> - return val;
> -}
> -
> -static int __shrink_ple_window(int val, int modifier, int minimum)
> -{
> - if (modifier < 1)
> - return ple_window;
> -
> - if (modifier < ple_window)
> - val /= modifier;
> - else
> - val -= modifier;
> -
> - return max(val, minimum);
> -}
> -
> -static void grow_ple_window(struct kvm_vcpu *vcpu)
> -{
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> - int old = vmx->ple_window;
> -
> - vmx->ple_window = __grow_ple_window(old);
> -
> - if (vmx->ple_window != old)
> - vmx->ple_window_dirty = true;
> -
> - trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -static void shrink_ple_window(struct kvm_vcpu *vcpu)
> -{
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> - int old = vmx->ple_window;
> -
> - vmx->ple_window = __shrink_ple_window(old,
> - ple_window_shrink, ple_window);
> -
> - if (vmx->ple_window != old)
> - vmx->ple_window_dirty = true;
> -
> - trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
> -}
> -
> -/*
> - * ple_window_actual_max is computed to be one grow_ple_window() below
> - * ple_window_max. (See __grow_ple_window for the reason.)
> - * This prevents overflows, because ple_window_max is int.
> - * ple_window_max effectively rounded down to a multiple of ple_window_grow in
> - * this process.
> - * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
> - */
> -static void update_ple_window_actual_max(void)
> -{
> - ple_window_actual_max =
> - __shrink_ple_window(max(ple_window_max, ple_window),
> - ple_window_grow, INT_MIN);
> -}
> -
> /*
> * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
> * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
> @@ -9158,150 +9287,23 @@ static struct kvm_x86_ops vmx_x86_ops = {
>
> static int __init vmx_init(void)
> {
> - int r, i, msr;
> -
> - rdmsrl_safe(MSR_EFER, &host_efer);
> -
> - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
> - kvm_define_shared_msr(i, vmx_msr_index[i]);
> -
> - vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_io_bitmap_a)
> - return -ENOMEM;
> -
> - r = -ENOMEM;
> -
> - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_io_bitmap_b)
> - goto out;
> -
> - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_legacy)
> - goto out1;
> -
> - vmx_msr_bitmap_legacy_x2apic =
> - (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_legacy_x2apic)
> - goto out2;
> -
> - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_longmode)
> - goto out3;
> -
> - vmx_msr_bitmap_longmode_x2apic =
> - (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_msr_bitmap_longmode_x2apic)
> - goto out4;
> - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_vmread_bitmap)
> - goto out5;
> -
> - vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
> - if (!vmx_vmwrite_bitmap)
> - goto out6;
> -
> - memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
> - memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
> -
> - /*
> - * Allow direct access to the PC debug port (it is often used for I/O
> - * delays, but the vmexits simply slow things down).
> - */
> - memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
> - clear_bit(0x80, vmx_io_bitmap_a);
> -
> - memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
> -
> - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
> - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> -
> - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> + int r = -ENOMEM;
>
> r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
> __alignof__(struct vcpu_vmx), THIS_MODULE);
> if (r)
> - goto out7;
> + return r;
>
> #ifdef CONFIG_KEXEC
> rcu_assign_pointer(crash_vmclear_loaded_vmcss,
> crash_vmclear_local_loaded_vmcss);
> #endif
>
> - vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> - vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> - vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
> -
> - memcpy(vmx_msr_bitmap_legacy_x2apic,
> - vmx_msr_bitmap_legacy, PAGE_SIZE);
> - memcpy(vmx_msr_bitmap_longmode_x2apic,
> - vmx_msr_bitmap_longmode, PAGE_SIZE);
> -
> - if (enable_apicv) {
> - for (msr = 0x800; msr <= 0x8ff; msr++)
> - vmx_disable_intercept_msr_read_x2apic(msr);
> -
> - /* According SDM, in x2apic mode, the whole id reg is used.
> - * But in KVM, it only use the highest eight bits. Need to
> - * intercept it */
> - vmx_enable_intercept_msr_read_x2apic(0x802);
> - /* TMCCT */
> - vmx_enable_intercept_msr_read_x2apic(0x839);
> - /* TPR */
> - vmx_disable_intercept_msr_write_x2apic(0x808);
> - /* EOI */
> - vmx_disable_intercept_msr_write_x2apic(0x80b);
> - /* SELF-IPI */
> - vmx_disable_intercept_msr_write_x2apic(0x83f);
> - }
> -
> - if (enable_ept) {
> - kvm_mmu_set_mask_ptes(0ull,
> - (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> - (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> - 0ull, VMX_EPT_EXECUTABLE_MASK);
> - ept_set_mmio_spte_mask();
> - kvm_enable_tdp();
> - } else
> - kvm_disable_tdp();
> -
> - update_ple_window_actual_max();
> -
> return 0;
> -
> -out7:
> - free_page((unsigned long)vmx_vmwrite_bitmap);
> -out6:
> - free_page((unsigned long)vmx_vmread_bitmap);
> -out5:
> - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> -out4:
> - free_page((unsigned long)vmx_msr_bitmap_longmode);
> -out3:
> - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> -out2:
> - free_page((unsigned long)vmx_msr_bitmap_legacy);
> -out1:
> - free_page((unsigned long)vmx_io_bitmap_b);
> -out:
> - free_page((unsigned long)vmx_io_bitmap_a);
> - return r;
> }
>
> static void __exit vmx_exit(void)
> {
> - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
> - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
> - free_page((unsigned long)vmx_msr_bitmap_legacy);
> - free_page((unsigned long)vmx_msr_bitmap_longmode);
> - free_page((unsigned long)vmx_io_bitmap_b);
> - free_page((unsigned long)vmx_io_bitmap_a);
> - free_page((unsigned long)vmx_vmwrite_bitmap);
> - free_page((unsigned long)vmx_vmread_bitmap);
> -
> #ifdef CONFIG_KEXEC
> RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
> synchronize_rcu();
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-10-24 10:48 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-24 9:18 [RFC][PATCH] kvm: x86: vmx: move some vmx setting from vmx_init() to hardware_setup() Tiejun Chen
2014-10-24 10:48 ` Paolo Bonzini
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.