* [PATCH v2 0/4] KVM: x86: Fixes for IA32_APIC_BASE and nVMX
@ 2014-01-24 15:48 Jan Kiszka
2014-01-24 15:48 ` [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE Jan Kiszka
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Jan Kiszka @ 2014-01-24 15:48 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
This is the still-unmerged part of the previous round. Changes since v1:
- rebase over next
- switched APIC_BASE reserved bits check to guest's number of physical
bits
- addressed small review comment on "Rework interception of IRQs and
NMIs"
- added fix for improper EXCEPTION_NMI vmexit injection with valid IDT
vectoring info
Paolo, have you already looked into nested event handling for SVM? I
assume you will want to (re-)base it on top of this.
Jan
Jan Kiszka (4):
KVM: x86: Validate guest writes to MSR_IA32_APICBASE
KVM: nVMX: Rework interception of IRQs and NMIs
KVM: nVMX: Fully emulate preemption timer
KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/cpuid.h | 16 +++
arch/x86/kvm/lapic.h | 2 +-
arch/x86/kvm/vmx.c | 228 ++++++++++++++++++++++++----------------
arch/x86/kvm/x86.c | 47 +++++++--
5 files changed, 197 insertions(+), 98 deletions(-)
--
1.8.1.1.298.ge7eed54
* [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE
2014-01-24 15:48 [PATCH v2 0/4] KVM: x86: Fixes for IA32_APIC_BASE and nVMX Jan Kiszka
@ 2014-01-24 15:48 ` Jan Kiszka
2014-01-24 15:59 ` Paolo Bonzini
2014-01-24 16:01 ` Paolo Bonzini
2014-01-24 15:48 ` [PATCH v2 2/4] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
` (2 subsequent siblings)
3 siblings, 2 replies; 7+ messages in thread
From: Jan Kiszka @ 2014-01-24 15:48 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
Check for invalid state transitions on guest-initiated updates of
MSR_IA32_APICBASE. This addresses both enabling of the x2APIC when it is
not supported and all invalid transitions as described in SDM section
10.12.5. It also checks that no reserved bit is set in APICBASE by the
guest.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
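For reference, here is the state machine from SDM section 10.12.5 that
the check below encodes, with EN = MSR_IA32_APICBASE_ENABLE and
EXTD = X2APIC_ENABLE (a sketch for reviewers, not part of the patch):

    disabled (EN=0, EXTD=0) <--> xAPIC    (EN=1, EXTD=0)
    xAPIC    (EN=1, EXTD=0)  --> x2APIC   (EN=1, EXTD=1)
    x2APIC   (EN=1, EXTD=1)  --> disabled (EN=0, EXTD=0)

Everything else makes kvm_set_apic_base() return 1, so the guest gets
a #GP: setting EXTD without EN, leaving x2APIC mode for xAPIC directly,
and entering x2APIC mode directly from the disabled state. Writes that
set reserved bits are rejected the same way.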
arch/x86/kvm/cpuid.h | 16 ++++++++++++++++
arch/x86/kvm/lapic.h | 2 +-
arch/x86/kvm/vmx.c | 9 +++++----
arch/x86/kvm/x86.c | 32 +++++++++++++++++++++++++-------
4 files changed, 47 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f1e4895..b012ad2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -72,4 +72,20 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_PCID));
}
+static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ return best && (best->ecx & bit(X86_FEATURE_X2APIC));
+}
+
+static inline unsigned int guest_cpuid_get_phys_bits(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ return best ? best->eax & 0xff : 36;
+}
+
#endif
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index c8b0d0d..6a11845 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -65,7 +65,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5c88791..a06f101 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4392,7 +4392,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 msr;
+ struct msr_data apic_base_msr;
vmx->rmode.vm86_active = 0;
@@ -4400,10 +4400,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
- msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&vmx->vcpu))
- msr |= MSR_IA32_APICBASE_BSP;
- kvm_set_apic_base(&vmx->vcpu, msr);
+ apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
+ apic_base_msr.host_initiated = true;
+ kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
vmx_segment_cache_clear(vmx);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0c76f7c..f4b0591 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -257,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
-{
- /* TODO: reserve bits check */
- kvm_lapic_set_base(vcpu, data);
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ u64 old_state = vcpu->arch.apic_base &
+ (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+ u64 new_state = msr_info->data &
+ (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+ u64 reserved_bits = ((~0ULL) << guest_cpuid_get_phys_bits(vcpu)) |
+ 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+
+ if (!msr_info->host_initiated &&
+ ((msr_info->data & reserved_bits) != 0 ||
+ new_state == X2APIC_ENABLE ||
+ (new_state == MSR_IA32_APICBASE_ENABLE &&
+ old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
+ (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
+ old_state == 0)))
+ return 1;
+
+ kvm_lapic_set_base(vcpu, msr_info->data);
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
@@ -2006,8 +2022,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
case MSR_IA32_APICBASE:
- kvm_set_apic_base(vcpu, data);
- break;
+ return kvm_set_apic_base(vcpu, msr_info);
case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_TSCDEADLINE:
@@ -6409,6 +6424,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
@@ -6432,7 +6448,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
kvm_x86_ops->set_efer(vcpu, sregs->efer);
- kvm_set_apic_base(vcpu, sregs->apic_base);
+ apic_base_msr.data = sregs->apic_base;
+ apic_base_msr.host_initiated = true;
+ kvm_set_apic_base(vcpu, &apic_base_msr);
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
--
1.8.1.1.298.ge7eed54
* [PATCH v2 2/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-01-24 15:48 [PATCH v2 0/4] KVM: x86: Fixes for IA32_APIC_BASE and nVMX Jan Kiszka
2014-01-24 15:48 ` [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE Jan Kiszka
@ 2014-01-24 15:48 ` Jan Kiszka
2014-01-24 15:48 ` [PATCH v2 3/4] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
2014-01-24 15:48 ` [PATCH v2 4/4] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
3 siblings, 0 replies; 7+ messages in thread
From: Jan Kiszka @ 2014-01-24 15:48 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
Move the check for leaving L2 on pending and intercepted IRQs or NMIs
from the *_allowed handler into a dedicated callback. Invoke this
callback at the relevant points before KVM checks if IRQs/NMIs can be
injected. The callback is responsible for switching from L2 to L1 if
needed and for injecting the proper vmexit events.
The rework fixes L2 wakeups from HLT and provides the foundation for
preemption timer emulation.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
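A note on the new hook's contract, as implemented by
vmx_check_nested_events() below (a summary for reviewers, not part of
the patch):

    /*
     * Returns 0 if no intercepted event is pending or if the required
     * L2->L1 vmexit was just performed.  Returns -EBUSY if an
     * intercepted event is pending but nested_run_pending forbids
     * taking the vmexit yet; the callers in x86.c fold this into
     * req_immediate_exit so the check is retried right after the
     * vmentry completes.
     */
    int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);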
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/vmx.c | 67 +++++++++++++++++++++++------------------
arch/x86/kvm/x86.c | 15 +++++++--
3 files changed, 53 insertions(+), 31 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83af..8d6cc7a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -765,6 +765,8 @@ struct kvm_x86_ops {
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+
+ int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a06f101..089aa3c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4620,22 +4620,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_nmi(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
- NMI_VECTOR | INTR_TYPE_NMI_INTR |
- INTR_INFO_VALID_MASK, 0);
- /*
- * The NMI-triggered VM exit counts as injection:
- * clear this one and block further NMIs.
- */
- vcpu->arch.nmi_pending = 0;
- vmx_set_nmi_mask(vcpu, true);
- return 0;
- }
- }
+ if (to_vmx(vcpu)->nested.nested_run_pending)
+ return 0;
if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
return 0;
@@ -4647,19 +4633,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_intr(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
- 0, 0);
- /*
- * fall through to normal code, but now in L1, not L2
- */
- }
- }
-
- return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ return (!to_vmx(vcpu)->nested.nested_run_pending &&
+ vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}
@@ -8155,6 +8130,35 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
}
}
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+ NMI_VECTOR | INTR_TYPE_NMI_INTR |
+ INTR_INFO_VALID_MASK, 0);
+ /*
+ * The NMI-triggered VM exit counts as injection:
+ * clear this one and block further NMIs.
+ */
+ vcpu->arch.nmi_pending = 0;
+ vmx_set_nmi_mask(vcpu, true);
+ return 0;
+ }
+
+ if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+ nested_exit_on_intr(vcpu)) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+ }
+
+ return 0;
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8495,6 +8499,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
vmx->nested.sync_shadow_vmcs = true;
+
+ /* in case we halted in L2 */
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
/*
@@ -8634,6 +8641,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_intercept = vmx_check_intercept,
.handle_external_intr = vmx_handle_external_intr,
+
+ .check_nested_events = vmx_check_nested_events,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f4b0591..fdcdc6d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5803,6 +5803,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
return;
}
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+ kvm_x86_ops->check_nested_events(vcpu, false);
+
/* try to inject new event if pending */
if (vcpu->arch.nmi_pending) {
if (kvm_x86_ops->nmi_allowed(vcpu)) {
@@ -5923,12 +5926,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
inject_pending_event(vcpu);
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+ req_immediate_exit |=
+ kvm_x86_ops->check_nested_events(vcpu,
+ req_int_win) != 0;
+
/* enable NMI/IRQ window open exits if needed */
if (vcpu->arch.nmi_pending)
- req_immediate_exit =
+ req_immediate_exit |=
kvm_x86_ops->enable_nmi_window(vcpu) != 0;
else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- req_immediate_exit =
+ req_immediate_exit |=
kvm_x86_ops->enable_irq_window(vcpu) != 0;
if (kvm_lapic_enabled(vcpu)) {
@@ -7245,6 +7253,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+ kvm_x86_ops->check_nested_events(vcpu, false);
+
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)
--
1.8.1.1.298.ge7eed54
* [PATCH v2 3/4] KVM: nVMX: Fully emulate preemption timer
2014-01-24 15:48 [PATCH v2 0/4] KVM: x86: Fixes for IA32_APIC_BASE and nVMX Jan Kiszka
2014-01-24 15:48 ` [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE Jan Kiszka
2014-01-24 15:48 ` [PATCH v2 2/4] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
@ 2014-01-24 15:48 ` Jan Kiszka
2014-01-24 15:48 ` [PATCH v2 4/4] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
3 siblings, 0 replies; 7+ messages in thread
From: Jan Kiszka @ 2014-01-24 15:48 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
We cannot rely on the hardware-provided preemption timer support because
we are holding L2 in HLT outside non-root mode. Furthermore, emulating
the preemption timer will resolve tick rate errata on older Intel CPUs.
The emulation is based on an hrtimer which is started on L2 entry, stopped
on L2 exit and evaluated via the new check_nested_events hook. As we no
longer rely on hardware features, we can enable both the preemption
timer support and value saving unconditionally.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
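To illustrate the fixed emulated timer rate of 5 (one preemption timer
tick per 32 TSC cycles), here is a worked example of the conversion in
vmx_start_preemption_timer() below, assuming a guest with
virtual_tsc_khz = 2000000 (a 2 GHz TSC) and a vmcs12 timer value of
62500:

    timeout_ns = (62500 << 5) * 1000000 / 2000000
               = 2000000 * 1000000 / 2000000
               = 1000000 ns = 1 ms

vmx_get_preemption_timer_value() performs the same conversion in
reverse so that L1 reads back the remaining time on exit when
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER is set.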
arch/x86/kvm/vmx.c | 151 ++++++++++++++++++++++++++++++++++-------------------
1 file changed, 96 insertions(+), 55 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 089aa3c..081a15c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
#include <linux/ftrace_event.h>
#include <linux/slab.h>
#include <linux/tboot.h>
+#include <linux/hrtimer.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -110,6 +111,8 @@ module_param(nested, bool, S_IRUGO);
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -374,6 +377,9 @@ struct nested_vmx {
*/
struct page *apic_access_page;
u64 msr_ia32_feature_control;
+
+ struct hrtimer preemption_timer;
+ bool preemption_timer_expired;
};
#define POSTED_INTR_ON 0
@@ -1047,6 +1053,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
+static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control &
+ PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -2248,9 +2260,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
*/
nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
- PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+ PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
+ nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
/*
* Exit controls
@@ -2265,15 +2277,10 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
- !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
- nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- }
- nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
- VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2342,9 +2349,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
- nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
- VMX_MISC_SAVE_EFER_LMA;
- nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
+ nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+ nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+ VMX_MISC_ACTIVITY_HLT;
nested_vmx_misc_high = 0;
}
@@ -5702,6 +5709,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
*/
}
+static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
+{
+ struct vcpu_vmx *vmx =
+ container_of(timer, struct vcpu_vmx, nested.preemption_timer);
+
+ vmx->nested.preemption_timer_expired = true;
+ kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+ kvm_vcpu_kick(&vmx->vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
/*
* Emulate the VMXON instruction.
* Currently, we just remember that VMX is active, and do not save or even
@@ -5766,6 +5785,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
vmx->nested.vmcs02_num = 0;
+ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+
vmx->nested.vmxon = true;
skip_emulated_instruction(vcpu);
@@ -6742,9 +6765,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* table is L0's fault.
*/
return 0;
- case EXIT_REASON_PREEMPTION_TIMER:
- return vmcs12->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER;
case EXIT_REASON_WBINVD:
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
@@ -6760,27 +6780,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
-static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
-{
- u64 delta_tsc_l1;
- u32 preempt_val_l1, preempt_val_l2, preempt_scale;
-
- if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER))
- return;
- preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
- MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
- preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
- delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
- - vcpu->arch.last_guest_tsc;
- preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
- if (preempt_val_l2 <= preempt_val_l1)
- preempt_val_l2 = 0;
- else
- preempt_val_l2 -= preempt_val_l1;
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
-}
-
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -7193,8 +7192,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
- if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
- nested_adjust_preemption_timer(vcpu);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -7591,6 +7588,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
kvm_inject_page_fault(vcpu, fault);
}
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vcpu->arch.virtual_tsc_khz == 0)
+ return;
+
+ /* Make sure short timeouts reliably trigger an immediate vmexit.
+ * hrtimer_start does not guarantee this. */
+ if (preemption_timeout <= 1) {
+ vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
+ return;
+ }
+
+ preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+ preemption_timeout *= 1000000;
+ do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
+ hrtimer_start(&vmx->nested.preemption_timer,
+ ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7604,7 +7623,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- u32 exit_control;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7662,13 +7680,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
- (vmcs_config.pin_based_exec_ctrl |
- vmcs12->pin_based_vm_exec_control));
+ exec_control = vmcs12->pin_based_vm_exec_control;
+ exec_control |= vmcs_config.pin_based_exec_ctrl;
+ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
- vmcs12->vmx_preemption_timer_value);
+ vmx->nested.preemption_timer_expired = false;
+ if (nested_cpu_has_preemption_timer(vmcs12))
+ vmx_start_preemption_timer(vcpu);
/*
* Whether page-faults are trapped is determined by a combination of
@@ -7696,7 +7715,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
enable_ept ? vmcs12->page_fault_error_code_match : 0);
if (cpu_has_secondary_exec_ctrls()) {
- u32 exec_control = vmx_secondary_exec_control(vmx);
+ exec_control = vmx_secondary_exec_control(vmx);
if (!vmx->rdtscp_enabled)
exec_control &= ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
@@ -7783,10 +7802,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- exit_control = vmcs_config.vmexit_ctrl;
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- vm_exit_controls_init(vmx, exit_control);
+ vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -8134,6 +8150,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+ vmx->nested.preemption_timer_expired) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
+ return 0;
+ }
+
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
if (vmx->nested.nested_run_pending)
return -EBUSY;
@@ -8159,6 +8183,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
+static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
+{
+ ktime_t remaining =
+ hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
+ u64 value;
+
+ if (ktime_to_ns(remaining) <= 0)
+ return 0;
+
+ value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
+ do_div(value, 1000000);
+ return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8229,10 +8267,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
else
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
- if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
- (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
- vmcs12->vmx_preemption_timer_value =
- vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ if (nested_cpu_has_preemption_timer(vmcs12)) {
+ if (vmcs12->vm_exit_controls &
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+ vmcs12->vmx_preemption_timer_value =
+ vmx_get_preemption_timer_value(vcpu);
+ hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+ }
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
--
1.8.1.1.298.ge7eed54
* [PATCH v2 4/4] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt
2014-01-24 15:48 [PATCH v2 0/4] KVM: x86: Fixes for IA32_APIC_BASE and nVMX Jan Kiszka
` (2 preceding siblings ...)
2014-01-24 15:48 ` [PATCH v2 3/4] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
@ 2014-01-24 15:48 ` Jan Kiszka
3 siblings, 0 replies; 7+ messages in thread
From: Jan Kiszka @ 2014-01-24 15:48 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
According to SDM 27.2.3, IDT vectoring information will not be valid on
vmexits caused by external NMIs. So we have to avoid creating such
scenarios by delaying EXIT_REASON_EXCEPTION_NMI injection as long as an
interrupt is pending for injection, because that interrupt would
otherwise be migrated into L1's IDT-vectoring info on the nested exit.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
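To spell out the scenario the patch avoids (my reading of the code, not
part of the patch): if L0 has an interrupt queued for injection into L2
(vcpu->arch.interrupt.pending) when an NMI that L1 intercepts arrives,
injecting the EXCEPTION_NMI vmexit at that point would make
vmcs12_save_pending_event() migrate the still-pending interrupt into
vmcs12's IDT-vectoring info, i.e. an NMI vmexit with valid IDT-vectoring
information, which SDM 27.2.3 rules out. Returning -EBUSY defers the
NMI vmexit until the pending interrupt has been delivered to L2.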
arch/x86/kvm/vmx.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 081a15c..7ed0ecc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8159,7 +8159,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
}
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
- if (vmx->nested.nested_run_pending)
+ if (vmx->nested.nested_run_pending ||
+ vcpu->arch.interrupt.pending)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
NMI_VECTOR | INTR_TYPE_NMI_INTR |
--
1.8.1.1.298.ge7eed54
* Re: [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE
2014-01-24 15:48 ` [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE Jan Kiszka
@ 2014-01-24 15:59 ` Paolo Bonzini
2014-01-24 16:01 ` Paolo Bonzini
1 sibling, 0 replies; 7+ messages in thread
From: Paolo Bonzini @ 2014-01-24 15:59 UTC (permalink / raw)
To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
[PGP-encrypted message body omitted: this reply was accidentally sent
encrypted and was resent in plain text in the next message.]
* Re: [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE
2014-01-24 15:48 ` [PATCH v2 1/4] KVM: x86: Validate guest writes to MSR_IA32_APICBASE Jan Kiszka
2014-01-24 15:59 ` Paolo Bonzini
@ 2014-01-24 16:01 ` Paolo Bonzini
1 sibling, 0 replies; 7+ messages in thread
From: Paolo Bonzini @ 2014-01-24 16:01 UTC (permalink / raw)
To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
Il 24/01/2014 16:48, Jan Kiszka ha scritto:
> Check for invalid state transitions on guest-initiated updates of
> MSR_IA32_APICBASE. This addresses both enabling of the x2APIC when it is
> not supported and all invalid transitions as described in SDM section
> 10.12.5. It also checks that no reserved bit is set in APICBASE by the
> guest.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
> arch/x86/kvm/cpuid.h | 16 ++++++++++++++++
> arch/x86/kvm/lapic.h | 2 +-
> arch/x86/kvm/vmx.c | 9 +++++----
> arch/x86/kvm/x86.c | 32 +++++++++++++++++++++++++-------
> 4 files changed, 47 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index f1e4895..b012ad2 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -72,4 +72,20 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu)
> return best && (best->ecx & bit(X86_FEATURE_PCID));
> }
>
> +static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpuid_entry2 *best;
> +
> + best = kvm_find_cpuid_entry(vcpu, 1, 0);
> + return best && (best->ecx & bit(X86_FEATURE_X2APIC));
> +}
> +
> +static inline unsigned int guest_cpuid_get_phys_bits(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpuid_entry2 *best;
> +
> + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
> + return best ? best->eax & 0xff : 36;
> +}
[Resending after learning that Ctrl-Shift-C does other things beyond
copying to clipboard]
There's already cpuid_maxphyaddr for this. I can adjust it when committing.
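For context, the existing helper in arch/x86/kvm/cpuid.c looks roughly
like this (quoted from memory, so treat it as a sketch):

int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
{
        struct kvm_cpuid_entry2 *best;

        /* Only trust leaf 0x80000008 if the guest CPUID reports it. */
        best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
        if (!best || best->eax < 0x80000008)
                goto not_found;
        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
        if (best)
                return best->eax & 0xff;
not_found:
        return 36;
}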
This is applied to kvm/queue. The other three will have to wait for
after the merge window.
Paolo
> #endif
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index c8b0d0d..6a11845 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -65,7 +65,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
>
> u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
> -void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
> +int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
> void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
> struct kvm_lapic_state *s);
> int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5c88791..a06f101 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4392,7 +4392,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> - u64 msr;
> + struct msr_data apic_base_msr;
>
> vmx->rmode.vm86_active = 0;
>
> @@ -4400,10 +4400,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
>
> vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> kvm_set_cr8(&vmx->vcpu, 0);
> - msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
> + apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
> if (kvm_vcpu_is_bsp(&vmx->vcpu))
> - msr |= MSR_IA32_APICBASE_BSP;
> - kvm_set_apic_base(&vmx->vcpu, msr);
> + apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
> + apic_base_msr.host_initiated = true;
> + kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
>
> vmx_segment_cache_clear(vmx);
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0c76f7c..f4b0591 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -257,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
> }
> EXPORT_SYMBOL_GPL(kvm_get_apic_base);
>
> -void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
> -{
> - /* TODO: reserve bits check */
> - kvm_lapic_set_base(vcpu, data);
> +int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +{
> + u64 old_state = vcpu->arch.apic_base &
> + (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
> + u64 new_state = msr_info->data &
> + (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
> + u64 reserved_bits = ((~0ULL) << guest_cpuid_get_phys_bits(vcpu)) |
> + 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
> +
> + if (!msr_info->host_initiated &&
> + ((msr_info->data & reserved_bits) != 0 ||
> + new_state == X2APIC_ENABLE ||
> + (new_state == MSR_IA32_APICBASE_ENABLE &&
> + old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
> + (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
> + old_state == 0)))
> + return 1;
> +
> + kvm_lapic_set_base(vcpu, msr_info->data);
> + return 0;
> }
> EXPORT_SYMBOL_GPL(kvm_set_apic_base);
>
> @@ -2006,8 +2022,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case 0x200 ... 0x2ff:
> return set_msr_mtrr(vcpu, msr, data);
> case MSR_IA32_APICBASE:
> - kvm_set_apic_base(vcpu, data);
> - break;
> + return kvm_set_apic_base(vcpu, msr_info);
> case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
> return kvm_x2apic_msr_write(vcpu, msr, data);
> case MSR_IA32_TSCDEADLINE:
> @@ -6409,6 +6424,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
> int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
> struct kvm_sregs *sregs)
> {
> + struct msr_data apic_base_msr;
> int mmu_reset_needed = 0;
> int pending_vec, max_bits, idx;
> struct desc_ptr dt;
> @@ -6432,7 +6448,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
>
> mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
> kvm_x86_ops->set_efer(vcpu, sregs->efer);
> - kvm_set_apic_base(vcpu, sregs->apic_base);
> + apic_base_msr.data = sregs->apic_base;
> + apic_base_msr.host_initiated = true;
> + kvm_set_apic_base(vcpu, &apic_base_msr);
>
> mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
> kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
>