* [PATCH v3 0/4] KVM: x86: Pending nVMX fixes
@ 2014-03-07 19:03 Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
` (4 more replies)
0 siblings, 5 replies; 12+ messages in thread
From: Jan Kiszka @ 2014-03-07 19:03 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
As I noticed a rebase conflict of these pending patches and I wanted to
remind the fact that their are still pending ;), a quick update round.
No functional changes since v2.
Jan
Jan Kiszka (4):
KVM: nVMX: Rework interception of IRQs and NMIs
KVM: nVMX: Fully emulate preemption timer
KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt
x86: Remove return code from enable_irq/nmi_window
arch/x86/include/asm/kvm_host.h | 6 +-
arch/x86/kvm/svm.c | 8 +-
arch/x86/kvm/vmx.c | 244 +++++++++++++++++++++++-----------------
arch/x86/kvm/x86.c | 32 ++++--
4 files changed, 170 insertions(+), 120 deletions(-)
--
1.8.1.1.298.ge7eed54
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-03-07 19:03 [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
@ 2014-03-07 19:03 ` Jan Kiszka
2014-03-07 19:48 ` Paolo Bonzini
2014-03-07 19:03 ` [PATCH v3 2/4] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
` (3 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Jan Kiszka @ 2014-03-07 19:03 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
Move the check for leaving L2 on pending and intercepted IRQs or NMIs
from the *_allowed handler into a dedicated callback. Invoke this
callback at the relevant points before KVM checks if IRQs/NMIs can be
injected. The callback has the task to switch from L2 to L1 if needed
and inject the proper vmexit events.
The rework fixes L2 wakeups from HLT and provides the foundation for
preemption timer emulation.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/vmx.c | 67 +++++++++++++++++++++++------------------
arch/x86/kvm/x86.c | 26 +++++++++++-----
3 files changed, 59 insertions(+), 36 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e714f8c..84f7373 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -765,6 +765,8 @@ struct kvm_x86_ops {
enum x86_intercept_stage stage);
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
+
+ int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 83ee24f..d66f739 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_nmi(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
- NMI_VECTOR | INTR_TYPE_NMI_INTR |
- INTR_INFO_VALID_MASK, 0);
- /*
- * The NMI-triggered VM exit counts as injection:
- * clear this one and block further NMIs.
- */
- vcpu->arch.nmi_pending = 0;
- vmx_set_nmi_mask(vcpu, true);
- return 0;
- }
- }
+ if (to_vmx(vcpu)->nested.nested_run_pending)
+ return 0;
if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
return 0;
@@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu)) {
- if (to_vmx(vcpu)->nested.nested_run_pending)
- return 0;
- if (nested_exit_on_intr(vcpu)) {
- nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
- 0, 0);
- /*
- * fall through to normal code, but now in L1, not L2
- */
- }
- }
-
- return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ return (!to_vmx(vcpu)->nested.nested_run_pending &&
+ vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}
@@ -8172,6 +8147,35 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
}
}
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+ NMI_VECTOR | INTR_TYPE_NMI_INTR |
+ INTR_INFO_VALID_MASK, 0);
+ /*
+ * The NMI-triggered VM exit counts as injection:
+ * clear this one and block further NMIs.
+ */
+ vcpu->arch.nmi_pending = 0;
+ vmx_set_nmi_mask(vcpu, true);
+ return 0;
+ }
+
+ if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+ nested_exit_on_intr(vcpu)) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+ }
+
+ return 0;
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8512,6 +8516,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
vmx->nested.sync_shadow_vmcs = true;
+
+ /* in case we halted in L2 */
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
/*
@@ -8652,6 +8659,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.check_intercept = vmx_check_intercept,
.handle_external_intr = vmx_handle_external_intr,
.mpx_supported = vmx_mpx_supported,
+
+ .check_nested_events = vmx_check_nested_events,
};
static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4cca458..9ec2f26 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5788,8 +5788,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
}
-static void inject_pending_event(struct kvm_vcpu *vcpu)
+static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
{
+ int r;
+
/* try to reinject previous events if any */
if (vcpu->arch.exception.pending) {
trace_kvm_inj_exception(vcpu->arch.exception.nr,
@@ -5799,17 +5801,23 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
vcpu->arch.exception.reinject);
- return;
+ return 0;
}
if (vcpu->arch.nmi_injected) {
kvm_x86_ops->set_nmi(vcpu);
- return;
+ return 0;
}
if (vcpu->arch.interrupt.pending) {
kvm_x86_ops->set_irq(vcpu);
- return;
+ return 0;
+ }
+
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+ if (r != 0)
+ return r;
}
/* try to inject new event if pending */
@@ -5826,6 +5834,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
kvm_x86_ops->set_irq(vcpu);
}
}
+ return 0;
}
static void process_nmi(struct kvm_vcpu *vcpu)
@@ -5930,10 +5939,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
- inject_pending_event(vcpu);
-
+ if (inject_pending_event(vcpu, req_int_win) != 0)
+ req_immediate_exit = true;
/* enable NMI/IRQ window open exits if needed */
- if (vcpu->arch.nmi_pending)
+ else if (vcpu->arch.nmi_pending)
req_immediate_exit =
kvm_x86_ops->enable_nmi_window(vcpu) != 0;
else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
@@ -7254,6 +7263,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+ kvm_x86_ops->check_nested_events(vcpu, false);
+
return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
|| !list_empty_careful(&vcpu->async_pf.done)
--
1.8.1.1.298.ge7eed54
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 2/4] KVM: nVMX: Fully emulate preemption timer
2014-03-07 19:03 [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
@ 2014-03-07 19:03 ` Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 3/4] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
` (2 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2014-03-07 19:03 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
We cannot rely on the hardware-provided preemption timer support because
we are holding L2 in HLT outside non-root mode. Furthermore, emulating
the preemption will resolve tick rate errata on older Intel CPUs.
The emulation is based on hrtimer which is started on L2 entry, stopped
on L2 exit and evaluated via the new check_nested_events hook. As we no
longer rely on hardware features, we can enable both the preemption
timer support and value saving unconditionally.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 151 ++++++++++++++++++++++++++++++++++-------------------
1 file changed, 96 insertions(+), 55 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d66f739..22fa0f7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
#include <linux/ftrace_event.h>
#include <linux/slab.h>
#include <linux/tboot.h>
+#include <linux/hrtimer.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -110,6 +111,8 @@ module_param(nested, bool, S_IRUGO);
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
+#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -374,6 +377,9 @@ struct nested_vmx {
*/
struct page *apic_access_page;
u64 msr_ia32_feature_control;
+
+ struct hrtimer preemption_timer;
+ bool preemption_timer_expired;
};
#define POSTED_INTR_ON 0
@@ -1048,6 +1054,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}
+static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control &
+ PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -2253,9 +2265,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
*/
nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
- PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+ PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
+ nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
/*
* Exit controls
@@ -2270,15 +2282,10 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+ VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
- !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
- nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
- }
- nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
- VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2347,9 +2354,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
- nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
- VMX_MISC_SAVE_EFER_LMA;
- nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
+ nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+ nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+ VMX_MISC_ACTIVITY_HLT;
nested_vmx_misc_high = 0;
}
@@ -5713,6 +5720,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
*/
}
+static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
+{
+ struct vcpu_vmx *vmx =
+ container_of(timer, struct vcpu_vmx, nested.preemption_timer);
+
+ vmx->nested.preemption_timer_expired = true;
+ kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+ kvm_vcpu_kick(&vmx->vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
/*
* Emulate the VMXON instruction.
* Currently, we just remember that VMX is active, and do not save or even
@@ -5777,6 +5796,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
vmx->nested.vmcs02_num = 0;
+ hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+
vmx->nested.vmxon = true;
skip_emulated_instruction(vcpu);
@@ -6753,9 +6776,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* table is L0's fault.
*/
return 0;
- case EXIT_REASON_PREEMPTION_TIMER:
- return vmcs12->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER;
case EXIT_REASON_WBINVD:
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
@@ -6771,27 +6791,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
-static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
-{
- u64 delta_tsc_l1;
- u32 preempt_val_l1, preempt_val_l2, preempt_scale;
-
- if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
- PIN_BASED_VMX_PREEMPTION_TIMER))
- return;
- preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
- MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
- preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
- delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
- - vcpu->arch.last_guest_tsc;
- preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
- if (preempt_val_l2 <= preempt_val_l1)
- preempt_val_l2 = 0;
- else
- preempt_val_l2 -= preempt_val_l1;
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
-}
-
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -7210,8 +7209,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();
- if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
- nested_adjust_preemption_timer(vcpu);
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -7608,6 +7605,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
kvm_inject_page_fault(vcpu, fault);
}
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vcpu->arch.virtual_tsc_khz == 0)
+ return;
+
+ /* Make sure short timeouts reliably trigger an immediate vmexit.
+ * hrtimer_start does not guarantee this. */
+ if (preemption_timeout <= 1) {
+ vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
+ return;
+ }
+
+ preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+ preemption_timeout *= 1000000;
+ do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
+ hrtimer_start(&vmx->nested.preemption_timer,
+ ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7621,7 +7640,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- u32 exit_control;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7679,13 +7697,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
- vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
- (vmcs_config.pin_based_exec_ctrl |
- vmcs12->pin_based_vm_exec_control));
+ exec_control = vmcs12->pin_based_vm_exec_control;
+ exec_control |= vmcs_config.pin_based_exec_ctrl;
+ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
- vmcs12->vmx_preemption_timer_value);
+ vmx->nested.preemption_timer_expired = false;
+ if (nested_cpu_has_preemption_timer(vmcs12))
+ vmx_start_preemption_timer(vcpu);
/*
* Whether page-faults are trapped is determined by a combination of
@@ -7713,7 +7732,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
enable_ept ? vmcs12->page_fault_error_code_match : 0);
if (cpu_has_secondary_exec_ctrls()) {
- u32 exec_control = vmx_secondary_exec_control(vmx);
+ exec_control = vmx_secondary_exec_control(vmx);
if (!vmx->rdtscp_enabled)
exec_control &= ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
@@ -7800,10 +7819,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- exit_control = vmcs_config.vmexit_ctrl;
- if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
- exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- vm_exit_controls_init(vmx, exit_control);
+ vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -8151,6 +8167,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+ vmx->nested.preemption_timer_expired) {
+ if (vmx->nested.nested_run_pending)
+ return -EBUSY;
+ nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
+ return 0;
+ }
+
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
if (vmx->nested.nested_run_pending)
return -EBUSY;
@@ -8176,6 +8200,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
+static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
+{
+ ktime_t remaining =
+ hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
+ u64 value;
+
+ if (ktime_to_ns(remaining) <= 0)
+ return 0;
+
+ value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
+ do_div(value, 1000000);
+ return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+}
+
/*
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8246,10 +8284,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
else
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
- if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
- (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
- vmcs12->vmx_preemption_timer_value =
- vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ if (nested_cpu_has_preemption_timer(vmcs12)) {
+ if (vmcs12->vm_exit_controls &
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+ vmcs12->vmx_preemption_timer_value =
+ vmx_get_preemption_timer_value(vcpu);
+ hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+ }
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
--
1.8.1.1.298.ge7eed54
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 3/4] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt
2014-03-07 19:03 [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 2/4] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
@ 2014-03-07 19:03 ` Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 4/4] x86: Remove return code from enable_irq/nmi_window Jan Kiszka
2014-03-07 19:06 ` [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
4 siblings, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2014-03-07 19:03 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
According to SDM 27.2.3, IDT vectoring information will not be valid on
vmexits caused by external NMIs. So we have to avoid creating such
scenarios by delaying EXIT_REASON_EXCEPTION_NMI injection as long as we
have a pending interrupt because that one would be migrated to L1's IDT
vectoring info on nested exit.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 22fa0f7..9abb21f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8176,7 +8176,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
}
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
- if (vmx->nested.nested_run_pending)
+ if (vmx->nested.nested_run_pending ||
+ vcpu->arch.interrupt.pending)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
NMI_VECTOR | INTR_TYPE_NMI_INTR |
--
1.8.1.1.298.ge7eed54
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v3 4/4] x86: Remove return code from enable_irq/nmi_window
2014-03-07 19:03 [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
` (2 preceding siblings ...)
2014-03-07 19:03 ` [PATCH v3 3/4] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
@ 2014-03-07 19:03 ` Jan Kiszka
2014-03-07 19:06 ` [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
4 siblings, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2014-03-07 19:03 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
It's no longer possible to enter enable_irq_window in guest mode when
L1 intercepts external interrupts and we are entering L2. This is now
caught in vcpu_enter_guest. So we can remove the check from the VMX
version of enable_irq_window, thus the need to return an error code from
both enable_irq_window and enable_nmi_window.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/include/asm/kvm_host.h | 4 ++--
arch/x86/kvm/svm.c | 8 +++-----
arch/x86/kvm/vmx.c | 25 +++++++------------------
arch/x86/kvm/x86.c | 6 ++----
4 files changed, 14 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 84f7373..faf1c7f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -727,8 +727,8 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
- int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
- int (*enable_irq_window)(struct kvm_vcpu *vcpu);
+ void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+ void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*vm_has_apicv)(struct kvm *kvm);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 64d9bb9..1e8616e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3650,7 +3650,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
return ret;
}
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3664,16 +3664,15 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
}
- return 0;
}
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
== HF_NMI_MASK)
- return 0; /* IRET will cause a vm exit */
+ return; /* IRET will cause a vm exit */
/*
* Something prevents NMI from been injected. Single step over possible
@@ -3682,7 +3681,6 @@ static int enable_nmi_window(struct kvm_vcpu *vcpu)
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_bp_intercept(vcpu);
- return 0;
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9abb21f..4d12bfc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4514,39 +4514,28 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
PIN_BASED_NMI_EXITING;
}
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
- /*
- * We get here if vmx_interrupt_allowed() said we can't
- * inject to L1 now because L2 must run. The caller will have
- * to make L2 exit right after entry, so we can inject to L1
- * more promptly.
- */
- return -EBUSY;
-
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- return 0;
}
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- if (!cpu_has_virtual_nmis())
- return enable_irq_window(vcpu);
-
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI)
- return enable_irq_window(vcpu);
+ if (!cpu_has_virtual_nmis() ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ enable_irq_window(vcpu);
+ return;
+ }
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- return 0;
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ec2f26..ac423f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5943,11 +5943,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
req_immediate_exit = true;
/* enable NMI/IRQ window open exits if needed */
else if (vcpu->arch.nmi_pending)
- req_immediate_exit =
- kvm_x86_ops->enable_nmi_window(vcpu) != 0;
+ kvm_x86_ops->enable_nmi_window(vcpu);
else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
- req_immediate_exit =
- kvm_x86_ops->enable_irq_window(vcpu) != 0;
+ kvm_x86_ops->enable_irq_window(vcpu);
if (kvm_lapic_enabled(vcpu)) {
/*
--
1.8.1.1.298.ge7eed54
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 0/4] KVM: x86: Pending nVMX fixes
2014-03-07 19:03 [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
` (3 preceding siblings ...)
2014-03-07 19:03 ` [PATCH v3 4/4] x86: Remove return code from enable_irq/nmi_window Jan Kiszka
@ 2014-03-07 19:06 ` Jan Kiszka
4 siblings, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2014-03-07 19:06 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
On 2014-03-07 20:03, Jan Kiszka wrote:
> As I noticed a rebase conflict of these pending patches and I wanted to
> remind the fact that their are still pending ;), a quick update round.
> No functional changes since v2.
>
Forgot to press save to send this as well:
"Also passed some stress testing of high NMI and IRQ load in nested
scenario here."
Jan
--
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-03-07 19:03 ` [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
@ 2014-03-07 19:48 ` Paolo Bonzini
2014-03-08 9:21 ` Jan Kiszka
0 siblings, 1 reply; 12+ messages in thread
From: Paolo Bonzini @ 2014-03-07 19:48 UTC (permalink / raw)
To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
Il 07/03/2014 20:03, Jan Kiszka ha scritto:
> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
>
> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
> {
> - if (is_guest_mode(vcpu)) {
> - if (to_vmx(vcpu)->nested.nested_run_pending)
> - return 0;
> - if (nested_exit_on_nmi(vcpu)) {
> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
> - NMI_VECTOR | INTR_TYPE_NMI_INTR |
> - INTR_INFO_VALID_MASK, 0);
> - /*
> - * The NMI-triggered VM exit counts as injection:
> - * clear this one and block further NMIs.
> - */
> - vcpu->arch.nmi_pending = 0;
> - vmx_set_nmi_mask(vcpu, true);
> - return 0;
> - }
> - }
> + if (to_vmx(vcpu)->nested.nested_run_pending)
> + return 0;
>
> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
> return 0;
> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>
> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
> {
> - if (is_guest_mode(vcpu)) {
> - if (to_vmx(vcpu)->nested.nested_run_pending)
> - return 0;
> - if (nested_exit_on_intr(vcpu)) {
> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
> - 0, 0);
> - /*
> - * fall through to normal code, but now in L1, not L2
> - */
> - }
> - }
> -
> - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
> + return (!to_vmx(vcpu)->nested.nested_run_pending &&
> + vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
> !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
> (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
The checks on nested_run_pending are not needed anymore and can be
replaced with a WARN_ON. Otherwise,
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Paolo
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-03-07 19:48 ` Paolo Bonzini
@ 2014-03-08 9:21 ` Jan Kiszka
2014-03-09 7:33 ` Paolo Bonzini
0 siblings, 1 reply; 12+ messages in thread
From: Jan Kiszka @ 2014-03-08 9:21 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
[-- Attachment #1: Type: text/plain, Size: 2496 bytes --]
On 2014-03-07 20:48, Paolo Bonzini wrote:
> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>> *vcpu, bool masked)
>>
>> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>> {
>> - if (is_guest_mode(vcpu)) {
>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>> - return 0;
>> - if (nested_exit_on_nmi(vcpu)) {
>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>> - NMI_VECTOR | INTR_TYPE_NMI_INTR |
>> - INTR_INFO_VALID_MASK, 0);
>> - /*
>> - * The NMI-triggered VM exit counts as injection:
>> - * clear this one and block further NMIs.
>> - */
>> - vcpu->arch.nmi_pending = 0;
>> - vmx_set_nmi_mask(vcpu, true);
>> - return 0;
>> - }
>> - }
>> + if (to_vmx(vcpu)->nested.nested_run_pending)
>> + return 0;
>>
>> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>> return 0;
>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>
>> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>> {
>> - if (is_guest_mode(vcpu)) {
>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>> - return 0;
>> - if (nested_exit_on_intr(vcpu)) {
>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>> - 0, 0);
>> - /*
>> - * fall through to normal code, but now in L1, not L2
>> - */
>> - }
>> - }
>> -
>> - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>> + return (!to_vmx(vcpu)->nested.nested_run_pending &&
>> + vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>> !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>> (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>
> The checks on nested_run_pending are not needed anymore and can be
> replaced with a WARN_ON. Otherwise,
Nope, that won't be correct: If we have a pending interrupt that L1 does
not intercept, we still trigger this condition legally.
Jan
>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
>
> Paolo
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-03-08 9:21 ` Jan Kiszka
@ 2014-03-09 7:33 ` Paolo Bonzini
2014-03-09 8:01 ` Jan Kiszka
2014-03-09 8:03 ` Paolo Bonzini
0 siblings, 2 replies; 12+ messages in thread
From: Paolo Bonzini @ 2014-03-09 7:33 UTC (permalink / raw)
To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
Il 08/03/2014 10:21, Jan Kiszka ha scritto:
> On 2014-03-07 20:48, Paolo Bonzini wrote:
>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>> *vcpu, bool masked)
>>>
>>> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>> {
>>> - if (is_guest_mode(vcpu)) {
>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>> - return 0;
>>> - if (nested_exit_on_nmi(vcpu)) {
>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>> - NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>> - INTR_INFO_VALID_MASK, 0);
>>> - /*
>>> - * The NMI-triggered VM exit counts as injection:
>>> - * clear this one and block further NMIs.
>>> - */
>>> - vcpu->arch.nmi_pending = 0;
>>> - vmx_set_nmi_mask(vcpu, true);
>>> - return 0;
>>> - }
>>> - }
>>> + if (to_vmx(vcpu)->nested.nested_run_pending)
>>> + return 0;
>>>
>>> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>> return 0;
>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>
>>> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>> {
>>> - if (is_guest_mode(vcpu)) {
>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>> - return 0;
>>> - if (nested_exit_on_intr(vcpu)) {
>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>> - 0, 0);
>>> - /*
>>> - * fall through to normal code, but now in L1, not L2
>>> - */
>>> - }
>>> - }
>>> -
>>> - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>> + return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>> + vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>> !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>> (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>
>> The checks on nested_run_pending are not needed anymore and can be
>> replaced with a WARN_ON. Otherwise,
>
> Nope, that won't be correct: If we have a pending interrupt that L1 does
> not intercept, we still trigger this condition legally.
Right, this is the case of !nested_exit_on_intr(vcpu) or
!nested_exit_on_nmi(vcpu).
Why don't we need to request an immediate exit in that case, in order to
inject the interrupt into L2?
Paolo
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-03-09 7:33 ` Paolo Bonzini
@ 2014-03-09 8:01 ` Jan Kiszka
2014-03-09 8:03 ` Paolo Bonzini
1 sibling, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2014-03-09 8:01 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
[-- Attachment #1: Type: text/plain, Size: 2707 bytes --]
On 2014-03-09 08:33, Paolo Bonzini wrote:
> Il 08/03/2014 10:21, Jan Kiszka ha scritto:
>> On 2014-03-07 20:48, Paolo Bonzini wrote:
>>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>>> *vcpu, bool masked)
>>>>
>>>> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>> {
>>>> - if (is_guest_mode(vcpu)) {
>>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> - return 0;
>>>> - if (nested_exit_on_nmi(vcpu)) {
>>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>>> - NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>>> - INTR_INFO_VALID_MASK, 0);
>>>> - /*
>>>> - * The NMI-triggered VM exit counts as injection:
>>>> - * clear this one and block further NMIs.
>>>> - */
>>>> - vcpu->arch.nmi_pending = 0;
>>>> - vmx_set_nmi_mask(vcpu, true);
>>>> - return 0;
>>>> - }
>>>> - }
>>>> + if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> + return 0;
>>>>
>>>> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>>> return 0;
>>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu
>>>> *vcpu)
>>>>
>>>> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>>> {
>>>> - if (is_guest_mode(vcpu)) {
>>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> - return 0;
>>>> - if (nested_exit_on_intr(vcpu)) {
>>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>>> - 0, 0);
>>>> - /*
>>>> - * fall through to normal code, but now in L1, not L2
>>>> - */
>>>> - }
>>>> - }
>>>> -
>>>> - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>> + return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>>> + vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>> !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>>> (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>>
>>> The checks on nested_run_pending are not needed anymore and can be
>>> replaced with a WARN_ON. Otherwise,
>>
>> Nope, that won't be correct: If we have a pending interrupt that L1 does
>> not intercept, we still trigger this condition legally.
>
> Right, this is the case of !nested_exit_on_intr(vcpu) or
> !nested_exit_on_nmi(vcpu).
>
> Why don't we need to request an immediate exit in that case, in order to
> inject the interrupt into L2?
We enable the hardware interrupt/NMI window request for L2 instead.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-03-09 7:33 ` Paolo Bonzini
2014-03-09 8:01 ` Jan Kiszka
@ 2014-03-09 8:03 ` Paolo Bonzini
2014-03-09 8:07 ` Jan Kiszka
1 sibling, 1 reply; 12+ messages in thread
From: Paolo Bonzini @ 2014-03-09 8:03 UTC (permalink / raw)
To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
Il 09/03/2014 08:33, Paolo Bonzini ha scritto:
> Il 08/03/2014 10:21, Jan Kiszka ha scritto:
>> On 2014-03-07 20:48, Paolo Bonzini wrote:
>>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>>> *vcpu, bool masked)
>>>>
>>>> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>> {
>>>> - if (is_guest_mode(vcpu)) {
>>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> - return 0;
>>>> - if (nested_exit_on_nmi(vcpu)) {
>>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>>> - NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>>> - INTR_INFO_VALID_MASK, 0);
>>>> - /*
>>>> - * The NMI-triggered VM exit counts as injection:
>>>> - * clear this one and block further NMIs.
>>>> - */
>>>> - vcpu->arch.nmi_pending = 0;
>>>> - vmx_set_nmi_mask(vcpu, true);
>>>> - return 0;
>>>> - }
>>>> - }
>>>> + if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> + return 0;
>>>>
>>>> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>>> return 0;
>>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu
>>>> *vcpu)
>>>>
>>>> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>>> {
>>>> - if (is_guest_mode(vcpu)) {
>>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>>> - return 0;
>>>> - if (nested_exit_on_intr(vcpu)) {
>>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>>> - 0, 0);
>>>> - /*
>>>> - * fall through to normal code, but now in L1, not L2
>>>> - */
>>>> - }
>>>> - }
>>>> -
>>>> - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>> + return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>>> + vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>> !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>>> (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>>
>>> The checks on nested_run_pending are not needed anymore and can be
>>> replaced with a WARN_ON. Otherwise,
>>
>> Nope, that won't be correct: If we have a pending interrupt that L1 does
>> not intercept, we still trigger this condition legally.
>
> Right, this is the case of !nested_exit_on_intr(vcpu) or
> !nested_exit_on_nmi(vcpu).
>
> Why don't we need to request an immediate exit in that case, in order to
> inject the interrupt into L2?
Nevermind, this makes no sense. I was confusing *_allowed with
enable_*_window.
Applying v3 to kvm/queue, thanks!
Paolo
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs
2014-03-09 8:03 ` Paolo Bonzini
@ 2014-03-09 8:07 ` Jan Kiszka
0 siblings, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2014-03-09 8:07 UTC (permalink / raw)
To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm
[-- Attachment #1: Type: text/plain, Size: 3045 bytes --]
On 2014-03-09 09:03, Paolo Bonzini wrote:
> Il 09/03/2014 08:33, Paolo Bonzini ha scritto:
>> Il 08/03/2014 10:21, Jan Kiszka ha scritto:
>>> On 2014-03-07 20:48, Paolo Bonzini wrote:
>>>> Il 07/03/2014 20:03, Jan Kiszka ha scritto:
>>>>> @@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu
>>>>> *vcpu, bool masked)
>>>>>
>>>>> static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>>>>> {
>>>>> - if (is_guest_mode(vcpu)) {
>>>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>>>> - return 0;
>>>>> - if (nested_exit_on_nmi(vcpu)) {
>>>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
>>>>> - NMI_VECTOR | INTR_TYPE_NMI_INTR |
>>>>> - INTR_INFO_VALID_MASK, 0);
>>>>> - /*
>>>>> - * The NMI-triggered VM exit counts as injection:
>>>>> - * clear this one and block further NMIs.
>>>>> - */
>>>>> - vcpu->arch.nmi_pending = 0;
>>>>> - vmx_set_nmi_mask(vcpu, true);
>>>>> - return 0;
>>>>> - }
>>>>> - }
>>>>> + if (to_vmx(vcpu)->nested.nested_run_pending)
>>>>> + return 0;
>>>>>
>>>>> if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>>>>> return 0;
>>>>> @@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu
>>>>> *vcpu)
>>>>>
>>>>> static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>>>>> {
>>>>> - if (is_guest_mode(vcpu)) {
>>>>> - if (to_vmx(vcpu)->nested.nested_run_pending)
>>>>> - return 0;
>>>>> - if (nested_exit_on_intr(vcpu)) {
>>>>> - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
>>>>> - 0, 0);
>>>>> - /*
>>>>> - * fall through to normal code, but now in L1, not L2
>>>>> - */
>>>>> - }
>>>>> - }
>>>>> -
>>>>> - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>>> + return (!to_vmx(vcpu)->nested.nested_run_pending &&
>>>>> + vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>>>>> !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>>>>> (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>>>>
>>>> The checks on nested_run_pending are not needed anymore and can be
>>>> replaced with a WARN_ON. Otherwise,
>>>
>>> Nope, that won't be correct: If we have a pending interrupt that L1 does
>>> not intercept, we still trigger this condition legally.
>>
>> Right, this is the case of !nested_exit_on_intr(vcpu) or
>> !nested_exit_on_nmi(vcpu).
>>
>> Why don't we need to request an immediate exit in that case, in order to
>> inject the interrupt into L2?
>
> Nevermind, this makes no sense. I was confusing *_allowed with
> enable_*_window.
This code is mind-blowing and probably still not perfect. I wouldn't be
surprised if we are going to find bugs there until we retire. ;)
>
> Applying v3 to kvm/queue, thanks!
>
Great, thank you!
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2014-03-09 8:07 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-03-07 19:03 [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 1/4] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
2014-03-07 19:48 ` Paolo Bonzini
2014-03-08 9:21 ` Jan Kiszka
2014-03-09 7:33 ` Paolo Bonzini
2014-03-09 8:01 ` Jan Kiszka
2014-03-09 8:03 ` Paolo Bonzini
2014-03-09 8:07 ` Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 2/4] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 3/4] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
2014-03-07 19:03 ` [PATCH v3 4/4] x86: Remove return code from enable_irq/nmi_window Jan Kiszka
2014-03-07 19:06 ` [PATCH v3 0/4] KVM: x86: Pending nVMX fixes Jan Kiszka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox