kvm.vger.kernel.org archive mirror
* [PATCH v3 0/3] KVM: x86: Pending nVMX fixes
@ 2014-03-06 17:33 Jan Kiszka
  2014-03-06 17:33 ` [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Jan Kiszka @ 2014-03-06 17:33 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

As I noticed a rebase conflict in these pending patches and wanted to
point out that they are still pending ;), here is a quick update round.
No functional changes since v2.

Jan

Jan Kiszka (3):
  KVM: nVMX: Rework interception of IRQs and NMIs
  KVM: nVMX: Fully emulate preemption timer
  KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt

 arch/x86/include/asm/kvm_host.h |   2 +
 arch/x86/kvm/vmx.c              | 219 +++++++++++++++++++++++++---------------
 arch/x86/kvm/x86.c              |  15 ++-
 3 files changed, 150 insertions(+), 86 deletions(-)

-- 
1.8.1.1.298.ge7eed54



* [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-06 17:33 [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Jan Kiszka
@ 2014-03-06 17:33 ` Jan Kiszka
  2014-03-07 15:44   ` Paolo Bonzini
  2014-03-06 17:33 ` [PATCH v3 2/3] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Jan Kiszka @ 2014-03-06 17:33 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

Move the check for leaving L2 on pending and intercepted IRQs or NMIs
from the *_allowed handler into a dedicated callback. Invoke this
callback at the relevant points before KVM checks if IRQs/NMIs can be
injected. The callback is responsible for switching from L2 to L1 if
needed and for injecting the proper vmexit events.

The rework fixes L2 wakeups from HLT and provides the foundation for
preemption timer emulation.
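
In short, the new hook ends up being called like this (a condensed
sketch of the x86.c hunks below, details trimmed):

	/* before trying to inject pending IRQs/NMIs */
	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
		kvm_x86_ops->check_nested_events(vcpu, false);

	/* in vcpu_enter_guest(): a non-zero return value (-EBUSY while
	 * a vmlaunch/vmresume is pending) requests an immediate exit so
	 * that the L2->L1 switch can be retried right after entry */
	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
		req_immediate_exit |=
			kvm_x86_ops->check_nested_events(vcpu,
						req_int_win) != 0;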

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx.c              | 67 +++++++++++++++++++++++------------------
 arch/x86/kvm/x86.c              | 15 +++++++--
 3 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 85be627..461d00a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -767,6 +767,8 @@ struct kvm_x86_ops {
 			       enum x86_intercept_stage stage);
 	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
+
+	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 53c324f..11718b44 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4631,22 +4631,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu)) {
-		if (to_vmx(vcpu)->nested.nested_run_pending)
-			return 0;
-		if (nested_exit_on_nmi(vcpu)) {
-			nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
-					  NMI_VECTOR | INTR_TYPE_NMI_INTR |
-					  INTR_INFO_VALID_MASK, 0);
-			/*
-			 * The NMI-triggered VM exit counts as injection:
-			 * clear this one and block further NMIs.
-			 */
-			vcpu->arch.nmi_pending = 0;
-			vmx_set_nmi_mask(vcpu, true);
-			return 0;
-		}
-	}
+	if (to_vmx(vcpu)->nested.nested_run_pending)
+		return 0;
 
 	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
 		return 0;
@@ -4658,19 +4644,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu)) {
-		if (to_vmx(vcpu)->nested.nested_run_pending)
-			return 0;
-		if (nested_exit_on_intr(vcpu)) {
-			nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
-					  0, 0);
-			/*
-			 * fall through to normal code, but now in L1, not L2
-			 */
-		}
-	}
-
-	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+	return (!to_vmx(vcpu)->nested.nested_run_pending &&
+		vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
 		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
 			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
 }
@@ -8172,6 +8147,35 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 	}
 }
 
+static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
+				  INTR_INFO_VALID_MASK, 0);
+		/*
+		 * The NMI-triggered VM exit counts as injection:
+		 * clear this one and block further NMIs.
+		 */
+		vcpu->arch.nmi_pending = 0;
+		vmx_set_nmi_mask(vcpu, true);
+		return 0;
+	}
+
+	if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
+	    nested_exit_on_intr(vcpu)) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+	}
+
+	return 0;
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8512,6 +8516,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 		nested_vmx_succeed(vcpu);
 	if (enable_shadow_vmcs)
 		vmx->nested.sync_shadow_vmcs = true;
+
+	/* in case we halted in L2 */
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
 /*
@@ -8652,6 +8659,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
+
+	.check_nested_events = vmx_check_nested_events,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1e91a24..9f39aa6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5845,6 +5845,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 		return;
 	}
 
+	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+		kvm_x86_ops->check_nested_events(vcpu, false);
+
 	/* try to inject new event if pending */
 	if (vcpu->arch.nmi_pending) {
 		if (kvm_x86_ops->nmi_allowed(vcpu)) {
@@ -5965,12 +5968,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		inject_pending_event(vcpu);
 
+		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+			req_immediate_exit |=
+				kvm_x86_ops->check_nested_events(vcpu,
+							req_int_win) != 0;
+
 		/* enable NMI/IRQ window open exits if needed */
 		if (vcpu->arch.nmi_pending)
-			req_immediate_exit =
+			req_immediate_exit |=
 				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
 		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
-			req_immediate_exit =
+			req_immediate_exit |=
 				kvm_x86_ops->enable_irq_window(vcpu) != 0;
 
 		if (kvm_lapic_enabled(vcpu)) {
@@ -7296,6 +7304,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
+	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
+		kvm_x86_ops->check_nested_events(vcpu, false);
+
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
 		!vcpu->arch.apf.halted)
 		|| !list_empty_careful(&vcpu->async_pf.done)
-- 
1.8.1.1.298.ge7eed54



* [PATCH v3 2/3] KVM: nVMX: Fully emulate preemption timer
  2014-03-06 17:33 [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Jan Kiszka
  2014-03-06 17:33 ` [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
@ 2014-03-06 17:33 ` Jan Kiszka
  2014-03-07 17:20   ` Marcelo Tosatti
  2014-03-06 17:33 ` [PATCH v3 3/3] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
  2014-03-06 17:41 ` [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Paolo Bonzini
  3 siblings, 1 reply; 15+ messages in thread
From: Jan Kiszka @ 2014-03-06 17:33 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

We cannot rely on the hardware-provided preemption timer support because
we are holding L2 in HLT outside non-root mode. Furthermore, emulating
the preemption timer resolves tick rate errata on older Intel CPUs.

The emulation is based on an hrtimer that is started on L2 entry, stopped
on L2 exit, and evaluated via the new check_nested_events hook. As we no
longer rely on hardware features, we can enable both the preemption
timer support and value saving unconditionally.
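
The emulated timer runs at a fixed rate of one tick per 2^5 = 32 TSC
cycles (VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE below). As a worked
example of the conversion performed by vmx_start_preemption_timer() in
this patch (the numbers are made up for illustration):

	/* ns = value * 2^5 * 1000000 / tsc_khz, e.g. for a timer value
	 * of 1000 ticks and virtual_tsc_khz = 2000000 (a 2 GHz TSC):
	 * 1000 * 32 * 1000000 / 2000000 = 16000 ns (= 32000 TSC cycles) */
	preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
	preemption_timeout *= 1000000;
	do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);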

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/kvm/vmx.c | 151 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 96 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 11718b44..e559675 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/slab.h>
 #include <linux/tboot.h>
+#include <linux/hrtimer.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -110,6 +111,8 @@ module_param(nested, bool, S_IRUGO);
 
 #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
 
+#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
@@ -374,6 +377,9 @@ struct nested_vmx {
 	 */
 	struct page *apic_access_page;
 	u64 msr_ia32_feature_control;
+
+	struct hrtimer preemption_timer;
+	bool preemption_timer_expired;
 };
 
 #define POSTED_INTR_ON  0
@@ -1048,6 +1054,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
 	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
 }
 
+static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
+{
+	return vmcs12->pin_based_vm_exec_control &
+		PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
 static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 {
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -2253,9 +2265,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	 */
 	nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 	nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
-		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
+		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
+	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
-	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 
 	/*
 	 * Exit controls
@@ -2270,15 +2282,10 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #ifdef CONFIG_X86_64
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
-		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
 		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-	if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) ||
-	    !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
-		nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-		nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-	}
-	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
-		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2347,9 +2354,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 
 	/* miscellaneous data */
 	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
-	nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
-		VMX_MISC_SAVE_EFER_LMA;
-	nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
+	nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
+	nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
+		VMX_MISC_ACTIVITY_HLT;
 	nested_vmx_misc_high = 0;
 }
 
@@ -5713,6 +5720,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
 	 */
 }
 
+static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
+{
+	struct vcpu_vmx *vmx =
+		container_of(timer, struct vcpu_vmx, nested.preemption_timer);
+
+	vmx->nested.preemption_timer_expired = true;
+	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
+	kvm_vcpu_kick(&vmx->vcpu);
+
+	return HRTIMER_NORESTART;
+}
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
@@ -5777,6 +5796,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
 	INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
 	vmx->nested.vmcs02_num = 0;
 
+	hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+
 	vmx->nested.vmxon = true;
 
 	skip_emulated_instruction(vcpu);
@@ -6753,9 +6776,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		 * table is L0's fault.
 		 */
 		return 0;
-	case EXIT_REASON_PREEMPTION_TIMER:
-		return vmcs12->pin_based_vm_exec_control &
-			PIN_BASED_VMX_PREEMPTION_TIMER;
 	case EXIT_REASON_WBINVD:
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
@@ -6771,27 +6791,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
 	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
-{
-	u64 delta_tsc_l1;
-	u32 preempt_val_l1, preempt_val_l2, preempt_scale;
-
-	if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
-			PIN_BASED_VMX_PREEMPTION_TIMER))
-		return;
-	preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
-			MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
-	preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
-	delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
-		- vcpu->arch.last_guest_tsc;
-	preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
-	if (preempt_val_l2 <= preempt_val_l1)
-		preempt_val_l2 = 0;
-	else
-		preempt_val_l2 -= preempt_val_l1;
-	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
-}
-
 /*
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
@@ -7210,8 +7209,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
 
-	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
-		nested_adjust_preemption_timer(vcpu);
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -7608,6 +7605,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 		kvm_inject_page_fault(vcpu, fault);
 }
 
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+{
+	u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+	if (vcpu->arch.virtual_tsc_khz == 0)
+		return;
+
+	/* Make sure short timeouts reliably trigger an immediate vmexit.
+	 * hrtimer_start does not guarantee this. */
+	if (preemption_timeout <= 1) {
+		vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
+		return;
+	}
+
+	preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+	preemption_timeout *= 1000000;
+	do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
+	hrtimer_start(&vmx->nested.preemption_timer,
+		      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7621,7 +7640,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exec_control;
-	u32 exit_control;
 
 	vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7679,13 +7697,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
-	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-		(vmcs_config.pin_based_exec_ctrl |
-		 vmcs12->pin_based_vm_exec_control));
+	exec_control = vmcs12->pin_based_vm_exec_control;
+	exec_control |= vmcs_config.pin_based_exec_ctrl;
+	exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
 
-	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
-		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE,
-			     vmcs12->vmx_preemption_timer_value);
+	vmx->nested.preemption_timer_expired = false;
+	if (nested_cpu_has_preemption_timer(vmcs12))
+		vmx_start_preemption_timer(vcpu);
 
 	/*
 	 * Whether page-faults are trapped is determined by a combination of
@@ -7713,7 +7732,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		enable_ept ? vmcs12->page_fault_error_code_match : 0);
 
 	if (cpu_has_secondary_exec_ctrls()) {
-		u32 exec_control = vmx_secondary_exec_control(vmx);
+		exec_control = vmx_secondary_exec_control(vmx);
 		if (!vmx->rdtscp_enabled)
 			exec_control &= ~SECONDARY_EXEC_RDTSCP;
 		/* Take the following fields only from vmcs12 */
@@ -7800,10 +7819,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
 	 * bits are further modified by vmx_set_efer() below.
 	 */
-	exit_control = vmcs_config.vmexit_ctrl;
-	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
-		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-	vm_exit_controls_init(vmx, exit_control);
+	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
 
 	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 	 * emulated by vmx_set_efer(), below.
@@ -8151,6 +8167,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
+	    vmx->nested.preemption_timer_expired) {
+		if (vmx->nested.nested_run_pending)
+			return -EBUSY;
+		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
+		return 0;
+	}
+
 	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
 		if (vmx->nested.nested_run_pending)
 			return -EBUSY;
@@ -8176,6 +8200,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 	return 0;
 }
 
+static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
+{
+	ktime_t remaining =
+		hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
+	u64 value;
+
+	if (ktime_to_ns(remaining) <= 0)
+		return 0;
+
+	value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
+	do_div(value, 1000000);
+	return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8246,10 +8284,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	else
 		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
-	if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
-	    (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
-		vmcs12->vmx_preemption_timer_value =
-			vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+	if (nested_cpu_has_preemption_timer(vmcs12)) {
+		if (vmcs12->vm_exit_controls &
+		    VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+			vmcs12->vmx_preemption_timer_value =
+				vmx_get_preemption_timer_value(vcpu);
+		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
+	}
 
 	/*
 	 * In some cases (usually, nested EPT), L2 is allowed to change its
-- 
1.8.1.1.298.ge7eed54



* [PATCH v3 3/3] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt
  2014-03-06 17:33 [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Jan Kiszka
  2014-03-06 17:33 ` [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
  2014-03-06 17:33 ` [PATCH v3 2/3] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
@ 2014-03-06 17:33 ` Jan Kiszka
  2014-03-06 17:41 ` [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Paolo Bonzini
  3 siblings, 0 replies; 15+ messages in thread
From: Jan Kiszka @ 2014-03-06 17:33 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

According to SDM 27.2.3, IDT vectoring information will not be valid on
vmexits caused by external NMIs. So we have to avoid creating such
scenarios by delaying EXIT_REASON_EXCEPTION_NMI injection as long as we
have a pending interrupt because that one would be migrated to L1's IDT
vectoring info on nested exit.
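
Spelled out, the sequence being avoided looks as follows (an
illustrative sketch, not part of the patch):

	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
		/* An interrupt is still queued for (re)injection into
		 * L2. Injecting the NMI-triggered vmexit now would
		 * migrate it into L1's IDT vectoring info, which SDM
		 * 27.2.3 forbids for NMI-caused vmexits. So defer with
		 * -EBUSY until the interrupt has been delivered. */
		if (vcpu->arch.interrupt.pending)
			return -EBUSY;
		...
	}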

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/kvm/vmx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e559675..2c9d21e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8176,7 +8176,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 	}
 
 	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
-		if (vmx->nested.nested_run_pending)
+		if (vmx->nested.nested_run_pending ||
+		    vcpu->arch.interrupt.pending)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
-- 
1.8.1.1.298.ge7eed54



* Re: [PATCH v3 0/3] KVM: x86: Pending nVMX fixes
  2014-03-06 17:33 [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Jan Kiszka
                   ` (2 preceding siblings ...)
  2014-03-06 17:33 ` [PATCH v3 3/3] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
@ 2014-03-06 17:41 ` Paolo Bonzini
  3 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-03-06 17:41 UTC (permalink / raw)
  To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 06/03/2014 18:33, Jan Kiszka wrote:
> As I noticed a rebase conflict in these pending patches and wanted to
> point out that they are still pending ;), here is a quick update round.
> No functional changes since v2.

I wanted to look at these next week indeed.

Paolo

> Jan
>
> Jan Kiszka (3):
>   KVM: nVMX: Rework interception of IRQs and NMIs
>   KVM: nVMX: Fully emulate preemption timer
>   KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt



* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-06 17:33 ` [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
@ 2014-03-07 15:44   ` Paolo Bonzini
  2014-03-07 16:29     ` Jan Kiszka
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2014-03-07 15:44 UTC (permalink / raw)
  To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 06/03/2014 18:33, Jan Kiszka wrote:
> Move the check for leaving L2 on pending and intercepted IRQs or NMIs
> from the *_allowed handler into a dedicated callback. Invoke this
> callback at the relevant points before KVM checks if IRQs/NMIs can be
> injected. The callback is responsible for switching from L2 to L1 if
> needed and for injecting the proper vmexit events.
> 
> The rework fixes L2 wakeups from HLT and provides the foundation for
> preemption timer emulation.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/vmx.c              | 67 +++++++++++++++++++++++------------------
>  arch/x86/kvm/x86.c              | 15 +++++++--
>  3 files changed, 53 insertions(+), 31 deletions(-)

With this patch do we still need

        if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
                /*
                 * We get here if vmx_interrupt_allowed() said we can't
                 * inject to L1 now because L2 must run. The caller will have
                 * to make L2 exit right after entry, so we can inject to L1
                 * more promptly.
                 */
                return -EBUSY;

in enable_irq_window?  If not, enable_nmi_window and enable_irq_window
can both return void.

And perhaps, vmx_check_nested_events could use the preemption timer trick
from Jailhouse instead of smp_send_reschedule.

Paolo



* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-07 15:44   ` Paolo Bonzini
@ 2014-03-07 16:29     ` Jan Kiszka
  2014-03-07 16:46       ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Kiszka @ 2014-03-07 16:29 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 2014-03-07 16:44, Paolo Bonzini wrote:
> On 06/03/2014 18:33, Jan Kiszka wrote:
>> Move the check for leaving L2 on pending and intercepted IRQs or NMIs
>> from the *_allowed handler into a dedicated callback. Invoke this
>> callback at the relevant points before KVM checks if IRQs/NMIs can be
>> injected. The callback is responsible for switching from L2 to L1 if
>> needed and for injecting the proper vmexit events.
>>
>> The rework fixes L2 wakeups from HLT and provides the foundation for
>> preemption timer emulation.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>  arch/x86/include/asm/kvm_host.h |  2 ++
>>  arch/x86/kvm/vmx.c              | 67 +++++++++++++++++++++++------------------
>>  arch/x86/kvm/x86.c              | 15 +++++++--
>>  3 files changed, 53 insertions(+), 31 deletions(-)
> 
> With this patch do we still need
> 
>         if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>                 /*
>                  * We get here if vmx_interrupt_allowed() said we can't
>                  * inject to L1 now because L2 must run. The caller will have
>                  * to make L2 exit right after entry, so we can inject to L1
>                  * more promptly.
>                  */
>                 return -EBUSY;
> 
> in enable_irq_window?  If not, enable_nmi_window and enable_irq_window
> can both return void.

I don't see right now why this should have changed. We still cannot
interrupt vmlaunch/vmresume.

> 
> And perhaps, vmx_check_nested_events could use the preemption timer trick
> from Jailhouse instead of smp_send_reschedule.

That would exclude CPUs without preemption timer support...

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux


* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-07 16:29     ` Jan Kiszka
@ 2014-03-07 16:46       ` Paolo Bonzini
  2014-03-07 17:28         ` Jan Kiszka
  0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2014-03-07 16:46 UTC (permalink / raw)
  To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 07/03/2014 17:29, Jan Kiszka wrote:
> On 2014-03-07 16:44, Paolo Bonzini wrote:
>> With this patch do we still need
>>
>>         if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>>                 /*
>>                  * We get here if vmx_interrupt_allowed() said we can't
>>                  * inject to L1 now because L2 must run. The caller will have
>>                  * to make L2 exit right after entry, so we can inject to L1
>>                  * more promptly.
>>                  */
>>                 return -EBUSY;
>>
>> in enable_irq_window?  If not, enable_nmi_window and enable_irq_window
>> can both return void.
> 
> I don't see right now why this should have changed. We still cannot
> interrupt vmlaunch/vmresume.

But then shouldn't the same be true for enable_nmi_window?  It doesn't
check is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu).

Since check_nested_events has already returned -EBUSY, perhaps the
following:

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fda1028..df320e9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4522,15 +4522,6 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
 
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
-		/*
-		 * We get here if vmx_interrupt_allowed() said we can't
-		 * inject to L1 now because L2 must run. The caller will have
-		 * to make L2 exit right after entry, so we can inject to L1
-		 * more promptly.
-		 */
-		return -EBUSY;
-
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a03d611..83c2df5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5970,13 +5970,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		inject_pending_event(vcpu);
 
-		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
-			req_immediate_exit |=
-				kvm_x86_ops->check_nested_events(vcpu,
-							req_int_win) != 0;
+		if (is_guest_mode(vcpu) &&
+		    kvm_x86_ops->check_nested_events &&
+		    kvm_x86_ops->check_nested_events(vcpu, req_int_win) != 0)
+			req_immediate_exit = true;
 
 		/* enable NMI/IRQ window open exits if needed */
-		if (vcpu->arch.nmi_pending)
+		else if (vcpu->arch.nmi_pending)
 			req_immediate_exit |=
 				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
                else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)



* Re: [PATCH v3 2/3] KVM: nVMX: Fully emulate preemption timer
  2014-03-06 17:33 ` [PATCH v3 2/3] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
@ 2014-03-07 17:20   ` Marcelo Tosatti
  2014-03-07 17:24     ` Paolo Bonzini
  0 siblings, 1 reply; 15+ messages in thread
From: Marcelo Tosatti @ 2014-03-07 17:20 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Paolo Bonzini, Gleb Natapov, kvm

On Thu, Mar 06, 2014 at 06:33:58PM +0100, Jan Kiszka wrote:
> We cannot rely on the hardware-provided preemption timer support because
> we are holding L2 in HLT outside non-root mode. 

> Furthermore, emulating
> the preemption timer resolves tick rate errata on older Intel CPUs.

Can you describe this in more detail please? Errata number?



* Re: [PATCH v3 2/3] KVM: nVMX: Fully emulate preemption timer
  2014-03-07 17:20   ` Marcelo Tosatti
@ 2014-03-07 17:24     ` Paolo Bonzini
  0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-03-07 17:24 UTC (permalink / raw)
  To: Marcelo Tosatti, Jan Kiszka; +Cc: Gleb Natapov, kvm

On 07/03/2014 18:20, Marcelo Tosatti wrote:
> On Thu, Mar 06, 2014 at 06:33:58PM +0100, Jan Kiszka wrote:
>> > We cannot rely on the hardware-provided preemption timer support because
>> > we are holding L2 in HLT outside non-root mode.
>> > Furthermore, emulating
>> > the preemption timer resolves tick rate errata on older Intel CPUs.
> Can you describe this in more detail please? Errata number?

AAT59 and AAK139.  Basically the rate of the preemption timer is wrong 
and unpredictable.

http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-5500-specification-update.pdf
http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/core-mobile-spec-update.pdf

Paolo


* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-07 16:46       ` Paolo Bonzini
@ 2014-03-07 17:28         ` Jan Kiszka
  2014-03-07 18:19           ` Jan Kiszka
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Kiszka @ 2014-03-07 17:28 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 2014-03-07 17:46, Paolo Bonzini wrote:
> On 07/03/2014 17:29, Jan Kiszka wrote:
>> On 2014-03-07 16:44, Paolo Bonzini wrote:
>>> With this patch do we still need
>>>
>>>         if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>>>                 /*
>>>                  * We get here if vmx_interrupt_allowed() said we can't
>>>                  * inject to L1 now because L2 must run. The caller will have
>>>                  * to make L2 exit right after entry, so we can inject to L1
>>>                  * more promptly.
>>>                  */
>>>                 return -EBUSY;
>>>
>>> in enable_irq_window?  If not, enable_nmi_window and enable_irq_window
>>> can both return void.
>>
>> I don't see right now why this should have changed. We still cannot
>> interrupt vmlaunch/vmresume.
> 
> But then shouldn't the same be true for enable_nmi_window?  It doesn't
> check is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu).

Yes, that seems wrong now. But I need to think through again why we
may have excluded NMIs from this test so far.

> 
> Since check_nested_events has already returned -EBUSY, perhaps the
> following:
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index fda1028..df320e9 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4522,15 +4522,6 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
>  {
>  	u32 cpu_based_vm_exec_control;
>  
> -	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
> -		/*
> -		 * We get here if vmx_interrupt_allowed() said we can't
> -		 * inject to L1 now because L2 must run. The caller will have
> -		 * to make L2 exit right after entry, so we can inject to L1
> -		 * more promptly.
> -		 */
> -		return -EBUSY;
> -
>  	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
>  	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a03d611..83c2df5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5970,13 +5970,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  
>  		inject_pending_event(vcpu);
>  
> -		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
> -			req_immediate_exit |=
> -				kvm_x86_ops->check_nested_events(vcpu,
> -							req_int_win) != 0;
> +		if (is_guest_mode(vcpu) &&
> +		    kvm_x86_ops->check_nested_events &&
> +		    kvm_x86_ops->check_nested_events(vcpu, req_int_win) != 0)
> +			req_immediate_exit = true;
>  
>  		/* enable NMI/IRQ window open exits if needed */
> -		if (vcpu->arch.nmi_pending)
> +		else if (vcpu->arch.nmi_pending)
>  			req_immediate_exit |=
>  				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
>                 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
> 

Hmm, looks reasonable.

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux


* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-07 17:28         ` Jan Kiszka
@ 2014-03-07 18:19           ` Jan Kiszka
  2014-03-07 18:24             ` Paolo Bonzini
  2014-03-07 18:26             ` Jan Kiszka
  0 siblings, 2 replies; 15+ messages in thread
From: Jan Kiszka @ 2014-03-07 18:19 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 2014-03-07 18:28, Jan Kiszka wrote:
> On 2014-03-07 17:46, Paolo Bonzini wrote:
>>> On 07/03/2014 17:29, Jan Kiszka wrote:
>>> On 2014-03-07 16:44, Paolo Bonzini wrote:
>>>> With this patch do we still need
>>>>
>>>>         if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>>>>                 /*
>>>>                  * We get here if vmx_interrupt_allowed() said we can't
>>>>                  * inject to L1 now because L2 must run. The caller will have
>>>>                  * to make L2 exit right after entry, so we can inject to L1
>>>>                  * more promptly.
>>>>                  */
>>>>                 return -EBUSY;
>>>>
>>>> in enable_irq_window?  If not, enable_nmi_window and enable_irq_window
>>>> can both return void.
>>>
>>> I don't see right now why this should have changed. We still cannot
>>> interrupt vmlaunch/vmresume.
>>
>> But then shouldn't the same be true for enable_nmi_window?  It doesn't
>> check is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu).
> 
>> Yes, that seems wrong now. But I need to think through again why we
>> may have excluded NMIs from this test so far.
> 
>>
>> Since check_nested_events has already returned -EBUSY, perhaps the
>> following:
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index fda1028..df320e9 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -4522,15 +4522,6 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
>>  {
>>  	u32 cpu_based_vm_exec_control;
>>  
>> -	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>> -		/*
>> -		 * We get here if vmx_interrupt_allowed() said we can't
>> -		 * inject to L1 now because L2 must run. The caller will have
>> -		 * to make L2 exit right after entry, so we can inject to L1
>> -		 * more promptly.
>> -		 */
>> -		return -EBUSY;
>> -
>>  	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
>>  	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
>>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index a03d611..83c2df5 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -5970,13 +5970,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>  
>>  		inject_pending_event(vcpu);
>>  
>> -		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
>> -			req_immediate_exit |=
>> -				kvm_x86_ops->check_nested_events(vcpu,
>> -							req_int_win) != 0;
>> +		if (is_guest_mode(vcpu) &&
>> +		    kvm_x86_ops->check_nested_events &&
>> +		    kvm_x86_ops->check_nested_events(vcpu, req_int_win) != 0)
>> +			req_immediate_exit = true;
>>  
>>  		/* enable NMI/IRQ window open exits if needed */
>> -		if (vcpu->arch.nmi_pending)
>> +		else if (vcpu->arch.nmi_pending)
>>  			req_immediate_exit |=
>>  				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
>>                 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
>>
> 
> Hmm, looks reasonable.

Also on second thought. I can give this hunk some test cycles here, just
in case.

Reading through my code again, I'm now wondering why I added
check_nested_events to both inject_pending_event and vcpu_enter_guest.
The former seems redundant; only vcpu_enter_guest calls
inject_pending_event. I guess I forgot a cleanup here.

I can fold in your changes when I resend for the other cleanup.

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux


* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-07 18:19           ` Jan Kiszka
@ 2014-03-07 18:24             ` Paolo Bonzini
  2014-03-07 18:26             ` Jan Kiszka
  1 sibling, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-03-07 18:24 UTC (permalink / raw)
  To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 07/03/2014 19:19, Jan Kiszka wrote:
> On 2014-03-07 18:28, Jan Kiszka wrote:
>> On 2014-03-07 17:46, Paolo Bonzini wrote:
>>> On 07/03/2014 17:29, Jan Kiszka wrote:
>>>> On 2014-03-07 16:44, Paolo Bonzini wrote:
>>>>> With this patch do we still need
>>>>>
>>>>>         if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>>>>>                 /*
>>>>>                  * We get here if vmx_interrupt_allowed() said we can't
>>>>>                  * inject to L1 now because L2 must run. The caller will have
>>>>>                  * to make L2 exit right after entry, so we can inject to L1
>>>>>                  * more promptly.
>>>>>                  */
>>>>>                 return -EBUSY;
>>>>>
>>>>> in enable_irq_window?  If not, enable_nmi_window and enable_irq_window
>>>>> can both return void.
>>>>
>>>> I don't see right now why this should have changed. We still cannot
>>>> interrupt vmlaunch/vmresume.
>>>
>>> But then shouldn't the same be true for enable_nmi_window?  It doesn't
>>> check is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu).
>>
>> Yes, that seems wrong now. But I need to think through again why we
>> may have excluded NMIs from this test so far.
>>
>>>
>>> Since check_nested_events has already returned -EBUSY, perhaps the
>>> following:
>>>
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index fda1028..df320e9 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -4522,15 +4522,6 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
>>>  {
>>>  	u32 cpu_based_vm_exec_control;
>>>
>>> -	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>>> -		/*
>>> -		 * We get here if vmx_interrupt_allowed() said we can't
>>> -		 * inject to L1 now because L2 must run. The caller will have
>>> -		 * to make L2 exit right after entry, so we can inject to L1
>>> -		 * more promptly.
>>> -		 */
>>> -		return -EBUSY;
>>> -
>>>  	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
>>>  	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
>>>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index a03d611..83c2df5 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -5970,13 +5970,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>>
>>>  		inject_pending_event(vcpu);
>>>
>>> -		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
>>> -			req_immediate_exit |=
>>> -				kvm_x86_ops->check_nested_events(vcpu,
>>> -							req_int_win) != 0;
>>> +		if (is_guest_mode(vcpu) &&
>>> +		    kvm_x86_ops->check_nested_events &&
>>> +		    kvm_x86_ops->check_nested_events(vcpu, req_int_win) != 0)
>>> +			req_immediate_exit = true;
>>>
>>>  		/* enable NMI/IRQ window open exits if needed */
>>> -		if (vcpu->arch.nmi_pending)
>>> +		else if (vcpu->arch.nmi_pending)
>>>  			req_immediate_exit |=
>>>  				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
>>>                 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
>>>
>>
>> Hmm, looks reasonable.
>
> Also on second thought. I can give this hunk some test cycles here, just
> in case.

Thanks.

> Reading through my code again, I'm now wondering why I added
> check_nested_events to both inject_pending_event and vcpu_enter_guest.
> The former seems redundant; only vcpu_enter_guest calls
> inject_pending_event. I guess I forgot a cleanup here.
>
> I can fold in your changes when I resend for the other cleanup.

As you prefer, I can also post it as a separate patch (my changes above 
do not have the int->void change).

I had pushed your patches already to kvm/queue.  You can post v4 
relative to kvm/next.

Paolo



* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-07 18:19           ` Jan Kiszka
  2014-03-07 18:24             ` Paolo Bonzini
@ 2014-03-07 18:26             ` Jan Kiszka
  2014-03-07 19:45               ` Paolo Bonzini
  1 sibling, 1 reply; 15+ messages in thread
From: Jan Kiszka @ 2014-03-07 18:26 UTC (permalink / raw)
  To: Paolo Bonzini, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 2014-03-07 19:19, Jan Kiszka wrote:
> On 2014-03-07 18:28, Jan Kiszka wrote:
>> On 2014-03-07 17:46, Paolo Bonzini wrote:
>>> On 07/03/2014 17:29, Jan Kiszka wrote:
>>>> On 2014-03-07 16:44, Paolo Bonzini wrote:
>>>>> With this patch do we still need
>>>>>
>>>>>         if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>>>>>                 /*
>>>>>                  * We get here if vmx_interrupt_allowed() said we can't
>>>>>                  * inject to L1 now because L2 must run. The caller will have
>>>>>                  * to make L2 exit right after entry, so we can inject to L1
>>>>>                  * more promptly.
>>>>>                  */
>>>>>                 return -EBUSY;
>>>>>
>>>>> in enable_irq_window?  If not, enable_nmi_window and enable_irq_window
>>>>> can both return void.
>>>>
>>>> I don't see right now why this should have changed. We still cannot
>>>> interrupt vmlaunch/vmresume.
>>>
>>> But then shouldn't the same be true for enable_nmi_window?  It doesn't
>>> check is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu).
>>
>> Yes, that seems wrong now. But I need to think through again why we
>> may have excluded NMIs from this test so far.
>>
>>>
>>> Since check_nested_events has already returned -EBUSY, perhaps the
>>> following:
>>>
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index fda1028..df320e9 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -4522,15 +4522,6 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
>>>  {
>>>  	u32 cpu_based_vm_exec_control;
>>>  
>>> -	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
>>> -		/*
>>> -		 * We get here if vmx_interrupt_allowed() said we can't
>>> -		 * inject to L1 now because L2 must run. The caller will have
>>> -		 * to make L2 exit right after entry, so we can inject to L1
>>> -		 * more promptly.
>>> -		 */
>>> -		return -EBUSY;
>>> -
>>>  	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
>>>  	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
>>>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index a03d611..83c2df5 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -5970,13 +5970,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>>  
>>>  		inject_pending_event(vcpu);
>>>  
>>> -		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
>>> -			req_immediate_exit |=
>>> -				kvm_x86_ops->check_nested_events(vcpu,
>>> -							req_int_win) != 0;
>>> +		if (is_guest_mode(vcpu) &&
>>> +		    kvm_x86_ops->check_nested_events &&
>>> +		    kvm_x86_ops->check_nested_events(vcpu, req_int_win) != 0)
>>> +			req_immediate_exit = true;
>>>  
>>>  		/* enable NMI/IRQ window open exits if needed */
>>> -		if (vcpu->arch.nmi_pending)
>>> +		else if (vcpu->arch.nmi_pending)
>>>  			req_immediate_exit |=
>>>  				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
>>>                 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
>>>
>>
>> Hmm, looks reasonable.
> 
> Also on second thought. I can give this hunk some test cycles here, just
> in case.
> 
> Reading through my code again, I'm now wondering why I added
> check_nested_events to both inject_pending_event and vcpu_enter_guest.
> The former seems redundant, only vcpu_enter_guest calls
> inject_pending_event. I guess I forgot a cleanup here.

Nah, it's not redundant, we need to check for potential L2->L1 switches
*before* trying to deliver events to L2. But I think I can (and probably
should) get rid of the second test in vcpu_enter_guest.

Jan

> 
> I can fold in your changes when I resend for the other cleanup.
> 
> Jan
> 

-- 
Siemens AG, Corporate Technology, CT RTC ITP SES-DE
Corporate Competence Center Embedded Linux


* Re: [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs
  2014-03-07 18:26             ` Jan Kiszka
@ 2014-03-07 19:45               ` Paolo Bonzini
  0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2014-03-07 19:45 UTC (permalink / raw)
  To: Jan Kiszka, Gleb Natapov, Marcelo Tosatti; +Cc: kvm

On 07/03/2014 19:26, Jan Kiszka wrote:
>> > Reading through my code again, I'm now wondering why I added
>> > check_nested_events to both inject_pending_event and vcpu_enter_guest.
>> > The former seems redundant; only vcpu_enter_guest calls
>> > inject_pending_event. I guess I forgot a cleanup here.
> Nah, it's not redundant, we need to check for potential L2->L1 switches
> *before* trying to deliver events to L2.

Yeah, and after the call in inject_pending_event you can similarly have 
an "else if" since vmx_nmi/interrupt_allowed will check 
nested_run_pending and never return true.
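
Spelled out, that suggested "else if" restructuring might look
something like this (a hypothetical sketch; no such patch was posted
in this thread):

	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events &&
	    kvm_x86_ops->check_nested_events(vcpu, false) != 0)
		return;	/* L2->L1 switch still pending, inject nothing */
	if (vcpu->arch.nmi_pending) {
		/* safe even without the hook's verdict here:
		 * vmx_nmi_allowed()/vmx_interrupt_allowed() recheck
		 * nested_run_pending and cannot return true then */
		...
	}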

> But I think I can (and probably
> should) get rid of the second test in vcpu_enter_guest.

If so, inject_pending_event would return true to request an immediate
exit, right?

Paolo


Thread overview: 15+ messages
2014-03-06 17:33 [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Jan Kiszka
2014-03-06 17:33 ` [PATCH v3 1/3] KVM: nVMX: Rework interception of IRQs and NMIs Jan Kiszka
2014-03-07 15:44   ` Paolo Bonzini
2014-03-07 16:29     ` Jan Kiszka
2014-03-07 16:46       ` Paolo Bonzini
2014-03-07 17:28         ` Jan Kiszka
2014-03-07 18:19           ` Jan Kiszka
2014-03-07 18:24             ` Paolo Bonzini
2014-03-07 18:26             ` Jan Kiszka
2014-03-07 19:45               ` Paolo Bonzini
2014-03-06 17:33 ` [PATCH v3 2/3] KVM: nVMX: Fully emulate preemption timer Jan Kiszka
2014-03-07 17:20   ` Marcelo Tosatti
2014-03-07 17:24     ` Paolo Bonzini
2014-03-06 17:33 ` [PATCH v3 3/3] KVM: nVMX: Do not inject NMI vmexits when L2 has a pending interrupt Jan Kiszka
2014-03-06 17:41 ` [PATCH v3 0/3] KVM: x86: Pending nVMX fixes Paolo Bonzini
