* [PATCH 1/9] VMX: include all IRQ window exits in statistics
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
@ 2008-09-19 11:59 ` Jan Kiszka
2008-09-19 12:01 ` [PATCH 2/9] VMX: refactor/fix IRQ and NMI injectability determination Jan Kiszka
` (8 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 11:59 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
irq_window_exits only tracks IRQ window exits due to user space
requests, while nmi_window_exits includes all exits. The latter makes
more sense, so let's adjust irq_window_exits accounting accordingly.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2763,6 +2763,7 @@ static int handle_interrupt_window(struc
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
KVMTRACE_0D(PEND_INTR, vcpu, handler);
+ ++vcpu->stat.irq_window_exits;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -2771,7 +2772,6 @@ static int handle_interrupt_window(struc
if (kvm_run->request_interrupt_window &&
!vcpu->arch.irq_summary) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
- ++vcpu->stat.irq_window_exits;
return 0;
}
return 1;
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 2/9] VMX: refactor/fix IRQ and NMI injectability determination
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
2008-09-19 11:59 ` [PATCH 1/9] VMX: include all IRQ window exits in statistics Jan Kiszka
@ 2008-09-19 12:01 ` Jan Kiszka
2008-09-19 12:01 ` [PATCH 3/9] VMX: refactor IRQ and NMI window enabling Jan Kiszka
` (7 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:01 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
There are currently two ways in VMX to check if an IRQ or NMI can be
injected:
- vmx_{nmi|irq}_enabled and
- vcpu.arch.{nmi|interrupt}_window_open.
Even worse, one test (at the end of vmx_vcpu_run) uses an inconsistent,
likely incorrect logic.
This patch consolidates and unifies the tests over
{nmi|interrupt}_window_open as cache + vmx_update_window_states
for updating the cache content.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 46 ++++++++++++++++++++-------------------------
include/asm-x86/kvm_host.h | 1
2 files changed, 22 insertions(+), 25 deletions(-)
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2358,6 +2358,21 @@ static void vmx_inject_nmi(struct kvm_vc
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
}
+static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+
+ vcpu->arch.nmi_window_open =
+ !(guest_intr & (GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_NMI));
+
+ vcpu->arch.interrupt_window_open =
+ ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ !(guest_intr & (GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS)));
+}
+
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2370,15 +2385,12 @@ static void kvm_do_inject_irq(struct kvm
kvm_queue_interrupt(vcpu, irq);
}
-
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
u32 cpu_based_vm_exec_control;
- vcpu->arch.interrupt_window_open =
- ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+ vmx_update_window_states(vcpu);
if (vcpu->arch.interrupt_window_open &&
vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3049,22 +3061,6 @@ static void enable_nmi_window(struct kvm
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
-static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
-{
- u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
- return !(guest_intr & (GUEST_INTR_STATE_NMI |
- GUEST_INTR_STATE_MOV_SS |
- GUEST_INTR_STATE_STI));
-}
-
-static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
-{
- u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
- return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
- GUEST_INTR_STATE_STI)) &&
- (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
-}
-
static void enable_intr_window(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.nmi_pending)
@@ -3133,9 +3129,11 @@ static void vmx_intr_assist(struct kvm_v
{
update_tpr_threshold(vcpu);
+ vmx_update_window_states(vcpu);
+
if (cpu_has_virtual_nmis()) {
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vmx_nmi_enabled(vcpu)) {
+ if (vcpu->arch.nmi_window_open) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
@@ -3150,7 +3148,7 @@ static void vmx_intr_assist(struct kvm_v
}
}
if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
- if (vmx_irq_enabled(vcpu))
+ if (vcpu->arch.interrupt_window_open)
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
else
enable_irq_window(vcpu);
@@ -3311,9 +3309,7 @@ static void vmx_vcpu_run(struct kvm_vcpu
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
- vcpu->arch.interrupt_window_open =
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
+ vmx_update_window_states(vcpu);
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
Index: b/include/asm-x86/kvm_host.h
===================================================================
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -321,6 +321,7 @@ struct kvm_vcpu_arch {
bool nmi_pending;
bool nmi_injected;
+ bool nmi_window_open;
u64 mtrr[0x100];
};
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 3/9] VMX: refactor IRQ and NMI window enabling
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
2008-09-19 11:59 ` [PATCH 1/9] VMX: include all IRQ window exits in statistics Jan Kiszka
2008-09-19 12:01 ` [PATCH 2/9] VMX: refactor/fix IRQ and NMI injectability determination Jan Kiszka
@ 2008-09-19 12:01 ` Jan Kiszka
2008-09-19 12:02 ` [PATCH 4/9] VMX: fix real-mode NMI support Jan Kiszka
` (6 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:01 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
do_interrupt_requests and vmx_intr_assist go different ways to
achieve the same goal: enabling the NMI/IRQ window start notification.
Unify their code over enable_{irq|nmi}_window, get rid of a redundant
call to enable_intr_window instead of direct enable_nmi_window
invocation and unroll enable_intr_window for both in-kernel and user
space irq injection accordingly.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 78 +++++++++++++++++++++--------------------------------
1 file changed, 32 insertions(+), 46 deletions(-)
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2385,30 +2385,42 @@ static void kvm_do_inject_irq(struct kvm
kvm_queue_interrupt(vcpu, irq);
}
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
{
u32 cpu_based_vm_exec_control;
- vmx_update_window_states(vcpu);
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
- if (vcpu->arch.interrupt_window_open &&
- vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
- kvm_do_inject_irq(vcpu);
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
- if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
- vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+ if (!cpu_has_virtual_nmis())
+ return;
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ vmx_update_window_states(vcpu);
+
+ if (vcpu->arch.interrupt_window_open) {
+ if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+ kvm_do_inject_irq(vcpu);
+
+ if (vcpu->arch.interrupt.pending)
+ vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+ }
if (!vcpu->arch.interrupt_window_open &&
(vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
- /*
- * Interrupts blocked. Wait for unblock.
- */
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- else
- cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ enable_irq_window(vcpu);
}
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -3040,35 +3052,6 @@ static void update_tpr_threshold(struct
vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
}
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- if (!cpu_has_virtual_nmis())
- return;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_intr_window(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu))
- enable_irq_window(vcpu);
-}
-
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -3137,13 +3120,16 @@ static void vmx_intr_assist(struct kvm_v
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
- enable_intr_window(vcpu);
+ enable_nmi_window(vcpu);
return;
}
}
if (vcpu->arch.nmi_injected) {
vmx_inject_nmi(vcpu);
- enable_intr_window(vcpu);
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (kvm_cpu_has_interrupt(vcpu))
+ enable_irq_window(vcpu);
return;
}
}
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 4/9] VMX: fix real-mode NMI support
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
` (2 preceding siblings ...)
2008-09-19 12:01 ` [PATCH 3/9] VMX: refactor IRQ and NMI window enabling Jan Kiszka
@ 2008-09-19 12:02 ` Jan Kiszka
2008-09-19 12:03 ` [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source Jan Kiszka
` (5 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:02 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
Fix NMI injection in real-mode with the same pattern we perform IRQ
injection.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2354,6 +2354,19 @@ static void vmx_inject_irq(struct kvm_vc
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vcpu->arch.rmode.active) {
+ vmx->rmode.irq.pending = true;
+ vmx->rmode.irq.vector = NMI_VECTOR;
+ vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+ INTR_INFO_VALID_MASK);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
+ return;
+ }
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
}
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
` (3 preceding siblings ...)
2008-09-19 12:02 ` [PATCH 4/9] VMX: fix real-mode NMI support Jan Kiszka
@ 2008-09-19 12:03 ` Jan Kiszka
2008-09-19 16:55 ` Jan Kiszka
2008-09-23 6:10 ` Yang, Sheng
2008-09-19 12:03 ` [PATCH 6/9] kvm-x86: Support for user space injected NMIs Jan Kiszka
` (4 subsequent siblings)
9 siblings, 2 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:03 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
LINT0 of the LAPIC can be used to route PIT events as NMI watchdog
ticks into the guest. This patch aligns the in-kernel irqchip emulation
with the user space irqchip which already supports this feature. The
trick is to route PIT interrupts to all LAPIC's LVT0 lines.
Rebased and slightly polished patch originally posted by Sheng
Yang.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/i8254.c | 15 +++++++++++++++
arch/x86/kvm/irq.h | 1 +
arch/x86/kvm/lapic.c | 32 ++++++++++++++++++++++++++++----
3 files changed, 44 insertions(+), 4 deletions(-)
Index: b/arch/x86/kvm/i8254.c
===================================================================
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -594,10 +594,25 @@ void kvm_free_pit(struct kvm *kvm)
static void __inject_pit_timer_intr(struct kvm *kvm)
{
+ struct kvm_vcpu *vcpu;
+ int i;
+
mutex_lock(&kvm->lock);
kvm_set_irq(kvm, 0, 1);
kvm_set_irq(kvm, 0, 0);
mutex_unlock(&kvm->lock);
+
+ /*
+ * Provides NMI watchdog support in IOAPIC mode.
+ * The route is: PIT -> PIC -> LVT0 in NMI mode,
+ * timer IRQs will continue to flow through the IOAPIC.
+ */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ kvm_apic_local_deliver(vcpu, APIC_LVT0);
+ }
}
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
Index: b/arch/x86/kvm/irq.h
===================================================================
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -93,6 +93,7 @@ void kvm_unregister_irq_ack_notifier(str
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
Index: b/arch/x86/kvm/lapic.c
===================================================================
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -382,6 +382,14 @@ static int __apic_accept_irq(struct kvm_
}
break;
+ case APIC_DM_EXTINT:
+ /*
+ * Should only be called by kvm_apic_local_deliver() with LVT0,
+ * before NMI watchdog was enabled. Already handled by
+ * kvm_apic_accept_pic_intr().
+ */
+ break;
+
default:
printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
delivery_mode);
@@ -749,6 +757,9 @@ static void apic_mmio_write(struct kvm_i
case APIC_LVTTHMR:
case APIC_LVTPC:
case APIC_LVT0:
+ if (val == APIC_DM_NMI)
+ apic_debug("Receive NMI setting on APIC_LVT0 "
+ "for cpu %d\n", apic->vcpu->vcpu_id);
case APIC_LVT1:
case APIC_LVTERR:
/* TODO: Check vector */
@@ -965,12 +976,25 @@ int apic_has_pending_timer(struct kvm_vc
return 0;
}
-static int __inject_apic_timer_irq(struct kvm_lapic *apic)
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
{
- int vector;
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ int vector, mode, trig_mode;
+ u32 reg;
+
+ if (apic && apic_enabled(apic)) {
+ reg = apic_get_reg(apic, lvt_type);
+ vector = reg & APIC_VECTOR_MASK;
+ mode = reg & APIC_MODE_MASK;
+ trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
+ return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+ }
+ return 0;
+}
- vector = apic_lvt_vector(apic, APIC_LVTT);
- return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+static inline int __inject_apic_timer_irq(struct kvm_lapic *apic)
+{
+ return kvm_apic_local_deliver(apic->vcpu, APIC_LVTT);
}
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 6/9] kvm-x86: Support for user space injected NMIs
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
` (4 preceding siblings ...)
2008-09-19 12:03 ` [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source Jan Kiszka
@ 2008-09-19 12:03 ` Jan Kiszka
2008-09-19 12:03 ` [PATCH 7/9] VMX: Provide support " Jan Kiszka
` (3 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:03 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
Introduces the KVM_NMI IOCTL to the generic x86 part of KVM for
injecting NMIs from user space and also extends the statistic report
accordingly.
Based on the original patch by Sheng Yang.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/x86.c | 46 +++++++++++++++++++++++++++++++++++++++++++--
include/asm-x86/kvm_host.h | 2 +
include/linux/kvm.h | 11 ++++++++--
3 files changed, 55 insertions(+), 4 deletions(-)
Index: b/arch/x86/kvm/x86.c
===================================================================
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,6 +87,7 @@ struct kvm_stats_debugfs_item debugfs_en
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
+ { "request_nmi", VCPU_STAT(request_nmi_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -94,6 +95,7 @@ struct kvm_stats_debugfs_item debugfs_en
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
{ "irq_injections", VCPU_STAT(irq_injections) },
+ { "nmi_injections", VCPU_STAT(nmi_injections) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -1549,6 +1551,15 @@ static int kvm_vcpu_ioctl_interrupt(stru
return 0;
}
+static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
+{
+ vcpu_load(vcpu);
+ kvm_inject_nmi(vcpu);
+ vcpu_put(vcpu);
+
+ return 0;
+}
+
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
@@ -1608,6 +1619,13 @@ long kvm_arch_vcpu_ioctl(struct file *fi
r = 0;
break;
}
+ case KVM_NMI: {
+ r = kvm_vcpu_ioctl_nmi(vcpu);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
case KVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
@@ -3063,18 +3081,37 @@ static int dm_request_for_irq_injection(
(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}
+/*
+ * Check if userspace requested a NMI window, and that the NMI window
+ * is open.
+ *
+ * No need to exit to userspace if we already have a NMI queued.
+ */
+static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ return (!vcpu->arch.nmi_pending &&
+ kvm_run->request_nmi_window &&
+ vcpu->arch.nmi_window_open);
+}
+
static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- if (irqchip_in_kernel(vcpu->kvm))
+ if (irqchip_in_kernel(vcpu->kvm)) {
kvm_run->ready_for_interrupt_injection = 1;
- else
+ kvm_run->ready_for_nmi_injection = 1;
+ } else {
kvm_run->ready_for_interrupt_injection =
(vcpu->arch.interrupt_window_open &&
vcpu->arch.irq_summary == 0);
+ kvm_run->ready_for_nmi_injection =
+ (vcpu->arch.nmi_window_open &&
+ vcpu->arch.nmi_pending == 0);
+ }
}
static void vapic_enter(struct kvm_vcpu *vcpu)
@@ -3248,6 +3285,11 @@ static int __vcpu_run(struct kvm_vcpu *v
}
if (r > 0) {
+ if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
+ r = -EINTR;
+ kvm_run->exit_reason = KVM_EXIT_NMI;
+ ++vcpu->stat.request_nmi_exits;
+ }
if (dm_request_for_irq_injection(vcpu, kvm_run)) {
r = -EINTR;
kvm_run->exit_reason = KVM_EXIT_INTR;
Index: b/include/asm-x86/kvm_host.h
===================================================================
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -388,6 +388,7 @@ struct kvm_vcpu_stat {
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
+ u32 request_nmi_exits;
u32 irq_exits;
u32 host_state_reload;
u32 efer_reload;
@@ -396,6 +397,7 @@ struct kvm_vcpu_stat {
u32 insn_emulation_fail;
u32 hypercalls;
u32 irq_injections;
+ u32 nmi_injections;
};
struct descriptor_table {
Index: b/include/linux/kvm.h
===================================================================
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -83,18 +83,22 @@ struct kvm_irqchip {
#define KVM_EXIT_S390_SIEIC 13
#define KVM_EXIT_S390_RESET 14
#define KVM_EXIT_DCR 15
+#define KVM_EXIT_NMI 16
+#define KVM_EXIT_NMI_WINDOW_OPEN 17
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 padding1[7];
+ __u8 request_nmi_window;
+ __u8 padding1[6];
/* out */
__u32 exit_reason;
__u8 ready_for_interrupt_injection;
__u8 if_flag;
- __u8 padding2[2];
+ __u8 ready_for_nmi_injection;
+ __u8 padding2;
/* in (pre_kvm_run), out (post_kvm_run) */
__u64 cr8;
@@ -385,6 +389,7 @@ struct kvm_trace_rec {
#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#define KVM_CAP_IOMMU 18
+#define KVM_CAP_NMI 19
/*
* ioctls for VM fds
@@ -456,6 +461,8 @@ struct kvm_trace_rec {
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
+/* Available with KVM_CAP_NMI */
+#define KVM_NMI _IO(KVMIO, 0x9a)
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 7/9] VMX: Provide support for user space injected NMIs
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
` (5 preceding siblings ...)
2008-09-19 12:03 ` [PATCH 6/9] kvm-x86: Support for user space injected NMIs Jan Kiszka
@ 2008-09-19 12:03 ` Jan Kiszka
2008-09-19 12:04 ` [PATCH 8/9] VMX: work around lacking VNMI support Jan Kiszka
` (2 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:03 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
This patch adds the required bits to the VMX side for user space
injected NMIs. As with the preexisting in-kernel irqchip support, the
CPU must provide the "virtual NMI" feature for proper tracking of the
NMI blocking state.
Based on the original patch by Sheng Yang.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2356,6 +2356,7 @@ static void vmx_inject_nmi(struct kvm_vc
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ ++vcpu->stat.nmi_injections;
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = NMI_VECTOR;
@@ -2424,6 +2425,26 @@ static void do_interrupt_requests(struct
{
vmx_update_window_states(vcpu);
+ if (cpu_has_virtual_nmis()) {
+ if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+ if (vcpu->arch.nmi_window_open) {
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = true;
+ } else {
+ enable_nmi_window(vcpu);
+ return;
+ }
+ }
+ if (vcpu->arch.nmi_injected) {
+ vmx_inject_nmi(vcpu);
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (vcpu->arch.irq_summary)
+ enable_irq_window(vcpu);
+ return;
+ }
+ }
+
if (vcpu->arch.interrupt_window_open) {
if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
kvm_do_inject_irq(vcpu);
@@ -2936,6 +2957,14 @@ static int handle_nmi_window(struct kvm_
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
++vcpu->stat.nmi_window_exits;
+ /*
+ * If the user space waits to inject an NMI, exit as soon as possible
+ */
+ if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
+ kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
+ return 0;
+ }
+
return 1;
}
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
` (6 preceding siblings ...)
2008-09-19 12:03 ` [PATCH 7/9] VMX: Provide support " Jan Kiszka
@ 2008-09-19 12:04 ` Jan Kiszka
2008-09-21 14:31 ` Gleb Natapov
2008-09-19 12:05 ` [PATCH 9/9] kvm: Enable NMI support for userspace irqchip Jan Kiszka
2008-09-19 12:10 ` [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
9 siblings, 1 reply; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:04 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
Older VMX supporting CPUs do not provide the "Virtual NMI" feature for
tracking the NMI-blocked state after injecting such events. For now
KVM is unable to inject NMIs on those CPUs.
Derived from Sheng Yang's suggestion to use the IRQ window notification
for detecting the end of NMI handlers, this patch implements virtual
NMI support without impact on the host's ability to receive real NMIs.
The downside is that the given approach requires some heuristics that
can cause NMI nesting in very rare corner cases.
The approach works as follows:
- check if the guest will receive the next NMI via an interrupt gate
(i.e. handler will have interrupts disabled), reject injection if not
- inject NMI and set a software-based NMI-blocked flag
- arm the IRQ window start notification whenever an NMI window is
requested
- if the guest exits due to an opening IRQ window, clear the emulated
NMI-blocked flag
- if the guest net execution time with NMI-blocked but without an IRQ
window exceeds 1 second, force NMI-blocked reset and inject anyway
This approach covers most practical scenarios:
- succeeding NMIs are separated by at least one open IRQ window
- the guest may spin with IRQs disabled (e.g. due to a bug), but
leaving the NMI handler takes much less time than one second
- the guest does not rely on strict ordering or timing of NMIs
(would be problematic in virtualized environments anyway)
Successfully tested with the 'nmi n' monitor command, the kgdbts
testsuite on smp guests (additional patches required to add debug
register support to kvm), the kernel's nmi_watchdog=1, and a Siemens-
specific board emulation (+ guest) that comes with its own NMI
watchdog mechanism.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/vmx.c | 173 ++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 120 insertions(+), 53 deletions(-)
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -90,6 +90,11 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
+
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -2331,6 +2336,29 @@ out:
return ret;
}
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ if (!cpu_has_virtual_nmis()) {
+ enable_irq_window(vcpu);
+ return;
+ }
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (!cpu_has_virtual_nmis()) {
+ int desc_size = is_long_mode(vcpu) ? 16 : 8;
+ struct descriptor_table dt;
+ gpa_t gpa;
+ u64 desc;
+
+ /*
+ * Deny delivery if the NMI will not be handled by an
+ * interrupt gate (workaround depends on IRQ masking).
+ */
+ vmx_get_idt(vcpu, &dt);
+ if (!vcpu->arch.rmode.active && dt.limit
+ >= desc_size * (NMI_VECTOR + 1) - 1) {
+ gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
+ dt.base + desc_size * NMI_VECTOR);
+ if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
+ && ((desc >> 40) & 0x7) != 0x6)
+ return;
+ }
+ vmx->soft_vnmi_blocked = 1;
+ vmx->vnmi_blocked_time = 0;
+ }
+
++vcpu->stat.nmi_injections;
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
@@ -2374,6 +2425,7 @@ static void vmx_inject_nmi(struct kvm_vc
static void vmx_update_window_states(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
vcpu->arch.nmi_window_open =
@@ -2385,6 +2437,13 @@ static void vmx_update_window_states(str
((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
!(guest_intr & (GUEST_INTR_STATE_STI |
GUEST_INTR_STATE_MOV_SS)));
+
+ if (!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked) {
+ if (vcpu->arch.interrupt_window_open)
+ vmx->soft_vnmi_blocked = 0;
+ else
+ vcpu->arch.nmi_window_open = 0;
+ }
}
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
@@ -2399,51 +2458,28 @@ static void kvm_do_inject_irq(struct kvm
kvm_queue_interrupt(vcpu, irq);
}
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- if (!cpu_has_virtual_nmis())
- return;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
vmx_update_window_states(vcpu);
- if (cpu_has_virtual_nmis()) {
- if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vcpu->arch.nmi_window_open) {
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = true;
- } else {
- enable_nmi_window(vcpu);
- return;
- }
- }
- if (vcpu->arch.nmi_injected) {
- vmx_inject_nmi(vcpu);
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (vcpu->arch.irq_summary)
- enable_irq_window(vcpu);
+ if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+ if (vcpu->arch.nmi_window_open) {
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = true;
+ } else {
+ enable_nmi_window(vcpu);
return;
}
}
+ if (vcpu->arch.nmi_injected) {
+ vmx_inject_nmi(vcpu);
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (vcpu->arch.irq_summary)
+ enable_irq_window(vcpu);
+ return;
+ }
if (vcpu->arch.interrupt_window_open) {
if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -2813,6 +2849,7 @@ static int handle_tpr_below_threshold(st
static int handle_interrupt_window(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 cpu_based_vm_exec_control;
/* clear pending irq */
@@ -2823,6 +2860,19 @@ static int handle_interrupt_window(struc
KVMTRACE_0D(PEND_INTR, vcpu, handler);
++vcpu->stat.irq_window_exits;
+ if (!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked) {
+ vmx->soft_vnmi_blocked = 0;
+
+ /*
+ * If the user space waits to inject an NMI, exit ASAP
+ */
+ if (kvm_run->request_nmi_window && !vcpu->arch.nmi_pending) {
+ kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
+ ++vcpu->stat.nmi_window_exits;
+ return 0;
+ }
+ }
+
/*
* If the user space waits to inject interrupts, exit as soon as
* possible
@@ -3116,6 +3166,21 @@ static void vmx_complete_interrupts(stru
if (unblock_nmi && vector != DF_VECTOR)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
+ } else if (unlikely(vmx->soft_vnmi_blocked)) {
+ vmx->vnmi_blocked_time +=
+ ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+ if (vmx->vnmi_blocked_time > 1000000000LL) {
+ /*
+ * This CPU doesn't support us in finding the end of an
+ * NMI-blocked window if the guest runs with IRQs
+ * disabled. So we pull the trigger after 1 s of
+ * futile waiting, but inform the user about this.
+ */
+ vmx->soft_vnmi_blocked = 0;
+ vmx->vcpu.arch.nmi_window_open = 1;
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ "state after 1 s timeout\n", __func__);
+ }
}
idt_vectoring_info = vmx->idt_vectoring_info;
@@ -3156,25 +3221,23 @@ static void vmx_intr_assist(struct kvm_v
vmx_update_window_states(vcpu);
- if (cpu_has_virtual_nmis()) {
- if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vcpu->arch.nmi_window_open) {
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = true;
- } else {
- enable_nmi_window(vcpu);
- return;
- }
- }
- if (vcpu->arch.nmi_injected) {
- vmx_inject_nmi(vcpu);
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu))
- enable_irq_window(vcpu);
+ if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+ if (vcpu->arch.nmi_window_open) {
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = true;
+ } else {
+ enable_nmi_window(vcpu);
return;
}
}
+ if (vcpu->arch.nmi_injected) {
+ vmx_inject_nmi(vcpu);
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (kvm_cpu_has_interrupt(vcpu))
+ enable_irq_window(vcpu);
+ return;
+ }
if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
if (vcpu->arch.interrupt_window_open)
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
@@ -3223,6 +3286,10 @@ static void vmx_vcpu_run(struct kvm_vcpu
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+ vmx->entry_time = ktime_get();
+
/* Handle invalid guest state instead of entering VMX */
if (vmx->emulation_required && emulate_invalid_guest_state) {
handle_invalid_guest_state(vcpu, kvm_run);
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 9/9] kvm: Enable NMI support for userspace irqchip
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
` (7 preceding siblings ...)
2008-09-19 12:04 ` [PATCH 8/9] VMX: work around lacking VNMI support Jan Kiszka
@ 2008-09-19 12:05 ` Jan Kiszka
2008-09-19 12:10 ` [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:05 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
Make use of the new KVM_NMI IOCTL to push NMIs into the KVM guest if the
user space APIC emulation or some other source raised them.
In order to use the 'nmi' monitor command which asynchronously injects
NMIs for the given CPU, a new service called kvm_inject_interrupt is
required. This will invoke cpu_interrupt on the target VCPU, working
around the fact that the QEMU service is not thread-safe.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
libkvm/libkvm.c | 31 +++++++++++++++++++++++++++++++
libkvm/libkvm.h | 23 +++++++++++++++++++++++
qemu/monitor.c | 5 ++++-
qemu/qemu-kvm-x86.c | 26 +++++++++++++++++++++++---
qemu/qemu-kvm.c | 18 +++++++++++++++++-
qemu/qemu-kvm.h | 2 ++
6 files changed, 100 insertions(+), 5 deletions(-)
Index: b/libkvm/libkvm.c
===================================================================
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -811,6 +811,11 @@ int try_push_interrupts(kvm_context_t kv
return kvm->callbacks->try_push_interrupts(kvm->opaque);
}
+int try_push_nmi(kvm_context_t kvm)
+{
+ return kvm->callbacks->try_push_nmi(kvm->opaque);
+}
+
void post_kvm_run(kvm_context_t kvm, int vcpu)
{
kvm->callbacks->post_kvm_run(kvm->opaque, vcpu);
@@ -835,6 +840,17 @@ int kvm_is_ready_for_interrupt_injection
return run->ready_for_interrupt_injection;
}
+int kvm_is_ready_for_nmi_injection(kvm_context_t kvm, int vcpu)
+{
+#ifdef KVM_CAP_NMI
+ struct kvm_run *run = kvm->run[vcpu];
+
+ return run->ready_for_nmi_injection;
+#else
+ return 0;
+#endif
+}
+
int kvm_run(kvm_context_t kvm, int vcpu)
{
int r;
@@ -842,6 +858,9 @@ int kvm_run(kvm_context_t kvm, int vcpu)
struct kvm_run *run = kvm->run[vcpu];
again:
+#ifdef KVM_CAP_NMI
+ run->request_nmi_window = try_push_nmi(kvm);
+#endif
#if !defined(__s390__)
if (!kvm->irqchip_in_kernel)
run->request_interrupt_window = try_push_interrupts(kvm);
@@ -917,6 +936,9 @@ again:
r = handle_halt(kvm, vcpu);
break;
case KVM_EXIT_IRQ_WINDOW_OPEN:
+#ifdef KVM_CAP_NMI
+ case KVM_EXIT_NMI_WINDOW_OPEN:
+#endif
break;
case KVM_EXIT_SHUTDOWN:
r = handle_shutdown(kvm, vcpu);
@@ -1001,6 +1023,15 @@ int kvm_has_sync_mmu(kvm_context_t kvm)
return r;
}
+int kvm_inject_nmi(kvm_context_t kvm, int vcpu)
+{
+#ifdef KVM_CAP_NMI
+ return ioctl(kvm->vcpu_fd[vcpu], KVM_NMI);
+#else
+ return -ENOSYS;
+#endif
+}
+
int kvm_init_coalesced_mmio(kvm_context_t kvm)
{
int r = 0;
Index: b/libkvm/libkvm.h
===================================================================
--- a/libkvm/libkvm.h
+++ b/libkvm/libkvm.h
@@ -66,6 +66,7 @@ struct kvm_callbacks {
int (*shutdown)(void *opaque, int vcpu);
int (*io_window)(void *opaque);
int (*try_push_interrupts)(void *opaque);
+ int (*try_push_nmi)(void *opaque);
void (*post_kvm_run)(void *opaque, int vcpu);
int (*pre_kvm_run)(void *opaque, int vcpu);
int (*tpr_access)(void *opaque, int vcpu, uint64_t rip, int is_write);
@@ -216,6 +217,17 @@ uint64_t kvm_get_apic_base(kvm_context_t
int kvm_is_ready_for_interrupt_injection(kvm_context_t kvm, int vcpu);
/*!
+ * \brief Check if a vcpu is ready for NMI injection
+ *
+ * This checks if vcpu is not already running in NMI context.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should be checked
+ * \return boolean indicating NMI injection readiness
+ */
+int kvm_is_ready_for_nmi_injection(kvm_context_t kvm, int vcpu);
+
+/*!
* \brief Read VCPU registers
*
* This gets the GP registers from the VCPU and outputs them
@@ -579,6 +591,17 @@ int kvm_set_lapic(kvm_context_t kvm, int
#endif
+/*!
+ * \brief Simulate an NMI
+ *
+ * This allows you to simulate a non-maskable interrupt.
+ *
+ * \param kvm Pointer to the current kvm_context
+ * \param vcpu Which virtual CPU should receive the NMI
+ * \return 0 on success
+ */
+int kvm_inject_nmi(kvm_context_t kvm, int vcpu);
+
#endif
/*!
Index: b/qemu/qemu-kvm-x86.c
===================================================================
--- a/qemu/qemu-kvm-x86.c
+++ b/qemu/qemu-kvm-x86.c
@@ -598,7 +598,8 @@ int kvm_arch_halt(void *opaque, int vcpu
CPUState *env = cpu_single_env;
if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
- (env->eflags & IF_MASK))) {
+ (env->eflags & IF_MASK)) &&
+ !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
env->halted = 1;
env->exception_index = EXCP_HLT;
}
@@ -627,8 +628,9 @@ void kvm_arch_post_kvm_run(void *opaque,
int kvm_arch_has_work(CPUState *env)
{
- if ((env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT)) &&
- (env->eflags & IF_MASK))
+ if (((env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_EXIT)) &&
+ (env->eflags & IF_MASK)) ||
+ (env->interrupt_request & CPU_INTERRUPT_NMI))
return 1;
return 0;
}
@@ -653,6 +655,24 @@ int kvm_arch_try_push_interrupts(void *o
return (env->interrupt_request & CPU_INTERRUPT_HARD) != 0;
}
+int kvm_arch_try_push_nmi(void *opaque)
+{
+ CPUState *env = cpu_single_env;
+ int r;
+
+ if (likely(!(env->interrupt_request & CPU_INTERRUPT_NMI)))
+ return 0;
+
+ if (kvm_is_ready_for_nmi_injection(kvm_context, env->cpu_index)) {
+ env->interrupt_request &= ~CPU_INTERRUPT_NMI;
+ r = kvm_inject_nmi(kvm_context, env->cpu_index);
+ if (r < 0)
+ printf("cpu %d fail inject NMI\n", env->cpu_index);
+ }
+
+ return (env->interrupt_request & CPU_INTERRUPT_NMI) != 0;
+}
+
void kvm_arch_update_regs_for_sipi(CPUState *env)
{
SegmentCache cs = env->segs[R_CS];
Index: b/qemu/qemu-kvm.c
===================================================================
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -128,6 +128,16 @@ static void on_vcpu(CPUState *env, void
qemu_cond_wait(&qemu_work_cond);
}
+static void inject_interrupt(void *data)
+{
+ cpu_interrupt(vcpu->env, (int)data);
+}
+
+void kvm_inject_interrupt(CPUState *env, int mask)
+{
+ on_vcpu(env, inject_interrupt, (void *)mask);
+}
+
void kvm_update_interrupt_request(CPUState *env)
{
int signal = 0;
@@ -166,6 +176,11 @@ static int try_push_interrupts(void *opa
return kvm_arch_try_push_interrupts(opaque);
}
+static int try_push_nmi(void *opaque)
+{
+ return kvm_arch_try_push_nmi(opaque);
+}
+
static void post_kvm_run(void *opaque, int vcpu)
{
@@ -397,7 +412,7 @@ static int kvm_main_loop_cpu(CPUState *e
while (1) {
while (!has_work(env))
kvm_main_loop_wait(env, 1000);
- if (env->interrupt_request & CPU_INTERRUPT_HARD)
+ if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI))
env->halted = 0;
if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
update_regs_for_sipi(env);
@@ -719,6 +734,7 @@ static struct kvm_callbacks qemu_kvm_ops
.shutdown = kvm_shutdown,
.io_window = kvm_io_window,
.try_push_interrupts = try_push_interrupts,
+ .try_push_nmi = try_push_nmi,
.post_kvm_run = post_kvm_run,
.pre_kvm_run = pre_kvm_run,
#ifdef TARGET_I386
Index: b/qemu/qemu-kvm.h
===================================================================
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -35,6 +35,7 @@ int kvm_get_phys_ram_page_bitmap(unsigne
void qemu_kvm_call_with_env(void (*func)(void *), void *data, CPUState *env);
void qemu_kvm_cpuid_on_env(CPUState *env);
+void kvm_inject_interrupt(CPUState *env, int mask);
void kvm_update_after_sipi(CPUState *env);
void kvm_update_interrupt_request(CPUState *env);
void kvm_cpu_register_physical_memory(target_phys_addr_t start_addr,
@@ -57,6 +58,7 @@ void kvm_arch_pre_kvm_run(void *opaque,
void kvm_arch_post_kvm_run(void *opaque, int vcpu);
int kvm_arch_has_work(CPUState *env);
int kvm_arch_try_push_interrupts(void *opaque);
+int kvm_arch_try_push_nmi(void *opaque);
void kvm_arch_update_regs_for_sipi(CPUState *env);
void kvm_arch_cpu_reset(CPUState *env);
Index: b/qemu/monitor.c
===================================================================
--- a/qemu/monitor.c
+++ b/qemu/monitor.c
@@ -1399,7 +1399,10 @@ static void do_inject_nmi(int cpu_index)
for (env = first_cpu; env != NULL; env = env->next_cpu)
if (env->cpu_index == cpu_index) {
- cpu_interrupt(env, CPU_INTERRUPT_NMI);
+ if (kvm_enabled())
+ kvm_inject_interrupt(env, CPU_INTERRUPT_NMI);
+ else
+ cpu_interrupt(env, CPU_INTERRUPT_NMI);
break;
}
}
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 0/9] Enhance NMI support of KVM - v2
@ 2008-09-19 12:06 Jan Kiszka
2008-09-19 11:59 ` [PATCH 1/9] VMX: include all IRQ window exits in statistics Jan Kiszka
` (9 more replies)
0 siblings, 10 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:06 UTC (permalink / raw)
To: kvm-devel; +Cc: Yang, Sheng, Avi Kivity
After going through the NMI patches again, implementing a workaround for
older VMX CPUs without virtual NMIs, I came across several inconsistencies
and lacking/forgotten features around NMI (and also a bit IRQ) handling.
So here is an enhanced patch series. Changes are:
- VMX: workaround for lacking VNMI support on older CPUs with VMX
- VMX: consolidate and fix NMI/IRQ window state determination
- VMX: consolidate enabling code for NMI/IRQ window notification
- VMX: fix NMI delivery in real-mode
- rebased patch for in-kernel NMI watchdog support
Looking forward to feedback.
Jan
--
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 0/9] Enhance NMI support of KVM - v2
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
` (8 preceding siblings ...)
2008-09-19 12:05 ` [PATCH 9/9] kvm: Enable NMI support for userspace irqchip Jan Kiszka
@ 2008-09-19 12:10 ` Jan Kiszka
9 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 12:10 UTC (permalink / raw)
Cc: kvm-devel, Yang, Sheng, Avi Kivity
Jan Kiszka wrote:
> After going through the NMI patches again, implementing a workaround for
> older VMX CPUs without virtual NMIs, I came across several inconsistency
> and lacking/forgotten features around NMI (and also a bit IRQ) handling.
> So here is an enhanced patch series. Changes are:
>
> - VMX: workaround for lacking VNMI support on older CPUs with VMX
> - VMX: consolidate and fix NMI/IRQ window state determination
> - VMX: consolidate enabling code for NMI/IRQ window notification
> - VMX: fix NMI delivery in real-mode
> - rebased patch for in-kernel NMI watchdog support
Oh, and:
- make 'nmi' monitor command kvm-safe
Jan
--
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source
2008-09-19 12:03 ` [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source Jan Kiszka
@ 2008-09-19 16:55 ` Jan Kiszka
2008-09-23 6:10 ` Yang, Sheng
1 sibling, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-19 16:55 UTC (permalink / raw)
Cc: kvm-devel, Yang, Sheng, Avi Kivity
[ Updated version with typo-- and without some spurious apic_debug
outputs. ]
LINT0 of the LAPIC can be used to route PIT events as NMI watchdog ticks
into the guest. This patch aligns the in-kernel irqchip emulation with
the user space irqchip with already supports this feature. The trick is
to route PIT interrupts to all LAPIC's LVT0 lines.
Rebased and slightly polished patch originally posted by Sheng Yang.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/kvm/i8254.c | 15 +++++++++++++++
arch/x86/kvm/irq.h | 1 +
arch/x86/kvm/lapic.c | 34 +++++++++++++++++++++++++++++-----
3 files changed, 45 insertions(+), 5 deletions(-)
Index: b/arch/x86/kvm/i8254.c
===================================================================
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -594,10 +594,25 @@ void kvm_free_pit(struct kvm *kvm)
static void __inject_pit_timer_intr(struct kvm *kvm)
{
+ struct kvm_vcpu *vcpu;
+ int i;
+
mutex_lock(&kvm->lock);
kvm_set_irq(kvm, 0, 1);
kvm_set_irq(kvm, 0, 0);
mutex_unlock(&kvm->lock);
+
+ /*
+ * Provides NMI watchdog support in IOAPIC mode.
+ * The route is: PIT -> PIC -> LVT0 in NMI mode,
+ * timer IRQs will continue to flow through the IOAPIC.
+ */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ kvm_apic_local_deliver(vcpu, APIC_LVT0);
+ }
}
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
Index: b/arch/x86/kvm/irq.h
===================================================================
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -93,6 +93,7 @@ void kvm_unregister_irq_ack_notifier(str
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
Index: b/arch/x86/kvm/lapic.c
===================================================================
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -382,6 +382,14 @@ static int __apic_accept_irq(struct kvm_
}
break;
+ case APIC_DM_EXTINT:
+ /*
+ * Should only be called by kvm_apic_local_deliver() with LVT0,
+ * before NMI watchdog was enabled. Already handled by
+ * kvm_apic_accept_pic_intr().
+ */
+ break;
+
default:
printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
delivery_mode);
@@ -745,10 +753,13 @@ static void apic_mmio_write(struct kvm_i
apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
break;
+ case APIC_LVT0:
+ if (val == APIC_DM_NMI)
+ apic_debug("Receive NMI setting on APIC_LVT0 "
+ "for cpu %d\n", apic->vcpu->vcpu_id);
case APIC_LVTT:
case APIC_LVTTHMR:
case APIC_LVTPC:
- case APIC_LVT0:
case APIC_LVT1:
case APIC_LVTERR:
/* TODO: Check vector */
@@ -965,12 +976,25 @@ int apic_has_pending_timer(struct kvm_vc
return 0;
}
-static int __inject_apic_timer_irq(struct kvm_lapic *apic)
+int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
{
- int vector;
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ int vector, mode, trig_mode;
+ u32 reg;
- vector = apic_lvt_vector(apic, APIC_LVTT);
- return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+ if (apic && apic_enabled(apic)) {
+ reg = apic_get_reg(apic, lvt_type);
+ vector = reg & APIC_VECTOR_MASK;
+ mode = reg & APIC_MODE_MASK;
+ trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
+ return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+ }
+ return 0;
+}
+
+static inline int __inject_apic_timer_irq(struct kvm_lapic *apic)
+{
+ return kvm_apic_local_deliver(apic->vcpu, APIC_LVTT);
}
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-19 12:04 ` [PATCH 8/9] VMX: work around lacking VNMI support Jan Kiszka
@ 2008-09-21 14:31 ` Gleb Natapov
2008-09-21 16:57 ` Jan Kiszka
0 siblings, 1 reply; 22+ messages in thread
From: Gleb Natapov @ 2008-09-21 14:31 UTC (permalink / raw)
To: Jan Kiszka; +Cc: kvm-devel, Yang, Sheng, Avi Kivity
Hi Jan,
On Fri, Sep 19, 2008 at 02:04:37PM +0200, Jan Kiszka wrote:
> static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> + if (!cpu_has_virtual_nmis()) {
> + int desc_size = is_long_mode(vcpu) ? 16 : 8;
> + struct descriptor_table dt;
> + gpa_t gpa;
> + u64 desc;
> +
> + /*
> + * Deny delivery if the NMI will not be handled by an
> + * interrupt gate (workaround depends on IRQ masking).
> + */
> + vmx_get_idt(vcpu, &dt);
> + if (!vcpu->arch.rmode.active && dt.limit
> + >= desc_size * (NMI_VECTOR + 1) - 1) {
> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
> + dt.base + desc_size * NMI_VECTOR);
> + if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
> + && ((desc >> 40) & 0x7) != 0x6)
> + return;
> + }
Windows2003 sets NMI entry in IDT as a task gate (0x5) during hibernation and this check
prevents it from shutting down itself. It hangs in "It is safe to turn
your computer now" screen.
If I replace this part by:
if(vmx->soft_vnmi_blocked)
return;
It shut itself down properly.
> + vmx->soft_vnmi_blocked = 1;
> + vmx->vnmi_blocked_time = 0;
> + }
> +
--
Gleb.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-21 14:31 ` Gleb Natapov
@ 2008-09-21 16:57 ` Jan Kiszka
2008-09-21 18:08 ` Jan Kiszka
2008-09-22 6:41 ` Gleb Natapov
0 siblings, 2 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-21 16:57 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm-devel, Yang, Sheng, Avi Kivity
[-- Attachment #1: Type: text/plain, Size: 1895 bytes --]
Gleb Natapov wrote:
> Hi Jan,
>
> On Fri, Sep 19, 2008 at 02:04:37PM +0200, Jan Kiszka wrote:
>> static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> @@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>
>> + if (!cpu_has_virtual_nmis()) {
>> + int desc_size = is_long_mode(vcpu) ? 16 : 8;
>> + struct descriptor_table dt;
>> + gpa_t gpa;
>> + u64 desc;
>> +
>> + /*
>> + * Deny delivery if the NMI will not be handled by an
>> + * interrupt gate (workaround depends on IRQ masking).
>> + */
>> + vmx_get_idt(vcpu, &dt);
>> + if (!vcpu->arch.rmode.active && dt.limit
>> + >= desc_size * (NMI_VECTOR + 1) - 1) {
>> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
>> + dt.base + desc_size * NMI_VECTOR);
>> + if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
>> + && ((desc >> 40) & 0x7) != 0x6)
>> + return;
>> + }
>
> Windows2003 sets NMI entry in IDT as a task gate (0x5) during hibernation and this check
> prevents it from shutting down itself. It hangs in "It is save to turn
> your computer now" screen.
Grmbl, what a weird guest...
Is this a regression of this patch because NMIs were considered broken
by Windows on that host CPU so far?
> If I replace this part by:
> if(vmx->soft_vnmi_blocked)
> return;
> It shut itself down properly.
OK, but that almost always evaluates to false here.
I guess (hope) Windows will switch to an NMI task which has IRQs
disabled (!(TSS.EFLAGS & IF)), so this may become another check for
those weird task users. Waiting on the next IRQ window for NMI injection
should work equally well with tasks, given they disable IRQs properly.
Thanks for testing!
Jan
PS: There are more issues in my v1 series, update will follow tomorrow.
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-21 16:57 ` Jan Kiszka
@ 2008-09-21 18:08 ` Jan Kiszka
2008-09-22 6:41 ` Gleb Natapov
1 sibling, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-21 18:08 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm-devel, Yang, Sheng, Avi Kivity
[-- Attachment #1: Type: text/plain, Size: 2541 bytes --]
Jan Kiszka wrote:
> Gleb Natapov wrote:
>> Hi Jan,
>>
>> On Fri, Sep 19, 2008 at 02:04:37PM +0200, Jan Kiszka wrote:
>>> static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
>>> {
>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>> @@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
>>> {
>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>
>>> + if (!cpu_has_virtual_nmis()) {
>>> + int desc_size = is_long_mode(vcpu) ? 16 : 8;
>>> + struct descriptor_table dt;
>>> + gpa_t gpa;
>>> + u64 desc;
>>> +
>>> + /*
>>> + * Deny delivery if the NMI will not be handled by an
>>> + * interrupt gate (workaround depends on IRQ masking).
>>> + */
>>> + vmx_get_idt(vcpu, &dt);
>>> + if (!vcpu->arch.rmode.active && dt.limit
>>> + >= desc_size * (NMI_VECTOR + 1) - 1) {
>>> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
>>> + dt.base + desc_size * NMI_VECTOR);
>>> + if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
>>> + && ((desc >> 40) & 0x7) != 0x6)
>>> + return;
>>> + }
>> Windows2003 sets NMI entry in IDT as a task gate (0x5) during hibernation and this check
>> prevents it from shutting down itself. It hangs in "It is save to turn
>> your computer now" screen.
>
> Grmbl, what a weird guest...
>
> Is this a regression of this patch because NMIs were considered broken
> by Windows on that host CPU so far?
>
>> If I replace this part by:
>> if(vmx->soft_vnmi_blocked)
>> return;
>> It shut itself down properly.
>
> OK, but that almost always evaluates to false here.
>
> I guess (hope) Windows will switch to an NMI task which has IRQs
> disabled (!(TSS.EFLAGS & IF)), so this may become another check for
> those weird task users. Waiting on the next IRQ window for NMI injection
> should work equally well with tasks, given they disable IRQs properly.
I thought about it again, fortunately before implementing this horribly
complex additional check: These safety belts make no real sense. Even if
we double-check that the guest is starting to handle NMIs with IRQs
disabled, it may still fiddle with its IRQ mask before the originally
NMI-enabling iret, confusing our soft_vnmi_blocked maintenance.
So, instead of adding more paranoid checks, let's drop them altogether,
relying on reasonably designed guests that have IRQs disabled as long as
NMIs are handled. If they come with such weirdness, all we may cause is
virtual NMI nesting in fairly rare cases for such unusual guests (if
they exist at all).
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-21 16:57 ` Jan Kiszka
2008-09-21 18:08 ` Jan Kiszka
@ 2008-09-22 6:41 ` Gleb Natapov
2008-09-22 7:19 ` Jan Kiszka
1 sibling, 1 reply; 22+ messages in thread
From: Gleb Natapov @ 2008-09-22 6:41 UTC (permalink / raw)
To: Jan Kiszka; +Cc: kvm-devel, Yang, Sheng, Avi Kivity
On Sun, Sep 21, 2008 at 06:57:32PM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > Hi Jan,
> >
> > On Fri, Sep 19, 2008 at 02:04:37PM +0200, Jan Kiszka wrote:
> >> static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
> >> {
> >> struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> @@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
> >> {
> >> struct vcpu_vmx *vmx = to_vmx(vcpu);
> >>
> >> + if (!cpu_has_virtual_nmis()) {
> >> + int desc_size = is_long_mode(vcpu) ? 16 : 8;
> >> + struct descriptor_table dt;
> >> + gpa_t gpa;
> >> + u64 desc;
> >> +
> >> + /*
> >> + * Deny delivery if the NMI will not be handled by an
> >> + * interrupt gate (workaround depends on IRQ masking).
> >> + */
> >> + vmx_get_idt(vcpu, &dt);
> >> + if (!vcpu->arch.rmode.active && dt.limit
> >> + >= desc_size * (NMI_VECTOR + 1) - 1) {
> >> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
> >> + dt.base + desc_size * NMI_VECTOR);
> >> + if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
> >> + && ((desc >> 40) & 0x7) != 0x6)
> >> + return;
> >> + }
> >
> > Windows2003 sets NMI entry in IDT as a task gate (0x5) during hibernation and this check
> > prevents it from shutting down itself. It hangs in "It is save to turn
> > your computer now" screen.
>
> Grmbl, what a weird guest...
This is a known trick that some OSes use.
>
> Is this a regression of this patch because NMIs were considered broken
> by Windows on that host CPU so far?
>
Nope. This is not a regression. Hibernation hangs in the same place with
the current git on this machine. It works on newer CPUs.
> > If I replace this part by:
> > if(vmx->soft_vnmi_blocked)
> > return;
> > It shut itself down properly.
>
> OK, but that almost always evaluates to false here.
Without this check the guest BSODs. CPU 0 sends two NMIs in a row to CPU 1 and
if second one is accepted something goes wrong.
--
Gleb.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-22 6:41 ` Gleb Natapov
@ 2008-09-22 7:19 ` Jan Kiszka
2008-09-22 7:39 ` Gleb Natapov
0 siblings, 1 reply; 22+ messages in thread
From: Jan Kiszka @ 2008-09-22 7:19 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm-devel, Yang, Sheng, Avi Kivity
[-- Attachment #1: Type: text/plain, Size: 2358 bytes --]
Gleb Natapov wrote:
> On Sun, Sep 21, 2008 at 06:57:32PM +0200, Jan Kiszka wrote:
>> Gleb Natapov wrote:
>>> Hi Jan,
>>>
>>> On Fri, Sep 19, 2008 at 02:04:37PM +0200, Jan Kiszka wrote:
>>>> static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
>>>> {
>>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>> @@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
>>>> {
>>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>>
>>>> + if (!cpu_has_virtual_nmis()) {
>>>> + int desc_size = is_long_mode(vcpu) ? 16 : 8;
>>>> + struct descriptor_table dt;
>>>> + gpa_t gpa;
>>>> + u64 desc;
>>>> +
>>>> + /*
>>>> + * Deny delivery if the NMI will not be handled by an
>>>> + * interrupt gate (workaround depends on IRQ masking).
>>>> + */
>>>> + vmx_get_idt(vcpu, &dt);
>>>> + if (!vcpu->arch.rmode.active && dt.limit
>>>> + >= desc_size * (NMI_VECTOR + 1) - 1) {
>>>> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
>>>> + dt.base + desc_size * NMI_VECTOR);
>>>> + if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
>>>> + && ((desc >> 40) & 0x7) != 0x6)
>>>> + return;
>>>> + }
>>> Windows2003 sets NMI entry in IDT as a task gate (0x5) during hibernation and this check
>>> prevents it from shutting down itself. It hangs in "It is save to turn
>>> your computer now" screen.
>> Grmbl, what a weird guest...
> This is a known trick that some OSes use.
OK. Out of curiosity: What is that trick precisely?
>
>> Is this a regression of this patch because NMIs were considered broken
>> by Windows on that host CPU so far?
>>
> Nope. This is not a regression. Hibernation hangs in the same place with
> the current git on this machine. It works on newer CPUs.
>
>
>>> If I replace this part by:
>>> if(vmx->soft_vnmi_blocked)
>>> return;
>>> It shut itself down properly.
>> OK, but that almost always evaluates to false here.
> Without this check guest BSODs. CPU 0 send two NMI in a row to CPU 1 and
> if second one is accepted something goes wrong.
That should have been caught at the caller site of vmx_inject_nmi
already, having to catch it here is an indication of a deeper problem.
Are you sure the NMIs are sent by CPU (i.e. kvm_inject_nmi is called
twice)? Maybe it is a bug I fixed meanwhile, an updated series goes out
later this morning.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-22 7:19 ` Jan Kiszka
@ 2008-09-22 7:39 ` Gleb Natapov
2008-09-22 7:48 ` Jan Kiszka
0 siblings, 1 reply; 22+ messages in thread
From: Gleb Natapov @ 2008-09-22 7:39 UTC (permalink / raw)
To: Jan Kiszka; +Cc: kvm-devel, Yang, Sheng, Avi Kivity
On Mon, Sep 22, 2008 at 09:19:27AM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Sun, Sep 21, 2008 at 06:57:32PM +0200, Jan Kiszka wrote:
> >> Gleb Natapov wrote:
> >>> Hi Jan,
> >>>
> >>> On Fri, Sep 19, 2008 at 02:04:37PM +0200, Jan Kiszka wrote:
> >>>> static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
> >>>> {
> >>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
> >>>> @@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
> >>>> {
> >>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
> >>>>
> >>>> + if (!cpu_has_virtual_nmis()) {
> >>>> + int desc_size = is_long_mode(vcpu) ? 16 : 8;
> >>>> + struct descriptor_table dt;
> >>>> + gpa_t gpa;
> >>>> + u64 desc;
> >>>> +
> >>>> + /*
> >>>> + * Deny delivery if the NMI will not be handled by an
> >>>> + * interrupt gate (workaround depends on IRQ masking).
> >>>> + */
> >>>> + vmx_get_idt(vcpu, &dt);
> >>>> + if (!vcpu->arch.rmode.active && dt.limit
> >>>> + >= desc_size * (NMI_VECTOR + 1) - 1) {
> >>>> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
> >>>> + dt.base + desc_size * NMI_VECTOR);
> >>>> + if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
> >>>> + && ((desc >> 40) & 0x7) != 0x6)
> >>>> + return;
> >>>> + }
> >>> Windows2003 sets NMI entry in IDT as a task gate (0x5) during hibernation and this check
> >>> prevents it from shutting down itself. It hangs in "It is save to turn
> >>> your computer now" screen.
> >> Grmbl, what a weird guest...
> > This is a known trick that some OSes use.
>
> OK. Out of curiosity: What is that trick precisely?
>
As far as I understand it, this way it can be guaranteed that the handler
will be executed with a known state. Here is the attempt to make linux
do the same:
http://marc.info/?l=linux-kernel&m=121638440618671&w=4
> >
> >> Is this a regression of this patch because NMIs were considered broken
> >> by Windows on that host CPU so far?
> >>
> > Nope. This is not a regression. Hibernation hangs in the same place with
> > the current git on this machine. It works on newer CPUs.
> >
> >
> >>> If I replace this part by:
> >>> if(vmx->soft_vnmi_blocked)
> >>> return;
> >>> It shut itself down properly.
> >> OK, but that almost always evaluates to false here.
> > Without this check guest BSODs. CPU 0 send two NMI in a row to CPU 1 and
> > if second one is accepted something goes wrong.
>
> That should have been caught at the caller site of vmx_inject_nmi
> already, having to catch it here is an indication of a deeper problem.
I understand that. I am trying to find out why we are getting there with
vmx->soft_vnmi_blocked == 1 at all.
> Are you sure the NMIs are sent by CPU (i.e. kvm_inject_nmi is called
> twice)?
I am sure. I have a printk there :)
> Maybe it is a bug I fixed meanwhile, an updated series goes out
> later this morning.
Will try it.
--
Gleb.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 8/9] VMX: work around lacking VNMI support
2008-09-22 7:39 ` Gleb Natapov
@ 2008-09-22 7:48 ` Jan Kiszka
0 siblings, 0 replies; 22+ messages in thread
From: Jan Kiszka @ 2008-09-22 7:48 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm-devel, Yang, Sheng, Avi Kivity
[-- Attachment #1: Type: text/plain, Size: 3135 bytes --]
Gleb Natapov wrote:
> On Mon, Sep 22, 2008 at 09:19:27AM +0200, Jan Kiszka wrote:
>> Gleb Natapov wrote:
>>> On Sun, Sep 21, 2008 at 06:57:32PM +0200, Jan Kiszka wrote:
>>>> Gleb Natapov wrote:
>>>>> Hi Jan,
>>>>>
>>>>> On Fri, Sep 19, 2008 at 02:04:37PM +0200, Jan Kiszka wrote:
>>>>>> static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
>>>>>> {
>>>>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>>>> @@ -2356,6 +2384,29 @@ static void vmx_inject_nmi(struct kvm_vc
>>>>>> {
>>>>>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>>>>>>
>>>>>> + if (!cpu_has_virtual_nmis()) {
>>>>>> + int desc_size = is_long_mode(vcpu) ? 16 : 8;
>>>>>> + struct descriptor_table dt;
>>>>>> + gpa_t gpa;
>>>>>> + u64 desc;
>>>>>> +
>>>>>> + /*
>>>>>> + * Deny delivery if the NMI will not be handled by an
>>>>>> + * interrupt gate (workaround depends on IRQ masking).
>>>>>> + */
>>>>>> + vmx_get_idt(vcpu, &dt);
>>>>>> + if (!vcpu->arch.rmode.active && dt.limit
>>>>>> + >= desc_size * (NMI_VECTOR + 1) - 1) {
>>>>>> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu,
>>>>>> + dt.base + desc_size * NMI_VECTOR);
>>>>>> + if (kvm_read_guest(vcpu->kvm, gpa, &desc, 8) == 0
>>>>>> + && ((desc >> 40) & 0x7) != 0x6)
>>>>>> + return;
>>>>>> + }
>>>>> Windows2003 sets NMI entry in IDT as a task gate (0x5) during hibernation and this check
>>>>> prevents it from shutting down itself. It hangs at the "It is safe to turn
>>>>> off your computer now" screen.
>>>> Grmbl, what a weird guest...
>>> This is a known trick that some OSes use.
>> OK. Out of curiosity: What is that trick precisely?
>>
> As far as I understand it, this way it can be guaranteed that the handler
> will be executed in a known state. Here is an attempt to make Linux
> do the same:
> http://marc.info/?l=linux-kernel&m=121638440618671&w=4
I see, dedicated stacks for intra-privilege level switching on good old
32-bit - makes sense.
>
>>>> Is this a regression of this patch because NMIs were considered broken
>>>> by Windows on that host CPU so far?
>>>>
>>> Nope. This is not a regression. Hibernation hangs in the same place with
>>> the current git on this machine. It works on newer CPUs.
>>>
>>>
>>>>> If I replace this part by:
>>>>> if(vmx->soft_vnmi_blocked)
>>>>> return;
>>>>> It shut itself down properly.
>>>> OK, but that almost always evaluates to false here.
>>> Without this check the guest BSODs. CPU 0 sends two NMIs in a row to CPU 1,
>>> and if the second one is accepted something goes wrong.
>> That should have been caught at the caller site of vmx_inject_nmi
>> already, having to catch it here is an indication of a deeper problem.
> I understand that. I am trying to find out why we are getting there with
> vmx->soft_vnmi_blocked == 1 at all.
Much appreciated.
>
>> Are you sure the NMIs are sent by CPU (i.e. kvm_inject_nmi is called
>> twice)?
> I am sure. I have a printk there :)
>
>> Maybe it is a bug I fixed meanwhile, an updated series goes out
>> later this morning.
> Will try it.
/me is preparing the posting now.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source
2008-09-19 12:03 ` [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source Jan Kiszka
2008-09-19 16:55 ` Jan Kiszka
@ 2008-09-23 6:10 ` Yang, Sheng
2008-09-23 15:04 ` Jan Kiszka
1 sibling, 1 reply; 22+ messages in thread
From: Yang, Sheng @ 2008-09-23 6:10 UTC (permalink / raw)
To: Jan Kiszka; +Cc: kvm-devel, Avi Kivity
On Friday 19 September 2008 20:03:02 Jan Kiszka wrote:
> LINT0 of the LAPIC can be used to route PIT events as NMI watchdog
> ticks into the guest. This patch aligns the in-kernel irqchip emulation
> with the user space irqchip which already supports this feature. The
> trick is to route PIT interrupts to all LAPIC's LVT0 lines.
>
> Rebased and slightly polished patch originally posted by Sheng
> Yang.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
Thanks for picking up this patch again!
Have you tested any Windows guests with this watchdog? Last time I dropped it
because it caused a BSOD on some versions of Windows (IRQL_NOT_LESS_OR_EQUAL). I
don't remember the exact situation there, but you may want to give it a try.
--
regards
Yang, Sheng
>
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
> arch/x86/kvm/i8254.c | 15 +++++++++++++++
> arch/x86/kvm/irq.h | 1 +
> arch/x86/kvm/lapic.c | 32 ++++++++++++++++++++++++++++----
> 3 files changed, 44 insertions(+), 4 deletions(-)
>
> Index: b/arch/x86/kvm/i8254.c
> ===================================================================
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -594,10 +594,25 @@ void kvm_free_pit(struct kvm *kvm)
>
> static void __inject_pit_timer_intr(struct kvm *kvm)
> {
> + struct kvm_vcpu *vcpu;
> + int i;
> +
> mutex_lock(&kvm->lock);
> kvm_set_irq(kvm, 0, 1);
> kvm_set_irq(kvm, 0, 0);
> mutex_unlock(&kvm->lock);
> +
> + /*
> + * Provides NMI watchdog support in IOAPIC mode.
> + * The route is: PIT -> PIC -> LVT0 in NMI mode,
> + * timer IRQs will continue to flow through the IOAPIC.
> + */
> + for (i = 0; i < KVM_MAX_VCPUS; ++i) {
> + vcpu = kvm->vcpus[i];
> + if (!vcpu)
> + continue;
> + kvm_apic_local_deliver(vcpu, APIC_LVT0);
> + }
> }
>
> void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
> Index: b/arch/x86/kvm/irq.h
> ===================================================================
> --- a/arch/x86/kvm/irq.h
> +++ b/arch/x86/kvm/irq.h
> @@ -93,6 +93,7 @@ void kvm_unregister_irq_ack_notifier(str
> void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
> void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
> void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
> +int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type);
> void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
> void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
> void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
> Index: b/arch/x86/kvm/lapic.c
> ===================================================================
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -382,6 +382,14 @@ static int __apic_accept_irq(struct kvm_
> }
> break;
>
> + case APIC_DM_EXTINT:
> + /*
> + * Should only be called by kvm_apic_local_deliver() with
> LVT0, + * before NMI watchdog was enabled. Already handled
> by + * kvm_apic_accept_pic_intr().
> + */
> + break;
> +
> default:
> printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
> delivery_mode);
> @@ -749,6 +757,9 @@ static void apic_mmio_write(struct kvm_i
> case APIC_LVTTHMR:
> case APIC_LVTPC:
> case APIC_LVT0:
> + if (val == APIC_DM_NMI)
> + apic_debug("Receive NMI setting on APIC_LVT0 "
> + "for cpu %d\n", apic->vcpu->vcpu_id);
> case APIC_LVT1:
> case APIC_LVTERR:
> /* TODO: Check vector */
> @@ -965,12 +976,25 @@ int apic_has_pending_timer(struct kvm_vc
> return 0;
> }
>
> -static int __inject_apic_timer_irq(struct kvm_lapic *apic)
> +int kvm_apic_local_deliver(struct kvm_vcpu *vcpu, int lvt_type)
> {
> - int vector;
> + struct kvm_lapic *apic = vcpu->arch.apic;
> + int vector, mode, trig_mode;
> + u32 reg;
> +
> + if (apic && apic_enabled(apic)) {
> + reg = apic_get_reg(apic, lvt_type);
> + vector = reg & APIC_VECTOR_MASK;
> + mode = reg & APIC_MODE_MASK;
> + trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
> + return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
> + }
> + return 0;
> +}
>
> - vector = apic_lvt_vector(apic, APIC_LVTT);
> - return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
> +static inline int __inject_apic_timer_irq(struct kvm_lapic *apic)
> +{
> + return kvm_apic_local_deliver(apic->vcpu, APIC_LVTT);
> }
>
> static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source
2008-09-23 6:10 ` Yang, Sheng
@ 2008-09-23 15:04 ` Jan Kiszka
2008-09-24 10:18 ` Yang, Sheng
0 siblings, 1 reply; 22+ messages in thread
From: Jan Kiszka @ 2008-09-23 15:04 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel, Avi Kivity
Yang, Sheng wrote:
> On Friday 19 September 2008 20:03:02 Jan Kiszka wrote:
>> LINT0 of the LAPIC can be used to route PIT events as NMI watchdog
>> ticks into the guest. This patch aligns the in-kernel irqchip emulation
>> with the user space irqchip which already supports this feature. The
>> trick is to route PIT interrupts to all LAPIC's LVT0 lines.
>>
>> Rebased patch and slightly polished patch originally posted by Sheng
>> Yang.
>
> Signed-off-by: Sheng Yang <sheng.yang@intel.com>
>
> Thanks for picking up this patch again!
>
> Have you tested any Windows guests with this watchdog? Last time I dropped it
> because it caused a BSOD on some versions of Windows (IRQL_NOT_LESS_OR_EQUAL). I
> don't remember the exact situation there, but you may want to give it a try.
Not yet. I always tell my colleagues that I don't need Windows on my
desktop, I just need a few VM images - for testing... :)
I will try to dig out / generate some image and reproduce the issue you
and Gleb see. Hope it will trigger here as well. Anything special
required to make Windows use the NMI as watchdog?
Jan
--
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source
2008-09-23 15:04 ` Jan Kiszka
@ 2008-09-24 10:18 ` Yang, Sheng
0 siblings, 0 replies; 22+ messages in thread
From: Yang, Sheng @ 2008-09-24 10:18 UTC (permalink / raw)
To: Jan Kiszka; +Cc: kvm-devel, Avi Kivity
On Tuesday 23 September 2008 23:04:48 Jan Kiszka wrote:
> Yang, Sheng wrote:
> > On Friday 19 September 2008 20:03:02 Jan Kiszka wrote:
> >> LINT0 of the LAPIC can be used to route PIT events as NMI watchdog
> >> ticks into the guest. This patch aligns the in-kernel irqchip emulation
> >> with the user space irqchip which already supports this feature. The
> >> trick is to route PIT interrupts to all LAPIC's LVT0 lines.
> >>
> >> Rebased patch and slightly polished patch originally posted by Sheng
> >> Yang.
> >
> > Signed-off-by: Sheng Yang <sheng.yang@intel.com>
> >
> > Thanks for pick up this patch again!
> >
> > Have you tested any Windows guests with this watchdog? Last time I dropped
> > it because it caused a BSOD on some versions of
> > Windows (IRQL_NOT_LESS_OR_EQUAL). I don't remember the exact situation
> > there, but you may want to give it a try.
>
> Not yet. I always tell my colleagues that I don't need Windows on my
> desktop, I just need a few VM images - for testing... :)
>
> I will try to dig out / generate some image and reproduce the issue you
> and Gleb see. Hope it will trigger here as well. Anything special
> required to make Windows use the NMI as watchdog?
>
I don't know if Windows uses an NMI watchdog. In fact, my original patch just
caused Windows to BSOD, and I think Windows doesn't use it (the Linux NMI
watchdog mechanism is a little tricky)...
--
regards
Yang, Sheng
^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2008-09-24 10:17 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-09-19 12:06 [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
2008-09-19 11:59 ` [PATCH 1/9] VMX: include all IRQ window exits in statistics Jan Kiszka
2008-09-19 12:01 ` [PATCH 2/9] VMX: refactor/fix IRQ and NMI injectability determination Jan Kiszka
2008-09-19 12:01 ` [PATCH 3/9] VMX: refactor IRQ and NMI window enabling Jan Kiszka
2008-09-19 12:02 ` [PATCH 4/9] VMX: fix real-mode NMI support Jan Kiszka
2008-09-19 12:03 ` [PATCH 5/9] kvm-x86: Enable NMI Watchdog via in-kernel PIT source Jan Kiszka
2008-09-19 16:55 ` Jan Kiszka
2008-09-23 6:10 ` Yang, Sheng
2008-09-23 15:04 ` Jan Kiszka
2008-09-24 10:18 ` Yang, Sheng
2008-09-19 12:03 ` [PATCH 6/9] kvm-x86: Support for user space injected NMIs Jan Kiszka
2008-09-19 12:03 ` [PATCH 7/9] VMX: Provide support " Jan Kiszka
2008-09-19 12:04 ` [PATCH 8/9] VMX: work around lacking VNMI support Jan Kiszka
2008-09-21 14:31 ` Gleb Natapov
2008-09-21 16:57 ` Jan Kiszka
2008-09-21 18:08 ` Jan Kiszka
2008-09-22 6:41 ` Gleb Natapov
2008-09-22 7:19 ` Jan Kiszka
2008-09-22 7:39 ` Gleb Natapov
2008-09-22 7:48 ` Jan Kiszka
2008-09-19 12:05 ` [PATCH 9/9] kvm: Enable NMI support for userspace irqchip Jan Kiszka
2008-09-19 12:10 ` [PATCH 0/9] Enhance NMI support of KVM - v2 Jan Kiszka
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox