public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: VMX: Fix race between pending IRQ and NMI
@ 2008-11-10 15:52 Jan Kiszka
  2008-11-16 12:29 ` Avi Kivity
  0 siblings, 1 reply; 15+ messages in thread
From: Jan Kiszka @ 2008-11-10 15:52 UTC (permalink / raw)
  To: kvm-devel; +Cc: Xu, Jiajun, Yang, Sheng, Avi Kivity

This patch addresses item #2215532 in the kvm bug tracker; the problem
eventually also showed up with other Linux guests that use the NMI
watchdog:

There is a subtle race in kvm-intel between a pending IRQ and an NMI
arriving shortly afterwards (e.g. from the watchdog). If the IRQ was
injected but the guest exited again during injection due to some page
fault, the flag interrupt.pending remained true. If now some NMI just
happened to be pending as well, that one overruled the IRQ and was
re-injected instead (which is OK!). But during the next run of
vmx_complete_interrupts the originally pending IRQ fell on the floor and
was forgotten. That means we dequeued some IRQ from the [A]PIC, but
never delivered it, effectively causing a stall of IRQ deliveries. You
may guess that it took me a while to understand this...

The patch below addresses the issue by turning interrupt.pending into a
three-state variable: NONE, QUEUED (but not currently injected), and
INJECTED. If we overwrite some IRQ injection with an NMI, the state gets
properly updated (degraded from INJECTED back to QUEUED). Moreover, we
only transition from INJECTED to NONE to avoid losing IRQs.

To simplify review and maintenance, the patch aligns the decision
pattern in vmx_intr_assist with that of do_interrupt_requests.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/include/asm/kvm_host.h |    6 +++
 arch/x86/kvm/vmx.c              |   61 +++++++++++++++++++++++++++-------------
 arch/x86/kvm/x86.h              |    4 +-
 3 files changed, 49 insertions(+), 22 deletions(-)

Index: b/arch/x86/include/asm/kvm_host.h
===================================================================
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -301,7 +301,11 @@ struct kvm_vcpu_arch {
 	} exception;
 
 	struct kvm_queued_interrupt {
-		bool pending;
+		enum {
+			KVMIRQ_NONE,
+			KVMIRQ_QUEUED,
+			KVMIRQ_INJECTED
+		} pending;
 		u8 nr;
 	} interrupt;
 
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1037,7 +1037,7 @@ static int set_guest_debug(struct kvm_vc
 
 static int vmx_get_irq(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->arch.interrupt.pending)
+	if (vcpu->arch.interrupt.pending == KVMIRQ_NONE)
 		return -1;
 	return vcpu->arch.interrupt.nr;
 }
@@ -2487,9 +2487,16 @@ static void do_interrupt_requests(struct
 	}
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.interrupt.pending == KVMIRQ_INJECTED)
+			/*
+			 * Degrade pending state, we will properly reinject
+			 * after the NMI.
+			 */
+			vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
 		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
 			enable_nmi_window(vcpu);
-		else if (vcpu->arch.irq_summary
+		else if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+			 || vcpu->arch.irq_summary
 			 || kvm_run->request_interrupt_window)
 			enable_irq_window(vcpu);
 		return;
@@ -2498,14 +2505,18 @@ static void do_interrupt_requests(struct
 		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
-		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+		if (vcpu->arch.irq_summary &&
+		    vcpu->arch.interrupt.pending == KVMIRQ_NONE)
 			kvm_do_inject_irq(vcpu);
 
-		if (vcpu->arch.interrupt.pending)
+		if (vcpu->arch.interrupt.pending != KVMIRQ_NONE) {
+			vcpu->arch.interrupt.pending = KVMIRQ_INJECTED;
 			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+		}
 	}
 	if (!vcpu->arch.interrupt_window_open &&
-	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
+	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window
+	     || vcpu->arch.interrupt.pending != KVMIRQ_NONE))
 		enable_irq_window(vcpu);
 }
 
@@ -2624,7 +2635,8 @@ static int handle_exception(struct kvm_v
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
-		if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
+		if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+		    || vcpu->arch.exception.pending)
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
@@ -3244,7 +3256,8 @@ static void vmx_complete_interrupts(stru
 					GUEST_INTR_STATE_NMI);
 		else
 			vmx->vcpu.arch.nmi_injected = false;
-	}
+	} else if (vmx->vcpu.arch.interrupt.pending == KVMIRQ_INJECTED)
+		kvm_clear_interrupt_queue(&vmx->vcpu);
 	kvm_clear_exception_queue(&vmx->vcpu);
 	if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
@@ -3253,9 +3266,7 @@ static void vmx_complete_interrupts(stru
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
 		vmx->idt_vectoring_info = 0;
-	}
-	kvm_clear_interrupt_queue(&vmx->vcpu);
-	if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
+	} else if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
 		kvm_queue_interrupt(&vmx->vcpu, vector);
 		vmx->idt_vectoring_info = 0;
 	}
@@ -3278,22 +3289,34 @@ static void vmx_intr_assist(struct kvm_v
 	}
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.interrupt.pending == KVMIRQ_INJECTED)
+			/*
+			 * Degrade pending state, we will properly reinject
+			 * after the NMI.
+			 */
+			vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
 		if (vcpu->arch.nmi_pending)
 			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu))
+		else if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+			 || kvm_cpu_has_interrupt(vcpu))
 			enable_irq_window(vcpu);
 		return;
 	}
-	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open)
+	if (vcpu->arch.interrupt_window_open) {
+		if (vcpu->arch.interrupt.pending == KVMIRQ_NONE
+		    && kvm_cpu_has_interrupt(vcpu))
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-		else
-			enable_irq_window(vcpu);
-	}
-	if (vcpu->arch.interrupt.pending) {
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+
+		if (vcpu->arch.interrupt.pending != KVMIRQ_NONE) {
+			vcpu->arch.interrupt.pending = KVMIRQ_INJECTED;
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+			kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+		}
 	}
+	if (!vcpu->arch.interrupt_window_open
+	    && (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+		|| kvm_cpu_has_interrupt(vcpu)))
+		enable_irq_window(vcpu);
 }
 
 /*
Index: b/arch/x86/kvm/x86.h
===================================================================
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -10,13 +10,13 @@ static inline void kvm_clear_exception_q
 
 static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
 {
-	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
 	vcpu->arch.interrupt.nr = vector;
 }
 
 static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.interrupt.pending = false;
+	vcpu->arch.interrupt.pending = KVMIRQ_NONE;
 }
 
 #endif

-- 
Siemens AG, Corporate Technology, CT SE 2 ES-OS
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2008-11-25 14:57 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2008-11-10 15:52 [PATCH] KVM: VMX: Fix race between pending IRQ and NMI Jan Kiszka
2008-11-16 12:29 ` Avi Kivity
2008-11-16 14:58   ` Jan Kiszka
2008-11-16 15:15     ` Avi Kivity
2008-11-16 15:39       ` Jan Kiszka
2008-11-19 17:38         ` Avi Kivity
2008-11-19 21:28           ` Avi Kivity
2008-11-20 13:29             ` Jan Kiszka
2008-11-20 13:59               ` Avi Kivity
2008-11-21 10:04                 ` Jan Kiszka
2008-11-21 11:14                   ` Avi Kivity
2008-11-22 12:25                   ` Avi Kivity
2008-11-24  9:55                     ` Jan Kiszka
2008-11-25 14:45                       ` Avi Kivity
2008-11-25 14:55                         ` Jan Kiszka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox