public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Jan Kiszka <jan.kiszka@siemens.com>
To: kvm-devel <kvm@vger.kernel.org>
Cc: "Xu, Jiajun" <jiajun.xu@intel.com>,
	"Yang, Sheng" <sheng.yang@intel.com>, Avi Kivity <avi@redhat.com>
Subject: [PATCH] KVM: VMX: Fix race between pending IRQ and NMI
Date: Mon, 10 Nov 2008 16:52:40 +0100	[thread overview]
Message-ID: <491858C8.2040401@siemens.com> (raw)

This patch addresses item #2215532 in the kvm bug tracker; the problem
eventually also showed up with other Linux guests that use the NMI
watchdog:

There is a subtle race in kvm-intel between a pending IRQ and an NMI
that arrives shortly afterwards (e.g. from the watchdog). If the IRQ was
injected but the guest exited again during injection due to some page
fault, the flag interrupt.pending remained true. If some NMI now just
happened to be pending as well, that one overruled the IRQ and was
re-injected instead (which is OK!). But during the next run of
vmx_complete_interrupts the
originally pending IRQ fell on the floor and was forgotten. That means
we dequeued some IRQ from the [A]PIC, but never delivered it,
effectively causing a stall of IRQ deliveries. You may guess that it
took me a while to understand this...

The patch below addresses the issue by turning interrupt.pending into a
three-state variable: NONE, QUEUED (but not currently injected), and
INJECTED. If we overwrite some IRQ injection with an NMI, the state gets
properly updated. Moreover, we only transition from INJECTED to NONE to
avoid losing IRQs.

To simplify review and maintenance, the patch aligns the decision
pattern in vmx_intr_assist with do_interrupt_requests.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/include/asm/kvm_host.h |    6 +++
 arch/x86/kvm/vmx.c              |   61 +++++++++++++++++++++++++++-------------
 arch/x86/kvm/x86.h              |    4 +-
 3 files changed, 49 insertions(+), 22 deletions(-)

Index: b/arch/x86/include/asm/kvm_host.h
===================================================================
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -301,7 +301,11 @@ struct kvm_vcpu_arch {
 	} exception;
 
 	struct kvm_queued_interrupt {
-		bool pending;
+		enum {
+			KVMIRQ_NONE,
+			KVMIRQ_QUEUED,
+			KVMIRQ_INJECTED
+		} pending;
 		u8 nr;
 	} interrupt;
 
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1037,7 +1037,7 @@ static int set_guest_debug(struct kvm_vc
 
 static int vmx_get_irq(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->arch.interrupt.pending)
+	if (vcpu->arch.interrupt.pending == KVMIRQ_NONE)
 		return -1;
 	return vcpu->arch.interrupt.nr;
 }
@@ -2487,9 +2487,16 @@ static void do_interrupt_requests(struct
 	}
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.interrupt.pending == KVMIRQ_INJECTED)
+			/*
+			 * Degrade pending state, we will properly reinject
+			 * after the NMI.
+			 */
+			vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
 		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
 			enable_nmi_window(vcpu);
-		else if (vcpu->arch.irq_summary
+		else if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+			 || vcpu->arch.irq_summary
 			 || kvm_run->request_interrupt_window)
 			enable_irq_window(vcpu);
 		return;
@@ -2498,14 +2505,18 @@ static void do_interrupt_requests(struct
 		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
-		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+		if (vcpu->arch.irq_summary &&
+		    vcpu->arch.interrupt.pending == KVMIRQ_NONE)
 			kvm_do_inject_irq(vcpu);
 
-		if (vcpu->arch.interrupt.pending)
+		if (vcpu->arch.interrupt.pending != KVMIRQ_NONE) {
+			vcpu->arch.interrupt.pending = KVMIRQ_INJECTED;
 			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+		}
 	}
 	if (!vcpu->arch.interrupt_window_open &&
-	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
+	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window
+	     || vcpu->arch.interrupt.pending != KVMIRQ_NONE))
 		enable_irq_window(vcpu);
 }
 
@@ -2624,7 +2635,8 @@ static int handle_exception(struct kvm_v
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
-		if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
+		if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+		    || vcpu->arch.exception.pending)
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
@@ -3244,7 +3256,8 @@ static void vmx_complete_interrupts(stru
 					GUEST_INTR_STATE_NMI);
 		else
 			vmx->vcpu.arch.nmi_injected = false;
-	}
+	} else if (vmx->vcpu.arch.interrupt.pending == KVMIRQ_INJECTED)
+		kvm_clear_interrupt_queue(&vmx->vcpu);
 	kvm_clear_exception_queue(&vmx->vcpu);
 	if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
@@ -3253,9 +3266,7 @@ static void vmx_complete_interrupts(stru
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
 		vmx->idt_vectoring_info = 0;
-	}
-	kvm_clear_interrupt_queue(&vmx->vcpu);
-	if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
+	} else if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
 		kvm_queue_interrupt(&vmx->vcpu, vector);
 		vmx->idt_vectoring_info = 0;
 	}
@@ -3278,22 +3289,34 @@ static void vmx_intr_assist(struct kvm_v
 	}
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.interrupt.pending == KVMIRQ_INJECTED)
+			/*
+			 * Degrade pending state, we will properly reinject
+			 * after the NMI.
+			 */
+			vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
 		if (vcpu->arch.nmi_pending)
 			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu))
+		else if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+			 || kvm_cpu_has_interrupt(vcpu))
 			enable_irq_window(vcpu);
 		return;
 	}
-	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open)
+	if (vcpu->arch.interrupt_window_open) {
+		if (vcpu->arch.interrupt.pending == KVMIRQ_NONE
+		    && kvm_cpu_has_interrupt(vcpu))
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-		else
-			enable_irq_window(vcpu);
-	}
-	if (vcpu->arch.interrupt.pending) {
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+
+		if (vcpu->arch.interrupt.pending != KVMIRQ_NONE) {
+			vcpu->arch.interrupt.pending = KVMIRQ_INJECTED;
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+			kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+		}
 	}
+	if (!vcpu->arch.interrupt_window_open
+	    && (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+		|| kvm_cpu_has_interrupt(vcpu)))
+		enable_irq_window(vcpu);
 }
 
 /*
Index: b/arch/x86/kvm/x86.h
===================================================================
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -10,13 +10,13 @@ static inline void kvm_clear_exception_q
 
 static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
 {
-	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
 	vcpu->arch.interrupt.nr = vector;
 }
 
 static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.interrupt.pending = false;
+	vcpu->arch.interrupt.pending = KVMIRQ_NONE;
 }
 
 #endif

-- 
Siemens AG, Corporate Technology, CT SE 2 ES-OS
Corporate Competence Center Embedded Linux

             reply	other threads:[~2008-11-10 15:53 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-10 15:52 Jan Kiszka [this message]
2008-11-16 12:29 ` [PATCH] KVM: VMX: Fix race between pending IRQ and NMI Avi Kivity
2008-11-16 14:58   ` Jan Kiszka
2008-11-16 15:15     ` Avi Kivity
2008-11-16 15:39       ` Jan Kiszka
2008-11-19 17:38         ` Avi Kivity
2008-11-19 21:28           ` Avi Kivity
2008-11-20 13:29             ` Jan Kiszka
2008-11-20 13:59               ` Avi Kivity
2008-11-21 10:04                 ` Jan Kiszka
2008-11-21 11:14                   ` Avi Kivity
2008-11-22 12:25                   ` Avi Kivity
2008-11-24  9:55                     ` Jan Kiszka
2008-11-25 14:45                       ` Avi Kivity
2008-11-25 14:55                         ` Jan Kiszka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=491858C8.2040401@siemens.com \
    --to=jan.kiszka@siemens.com \
    --cc=avi@redhat.com \
    --cc=jiajun.xu@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=sheng.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox