From: Jan Kiszka <jan.kiszka@siemens.com>
To: kvm-devel <kvm@vger.kernel.org>
Cc: "Xu, Jiajun" <jiajun.xu@intel.com>,
"Yang, Sheng" <sheng.yang@intel.com>, Avi Kivity <avi@redhat.com>
Subject: [PATCH] KVM: VMX: Fix race between pending IRQ and NMI
Date: Mon, 10 Nov 2008 16:52:40 +0100 [thread overview]
Message-ID: <491858C8.2040401@siemens.com> (raw)
This patch addresses item #2215532 in the kvm bug tracker, but was
finally also visible with other Linux guests that use the NMI watchdog:
There is a subtle race in kvm-intel between a pending IRQ and a briefly
later arriving NMI (e.g. from the watchdog). If the IRQ was injected but
the guest exited again during injection due to some page fault, the flag
interrupt.pending remained true. If now some NMI just happened to be
pending as well, that one overruled the IRQ and was re-injected instead
(which is OK!). But during the next run of vmx_complete_interrupts the
originally pending IRQ fell on the floor and was forgotten. That means
we dequeued some IRQ from the [A]PIC, but never delivered it,
effectively causing a stall of IRQ deliveries. You may guess that it
took me a while to understand this...
The patch below addresses the issue by turning interrupt.pending into a
three-state variable: NONE, QUEUED (but not currently injected), and
INJECTED. If we overwrite some IRQ injection with an NMI, the state gets
properly updated. Moreover, we only transition from INJECTED to NONE to
avoid losing IRQs.
To simplify review and maintenance, the patch aligns the decision
pattern in vmx_intr_assist with do_interrupt_requests.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
arch/x86/include/asm/kvm_host.h | 6 +++
arch/x86/kvm/vmx.c | 61 +++++++++++++++++++++++++++-------------
arch/x86/kvm/x86.h | 4 +-
3 files changed, 49 insertions(+), 22 deletions(-)
Index: b/arch/x86/include/asm/kvm_host.h
===================================================================
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -301,7 +301,11 @@ struct kvm_vcpu_arch {
} exception;
struct kvm_queued_interrupt {
- bool pending;
+ enum {
+ KVMIRQ_NONE,
+ KVMIRQ_QUEUED,
+ KVMIRQ_INJECTED
+ } pending;
u8 nr;
} interrupt;
Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1037,7 +1037,7 @@ static int set_guest_debug(struct kvm_vc
static int vmx_get_irq(struct kvm_vcpu *vcpu)
{
- if (!vcpu->arch.interrupt.pending)
+ if (vcpu->arch.interrupt.pending == KVMIRQ_NONE)
return -1;
return vcpu->arch.interrupt.nr;
}
@@ -2487,9 +2487,16 @@ static void do_interrupt_requests(struct
}
if (vcpu->arch.nmi_injected) {
vmx_inject_nmi(vcpu);
+ if (vcpu->arch.interrupt.pending == KVMIRQ_INJECTED)
+ /*
+ * Degrade pending state, we will properly reinject
+ * after the NMI.
+ */
+ vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
enable_nmi_window(vcpu);
- else if (vcpu->arch.irq_summary
+ else if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+ || vcpu->arch.irq_summary
|| kvm_run->request_interrupt_window)
enable_irq_window(vcpu);
return;
@@ -2498,14 +2505,18 @@ static void do_interrupt_requests(struct
enable_nmi_window(vcpu);
if (vcpu->arch.interrupt_window_open) {
- if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+ if (vcpu->arch.irq_summary &&
+ vcpu->arch.interrupt.pending == KVMIRQ_NONE)
kvm_do_inject_irq(vcpu);
- if (vcpu->arch.interrupt.pending)
+ if (vcpu->arch.interrupt.pending != KVMIRQ_NONE) {
+ vcpu->arch.interrupt.pending = KVMIRQ_INJECTED;
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+ }
}
if (!vcpu->arch.interrupt_window_open &&
- (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
+ (vcpu->arch.irq_summary || kvm_run->request_interrupt_window
+ || vcpu->arch.interrupt.pending != KVMIRQ_NONE))
enable_irq_window(vcpu);
}
@@ -2624,7 +2635,8 @@ static int handle_exception(struct kvm_v
cr2 = vmcs_readl(EXIT_QUALIFICATION);
KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
(u32)((u64)cr2 >> 32), handler);
- if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending)
+ if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+ || vcpu->arch.exception.pending)
kvm_mmu_unprotect_page_virt(vcpu, cr2);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
}
@@ -3244,7 +3256,8 @@ static void vmx_complete_interrupts(stru
GUEST_INTR_STATE_NMI);
else
vmx->vcpu.arch.nmi_injected = false;
- }
+ } else if (vmx->vcpu.arch.interrupt.pending == KVMIRQ_INJECTED)
+ kvm_clear_interrupt_queue(&vmx->vcpu);
kvm_clear_exception_queue(&vmx->vcpu);
if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
@@ -3253,9 +3266,7 @@ static void vmx_complete_interrupts(stru
} else
kvm_queue_exception(&vmx->vcpu, vector);
vmx->idt_vectoring_info = 0;
- }
- kvm_clear_interrupt_queue(&vmx->vcpu);
- if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
+ } else if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) {
kvm_queue_interrupt(&vmx->vcpu, vector);
vmx->idt_vectoring_info = 0;
}
@@ -3278,22 +3289,34 @@ static void vmx_intr_assist(struct kvm_v
}
if (vcpu->arch.nmi_injected) {
vmx_inject_nmi(vcpu);
+ if (vcpu->arch.interrupt.pending == KVMIRQ_INJECTED)
+ /*
+ * Degrade pending state, we will properly reinject
+ * after the NMI.
+ */
+ vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
if (vcpu->arch.nmi_pending)
enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu))
+ else if (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+ || kvm_cpu_has_interrupt(vcpu))
enable_irq_window(vcpu);
return;
}
- if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
- if (vcpu->arch.interrupt_window_open)
+ if (vcpu->arch.interrupt_window_open) {
+ if (vcpu->arch.interrupt.pending == KVMIRQ_NONE
+ && kvm_cpu_has_interrupt(vcpu))
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
- else
- enable_irq_window(vcpu);
- }
- if (vcpu->arch.interrupt.pending) {
- vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
- kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+
+ if (vcpu->arch.interrupt.pending != KVMIRQ_NONE) {
+ vcpu->arch.interrupt.pending = KVMIRQ_INJECTED;
+ vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+ kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
+ }
}
+ if (!vcpu->arch.interrupt_window_open
+ && (vcpu->arch.interrupt.pending != KVMIRQ_NONE
+ || kvm_cpu_has_interrupt(vcpu)))
+ enable_irq_window(vcpu);
}
/*
Index: b/arch/x86/kvm/x86.h
===================================================================
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -10,13 +10,13 @@ static inline void kvm_clear_exception_q
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
{
- vcpu->arch.interrupt.pending = true;
+ vcpu->arch.interrupt.pending = KVMIRQ_QUEUED;
vcpu->arch.interrupt.nr = vector;
}
static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
{
- vcpu->arch.interrupt.pending = false;
+ vcpu->arch.interrupt.pending = KVMIRQ_NONE;
}
#endif
--
Siemens AG, Corporate Technology, CT SE 2 ES-OS
Corporate Competence Center Embedded Linux
next reply other threads:[~2008-11-10 15:53 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-10 15:52 Jan Kiszka [this message]
2008-11-16 12:29 ` [PATCH] KVM: VMX: Fix race between pending IRQ and NMI Avi Kivity
2008-11-16 14:58 ` Jan Kiszka
2008-11-16 15:15 ` Avi Kivity
2008-11-16 15:39 ` Jan Kiszka
2008-11-19 17:38 ` Avi Kivity
2008-11-19 21:28 ` Avi Kivity
2008-11-20 13:29 ` Jan Kiszka
2008-11-20 13:59 ` Avi Kivity
2008-11-21 10:04 ` Jan Kiszka
2008-11-21 11:14 ` Avi Kivity
2008-11-22 12:25 ` Avi Kivity
2008-11-24 9:55 ` Jan Kiszka
2008-11-25 14:45 ` Avi Kivity
2008-11-25 14:55 ` Jan Kiszka
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=491858C8.2040401@siemens.com \
--to=jan.kiszka@siemens.com \
--cc=avi@redhat.com \
--cc=jiajun.xu@intel.com \
--cc=kvm@vger.kernel.org \
--cc=sheng.yang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox