[PATCH 10/11] VMX: work around lacking VNMI support

kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Jan Kiszka <jan.kiszka@siemens.com>
To: kvm-devel <kvm@vger.kernel.org>
Cc: "Yang, Sheng" <sheng.yang@intel.com>, Avi Kivity <avi@redhat.com>,
	Gleb Natapov <gleb@qumranet.com>
Subject: [PATCH 10/11] VMX: work around lacking VNMI support
Date: Mon, 22 Sep 2008 09:59:07 +0200	[thread overview]
Message-ID: <48D7504B.7050102@siemens.com> (raw)
In-Reply-To: <48D74CE6.5060008@siemens.com>

Older VMX supporting CPUs do not provide the "Virtual NMI" feature for
tracking the NMI-blocked state after injecting such events. For now
KVM is unable to inject NMIs on those CPUs.

Derived from Sheng Yang's suggestion to use the IRQ window notification
for detecting the end of NMI handlers, this patch implements virtual
NMI support without impact on the host's ability to receive real NMIs.
The downside is that the given approach requires some heuristics that
can cause NMI nesting in vary rare corner cases.

The approach works as follows:
 - inject NMI and set a software-based NMI-blocked flag
 - arm the IRQ window start notification whenever an NMI window is
   requested
 - if the guest exits due to an opening IRQ window, clear the emulated
   NMI-blocked flag
 - if the guest net execution time with NMI-blocked but without an IRQ
   window exceeds 1 second, force NMI-blocked reset and inject anyway

This approach covers most practical scenarios:
 - succeeding NMIs are seperated by at least one open IRQ window
 - the guest may spin with IRQs disabled (e.g. due to a bug), but
   leaving the NMI handler takes much less time than one second
 - the guest does not rely on strict ordering or timing of NMIs
   (would be problematic in virtualized environments anyway)

Successfully tested with the 'nmi n' monitor command, the kgdbts
testsuite on smp guests (additional patches required to add debug
register support to kvm) + the kernel's nmi_watchdog=1, and a Siemens-
specific board emulation (+ guest) that comes with its own NMI
watchdog mechanism.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/kvm/vmx.c |  168 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 111 insertions(+), 57 deletions(-)

Index: b/arch/x86/kvm/vmx.c
===================================================================
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -90,6 +90,11 @@ struct vcpu_vmx {
 	} rmode;
 	int vpid;
 	bool emulation_required;
+
+	/* Support for vnmi-less CPUs */
+	int soft_vnmi_blocked;
+	ktime_t entry_time;
+	s64 vnmi_blocked_time;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -2331,6 +2336,29 @@ out:
 	return ret;
 }
 
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis()) {
+		enable_irq_window(vcpu);
+		return;
+	}
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2356,6 +2384,19 @@ static void vmx_inject_nmi(struct kvm_vc
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	if (!cpu_has_virtual_nmis()) {
+		/*
+		 * Tracking the NMI-blocked state in software is built upon
+		 * finding the next open IRQ window. This, in turn, depends on
+		 * well-behaving guests: They have to keep IRQs disabled at
+		 * least as long as the NMI handler runs. Otherwise we may
+		 * cause NMI nesting, maybe breaking the guest. But as this is
+		 * highly unlikely, we can live with the residual risk.
+		 */
+		vmx->soft_vnmi_blocked = 1;
+		vmx->vnmi_blocked_time = 0;
+	}
+
 	++vcpu->stat.nmi_injections;
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
@@ -2380,6 +2421,8 @@ static void vmx_update_window_states(str
 		!(guest_intr & (GUEST_INTR_STATE_STI |
 				GUEST_INTR_STATE_MOV_SS |
 				GUEST_INTR_STATE_NMI));
+	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
+		vcpu->arch.nmi_window_open = 0;
 
 	vcpu->arch.interrupt_window_open =
 		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -2399,55 +2442,31 @@ static void kvm_do_inject_irq(struct kvm
 	kvm_queue_interrupt(vcpu, irq);
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis())
-		return;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
 	vmx_update_window_states(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.nmi_window_open) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_nmi_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			if (vcpu->arch.nmi_pending
-			    || kvm_run->request_nmi_window)
-				enable_nmi_window(vcpu);
-			else if (vcpu->arch.irq_summary
-				 || kvm_run->request_interrupt_window)
-				enable_irq_window(vcpu);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
 			return;
 		}
-		if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending || kvm_run->request_nmi_window)
 			enable_nmi_window(vcpu);
+		else if (vcpu->arch.irq_summary
+			 || kvm_run->request_interrupt_window)
+			enable_irq_window(vcpu);
+		return;
 	}
+	if (!vcpu->arch.nmi_window_open || kvm_run->request_nmi_window)
+		enable_nmi_window(vcpu);
 
 	if (vcpu->arch.interrupt_window_open) {
 		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
@@ -3071,6 +3090,37 @@ static int kvm_handle_exit(struct kvm_ru
 			exit_reason != EXIT_REASON_EPT_VIOLATION))
 		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
 		       "exit reason is 0x%x\n", __func__, exit_reason);
+
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
+		if (vcpu->arch.interrupt_window_open) {
+			vmx->soft_vnmi_blocked = 0;
+			vcpu->arch.nmi_window_open = 1;
+		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
+		    (kvm_run->request_nmi_window || vcpu->arch.nmi_pending)) {
+			/*
+			 * This CPU don't support us in finding the end of an
+			 * NMI-blocked window if the guest runs with IRQs
+			 * disabled. So we pull the trigger after 1 s of
+			 * futile waiting, but inform the user about this.
+			 */
+			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+			       "state on VCPU %d after 1 s timeout\n",
+			       __func__, vcpu->vcpu_id);
+			vmx->soft_vnmi_blocked = 0;
+			vmx->vcpu.arch.nmi_window_open = 1;
+		}
+
+		/*
+		 * If the user space waits to inject an NNI, exit ASAP
+		 */
+		if (vcpu->arch.nmi_window_open && kvm_run->request_nmi_window
+		    && !vcpu->arch.nmi_pending) {
+			kvm_run->exit_reason = KVM_EXIT_NMI_WINDOW_OPEN;
+			++vcpu->stat.nmi_window_exits;
+			return 0;
+		}
+	}
+
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3120,7 +3170,9 @@ static void vmx_complete_interrupts(stru
 		if (unblock_nmi && vector != DF_VECTOR)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
-	}
+	} else if (unlikely(vmx->soft_vnmi_blocked))
+		vmx->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
 
 	idt_vectoring_info = vmx->idt_vectoring_info;
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3160,25 +3212,23 @@ static void vmx_intr_assist(struct kvm_v
 
 	vmx_update_window_states(vcpu);
 
-	if (cpu_has_virtual_nmis()) {
-		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vcpu->arch.nmi_window_open) {
-				vcpu->arch.nmi_pending = false;
-				vcpu->arch.nmi_injected = true;
-			} else {
-				enable_nmi_window(vcpu);
-				return;
-			}
-		}
-		if (vcpu->arch.nmi_injected) {
-			vmx_inject_nmi(vcpu);
-			if (vcpu->arch.nmi_pending)
-				enable_nmi_window(vcpu);
-			else if (kvm_cpu_has_interrupt(vcpu))
-				enable_irq_window(vcpu);
+	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+		if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+		} else {
+			enable_nmi_window(vcpu);
 			return;
 		}
 	}
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		if (vcpu->arch.nmi_pending)
+			enable_nmi_window(vcpu);
+		else if (kvm_cpu_has_interrupt(vcpu))
+			enable_irq_window(vcpu);
+		return;
+	}
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
 		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
@@ -3227,6 +3277,10 @@ static void vmx_vcpu_run(struct kvm_vcpu
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info;
 
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
+		vmx->entry_time = ktime_get();
+
 	/* Handle invalid guest state instead of entering VMX */
 	if (vmx->emulation_required && emulate_invalid_guest_state) {
 		handle_invalid_guest_state(vcpu, kvm_run);

next prev parent reply	other threads:[~2008-09-22  8:03 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-22  7:44 [PATCH 0/11] Fix&Enhance NMI support for KVM - v3 Jan Kiszka
2008-09-22  7:58 ` [PATCH 1/11] VMX: include all IRQ window exits in statistics Jan Kiszka
2008-09-22  7:58 ` [PATCH 2/11] VMX: refactor/fix IRQ and NMI injectability determination Jan Kiszka
2008-09-22  7:58 ` [PATCH 3/11] VMX: refactor IRQ and NMI window enabling Jan Kiszka
2008-09-22  7:58 ` [PATCH 5/11] kvm-x86: Enable NMI Watchdog via in-kernel PIT source Jan Kiszka
2008-09-22  7:58 ` [PATCH 6/11] kvm-x86: VCPU with pending NMI is runnabled Jan Kiszka
2008-09-22  7:58 ` [PATCH 7/11] kvm: kick NMI receiving VCPU Jan Kiszka
2008-09-22  7:58 ` [PATCH 8/11] kvm-x86: Support for user space injected NMIs Jan Kiszka
2008-09-22  7:59 ` [PATCH 9/11] VMX: Provide support " Jan Kiszka
2008-09-23  6:28   ` Yang, Sheng
2008-09-23 15:42     ` Jan Kiszka
2008-09-22  7:59 ` Jan Kiszka [this message]
2008-09-22 14:15   ` [PATCH 10/11] VMX: work around lacking VNMI support Gleb Natapov
2008-09-23  8:46     ` Jan Kiszka
2008-09-23  8:50       ` Gleb Natapov
2008-09-23  8:57         ` Jan Kiszka
2008-09-23  9:00           ` Gleb Natapov
2008-09-23  9:08             ` Yang, Sheng
2008-09-23  9:15               ` Gleb Natapov
2008-09-23  9:24                 ` Yang, Sheng
2008-09-23  9:26                   ` Gleb Natapov
2008-09-23  9:37                     ` Yang, Sheng
2008-09-23  9:42                       ` Yang, Sheng
2008-09-23  9:45                         ` Gleb Natapov
2008-09-23  9:50                           ` Yang, Sheng
2008-09-24 12:40                             ` Jan Kiszka
2008-09-24 12:44                               ` Avi Kivity
2008-09-24 12:50                               ` Gleb Natapov
2008-09-24 12:56                                 ` Jan Kiszka
2008-09-24 13:02                                   ` Gleb Natapov
2008-09-24 13:08                                     ` Jan Kiszka
2008-09-24 13:24                                       ` Gleb Natapov
2008-09-24 13:33                                         ` Jan Kiszka
2008-09-24 13:35                                           ` Gleb Natapov
2008-09-24 13:35                                           ` Avi Kivity
2008-09-24 14:07                                         ` Jan Kiszka
2008-09-24 14:19                                           ` Avi Kivity
2008-09-24 13:11                                     ` Jan Kiszka
2008-09-24 13:17                                       ` Gleb Natapov
2008-09-24 13:20                                         ` Gleb Natapov
2008-09-24 13:39                                           ` Jan Kiszka
2008-09-24 14:48                                     ` Gleb Natapov
2008-09-23  9:27                 ` Yang, Sheng
2008-09-23 15:15               ` Jan Kiszka
2008-09-25  9:41   ` Jan Kiszka
2008-09-25 10:31     ` Avi Kivity
2008-09-25 14:22       ` Jan Kiszka
2008-09-27 10:57         ` Avi Kivity
2008-09-22  7:59 ` [PATCH 11/11] kvm: Enable NMI support for userspace irqchip Jan Kiszka
2008-09-22  8:00 ` [PATCH 4/11] VMX: fix real-mode NMI support Jan Kiszka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48D7504B.7050102@siemens.com \
    --to=jan.kiszka@siemens.com \
    --cc=avi@redhat.com \
    --cc=gleb@qumranet.com \
    --cc=kvm@vger.kernel.org \
    --cc=sheng.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).