kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/15] interrupt injection rework
@ 2009-04-13  9:55 Gleb Natapov
  2009-04-13  9:55 ` [PATCH 01/15] Make kvm_cpu_(has|get)_interrupt() work for userspace irqchip too Gleb Natapov
                   ` (16 more replies)
  0 siblings, 17 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Hi,

This patch series aims to consolidate the IRQ injection code for the
in-kernel IRQ chip and the userspace one. It also moves the IRQ injection
logic from SVM/VMX-specific code to x86.c.

Gleb Natapov (15):
  Make kvm_cpu_(has|get)_interrupt() work for userspace irqchip too.
  Consolidate userspace and kernel interrupt injection for VMX.
  Cleanup vmx_intr_assist()
  Use kvm_arch_interrupt_allowed() instead of checking
    interrupt_window_open directly
  Coalesce userspace/kernel irqchip interrupt injection logic.
  Use EVENTINJ to inject interrupts.
  Remove exception_injected() callback.
  Remove inject_pending_vectors() callback.
  kvm_push_irq() no longer used.
  sync_lapic_to_cr8() should always sync cr8 to V_TPR.
  Do not report TPR write to userspace if new value bigger or equal to
    a previous one.
  Get rid of arch.interrupt_window_open & arch.nmi_window_open
  Add NMI injection support to SVM.
  Move interrupt injection logic to x86.c
  Get rid of get_irq() callback.

 arch/x86/include/asm/kvm_host.h |   14 +-
 arch/x86/kvm/irq.c              |    7 +
 arch/x86/kvm/svm.c              |  252 +++++++++++++++++----------------------
 arch/x86/kvm/vmx.c              |  184 +++++++----------------------
 arch/x86/kvm/x86.c              |  105 +++++++++++++----
 arch/x86/kvm/x86.h              |    7 -
 6 files changed, 249 insertions(+), 320 deletions(-)


^ permalink raw reply	[flat|nested] 95+ messages in thread

* [PATCH 01/15] Make kvm_cpu_(has|get)_interrupt() work for userspace irqchip too.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 02/15] Consolidate userspace and kernel interrupt injection for VMX Gleb Natapov
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/irq.c |    7 +++++++
 arch/x86/kvm/svm.c |   11 +++++++----
 arch/x86/kvm/vmx.c |   18 +++++++++---------
 arch/x86/kvm/x86.c |    4 ++--
 4 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index cf17ed5..11c2757 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -24,6 +24,7 @@
 
 #include "irq.h"
 #include "i8254.h"
+#include "x86.h"
 
 /*
  * check if there are pending timer events
@@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
 	struct kvm_pic *s;
 
+	if (!irqchip_in_kernel(v->kvm))
+		return v->arch.irq_summary;
+
 	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
 		if (kvm_apic_accept_pic_intr(v)) {
 			s = pic_irqchip(v->kvm);	/* PIC */
@@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 	struct kvm_pic *s;
 	int vector;
 
+	if (!irqchip_in_kernel(v->kvm))
+		return kvm_pop_irq(v);
+
 	vector = kvm_get_apic_interrupt(v);	/* APIC */
 	if (vector == -1) {
 		if (kvm_apic_accept_pic_intr(v)) {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3ffb695..7ac57e7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2082,8 +2082,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (kvm_run->request_interrupt_window &&
-	    !svm->vcpu.arch.irq_summary) {
+	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
+	    kvm_run->request_interrupt_window &&
+	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
 		++svm->vcpu.stat.irq_window_exits;
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
@@ -2364,7 +2365,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
 		 (svm->vcpu.arch.hflags & HF_GIF_MASK));
 
-	if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
+	if (svm->vcpu.arch.interrupt_window_open &&
+	    kvm_cpu_has_interrupt(&svm->vcpu))
 		/*
 		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
 		 */
@@ -2374,7 +2376,8 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 	 * Interrupts blocked.  Wait for unblock.
 	 */
 	if (!svm->vcpu.arch.interrupt_window_open &&
-	    (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
+	    (kvm_cpu_has_interrupt(&svm->vcpu) ||
+	     kvm_run->request_interrupt_window))
 		svm_set_vintr(svm);
 	else
 		svm_clear_vintr(svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c6997c0..b3292c1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2535,21 +2535,20 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 		vmx_inject_nmi(vcpu);
 		if (vcpu->arch.nmi_pending)
 			enable_nmi_window(vcpu);
-		else if (vcpu->arch.irq_summary
-			 || kvm_run->request_interrupt_window)
+		else if (kvm_cpu_has_interrupt(vcpu) ||
+			 kvm_run->request_interrupt_window)
 			enable_irq_window(vcpu);
 		return;
 	}
 
 	if (vcpu->arch.interrupt_window_open) {
-		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-			kvm_queue_interrupt(vcpu, kvm_pop_irq(vcpu));
+		if (kvm_cpu_has_interrupt(vcpu) && !vcpu->arch.interrupt.pending)
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 
 		if (vcpu->arch.interrupt.pending)
 			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-	}
-	if (!vcpu->arch.interrupt_window_open &&
-	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
+	} else if(kvm_cpu_has_interrupt(vcpu) ||
+		  kvm_run->request_interrupt_window)
 		enable_irq_window(vcpu);
 }
 
@@ -2976,8 +2975,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
 	 */
-	if (kvm_run->request_interrupt_window &&
-	    !vcpu->arch.irq_summary) {
+	if (!irqchip_in_kernel(vcpu->kvm) &&
+	    kvm_run->request_interrupt_window &&
+	    !kvm_cpu_has_interrupt(vcpu)) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 		return 0;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0bb4131..7184f55 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3059,7 +3059,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 					  struct kvm_run *kvm_run)
 {
-	return (!vcpu->arch.irq_summary &&
+	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
 		kvm_run->request_interrupt_window &&
 		vcpu->arch.interrupt_window_open &&
 		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
@@ -3076,7 +3076,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 	else
 		kvm_run->ready_for_interrupt_injection =
 					(vcpu->arch.interrupt_window_open &&
-					 vcpu->arch.irq_summary == 0);
+					 !kvm_cpu_has_interrupt(vcpu));
 }
 
 static void vapic_enter(struct kvm_vcpu *vcpu)
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 02/15] Consolidate userspace and kernel interrupt injection for VMX.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
  2009-04-13  9:55 ` [PATCH 01/15] Make kvm_cpu_(has|get)_interrupt() work for userspace irqchip too Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 03/15] Cleanup vmx_intr_assist() Gleb Natapov
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Use the same callback to inject irq/nmi events no matter what irqchip is
in use. Only from VMX for now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    2 +-
 arch/x86/kvm/svm.c              |    2 +-
 arch/x86/kvm/vmx.c              |   71 +++++++++------------------------------
 arch/x86/kvm/x86.c              |    2 +-
 4 files changed, 19 insertions(+), 58 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3fc4623..4a9022d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -520,7 +520,7 @@ struct kvm_x86_ops {
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
 	bool (*exception_injected)(struct kvm_vcpu *vcpu);
-	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
+	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7ac57e7..52c41aa 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2289,7 +2289,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 		(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
-static void svm_intr_assist(struct kvm_vcpu *vcpu)
+static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct vmcb *vmcb = svm->vmcb;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b3292c1..06252f7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2510,48 +2510,6 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return vcpu->arch.interrupt_window_open;
 }
 
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
-{
-	vmx_update_window_states(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
-			return;
-		}
-	}
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu) ||
-			 kvm_run->request_interrupt_window)
-			enable_irq_window(vcpu);
-		return;
-	}
-
-	if (vcpu->arch.interrupt_window_open) {
-		if (kvm_cpu_has_interrupt(vcpu) && !vcpu->arch.interrupt.pending)
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-
-		if (vcpu->arch.interrupt.pending)
-			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-	} else if(kvm_cpu_has_interrupt(vcpu) ||
-		  kvm_run->request_interrupt_window)
-		enable_irq_window(vcpu);
-}
-
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
@@ -3351,8 +3309,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	}
 }
 
-static void vmx_intr_assist(struct kvm_vcpu *vcpu)
+static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
+
 	update_tpr_threshold(vcpu);
 
 	vmx_update_window_states(vcpu);
@@ -3373,25 +3334,25 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 			return;
 		}
 	}
+
 	if (vcpu->arch.nmi_injected) {
 		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
-		return;
+		goto out;
 	}
+
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
 		if (vcpu->arch.interrupt_window_open)
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-		else
-			enable_irq_window(vcpu);
 	}
-	if (vcpu->arch.interrupt.pending) {
+
+	if (vcpu->arch.interrupt.pending)
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
-	}
+
+out:
+	if (vcpu->arch.nmi_pending)
+		enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		enable_irq_window(vcpu);
 }
 
 /*
@@ -3733,7 +3694,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.queue_exception = vmx_queue_exception,
 	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
-	.inject_pending_vectors = do_interrupt_requests,
+	.inject_pending_vectors = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7184f55..dfcf358 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3163,7 +3163,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else if (irqchip_in_kernel(vcpu->kvm))
-		kvm_x86_ops->inject_pending_irq(vcpu);
+		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
 	else
 		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
 
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 03/15] Cleanup vmx_intr_assist()
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
  2009-04-13  9:55 ` [PATCH 01/15] Make kvm_cpu_(has|get)_interrupt() work for userspace irqchip too Gleb Natapov
  2009-04-13  9:55 ` [PATCH 02/15] Consolidate userspace and kernel interrupt injection for VMX Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 04/15] Use kvm_arch_interrupt_allowed() instead of checking interrupt_window_open directly Gleb Natapov
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/vmx.c |   55 ++++++++++++++++++++++++++++-----------------------
 1 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06252f7..9eb518f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3309,6 +3309,34 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	}
 }
 
+static void vmx_intr_inject(struct kvm_vcpu *vcpu)
+{
+	/* try to reinject previous events if any */
+	if (vcpu->arch.nmi_injected) {
+		vmx_inject_nmi(vcpu);
+		return;
+	}
+
+	if (vcpu->arch.interrupt.pending) {
+		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+		return;
+	}
+
+	/* try to inject new event if pending */
+	if (vcpu->arch.nmi_pending) {
+		if (vcpu->arch.nmi_window_open) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+			vmx_inject_nmi(vcpu);
+		}
+	} else if (kvm_cpu_has_interrupt(vcpu)) {
+		if (vcpu->arch.interrupt_window_open) {
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
+			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+		}
+	}
+}
+
 static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
@@ -3323,32 +3351,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 				GUEST_INTR_STATE_STI |
 				GUEST_INTR_STATE_MOV_SS);
 
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
-			return;
-		}
-	}
-
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		goto out;
-	}
-
-	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open)
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-	}
-
-	if (vcpu->arch.interrupt.pending)
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+	vmx_intr_inject(vcpu);
 
-out:
+	/* enable NMI/IRQ window open exits if needed */
 	if (vcpu->arch.nmi_pending)
 		enable_nmi_window(vcpu);
 	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 04/15] Use kvm_arch_interrupt_allowed() instead of checking interrupt_window_open directly
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (2 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 03/15] Cleanup vmx_intr_assist() Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic Gleb Natapov
                   ` (12 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

kvm_arch_interrupt_allowed() also checks IF so drop the check.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/x86.c |    5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dfcf358..f6aaa15 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3061,8 +3061,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
 {
 	return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
 		kvm_run->request_interrupt_window &&
-		vcpu->arch.interrupt_window_open &&
-		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
+		kvm_arch_interrupt_allowed(vcpu));
 }
 
 static void post_kvm_run_save(struct kvm_vcpu *vcpu,
@@ -3075,7 +3074,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 		kvm_run->ready_for_interrupt_injection = 1;
 	else
 		kvm_run->ready_for_interrupt_injection =
-					(vcpu->arch.interrupt_window_open &&
+					(kvm_arch_interrupt_allowed(vcpu) &&
 					 !kvm_cpu_has_interrupt(vcpu));
 }
 
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (3 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 04/15] Use kvm_arch_interrupt_allowed() instead of checking interrupt_window_open directly Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-14 14:14   ` Dmitry Eremin-Solenikov
                     ` (3 more replies)
  2009-04-13  9:55 ` [PATCH 06/15] Use EVENTINJ to inject interrupts Gleb Natapov
                   ` (11 subsequent siblings)
  16 siblings, 4 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Start to use interrupt/exception queues like VMX does.
This also fixes the bug that, if the exit was caused by a guest
internal exception's access to the IDT, the exception was not
reinjected.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/svm.c |  176 ++++++++++++++++++++++------------------------------
 1 files changed, 75 insertions(+), 101 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 52c41aa..053370d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -70,7 +70,6 @@ module_param(npt, int, S_IRUGO);
 static int nested = 0;
 module_param(nested, int, S_IRUGO);
 
-static void kvm_reput_irq(struct vcpu_svm *svm);
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 
 static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
@@ -199,9 +198,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 
 static bool svm_exception_injected(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
+	return false;
 }
 
 static int is_external_interrupt(u32 info)
@@ -976,12 +973,9 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 
 static int svm_get_irq(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
-	u32 exit_int_info = svm->vmcb->control.exit_int_info;
-
-	if (is_external_interrupt(exit_int_info))
-		return exit_int_info & SVM_EVTINJ_VEC_MASK;
-	return -1;
+	if (!vcpu->arch.interrupt.pending)
+		return -1;
+	return vcpu->arch.interrupt.nr;
 }
 
 static void load_host_msrs(struct kvm_vcpu *vcpu)
@@ -1088,17 +1082,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 
 static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
-	u32 exit_int_info = svm->vmcb->control.exit_int_info;
-	struct kvm *kvm = svm->vcpu.kvm;
 	u64 fault_address;
 	u32 error_code;
-	bool event_injection = false;
-
-	if (!irqchip_in_kernel(kvm) &&
-	    is_external_interrupt(exit_int_info)) {
-		event_injection = true;
-		kvm_push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
-	}
 
 	fault_address  = svm->vmcb->control.exit_info_2;
 	error_code = svm->vmcb->control.exit_info_1;
@@ -1118,9 +1103,11 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	 */
 	if (npt_enabled)
 		svm_flush_tlb(&svm->vcpu);
-
-	if (!npt_enabled && event_injection)
-		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+	else {
+		if (svm->vcpu.arch.interrupt.pending ||
+				svm->vcpu.arch.exception.pending)
+			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
+	}
 	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
@@ -2187,7 +2174,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		}
 	}
 
-	kvm_reput_irq(svm);
 
 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2289,98 +2275,47 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 		(svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
-static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb *vmcb = svm->vmcb;
-	int intr_vector = -1;
-
-	if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
-	    ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
-		intr_vector = vmcb->control.exit_int_info &
-			      SVM_EVTINJ_VEC_MASK;
-		vmcb->control.exit_int_info = 0;
-		svm_inject_irq(svm, intr_vector);
-		goto out;
-	}
-
-	if (vmcb->control.int_ctl & V_IRQ_MASK)
-		goto out;
-
-	if (!kvm_cpu_has_interrupt(vcpu))
-		goto out;
-
-	if (nested_svm_intr(svm))
-		goto out;
-
-	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
-		goto out;
-
-	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
-	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
-	    (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
-		/* unable to deliver irq, set pending irq */
-		svm_set_vintr(svm);
-		svm_inject_irq(svm, 0x0);
-		goto out;
-	}
-	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
-	intr_vector = kvm_cpu_get_interrupt(vcpu);
-	svm_inject_irq(svm, intr_vector);
-out:
-	update_cr8_intercept(vcpu);
+	svm_set_vintr(to_svm(vcpu));
+	svm_inject_irq(to_svm(vcpu), 0x0);
 }
 
-static void kvm_reput_irq(struct vcpu_svm *svm)
+static void svm_intr_inject(struct kvm_vcpu *vcpu)
 {
-	struct vmcb_control_area *control = &svm->vmcb->control;
-
-	if ((control->int_ctl & V_IRQ_MASK)
-	    && !irqchip_in_kernel(svm->vcpu.kvm)) {
-		control->int_ctl &= ~V_IRQ_MASK;
-		kvm_push_irq(&svm->vcpu, control->int_vector);
+	/* try to reinject previous events if any */
+	if (vcpu->arch.interrupt.pending) {
+		svm_inject_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
+		return;
 	}
 
-	svm->vcpu.arch.interrupt_window_open =
-		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (svm->vcpu.arch.hflags & HF_GIF_MASK);
-}
-
-static void svm_do_inject_vector(struct vcpu_svm *svm)
-{
-	svm_inject_irq(svm, kvm_pop_irq(&svm->vcpu));
+	/* try to inject new event if pending */
+	if (kvm_cpu_has_interrupt(vcpu)) {
+		if (vcpu->arch.interrupt_window_open) {
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
+			svm_inject_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
+		}
+	}
 }
 
-static void do_interrupt_requests(struct kvm_vcpu *vcpu,
-				       struct kvm_run *kvm_run)
+static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb_control_area *control = &svm->vmcb->control;
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
 
 	if (nested_svm_intr(svm))
-		return;
+		goto out;
 
-	svm->vcpu.arch.interrupt_window_open =
-		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
-		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
-		 (svm->vcpu.arch.hflags & HF_GIF_MASK));
+	svm->vcpu.arch.interrupt_window_open = svm_interrupt_allowed(vcpu);
 
-	if (svm->vcpu.arch.interrupt_window_open &&
-	    kvm_cpu_has_interrupt(&svm->vcpu))
-		/*
-		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
-		 */
-		svm_do_inject_vector(svm);
+	svm_intr_inject(vcpu);
 
-	/*
-	 * Interrupts blocked.  Wait for unblock.
-	 */
-	if (!svm->vcpu.arch.interrupt_window_open &&
-	    (kvm_cpu_has_interrupt(&svm->vcpu) ||
-	     kvm_run->request_interrupt_window))
-		svm_set_vintr(svm);
-	else
-		svm_clear_vintr(svm);
+	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		enable_irq_window(vcpu);
+
+out:
+	update_cr8_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2420,6 +2355,43 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
 	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
 }
 
+static void svm_complete_interrupts(struct vcpu_svm *svm)
+{
+	u8 vector;
+	int type;
+	u32 exitintinfo = svm->vmcb->control.exit_int_info;
+
+	svm->vcpu.arch.nmi_injected = false;
+	kvm_clear_exception_queue(&svm->vcpu);
+	kvm_clear_interrupt_queue(&svm->vcpu);
+
+	if (!(exitintinfo & SVM_EXITINTINFO_VALID))
+		return;
+
+	vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
+	type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
+
+	switch (type) {
+	case SVM_EXITINTINFO_TYPE_NMI:
+		svm->vcpu.arch.nmi_injected = true;
+		break;
+	case SVM_EXITINTINFO_TYPE_EXEPT:
+		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
+			u32 err = svm->vmcb->control.exit_int_info_err;
+			kvm_queue_exception_e(&svm->vcpu, vector, err);
+					
+		} else
+			kvm_queue_exception(&svm->vcpu, vector);
+		break;
+	case SVM_EXITINTINFO_TYPE_SOFT:
+	case SVM_EXITINTINFO_TYPE_INTR:
+		kvm_queue_interrupt(&svm->vcpu, vector);
+		break;
+	default:
+		break;
+	}
+}
+
 #ifdef CONFIG_X86_64
 #define R "r"
 #else
@@ -2548,6 +2520,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	sync_cr8_to_lapic(vcpu);
 
 	svm->next_rip = 0;
+
+	svm_complete_interrupts(svm);
 }
 
 #undef R
@@ -2669,7 +2643,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.queue_exception = svm_queue_exception,
 	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
-	.inject_pending_vectors = do_interrupt_requests,
+	.inject_pending_vectors = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
 
 	.set_tss_addr = svm_set_tss_addr,
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 06/15] Use EVENTINJ to inject interrupts.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (4 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 07/15] Remove exception_injected() callback Gleb Natapov
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Use VINT only for detecting when the IRQ window is open again.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/svm.c |   12 +++++++++---
 1 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 053370d..3b4c00f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2236,13 +2236,19 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
+static void svm_queue_irq(struct vcpu_svm *svm, unsigned nr)
+{
+	svm->vmcb->control.event_inj = nr |
+		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
+}
+
 static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	nested_svm_intr(svm);
 
-	svm_inject_irq(svm, irq);
+	svm_queue_irq(svm, irq);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -2285,7 +2291,7 @@ static void svm_intr_inject(struct kvm_vcpu *vcpu)
 {
 	/* try to reinject previous events if any */
 	if (vcpu->arch.interrupt.pending) {
-		svm_inject_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
+		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
 		return;
 	}
 
@@ -2293,7 +2299,7 @@ static void svm_intr_inject(struct kvm_vcpu *vcpu)
 	if (kvm_cpu_has_interrupt(vcpu)) {
 		if (vcpu->arch.interrupt_window_open) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			svm_inject_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
+			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
 		}
 	}
 }
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 07/15] Remove exception_injected() callback.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (5 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 06/15] Use EVENTINJ to inject interrupts Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 08/15] Remove inject_pending_vectors() callback Gleb Natapov
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

It always returns false for VMX/SVM now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 -
 arch/x86/kvm/svm.c              |    6 ------
 arch/x86/kvm/vmx.c              |    6 ------
 arch/x86/kvm/x86.c              |    2 --
 4 files changed, 0 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4a9022d..fc29e36 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -519,7 +519,6 @@ struct kvm_x86_ops {
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
-	bool (*exception_injected)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 3b4c00f..d6bc56a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -196,11 +196,6 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	svm->vmcb->control.event_inj_err = error_code;
 }
 
-static bool svm_exception_injected(struct kvm_vcpu *vcpu)
-{
-	return false;
-}
-
 static int is_external_interrupt(u32 info)
 {
 	info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
@@ -2647,7 +2642,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
 	.queue_exception = svm_queue_exception,
-	.exception_injected = svm_exception_injected,
 	.inject_pending_irq = svm_intr_assist,
 	.inject_pending_vectors = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9eb518f..3186fcf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -789,11 +789,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
 }
 
-static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
-{
-	return false;
-}
-
 /*
  * Swap MSR entry in host/guest MSR entry array.
  */
@@ -3697,7 +3692,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
 	.queue_exception = vmx_queue_exception,
-	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
 	.inject_pending_vectors = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f6aaa15..a50c8a3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3227,8 +3227,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		profile_hit(KVM_PROFILING, (void *)rip);
 	}
 
-	if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
-		vcpu->arch.exception.pending = false;
 
 	kvm_lapic_sync_from_vapic(vcpu);
 
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 08/15] Remove inject_pending_vectors() callback.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (6 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 07/15] Remove exception_injected() callback Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 09/15] kvm_push_irq() no longer used Gleb Natapov
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

It is the same as inject_pending_irq() for VMX/SVM now.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    2 --
 arch/x86/kvm/svm.c              |    1 -
 arch/x86/kvm/vmx.c              |    1 -
 arch/x86/kvm/x86.c              |    4 +---
 4 files changed, 1 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc29e36..663dba8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -520,8 +520,6 @@ struct kvm_x86_ops {
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
-	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
-				       struct kvm_run *run);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d6bc56a..f8beddb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2643,7 +2643,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_irq = svm_set_irq,
 	.queue_exception = svm_queue_exception,
 	.inject_pending_irq = svm_intr_assist,
-	.inject_pending_vectors = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
 
 	.set_tss_addr = svm_set_tss_addr,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3186fcf..9162b4c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3693,7 +3693,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_irq = vmx_inject_irq,
 	.queue_exception = vmx_queue_exception,
 	.inject_pending_irq = vmx_intr_assist,
-	.inject_pending_vectors = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a50c8a3..d94a152 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3161,10 +3161,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
-	else if (irqchip_in_kernel(vcpu->kvm))
-		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
 	else
-		kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
+		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
 
 	kvm_lapic_sync_to_vapic(vcpu);
 
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 09/15] kvm_push_irq() no longer used.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (7 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 08/15] Remove inject_pending_vectors() callback Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 10/15] sync_lapic_to_cr8() should always sync cr8 to V_TPR Gleb Natapov
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Remove it.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/x86.h |    7 -------
 1 files changed, 0 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2ab6791..39350b2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -30,11 +30,4 @@ static inline u8 kvm_pop_irq(struct kvm_vcpu *vcpu)
 		clear_bit(word_index, &vcpu->arch.irq_summary);
 	return irq;
 }
-
-static inline void kvm_push_irq(struct kvm_vcpu *vcpu, u8 irq)
-{
-        set_bit(irq, vcpu->arch.irq_pending);
-        set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
-}
-
 #endif
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 10/15] sync_lapic_to_cr8() should always sync cr8 to V_TPR.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (8 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 09/15] kvm_push_irq() no longer used Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 11/15] Do not report TPR write to userspace if new value bigger or equal to a previous one Gleb Natapov
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Even if the IRQ chip is in userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/svm.c |    5 +----
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f8beddb..52bd999 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2339,7 +2339,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
 
 	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
 		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
-		kvm_lapic_set_tpr(vcpu, cr8);
+		kvm_set_cr8(vcpu, cr8);
 	}
 }
 
@@ -2348,9 +2348,6 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 cr8;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return;
-
 	cr8 = kvm_get_cr8(vcpu);
 	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
 	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 11/15] Do not report TPR write to userspace if new value bigger or equal to a previous one.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (9 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 10/15] sync_lapic_to_cr8() should always sync cr8 to V_TPR Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 12/15] Get rid of arch.interrupt_window_open & arch.nmi_window_open Gleb Natapov
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

Saves many exits to userspace when the IRQ chip is in userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/svm.c |    4 ++++
 arch/x86/kvm/vmx.c |   19 ++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 52bd999..9a8eb14 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1853,9 +1853,13 @@ static int emulate_on_interception(struct vcpu_svm *svm,
 
 static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
+	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
+	/* instruction emulation calls kvm_set_cr8() */
 	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
 	if (irqchip_in_kernel(svm->vcpu.kvm))
 		return 1;
+	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
+		return 1;
 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9162b4c..51f804c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2724,13 +2724,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg));
 			skip_emulated_instruction(vcpu);
 			return 1;
-		case 8:
-			kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg));
-			skip_emulated_instruction(vcpu);
-			if (irqchip_in_kernel(vcpu->kvm))
-				return 1;
-			kvm_run->exit_reason = KVM_EXIT_SET_TPR;
-			return 0;
+		case 8: {
+				u8 cr8_prev = kvm_get_cr8(vcpu);
+				u8 cr8 = kvm_register_read(vcpu, reg);
+				kvm_set_cr8(vcpu, cr8);
+				skip_emulated_instruction(vcpu);
+				if (irqchip_in_kernel(vcpu->kvm))
+					return 1;
+				if (cr8_prev <= cr8)
+					return 1;
+				kvm_run->exit_reason = KVM_EXIT_SET_TPR;
+				return 0;
+			}
 		};
 		break;
 	case 2: /* clts */
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 12/15] Get rid of arch.interrupt_window_open & arch.nmi_window_open
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (10 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 11/15] Do not report TPR write to userspace if new value bigger or equal to a previous one Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 13/15] Add NMI injection support to SVM Gleb Natapov
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

They are recalculated before each use anyway.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    2 --
 arch/x86/kvm/svm.c              |    6 +-----
 arch/x86/kvm/vmx.c              |   35 +++++++++++------------------------
 3 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 663dba8..8b6f6e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -267,7 +267,6 @@ struct kvm_mmu {
 
 struct kvm_vcpu_arch {
 	u64 host_tsc;
-	int interrupt_window_open;
 	unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
 	DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
 	/*
@@ -361,7 +360,6 @@ struct kvm_vcpu_arch {
 
 	bool nmi_pending;
 	bool nmi_injected;
-	bool nmi_window_open;
 
 	struct mtrr_state_type mtrr_state;
 	u32 pat;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9a8eb14..c605477 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -216,8 +216,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 
 	kvm_rip_write(vcpu, svm->next_rip);
 	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-
-	vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
 }
 
 static int has_svm(void)
@@ -2296,7 +2294,7 @@ static void svm_intr_inject(struct kvm_vcpu *vcpu)
 
 	/* try to inject new event if pending */
 	if (kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open) {
+		if (svm_interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
 		}
@@ -2312,8 +2310,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (nested_svm_intr(svm))
 		goto out;
 
-	svm->vcpu.arch.interrupt_window_open = svm_interrupt_allowed(vcpu);
-
 	svm_intr_inject(vcpu);
 
 	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51f804c..116eac0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -753,7 +753,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	if (interruptibility & 3)
 		vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
 			     interruptibility & ~3);
-	vcpu->arch.interrupt_window_open = 1;
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -2482,27 +2481,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
-static void vmx_update_window_states(struct kvm_vcpu *vcpu)
+static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-
-	vcpu->arch.nmi_window_open =
-		!(guest_intr & (GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS |
-				GUEST_INTR_STATE_NMI));
 	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
-		vcpu->arch.nmi_window_open = 0;
+		return 0;
 
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 !(guest_intr & (GUEST_INTR_STATE_STI |
-				 GUEST_INTR_STATE_MOV_SS)));
+	return	!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_NMI));
 }
 
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
-	vmx_update_window_states(vcpu);
-	return vcpu->arch.interrupt_window_open;
+	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -3194,9 +3187,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		       __func__, vectoring_info, exit_reason);
 
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
-		if (vcpu->arch.interrupt_window_open) {
+		if (vmx_interrupt_allowed(vcpu)) {
 			vmx->soft_vnmi_blocked = 0;
-			vcpu->arch.nmi_window_open = 1;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
 			   vcpu->arch.nmi_pending) {
 			/*
@@ -3209,7 +3201,6 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			       "state on VCPU %d after 1 s timeout\n",
 			       __func__, vcpu->vcpu_id);
 			vmx->soft_vnmi_blocked = 0;
-			vmx->vcpu.arch.nmi_window_open = 1;
 		}
 	}
 
@@ -3324,13 +3315,13 @@ static void vmx_intr_inject(struct kvm_vcpu *vcpu)
 
 	/* try to inject new event if pending */
 	if (vcpu->arch.nmi_pending) {
-		if (vcpu->arch.nmi_window_open) {
+		if (vmx_nmi_allowed(vcpu)) {
 			vcpu->arch.nmi_pending = false;
 			vcpu->arch.nmi_injected = true;
 			vmx_inject_nmi(vcpu);
 		}
 	} else if (kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open) {
+		if (vmx_interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
 		}
@@ -3344,8 +3335,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	update_tpr_threshold(vcpu);
 
-	vmx_update_window_states(vcpu);
-
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_STI |
@@ -3518,8 +3507,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vmx_update_window_states(vcpu);
-
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
 
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (11 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 12/15] Get rid of arch.interrupt_window_open & arch.nmi_window_open Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-17 11:59   ` Jan Kiszka
                     ` (3 more replies)
  2009-04-13  9:55 ` [PATCH 14/15] Move interrupt injection logic to x86.c Gleb Natapov
                   ` (3 subsequent siblings)
  16 siblings, 4 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8b6f6e9..057a612 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -766,6 +766,7 @@ enum {
 #define HF_GIF_MASK		(1 << 0)
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
+#define HF_NMI_MASK		(1 << 3)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c605477..cd60fd7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+	return 0;
+}
+
 static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
@@ -2111,6 +2118,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
 	/* [SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception, */
 	[SVM_EXIT_CPUID]			= cpuid_interception,
+	[SVM_EXIT_IRET]                         = iret_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
@@ -2218,6 +2226,11 @@ static void pre_svm_run(struct vcpu_svm *svm)
 		new_asid(svm, svm_data);
 }
 
+static void svm_inject_nmi(struct vcpu_svm *svm)
+{
+	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+	svm->vcpu.arch.hflags |= HF_NMI_MASK;
+}
 
 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
@@ -2269,6 +2282,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
 }
 
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+	return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2284,16 +2305,37 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 	svm_inject_irq(to_svm(vcpu), 0x0);
 }
 
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (svm->vcpu.arch.hflags & HF_NMI_MASK)
+		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+		enable_irq_window(vcpu);
+}
+
 static void svm_intr_inject(struct kvm_vcpu *vcpu)
 {
 	/* try to reinject previous events if any */
+	if (vcpu->arch.nmi_injected) {
+		svm_inject_nmi(to_svm(vcpu));
+		return;
+	}
+
 	if (vcpu->arch.interrupt.pending) {
 		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
 		return;
 	}
 
 	/* try to inject new event if pending */
-	if (kvm_cpu_has_interrupt(vcpu)) {
+	if (vcpu->arch.nmi_pending) {
+		if (svm_nmi_allowed(vcpu)) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+			svm_inject_nmi(vcpu);
+		}
+	} else if (kvm_cpu_has_interrupt(vcpu)) {
 		if (svm_interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
@@ -2312,7 +2354,10 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	svm_intr_inject(vcpu);
 
-	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 		enable_irq_window(vcpu);
 
 out:
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 14/15] Move interrupt injection logic to x86.c
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (12 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 13/15] Add NMI injection support to SVM Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-14  7:22   ` Gleb Natapov
  2009-04-13  9:55 ` [PATCH 15/15] Get rid of get_irq() callback Gleb Natapov
                   ` (2 subsequent siblings)
  16 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    7 +++-
 arch/x86/kvm/svm.c              |   94 +++++++++++----------------------------
 arch/x86/kvm/vmx.c              |   79 ++++++++-------------------------
 arch/x86/kvm/x86.c              |   70 ++++++++++++++++++++++++++++-
 4 files changed, 119 insertions(+), 131 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 057a612..f9eb9ac 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -515,10 +515,15 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	int (*get_irq)(struct kvm_vcpu *vcpu);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
-	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
+	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+	void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	int (*get_mt_mask_shift)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cd60fd7..1aa10b3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1861,8 +1861,10 @@ static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
 	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
-	if (irqchip_in_kernel(svm->vcpu.kvm))
+	if (irqchip_in_kernel(svm->vcpu.kvm)) {
+		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
 		return 1;
+	}
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return 1;
 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
@@ -2226,8 +2228,16 @@ static void pre_svm_run(struct vcpu_svm *svm)
 		new_asid(svm, svm_data);
 }
 
-static void svm_inject_nmi(struct vcpu_svm *svm)
+static void svm_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+}
+
+static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
 	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
 	svm->vcpu.arch.hflags |= HF_NMI_MASK;
 }
@@ -2246,8 +2256,10 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
-static void svm_queue_irq(struct vcpu_svm *svm, unsigned nr)
+static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
 	svm->vmcb->control.event_inj = nr |
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
@@ -2258,28 +2270,18 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 
 	nested_svm_intr(svm);
 
-	svm_queue_irq(svm, irq);
+	svm_queue_irq(vcpu, irq);
 }
 
-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb *vmcb = svm->vmcb;
-	int max_irr, tpr;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
+	if (irr == -1)
 		return;
 
-	vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
-
-	max_irr = kvm_lapic_find_highest_irr(vcpu);
-	if (max_irr == -1)
-		return;
-
-	tpr = kvm_lapic_get_cr8(vcpu) << 4;
-
-	if (tpr >= (max_irr & 0xf0))
-		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+	if (tpr >= irr)
+		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
 }
 
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2315,55 +2317,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 		enable_irq_window(vcpu);
 }
 
-static void svm_intr_inject(struct kvm_vcpu *vcpu)
-{
-	/* try to reinject previous events if any */
-	if (vcpu->arch.nmi_injected) {
-		svm_inject_nmi(to_svm(vcpu));
-		return;
-	}
-
-	if (vcpu->arch.interrupt.pending) {
-		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-		return;
-	}
-
-	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
-		if (svm_nmi_allowed(vcpu)) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-			svm_inject_nmi(vcpu);
-		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
-		if (svm_interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-		}
-	}
-}
-
-static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	if (nested_svm_intr(svm))
-		goto out;
-
-	svm_intr_inject(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		enable_irq_window(vcpu);
-
-out:
-	update_cr8_intercept(vcpu);
-}
-
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	return 0;
@@ -2683,9 +2636,14 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.patch_hypercall = svm_patch_hypercall,
 	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
+	.set_nmi = svm_inject_nmi,
 	.queue_exception = svm_queue_exception,
-	.inject_pending_irq = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
+	.nmi_allowed = svm_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
+	.drop_interrupt_shadow = svm_drop_interrupt_shadow,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 116eac0..a9c4ecc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1314,6 +1314,9 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
 
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	
 	return alloc_kvm_area();
 }
 
@@ -2404,6 +2407,12 @@ out:
 	return ret;
 }
 
+void vmx_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+	vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+			GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
+}
+
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
@@ -3214,21 +3223,14 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void update_tpr_threshold(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
-	int max_irr, tpr;
-
-	if (!vm_need_tpr_shadow(vcpu->kvm))
-		return;
-
-	if (!kvm_lapic_enabled(vcpu) ||
-	    ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) {
+	if (irr == -1 || tpr < irr) {
 		vmcs_write32(TPR_THRESHOLD, 0);
 		return;
 	}
 
-	tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4;
-	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
+	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
@@ -3300,55 +3302,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	}
 }
 
-static void vmx_intr_inject(struct kvm_vcpu *vcpu)
-{
-	/* try to reinject previous events if any */
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		return;
-	}
-
-	if (vcpu->arch.interrupt.pending) {
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		return;
-	}
-
-	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
-		if (vmx_nmi_allowed(vcpu)) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-			vmx_inject_nmi(vcpu);
-		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
-		if (vmx_interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		}
-	}
-}
-
-static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	update_tpr_threshold(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	vmx_intr_inject(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		enable_irq_window(vcpu);
-}
-
 /*
  * Failure to inject an interrupt should give us the information
  * in IDT_VECTORING_INFO_FIELD.  However, if the failure occurs
@@ -3683,9 +3636,15 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.patch_hypercall = vmx_patch_hypercall,
 	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
+	.set_nmi = vmx_inject_nmi,
 	.queue_exception = vmx_queue_exception,
-	.inject_pending_irq = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
+	.nmi_allowed = vmx_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
+	.drop_interrupt_shadow = vmx_drop_interrupt_shadow,
+
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask_shift = vmx_get_mt_mask_shift,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d94a152..ebd27ef 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3104,6 +3104,68 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	up_read(&vcpu->kvm->slots_lock);
 }
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+        int max_irr, tpr;
+
+	if (!kvm_x86_ops->update_cr8_intercept)
+		return;
+
+	max_irr = kvm_lapic_find_highest_irr(vcpu);
+
+	if (max_irr != -1)
+		max_irr >>= 4;
+
+	tpr = kvm_lapic_get_cr8(vcpu);
+
+	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
+}
+
+static void inject_irq(struct kvm_vcpu *vcpu)
+{
+        /* try to reinject previous events if any */
+        if (vcpu->arch.nmi_injected) {
+                kvm_x86_ops->set_nmi(vcpu);
+                return;
+        }
+
+        if (vcpu->arch.interrupt.pending) {
+                kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+                return;
+        }
+
+        /* try to inject new event if pending */
+        if (vcpu->arch.nmi_pending) {
+                if (kvm_x86_ops->nmi_allowed(vcpu)) {
+                        vcpu->arch.nmi_pending = false;
+                        vcpu->arch.nmi_injected = true;
+                        kvm_x86_ops->set_nmi(vcpu);
+                }
+        } else if (kvm_cpu_has_interrupt(vcpu)) {
+                if (kvm_x86_ops->interrupt_allowed(vcpu)) {
+                        kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
+                        kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+                }
+	}
+}
+
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		kvm_x86_ops->drop_interrupt_shadow(vcpu);
+
+	inject_irq(vcpu);
+
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		kvm_x86_ops->enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		kvm_x86_ops->enable_irq_window(vcpu);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -3162,9 +3224,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
-		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
+		inject_pending_irq(vcpu, kvm_run);
 
-	kvm_lapic_sync_to_vapic(vcpu);
+	if (kvm_lapic_enabled(vcpu))
+		if (!vcpu->arch.apic->vapic_addr)
+			update_cr8_intercept(vcpu);
+		else
+			kvm_lapic_sync_to_vapic(vcpu);
 
 	up_read(&vcpu->kvm->slots_lock);
 
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* [PATCH 15/15] Get rid of get_irq() callback.
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (13 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 14/15] Move interrupt injection logic to x86.c Gleb Natapov
@ 2009-04-13  9:55 ` Gleb Natapov
  2009-04-13 11:51 ` [PATCH 00/15] interrupt injection rework Avi Kivity
  2009-04-14 15:33 ` Joerg Roedel
  16 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-13  9:55 UTC (permalink / raw)
  To: avi; +Cc: kvm, joerg.roedel, sheng, Gleb Natapov

It just returns pending IRQ vector from the queue for VMX/SVM.
Also fix migration bits. Re-put pending IRQ into interrupt_bitmap.
And put it back into pending after migration.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 -
 arch/x86/kvm/svm.c              |    8 --------
 arch/x86/kvm/vmx.c              |    8 --------
 arch/x86/kvm/x86.c              |   22 ++++++++++------------
 4 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f9eb9ac..0fa07e7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -513,7 +513,6 @@ struct kvm_x86_ops {
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
-	int (*get_irq)(struct kvm_vcpu *vcpu);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1aa10b3..af61744 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -964,13 +964,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 	return 0;
 }
 
-static int svm_get_irq(struct kvm_vcpu *vcpu)
-{
-	if (!vcpu->arch.interrupt.pending)
-		return -1;
-	return vcpu->arch.interrupt.nr;
-}
-
 static void load_host_msrs(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
@@ -2634,7 +2627,6 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.handle_exit = handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
 	.patch_hypercall = svm_patch_hypercall,
-	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
 	.set_nmi = svm_inject_nmi,
 	.queue_exception = svm_queue_exception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a9c4ecc..f660c68 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1045,13 +1045,6 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
 	return 0;
 }
 
-static int vmx_get_irq(struct kvm_vcpu *vcpu)
-{
-	if (!vcpu->arch.interrupt.pending)
-		return -1;
-	return vcpu->arch.interrupt.nr;
-}
-
 static __init int cpu_has_kvm_support(void)
 {
 	return cpu_has_vmx();
@@ -3634,7 +3627,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = skip_emulated_instruction,
 	.patch_hypercall = vmx_patch_hypercall,
-	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
 	.set_nmi = vmx_inject_nmi,
 	.queue_exception = vmx_queue_exception,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ebd27ef..e4cc717 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3525,7 +3525,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
 	struct descriptor_table dt;
-	int pending_vec;
 
 	vcpu_load(vcpu);
 
@@ -3555,16 +3554,16 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->efer = vcpu->arch.shadow_efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
-	if (irqchip_in_kernel(vcpu->kvm)) {
+	if (irqchip_in_kernel(vcpu->kvm))
 		memset(sregs->interrupt_bitmap, 0,
-		       sizeof sregs->interrupt_bitmap);
-		pending_vec = kvm_x86_ops->get_irq(vcpu);
-		if (pending_vec >= 0)
-			set_bit(pending_vec,
-				(unsigned long *)sregs->interrupt_bitmap);
-	} else
+				sizeof sregs->interrupt_bitmap);
+	else
 		memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
-		       sizeof sregs->interrupt_bitmap);
+				sizeof sregs->interrupt_bitmap);
+
+	if (vcpu->arch.interrupt.pending)
+		set_bit(vcpu->arch.interrupt.nr,
+				(unsigned long *)sregs->interrupt_bitmap);
 
 	vcpu_put(vcpu);
 
@@ -4080,9 +4079,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 			max_bits);
 		/* Only pending external irq is handled here */
 		if (pending_vec < max_bits) {
-			kvm_x86_ops->set_irq(vcpu, pending_vec);
-			pr_debug("Set back pending irq %d\n",
-				 pending_vec);
+			kvm_queue_interrupt(vcpu, pending_vec);
+			pr_debug("Set back pending irq %d\n", pending_vec);
 		}
 		kvm_pic_clear_isr_ack(vcpu->kvm);
 	}
-- 
1.5.6.5


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* Re: [PATCH 00/15] interrupt injection rework
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (14 preceding siblings ...)
  2009-04-13  9:55 ` [PATCH 15/15] Get rid of get_irq() callback Gleb Natapov
@ 2009-04-13 11:51 ` Avi Kivity
  2009-04-14  3:20   ` Sheng Yang
  2009-04-14 15:33 ` Joerg Roedel
  16 siblings, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-13 11:51 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, joerg.roedel, sheng

Gleb Natapov wrote:
> Hi,
>
> This patch series aims to consolidate IRQ injection code for in kernel
> IRQ chip and userspace one. Also to move IRQ injection logic from
> SVM/VMX specific code to x86.c.
>   

Very nice patchset, removes code, consolidates functionality, and fixes 
bugs.

Joerg and Sheng, please give this a thorough review.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 00/15] interrupt injection rework
  2009-04-13 11:51 ` [PATCH 00/15] interrupt injection rework Avi Kivity
@ 2009-04-14  3:20   ` Sheng Yang
  2009-04-14  5:20     ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Sheng Yang @ 2009-04-14  3:20 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, kvm, joerg.roedel

On Monday 13 April 2009 19:51:59 Avi Kivity wrote:
> Gleb Natapov wrote:
> > Hi,
> >
> > This patch series aims to consolidate IRQ injection code for in kernel
> > IRQ chip and userspace one. Also to move IRQ injection logic from
> > SVM/VMX specific code to x86.c.
>
> Very nice patchset, removes code, consolidates functionality, and fixes
> bugs.
>
> Joerg and Sheng, please give this a thorough review.

Looks fine on my side, and indeed a nice patchset! But you may need 
./scripts/checkpatch.pl to clean up style problems. :)

-- 
regards
Yang, Sheng



^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 00/15] interrupt injection rework
  2009-04-14  3:20   ` Sheng Yang
@ 2009-04-14  5:20     ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14  5:20 UTC (permalink / raw)
  To: Sheng Yang; +Cc: Avi Kivity, kvm, joerg.roedel

On Tue, Apr 14, 2009 at 11:20:04AM +0800, Sheng Yang wrote:
> On Monday 13 April 2009 19:51:59 Avi Kivity wrote:
> > Gleb Natapov wrote:
> > > Hi,
> > >
> > > This patch series aims to consolidate IRQ injection code for in kernel
> > > IRQ chip and userspace one. Also to move IRQ injection logic from
> > > SVM/VMX specific code to x86.c.
> >
> > Very nice patchset, removes code, consolidates functionality, and fixes
> > bugs.
> >
> > Joerg and Sheng, please give this a thorough review.
> 
> Looks fine on my side, and indeed a nice patchset! But you may need 
> ./script/checkpatch.pl to clean up style problems. :)
> 

Oh damn, I always forget this part.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 14/15] Move interrupt injection logic to x86.c
  2009-04-13  9:55 ` [PATCH 14/15] Move interrupt injection logic to x86.c Gleb Natapov
@ 2009-04-14  7:22   ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14  7:22 UTC (permalink / raw)
  To: avi; +Cc: kvm

Please use this one instead. It is the same as the previous one, but with
the checkpatch.pl problems fixed.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 057a612..f9eb9ac 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -515,10 +515,15 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	int (*get_irq)(struct kvm_vcpu *vcpu);
 	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
-	void (*inject_pending_irq)(struct kvm_vcpu *vcpu, struct kvm_run *run);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
+	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
+	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+	void (*drop_interrupt_shadow)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	int (*get_mt_mask_shift)(void);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cd60fd7..1aa10b3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1861,8 +1861,10 @@ static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
 	emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
-	if (irqchip_in_kernel(svm->vcpu.kvm))
+	if (irqchip_in_kernel(svm->vcpu.kvm)) {
+		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
 		return 1;
+	}
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return 1;
 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
@@ -2226,8 +2228,16 @@ static void pre_svm_run(struct vcpu_svm *svm)
 		new_asid(svm, svm_data);
 }
 
-static void svm_inject_nmi(struct vcpu_svm *svm)
+static void svm_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+}
+
+static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
 	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
 	svm->vcpu.arch.hflags |= HF_NMI_MASK;
 }
@@ -2246,8 +2256,10 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
-static void svm_queue_irq(struct vcpu_svm *svm, unsigned nr)
+static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
 	svm->vmcb->control.event_inj = nr |
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
@@ -2258,28 +2270,18 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 
 	nested_svm_intr(svm);
 
-	svm_queue_irq(svm, irq);
+	svm_queue_irq(vcpu, irq);
 }
 
-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct vmcb *vmcb = svm->vmcb;
-	int max_irr, tpr;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
+	if (irr == -1)
 		return;
 
-	vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
-
-	max_irr = kvm_lapic_find_highest_irr(vcpu);
-	if (max_irr == -1)
-		return;
-
-	tpr = kvm_lapic_get_cr8(vcpu) << 4;
-
-	if (tpr >= (max_irr & 0xf0))
-		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+	if (tpr >= irr)
+		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
 }
 
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2315,55 +2317,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 		enable_irq_window(vcpu);
 }
 
-static void svm_intr_inject(struct kvm_vcpu *vcpu)
-{
-	/* try to reinject previous events if any */
-	if (vcpu->arch.nmi_injected) {
-		svm_inject_nmi(to_svm(vcpu));
-		return;
-	}
-
-	if (vcpu->arch.interrupt.pending) {
-		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-		return;
-	}
-
-	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
-		if (svm_nmi_allowed(vcpu)) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-			svm_inject_nmi(vcpu);
-		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
-		if (svm_interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-		}
-	}
-}
-
-static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	if (nested_svm_intr(svm))
-		goto out;
-
-	svm_intr_inject(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		enable_irq_window(vcpu);
-
-out:
-	update_cr8_intercept(vcpu);
-}
-
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	return 0;
@@ -2683,9 +2636,14 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.patch_hypercall = svm_patch_hypercall,
 	.get_irq = svm_get_irq,
 	.set_irq = svm_set_irq,
+	.set_nmi = svm_inject_nmi,
 	.queue_exception = svm_queue_exception,
-	.inject_pending_irq = svm_intr_assist,
 	.interrupt_allowed = svm_interrupt_allowed,
+	.nmi_allowed = svm_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
+	.drop_interrupt_shadow = svm_drop_interrupt_shadow,
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 116eac0..bad2413 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1314,6 +1314,9 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;
 
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
 	return alloc_kvm_area();
 }
 
@@ -2404,6 +2407,12 @@ out:
 	return ret;
 }
 
+void vmx_drop_interrupt_shadow(struct kvm_vcpu *vcpu)
+{
+	vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+			GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
+}
+
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	u32 cpu_based_vm_exec_control;
@@ -3214,21 +3223,14 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void update_tpr_threshold(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
-	int max_irr, tpr;
-
-	if (!vm_need_tpr_shadow(vcpu->kvm))
-		return;
-
-	if (!kvm_lapic_enabled(vcpu) ||
-	    ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) {
+	if (irr == -1 || tpr < irr) {
 		vmcs_write32(TPR_THRESHOLD, 0);
 		return;
 	}
 
-	tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4;
-	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
+	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
@@ -3300,55 +3302,6 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 	}
 }
 
-static void vmx_intr_inject(struct kvm_vcpu *vcpu)
-{
-	/* try to reinject previous events if any */
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		return;
-	}
-
-	if (vcpu->arch.interrupt.pending) {
-		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		return;
-	}
-
-	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
-		if (vmx_nmi_allowed(vcpu)) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-			vmx_inject_nmi(vcpu);
-		}
-	} else if (kvm_cpu_has_interrupt(vcpu)) {
-		if (vmx_interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-		}
-	}
-}
-
-static void vmx_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-		kvm_run->request_interrupt_window;
-
-	update_tpr_threshold(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	vmx_intr_inject(vcpu);
-
-	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
-		enable_nmi_window(vcpu);
-	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-		enable_irq_window(vcpu);
-}
-
 /*
  * Failure to inject an interrupt should give us the information
  * in IDT_VECTORING_INFO_FIELD.  However, if the failure occurs
@@ -3683,9 +3636,15 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.patch_hypercall = vmx_patch_hypercall,
 	.get_irq = vmx_get_irq,
 	.set_irq = vmx_inject_irq,
+	.set_nmi = vmx_inject_nmi,
 	.queue_exception = vmx_queue_exception,
-	.inject_pending_irq = vmx_intr_assist,
 	.interrupt_allowed = vmx_interrupt_allowed,
+	.nmi_allowed = vmx_nmi_allowed,
+	.enable_nmi_window = enable_nmi_window,
+	.enable_irq_window = enable_irq_window,
+	.update_cr8_intercept = update_cr8_intercept,
+	.drop_interrupt_shadow = vmx_drop_interrupt_shadow,
+
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask_shift = vmx_get_mt_mask_shift,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d94a152..cae1985 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3104,6 +3104,68 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
 	up_read(&vcpu->kvm->slots_lock);
 }
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+	int max_irr, tpr;
+
+	if (!kvm_x86_ops->update_cr8_intercept)
+		return;
+
+	max_irr = kvm_lapic_find_highest_irr(vcpu);
+
+	if (max_irr != -1)
+		max_irr >>= 4;
+
+	tpr = kvm_lapic_get_cr8(vcpu);
+
+	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
+}
+
+static void inject_irq(struct kvm_vcpu *vcpu)
+{
+	/* try to reinject previous events if any */
+	if (vcpu->arch.nmi_injected) {
+		kvm_x86_ops->set_nmi(vcpu);
+		return;
+	}
+
+	if (vcpu->arch.interrupt.pending) {
+		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		return;
+	}
+
+	/* try to inject new event if pending */
+	if (vcpu->arch.nmi_pending) {
+		if (kvm_x86_ops->nmi_allowed(vcpu)) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+			kvm_x86_ops->set_nmi(vcpu);
+		}
+	} else if (kvm_cpu_has_interrupt(vcpu)) {
+		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
+			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		}
+	}
+}
+
+static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+		kvm_run->request_interrupt_window;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		kvm_x86_ops->drop_interrupt_shadow(vcpu);
+
+	inject_irq(vcpu);
+
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		kvm_x86_ops->enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+		kvm_x86_ops->enable_irq_window(vcpu);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -3162,9 +3224,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else
-		kvm_x86_ops->inject_pending_irq(vcpu, kvm_run);
+		inject_pending_irq(vcpu, kvm_run);
 
-	kvm_lapic_sync_to_vapic(vcpu);
+	if (kvm_lapic_enabled(vcpu)) {
+		if (!vcpu->arch.apic->vapic_addr)
+			update_cr8_intercept(vcpu);
+		else
+			kvm_lapic_sync_to_vapic(vcpu);
+	}
 
 	up_read(&vcpu->kvm->slots_lock);
 
--
			Gleb.

^ permalink raw reply related	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-13  9:55 ` [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic Gleb Natapov
@ 2009-04-14 14:14   ` Dmitry Eremin-Solenikov
  2009-04-14 14:24     ` Gleb Natapov
  2009-04-17 12:39   ` Jan Kiszka
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-14 14:14 UTC (permalink / raw)
  To: kvm

Gleb Natapov wrote:

> Start to use interrupt/exception queues like VMX does. This also fix the
> bug that if exit was caused by a guest internal exception access to IDT
> the exception was not reinjected.

This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
linux inside KVM (version 84 from Debian) stops booting, moaning about lost
interrupts from ide. The KVM is executed inside qemu-system-x86_64,
version 0.10.2.

-- 
With best wishes
Dmitry



^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 14:14   ` Dmitry Eremin-Solenikov
@ 2009-04-14 14:24     ` Gleb Natapov
  2009-04-14 14:32       ` Dmitry Eremin-Solenikov
  2009-04-14 16:10       ` Avi Kivity
  0 siblings, 2 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14 14:24 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
> Gleb Natapov wrote:
> 
> > Start to use interrupt/exception queues like VMX does. This also fix the
> > bug that if exit was caused by a guest internal exception access to IDT
> > the exception was not reinjected.
> 
> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
> version 0.10.2.
> 
Please apply the next patch in the series too. This one will not work
without it. But better yet, can you please test the entire series?

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 14:24     ` Gleb Natapov
@ 2009-04-14 14:32       ` Dmitry Eremin-Solenikov
  2009-04-14 14:55         ` Gleb Natapov
  2009-04-14 16:10       ` Avi Kivity
  1 sibling, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-14 14:32 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

2009/4/14 Gleb Natapov <gleb@redhat.com>:
> On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
>> Gleb Natapov wrote:
>>
>> > Start to use interrupt/exception queues like VMX does. This also fix the
>> > bug that if exit was caused by a guest internal exception access to IDT
>> > the exception was not reinjected.
>>
>> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
>> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
>> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
>> version 0.10.2.
>>
> Please apply next patch in the series too. This one will not work
> without it. But better yet can you please test entire series.

After applying the next patch (or the whole series), I get the following messages
during initramfs drivers probe:

Clocksource tsc unstable (delta...)
no cont in shutdown!
floppy0: FDC access conflict!

Then the kernel boot stalls. I'll try gdbing into the kernel, but this may
require a lot of effort.
I don't quite understand how these two patches influence FDC emulation, but
they do. Tell me if you need any additional info.

-- 
With best wishes
Dmitry

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 14:32       ` Dmitry Eremin-Solenikov
@ 2009-04-14 14:55         ` Gleb Natapov
  2009-04-14 15:38           ` Gleb Natapov
  2009-04-14 19:29           ` Dmitry Eremin-Solenikov
  0 siblings, 2 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14 14:55 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> > On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
> >> Gleb Natapov wrote:
> >>
> >> > Start to use interrupt/exception queues like VMX does. This also fix the
> >> > bug that if exit was caused by a guest internal exception access to IDT
> >> > the exception was not reinjected.
> >>
> >> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
> >> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
> >> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
> >> version 0.10.2.
> >>
> > Please apply next patch in the series too. This one will not work
> > without it. But better yet can you please test entire series.
> 
> After applying the next patch (or the whole serie), I get the following messages
> during initramfs drivers probe:
> 
> Clocksource tsc unstable (delta...)
> no cont in shutdown!
> floppy0: FDC access conflict!
> 
> Then kernel boot stalls. I'll try gdbing into kernel but this may
> require lots of efforts.
> I don't quite understand how do these two patches influence FDC emulation, but
> they do. Tell me if you need any additional info.
> 
What guest is this? What kernel? Does the whole series work?

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 00/15] interrupt injection rework
  2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
                   ` (15 preceding siblings ...)
  2009-04-13 11:51 ` [PATCH 00/15] interrupt injection rework Avi Kivity
@ 2009-04-14 15:33 ` Joerg Roedel
  2009-04-14 15:37   ` Gleb Natapov
  16 siblings, 1 reply; 95+ messages in thread
From: Joerg Roedel @ 2009-04-14 15:33 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, sheng

Hi Gleb,

I am just curious, have you given these patches some stress testing with all
kinds of Windows guests? In my experience they break very fast if something is
wrong with interrupts ;)

Joerg

On Mon, Apr 13, 2009 at 12:55:30PM +0300, Gleb Natapov wrote:
> Hi,
> 
> This patch series aims to consolidate IRQ injection code for in kernel
> IRQ chip and userspace one. Also to move IRQ injection logic from
> SVM/VMX specific code to x86.c.

-- 
           | Advanced Micro Devices GmbH
 Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
 System    | 
 Research  | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
 Center    | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
           | Registergericht München, HRB Nr. 43632


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 00/15] interrupt injection rework
  2009-04-14 15:33 ` Joerg Roedel
@ 2009-04-14 15:37   ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14 15:37 UTC (permalink / raw)
  To: Joerg Roedel; +Cc: avi, kvm, sheng

On Tue, Apr 14, 2009 at 05:33:49PM +0200, Joerg Roedel wrote:
> Hi Gleb,
> 
> I am just curious, have you given these patches some stress testing with all
> kinds of Windows guests? To my experience they break very fast if something is
> wrong with interrupts ;)
> 
I tested VMX with Windows XP/Vista64 and SVM with Windows 2003/Vista64, but
SVM is tested much less than VMX.

> Joerg
> 
> On Mon, Apr 13, 2009 at 12:55:30PM +0300, Gleb Natapov wrote:
> > Hi,
> > 
> > This patch series aims to consolidate IRQ injection code for in kernel
> > IRQ chip and userspace one. Also to move IRQ injection logic from
> > SVM/VMX specific code to x86.c.
> 
> -- 
>            | Advanced Micro Devices GmbH
>  Operating | Karl-Hammerschmidt-Str. 34, 85609 Dornach bei München
>  System    | 
>  Research  | Geschäftsführer: Jochen Polster, Thomas M. McCoy, Giuliano Meroni
>  Center    | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
>            | Registergericht München, HRB Nr. 43632

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 14:55         ` Gleb Natapov
@ 2009-04-14 15:38           ` Gleb Natapov
  2009-04-14 19:29           ` Dmitry Eremin-Solenikov
  1 sibling, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14 15:38 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Tue, Apr 14, 2009 at 05:55:36PM +0300, Gleb Natapov wrote:
> > After applying the next patch (or the whole serie), I get the following messages
> > during initramfs drivers probe:
> > 
> > Clocksource tsc unstable (delta...)
> > no cont in shutdown!
> > floppy0: FDC access conflict!
> > 
> > Then kernel boot stalls. I'll try gdbing into kernel but this may
> > require lots of efforts.
> > I don't quite understand how do these two patches influence FDC emulation, but
> > they do. Tell me if you need any additional info.
> > 
> What guest is this? What kernel? Does the whole series works?
> 
And while you are at it can you try to run with -no-kvm-irqchip?

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 14:24     ` Gleb Natapov
  2009-04-14 14:32       ` Dmitry Eremin-Solenikov
@ 2009-04-14 16:10       ` Avi Kivity
  2009-04-14 16:18         ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-14 16:10 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Dmitry Eremin-Solenikov, kvm

Gleb Natapov wrote:
> On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
>   
>> Gleb Natapov wrote:
>>
>>     
>>> Start to use interrupt/exception queues like VMX does. This also fix the
>>> bug that if exit was caused by a guest internal exception access to IDT
>>> the exception was not reinjected.
>>>       
>> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
>> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
>> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
>> version 0.10.2.
>>
>>     
> Please apply next patch in the series too. This one will not work
> without it. But better yet can you please test entire series.
>
>   

Er, I'd much rather have a bisectable series.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 16:10       ` Avi Kivity
@ 2009-04-14 16:18         ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14 16:18 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Dmitry Eremin-Solenikov, kvm

On Tue, Apr 14, 2009 at 07:10:40PM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>> On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
>>   
>>> Gleb Natapov wrote:
>>>
>>>     
>>>> Start to use interrupt/exception queues like VMX does. This also fix the
>>>> bug that if exit was caused by a guest internal exception access to IDT
>>>> the exception was not reinjected.
>>>>       
>>> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
>>> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
>>> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
>>> version 0.10.2.
>>>
>>>     
>> Please apply next patch in the series too. This one will not work
>> without it. But better yet can you please test entire series.
>>
>>   
>
> Er, I'd much rather have a bisectable series.
>
The next patch (Use EVENTINJ to inject interrupts) is really small.
We can coalesce those two.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 14:55         ` Gleb Natapov
  2009-04-14 15:38           ` Gleb Natapov
@ 2009-04-14 19:29           ` Dmitry Eremin-Solenikov
  2009-04-14 19:41             ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-14 19:29 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

2009/4/14 Gleb Natapov <gleb@redhat.com>:
> On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>> > On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
>> >> Gleb Natapov wrote:
>> >>
>> >> > Start to use interrupt/exception queues like VMX does. This also fix the
>> >> > bug that if exit was caused by a guest internal exception access to IDT
>> >> > the exception was not reinjected.
>> >>
>> >> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
>> >> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
>> >> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
>> >> version 0.10.2.
>> >>
>> > Please apply next patch in the series too. This one will not work
>> > without it. But better yet can you please test entire series.
>>
>> After applying the next patch (or the whole serie), I get the following messages
>> during initramfs drivers probe:
>>
>> Clocksource tsc unstable (delta...)
>> no cont in shutdown!
>> floppy0: FDC access conflict!
>>
>> Then kernel boot stalls. I'll try gdbing into kernel but this may
>> require lots of efforts.
>> I don't quite understand how do these two patches influence FDC emulation, but
>> they do. Tell me if you need any additional info.
>>
> What guest is this? What kernel? Does the whole series works?

Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
config or bzImage + initrd).
The whole series doesn't work either (that's why I started bisecting).

And BTW, I got the same results with -no-kvm-irqchip

-- 
With best wishes
Dmitry

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 19:29           ` Dmitry Eremin-Solenikov
@ 2009-04-14 19:41             ` Gleb Natapov
  2009-04-15  6:11               ` Gleb Natapov
  2009-04-15  9:30               ` Dmitry Eremin-Solenikov
  0 siblings, 2 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-14 19:41 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Tue, Apr 14, 2009 at 11:29:49PM +0400, Dmitry Eremin-Solenikov wrote:
> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> > On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
> >> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> >> > On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
> >> >> Gleb Natapov wrote:
> >> >>
> >> >> > Start to use interrupt/exception queues like VMX does. This also fix the
> >> >> > bug that if exit was caused by a guest internal exception access to IDT
> >> >> > the exception was not reinjected.
> >> >>
> >> >> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
> >> >> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
> >> >> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
> >> >> version 0.10.2.
> >> >>
> >> > Please apply next patch in the series too. This one will not work
> >> > without it. But better yet can you please test entire series.
> >>
> >> After applying the next patch (or the whole serie), I get the following messages
> >> during initramfs drivers probe:
> >>
> >> Clocksource tsc unstable (delta...)
> >> no cont in shutdown!
> >> floppy0: FDC access conflict!
> >>
> >> Then kernel boot stalls. I'll try gdbing into kernel but this may
> >> require lots of efforts.
> >> I don't quite understand how do these two patches influence FDC emulation, but
> >> they do. Tell me if you need any additional info.
> >>
> > What guest is this? What kernel? Does the whole series works?
> 
> Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
> config or bzImage + initrd).
Yes please provide. Debian lenny (x86_64) is my default guest :) And I
just booted it fine on AMD barcelona CPU.  What is your host cpu?
"cat /proc/cpuinfo"

I just noticed that my kernel is different. Will install 2.6.26 and retest,
but provide me yours anyway.

> The whole serie doesn't work too (that's why I started bisecting).
> 
> And BTW, I got the same results with -no-kvm-irqchip
> 

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 19:41             ` Gleb Natapov
@ 2009-04-15  6:11               ` Gleb Natapov
  2009-04-15  9:30               ` Dmitry Eremin-Solenikov
  1 sibling, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-15  6:11 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Tue, Apr 14, 2009 at 10:41:03PM +0300, Gleb Natapov wrote:
> > Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
> > config or bzImage + initrd).
> Yes please provide. Debian lenny (x86_64) is my default guest :) And I
> just booted it fine on AMD barcelona CPU.  What is you host cpu?
> "cat /proc/cpuinfo"
> 
> I just noticed that my kernel is different. Will install 2.6.26 and retest,
> but provide me yours anyway.

2.6.26-2-amd64 works for me too.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-14 19:41             ` Gleb Natapov
  2009-04-15  6:11               ` Gleb Natapov
@ 2009-04-15  9:30               ` Dmitry Eremin-Solenikov
  2009-04-15  9:39                 ` Gleb Natapov
  2009-04-15  9:44                 ` Gleb Natapov
  1 sibling, 2 replies; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-15  9:30 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

2009/4/14 Gleb Natapov <gleb@redhat.com>:
> On Tue, Apr 14, 2009 at 11:29:49PM +0400, Dmitry Eremin-Solenikov wrote:
>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>> > On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
>> >> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>> >> > On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
>> >> >> Gleb Natapov wrote:
>> >> >>
>> >> >> > Start to use interrupt/exception queues like VMX does. This also fix the
>> >> >> > bug that if exit was caused by a guest internal exception access to IDT
>> >> >> > the exception was not reinjected.
>> >> >>
>> >> >> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
>> >> >> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
>> >> >> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
>> >> >> version 0.10.2.
>> >> >>
>> >> > Please apply next patch in the series too. This one will not work
>> >> > without it. But better yet can you please test entire series.
>> >>
>> >> After applying the next patch (or the whole serie), I get the following messages
>> >> during initramfs drivers probe:
>> >>
>> >> Clocksource tsc unstable (delta...)
>> >> no cont in shutdown!
>> >> floppy0: FDC access conflict!
>> >>
>> >> Then kernel boot stalls. I'll try gdbing into kernel but this may
>> >> require lots of efforts.
>> >> I don't quite understand how do these two patches influence FDC emulation, but
>> >> they do. Tell me if you need any additional info.
>> >>
>> > What guest is this? What kernel? Does the whole series works?
>>
>> Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
>> config or bzImage + initrd).
> Yes please provide. Debian lenny (x86_64) is my default guest :) And I
> just booted it fine on AMD barcelona CPU.  What is you host cpu?
> "cat /proc/cpuinfo"

qemu-x86_64 version 0.10.2 running on i386
Due to problems with qemu-x86_64 I have to boot the 'host' kernel with 'noapic'.
qemu-64:~# cat /proc/cpuinfo
processor       : 0
vendor_id       : AuthenticAMD
cpu family      : 6
model           : 2
model name      : QEMU Virtual CPU version 0.10.2
stepping        : 3
cpu MHz         : 1828.754
cache size      : 512 KB
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca
cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx lm 3dnowext 3dnow
up pni svm
bogomips        : 3700.32
TLB size        : 1024 4K pages
clflush size    : 64
cache_alignment : 64
address sizes   : 40 bits physical, 48 bits virtual
power management:




>
> I just noticed that my kernel is different. Will install 2.6.26 and retest,
> but provide me yours anyway.
>
>> The whole serie doesn't work too (that's why I started bisecting).
>>
>> And BTW, I got the same results with -no-kvm-irqchip
>>
>
> --
>                        Gleb.
>



-- 
With best wishes
Dmitry

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15  9:30               ` Dmitry Eremin-Solenikov
@ 2009-04-15  9:39                 ` Gleb Natapov
  2009-04-15 10:22                   ` Jan Kiszka
  2009-04-15  9:44                 ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-15  9:39 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> > On Tue, Apr 14, 2009 at 11:29:49PM +0400, Dmitry Eremin-Solenikov wrote:
> >> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> >> > On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
> >> >> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> >> >> > On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
> >> >> >> Gleb Natapov wrote:
> >> >> >>
> >> >> >> > Start to use interrupt/exception queues like VMX does. This also fix the
> >> >> >> > bug that if exit was caused by a guest internal exception access to IDT
> >> >> >> > the exception was not reinjected.
> >> >> >>
> >> >> >> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
> >> >> >> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
> >> >> >> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
> >> >> >> version 0.10.2.
> >> >> >>
> >> >> > Please apply next patch in the series too. This one will not work
> >> >> > without it. But better yet can you please test entire series.
> >> >>
> >> >> After applying the next patch (or the whole serie), I get the following messages
> >> >> during initramfs drivers probe:
> >> >>
> >> >> Clocksource tsc unstable (delta...)
> >> >> no cont in shutdown!
> >> >> floppy0: FDC access conflict!
> >> >>
> >> >> Then kernel boot stalls. I'll try gdbing into kernel but this may
> >> >> require lots of efforts.
> >> >> I don't quite understand how do these two patches influence FDC emulation, but
> >> >> they do. Tell me if you need any additional info.
> >> >>
> >> > What guest is this? What kernel? Does the whole series works?
> >>
> >> Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
> >> config or bzImage + initrd).
> > Yes please provide. Debian lenny (x86_64) is my default guest :) And I
> > just booted it fine on AMD barcelona CPU.  What is you host cpu?
> > "cat /proc/cpuinfo"
> 
> qemu-x86_64 version 0.10.2 running on i386
> Due to problems with qemu-x86_64 I have to boot the 'host' kernel with 'noapic'.
> qemu-64:~# cat /proc/cpuinfo
> processor       : 0
> vendor_id       : AuthenticAMD
> cpu family      : 6
> model           : 2
> model name      : QEMU Virtual CPU version 0.10.2
> stepping        : 3
> cpu MHz         : 1828.754
> cache size      : 512 KB
> fpu             : yes
> fpu_exception   : yes
> cpuid level     : 2
> wp              : yes
> flags           : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca
> cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx lm 3dnowext 3dnow
> up pni svm
> bogomips        : 3700.32
> TLB size        : 1024 4K pages
> clflush size    : 64
> cache_alignment : 64
> address sizes   : 40 bits physical, 48 bits virtual
> power management:
> 
> 
I need _host_ cpu info. Do the same on the host please.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15  9:30               ` Dmitry Eremin-Solenikov
  2009-04-15  9:39                 ` Gleb Natapov
@ 2009-04-15  9:44                 ` Gleb Natapov
  2009-04-15 11:11                   ` Dmitry Eremin-Solenikov
  1 sibling, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-15  9:44 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
> qemu-x86_64 version 0.10.2 running on i386
> Due to problems with qemu-x86_64 I have to boot the 'host' kernel with 'noapic'.
Do you mean boot 'guest' kernel with noapic? The guest is what runs
inside qemu. So you are able to boot guest with 'noapic'?

What is the command line you are using?

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt   injection logic.
  2009-04-15  9:39                 ` Gleb Natapov
@ 2009-04-15 10:22                   ` Jan Kiszka
  2009-04-15 10:36                     ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-15 10:22 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Dmitry Eremin-Solenikov, kvm

Gleb Natapov wrote:
> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>>> On Tue, Apr 14, 2009 at 11:29:49PM +0400, Dmitry Eremin-Solenikov wrote:
>>>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>>>>> On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
>>>>>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>>>>>>> On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
>>>>>>>> Gleb Natapov wrote:
>>>>>>>>
>>>>>>>>> Start to use interrupt/exception queues like VMX does. This also fix the
>>>>>>>>> bug that if exit was caused by a guest internal exception access to IDT
>>>>>>>>> the exception was not reinjected.
>>>>>>>> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
>>>>>>>> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
>>>>>>>> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
>>>>>>>> version 0.10.2.
>>>>>>>>
>>>>>>> Please apply next patch in the series too. This one will not work
>>>>>>> without it. But better yet can you please test entire series.
>>>>>> After applying the next patch (or the whole serie), I get the following messages
>>>>>> during initramfs drivers probe:
>>>>>>
>>>>>> Clocksource tsc unstable (delta...)
>>>>>> no cont in shutdown!
>>>>>> floppy0: FDC access conflict!
>>>>>>
>>>>>> Then kernel boot stalls. I'll try gdbing into kernel but this may
>>>>>> require lots of efforts.
>>>>>> I don't quite understand how do these two patches influence FDC emulation, but
>>>>>> they do. Tell me if you need any additional info.
>>>>>>
>>>>> What guest is this? What kernel? Does the whole series works?
>>>> Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
>>>> config or bzImage + initrd).
>>> Yes please provide. Debian lenny (x86_64) is my default guest :) And I
>>> just booted it fine on AMD barcelona CPU.  What is you host cpu?
>>> "cat /proc/cpuinfo"
>> qemu-x86_64 version 0.10.2 running on i386
>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel with 'noapic'.
>> qemu-64:~# cat /proc/cpuinfo
>> processor       : 0
>> vendor_id       : AuthenticAMD
>> cpu family      : 6
>> model           : 2
>> model name      : QEMU Virtual CPU version 0.10.2
>> stepping        : 3
>> cpu MHz         : 1828.754
>> cache size      : 512 KB
>> fpu             : yes
>> fpu_exception   : yes
>> cpuid level     : 2
>> wp              : yes
>> flags           : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca
>> cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx lm 3dnowext 3dnow
>> up pni svm
>> bogomips        : 3700.32
>> TLB size        : 1024 4K pages
>> clflush size    : 64
>> cache_alignment : 64
>> address sizes   : 40 bits physical, 48 bits virtual
>> power management:
>>
>>
> I need _host_ cpu info. Do the same on the host please.

That _is_ his host - qemu in emulation mode (ie. nested virtualization).
Maybe there is an issue with qemu's emulation of svm or, rather, with
the apic emulation. The fact that he has to boot the first-level guest
with noapic is fairly suspicious.

Dmitry, what is your first-level guest distro/kernel, also Lenny? And
what is the top-level qemu command line? Let's focus on this first,
leaving KVM and this patch series aside for a while.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 10:22                   ` Jan Kiszka
@ 2009-04-15 10:36                     ` Gleb Natapov
  2009-04-15 10:51                       ` Jan Kiszka
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-15 10:36 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Dmitry Eremin-Solenikov, kvm

On Wed, Apr 15, 2009 at 12:22:34PM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
> >> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> >>> On Tue, Apr 14, 2009 at 11:29:49PM +0400, Dmitry Eremin-Solenikov wrote:
> >>>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> >>>>> On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
> >>>>>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
> >>>>>>> On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
> >>>>>>>> Gleb Natapov wrote:
> >>>>>>>>
> >>>>>>>>> Start to use interrupt/exception queues like VMX does. This also fix the
> >>>>>>>>> bug that if exit was caused by a guest internal exception access to IDT
> >>>>>>>>> the exception was not reinjected.
> >>>>>>>> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
> >>>>>>>> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
> >>>>>>>> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
> >>>>>>>> version 0.10.2.
> >>>>>>>>
> >>>>>>> Please apply next patch in the series too. This one will not work
> >>>>>>> without it. But better yet can you please test entire series.
> >>>>>> After applying the next patch (or the whole serie), I get the following messages
> >>>>>> during initramfs drivers probe:
> >>>>>>
> >>>>>> Clocksource tsc unstable (delta...)
> >>>>>> no cont in shutdown!
> >>>>>> floppy0: FDC access conflict!
> >>>>>>
> >>>>>> Then kernel boot stalls. I'll try gdbing into kernel but this may
> >>>>>> require lots of efforts.
> >>>>>> I don't quite understand how do these two patches influence FDC emulation, but
> >>>>>> they do. Tell me if you need any additional info.
> >>>>>>
> >>>>> What guest is this? What kernel? Does the whole series works?
> >>>> Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
> >>>> config or bzImage + initrd).
> >>> Yes please provide. Debian lenny (x86_64) is my default guest :) And I
> >>> just booted it fine on AMD barcelona CPU.  What is you host cpu?
> >>> "cat /proc/cpuinfo"
> >> qemu-x86_64 version 0.10.2 running on i386
> >> Due to problems with qemu-x86_64 I have to boot the 'host' kernel with 'noapic'.
> >> qemu-64:~# cat /proc/cpuinfo
> >> processor       : 0
> >> vendor_id       : AuthenticAMD
> >> cpu family      : 6
> >> model           : 2
> >> model name      : QEMU Virtual CPU version 0.10.2
> >> stepping        : 3
> >> cpu MHz         : 1828.754
> >> cache size      : 512 KB
> >> fpu             : yes
> >> fpu_exception   : yes
> >> cpuid level     : 2
> >> wp              : yes
> >> flags           : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca
> >> cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx lm 3dnowext 3dnow
> >> up pni svm
> >> bogomips        : 3700.32
> >> TLB size        : 1024 4K pages
> >> clflush size    : 64
> >> cache_alignment : 64
> >> address sizes   : 40 bits physical, 48 bits virtual
> >> power management:
> >>
> >>
> > I need _host_ cpu info. Do the same on the host please.
> 
> That _is_ his host - qemu in emulation mode (ie. nested virtualization).
Ah, now I noticed svm in cpu flags. Does qemu support svm in TCG?

> Maybe there is an issue with qemu's emulation of svm or, rather, with
> the apic emulation. The fact that he has to boot the first-level guest
> with noapic is fairly suspicious.
> 
> Dmitry, what is your first level-guest distro/kernel, also Lenny? And
> what is the top-level qemu command line? Let's focus on this first,
> leaving KVM and this patch series aside for a while.
> 
If KVM runs inside a guest that is definitely a good idea :)

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 10:36                     ` Gleb Natapov
@ 2009-04-15 10:51                       ` Jan Kiszka
  2009-04-15 10:57                         ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-15 10:51 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Dmitry Eremin-Solenikov, kvm

Gleb Natapov wrote:
> On Wed, Apr 15, 2009 at 12:22:34PM +0200, Jan Kiszka wrote:
>> Gleb Natapov wrote:
>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
>>>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>>>>> On Tue, Apr 14, 2009 at 11:29:49PM +0400, Dmitry Eremin-Solenikov wrote:
>>>>>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>>>>>>> On Tue, Apr 14, 2009 at 06:32:29PM +0400, Dmitry Eremin-Solenikov wrote:
>>>>>>>> 2009/4/14 Gleb Natapov <gleb@redhat.com>:
>>>>>>>>> On Tue, Apr 14, 2009 at 02:14:04PM +0000, Dmitry Eremin-Solenikov wrote:
>>>>>>>>>> Gleb Natapov wrote:
>>>>>>>>>>
>>>>>>>>>>> Start to use interrupt/exception queues like VMX does. This also fix the
>>>>>>>>>>> bug that if exit was caused by a guest internal exception access to IDT
>>>>>>>>>>> the exception was not reinjected.
>>>>>>>>>> This patch broke KVM for me: after it is applied (to the tip of avi's git tree),
>>>>>>>>>> linux inside KVM (version 84 from Debian) stops booting, moaning about lost
>>>>>>>>>> interrupts from ide. The KVM is executed inside qemu-system-x86_64,
>>>>>>>>>> version 0.10.2.
>>>>>>>>>>
>>>>>>>>> Please apply next patch in the series too. This one will not work
>>>>>>>>> without it. But better yet can you please test entire series.
>>>>>>>> After applying the next patch (or the whole serie), I get the following messages
>>>>>>>> during initramfs drivers probe:
>>>>>>>>
>>>>>>>> Clocksource tsc unstable (delta...)
>>>>>>>> no cont in shutdown!
>>>>>>>> floppy0: FDC access conflict!
>>>>>>>>
>>>>>>>> Then kernel boot stalls. I'll try gdbing into kernel but this may
>>>>>>>> require lots of efforts.
>>>>>>>> I don't quite understand how do these two patches influence FDC emulation, but
>>>>>>>> they do. Tell me if you need any additional info.
>>>>>>>>
>>>>>>> What guest is this? What kernel? Does the whole series works?
>>>>>> Guest: Debian lenny. Linux 2.6.26, Debian version (I can provide
>>>>>> config or bzImage + initrd).
>>>>> Yes please provide. Debian lenny (x86_64) is my default guest :) And I
>>>>> just booted it fine on AMD barcelona CPU.  What is you host cpu?
>>>>> "cat /proc/cpuinfo"
>>>> qemu-x86_64 version 0.10.2 running on i386
>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel with 'noapic'.
>>>> qemu-64:~# cat /proc/cpuinfo
>>>> processor       : 0
>>>> vendor_id       : AuthenticAMD
>>>> cpu family      : 6
>>>> model           : 2
>>>> model name      : QEMU Virtual CPU version 0.10.2
>>>> stepping        : 3
>>>> cpu MHz         : 1828.754
>>>> cache size      : 512 KB
>>>> fpu             : yes
>>>> fpu_exception   : yes
>>>> cpuid level     : 2
>>>> wp              : yes
>>>> flags           : fpu de pse tsc msr pae mce cx8 apic sep mtrr pge mca
>>>> cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx lm 3dnowext 3dnow
>>>> up pni svm
>>>> bogomips        : 3700.32
>>>> TLB size        : 1024 4K pages
>>>> clflush size    : 64
>>>> cache_alignment : 64
>>>> address sizes   : 40 bits physical, 48 bits virtual
>>>> power management:
>>>>
>>>>
>>> I need _host_ cpu info. Do the same on the host please.
>> That _is_ his host - qemu in emulation mode (ie. nested virtualization).
> Ah, now I noticed svm in cpu flags. Does qemu support svm in TCG?

Yes, and KVM seems to have been fine without the patch. But that may not
exclude remaining bugs in QEMU (as first-level hypervisor here).

On the other hand, it wouldn't be the first time QEMU, with its extreme
delays, triggers some nasty race in its guest...

> 
>> Maybe there is an issue with qemu's emulation of svm or, rather, with
>> the apic emulation. The fact that he has to boot the first-level guest
>> with noapic is fairly suspicious.
>>
>> Dmitry, what is your first level-guest distro/kernel, also Lenny? And
>> what is the top-level qemu command line? Let's focus on this first,
>> leaving KVM and this patch series aside for a while.
>>
> If KVM runs inside a guest that is definitely a good idea :)
> 
> --
> 			Gleb.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 10:51                       ` Jan Kiszka
@ 2009-04-15 10:57                         ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-15 10:57 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Dmitry Eremin-Solenikov, kvm

On Wed, Apr 15, 2009 at 12:51:00PM +0200, Jan Kiszka wrote:
> >>> I need _host_ cpu info. Do the same on the host please.
> >> That _is_ his host - qemu in emulation mode (ie. nested virtualization).
> > Ah, now I noticed svm in cpu flags. Does qemu support svm in TCG?
> 
> Yes, and KVM seems to have been fine without the patch. But that may not
> exclude remaining bugs in QEMU (as first-level hypervisor here).
> 
> On the other hand, it wouldn't be the first time QEMU, with its extreme
> delays, triggers some nasty race in its guest...
> 
It doesn't look like a race to me. The failure is 100% reproducible. I'll
try to reproduce locally and see what is going on.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15  9:44                 ` Gleb Natapov
@ 2009-04-15 11:11                   ` Dmitry Eremin-Solenikov
  2009-04-15 11:26                     ` Jan Kiszka
  0 siblings, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-15 11:11 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Jan Kiszka

Gleb Natapov wrote:
> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
>> qemu-x86_64 version 0.10.2 running on i386
>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel with 'noapic'.
> Do you mean boot 'guest' kernel with noapic? The guest is what runs
> inside qemu. So you are able to boot guest with 'noapic'?
> 
> What is the command line you are using.

Well, since this caused lots of questions, here is my setup:

Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
qemu-system-x86_64 version 0.10.2

KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c 
from Avi's tree (KVM: x86 emulator: fix call near emulation) + these 
patches. I have to boot the kernels (both this kernel and 2.6.26 from 
debian) with noapic to w/around APIC problems (I dunno if it's qemu or 
bochsbios problem).

system inside qemu: 64-bit debian lenny

KVM userspace: debian 84+dfsg-2

inside kvm I run 32-bit debian lenny with plain debian 2.6.26 kernel.

-- 
With best wishes
Dmitry


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 11:11                   ` Dmitry Eremin-Solenikov
@ 2009-04-15 11:26                     ` Jan Kiszka
  2009-04-15 11:53                       ` Dmitry Eremin-Solenikov
  0 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-15 11:26 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: Gleb Natapov, kvm

Dmitry Eremin-Solenikov wrote:
> Gleb Natapov wrote:
>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
>>> qemu-x86_64 version 0.10.2 running on i386
>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>> with 'noapic'.
>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>> inside qemu. So you are able to boot guest with 'noapic'?
>>
>> What is the command line you are using.
> 
> Well, since this caused lot's of questions, here is my setup:
> 
> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
> qemu-system-x86_64 version 0.10.2
> 
> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
> patches. I have to boot the kernels (both this kernel and 2.6.26 from
> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
> bochsbios problem).

And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
specify explicitly with -bios and/or -L)? Then this would be a QEMU
upstream bug.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 11:26                     ` Jan Kiszka
@ 2009-04-15 11:53                       ` Dmitry Eremin-Solenikov
  2009-04-15 11:58                         ` Dmitry Eremin-Solenikov
                                           ` (2 more replies)
  0 siblings, 3 replies; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-15 11:53 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Gleb Natapov, kvm

[-- Attachment #1: Type: text/plain, Size: 1389 bytes --]

Jan Kiszka пишет:
> Dmitry Eremin-Solenikov wrote:
>> Gleb Natapov wrote:
>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
>>>> qemu-x86_64 version 0.10.2 running on i386
>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>>> with 'noapic'.
>>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>>> inside qemu. So you are able to boot guest with 'noapic'?
>>>
>>> What is the command line you are using.
>> Well, since this caused lot's of questions, here is my setup:
>>
>> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
>> qemu-system-x86_64 version 0.10.2
>>
>> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
>> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
>> patches. I have to boot the kernels (both this kernel and 2.6.26 from
>> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
>> bochsbios problem).
> 
> And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
> specify explicitly with -bios and/or -L)? Then this would be a QEMU
> upstream bug.

Indeed, there seem to be problems with the upstream qemu bios. I was using
the image from Debian's bochsbios package. I asked qemu to use the
bios from the 0.10.2 release and got slightly different messages. The
kernel log is attached.

-- 
With best wishes
Dmitry


[-- Attachment #2: log --]
[-- Type: text/plain, Size: 8979 bytes --]

Linux version 2.6.29-06626-gb9d7dba (lumag@doriath) (gcc version 4.3.3 (Debian 4.3.3-3) ) #8 SMP Wed Apr 15 15:46:28 MSD 2009
Command line: root=/dev/sda1 ro console=ttyS0 apic=debug debug
KERNEL supported cpus:
  Intel GenuineIntel
  AMD AuthenticAMD
  Centaur CentaurHauls
BIOS-provided physical RAM map:
 BIOS-e820: 0000000000000000 - 000000000009f000 (usable)
 BIOS-e820: 000000000009f000 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000e8000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 000000000fff0000 (usable)
 BIOS-e820: 000000000fff0000 - 0000000010000000 (ACPI data)
 BIOS-e820: 00000000fffc0000 - 0000000100000000 (reserved)
DMI 2.4 present.
last_pfn = 0xfff0 max_arch_pfn = 0x100000000
x86 PAT enabled: cpu 0, old 0x0, new 0x7010600070106
init_memory_mapping: 0000000000000000-000000000fff0000
 0000000000 - 000fe00000 page 2M
 000fe00000 - 000fff0000 page 4k
kernel direct mapping tables up to fff0000 @ 8000-b000
last_map_addr: fff0000 end: fff0000
ACPI: RSDP 000FBB80, 0014 (r0 QEMU  )
ACPI: RSDT 0FFF0000, 0034 (r1 QEMU   QEMURSDT        1 QEMU        1)
ACPI: FACP 0FFF0034, 0074 (r1 QEMU   QEMUFACP        1 QEMU        1)
FADT: X_PM1a_EVT_BLK.bit_width (16) does not match PM1_EVT_LEN (4)
ACPI: DSDT 0FFF0100, 080D (r1   BXPC   BXDSDT        1 INTL 20061109)
ACPI: FACS 0FFF00C0, 0040
ACPI: APIC 0FFF0948, 004A (r1 QEMU   QEMUAPIC        1 QEMU        1)
ACPI: SSDT 0FFF090D, 0037 (r1 QEMU   QEMUSSDT        1 QEMU        1)
ACPI: HPET 0FFF0998, 0038 (r1 QEMU   QEMUHPET        1 QEMU        1)
ACPI: Local APIC address 0xfee00000
(5 early reservations) ==> bootmem [0000000000 - 000fff0000]
  #0 [0000000000 - 0000001000]   BIOS data page ==> [0000000000 - 0000001000]
  #1 [0000006000 - 0000008000]       TRAMPOLINE ==> [0000006000 - 0000008000]
  #2 [0000200000 - 0000876c54]    TEXT DATA BSS ==> [0000200000 - 0000876c54]
  #3 [000009fc00 - 0000100000]    BIOS reserved ==> [000009fc00 - 0000100000]
  #4 [0000008000 - 0000009000]          PGTABLE ==> [0000008000 - 0000009000]
Scan SMP from ffff880000000000 for 1024 bytes.
Scan SMP from ffff88000009fc00 for 1024 bytes.
Scan SMP from ffff8800000f0000 for 65536 bytes.
found SMP MP-table at [ffff8800000fba60] fba60
 [ffffe20000000000-ffffe200003fffff] PMD -> [ffff880001200000-ffff8800015fffff] on node 0
Zone PFN ranges:
  DMA      0x00000000 -> 0x00001000
  DMA32    0x00001000 -> 0x00100000
  Normal   0x00100000 -> 0x00100000
Movable zone start PFN for each node
early_node_map[2] active PFN ranges
    0: 0x00000000 -> 0x0000009f
    0: 0x00000100 -> 0x0000fff0
On node 0 totalpages: 65423
  DMA zone: 56 pages used for memmap
  DMA zone: 1756 pages reserved
  DMA zone: 2187 pages, LIFO batch:0
  DMA32 zone: 840 pages used for memmap
  DMA32 zone: 60584 pages, LIFO batch:15
ACPI: PM-Timer IO Port: 0xb008
ACPI: Local APIC address 0xfee00000
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0])
IOAPIC[0]: apic_id 1, version 0, address 0xfec00000, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: IRQ0 used by override.
ACPI: IRQ2 used by override.
ACPI: IRQ9 used by override.
Using ACPI (MADT) for SMP configuration information
ACPI: HPET id: 0x8086a201 base: 0xfed00000
SMP: Allowing 1 CPUs, 0 hotplug CPUs
mapped APIC to ffffffffff5fc000 (fee00000)
mapped IOAPIC to ffffffffff5fb000 (fec00000)
nr_irqs_gsi: 24
Allocating PCI resources starting at 20000000 (gap: 10000000:effc0000)
NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:1 nr_node_ids:1
PERCPU: Embedded 25 pages at ffff880001033000, static data 70880 bytes
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 62771
Kernel command line: root=/dev/sda1 ro console=ttyS0 apic=debug debug
Initializing CPU#0
NR_IRQS:512
PID hash table entries: 1024 (order: 10, 8192 bytes)
Fast TSC calibration using PIT
Detected 1828.564 MHz processor.
Console: colour VGA+ 80x25
console [ttyS0] enabled
Dentry cache hash table entries: 32768 (order: 6, 262144 bytes)
Inode-cache hash table entries: 16384 (order: 5, 131072 bytes)
Checking aperture...
No AGP bridge found
Memory: 249848k/262080k available (4048k kernel code, 388k absent, 11528k reserved, 1626k data, 436k init)
SLUB: Genslabs=13, HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
hpet clockevent registered
Calibrating delay loop (skipped), value calculated using timer frequency.. 3657.12 BogoMIPS (lpj=7314256)
Mount-cache hash table entries: 256
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
SMP alternatives: switching to UP code
Freeing SMP alternatives: 29k freed
ACPI: Core revision 20081204
Setting APIC routing to flat
Getting VERSION: 50011
Getting VERSION: 50011
Getting ID: 0
Getting ID: ff000000
Getting LVT0: 700
Getting LVT1: 10000
enabled ExtINT on CPU#0
ENABLING IO-APIC IRQs
init IO_APIC IRQs
 1-0 (apicid-pin) not connected
IOAPIC[0]: Set routing entry (1-1 -> 0x31 -> IRQ 1 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-2 -> 0x30 -> IRQ 0 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-3 -> 0x33 -> IRQ 3 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-4 -> 0x34 -> IRQ 4 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-5 -> 0x35 -> IRQ 5 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-6 -> 0x36 -> IRQ 6 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-7 -> 0x37 -> IRQ 7 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-8 -> 0x38 -> IRQ 8 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-9 -> 0x39 -> IRQ 9 Mode:1 Active:1)
IOAPIC[0]: Set routing entry (1-10 -> 0x3a -> IRQ 10 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-11 -> 0x3b -> IRQ 11 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-12 -> 0x3c -> IRQ 12 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-13 -> 0x3d -> IRQ 13 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-14 -> 0x3e -> IRQ 14 Mode:0 Active:0)
IOAPIC[0]: Set routing entry (1-15 -> 0x3f -> IRQ 15 Mode:0 Active:0)
 1-16 1-17 1-18 1-19 1-20 1-21 1-22 1-23 (apicid-pin) not connected
..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
..MP-BIOS bug: 8254 timer not connected to IO-APIC
...trying to set up timer (IRQ0) through the 8259A ...
..... (found apic 0 pin 2) ...
....... failed.
...trying to set up timer as Virtual Wire IRQ...
..... failed.
...trying to set up timer as ExtINT IRQ...
..... failed :(.
Kernel panic - not syncing: IO-APIC + timer doesn't work!  Boot with apic=debug and send a report.  Then try booting with the 'noapic' option.

Pid: 1, comm: swapper Not tainted 2.6.29-06626-gb9d7dba #8
Call Trace:
 [<ffffffff805ee186>] ? panic+0x86/0x156
 [<ffffffff80297191>] ? default_enable+0x21/0x40
 [<ffffffff805f0bee>] ? _spin_lock_irqsave+0x2e/0x50
 [<ffffffff8024ccc5>] ? default_spin_lock_flags+0x5/0x10
 [<ffffffff805f0bee>] ? _spin_lock_irqsave+0x2e/0x50
 [<ffffffff804119cd>] ? delay_tsc+0x3d/0x70
 [<ffffffff807c149b>] ? setup_IO_APIC+0x85e/0x9fd
 [<ffffffff805f0b26>] ? __down_read+0xa6/0xc2
 [<ffffffff807bcdb2>] ? native_smp_prepare_cpus+0x2c3/0x351
 [<ffffffff807b2541>] ? kernel_init+0x52/0x1ae
 [<ffffffff80232f2a>] ? child_rip+0xa/0x20
 [<ffffffff807b24ef>] ? kernel_init+0x0/0x1ae
 [<ffffffff80232f20>] ? child_rip+0x0/0x20
------------[ cut here ]------------
WARNING: at kernel/smp.c:329 smp_call_function_many+0x1f9/0x260()
Hardware name: 
Modules linked in:
Pid: 1, comm: swapper Not tainted 2.6.29-06626-gb9d7dba #8
Call Trace:
 [<ffffffff8026421a>] ? warn_slowpath+0xea/0x160
 [<ffffffff80232f20>] ? child_rip+0x0/0x20
 [<ffffffff80232f20>] ? child_rip+0x0/0x20
 [<ffffffff80232f20>] ? child_rip+0x0/0x20
 [<ffffffff80232f20>] ? child_rip+0x0/0x20
 [<ffffffff80232f20>] ? child_rip+0x0/0x20
 [<ffffffff805ee2a4>] ? printk+0x4e/0x56
 [<ffffffff802788ef>] ? __kernel_text_address+0x2f/0x60
 [<ffffffff802366a0>] ? print_context_stack+0x70/0xd0
 [<ffffffff80291dad>] ? crash_kexec+0x6d/0x110
 [<ffffffff8024ccc5>] ? default_spin_lock_flags+0x5/0x10
 [<ffffffff805f0bee>] ? _spin_lock_irqsave+0x2e/0x50
 [<ffffffff8028b009>] ? smp_call_function_many+0x1f9/0x260
 [<ffffffff8028b090>] ? smp_call_function+0x20/0x30
 [<ffffffff80244f20>] ? native_smp_send_stop+0x20/0x40
 [<ffffffff805ee19a>] ? panic+0x9a/0x156
 [<ffffffff80297191>] ? default_enable+0x21/0x40
 [<ffffffff805f0bee>] ? _spin_lock_irqsave+0x2e/0x50
 [<ffffffff8024ccc5>] ? default_spin_lock_flags+0x5/0x10
 [<ffffffff805f0bee>] ? _spin_lock_irqsave+0x2e/0x50
 [<ffffffff804119cd>] ? delay_tsc+0x3d/0x70
 [<ffffffff807c149b>] ? setup_IO_APIC+0x85e/0x9fd
 [<ffffffff805f0b26>] ? __down_read+0xa6/0xc2
 [<ffffffff807bcdb2>] ? native_smp_prepare_cpus+0x2c3/0x351
 [<ffffffff807b2541>] ? kernel_init+0x52/0x1ae
 [<ffffffff80232f2a>] ? child_rip+0xa/0x20
 [<ffffffff807b24ef>] ? kernel_init+0x0/0x1ae
 [<ffffffff80232f20>] ? child_rip+0x0/0x20
---[ end trace 4eaa2a86a8e2da22 ]---

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 11:53                       ` Dmitry Eremin-Solenikov
@ 2009-04-15 11:58                         ` Dmitry Eremin-Solenikov
  2009-04-15 12:01                         ` Gleb Natapov
  2009-04-15 12:03                         ` Jan Kiszka
  2 siblings, 0 replies; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-15 11:58 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Gleb Natapov, kvm

Dmitry Eremin-Solenikov пишет:
> Jan Kiszka пишет:
>> Dmitry Eremin-Solenikov wrote:
>>> Gleb Natapov wrote:
>>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov 
>>>> wrote:
>>>>> qemu-x86_64 version 0.10.2 running on i386
>>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>>>> with 'noapic'.
>>>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>>>> inside qemu. So you are able to boot guest with 'noapic'?
>>>>
>>>> What is the command line you are using.
>>> Well, since this caused lot's of questions, here is my setup:
>>>
>>> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
>>> qemu-system-x86_64 version 0.10.2
>>>
>>> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
>>> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
>>> patches. I have to boot the kernels (both this kernel and 2.6.26 from
>>> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
>>> bochsbios problem).
>>
>> And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
>> specify explicitly with -bios and/or -L)? Then this would be a QEMU
>> upstream bug.
> 
> Indeed, there seem to be problems with upstream qemu bios. I was using
> the image from the debian's bochsbios package. I asked qemu to use the 
> bios from 0.10.2 release and got slightly different messages. Attached
> the kernel log
> 

Moreover, using bios from 0.10.2 I can't boot linux even with noapic:
ACPI: PM-Timer IO Port: 0xb008
ACPI: LAPIC (acpi_id[0x00] lapic_id[0x00] enabled)
ACPI: Skipping IOAPIC probe due to 'noapic' option.
Using ACPI for processor (LAPIC) configuration information
ACPI: HPET id: 0x8086a201 base: 0xfed00000
Intel MultiProcessor Specification v1.4
MPTABLE: OEM ID: QEMUCPU
MPTABLE: Product ID: 0.1
MPTABLE: APIC at: 0xFEE00000
I/O APIC #1 Version 17 at 0xFEC00000.
Processors: 1
SMP: Allowing 1 CPUs, 0 hotplug CPUs
Allocating PCI resources starting at 20000000 (gap: 10000000:effc0000)
NR_CPUS:8 nr_cpumask_bits:8 nr_cpu_ids:1 nr_node_ids:1
PERCPU: Embedded 25 pages at ffff880001033000, static data 70880 bytes
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 62771
Kernel command line: root=/dev/sda1 ro console=ttyS0 noapic
Initializing CPU#0
NR_IRQS:512
PID hash table entries: 1024 (order: 10, 8192 bytes)
Fast TSC calibration using PIT
Detected 1828.371 MHz processor.
Console: colour VGA+ 80x25
console [ttyS0] enabled
Dentry cache hash table entries: 32768 (order: 6, 262144 bytes)
Inode-cache hash table entries: 16384 (order: 5, 131072 bytes)
Checking aperture...
No AGP bridge found
Memory: 249848k/262080k available (4048k kernel code, 388k absent, 
11528k reserved, 1626k data, 436k init)
SLUB: Genslabs=13, HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
Calibrating delay loop (skipped), value calculated using timer 
frequency.. 3656.74 BogoMIPS (lpj=7313484)
Mount-cache hash table entries: 256
CPU: L1 I Cache: 64K (64 bytes/line), D cache 64K (64 bytes/line)
CPU: L2 Cache: 512K (64 bytes/line)
SMP alternatives: switching to UP code
Freeing SMP alternatives: 29k freed
ACPI: Core revision 20081204
ACPI: setting ELCR to 0200 (from 0a00)
Setting APIC routing to flat
CPU0: AMD QEMU Virtual CPU version 0.10.2 stepping 03

And after that qemu stalls.

-- 
With best wishes
Dmitry


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 11:53                       ` Dmitry Eremin-Solenikov
  2009-04-15 11:58                         ` Dmitry Eremin-Solenikov
@ 2009-04-15 12:01                         ` Gleb Natapov
  2009-04-15 12:02                           ` Dmitry Eremin-Solenikov
  2009-04-15 12:03                         ` Jan Kiszka
  2 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-15 12:01 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: Jan Kiszka, kvm

On Wed, Apr 15, 2009 at 03:53:40PM +0400, Dmitry Eremin-Solenikov wrote:
> Jan Kiszka пишет:
>> Dmitry Eremin-Solenikov wrote:
>>> Gleb Natapov wrote:
>>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
>>>>> qemu-x86_64 version 0.10.2 running on i386
>>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>>>> with 'noapic'.
>>>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>>>> inside qemu. So you are able to boot guest with 'noapic'?
>>>>
>>>> What is the command line you are using.
>>> Well, since this caused lot's of questions, here is my setup:
>>>
>>> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
>>> qemu-system-x86_64 version 0.10.2
>>>
>>> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
>>> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
>>> patches. I have to boot the kernels (both this kernel and 2.6.26 from
>>> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
>>> bochsbios problem).
>>
>> And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
>> specify explicitly with -bios and/or -L)? Then this would be a QEMU
>> upstream bug.
>
> Indeed, there seem to be problems with upstream qemu bios. I was using
> the image from the debian's bochsbios package. I asked qemu to use the  
> bios from 0.10.2 release and got slightly different messages. Attached
> the kernel log
>
Now it seems to be a problem with the KVM bios. KVM will not work with
the upstream bochs or qemu bios, only with its own version.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 12:01                         ` Gleb Natapov
@ 2009-04-15 12:02                           ` Dmitry Eremin-Solenikov
  0 siblings, 0 replies; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-15 12:02 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Jan Kiszka, kvm

Gleb Natapov пишет:
> On Wed, Apr 15, 2009 at 03:53:40PM +0400, Dmitry Eremin-Solenikov wrote:
>> Jan Kiszka пишет:
>>> Dmitry Eremin-Solenikov wrote:
>>>> Gleb Natapov wrote:
>>>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov wrote:
>>>>>> qemu-x86_64 version 0.10.2 running on i386
>>>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>>>>> with 'noapic'.
>>>>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>>>>> inside qemu. So you are able to boot guest with 'noapic'?
>>>>>
>>>>> What is the command line you are using.
>>>> Well, since this caused lot's of questions, here is my setup:
>>>>
>>>> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
>>>> qemu-system-x86_64 version 0.10.2
>>>>
>>>> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
>>>> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
>>>> patches. I have to boot the kernels (both this kernel and 2.6.26 from
>>>> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
>>>> bochsbios problem).
>>> And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
>>> specify explicitly with -bios and/or -L)? Then this would be a QEMU
>>> upstream bug.
>> Indeed, there seem to be problems with upstream qemu bios. I was using
>> the image from the debian's bochsbios package. I asked qemu to use the  
>> bios from 0.10.2 release and got slightly different messages. Attached
>> the kernel log
>>
> Now it seems to be a problem with KVM bios. KVM will not work with
> upstream bochs or qemu bios only with its own version.

I was talking about qemu-system-x86_64, not about KVM.

-- 
With best wishes
Dmitry


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 11:53                       ` Dmitry Eremin-Solenikov
  2009-04-15 11:58                         ` Dmitry Eremin-Solenikov
  2009-04-15 12:01                         ` Gleb Natapov
@ 2009-04-15 12:03                         ` Jan Kiszka
  2009-04-15 12:39                           ` Dmitry Eremin-Solenikov
  2 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-15 12:03 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: Gleb Natapov, kvm

Dmitry Eremin-Solenikov wrote:
> Jan Kiszka пишет:
>> Dmitry Eremin-Solenikov wrote:
>>> Gleb Natapov wrote:
>>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov
>>>> wrote:
>>>>> qemu-x86_64 version 0.10.2 running on i386
>>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>>>> with 'noapic'.
>>>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>>>> inside qemu. So you are able to boot guest with 'noapic'?
>>>>
>>>> What is the command line you are using.
>>> Well, since this caused lot's of questions, here is my setup:
>>>
>>> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
>>> qemu-system-x86_64 version 0.10.2
>>>
>>> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
>>> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
>>> patches. I have to boot the kernels (both this kernel and 2.6.26 from
>>> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
>>> bochsbios problem).
>>
>> And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
>> specify explicitly with -bios and/or -L)? Then this would be a QEMU
>> upstream bug.
> 
> Indeed, there seem to be problems with upstream qemu bios. I was using
> the image from the debian's bochsbios package.

Bochsbios is typically lacking some patches qemu needs; hence the
bios patch queue in qemu.

> I asked qemu to use the
> bios from 0.10.2 release and got slightly different messages. Attached
> the kernel log
> 

...

> init IO_APIC IRQs
>  1-0 (apicid-pin) not connected
> IOAPIC[0]: Set routing entry (1-1 -> 0x31 -> IRQ 1 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-2 -> 0x30 -> IRQ 0 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-3 -> 0x33 -> IRQ 3 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-4 -> 0x34 -> IRQ 4 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-5 -> 0x35 -> IRQ 5 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-6 -> 0x36 -> IRQ 6 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-7 -> 0x37 -> IRQ 7 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-8 -> 0x38 -> IRQ 8 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-9 -> 0x39 -> IRQ 9 Mode:1 Active:1)
> IOAPIC[0]: Set routing entry (1-10 -> 0x3a -> IRQ 10 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-11 -> 0x3b -> IRQ 11 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-12 -> 0x3c -> IRQ 12 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-13 -> 0x3d -> IRQ 13 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-14 -> 0x3e -> IRQ 14 Mode:0 Active:0)
> IOAPIC[0]: Set routing entry (1-15 -> 0x3f -> IRQ 15 Mode:0 Active:0)
>  1-16 1-17 1-18 1-19 1-20 1-21 1-22 1-23 (apicid-pin) not connected
> ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
> ..MP-BIOS bug: 8254 timer not connected to IO-APIC
> ...trying to set up timer (IRQ0) through the 8259A ...
> ..... (found apic 0 pin 2) ...
> ....... failed.
> ...trying to set up timer as Virtual Wire IRQ...
> ..... failed.
> ...trying to set up timer as ExtINT IRQ...
> ..... failed  :( .
> Kernel panic - not syncing: IO-APIC + timer doesn't work!  Boot with apic=debug and send a report.  Then try booting with the 'noapic' option.

This looks a bit like [1, 2] on first glance...

Jan

[1] http://permalink.gmane.org/gmane.comp.emulators.qemu/41300
[2] http://permalink.gmane.org/gmane.comp.emulators.qemu/41433

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 12:03                         ` Jan Kiszka
@ 2009-04-15 12:39                           ` Dmitry Eremin-Solenikov
  2009-04-15 12:48                             ` Jan Kiszka
  0 siblings, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-15 12:39 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Gleb Natapov, kvm

Jan Kiszka пишет:
> Dmitry Eremin-Solenikov wrote:
>> Jan Kiszka пишет:
>>> Dmitry Eremin-Solenikov wrote:
>>>> Gleb Natapov wrote:
>>>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov
>>>>> wrote:
>>>>>> qemu-x86_64 version 0.10.2 running on i386
>>>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>>>>> with 'noapic'.
>>>>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>>>>> inside qemu. So you are able to boot guest with 'noapic'?
>>>>>
>>>>> What is the command line you are using.
>>>> Well, since this caused lot's of questions, here is my setup:
>>>>
>>>> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
>>>> qemu-system-x86_64 version 0.10.2
>>>>
>>>> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
>>>> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
>>>> patches. I have to boot the kernels (both this kernel and 2.6.26 from
>>>> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
>>>> bochsbios problem).
>>> And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
>>> specify explicitly with -bios and/or -L)? Then this would be a QEMU
>>> upstream bug.
>> Indeed, there seem to be problems with upstream qemu bios. I was using
>> the image from the debian's bochsbios package.
> 
> Bochsbios is typically lacking some patches qemu needs, therefore that
> bios patch queue in qemu.

Debian's bochsbios provides two bios versions: one for bochs and one
patched for qemu (though maybe not with the latest patches).

>> I asked qemu to use the
>> bios from 0.10.2 release and got slightly different messages. Attached
>> the kernel log
>>
> 
> ...
> 
>> init IO_APIC IRQs
>>  1-0 (apicid-pin) not connected
>> IOAPIC[0]: Set routing entry (1-1 -> 0x31 -> IRQ 1 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-2 -> 0x30 -> IRQ 0 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-3 -> 0x33 -> IRQ 3 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-4 -> 0x34 -> IRQ 4 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-5 -> 0x35 -> IRQ 5 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-6 -> 0x36 -> IRQ 6 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-7 -> 0x37 -> IRQ 7 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-8 -> 0x38 -> IRQ 8 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-9 -> 0x39 -> IRQ 9 Mode:1 Active:1)
>> IOAPIC[0]: Set routing entry (1-10 -> 0x3a -> IRQ 10 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-11 -> 0x3b -> IRQ 11 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-12 -> 0x3c -> IRQ 12 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-13 -> 0x3d -> IRQ 13 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-14 -> 0x3e -> IRQ 14 Mode:0 Active:0)
>> IOAPIC[0]: Set routing entry (1-15 -> 0x3f -> IRQ 15 Mode:0 Active:0)
>>  1-16 1-17 1-18 1-19 1-20 1-21 1-22 1-23 (apicid-pin) not connected
>> ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
>> ..MP-BIOS bug: 8254 timer not connected to IO-APIC
>> ...trying to set up timer (IRQ0) through the 8259A ...
>> ..... (found apic 0 pin 2) ...
>> ....... failed.
>> ...trying to set up timer as Virtual Wire IRQ...
>> ..... failed.
>> ...trying to set up timer as ExtINT IRQ...
>> ..... failed  :( .
>> Kernel panic - not syncing: IO-APIC + timer doesn't work!  Boot with apic=debug and send a report.  Then try booting with the 'noapic' option.
> 
> This looks a bit like [1, 2] on first glance...
> 
> Jan
> 
> [1] http://permalink.gmane.org/gmane.comp.emulators.qemu/41300
> [2] http://permalink.gmane.org/gmane.comp.emulators.qemu/41433

Looks like a part of these changes. However, I don't quite understand:
these patches should address non-ACPI OSes, but Linux is surely an ACPI OS!

-- 
With best wishes
Dmitry


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-15 12:39                           ` Dmitry Eremin-Solenikov
@ 2009-04-15 12:48                             ` Jan Kiszka
  0 siblings, 0 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-15 12:48 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: Gleb Natapov, kvm

Dmitry Eremin-Solenikov wrote:
> Jan Kiszka пишет:
>> Dmitry Eremin-Solenikov wrote:
>>> Jan Kiszka пишет:
>>>> Dmitry Eremin-Solenikov wrote:
>>>>> Gleb Natapov wrote:
>>>>>> On Wed, Apr 15, 2009 at 01:30:29PM +0400, Dmitry Eremin-Solenikov
>>>>>> wrote:
>>>>>>> qemu-x86_64 version 0.10.2 running on i386
>>>>>>> Due to problems with qemu-x86_64 I have to boot the 'host' kernel
>>>>>>> with 'noapic'.
>>>>>> Do you mean boot 'guest' kernel with noapic? The guest is what runs
>>>>>> inside qemu. So you are able to boot guest with 'noapic'?
>>>>>>
>>>>>> What is the command line you are using.
>>>>> Well, since this caused lot's of questions, here is my setup:
>>>>>
>>>>> Main host: Debian squeeze, kernel 2.6.28 or .29 (doesn't matter),
>>>>> qemu-system-x86_64 version 0.10.2
>>>>>
>>>>> KVM kernel run inside qemu: e3dbe3f408a46a045012f1882e9f62b27b8a616c
>>>>> from Avi's tree (KVM: x86 emulator: fix call near emulation) + these
>>>>> patches. I have to boot the kernels (both this kernel and 2.6.26 from
>>>>> debian) with noapic to w/around APIC problems (I dunno if it's qemu or
>>>>> bochsbios problem).
>>>> And the bios you are using with 0.10.2 is from 0.10.2 (when in doubt,
>>>> specify explicitly with -bios and/or -L)? Then this would be a QEMU
>>>> upstream bug.
>>> Indeed, there seem to be problems with upstream qemu bios. I was using
>>> the image from the debian's bochsbios package.
>>
>> Bochsbios is typically lacking some patches qemu needs, therefore that
>> bios patch queue in qemu.
> 
> Debian's bochsbios provides two bios versions: one for bochs and one
> patched with qemu (maybe not the latest patches though)
> 
>>> I asked qemu to use the
>>> bios from 0.10.2 release and got slightly different messages. Attached
>>> the kernel log
>>>
>>
>> ...
>>
>>> init IO_APIC IRQs
>>>  1-0 (apicid-pin) not connected
>>> IOAPIC[0]: Set routing entry (1-1 -> 0x31 -> IRQ 1 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-2 -> 0x30 -> IRQ 0 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-3 -> 0x33 -> IRQ 3 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-4 -> 0x34 -> IRQ 4 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-5 -> 0x35 -> IRQ 5 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-6 -> 0x36 -> IRQ 6 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-7 -> 0x37 -> IRQ 7 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-8 -> 0x38 -> IRQ 8 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-9 -> 0x39 -> IRQ 9 Mode:1 Active:1)
>>> IOAPIC[0]: Set routing entry (1-10 -> 0x3a -> IRQ 10 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-11 -> 0x3b -> IRQ 11 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-12 -> 0x3c -> IRQ 12 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-13 -> 0x3d -> IRQ 13 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-14 -> 0x3e -> IRQ 14 Mode:0 Active:0)
>>> IOAPIC[0]: Set routing entry (1-15 -> 0x3f -> IRQ 15 Mode:0 Active:0)
>>>  1-16 1-17 1-18 1-19 1-20 1-21 1-22 1-23 (apicid-pin) not connected
>>> ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
>>> ..MP-BIOS bug: 8254 timer not connected to IO-APIC
>>> ...trying to set up timer (IRQ0) through the 8259A ...
>>> ..... (found apic 0 pin 2) ...
>>> ....... failed.
>>> ...trying to set up timer as Virtual Wire IRQ...
>>> ..... failed.
>>> ...trying to set up timer as ExtINT IRQ...
>>> ..... failed  :( .
>>> Kernel panic - not syncing: IO-APIC + timer doesn't work!  Boot with
>>> apic=debug and send a report.  Then try booting with the 'noapic'
>>> option.
>>
>> This looks a bit like [1, 2] on first glance...
>>
>> Jan
>>
>> [1] http://permalink.gmane.org/gmane.comp.emulators.qemu/41300
>> [2] http://permalink.gmane.org/gmane.comp.emulators.qemu/41433
> 
> Looks like a part of this changes.

You mean the kernel boots for you now?

> However I don't quite understand:
> these patches should address non-ACPI OS, but linux is surely and ACPI os!

That's what I also do not understand ATM. I've once seen the above error
as well, but with a !CONFIG_ACPI kernel. However, I'd suggest moving
the issue (if it still exists) to qemu-devel until we reach KVM
specifics again.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-13  9:55 ` [PATCH 13/15] Add NMI injection support to SVM Gleb Natapov
@ 2009-04-17 11:59   ` Jan Kiszka
  2009-04-17 15:12   ` Dmitry Eremin-Solenikov
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-17 11:59 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Eremin-Solenikov

Gleb Natapov wrote:
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 8b6f6e9..057a612 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -766,6 +766,7 @@ enum {
>  #define HF_GIF_MASK		(1 << 0)
>  #define HF_HIF_MASK		(1 << 1)
>  #define HF_VINTR_MASK		(1 << 2)
> +#define HF_NMI_MASK		(1 << 3)
>  
>  /*
>   * Hardware virtualization extension instructions may fault if a
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c605477..cd60fd7 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  	return 1;
>  }
>  
> +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> +{
> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
> +	return 0;
> +}
> +
>  static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
>  	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
> @@ -2111,6 +2118,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
>  	[SVM_EXIT_VINTR]			= interrupt_window_interception,
>  	/* [SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception, */
>  	[SVM_EXIT_CPUID]			= cpuid_interception,
> +	[SVM_EXIT_IRET]                         = iret_interception,
>  	[SVM_EXIT_INVD]                         = emulate_on_interception,
>  	[SVM_EXIT_HLT]				= halt_interception,
>  	[SVM_EXIT_INVLPG]			= invlpg_interception,
> @@ -2218,6 +2226,11 @@ static void pre_svm_run(struct vcpu_svm *svm)
>  		new_asid(svm, svm_data);
>  }
>  
> +static void svm_inject_nmi(struct vcpu_svm *svm)
> +{
> +	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
> +	svm->vcpu.arch.hflags |= HF_NMI_MASK;
> +}
>  
>  static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
>  {
> @@ -2269,6 +2282,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
>  		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
>  }
>  
> +static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +	struct vmcb *vmcb = svm->vmcb;
> +	return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
> +		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
> +}
> +
>  static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -2284,16 +2305,37 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
>  	svm_inject_irq(to_svm(vcpu), 0x0);
>  }
>  
> +static void enable_nmi_window(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	if (svm->vcpu.arch.hflags & HF_NMI_MASK)
> +		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
> +	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> +		enable_irq_window(vcpu);
> +}
> +
>  static void svm_intr_inject(struct kvm_vcpu *vcpu)
>  {
>  	/* try to reinject previous events if any */
> +	if (vcpu->arch.nmi_injected) {
> +		svm_inject_nmi(to_svm(vcpu));
> +		return;
> +	}
> +
>  	if (vcpu->arch.interrupt.pending) {
>  		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
>  		return;
>  	}
>  
>  	/* try to inject new event if pending */
> -	if (kvm_cpu_has_interrupt(vcpu)) {
> +	if (vcpu->arch.nmi_pending) {
> +		if (svm_nmi_allowed(vcpu)) {
> +			vcpu->arch.nmi_pending = false;
> +			vcpu->arch.nmi_injected = true;
> +			svm_inject_nmi(vcpu);
> +		}
> +	} else if (kvm_cpu_has_interrupt(vcpu)) {

Strictly speaking, this 'else' is incorrect: If we have an NMI pending
while the NMI window is closed _but_ the guest decided to open the IRQ
window, there is no reason why we shouldn't inject an IRQ. Only if we
actually injected an NMI, pending IRQs should be skipped for this run.

>  		if (svm_interrupt_allowed(vcpu)) {
>  			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
>  			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
> @@ -2312,7 +2354,10 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  
>  	svm_intr_inject(vcpu);
>  
> -	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> +	/* enable NMI/IRQ window open exits if needed */
> +	if (vcpu->arch.nmi_pending)
> +		enable_nmi_window(vcpu);
> +	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>  		enable_irq_window(vcpu);
>  
>  out:

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-13  9:55 ` [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic Gleb Natapov
  2009-04-14 14:14   ` Dmitry Eremin-Solenikov
@ 2009-04-17 12:39   ` Jan Kiszka
  2009-04-17 12:50     ` Jan Kiszka
  2009-04-17 14:13   ` Dmitry Eremin-Solenikov
  2009-04-18  9:05   ` Jan Kiszka
  3 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-17 12:39 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, joerg.roedel, sheng

Gleb Natapov wrote:
> Start to use interrupt/exception queues like VMX does.
> This also fix the bug that if exit was caused by a guest
> internal exception access to IDT the exception was not
> reinjected.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/kvm/svm.c |  176 ++++++++++++++++++++++------------------------------
>  1 files changed, 75 insertions(+), 101 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 52c41aa..053370d 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -70,7 +70,6 @@ module_param(npt, int, S_IRUGO);
>  static int nested = 0;
>  module_param(nested, int, S_IRUGO);
>  
> -static void kvm_reput_irq(struct vcpu_svm *svm);
>  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
>  
>  static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
> @@ -199,9 +198,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
>  
>  static bool svm_exception_injected(struct kvm_vcpu *vcpu)
>  {
> -	struct vcpu_svm *svm = to_svm(vcpu);
> -
> -	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
> +	return false;
>  }
>  
>  static int is_external_interrupt(u32 info)
> @@ -976,12 +973,9 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
>  
>  static int svm_get_irq(struct kvm_vcpu *vcpu)
>  {
> -	struct vcpu_svm *svm = to_svm(vcpu);
> -	u32 exit_int_info = svm->vmcb->control.exit_int_info;
> -
> -	if (is_external_interrupt(exit_int_info))
> -		return exit_int_info & SVM_EVTINJ_VEC_MASK;
> -	return -1;
> +	if (!vcpu->arch.interrupt.pending)
> +		return -1;
> +	return vcpu->arch.interrupt.nr;
>  }
>  
>  static void load_host_msrs(struct kvm_vcpu *vcpu)
> @@ -1088,17 +1082,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
>  
>  static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
> -	u32 exit_int_info = svm->vmcb->control.exit_int_info;
> -	struct kvm *kvm = svm->vcpu.kvm;
>  	u64 fault_address;
>  	u32 error_code;
> -	bool event_injection = false;
> -
> -	if (!irqchip_in_kernel(kvm) &&
> -	    is_external_interrupt(exit_int_info)) {
> -		event_injection = true;
> -		kvm_push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
> -	}
>  
>  	fault_address  = svm->vmcb->control.exit_info_2;
>  	error_code = svm->vmcb->control.exit_info_1;
> @@ -1118,9 +1103,11 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  	 */
>  	if (npt_enabled)
>  		svm_flush_tlb(&svm->vcpu);
> -
> -	if (!npt_enabled && event_injection)
> -		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
> +	else {
> +		if (svm->vcpu.arch.interrupt.pending ||
> +				svm->vcpu.arch.exception.pending)
> +			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
> +	}
>  	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
>  }
>  
> @@ -2187,7 +2174,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  		}
>  	}
>  
> -	kvm_reput_irq(svm);
>  
>  	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
>  		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> @@ -2289,98 +2275,47 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
>  		(svm->vcpu.arch.hflags & HF_GIF_MASK);
>  }
>  
> -static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +static void enable_irq_window(struct kvm_vcpu *vcpu)
>  {
> -	struct vcpu_svm *svm = to_svm(vcpu);
> -	struct vmcb *vmcb = svm->vmcb;
> -	int intr_vector = -1;
> -
> -	if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
> -	    ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
> -		intr_vector = vmcb->control.exit_int_info &
> -			      SVM_EVTINJ_VEC_MASK;
> -		vmcb->control.exit_int_info = 0;
> -		svm_inject_irq(svm, intr_vector);
> -		goto out;
> -	}
> -
> -	if (vmcb->control.int_ctl & V_IRQ_MASK)
> -		goto out;
> -
> -	if (!kvm_cpu_has_interrupt(vcpu))
> -		goto out;
> -
> -	if (nested_svm_intr(svm))
> -		goto out;
> -
> -	if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
> -		goto out;
> -
> -	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
> -	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
> -	    (vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
> -		/* unable to deliver irq, set pending irq */
> -		svm_set_vintr(svm);
> -		svm_inject_irq(svm, 0x0);
> -		goto out;
> -	}
> -	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
> -	intr_vector = kvm_cpu_get_interrupt(vcpu);
> -	svm_inject_irq(svm, intr_vector);
> -out:
> -	update_cr8_intercept(vcpu);
> +	svm_set_vintr(to_svm(vcpu));
> +	svm_inject_irq(to_svm(vcpu), 0x0);
>  }
>  
> -static void kvm_reput_irq(struct vcpu_svm *svm)
> +static void svm_intr_inject(struct kvm_vcpu *vcpu)
>  {
> -	struct vmcb_control_area *control = &svm->vmcb->control;
> -
> -	if ((control->int_ctl & V_IRQ_MASK)
> -	    && !irqchip_in_kernel(svm->vcpu.kvm)) {
> -		control->int_ctl &= ~V_IRQ_MASK;
> -		kvm_push_irq(&svm->vcpu, control->int_vector);
> +	/* try to reinject previous events if any */
> +	if (vcpu->arch.interrupt.pending) {
> +		svm_inject_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
> +		return;
>  	}
>  
> -	svm->vcpu.arch.interrupt_window_open =
> -		!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
> -		 (svm->vcpu.arch.hflags & HF_GIF_MASK);
> -}
> -
> -static void svm_do_inject_vector(struct vcpu_svm *svm)
> -{
> -	svm_inject_irq(svm, kvm_pop_irq(&svm->vcpu));
> +	/* try to inject new event if pending */
> +	if (kvm_cpu_has_interrupt(vcpu)) {
> +		if (vcpu->arch.interrupt_window_open) {
> +			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
> +			svm_inject_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
> +		}
> +	}
>  }
>  
> -static void do_interrupt_requests(struct kvm_vcpu *vcpu,
> -				       struct kvm_run *kvm_run)
> +static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> -	struct vmcb_control_area *control = &svm->vmcb->control;
> +	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
> +		kvm_run->request_interrupt_window;
>  
>  	if (nested_svm_intr(svm))
> -		return;
> +		goto out;
>  
> -	svm->vcpu.arch.interrupt_window_open =
> -		(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
> -		 (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
> -		 (svm->vcpu.arch.hflags & HF_GIF_MASK));
> +	svm->vcpu.arch.interrupt_window_open = svm_interrupt_allowed(vcpu);
>  
> -	if (svm->vcpu.arch.interrupt_window_open &&
> -	    kvm_cpu_has_interrupt(&svm->vcpu))
> -		/*
> -		 * If interrupts enabled, and not blocked by sti or mov ss. Good.
> -		 */
> -		svm_do_inject_vector(svm);
> +	svm_intr_inject(vcpu);
>  
> -	/*
> -	 * Interrupts blocked.  Wait for unblock.
> -	 */
> -	if (!svm->vcpu.arch.interrupt_window_open &&
> -	    (kvm_cpu_has_interrupt(&svm->vcpu) ||
> -	     kvm_run->request_interrupt_window))
> -		svm_set_vintr(svm);
> -	else
> -		svm_clear_vintr(svm);
> +	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> +		enable_irq_window(vcpu);
> +
> +out:
> +	update_cr8_intercept(vcpu);
>  }
>  
>  static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
> @@ -2420,6 +2355,43 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
>  	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
>  }
>  
> +static void svm_complete_interrupts(struct vcpu_svm *svm)
> +{
> +	u8 vector;
> +	int type;
> +	u32 exitintinfo = svm->vmcb->control.exit_int_info;
> +
> +	svm->vcpu.arch.nmi_injected = false;
> +	kvm_clear_exception_queue(&svm->vcpu);
> +	kvm_clear_interrupt_queue(&svm->vcpu);
> +
> +	if (!(exitintinfo & SVM_EXITINTINFO_VALID))
> +		return;
> +
> +	vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
> +	type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
> +
> +	switch (type) {
> +	case SVM_EXITINTINFO_TYPE_NMI:
> +		svm->vcpu.arch.nmi_injected = true;
> +		break;
> +	case SVM_EXITINTINFO_TYPE_EXEPT:
> +		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
> +			u32 err = svm->vmcb->control.exit_int_info_err;
> +			kvm_queue_exception_e(&svm->vcpu, vector, err);
> +					

stgit remarked "trailing whitespace". checkpatch.pl would probably do
so, too. :)

And there is another one in patch 14.

> +		} else
> +			kvm_queue_exception(&svm->vcpu, vector);
> +		break;
> +	case SVM_EXITINTINFO_TYPE_SOFT:
> +	case SVM_EXITINTINFO_TYPE_INTR:
> +		kvm_queue_interrupt(&svm->vcpu, vector);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
>  #ifdef CONFIG_X86_64
>  #define R "r"
>  #else
> @@ -2548,6 +2520,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	sync_cr8_to_lapic(vcpu);
>  
>  	svm->next_rip = 0;
> +
> +	svm_complete_interrupts(svm);
>  }
>  
>  #undef R
> @@ -2669,7 +2643,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.queue_exception = svm_queue_exception,
>  	.exception_injected = svm_exception_injected,
>  	.inject_pending_irq = svm_intr_assist,
> -	.inject_pending_vectors = do_interrupt_requests,
> +	.inject_pending_vectors = svm_intr_assist,
>  	.interrupt_allowed = svm_interrupt_allowed,
>  
>  	.set_tss_addr = svm_set_tss_addr,

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-17 12:39   ` Jan Kiszka
@ 2009-04-17 12:50     ` Jan Kiszka
  0 siblings, 0 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-17 12:50 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, joerg.roedel, sheng

Jan Kiszka wrote:
> Gleb Natapov wrote:
>> +	switch (type) {
>> +	case SVM_EXITINTINFO_TYPE_NMI:
>> +		svm->vcpu.arch.nmi_injected = true;
>> +		break;
>> +	case SVM_EXITINTINFO_TYPE_EXEPT:
>> +		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
>> +			u32 err = svm->vmcb->control.exit_int_info_err;
>> +			kvm_queue_exception_e(&svm->vcpu, vector, err);
>> +					
> 
> stgit remarked "trailing whitespace". checkpatch.pl would probably do
> so, too. :)
> 
> And there is another one in patch 14.

Sorry, the latter one was due to /me using the old version.

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-13  9:55 ` [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic Gleb Natapov
  2009-04-14 14:14   ` Dmitry Eremin-Solenikov
  2009-04-17 12:39   ` Jan Kiszka
@ 2009-04-17 14:13   ` Dmitry Eremin-Solenikov
  2009-04-18  9:16     ` Jan Kiszka
  2009-04-18  9:05   ` Jan Kiszka
  3 siblings, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-17 14:13 UTC (permalink / raw)
  To: kvm

Gleb Natapov wrote:

> Start to use interrupt/exception queues like VMX does. This also fix the
> bug that if exit was caused by a guest internal exception access to IDT
> the exception was not reinjected.
> 


OK. On real SVM HW this seems to work. However, I have now stumbled upon another
problem wrt. NMI. See my other mail.


-- 
With best wishes
Dmitry



^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-13  9:55 ` [PATCH 13/15] Add NMI injection support to SVM Gleb Natapov
  2009-04-17 11:59   ` Jan Kiszka
@ 2009-04-17 15:12   ` Dmitry Eremin-Solenikov
  2009-04-19 13:11     ` Gleb Natapov
  2009-04-17 19:13   ` Dmitry Eremin-Solenikov
  2009-04-17 19:55   ` Jan Kiszka
  3 siblings, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-17 15:12 UTC (permalink / raw)
  To: kvm


This patch does expose some problems on real HW. The first NMI completes w/o
problems. However, if I try to boot the kernel w/ nmi_watchdog=1 or to trigger
two NMIs from the monitor, the kernel gets stuck somewhere.

-- 
With best wishes
Dmitry



^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-13  9:55 ` [PATCH 13/15] Add NMI injection support to SVM Gleb Natapov
  2009-04-17 11:59   ` Jan Kiszka
  2009-04-17 15:12   ` Dmitry Eremin-Solenikov
@ 2009-04-17 19:13   ` Dmitry Eremin-Solenikov
  2009-04-17 19:53     ` Jan Kiszka
  2009-04-17 19:55   ` Jan Kiszka
  3 siblings, 1 reply; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-17 19:13 UTC (permalink / raw)
  To: kvm-owner; +Cc: avi, kvm, joerg.roedel, sheng, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 301 bytes --]

On Mon, Apr 13, 2009 at 12:55:43PM +0300, kvm-owner@vger.kernel.org wrote:
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>

The attached patch, if applied on top of the series, fixes the NMI issue on
SVM. I did not refactor it on top of this patch though, sorry.

-- 
With best wishes
Dmitry


[-- Attachment #2: 0001-KVM-correct-NMI-injection-logic-wrt-NMI-window-trac.patch --]
[-- Type: text/x-diff, Size: 1092 bytes --]

>From 26d7e88c84089abbe871286d54e77ff2922dc33d Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Fri, 17 Apr 2009 22:53:50 +0400
Subject: [PATCH] KVM: correct NMI injection logic wrt NMI window tracking

inject_pending_irq() calls inject_irq(), which clears the nmi_pending flag
if the NMI was injected. Thus for tracking we should use the nmi_injected
flag. This finally fixes NMI injection on SVM.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 arch/x86/kvm/x86.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e4cc717..eeed350 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3160,7 +3160,7 @@ static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	inject_irq(vcpu);
 
 	/* enable NMI/IRQ window open exits if needed */
-	if (vcpu->arch.nmi_pending)
+	if (vcpu->arch.nmi_injected)
 		kvm_x86_ops->enable_nmi_window(vcpu);
 	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 		kvm_x86_ops->enable_irq_window(vcpu);
-- 
1.6.2.1


^ permalink raw reply related	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-17 19:13   ` Dmitry Eremin-Solenikov
@ 2009-04-17 19:53     ` Jan Kiszka
  2009-04-18  9:08       ` Jan Kiszka
  0 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-17 19:53 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov
  Cc: kvm-owner, avi, kvm, joerg.roedel, sheng, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 2256 bytes --]

Dmitry Eremin-Solenikov wrote:
> On Mon, Apr 13, 2009 at 12:55:43PM +0300, kvm-owner@vger.kernel.org wrote:
>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> 
> The attached patch if applied on the top of the serie fixes the NMI issue on
> SVM. I did not refactor it on the top of this patch though, sorry.
> 
> 
> From 26d7e88c84089abbe871286d54e77ff2922dc33d Mon Sep 17 00:00:00 2001
> From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
> Date: Fri, 17 Apr 2009 22:53:50 +0400
> Subject: [PATCH] KVM: correct NMI injection logic wrt NMI window tracking
> 
> inject_pending_irq() calls inject_irq() which disables nmi_pending flag
> if the nmi was injected. Thus for tracking we should use nmi_injected
> flag. This al fin fixes NMI injection on SVM.
> 
> Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
> ---
>  arch/x86/kvm/x86.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e4cc717..eeed350 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3160,7 +3160,7 @@ static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	inject_irq(vcpu);
>  
>  	/* enable NMI/IRQ window open exits if needed */
> -	if (vcpu->arch.nmi_pending)
> +	if (vcpu->arch.nmi_injected)
>  		kvm_x86_ops->enable_nmi_window(vcpu);
>  	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>  		kvm_x86_ops->enable_irq_window(vcpu);

Hmm, good to know that it works better now, but I'm afraid this papers
over an issue in svm (and will break other cases). The logic here is: We
injected something (IRQ or NMI), and if there is more pending, _then_
enable the corresponding window. The check you changed should actually
only fire if we (re-)injected an IRQ for this round, and now there is
also an NMI pending.

My feeling is that the real issue is in svm which probably fails to open
the NMI window on NMI injection. In contrast to latest Intel CPUs, we
have to do this unconditionally on AMD (no virtual NMI mask). And as
this is so, svm has to take care that this is done on injection, not
here via the generic code. What about setting INTERCEPT_IRET
additionally in svm_inject_nmi?

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-13  9:55 ` [PATCH 13/15] Add NMI injection support to SVM Gleb Natapov
                     ` (2 preceding siblings ...)
  2009-04-17 19:13   ` Dmitry Eremin-Solenikov
@ 2009-04-17 19:55   ` Jan Kiszka
  2009-04-19  8:57     ` Avi Kivity
  2009-04-19 13:17     ` Gleb Natapov
  3 siblings, 2 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-17 19:55 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Baryshkov

[-- Attachment #1: Type: text/plain, Size: 1840 bytes --]

Gleb Natapov wrote:
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 8b6f6e9..057a612 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -766,6 +766,7 @@ enum {
>  #define HF_GIF_MASK		(1 << 0)
>  #define HF_HIF_MASK		(1 << 1)
>  #define HF_VINTR_MASK		(1 << 2)
> +#define HF_NMI_MASK		(1 << 3)
>  
>  /*
>   * Hardware virtualization extension instructions may fault if a
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c605477..cd60fd7 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  	return 1;
>  }
>  
> +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> +{
> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
> +	return 0;
> +}

First, this must return 1 (or set an exit reason, but there is no reason
to escape to user space here). And second, I think a corner case is not
handled the same way as on real iron: If there is already the next NMI
waiting, we will inject it before iret, not after its execution as it
should be.

No easy solution for this yet. Maybe emulating iret, but there is no
implementation, specifically for protected mode. Maybe setting a
breakpoint. Or maybe enforcing a single step exception. Nothing trivial
in this list. On the other hand, this may only be a slight imprecision
of the virtualization. Need to think about it.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-13  9:55 ` [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic Gleb Natapov
                     ` (2 preceding siblings ...)
  2009-04-17 14:13   ` Dmitry Eremin-Solenikov
@ 2009-04-18  9:05   ` Jan Kiszka
  2009-04-18 16:20     ` Gleb Natapov
  2009-04-19  8:52     ` Avi Kivity
  3 siblings, 2 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-18  9:05 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Baryshkov

[-- Attachment #1: Type: text/plain, Size: 3164 bytes --]

Gleb Natapov wrote:
> Start to use interrupt/exception queues like VMX does.
> This also fix the bug that if exit was caused by a guest
> internal exception access to IDT the exception was not
> reinjected.
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/kvm/svm.c |  176 ++++++++++++++++++++++------------------------------
>  1 files changed, 75 insertions(+), 101 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 52c41aa..053370d 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -70,7 +70,6 @@ module_param(npt, int, S_IRUGO);
>  static int nested = 0;
>  module_param(nested, int, S_IRUGO);
>  
> -static void kvm_reput_irq(struct vcpu_svm *svm);
>  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
>  
>  static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
> @@ -199,9 +198,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
>  
>  static bool svm_exception_injected(struct kvm_vcpu *vcpu)
>  {
> -	struct vcpu_svm *svm = to_svm(vcpu);
> -
> -	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
> +	return false;
>  }
>  
>  static int is_external_interrupt(u32 info)
> @@ -976,12 +973,9 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
>  
>  static int svm_get_irq(struct kvm_vcpu *vcpu)
>  {
> -	struct vcpu_svm *svm = to_svm(vcpu);
> -	u32 exit_int_info = svm->vmcb->control.exit_int_info;
> -
> -	if (is_external_interrupt(exit_int_info))
> -		return exit_int_info & SVM_EVTINJ_VEC_MASK;
> -	return -1;
> +	if (!vcpu->arch.interrupt.pending)
> +		return -1;
> +	return vcpu->arch.interrupt.nr;
>  }
>  
>  static void load_host_msrs(struct kvm_vcpu *vcpu)
> @@ -1088,17 +1082,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
>  
>  static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
> -	u32 exit_int_info = svm->vmcb->control.exit_int_info;
> -	struct kvm *kvm = svm->vcpu.kvm;
>  	u64 fault_address;
>  	u32 error_code;
> -	bool event_injection = false;
> -
> -	if (!irqchip_in_kernel(kvm) &&
> -	    is_external_interrupt(exit_int_info)) {
> -		event_injection = true;
> -		kvm_push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
> -	}
>  
>  	fault_address  = svm->vmcb->control.exit_info_2;
>  	error_code = svm->vmcb->control.exit_info_1;
> @@ -1118,9 +1103,11 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  	 */
>  	if (npt_enabled)
>  		svm_flush_tlb(&svm->vcpu);
> -
> -	if (!npt_enabled && event_injection)
> -		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
> +	else {
> +		if (svm->vcpu.arch.interrupt.pending ||
> +				svm->vcpu.arch.exception.pending)
> +			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
> +	}

Without understanding yet why kvm_mmu_unprotect_page_virt is required
here, this looks like it is lacking '|| svm->vcpu.arch.nmi_injected'.
Interrupts and exceptions are re-queued on fault-during-injection,
therefore they are now pending again, right?

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-17 19:53     ` Jan Kiszka
@ 2009-04-18  9:08       ` Jan Kiszka
  0 siblings, 0 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-18  9:08 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov
  Cc: kvm-owner, avi, kvm, joerg.roedel, sheng, Gleb Natapov

[-- Attachment #1: Type: text/plain, Size: 3137 bytes --]

Jan Kiszka wrote:
> Dmitry Eremin-Solenikov wrote:
>> On Mon, Apr 13, 2009 at 12:55:43PM +0300, kvm-owner@vger.kernel.org wrote:
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>> The attached patch if applied on the top of the serie fixes the NMI issue on
>> SVM. I did not refactor it on the top of this patch though, sorry.
>>
>>
>> From 26d7e88c84089abbe871286d54e77ff2922dc33d Mon Sep 17 00:00:00 2001
>> From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
>> Date: Fri, 17 Apr 2009 22:53:50 +0400
>> Subject: [PATCH] KVM: correct NMI injection logic wrt NMI window tracking
>>
>> inject_pending_irq() calls inject_irq() which disables nmi_pending flag
>> if the nmi was injected. Thus for tracking we should use nmi_injected
>> flag. This al fin fixes NMI injection on SVM.
>>
>> Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
>> ---
>>  arch/x86/kvm/x86.c |    2 +-
>>  1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index e4cc717..eeed350 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -3160,7 +3160,7 @@ static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>>  	inject_irq(vcpu);
>>  
>>  	/* enable NMI/IRQ window open exits if needed */
>> -	if (vcpu->arch.nmi_pending)
>> +	if (vcpu->arch.nmi_injected)
>>  		kvm_x86_ops->enable_nmi_window(vcpu);
>>  	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>>  		kvm_x86_ops->enable_irq_window(vcpu);
> 
> Hmm, good to know that it works better now, but I'm afraid this papers
> over an issue in svm (and will break other cases). The logic here is: We
> injected something (IRQ or NMI), and if there is more pending, _then_
> enable the corresponding window. The check you changed should actually
> only fire if we (re-)injected an IRQ for this round, and now there is
> also an NMI pending.
> 
> My feeling is that the real issue is in svm which probably fails to open
> the NMI window on NMI injection. In contrast to latest Intel CPUs, we
> have to do this unconditionally on AMD (no virtual NMI mask). And as
> this is so, svm has to take care that this is done on injection, not
> here via the generic code. What about setting INTERCEPT_IRET
> additionally in svm_inject_nmi?
> 

Yep, this also allows injecting >1 NMI here:

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af61744..79b9d8b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1831,7 +1831,7 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
 	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
-	return 0;
+	return 1;
 }
 
 static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -2232,6 +2232,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
 	svm->vcpu.arch.hflags |= HF_NMI_MASK;
 }
 

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply related	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt   injection logic.
  2009-04-17 14:13   ` Dmitry Eremin-Solenikov
@ 2009-04-18  9:16     ` Jan Kiszka
  2009-04-18 16:28       ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-18  9:16 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm, Gleb Natapov, Avi Kivity, Joerg Roedel

[-- Attachment #1: Type: text/plain, Size: 951 bytes --]

Dmitry Eremin-Solenikov wrote:
> Gleb Natapov wrote:
> 
>> Start to use interrupt/exception queues like VMX does. This also fix the
>> bug that if exit was caused by a guest internal exception access to IDT
>> the exception was not reinjected.
>>
> 
> 
> OK. On real SVM HW this seems to work. However now i'm stumbled upon another
> problem wrt. NMI. See another mail.
> 

I can confirm that this series (probably this patch) breaks the
following scenario:

qemu-system-x86_64 (qemu trunk, emulation mode)
    linux (kvm.git) with kvm-amd + kvm-userspace
        linux (the same kvm.git)

The second-level guest hangs in the middle of its boot, maybe no longer
receiving some or any interrupts.

So this patch may either expose a bug in the svm emulation of qemu or
comes with a subtle regression that only triggers due to qemu's timing.
This needs to be understood. Gleb, any progress on reproducing it on
your side?

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-18  9:05   ` Jan Kiszka
@ 2009-04-18 16:20     ` Gleb Natapov
  2009-04-19  8:52     ` Avi Kivity
  1 sibling, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-18 16:20 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Sat, Apr 18, 2009 at 11:05:10AM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > Start to use interrupt/exception queues like VMX does.
> > This also fix the bug that if exit was caused by a guest
> > internal exception access to IDT the exception was not
> > reinjected.
> > 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/kvm/svm.c |  176 ++++++++++++++++++++++------------------------------
> >  1 files changed, 75 insertions(+), 101 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 52c41aa..053370d 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -70,7 +70,6 @@ module_param(npt, int, S_IRUGO);
> >  static int nested = 0;
> >  module_param(nested, int, S_IRUGO);
> >  
> > -static void kvm_reput_irq(struct vcpu_svm *svm);
> >  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
> >  
> >  static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
> > @@ -199,9 +198,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
> >  
> >  static bool svm_exception_injected(struct kvm_vcpu *vcpu)
> >  {
> > -	struct vcpu_svm *svm = to_svm(vcpu);
> > -
> > -	return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID);
> > +	return false;
> >  }
> >  
> >  static int is_external_interrupt(u32 info)
> > @@ -976,12 +973,9 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
> >  
> >  static int svm_get_irq(struct kvm_vcpu *vcpu)
> >  {
> > -	struct vcpu_svm *svm = to_svm(vcpu);
> > -	u32 exit_int_info = svm->vmcb->control.exit_int_info;
> > -
> > -	if (is_external_interrupt(exit_int_info))
> > -		return exit_int_info & SVM_EVTINJ_VEC_MASK;
> > -	return -1;
> > +	if (!vcpu->arch.interrupt.pending)
> > +		return -1;
> > +	return vcpu->arch.interrupt.nr;
> >  }
> >  
> >  static void load_host_msrs(struct kvm_vcpu *vcpu)
> > @@ -1088,17 +1082,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
> >  
> >  static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> >  {
> > -	u32 exit_int_info = svm->vmcb->control.exit_int_info;
> > -	struct kvm *kvm = svm->vcpu.kvm;
> >  	u64 fault_address;
> >  	u32 error_code;
> > -	bool event_injection = false;
> > -
> > -	if (!irqchip_in_kernel(kvm) &&
> > -	    is_external_interrupt(exit_int_info)) {
> > -		event_injection = true;
> > -		kvm_push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
> > -	}
> >  
> >  	fault_address  = svm->vmcb->control.exit_info_2;
> >  	error_code = svm->vmcb->control.exit_info_1;
> > @@ -1118,9 +1103,11 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> >  	 */
> >  	if (npt_enabled)
> >  		svm_flush_tlb(&svm->vcpu);
> > -
> > -	if (!npt_enabled && event_injection)
> > -		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
> > +	else {
> > +		if (svm->vcpu.arch.interrupt.pending ||
> > +				svm->vcpu.arch.exception.pending)
> > +			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
> > +	}
> 
> Without understanding yet why kvm_mmu_unprotect_page_virt is required
> here, this looks like it is lacking '|| svm->vcpu.arch.nmi_injected'.
> Interrupts and exceptions are re-queued on fault-during-injection,
> therefore they are now pending again, right?
> 
Yes, right, and we discussed this with Avi already. I'll send another
patch after this series is applied (exactly the same issue exists
for VMX, btw).

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-18  9:16     ` Jan Kiszka
@ 2009-04-18 16:28       ` Gleb Natapov
  2009-04-19 13:57         ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-18 16:28 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Dmitry Eremin-Solenikov, kvm, Avi Kivity, Joerg Roedel

On Sat, Apr 18, 2009 at 11:16:47AM +0200, Jan Kiszka wrote:
> Dmitry Eremin-Solenikov wrote:
> > Gleb Natapov wrote:
> > 
> >> Start to use interrupt/exception queues like VMX does. This also fix the
> >> bug that if exit was caused by a guest internal exception access to IDT
> >> the exception was not reinjected.
> >>
> > 
> > 
> > OK. On real SVM HW this seems to work. However now i'm stumbled upon another
> > problem wrt. NMI. See another mail.
> > 
> 
> I can confirm that this series (probably this patch) breaks the
> following scenario:
> 
> qemu-system-x86_64 (qemu trunk, emulation mode)
>     linux (kvm.git) with kvm-amd + kvm-userspace
>         linux (the same kvm.git)
> 
> The second-level guest hangs in the mid of its boot, maybe no longer
> receiving some or any interrupt.
> 
> So this patch may either expose a bug in the svm emulation of qemu or
> comes with a subtle regression that only triggers due to qemu's timing.
> This needs to be understood. Gleb, any progress on reproducing it on
> your side?
> 
I reproduced it and I am debugging it. In my case the boot hangs on the
sti;hlt sequence. Instrumentation thus far shows that at this point interrupts
are no longer injected because the PPR value is too big. Need to see why, but
TPR handling is not complete in qemu svm. Maybe this is the reason. Will know
more tomorrow.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-18  9:05   ` Jan Kiszka
  2009-04-18 16:20     ` Gleb Natapov
@ 2009-04-19  8:52     ` Avi Kivity
  1 sibling, 0 replies; 95+ messages in thread
From: Avi Kivity @ 2009-04-19  8:52 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Gleb Natapov, kvm, joerg.roedel, sheng, Dmitry Baryshkov

Jan Kiszka wrote:
>> -
>> -	if (!npt_enabled && event_injection)
>> -		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
>> +	else {
>> +		if (svm->vcpu.arch.interrupt.pending ||
>> +				svm->vcpu.arch.exception.pending)
>> +			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
>> +	}
>>     
>
> Without understanding yet why kvm_mmu_unprotect_page_virt is required
> here,

That is needed in case interrupt injection failed because the process of 
injecting the interrupt (usually pushing data on the stack) writes to a 
write-protected page table.

I guess we need nmi_pending here as well.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-17 19:55   ` Jan Kiszka
@ 2009-04-19  8:57     ` Avi Kivity
  2009-04-19  9:12       ` Jan Kiszka
  2009-04-19 13:17     ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-19  8:57 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Gleb Natapov, kvm, joerg.roedel, sheng, Dmitry Baryshkov

Jan Kiszka wrote:
> First, this must return 1 (or set an exit reason, but there is no reason
> to escape to user space here). And second, I think a corner case is not
> handled the same way as on real iron: If there is already the next NMI
> waiting, we will inject it before iret, not after its execution as it
> should be.
>   

Yes, good catch.

> No easy solution for this yet. Maybe emulating iret, but there is no
> implementation, specifically for protected mode. 

That will be a disaster, IRET is horribly complicated.

> Maybe setting a
> breakpoint. Or maybe enforcing a single step exception. 

Single step looks good, except that it may conflict with a guest 
debugging itself (guest debugging an NMI handler?!).

> Nothing trivial
> in this list. On the other hand, this may only be a slight imprecision
> of the virtualization. Need to think about it.
>   

It may cause a stack overflow if we have a huge stream of NMIs (if an 
NMI triggers an NMI in the handler).  Of course that's a totally 
unrealistic scenario.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19  8:57     ` Avi Kivity
@ 2009-04-19  9:12       ` Jan Kiszka
  0 siblings, 0 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-19  9:12 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gleb Natapov, kvm, joerg.roedel, sheng, Dmitry Baryshkov

[-- Attachment #1: Type: text/plain, Size: 1764 bytes --]

Avi Kivity wrote:
> Jan Kiszka wrote:
>> First, this must return 1 (or set an exit reason, but there is no reason
>> to escape to user space here). And second, I think a corner case is not
>> handled the same way as on real iron: If there is already the next NMI
>> waiting, we will inject it before iret, not after its execution as it
>> should be.
>>   
> 
> Yes, good catch.
> 
>> No easy solution for this yet. Maybe emulating iret, but there is no
>> implementation, specifically for protected mode. 
> 
> That will be a disaster, IRET is horribly complicated.

Yeah, I know...

> 
>> Maybe setting a
>> breakpoint. Or maybe enforcing a single step exception. 
> 
> Single step looks good, except that it may conflict with a guest
> debugging itself (guest debugging an NMI handler?!).

But that should be solvable without too much effort.

BTW, guest single-stepping in and out of interrupt handlers is not
properly working anyway. We only set TF in current eflags but do not
bother about the CPU state that will get loaded next. Given some rainy
days (or a paying customer) I think I'll look into this once again. Same
goes for suppressing IRQ injection while single-stepping just as QEMU
does, which may even come earlier as someone already asked for it.

> 
>> Nothing trivial
>> in this list. On the other hand, this may only be a slight imprecision
>> of the virtualization. Need to think about it.
>>   
> 
> It may cause a stack overflow if we have a huge stream of NMIs (if an
> NMI triggers an NMI in the handler).  Of course that's a totally
> unrealistic scenario.
> 

Good point. But as it is a corner case, I think we can fly without a
complete solution first, fixing it in a second step.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-17 15:12   ` Dmitry Eremin-Solenikov
@ 2009-04-19 13:11     ` Gleb Natapov
  2009-04-20 12:08       ` Dmitry Eremin-Solenikov
  2009-04-20 15:50       ` Jan Kiszka
  0 siblings, 2 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:11 UTC (permalink / raw)
  To: Dmitry Eremin-Solenikov; +Cc: kvm

On Fri, Apr 17, 2009 at 03:12:57PM +0000, Dmitry Eremin-Solenikov wrote:
> 
> This patch does expose some problems on real HW. The first NMI completes w/o
> problems. However If I try to boot the kernel w/ nmi_watchdog=1 or to trigger
> two NMIs from the monitor, kernel is stuck somewhere.
> 
Can you try this patch instead of patch 13:


diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8b6f6e9..057a612 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -766,6 +766,7 @@ enum {
 #define HF_GIF_MASK		(1 << 0)
 #define HF_HIF_MASK		(1 << 1)
 #define HF_VINTR_MASK		(1 << 2)
+#define HF_NMI_MASK		(1 << 3)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c605477..0a2b3f1 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+	return 1;
+}
+
 static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
@@ -2111,6 +2118,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
 	/* [SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception, */
 	[SVM_EXIT_CPUID]			= cpuid_interception,
+	[SVM_EXIT_IRET]                         = iret_interception,
 	[SVM_EXIT_INVD]                         = emulate_on_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
@@ -2218,6 +2226,12 @@ static void pre_svm_run(struct vcpu_svm *svm)
 		new_asid(svm, svm_data);
 }
 
+static void svm_inject_nmi(struct vcpu_svm *svm)
+{
+	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+	svm->vcpu.arch.hflags |= HF_NMI_MASK;
+	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+}
 
 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
@@ -2269,6 +2283,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
 }
 
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+	return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2284,16 +2306,35 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 	svm_inject_irq(to_svm(vcpu), 0x0);
 }
 
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+		enable_irq_window(vcpu);
+}
+
 static void svm_intr_inject(struct kvm_vcpu *vcpu)
 {
 	/* try to reinject previous events if any */
+	if (vcpu->arch.nmi_injected) {
+		svm_inject_nmi(to_svm(vcpu));
+		return;
+	}
+
 	if (vcpu->arch.interrupt.pending) {
 		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
 		return;
 	}
 
 	/* try to inject new event if pending */
-	if (kvm_cpu_has_interrupt(vcpu)) {
+	if (vcpu->arch.nmi_pending) {
+		if (svm_nmi_allowed(vcpu)) {
+			vcpu->arch.nmi_pending = false;
+			vcpu->arch.nmi_injected = true;
+			svm_inject_nmi(vcpu);
+		}
+	} else if (kvm_cpu_has_interrupt(vcpu)) {
 		if (svm_interrupt_allowed(vcpu)) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
@@ -2312,7 +2353,10 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	svm_intr_inject(vcpu);
 
-	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+	/* enable NMI/IRQ window open exits if needed */
+	if (vcpu->arch.nmi_pending)
+		enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 		enable_irq_window(vcpu);
 
 out:
--
			Gleb.

^ permalink raw reply related	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-17 19:55   ` Jan Kiszka
  2009-04-19  8:57     ` Avi Kivity
@ 2009-04-19 13:17     ` Gleb Natapov
  2009-04-19 13:21       ` Avi Kivity
  2009-04-19 13:27       ` Jan Kiszka
  1 sibling, 2 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:17 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Fri, Apr 17, 2009 at 09:55:45PM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |    1 +
> >  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 48 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 8b6f6e9..057a612 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -766,6 +766,7 @@ enum {
> >  #define HF_GIF_MASK		(1 << 0)
> >  #define HF_HIF_MASK		(1 << 1)
> >  #define HF_VINTR_MASK		(1 << 2)
> > +#define HF_NMI_MASK		(1 << 3)
> >  
> >  /*
> >   * Hardware virtualization extension instructions may fault if a
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index c605477..cd60fd7 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> >  	return 1;
> >  }
> >  
> > +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> > +{
> > +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> > +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
> > +	return 0;
> > +}
> 
> First, this must return 1 (or set an exit reason, but there is no reason
> to escape to user space here). And second, I think a corner case is not
> handled the same way as on real iron: If there is already the next NMI
> waiting, we will inject it before iret, not after its execution as it
> should be.
> 
> No easy solution for this yet. Maybe emulating iret, but there is no
> implementation, specifically for protected mode. Maybe setting a
> breakpoint. Or maybe enforcing a single step exception. Nothing trivial
> in this list. On the other hand, this may only be a slight imprecision
> of the virtualization. Need to think about it.
> 
What about this:
Instead of clearing HF_NMI_MASK in iret_interception() we can set
another flag (HF_IRET) and on guest entry clear HF_NMI_MASK (and
HF_IRET) if HF_IRET is set, but do that after checking for NMI
injection. The pending NMI will be injected on the next entry.
This is also not how real HW works, but it may be better than the current situation.


--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:17     ` Gleb Natapov
@ 2009-04-19 13:21       ` Avi Kivity
  2009-04-19 13:24         ` Gleb Natapov
  2009-04-19 13:27       ` Jan Kiszka
  1 sibling, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-19 13:21 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

Gleb Natapov wrote:
> On Fri, Apr 17, 2009 at 09:55:45PM +0200, Jan Kiszka wrote:
>   
>> Gleb Natapov wrote:
>>     
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>> ---
>>>  arch/x86/include/asm/kvm_host.h |    1 +
>>>  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
>>>  2 files changed, 48 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 8b6f6e9..057a612 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -766,6 +766,7 @@ enum {
>>>  #define HF_GIF_MASK		(1 << 0)
>>>  #define HF_HIF_MASK		(1 << 1)
>>>  #define HF_VINTR_MASK		(1 << 2)
>>> +#define HF_NMI_MASK		(1 << 3)
>>>  
>>>  /*
>>>   * Hardware virtualization extension instructions may fault if a
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index c605477..cd60fd7 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>>>  	return 1;
>>>  }
>>>  
>>> +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>>> +{
>>> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
>>> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
>>> +	return 0;
>>> +}
>>>       
>> First, this must return 1 (or set an exit reason, but there is no reason
>> to escape to user space here). And second, I think a corner case is not
>> handled the same way as on real iron: If there is already the next NMI
>> waiting, we will inject it before iret, not after its execution as it
>> should be.
>>
>> No easy solution for this yet. Maybe emulating iret, but there is no
>> implementation, specifically for protected mode. Maybe setting a
>> breakpoint. Or maybe enforcing a single step exception. Nothing trivial
>> in this list. On the other hand, this may only be a slight imprecision
>> of the virtualization. Need to think about it.
>>
>>     
> What about this:
> Instead of clearing HF_NMI_MASK in iret_interception() we can set
> another flag (HF_IRET) and on guest entry clear HF_NMI_MASK (and
> HF_IRET) if HF_IRET is set, but do that after checking for NMI
> injection. The pending NMI will be injected on the next entry.
> Also not how real HW works, but may be better then current situation.
>   

There may not be a next entry if the guest is in a tight loop.  Given 
NMIs are used for watchdogs, that's not good.

btw, injection before IRET is executed is broken if interrupt stack 
tables are used, since the injection will reset rsp instead of nesting.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:21       ` Avi Kivity
@ 2009-04-19 13:24         ` Gleb Natapov
  2009-04-19 13:28           ` Avi Kivity
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:24 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Sun, Apr 19, 2009 at 04:21:29PM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>> On Fri, Apr 17, 2009 at 09:55:45PM +0200, Jan Kiszka wrote:
>>   
>>> Gleb Natapov wrote:
>>>     
>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>> ---
>>>>  arch/x86/include/asm/kvm_host.h |    1 +
>>>>  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
>>>>  2 files changed, 48 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>> index 8b6f6e9..057a612 100644
>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>> @@ -766,6 +766,7 @@ enum {
>>>>  #define HF_GIF_MASK		(1 << 0)
>>>>  #define HF_HIF_MASK		(1 << 1)
>>>>  #define HF_VINTR_MASK		(1 << 2)
>>>> +#define HF_NMI_MASK		(1 << 3)
>>>>   /*
>>>>   * Hardware virtualization extension instructions may fault if a
>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>> index c605477..cd60fd7 100644
>>>> --- a/arch/x86/kvm/svm.c
>>>> +++ b/arch/x86/kvm/svm.c
>>>> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>>>>  	return 1;
>>>>  }
>>>>  +static int iret_interception(struct vcpu_svm *svm, struct kvm_run 
>>>> *kvm_run)
>>>> +{
>>>> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
>>>> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
>>>> +	return 0;
>>>> +}
>>>>       
>>> First, this must return 1 (or set an exit reason, but there is no reason
>>> to escape to user space here). And second, I think a corner case is not
>>> handled the same way as on real iron: If there is already the next NMI
>>> waiting, we will inject it before iret, not after its execution as it
>>> should be.
>>>
>>> No easy solution for this yet. Maybe emulating iret, but there is no
>>> implementation, specifically for protected mode. Maybe setting a
>>> breakpoint. Or maybe enforcing a single step exception. Nothing trivial
>>> in this list. On the other hand, this may only be a slight imprecision
>>> of the virtualization. Need to think about it.
>>>
>>>     
>> What about this:
>> Instead of clearing HF_NMI_MASK in iret_interception() we can set
>> another flag (HF_IRET) and on guest entry clear HF_NMI_MASK (and
>> HF_IRET) if HF_IRET is set, but do that after checking for NMI
>> injection. The pending NMI will be injected on the next entry.
>> Also not how real HW works, but may be better then current situation.
>>   
>
> There may not be a next entry if the guest is in a tight loop.  Given  
> NMIs are used for watchdogs, that's not good.
>
We don't exit a guest after kvm time slice ends?

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:17     ` Gleb Natapov
  2009-04-19 13:21       ` Avi Kivity
@ 2009-04-19 13:27       ` Jan Kiszka
  2009-04-19 13:32         ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-19 13:27 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Baryshkov

[-- Attachment #1: Type: text/plain, Size: 2866 bytes --]

Gleb Natapov wrote:
> On Fri, Apr 17, 2009 at 09:55:45PM +0200, Jan Kiszka wrote:
>> Gleb Natapov wrote:
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>> ---
>>>  arch/x86/include/asm/kvm_host.h |    1 +
>>>  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
>>>  2 files changed, 48 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 8b6f6e9..057a612 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -766,6 +766,7 @@ enum {
>>>  #define HF_GIF_MASK		(1 << 0)
>>>  #define HF_HIF_MASK		(1 << 1)
>>>  #define HF_VINTR_MASK		(1 << 2)
>>> +#define HF_NMI_MASK		(1 << 3)
>>>  
>>>  /*
>>>   * Hardware virtualization extension instructions may fault if a
>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>> index c605477..cd60fd7 100644
>>> --- a/arch/x86/kvm/svm.c
>>> +++ b/arch/x86/kvm/svm.c
>>> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>>>  	return 1;
>>>  }
>>>  
>>> +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>>> +{
>>> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
>>> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
>>> +	return 0;
>>> +}
>> First, this must return 1 (or set an exit reason, but there is no reason
>> to escape to user space here). And second, I think a corner case is not
>> handled the same way as on real iron: If there is already the next NMI
>> waiting, we will inject it before iret, not after its execution as it
>> should be.
>>
>> No easy solution for this yet. Maybe emulating iret, but there is no
>> implementation, specifically for protected mode. Maybe setting a
>> breakpoint. Or maybe enforcing a single step exception. Nothing trivial
>> in this list. On the other hand, this may only be a slight imprecision
>> of the virtualization. Need to think about it.
>>
> What about this:
> Instead of clearing HF_NMI_MASK in iret_interception() we can set
> another flag (HF_IRET) and on guest entry clear HF_NMI_MASK (and
> HF_IRET) if HF_IRET is set, but do that after checking for NMI
> injection. The pending NMI will be injected on the next entry.
> Also not how real HW works, but may be better then current situation.

It's OK as a first step towards correct NMI emulation. Additionally, you
could enable the IRQ window interception in case there is an NMI pending.
The resulting behavior should then be much like the VNMI mask emulation for
vmx.

The next step should then be setting TF in the eflags stored on the
guest's stack before returning *if* there is already the next NMI
pending. But I wonder how much additional effort this will actually mean
(compared to the band-aid work)... :)

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:24         ` Gleb Natapov
@ 2009-04-19 13:28           ` Avi Kivity
  2009-04-19 13:40             ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-19 13:28 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

Gleb Natapov wrote:
>> There may not be a next entry if the guest is in a tight loop.  Given  
>> NMIs are used for watchdogs, that's not good.
>>
>>     
> We don't exit a guest after kvm time slice ends?
>   

There are no time slices any more.  If there's only one thread for a 
vcpu, you might have no exits at all with a tickless kernel.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:27       ` Jan Kiszka
@ 2009-04-19 13:32         ` Gleb Natapov
  2009-04-19 13:40           ` Jan Kiszka
  2009-04-19 13:40           ` Avi Kivity
  0 siblings, 2 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:32 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Sun, Apr 19, 2009 at 03:27:22PM +0200, Jan Kiszka wrote:
> Gleb Natapov wrote:
> > On Fri, Apr 17, 2009 at 09:55:45PM +0200, Jan Kiszka wrote:
> >> Gleb Natapov wrote:
> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>> ---
> >>>  arch/x86/include/asm/kvm_host.h |    1 +
> >>>  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
> >>>  2 files changed, 48 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>> index 8b6f6e9..057a612 100644
> >>> --- a/arch/x86/include/asm/kvm_host.h
> >>> +++ b/arch/x86/include/asm/kvm_host.h
> >>> @@ -766,6 +766,7 @@ enum {
> >>>  #define HF_GIF_MASK		(1 << 0)
> >>>  #define HF_HIF_MASK		(1 << 1)
> >>>  #define HF_VINTR_MASK		(1 << 2)
> >>> +#define HF_NMI_MASK		(1 << 3)
> >>>  
> >>>  /*
> >>>   * Hardware virtualization extension instructions may fault if a
> >>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> >>> index c605477..cd60fd7 100644
> >>> --- a/arch/x86/kvm/svm.c
> >>> +++ b/arch/x86/kvm/svm.c
> >>> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> >>>  	return 1;
> >>>  }
> >>>  
> >>> +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> >>> +{
> >>> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> >>> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
> >>> +	return 0;
> >>> +}
> >> First, this must return 1 (or set an exit reason, but there is no reason
> >> to escape to user space here). And second, I think a corner case is not
> >> handled the same way as on real iron: If there is already the next NMI
> >> waiting, we will inject it before iret, not after its execution as it
> >> should be.
> >>
> >> No easy solution for this yet. Maybe emulating iret, but there is no
> >> implementation, specifically for protected mode. Maybe setting a
> >> breakpoint. Or maybe enforcing a single step exception. Nothing trivial
> >> in this list. On the other hand, this may only be a slight imprecision
> >> of the virtualization. Need to think about it.
> >>
> > What about this:
> > Instead of clearing HF_NMI_MASK in iret_interception() we can set
> > another flag (HF_IRET) and on guest entry clear HF_NMI_MASK (and
> > HF_IRET) if HF_IRET is set, but do that after checking for NMI
> > injection. The pending NMI will be injected on the next entry.
> > Also not how real HW works, but may be better then current situation.
> 
> It's OK as a first step towards correct NMI emulation. Additionally, you
> could enable the IRQ window interception in case the is an NMI pending.
> The resulting behavior should then much like the VNMI mask emulation for
> vmx.
> 
Yeah, but the question is: if the IRQ window is already open, will the
exit happen before or after IRET?

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:28           ` Avi Kivity
@ 2009-04-19 13:40             ` Gleb Natapov
  2009-04-19 13:43               ` Jan Kiszka
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:40 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Sun, Apr 19, 2009 at 04:28:09PM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>>> There may not be a next entry if the guest is in a tight loop.  Given 
>>>  NMIs are used for watchdogs, that's not good.
>>>
>>>     
>> We don't exit a guest after kvm time slice ends?
>>   
>
> There are no time slices any more.  If there's only once thread for a  
> vcpu, you might have no exits at all with a tickless kernel.
>
Well, KVM may request some kind of event (timer) that will cause a VCPU
exit. This looks hacky though.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:32         ` Gleb Natapov
@ 2009-04-19 13:40           ` Jan Kiszka
  2009-04-19 13:40           ` Avi Kivity
  1 sibling, 0 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-19 13:40 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: avi, kvm, joerg.roedel, sheng, Dmitry Baryshkov

[-- Attachment #1: Type: text/plain, Size: 2983 bytes --]

Gleb Natapov wrote:
> On Sun, Apr 19, 2009 at 03:27:22PM +0200, Jan Kiszka wrote:
>> Gleb Natapov wrote:
>>> On Fri, Apr 17, 2009 at 09:55:45PM +0200, Jan Kiszka wrote:
>>>> Gleb Natapov wrote:
>>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>>> ---
>>>>>  arch/x86/include/asm/kvm_host.h |    1 +
>>>>>  arch/x86/kvm/svm.c              |   49 +++++++++++++++++++++++++++++++++++++-
>>>>>  2 files changed, 48 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>>>> index 8b6f6e9..057a612 100644
>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>> @@ -766,6 +766,7 @@ enum {
>>>>>  #define HF_GIF_MASK		(1 << 0)
>>>>>  #define HF_HIF_MASK		(1 << 1)
>>>>>  #define HF_VINTR_MASK		(1 << 2)
>>>>> +#define HF_NMI_MASK		(1 << 3)
>>>>>  
>>>>>  /*
>>>>>   * Hardware virtualization extension instructions may fault if a
>>>>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>>>>> index c605477..cd60fd7 100644
>>>>> --- a/arch/x86/kvm/svm.c
>>>>> +++ b/arch/x86/kvm/svm.c
>>>>> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>>>>>  	return 1;
>>>>>  }
>>>>>  
>>>>> +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>>>>> +{
>>>>> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
>>>>> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
>>>>> +	return 0;
>>>>> +}
>>>> First, this must return 1 (or set an exit reason, but there is no reason
>>>> to escape to user space here). And second, I think a corner case is not
>>>> handled the same way as on real iron: If there is already the next NMI
>>>> waiting, we will inject it before iret, not after its execution as it
>>>> should be.
>>>>
>>>> No easy solution for this yet. Maybe emulating iret, but there is no
>>>> implementation, specifically for protected mode. Maybe setting a
>>>> breakpoint. Or maybe enforcing a single step exception. Nothing trivial
>>>> in this list. On the other hand, this may only be a slight imprecision
>>>> of the virtualization. Need to think about it.
>>>>
>>> What about this:
>>> Instead of clearing HF_NMI_MASK in iret_interception() we can set
>>> another flag (HF_IRET) and on guest entry clear HF_NMI_MASK (and
>>> HF_IRET) if HF_IRET is set, but do that after checking for NMI
>>> injection. The pending NMI will be injected on the next entry.
>>> Also not how real HW works, but may be better then current situation.
>> It's OK as a first step towards correct NMI emulation. Additionally, you
>> could enable the IRQ window interception in case the is an NMI pending.
>> The resulting behavior should then much like the VNMI mask emulation for
>> vmx.
>>
> Yeah, but the question is if IRQ windows is already opened will exit
> happens before or after IRET.
> 

Hey, this is band-aid, it won't heal all cases. ;)

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:32         ` Gleb Natapov
  2009-04-19 13:40           ` Jan Kiszka
@ 2009-04-19 13:40           ` Avi Kivity
  2009-04-19 13:41             ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-19 13:40 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

Gleb Natapov wrote:
>> It's OK as a first step towards correct NMI emulation. Additionally, you
>> could enable the IRQ window interception in case the is an NMI pending.
>> The resulting behavior should then much like the VNMI mask emulation for
>> vmx.
>>
>>     
> Yeah, but the question is if IRQ windows is already opened will exit
> happens before or after IRET.
>   

You mean if the NMI handler enabled interrupts?

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:40           ` Avi Kivity
@ 2009-04-19 13:41             ` Gleb Natapov
  2009-04-19 13:43               ` Avi Kivity
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:41 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Sun, Apr 19, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>>> It's OK as a first step towards correct NMI emulation. Additionally, you
>>> could enable the IRQ window interception in case the is an NMI pending.
>>> The resulting behavior should then much like the VNMI mask emulation for
>>> vmx.
>>>
>>>     
>> Yeah, but the question is if IRQ windows is already opened will exit
>> happens before or after IRET.
>>   
>
> You mean if the NMI handler enabled interrupts?
>
Yes.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:41             ` Gleb Natapov
@ 2009-04-19 13:43               ` Avi Kivity
  2009-04-19 13:44                 ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-19 13:43 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

Gleb Natapov wrote:
> On Sun, Apr 19, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
>   
>> Gleb Natapov wrote:
>>     
>>>> It's OK as a first step towards correct NMI emulation. Additionally, you
>>>> could enable the IRQ window interception in case the is an NMI pending.
>>>> The resulting behavior should then much like the VNMI mask emulation for
>>>> vmx.
>>>>
>>>>     
>>>>         
>>> Yeah, but the question is if IRQ windows is already opened will exit
>>> happens before or after IRET.
>>>   
>>>       
>> You mean if the NMI handler enabled interrupts?
>>
>>     
> Yes.
>
>   

Then the guest deserves whatever it gets...

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:40             ` Gleb Natapov
@ 2009-04-19 13:43               ` Jan Kiszka
  2009-04-19 13:49                 ` Avi Kivity
  0 siblings, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-19 13:43 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, kvm, joerg.roedel, sheng, Dmitry Baryshkov

[-- Attachment #1: Type: text/plain, Size: 862 bytes --]

Gleb Natapov wrote:
> On Sun, Apr 19, 2009 at 04:28:09PM +0300, Avi Kivity wrote:
>> Gleb Natapov wrote:
>>>> There may not be a next entry if the guest is in a tight loop.  Given 
>>>>  NMIs are used for watchdogs, that's not good.
>>>>
>>>>     
>>> We don't exit a guest after kvm time slice ends?
>>>   
>> There are no time slices any more.  If there's only once thread for a  
>> vcpu, you might have no exits at all with a tickless kernel.
>>
> Well, KVM may request some kind of even (timer) that will cause exit to
> VCPU. This looks hacky though.

We already spent too many electrons and brain cycles on possibly "much
simpler" workarounds. I think injecting and handling a single-step, even
while there is guest debugging going on or the guest itself single-steps
or both, will not be more complicated - but "more correct".

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:43               ` Avi Kivity
@ 2009-04-19 13:44                 ` Gleb Natapov
  2009-04-19 14:07                   ` Julian Stecklina
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:44 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Sun, Apr 19, 2009 at 04:43:12PM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>> On Sun, Apr 19, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
>>   
>>> Gleb Natapov wrote:
>>>     
>>>>> It's OK as a first step towards correct NMI emulation. Additionally, you
>>>>> could enable the IRQ window interception in case the is an NMI pending.
>>>>> The resulting behavior should then much like the VNMI mask emulation for
>>>>> vmx.
>>>>>
>>>>>             
>>>> Yeah, but the question is if IRQ windows is already opened will exit
>>>> happens before or after IRET.
>>>>         
>>> You mean if the NMI handler enabled interrupts?
>>>
>>>     
>> Yes.
>>
>>   
>
> Then the guest deserves whatever it gets...
>
I suspect windows may do this since it uses NMI for task switching.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:43               ` Jan Kiszka
@ 2009-04-19 13:49                 ` Avi Kivity
  2009-04-19 13:51                   ` Gleb Natapov
  2009-04-19 13:59                   ` Jan Kiszka
  0 siblings, 2 replies; 95+ messages in thread
From: Avi Kivity @ 2009-04-19 13:49 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Gleb Natapov, kvm, joerg.roedel, sheng, Dmitry Baryshkov

Jan Kiszka wrote:
> We already spent to much electrons and brain cycles on possibly "much
> simpler" workarounds. I think injecting and handling a single-step, even
> while there is guest debugging going on or the guest itself single-steps
> or both, will not be more complicated - but "more correct".
>   

I agree.  I'm still worried about interactions between the IRET single 
stepping code and other things which use the debug registers.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:49                 ` Avi Kivity
@ 2009-04-19 13:51                   ` Gleb Natapov
  2009-04-19 13:59                   ` Jan Kiszka
  1 sibling, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:51 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm, joerg.roedel, sheng, Dmitry Baryshkov

On Sun, Apr 19, 2009 at 04:49:18PM +0300, Avi Kivity wrote:
> Jan Kiszka wrote:
>> We already spent to much electrons and brain cycles on possibly "much
>> simpler" workarounds. I think injecting and handling a single-step, even
>> while there is guest debugging going on or the guest itself single-steps
>> or both, will not be more complicated - but "more correct".
>>   
>
> I agree.  I'm still worried about interactions between the IRET single  
> stepping code and other things which use the debug registers.
>
I don't disagree either. Just throwing out other ideas :)

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-18 16:28       ` Gleb Natapov
@ 2009-04-19 13:57         ` Gleb Natapov
  2009-04-19 14:05           ` Jan Kiszka
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 13:57 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Dmitry Eremin-Solenikov, kvm, Avi Kivity, Joerg Roedel

On Sat, Apr 18, 2009 at 07:28:20PM +0300, Gleb Natapov wrote:
> > 
> > So this patch may either expose a bug in the svm emulation of qemu or
> > comes with a subtle regression that only triggers due to qemu's timing.
> > This needs to be understood. Gleb, any progress on reproducing it on
> > your side?
> > 
> I reproduced it and I am debugging it. In my case the boot hangs on sti;hlt
> sequence. Instrumentation thus far shows that at this point interrupts no longer
> injected because ppr value is too big. Need to see why, but tpr handling
> is not complete in qemu svm. May be this is the reason. Will know more
> tomorrow.
> 
I've looked into this and my conclusion is that if you are not going to
develop SVM in qemu, don't use it just yet. QEMU doesn't handle exceptions
during event injection properly. Actually it does not handle them at all,
so if a PF happens during interrupt injection, the interrupt is lost and,
what's worse, is never acked. If the interrupt was high prio, it blocks all
other interrupts.

The patch below adds exception handling during event injection. The valid
flag is removed from EVENTINJ only after successful injection, and EVENTINJ
is copied to EXITINTINFO on exit. Can you give it a try?

And this is not the only problem I saw, but the one that caused my guest
to hang.

diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index be09263..9264afd 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -1249,6 +1249,10 @@ void do_interrupt(int intno, int is_int, int error_code,
     } else {
         do_interrupt_real(intno, is_int, error_code, next_eip);
     }
+    if (env->hflags & HF_SVMI_MASK) {
+	    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
+	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
+    }
 }
 
 /* This should come from sysemu.h - if we could include it here... */
@@ -4994,7 +4998,6 @@ void helper_vmrun(int aflag, int next_eip_addend)
         uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
         uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
         uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
-        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
 
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
         /* FIXME: need to implement valid_err */
@@ -5331,6 +5334,8 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
     cpu_x86_set_cpl(env, 0);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
     stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info), ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
+    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err), ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
 
     env->hflags2 &= ~HF2_GIF_MASK;
     /* FIXME: Resets the current ASID register to zero (host ASID). */
--
			Gleb.

^ permalink raw reply related	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:49                 ` Avi Kivity
  2009-04-19 13:51                   ` Gleb Natapov
@ 2009-04-19 13:59                   ` Jan Kiszka
  1 sibling, 0 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-19 13:59 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Gleb Natapov, kvm, joerg.roedel, sheng, Dmitry Baryshkov

[-- Attachment #1: Type: text/plain, Size: 811 bytes --]

Avi Kivity wrote:
> Jan Kiszka wrote:
>> We already spent to much electrons and brain cycles on possibly "much
>> simpler" workarounds. I think injecting and handling a single-step, even
>> while there is guest debugging going on or the guest itself single-steps
>> or both, will not be more complicated - but "more correct".
>>   
> 
> I agree.  I'm still worried about interactions between the IRET single
> stepping code and other things which use the debug registers.

The interaction is inside KVM, so under our control. We may simply save
the related states, hook into the guest-debugging parts that evaluate
single step exceptions, and handle this case with highest prio,
transparently to users of lower prio. In theory - you never really know
until you tried to implement it...

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-19 13:57         ` Gleb Natapov
@ 2009-04-19 14:05           ` Jan Kiszka
  2009-04-19 14:28             ` Gleb Natapov
  2009-04-19 15:06             ` Jan Kiszka
  0 siblings, 2 replies; 95+ messages in thread
From: Jan Kiszka @ 2009-04-19 14:05 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Dmitry Eremin-Solenikov, kvm, Avi Kivity, Joerg Roedel,
	Alexander Graf, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3815 bytes --]

Gleb Natapov wrote:
> On Sat, Apr 18, 2009 at 07:28:20PM +0300, Gleb Natapov wrote:
>>> So this patch may either expose a bug in the svm emulation of qemu or
>>> comes with a subtle regression that only triggers due to qemu's timing.
>>> This needs to be understood. Gleb, any progress on reproducing it on
>>> your side?
>>>
>> I reproduced it and I am debugging it. In my case the boot hangs on sti;hlt
>> sequence. Instrumentation thus far shows that at this point interrupts no longer
>> injected because ppr value is too big. Need to see why, but tpr handling
>> is not complete in qemu svm. May be this is the reason. Will know more
>> tomorrow.
>>
> I've looked into this and my conclusion is that if you are not going to
> develop SVM in qemu don't use it just yet.

We had a resource conflict regarding SVM capable AMD boxes and a tight
schedule, so we decided to pick qemu as initial development platform.
Turns out that this was a bit too optimistic. :)

> QEMU doesn't handle exceptions
> during event injection properly. Actually it does not handle it at all,
> so if PF happens during interrupt injection interrupt is lost and, what
> worse, is never acked. If interrupt was high prio it blocks all other
> interrupts.
> 
> The patch below adds exception handling during event injection. Valid
> flag removed from EVENTINJ only after successful injection and EVENTINJ
> is copied to EXITINTINFO on exit. Can you give it a try?

Ah, great, thanks. Will test.

> 
> And this is not the only problem I saw, but the one that caused my guest
> to hang.

OK, good to know. I added Alex (though he's said to be on vacation ATM)
and qemu to CC. Maybe you can quickly list the other issues you've
stumbled over, for the records and for motivating contributors...

> 
> diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
> index be09263..9264afd 100644
> --- a/target-i386/op_helper.c
> +++ b/target-i386/op_helper.c
> @@ -1249,6 +1249,10 @@ void do_interrupt(int intno, int is_int, int error_code,
>      } else {
>          do_interrupt_real(intno, is_int, error_code, next_eip);
>      }
> +    if (env->hflags & HF_SVMI_MASK) {
> +	    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
> +	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
> +    }
>  }
>  
>  /* This should come from sysemu.h - if we could include it here... */
> @@ -4994,7 +4998,6 @@ void helper_vmrun(int aflag, int next_eip_addend)
>          uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
>          uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
>          uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
> -        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
>  
>          qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
>          /* FIXME: need to implement valid_err */
> @@ -5331,6 +5334,8 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
>      cpu_x86_set_cpl(env, 0);
>      stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
>      stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
> +    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info), ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
> +    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err), ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
>  
>      env->hflags2 &= ~HF2_GIF_MASK;
>      /* FIXME: Resets the current ASID register to zero (host ASID). */
> --
> 			Gleb.

Thanks again,
Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:44                 ` Gleb Natapov
@ 2009-04-19 14:07                   ` Julian Stecklina
  2009-04-19 14:13                     ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Julian Stecklina @ 2009-04-19 14:07 UTC (permalink / raw)
  To: kvm-u79uwXL29TY76Z2rM5mHXA

Gleb Natapov <gleb-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> writes:

> On Sun, Apr 19, 2009 at 04:43:12PM +0300, Avi Kivity wrote:
>> Gleb Natapov wrote:
>>> On Sun, Apr 19, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
>>>   
>>>> Gleb Natapov wrote:
>>>>     
>>>>>> It's OK as a first step towards correct NMI emulation. Additionally, you
>>>>>> could enable the IRQ window interception in case the is an NMI pending.
>>>>>> The resulting behavior should then much like the VNMI mask emulation for
>>>>>> vmx.
>>>>>>
>>>>>>             
>>>>> Yeah, but the question is if IRQ windows is already opened will exit
>>>>> happens before or after IRET.
>>>>>         
>>>> You mean if the NMI handler enabled interrupts?
>>>>
>>>>     
>>> Yes.
>>>
>>>   
>>
>> Then the guest deserves whatever it gets...
>>
> I suspect windows may do this since it uses NMI for task switching.

Could you elaborate on that? How/why does it use NMIs for task
switching?

Regards,
-- 
Julian Stecklina

The day Microsoft makes something that doesn't suck is probably the day
they start making vacuum cleaners - Ernst Jan Plugge

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 14:07                   ` Julian Stecklina
@ 2009-04-19 14:13                     ` Gleb Natapov
  2009-04-19 14:20                       ` Avi Kivity
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 14:13 UTC (permalink / raw)
  To: Julian Stecklina; +Cc: kvm

On Sun, Apr 19, 2009 at 04:07:52PM +0200, Julian Stecklina wrote:
> Gleb Natapov <gleb@redhat.com> writes:
> 
> > On Sun, Apr 19, 2009 at 04:43:12PM +0300, Avi Kivity wrote:
> >> Gleb Natapov wrote:
> >>> On Sun, Apr 19, 2009 at 04:40:51PM +0300, Avi Kivity wrote:
> >>>   
> >>>> Gleb Natapov wrote:
> >>>>     
> >>>>>> It's OK as a first step towards correct NMI emulation. Additionally, you
> >>>>>> could enable the IRQ window interception in case the is an NMI pending.
> >>>>>> The resulting behavior should then much like the VNMI mask emulation for
> >>>>>> vmx.
> >>>>>>
> >>>>>>             
> >>>>> Yeah, but the question is if IRQ windows is already opened will exit
> >>>>> happens before or after IRET.
> >>>>>         
> >>>> You mean if the NMI handler enabled interrupts?
> >>>>
> >>>>     
> >>> Yes.
> >>>
> >>>   
> >>
> >> Then the guest deserves whatever it gets...
> >>
> > I suspect windows may do this since it uses NMI for task switching.
> 
> Could you elaborate on that? How/why does it use NMIs for task
> switching?
> 
During WHQL testing (or if you just enable verifier on windows 2003)
windows changes hibernate to not power down a PC, but resume
immediately. During this immediate resume it sends NMI to non-boot CPUs
while IDT for nmi is configured as a task gate. I am not sure it
actually calls IRET after that.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 14:13                     ` Gleb Natapov
@ 2009-04-19 14:20                       ` Avi Kivity
  2009-04-19 14:29                         ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-19 14:20 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Julian Stecklina, kvm

Gleb Natapov wrote:
>> Could you elaborate on that? How/why does it use NMIs for task
>> switching?
>>
>>     
> During WHQL testing (or if you just enable verifier on windows 2003)
> windows changes hibernate to not power down a PC, but resume
> immediately. During this immediate resume it sends NMI to non-boot CPUs
> while IDT for nmi is configured as a task gate. I am not sure it
> actually calls IRET after that.
>   

If it doesn't call IRET, it will never see another NMI.

But of course it will execute IRET, as part of normal execution.  You 
can't do anything without it.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-19 14:05           ` Jan Kiszka
@ 2009-04-19 14:28             ` Gleb Natapov
  2009-04-19 15:06             ` Jan Kiszka
  1 sibling, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 14:28 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Dmitry Eremin-Solenikov, kvm, Avi Kivity, Joerg Roedel,
	Alexander Graf, qemu-devel

On Sun, Apr 19, 2009 at 04:05:21PM +0200, Jan Kiszka wrote:
> > And this is not the only problem I saw, but the one that caused my guest
> > to hang.
> 
> OK, good to know. I added Alex (though he's said to be on vacation ATM)
> and qemu to CC. Maybe you can quickly list the other issues you've
> stumbled over, for the records and for motivating contributors...
> 
Another one that I remember (because this was my first suspect) is
interrupt shadow handling. HF_INHIBIT_IRQ_MASK is cleared on exit when
the shadow bit is set in int_state, and is not set on entry if the hypervisor
set the shadow bit by itself. I am not sure how real HW actually handles this,
but the patch below demonstrates how I think it does it :) And of course
comments like /* FIXME: this should respect TPR */ don't look promising.


diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index be09263..691a7f0 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -4971,6 +4997,15 @@ void helper_vmrun(int aflag, int next_eip_addend)
     env->dr[6] = ldq_phys(env->vm_vmcb + offsetof(struct vmcb, save.dr6));
     cpu_x86_set_cpl(env, ldub_phys(env->vm_vmcb + offsetof(struct vmcb, save.cpl)));
 
+    {
+	uint32_t aaa;
+        aaa = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state));
+	if (aaa & SVM_INTERRUPT_SHADOW_MASK)
+		helper_set_inhibit_irq();
+	else
+		helper_reset_inhibit_irq();
+    }
+
     /* FIXME: guest state consistency checks */
 
     switch(ldub_phys(env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) {
@@ -5243,7 +5280,6 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
 
     if(env->hflags & HF_INHIBIT_IRQ_MASK) {
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK);
-        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
     } else {
         stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0);
     }
--
			Gleb.

^ permalink raw reply related	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 14:20                       ` Avi Kivity
@ 2009-04-19 14:29                         ` Gleb Natapov
  2009-04-19 14:57                           ` Avi Kivity
  0 siblings, 1 reply; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 14:29 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Julian Stecklina, kvm

On Sun, Apr 19, 2009 at 05:20:37PM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>>> Could you elaborate on that? How/why does it use NMIs for task
>>> switching?
>>>
>>>     
>> During WHQL testing (or if you just enable verifier on windows 2003)
>> windows changes hibernate to not power down a PC, but resume
>> immediately. During this immediate resume it sends NMI to non-boot CPUs
>> while IDT for nmi is configured as a task gate. I am not sure it
>> actually calls IRET after that.
>>   
>
> If it doesn't call IRET, it will never see another NMI.
>
> But of course it will execute IRET, as part of normal execution.  You  
> can't do anything without it.
>
Boot CPU can send INIT after task switch (and I think this is what
happens).

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 14:29                         ` Gleb Natapov
@ 2009-04-19 14:57                           ` Avi Kivity
  2009-04-19 16:36                             ` Gleb Natapov
  0 siblings, 1 reply; 95+ messages in thread
From: Avi Kivity @ 2009-04-19 14:57 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Julian Stecklina, kvm

Gleb Natapov wrote:
> On Sun, Apr 19, 2009 at 05:20:37PM +0300, Avi Kivity wrote:
>   
>> Gleb Natapov wrote:
>>     
>>>> Could you elaborate on that? How/why does it use NMIs for task
>>>> switching?
>>>>
>>>>     
>>>>         
>>> During WHQL testing (or if you just enable verifier on windows 2003)
>>> windows changes hibernate to not power down a PC, but resume
>>> immediately. During this immediate resume it sends NMI to non-boot CPUs
>>> while IDT for nmi is configured as a task gate. I am not sure it
>>> actually calls IRET after that.
>>>   
>>>       
>> If it doesn't call IRET, it will never see another NMI.
>>
>> But of course it will execute IRET, as part of normal execution.  You  
>> can't do anything without it.
>>
>>     
> Boot CPU can send INIT after task switch (and I think this is what
> happens).
>   

But eventually it will execute IRET.

(We need to fix INIT to clear the NMI blocking flag, not that it matters 
so much)

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-19 14:05           ` Jan Kiszka
  2009-04-19 14:28             ` Gleb Natapov
@ 2009-04-19 15:06             ` Jan Kiszka
  2009-04-19 15:20               ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-19 15:06 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: Dmitry Eremin-Solenikov, kvm, Avi Kivity, Joerg Roedel,
	Alexander Graf, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3732 bytes --]

Jan Kiszka wrote:
> Gleb Natapov wrote:
>> On Sat, Apr 18, 2009 at 07:28:20PM +0300, Gleb Natapov wrote:
>>>> So this patch may either expose a bug in the svm emulation of qemu or
>>>> comes with a subtle regression that only triggers due to qemu's timing.
>>>> This needs to be understood. Gleb, any progress on reproducing it on
>>>> your side?
>>>>
>>> I reproduced it and I am debugging it. In my case the boot hangs on sti;hlt
>>> sequence. Instrumentation thus far shows that at this point interrupts are no longer
>>> injected because the ppr value is too big. Need to see why, but tpr handling
>>> is not complete in qemu svm. Maybe this is the reason. Will know more
>>> tomorrow.
>>>
>> I've looked into this and my conclusion is that if you are not going to
>> develop SVM in qemu don't use it just yet.
> 
> We had a resource conflict regarding SVM capable AMD boxes and a tight
> schedule, so we decided to pick qemu as initial development platform.
> Turns out that this was a bit too optimistic. :)
> 
>> QEMU doesn't handle exceptions
>> during event injection properly. Actually it does not handle it at all,
>> so if a PF happens during interrupt injection, the interrupt is lost and, what's
>> worse, is never acked. If the interrupt was high prio it blocks all other
>> interrupts.
>>
>> The patch below adds exception handling during event injection. Valid
>> flag removed from EVENTINJ only after successful injection and EVENTINJ
>> is copied to EXITINTINFO on exit. Can you give it a try?
> 
> Ah, great, thanks. Will test.

I can confirm: patch below makes my kvm-in-qemu test case happy, too.
Maybe you want to post this with changelog and signed-off to qemu-devel.

Jan

>> diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
>> index be09263..9264afd 100644
>> --- a/target-i386/op_helper.c
>> +++ b/target-i386/op_helper.c
>> @@ -1249,6 +1249,10 @@ void do_interrupt(int intno, int is_int, int error_code,
>>      } else {
>>          do_interrupt_real(intno, is_int, error_code, next_eip);
>>      }
>> +    if (env->hflags & HF_SVMI_MASK) {
>> +	    uint32_t event_inj = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj));
>> +	    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
>> +    }
>>  }
>>  
>>  /* This should come from sysemu.h - if we could include it here... */
>> @@ -4994,7 +4998,6 @@ void helper_vmrun(int aflag, int next_eip_addend)
>>          uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK;
>>          uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR;
>>          uint32_t event_inj_err = ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err));
>> -        stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID);
>>  
>>          qemu_log_mask(CPU_LOG_TB_IN_ASM, "Injecting(%#hx): ", valid_err);
>>          /* FIXME: need to implement valid_err */
>> @@ -5331,6 +5334,8 @@ void helper_vmexit(uint32_t exit_code, uint64_t exit_info_1)
>>      cpu_x86_set_cpl(env, 0);
>>      stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code);
>>      stq_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1);
>> +    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info), ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj)));
>> +    stl_phys(env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err), ldl_phys(env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)));
>>  
>>      env->hflags2 &= ~HF2_GIF_MASK;
>>      /* FIXME: Resets the current ASID register to zero (host ASID). */
>> --
>> 			Gleb.



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic.
  2009-04-19 15:06             ` Jan Kiszka
@ 2009-04-19 15:20               ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 15:20 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Dmitry Eremin-Solenikov, kvm, Avi Kivity, Joerg Roedel,
	Alexander Graf, qemu-devel

On Sun, Apr 19, 2009 at 05:06:29PM +0200, Jan Kiszka wrote:
> Jan Kiszka wrote:
> > Gleb Natapov wrote:
> >> On Sat, Apr 18, 2009 at 07:28:20PM +0300, Gleb Natapov wrote:
> >>>> So this patch may either expose a bug in the svm emulation of qemu or
> >>>> comes with a subtle regression that only triggers due to qemu's timing.
> >>>> This needs to be understood. Gleb, any progress on reproducing it on
> >>>> your side?
> >>>>
> >>> I reproduced it and I am debugging it. In my case the boot hangs on sti;hlt
> > >>> sequence. Instrumentation thus far shows that at this point interrupts are no longer
> > >>> injected because the ppr value is too big. Need to see why, but tpr handling
> > >>> is not complete in qemu svm. Maybe this is the reason. Will know more
> >>> tomorrow.
> >>>
> >> I've looked into this and my conclusion is that if you are not going to
> >> develop SVM in qemu don't use it just yet.
> > 
> > We had a resource conflict regarding SVM capable AMD boxes and a tight
> > schedule, so we decided to pick qemu as initial development platform.
> > Turns out that this was a bit too optimistic. :)
> > 
> >> QEMU doesn't handle exceptions
> >> during event injection properly. Actually it does not handle it at all,
> >> so if a PF happens during interrupt injection, the interrupt is lost and, what's
> >> worse, is never acked. If the interrupt was high prio it blocks all other
> >> interrupts.
> >>
> >> The patch below adds exception handling during event injection. Valid
> >> flag removed from EVENTINJ only after successful injection and EVENTINJ
> >> is copied to EXITINTINFO on exit. Can you give it a try?
> > 
> > Ah, great, thanks. Will test.
> 
> I can confirm: patch below makes my kvm-in-qemu test case happy, too.
> Maybe you want to post this with changelog and signed-off to qemu-devel.
> 
Yeah, I'll reformat and submit.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 14:57                           ` Avi Kivity
@ 2009-04-19 16:36                             ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-19 16:36 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Julian Stecklina, kvm

On Sun, Apr 19, 2009 at 05:57:56PM +0300, Avi Kivity wrote:
> Gleb Natapov wrote:
>> On Sun, Apr 19, 2009 at 05:20:37PM +0300, Avi Kivity wrote:
>>   
>>> Gleb Natapov wrote:
>>>     
>>>>> Could you elaborate on that? How/why does it use NMIs for task
>>>>> switching?
>>>>>
>>>>>             
>>>> During WHQL testing (or if you just enable verifier on windows 2003)
>>>> windows changes hibernate to not power down a PC, but resume
>>>> immediately. During this immediate resume it sends NMI to non-boot CPUs
>>>> while IDT for nmi is configured as a task gate. I am not sure it
>>>> actually calls IRET after that.
>>>>         
>>> If it doesn't call IRET, it will never see another NMI.
>>>
>>> But of course it will execute IRET, as part of normal execution.  You 
>>>  can't do anything without it.
>>>
>>>     
>> Boot CPU can send INIT after task switch (and I think this is what
>> happens).
>>   
>
> But eventually it will execute IRET.
>
Yes :) But I strongly suspect that NMI window will be opened after SIPI
even before first IRET.

> (We need to fix INIT to clear the NMI blocking flag, not that it matters  
> so much)
If we reset intercept mask on INIT, but don't clear NMI blocking flag we
will never receive NMIs on the vcpu.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:11     ` Gleb Natapov
@ 2009-04-20 12:08       ` Dmitry Eremin-Solenikov
  2009-04-20 15:50       ` Jan Kiszka
  1 sibling, 0 replies; 95+ messages in thread
From: Dmitry Eremin-Solenikov @ 2009-04-20 12:08 UTC (permalink / raw)
  To: kvm

Gleb Natapov wrote:

> On Fri, Apr 17, 2009 at 03:12:57PM +0000, Dmitry Eremin-Solenikov wrote:
>> 
>> This patch does expose some problems on real HW. The first NMI
>> completes w/o problems. However, if I try to boot the kernel w/
>> nmi_watchdog=1 or to trigger two NMIs from the monitor, kernel is stuck
>> somewhere.
>> 
> Can you try this patch instead of patch 13:
> 

Seems to work.



-- 
With best wishes
Dmitry



^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-19 13:11     ` Gleb Natapov
  2009-04-20 12:08       ` Dmitry Eremin-Solenikov
@ 2009-04-20 15:50       ` Jan Kiszka
  2009-04-21 14:07         ` Gleb Natapov
  1 sibling, 1 reply; 95+ messages in thread
From: Jan Kiszka @ 2009-04-20 15:50 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Dmitry Eremin-Solenikov, kvm

Gleb Natapov wrote:
> On Fri, Apr 17, 2009 at 03:12:57PM +0000, Dmitry Eremin-Solenikov wrote:
>> This patch does expose some problems on real HW. The first NMI completes w/o
>> problems. However, if I try to boot the kernel w/ nmi_watchdog=1 or to trigger
>> two NMIs from the monitor, kernel is stuck somewhere.
>>
> Can you try this patch instead of patch 13:
> 
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 8b6f6e9..057a612 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -766,6 +766,7 @@ enum {
>  #define HF_GIF_MASK		(1 << 0)
>  #define HF_HIF_MASK		(1 << 1)
>  #define HF_VINTR_MASK		(1 << 2)
> +#define HF_NMI_MASK		(1 << 3)
>  
>  /*
>   * Hardware virtualization extension instructions may fault if a
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c605477..0a2b3f1 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1834,6 +1834,13 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  	return 1;
>  }
>  
> +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> +{
> +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;

Two minor issues:

++vcpu->stat.nmi_window_exits;

> +	return 1;
> +}
> +
>  static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
>  {
>  	if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
> @@ -2111,6 +2118,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
>  	[SVM_EXIT_VINTR]			= interrupt_window_interception,
>  	/* [SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception, */
>  	[SVM_EXIT_CPUID]			= cpuid_interception,
> +	[SVM_EXIT_IRET]                         = iret_interception,
>  	[SVM_EXIT_INVD]                         = emulate_on_interception,
>  	[SVM_EXIT_HLT]				= halt_interception,
>  	[SVM_EXIT_INVLPG]			= invlpg_interception,
> @@ -2218,6 +2226,12 @@ static void pre_svm_run(struct vcpu_svm *svm)
>  		new_asid(svm, svm_data);
>  }
>  
> +static void svm_inject_nmi(struct vcpu_svm *svm)
> +{
> +	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
> +	svm->vcpu.arch.hflags |= HF_NMI_MASK;
> +	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);

and:

++svm->vcpu.stat.nmi_injections;

> +}
>  
>  static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
>  {
> @@ -2269,6 +2283,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
>  		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
>  }
>  
> +static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +	struct vmcb *vmcb = svm->vmcb;
> +	return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
> +		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
> +}
> +
>  static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -2284,16 +2306,35 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
>  	svm_inject_irq(to_svm(vcpu), 0x0);
>  }
>  
> +static void enable_nmi_window(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
> +		enable_irq_window(vcpu);
> +}
> +
>  static void svm_intr_inject(struct kvm_vcpu *vcpu)
>  {
>  	/* try to reinject previous events if any */
> +	if (vcpu->arch.nmi_injected) {
> +		svm_inject_nmi(to_svm(vcpu));
> +		return;
> +	}
> +
>  	if (vcpu->arch.interrupt.pending) {
>  		svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
>  		return;
>  	}
>  
>  	/* try to inject new event if pending */
> -	if (kvm_cpu_has_interrupt(vcpu)) {
> +	if (vcpu->arch.nmi_pending) {
> +		if (svm_nmi_allowed(vcpu)) {
> +			vcpu->arch.nmi_pending = false;
> +			vcpu->arch.nmi_injected = true;
> +			svm_inject_nmi(vcpu);
> +		}
> +	} else if (kvm_cpu_has_interrupt(vcpu)) {
>  		if (svm_interrupt_allowed(vcpu)) {
>  			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
>  			svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
> @@ -2312,7 +2353,10 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  
>  	svm_intr_inject(vcpu);
>  
> -	if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
> +	/* enable NMI/IRQ window open exits if needed */
> +	if (vcpu->arch.nmi_pending)
> +		enable_nmi_window(vcpu);
> +	else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>  		enable_irq_window(vcpu);
>  
>  out:
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

Jan

-- 
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 95+ messages in thread

* Re: [PATCH 13/15] Add NMI injection support to SVM.
  2009-04-20 15:50       ` Jan Kiszka
@ 2009-04-21 14:07         ` Gleb Natapov
  0 siblings, 0 replies; 95+ messages in thread
From: Gleb Natapov @ 2009-04-21 14:07 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Dmitry Eremin-Solenikov, kvm

On Mon, Apr 20, 2009 at 05:50:01PM +0200, Jan Kiszka wrote:
> > +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
> > +{
> > +	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
> > +	svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
> 
> Two minor issues:
> 
> ++vcpu->stat.nmi_window_exits;
> 
[...]
> > +static void svm_inject_nmi(struct vcpu_svm *svm)
> > +{
> > +	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
> > +	svm->vcpu.arch.hflags |= HF_NMI_MASK;
> > +	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
> 
> and:
> 
> ++svm->vcpu.stat.nmi_injections;
> 
Added both. Thanks.

--
			Gleb.

^ permalink raw reply	[flat|nested] 95+ messages in thread

end of thread, other threads:[~2009-04-21 14:07 UTC | newest]

Thread overview: 95+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-04-13  9:55 [PATCH 00/15] interrupt injection rework Gleb Natapov
2009-04-13  9:55 ` [PATCH 01/15] Make kvm_cpu_(has|get)_interrupt() work for userspace irqchip too Gleb Natapov
2009-04-13  9:55 ` [PATCH 02/15] Consolidate userspace and kernel interrupt injection for VMX Gleb Natapov
2009-04-13  9:55 ` [PATCH 03/15] Cleanup vmx_intr_assist() Gleb Natapov
2009-04-13  9:55 ` [PATCH 04/15] Use kvm_arch_interrupt_allowed() instead of checking interrupt_window_open directly Gleb Natapov
2009-04-13  9:55 ` [PATCH 05/15] Coalesce userspace/kernel irqchip interrupt injection logic Gleb Natapov
2009-04-14 14:14   ` Dmitry Eremin-Solenikov
2009-04-14 14:24     ` Gleb Natapov
2009-04-14 14:32       ` Dmitry Eremin-Solenikov
2009-04-14 14:55         ` Gleb Natapov
2009-04-14 15:38           ` Gleb Natapov
2009-04-14 19:29           ` Dmitry Eremin-Solenikov
2009-04-14 19:41             ` Gleb Natapov
2009-04-15  6:11               ` Gleb Natapov
2009-04-15  9:30               ` Dmitry Eremin-Solenikov
2009-04-15  9:39                 ` Gleb Natapov
2009-04-15 10:22                   ` Jan Kiszka
2009-04-15 10:36                     ` Gleb Natapov
2009-04-15 10:51                       ` Jan Kiszka
2009-04-15 10:57                         ` Gleb Natapov
2009-04-15  9:44                 ` Gleb Natapov
2009-04-15 11:11                   ` Dmitry Eremin-Solenikov
2009-04-15 11:26                     ` Jan Kiszka
2009-04-15 11:53                       ` Dmitry Eremin-Solenikov
2009-04-15 11:58                         ` Dmitry Eremin-Solenikov
2009-04-15 12:01                         ` Gleb Natapov
2009-04-15 12:02                           ` Dmitry Eremin-Solenikov
2009-04-15 12:03                         ` Jan Kiszka
2009-04-15 12:39                           ` Dmitry Eremin-Solenikov
2009-04-15 12:48                             ` Jan Kiszka
2009-04-14 16:10       ` Avi Kivity
2009-04-14 16:18         ` Gleb Natapov
2009-04-17 12:39   ` Jan Kiszka
2009-04-17 12:50     ` Jan Kiszka
2009-04-17 14:13   ` Dmitry Eremin-Solenikov
2009-04-18  9:16     ` Jan Kiszka
2009-04-18 16:28       ` Gleb Natapov
2009-04-19 13:57         ` Gleb Natapov
2009-04-19 14:05           ` Jan Kiszka
2009-04-19 14:28             ` Gleb Natapov
2009-04-19 15:06             ` Jan Kiszka
2009-04-19 15:20               ` Gleb Natapov
2009-04-18  9:05   ` Jan Kiszka
2009-04-18 16:20     ` Gleb Natapov
2009-04-19  8:52     ` Avi Kivity
2009-04-13  9:55 ` [PATCH 06/15] Use EVENTINJ to inject interrupts Gleb Natapov
2009-04-13  9:55 ` [PATCH 07/15] Remove exception_injected() callback Gleb Natapov
2009-04-13  9:55 ` [PATCH 08/15] Remove inject_pending_vectors() callback Gleb Natapov
2009-04-13  9:55 ` [PATCH 09/15] kvm_push_irq() no longer used Gleb Natapov
2009-04-13  9:55 ` [PATCH 10/15] sync_lapic_to_cr8() should always sync cr8 to V_TPR Gleb Natapov
2009-04-13  9:55 ` [PATCH 11/15] Do not report TPR write to userspace if new value bigger or equal to a previous one Gleb Natapov
2009-04-13  9:55 ` [PATCH 12/15] Get rid of arch.interrupt_window_open & arch.nmi_window_open Gleb Natapov
2009-04-13  9:55 ` [PATCH 13/15] Add NMI injection support to SVM Gleb Natapov
2009-04-17 11:59   ` Jan Kiszka
2009-04-17 15:12   ` Dmitry Eremin-Solenikov
2009-04-19 13:11     ` Gleb Natapov
2009-04-20 12:08       ` Dmitry Eremin-Solenikov
2009-04-20 15:50       ` Jan Kiszka
2009-04-21 14:07         ` Gleb Natapov
2009-04-17 19:13   ` Dmitry Eremin-Solenikov
2009-04-17 19:53     ` Jan Kiszka
2009-04-18  9:08       ` Jan Kiszka
2009-04-17 19:55   ` Jan Kiszka
2009-04-19  8:57     ` Avi Kivity
2009-04-19  9:12       ` Jan Kiszka
2009-04-19 13:17     ` Gleb Natapov
2009-04-19 13:21       ` Avi Kivity
2009-04-19 13:24         ` Gleb Natapov
2009-04-19 13:28           ` Avi Kivity
2009-04-19 13:40             ` Gleb Natapov
2009-04-19 13:43               ` Jan Kiszka
2009-04-19 13:49                 ` Avi Kivity
2009-04-19 13:51                   ` Gleb Natapov
2009-04-19 13:59                   ` Jan Kiszka
2009-04-19 13:27       ` Jan Kiszka
2009-04-19 13:32         ` Gleb Natapov
2009-04-19 13:40           ` Jan Kiszka
2009-04-19 13:40           ` Avi Kivity
2009-04-19 13:41             ` Gleb Natapov
2009-04-19 13:43               ` Avi Kivity
2009-04-19 13:44                 ` Gleb Natapov
2009-04-19 14:07                   ` Julian Stecklina
2009-04-19 14:13                     ` Gleb Natapov
2009-04-19 14:20                       ` Avi Kivity
2009-04-19 14:29                         ` Gleb Natapov
2009-04-19 14:57                           ` Avi Kivity
2009-04-19 16:36                             ` Gleb Natapov
2009-04-13  9:55 ` [PATCH 14/15] Move interrupt injection logic to x86.c Gleb Natapov
2009-04-14  7:22   ` Gleb Natapov
2009-04-13  9:55 ` [PATCH 15/15] Get rid of get_irq() callback Gleb Natapov
2009-04-13 11:51 ` [PATCH 00/15] interrupt injection rework Avi Kivity
2009-04-14  3:20   ` Sheng Yang
2009-04-14  5:20     ` Gleb Natapov
2009-04-14 15:33 ` Joerg Roedel
2009-04-14 15:37   ` Gleb Natapov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).