* [patch 0/3] fix PIT injection
@ 2008-07-26 20:00 Marcelo Tosatti
2008-07-26 20:00 ` [patch 1/3] KVM: Add irq ack notifier list Marcelo Tosatti
` (5 more replies)
0 siblings, 6 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2008-07-26 20:00 UTC (permalink / raw)
To: Avi Kivity, Sheng Yang; +Cc: kvm
The in-kernel PIT emulation can either inject too many or too few
interrupts.
--
^ permalink raw reply [flat|nested] 15+ messages in thread
* [patch 1/3] KVM: Add irq ack notifier list
2008-07-26 20:00 [patch 0/3] fix PIT injection Marcelo Tosatti
@ 2008-07-26 20:00 ` Marcelo Tosatti
2008-07-26 20:01 ` [patch 2/3] KVM: irq ack notification Marcelo Tosatti
` (4 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2008-07-26 20:00 UTC (permalink / raw)
To: Avi Kivity, Sheng Yang; +Cc: kvm
[-- Attachment #1: ack1 --]
[-- Type: text/plain, Size: 2673 bytes --]
From: Avi Kivity <avi@qumranet.com>
This can be used by kvm subsystems that are interested in when
interrupts are acked, for example time drift compensation.
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
arch/x86/kvm/irq.c | 22 ++++++++++++++++++++++
arch/x86/kvm/irq.h | 5 +++++
include/asm-x86/kvm_host.h | 7 +++++++
3 files changed, 34 insertions(+), 0 deletions(-)
Index: kvm/arch/x86/kvm/irq.c
===================================================================
--- kvm.orig/arch/x86/kvm/irq.c
+++ kvm/arch/x86/kvm/irq.c
@@ -111,3 +111,25 @@ void kvm_set_irq(struct kvm *kvm, int ir
kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+{
+ struct kvm_irq_ack_notifier *kian;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+ if (kian->gsi == gsi)
+ kian->irq_acked(kian);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ hlist_del(&kian->link);
+}
Index: kvm/arch/x86/kvm/irq.h
===================================================================
--- kvm.orig/arch/x86/kvm/irq.h
+++ kvm/arch/x86/kvm/irq.h
@@ -83,6 +83,11 @@ static inline int irqchip_in_kernel(stru
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_set_irq(struct kvm *kvm, int irq, int level);
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
Index: kvm/include/asm-x86/kvm_host.h
===================================================================
--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -319,6 +319,12 @@ struct kvm_mem_alias {
gfn_t target_gfn;
};
+struct kvm_irq_ack_notifier {
+ struct hlist_node link;
+ unsigned gsi;
+ void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
+};
+
struct kvm_arch{
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -334,6 +340,7 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
+ struct hlist_head irq_ack_notifier_list;
int round_robin_prev_vcpu;
unsigned int tss_addr;
--
^ permalink raw reply [flat|nested] 15+ messages in thread
* [patch 2/3] KVM: irq ack notification
2008-07-26 20:00 [patch 0/3] fix PIT injection Marcelo Tosatti
2008-07-26 20:00 ` [patch 1/3] KVM: Add irq ack notifier list Marcelo Tosatti
@ 2008-07-26 20:01 ` Marcelo Tosatti
2008-07-26 20:01 ` [patch 3/3] KVM: PIT: fix injection logic and count Marcelo Tosatti
` (3 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2008-07-26 20:01 UTC (permalink / raw)
To: Avi Kivity, Sheng Yang; +Cc: kvm, Marcelo Tosatti, Amit Shah, Ben-Ami Yassour
[-- Attachment #1: ack2 --]
[-- Type: text/plain, Size: 5700 bytes --]
Based on a patch from: Ben-Ami Yassour <benami@il.ibm.com>
which was based on a patch from: Amit Shah <amit.shah@qumranet.com>
Notify IRQ acking on PIC/APIC emulation. The previous patch missed two things:
- Edge triggered interrupts on IOAPIC
- PIC reset with IRR/ISR set should be equivalent to ack (LAPIC probably
needs something similar).
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: Amit Shah <amit.shah@qumranet.com>
CC: Ben-Ami Yassour <benami@il.ibm.com>
Index: kvm/arch/x86/kvm/i8259.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8259.c
+++ kvm/arch/x86/kvm/i8259.c
@@ -159,9 +159,10 @@ static inline void pic_intack(struct kvm
s->irr &= ~(1 << irq);
}
-int kvm_pic_read_irq(struct kvm_pic *s)
+int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
+ struct kvm_pic *s = pic_irqchip(kvm);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
@@ -187,12 +188,21 @@ int kvm_pic_read_irq(struct kvm_pic *s)
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
+ kvm_notify_acked_irq(kvm, irq);
return intno;
}
void kvm_pic_reset(struct kvm_kpic_state *s)
{
+ int irq;
+ struct kvm *kvm = s->pics_state->irq_request_opaque;
+
+ for (irq = 0; irq < PIC_NUM_PINS; irq++) {
+ if (!(s->imr & (1 << irq)) && (s->irr & (1 << irq) ||
+ s->isr & (1 << irq)))
+ kvm_notify_acked_irq(kvm, irq);
+ }
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
Index: kvm/arch/x86/kvm/irq.c
===================================================================
--- kvm.orig/arch/x86/kvm/irq.c
+++ kvm/arch/x86/kvm/irq.c
@@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcp
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm);
s->output = 0; /* PIC */
- vector = kvm_pic_read_irq(s);
+ vector = kvm_pic_read_irq(v->kvm);
}
}
return vector;
Index: kvm/arch/x86/kvm/irq.h
===================================================================
--- kvm.orig/arch/x86/kvm/irq.h
+++ kvm/arch/x86/kvm/irq.h
@@ -63,11 +63,12 @@ struct kvm_pic {
void *irq_request_opaque;
int output; /* intr from master PIC */
struct kvm_io_device dev;
+ void (*ack_notifier)(void *opaque, int irq);
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
-int kvm_pic_read_irq(struct kvm_pic *s);
+int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
Index: kvm/virt/kvm/ioapic.c
===================================================================
--- kvm.orig/virt/kvm/ioapic.c
+++ kvm/virt/kvm/ioapic.c
@@ -39,6 +39,7 @@
#include "ioapic.h"
#include "lapic.h"
+#include "irq.h"
#if 0
#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
@@ -285,26 +286,31 @@ void kvm_ioapic_set_irq(struct kvm_ioapi
}
}
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi,
+ int trigger_mode)
{
union ioapic_redir_entry *ent;
ent = &ioapic->redirtbl[gsi];
- ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
- ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
- ioapic_service(ioapic, gsi);
+ kvm_notify_acked_irq(ioapic->kvm, gsi);
+
+ if (trigger_mode == IOAPIC_LEVEL_TRIG) {
+ ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+ ent->fields.remote_irr = 0;
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+ ioapic_service(ioapic, gsi);
+ }
}
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
int i;
for (i = 0; i < IOAPIC_NUM_PINS; i++)
if (ioapic->redirtbl[i].fields.vector == vector)
- __kvm_ioapic_update_eoi(ioapic, i);
+ __kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
}
static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr,
Index: kvm/virt/kvm/ioapic.h
===================================================================
--- kvm.orig/virt/kvm/ioapic.h
+++ kvm/virt/kvm/ioapic.h
@@ -58,6 +58,7 @@ struct kvm_ioapic {
} redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
+ void (*ack_notifier)(void *opaque, int irq);
};
#ifdef DEBUG
@@ -87,7 +88,7 @@ static inline int irqchip_in_kernel(stru
struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
unsigned long bitmap);
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
Index: kvm/arch/x86/kvm/lapic.c
===================================================================
--- kvm.orig/arch/x86/kvm/lapic.c
+++ kvm/arch/x86/kvm/lapic.c
@@ -439,7 +439,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcp
static void apic_set_eoi(struct kvm_lapic *apic)
{
int vector = apic_find_highest_isr(apic);
-
+ int trigger_mode;
/*
* Not every write EOI will has corresponding ISR,
* one example is when Kernel check timer on setup_IO_APIC
@@ -451,7 +451,10 @@ static void apic_set_eoi(struct kvm_lapi
apic_update_ppr(apic);
if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
- kvm_ioapic_update_eoi(apic->vcpu->kvm, vector);
+ trigger_mode = IOAPIC_LEVEL_TRIG;
+ else
+ trigger_mode = IOAPIC_EDGE_TRIG;
+ kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
}
static void apic_send_ipi(struct kvm_lapic *apic)
--
^ permalink raw reply [flat|nested] 15+ messages in thread
* [patch 3/3] KVM: PIT: fix injection logic and count
2008-07-26 20:00 [patch 0/3] fix PIT injection Marcelo Tosatti
2008-07-26 20:00 ` [patch 1/3] KVM: Add irq ack notifier list Marcelo Tosatti
2008-07-26 20:01 ` [patch 2/3] KVM: irq ack notification Marcelo Tosatti
@ 2008-07-26 20:01 ` Marcelo Tosatti
2008-07-28 5:56 ` Yang, Sheng
` (2 more replies)
2008-07-28 4:31 ` [patch 0/3] fix PIT injection David S. Ahern
` (2 subsequent siblings)
5 siblings, 3 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2008-07-26 20:01 UTC (permalink / raw)
To: Avi Kivity, Sheng Yang; +Cc: kvm, Marcelo Tosatti
[-- Attachment #1: improve-pit-reinjection --]
[-- Type: text/plain, Size: 7004 bytes --]
The PIT injection logic is problematic under the following cases:
1) If there is a higher priority vector to be delivered by the time
kvm_pit_timer_intr_post is invoked ps->inject_pending won't be set.
This opens the possibility for missing many PIT event injections (say if
guest executes hlt at this point).
2) ps->inject_pending is racy with more than two vcpus. Since there's no locking
around read/dec of pt->pending, two vcpu's can inject two interrupts for a single
pt->pending count.
Fix 1 by using an irq ack notifier: only reinject when the previous irq
has been acked. Fix 2 with appropriate locking around manipulation of
pending count and irq_ack by the injection / ack paths.
Also, count_load_time should be set at the time the count is reloaded,
not when the interrupt is injected (BTW, LAPIC uses the same apparently
broken scheme, could someone explain what was the reasoning behind
that? kvm_apic_timer_intr_post).
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -207,6 +207,8 @@ static int __pit_timer_fn(struct kvm_kpi
pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
pt->scheduled = ktime_to_ns(pt->timer.expires);
+ if (pt->period)
+ ps->channels[0].count_load_time = pt->timer.expires;
return (pt->period == 0 ? 0 : 1);
}
@@ -215,12 +217,22 @@ int pit_has_pending_timer(struct kvm_vcp
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
- if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending)
+ if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
return atomic_read(&pit->pit_state.pit_timer.pending);
-
return 0;
}
+void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+ struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
+ irq_ack_notifier);
+ spin_lock(&ps->inject_lock);
+ if (atomic_dec_return(&ps->pit_timer.pending) < 0)
+ WARN_ON(1);
+ ps->irq_ack = 1;
+ spin_unlock(&ps->inject_lock);
+}
+
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
struct kvm_kpit_state *ps;
@@ -255,8 +267,9 @@ static void destroy_pit_timer(struct kvm
hrtimer_cancel(&pt->timer);
}
-static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
{
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
s64 interval;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
@@ -268,6 +281,7 @@ static void create_pit_timer(struct kvm_
pt->period = (is_period == 0) ? 0 : interval;
pt->timer.function = pit_timer_fn;
atomic_set(&pt->pending, 0);
+ ps->irq_ack = 1;
hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
HRTIMER_MODE_ABS);
@@ -302,11 +316,11 @@ static void pit_load_count(struct kvm *k
case 1:
/* FIXME: enhance mode 4 precision */
case 4:
- create_pit_timer(&ps->pit_timer, val, 0);
+ create_pit_timer(ps, val, 0);
break;
case 2:
case 3:
- create_pit_timer(&ps->pit_timer, val, 1);
+ create_pit_timer(ps, val, 1);
break;
default:
destroy_pit_timer(&ps->pit_timer);
@@ -520,7 +534,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
mutex_unlock(&pit->pit_state.lock);
atomic_set(&pit->pit_state.pit_timer.pending, 0);
- pit->pit_state.inject_pending = 1;
+ pit->pit_state.irq_ack = 1;
}
struct kvm_pit *kvm_create_pit(struct kvm *kvm)
@@ -534,6 +548,7 @@ struct kvm_pit *kvm_create_pit(struct kv
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
+ spin_lock_init(&pit->pit_state.inject_lock);
/* Initialize PIO device */
pit->dev.read = pit_ioport_read;
@@ -555,6 +570,9 @@ struct kvm_pit *kvm_create_pit(struct kv
pit_state->pit = pit;
hrtimer_init(&pit_state->pit_timer.timer,
CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ pit_state->irq_ack_notifier.gsi = 0;
+ pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
+ kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
@@ -592,37 +610,19 @@ void kvm_inject_pit_timer_irqs(struct kv
struct kvm_kpit_state *ps;
if (vcpu && pit) {
+ int inject = 0;
ps = &pit->pit_state;
- /* Try to inject pending interrupts when:
- * 1. Pending exists
- * 2. Last interrupt was accepted or waited for too long time*/
- if (atomic_read(&ps->pit_timer.pending) &&
- (ps->inject_pending ||
- (jiffies - ps->last_injected_time
- >= KVM_MAX_PIT_INTR_INTERVAL))) {
- ps->inject_pending = 0;
- __inject_pit_timer_intr(kvm);
- ps->last_injected_time = jiffies;
- }
- }
-}
-
-void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
-{
- struct kvm_arch *arch = &vcpu->kvm->arch;
- struct kvm_kpit_state *ps;
-
- if (vcpu && arch->vpit) {
- ps = &arch->vpit->pit_state;
- if (atomic_read(&ps->pit_timer.pending) &&
- (((arch->vpic->pics[0].imr & 1) == 0 &&
- arch->vpic->pics[0].irq_base == vec) ||
- (arch->vioapic->redirtbl[0].fields.vector == vec &&
- arch->vioapic->redirtbl[0].fields.mask != 1))) {
- ps->inject_pending = 1;
- atomic_dec(&ps->pit_timer.pending);
- ps->channels[0].count_load_time = ktime_get();
+ /* Try to inject pending interrupts when
+ * last one has been acked.
+ */
+ spin_lock(&ps->inject_lock);
+ if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
+ ps->irq_ack = 0;
+ inject = 1;
}
+ spin_unlock(&ps->inject_lock);
+ if (inject)
+ __inject_pit_timer_intr(kvm);
}
}
Index: kvm/arch/x86/kvm/i8254.h
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.h
+++ kvm/arch/x86/kvm/i8254.h
@@ -8,7 +8,6 @@ struct kvm_kpit_timer {
int irq;
s64 period; /* unit: ns */
s64 scheduled;
- ktime_t last_update;
atomic_t pending;
};
@@ -34,8 +33,9 @@ struct kvm_kpit_state {
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
- bool inject_pending; /* if inject pending interrupts */
- unsigned long last_injected_time;
+ spinlock_t inject_lock;
+ unsigned long irq_ack;
+ struct kvm_irq_ack_notifier irq_ack_notifier;
};
struct kvm_pit {
@@ -54,7 +54,6 @@ struct kvm_pit {
#define KVM_PIT_CHANNEL_MASK 0x3
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
struct kvm_pit *kvm_create_pit(struct kvm *kvm);
void kvm_free_pit(struct kvm *kvm);
Index: kvm/arch/x86/kvm/irq.c
===================================================================
--- kvm.orig/arch/x86/kvm/irq.c
+++ kvm/arch/x86/kvm/irq.c
@@ -90,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_tim
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
- kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
--
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 0/3] fix PIT injection
2008-07-26 20:00 [patch 0/3] fix PIT injection Marcelo Tosatti
` (2 preceding siblings ...)
2008-07-26 20:01 ` [patch 3/3] KVM: PIT: fix injection logic and count Marcelo Tosatti
@ 2008-07-28 4:31 ` David S. Ahern
2008-07-29 12:50 ` Avi Kivity
2008-07-29 21:50 ` Dor Laor
5 siblings, 0 replies; 15+ messages in thread
From: David S. Ahern @ 2008-07-28 4:31 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Avi Kivity, Sheng Yang, kvm
Hi Marcelo:
With kvm-72 + this patch set, timekeeping in RHEL3, RHEL4 and RHEL5
guests with 2 vcpus is much better. Approaching 5 hours of uptime and
all 3 guests are within 2 seconds of the host (part of the delta
measurement based). I'll let all 3 run overnight and then turn on ntp
tomorrow.
Thanks for working on this,
david
Marcelo Tosatti wrote:
> The in-kernel PIT emulation can either inject too many or too few
> interrupts.
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 3/3] KVM: PIT: fix injection logic and count
2008-07-26 20:01 ` [patch 3/3] KVM: PIT: fix injection logic and count Marcelo Tosatti
@ 2008-07-28 5:56 ` Yang, Sheng
2008-07-29 14:29 ` Marcelo Tosatti
2008-07-29 12:55 ` Avi Kivity
2008-08-12 15:35 ` David S. Ahern
2 siblings, 1 reply; 15+ messages in thread
From: Yang, Sheng @ 2008-07-28 5:56 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Avi Kivity, kvm
Hi Marcelo
Thanks for your work! It finally resolves my problem on failing to ack
some injected interrupts. :)
On Sunday 27 July 2008 04:01:01 Marcelo Tosatti wrote:
> The PIT injection logic is problematic under the following cases:
>
> 1) If there is a higher priority vector to be delivered by the time
> kvm_pit_timer_intr_post is invoked ps->inject_pending won't be set.
> This opens the possibility for missing many PIT event injections
> (say if guest executes hlt at this point).
IRQ ack is of course the better way.
But I am still confused about why the PIT event is lost. If a higher priority
vector there, it would inject first, but finally PIT one should be
injected and because PIT inject one interrupt after another had been
injected, so no PIT interrupt should be ignored.
>
> 2) ps->inject_pending is racy with more than two vcpus. Since
> there's no locking around read/dec of pt->pending, two vcpu's can
> inject two interrupts for a single pt->pending count.
>
> Fix 1 by using an irq ack notifier: only reinject when the previous
> irq has been acked. Fix 2 with appropriate locking around
> manipulation of pending count and irq_ack by the injection / ack
> paths.
>
> Also, count_load_time should be set at the time the count is
> reloaded, not when the interrupt is injected (BTW, LAPIC uses the
> same apparently broken scheme, could someone explain what was the
> reasoning behind that? kvm_apic_timer_intr_post).
PIT did so because we don't know how long we should wait for interrupt
injection, so that point is nearer the real injection point. I think
lapic have the similar reason.
I think keep timer_intr_post() for time update is reasonable.
--
regards
Yang, Sheng
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 0/3] fix PIT injection
2008-07-26 20:00 [patch 0/3] fix PIT injection Marcelo Tosatti
` (3 preceding siblings ...)
2008-07-28 4:31 ` [patch 0/3] fix PIT injection David S. Ahern
@ 2008-07-29 12:50 ` Avi Kivity
2008-07-29 21:50 ` Dor Laor
5 siblings, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2008-07-29 12:50 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Sheng Yang, kvm
Marcelo Tosatti wrote:
> The in-kernel PIT emulation can either inject too many or too few
> interrupts.
>
Applied all, thanks.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 3/3] KVM: PIT: fix injection logic and count
2008-07-26 20:01 ` [patch 3/3] KVM: PIT: fix injection logic and count Marcelo Tosatti
2008-07-28 5:56 ` Yang, Sheng
@ 2008-07-29 12:55 ` Avi Kivity
2008-07-29 14:42 ` Marcelo Tosatti
2008-08-12 15:35 ` David S. Ahern
2 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2008-07-29 12:55 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Sheng Yang, kvm
Marcelo Tosatti wrote:
> The PIT injection logic is problematic under the following cases:
>
> 1) If there is a higher priority vector to be delivered by the time
> kvm_pit_timer_intr_post is invoked ps->inject_pending won't be set.
> This opens the possibility for missing many PIT event injections (say if
> guest executes hlt at this point).
>
> 2) ps->inject_pending is racy with more than two vcpus. Since there's no locking
> around read/dec of pt->pending, two vcpu's can inject two interrupts for a single
> pt->pending count.
>
> Fix 1 by using an irq ack notifier: only reinject when the previous irq
> has been acked. Fix 2 with appropriate locking around manipulation of
> pending count and irq_ack by the injection / ack paths.
>
> Also, count_load_time should be set at the time the count is reloaded,
> not when the interrupt is injected (BTW, LAPIC uses the same apparently
> broken scheme, could someone explain what was the reasoning behind
> that? kvm_apic_timer_intr_post).
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/arch/x86/kvm/i8254.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/i8254.c
> +++ kvm/arch/x86/kvm/i8254.c
> @@ -207,6 +207,8 @@ static int __pit_timer_fn(struct kvm_kpi
>
> pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
> pt->scheduled = ktime_to_ns(pt->timer.expires);
> + if (pt->period)
> + ps->channels[0].count_load_time = pt->timer.expires;
>
> return (pt->period == 0 ? 0 : 1);
> }
>
Sometimes the guest leaves the timer enabled but the output pin masked,
(e.g. it doesn't use the timer but doesn't bother to turn it off
properly). This results in extraneous interrupts, causing unnecessary
vmexits and increased power usage.
But if we detect that the guest isn't processing the interrupts, we can
turn the timer off, and after the next injection, calculate the number
of missing interrupts, and turn the timer on again. This will have to
be done carefully (taking care of the guest adjusting the frequency
during the period where we missed injections, for example).
Comments? Questions? Patches?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 3/3] KVM: PIT: fix injection logic and count
2008-07-28 5:56 ` Yang, Sheng
@ 2008-07-29 14:29 ` Marcelo Tosatti
0 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2008-07-29 14:29 UTC (permalink / raw)
To: Yang, Sheng; +Cc: Avi Kivity, kvm
Hi Sheng,
On Mon, Jul 28, 2008 at 01:56:59PM +0800, Yang, Sheng wrote:
> Hi Marcelo
>
> Thanks for your work! It finally resolves my problem on failing to ack
> some injected interrupts. :)
>
> On Sunday 27 July 2008 04:01:01 Marcelo Tosatti wrote:
> > The PIT injection logic is problematic under the following cases:
> >
> > 1) If there is a higher priority vector to be delivered by the time
> > kvm_pit_timer_intr_post is invoked ps->inject_pending won't be set.
> > This opens the possibility for missing many PIT event injections
> > (say if guest executes hlt at this point).
>
> IRQ ack is of course the better way.
>
> But I am still confused about why the PIT event is lost. If a higher priority
> vector there, it would inject first, but finally PIT one should be
> injected and because PIT inject one interrupt after another had been
> injected, so no PIT interrupt should be ignored.
The problem is (was) the check for ps->inject_pending in
pit_has_pending_timer(). If the guest hlt'ed just after a higher
priority vector had been injected ps->inject_pending would not
be cleared, so we wouldn't break out of the kvm_vcpu_block() loop
immediately.
In my test case, it would block in kvm_vcpu_block() until a lapic timer
fired, which would then inject that. Repeat forever until the guest
exits for some reason other than hlt.
> > 2) ps->inject_pending is racy with more than two vcpus. Since
> > there's no locking around read/dec of pt->pending, two vcpu's can
> > inject two interrupts for a single pt->pending count.
> >
> > Fix 1 by using an irq ack notifier: only reinject when the previous
> > irq has been acked. Fix 2 with appropriate locking around
> > manipulation of pending count and irq_ack by the injection / ack
> > paths.
> >
> > Also, count_load_time should be set at the time the count is
> > reloaded, not when the interrupt is injected (BTW, LAPIC uses the
> > same apparently broken scheme, could someone explain what was the
> > reasoning behind that? kvm_apic_timer_intr_post).
>
> PIT did so because we don't know how long we should wait for interrupt
> injection, so that point is nearer the real injection point. I think
> lapic have the similar reason.
>
> I think keep timer_intr_post() for time update is reasonable.
But the real hardware does not do any adjustment of that kind right? The
issue is, when the guest reads the CH0 register (0x40) it expects the
actual count (ie. how long until the timer fires), not any other value.
Failing to return the proper "count" value via CH0 read breaks
"clock=pit" on older Linux 2.6 kernels for example (and really, I don't
see any advantage of setting load count to when injection happens).
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 3/3] KVM: PIT: fix injection logic and count
2008-07-29 12:55 ` Avi Kivity
@ 2008-07-29 14:42 ` Marcelo Tosatti
0 siblings, 0 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2008-07-29 14:42 UTC (permalink / raw)
To: Avi Kivity; +Cc: Sheng Yang, kvm
On Tue, Jul 29, 2008 at 03:55:26PM +0300, Avi Kivity wrote:
> Sometimes the guest leaves the timer enabled but the output pin masked,
> (e.g. it doesn't use the timer but doesn't bother to turn it off
> properly). This results in extraneous interrupts, causing unnecessary
> vmexits and increased power usage.
>
> But if we detect that the guest isn't processing the interrupts, we can
> turn the timer off, and after the next injection, calculate the number
> of missing interrupts, and turn the timer on again. This will have to
> be done carefully (taking care of the guest adjusting the frequency
> during the period where we missed injections, for example).
Sounds like a good idea, I'll try something later.
>
> Comments? Questions? Patches?
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 0/3] fix PIT injection
2008-07-26 20:00 [patch 0/3] fix PIT injection Marcelo Tosatti
` (4 preceding siblings ...)
2008-07-29 12:50 ` Avi Kivity
@ 2008-07-29 21:50 ` Dor Laor
2008-07-30 14:15 ` Marcelo Tosatti
5 siblings, 1 reply; 15+ messages in thread
From: Dor Laor @ 2008-07-29 21:50 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Avi Kivity, Sheng Yang, kvm
Marcelo Tosatti wrote:
> The in-kernel PIT emulation can either inject too many or too few
> interrupts.
>
>
While it's an improvement, the in-kernel pit is still not perfect. For
example, on pit frequency changes the
pending count should be recalculated and matched to the new frequency. I
also stumbled on a live migration problem
and there is your guest smp fix.
IMHO we need to switch back to userspace pit. [Actually I did consider
in-kernel pit myself in the past.]. The reasons:
1. There is no performance advantage doing this in the kernel.
It just potentially reduces the host stability and reduces code
2. There are floating patches to fix pit/rtc injection in the same way
the acked irq is done here.
So the first 2 patches are relevant.
3. Will we do the same for rtc? -> why duplicate userspace code in the
kernel?
We won't have smp issues since we have qemu_mutex and it will be
simpler too.
If you agree, please help merging the qemu patches.
Otherwise argue against the above :)
Cheers, Dor
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 0/3] fix PIT injection
2008-07-29 21:50 ` Dor Laor
@ 2008-07-30 14:15 ` Marcelo Tosatti
2008-07-30 14:56 ` Sheng Yang
2008-07-30 21:48 ` Dor Laor
0 siblings, 2 replies; 15+ messages in thread
From: Marcelo Tosatti @ 2008-07-30 14:15 UTC (permalink / raw)
To: Dor Laor; +Cc: Avi Kivity, Sheng Yang, kvm
Hi Dor,
On Wed, Jul 30, 2008 at 12:50:06AM +0300, Dor Laor wrote:
> Marcelo Tosatti wrote:
>> The in-kernel PIT emulation can either inject too many or too few
>> interrupts.
>>
>>
> While it's an improvement, the in-kernel pit is still not perfect. For
> example, on pit frequency changes the
> pending count should be recalculated and matched to the new frequency.
Point. That one can be addressed.
> I also stumbled on a live migration problem
Can you provide more details? How to reproduce?
> and there is your guest smp fix.
> IMHO we need to switch back to userspace pit. [Actually I did consider
> in-kernel pit myself in the past.]. The reasons:
> 1. There is no performance advantage doing this in the kernel.
> It's just potentially reduced the host stability and reduces code
Keeping device emulation in userspace is desired, of course. The
drawbacks of timer emulation, AFAICS, are:
- Timers in QEMU are, currently, not handled separately from other
IO processing. The delay between timer expiration and interrupt
injection depends on the time spent handling unrelated IO in QEMU's
main loop. This can be fixed, by treating guest timer expiration
with higher priority.
- The in-kernel emulation allows the host timer to be locked to the vcpu
it belongs. With userspace emulation, an IPI is necessary whenever the
iothread is running on a different physical CPU than the target vcpu.
The overall cost to wakeup a vcpu in a different physical CPU is:
syscall, IRQ lock acquisition (currently kvm->lock, which also protects
access to in-kernel devices) and finally the IPI cost, which is hundreds
of ns (googling around it seems to be in the range of 700ns).
That cost is non-existent with timers locked to the vcpu.
I don't know what specific problems the in-kernel PIT emulation solved
(I recall certain Windows configurations were largely improved). Do you
have the details? Sheng?
> 2. There are floating patches to fix pit/rtc injection in the same way
> the acked irq is done here.
> So the first 2 patches are relevant.
> 3. Will we do the same for rtc? -> why duplicate userspace code in the
> kernel?
> We won't have smp issues since we have qemu_mutex and it will be
> simpler too.
>
> If you agree, please help merging the qemu patches.
> Otherwise argue against the above :)
>
> Cheers, Dor
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 0/3] fix PIT injection
2008-07-30 14:15 ` Marcelo Tosatti
@ 2008-07-30 14:56 ` Sheng Yang
2008-07-30 21:48 ` Dor Laor
1 sibling, 0 replies; 15+ messages in thread
From: Sheng Yang @ 2008-07-30 14:56 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Dor Laor, Avi Kivity, Sheng Yang, kvm
(Sorry, forgot to switch to plain text in Gmail, rejected by vger.kernel.org...)
On Wed, Jul 30, 2008 at 10:15 PM, Marcelo Tosatti <mtosatti@redhat.com> wrote:
> Hi Dor,
>
> On Wed, Jul 30, 2008 at 12:50:06AM +0300, Dor Laor wrote:
>> Marcelo Tosatti wrote:
>>> The in-kernel PIT emulation can either inject too many or too few
>>> interrupts.
>>>
>>>
>> While it's an improvement, the in-kernel pit is still not perfect. For
>> example, on pit frequency changes the
>> pending count should be recalculated and matched to the new frequency.
>
> Point. That one can be addressed.
>
>> I also tumbled on live migration problem
>
> Can you provide more details? How to reproduce?
>
>> and there is your guest smp fix.
>> IMHO we need to switch back to userspace pit. [Actually I did consider
>> in-kernel pit myself in the past.]. The reasons:
>> 1. There is no performance advantage doing this in the kernel.
>> It's just potentially reduced the host stability and reduces code
>
> Keeping device emulation in userspace is desired, of course. The
> drawbacks of timer emulation, AFAICS, are:
>
> - Timers in QEMU are, currently, not handled separately from other
> IO processing. The delay between timer expiration and interrupt
> injection depends on the time spent handling unrelated IO in QEMU's
> main loop. This can be fixed, by treating guest timer expiration
> with higher priority.
>
> - The in-kernel emulation allows the host timer to be locked to the vcpu
> it belongs. With userspace emulation, an IPI is necessary whenever the
> iothread is running on a different physical CPU than the target vcpu.
> The overall cost to wakeup a vcpu in a different physical CPU is:
> syscall, IRQ lock acquision (currently kvm->lock, which also protects
> access to in-kernel devices) and finally the IPI cost, which is hundreds
> of ns (googling around it seems to be in the range of 700ns).
>
> That cost is non-existant with timers locked to the vcpu.
>
> I don't know what specific problems the in-kernel PIT emulation solved
> (I recall certain Windows configurations were largely improved). Do you
> the details? Sheng?
Basically, after the in-kernel irqchip was checked in and before the
in-kernel pit was checked in, the time mechanism in KVM was chaotic...
Simply because the userspace pit can't sync with the in-kernel irqchip,
tons of interrupts were typically lost, e.g. a bug named "guest time is
1/6 compared to host time".
The main purpose of moving the pit into the kernel was to fix this timer issue.
There was another purpose when we did the same for Xen: Xen's IO exits
to qemu are too heavy and affect performance badly. But KVM at least
doesn't suffer that much.
Personally, I'm not against the idea of moving the IO part of the PIT back
to qemu and keeping the interrupt handling part in the kernel (though still
a little sad...). I just don't know if we can do it elegantly,
generically, precisely, efficiently and relatively simply, for all
(important) timer sources. "vpt.c" in Xen has been criticized as too
complex.
--
regards
Yang, Sheng
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 0/3] fix PIT injection
2008-07-30 14:15 ` Marcelo Tosatti
2008-07-30 14:56 ` Sheng Yang
@ 2008-07-30 21:48 ` Dor Laor
1 sibling, 0 replies; 15+ messages in thread
From: Dor Laor @ 2008-07-30 21:48 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Avi Kivity, Sheng Yang, kvm
Marcelo Tosatti wrote:
> Hi Dor,
>
> On Wed, Jul 30, 2008 at 12:50:06AM +0300, Dor Laor wrote:
>
>> Marcelo Tosatti wrote:
>>
>>> The in-kernel PIT emulation can either inject too many or too few
>>> interrupts.
>>>
>>>
>>>
>> While it's an improvement, the in-kernel pit is still not perfect. For
>> example, on pit frequency changes the
>> pending count should be recalculated and matched to the new frequency.
>>
>
> Point. That one can be addressed.
>
>
>> I also tumbled on live migration problem
>>
>
> Can you provide more details? How to reproduce?
>
>
Do a live migration for a Windows guest (standard HAL); the time on the
destination advances way too slowly.
>> and there is your guest smp fix.
>> IMHO we need to switch back to userspace pit. [Actually I did consider
>> in-kernel pit myself in the past.]. The reasons:
>> 1. There is no performance advantage doing this in the kernel.
>> It's just potentially reduced the host stability and reduces code
>>
>
> Keeping device emulation in userspace is desired, of course. The
> drawbacks of timer emulation, AFAICS, are:
>
> - Timers in QEMU are, currently, not handled separately from other
> IO processing. The delay between timer expiration and interrupt
> injection depends on the time spent handling unrelated IO in QEMU's
> main loop. This can be fixed, by treating guest timer expiration
> with higher priority.
>
As you said, this can be fixed.
> - The in-kernel emulation allows the host timer to be locked to the vcpu
> it belongs. With userspace emulation, an IPI is necessary whenever the
> iothread is running on a different physical CPU than the target vcpu.
> The overall cost to wakeup a vcpu in a different physical CPU is:
> syscall, IRQ lock acquision (currently kvm->lock, which also protects
> access to in-kernel devices) and finally the IPI cost, which is hundreds
> of ns (googling around it seems to be in the range of 700ns).
>
>
While you have a point, there are many IPIs when running smp without NPT
and we can still
get nice performance (thanks for your mmu work..)
> That cost is non-existant with timers locked to the vcpu.
>
>
Maybe we can keep the pit logic and implementation in userspace like
today, and use the kernel timer like today, but when the timer pops
we'll force the vcpu to go to userspace and run the pit code in the
vcpu context. We'll pay the context-switch cost of going
kernel->user->kernel. Hmm, I think that's too costly.
> I don't know what specific problems the in-kernel PIT emulation solved
> (I recall certain Windows configurations were largely improved). Do you
> the details? Sheng?
>
>
In the end I don't really know; I saw a newer email from Sheng. If the
interrupt injection path is too slow then we might keep Sheng's pit.
>> 2. There are floating patches to fix pit/rtc injection in the same way
>> the acked irq is sone here.
>> So the first 2 patches are relevant.
>> 3. Will we do the same for rtc? -> why duplicate userspace code in the
>> kernel?
>> We won't have smp issues since we have qemu_mutex and it will be
>> simpler too.
>>
>> If you agree, please help merging the qemu patches.
>> Otherwise argue against the above :)
>>
>> Cheers, Dor
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [patch 3/3] KVM: PIT: fix injection logic and count
2008-07-26 20:01 ` [patch 3/3] KVM: PIT: fix injection logic and count Marcelo Tosatti
2008-07-28 5:56 ` Yang, Sheng
2008-07-29 12:55 ` Avi Kivity
@ 2008-08-12 15:35 ` David S. Ahern
2 siblings, 0 replies; 15+ messages in thread
From: David S. Ahern @ 2008-08-12 15:35 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: kvm-devel
Hi Marcelo:
I am seeing erroneous accounting data in RHEL3 guests which I believe I
have traced to this patch. The easiest way to see this is to run 'mpstat
1': intr/s is in the 50's (e.g., for a nearly idle guest with negligible
disk/network). This is wrong. At a minimum it should be 100 -- 100 timer
interrupts per second.
Once this caught my eye, I took a look at /proc/stat. If you take
samples 1 second apart, the difference of the sums for the 'cpu' line
should be HZ * ncpus and for each individual cpu entry (e.g., cpu0,
cpu1, etc) the result should be HZ. In code:
function cpu_stat {
awk -v cpu="$1" '{
if ($1 == cpu)
{
sum=0
for (i=1; i<= NF; ++i)
sum += $i
print sum
}
}' /proc/stat
}
cpu=${1:-"cpu"}
d1=$(cpu_stat $cpu)
echo "have first sample. sleeping"
usleep 990000
d2=$(cpu_stat $cpu)
echo "delta: $(($d2 - $d1))"
I am seeing a result of 2*HZ. So for a 4 vcpu guest the delta for the
cpu line is > 800, and each cpu# line is >200.
You see the effect with the SMP kernel regardless of the number of vcpus
(1 or more) -- it's always twice what it should be and the interrupts
are always half what they should be. The accounting is fine with the
uniprocessor kernel. This suggests the problem is that lapic timer
interrupts are coming in twice as fast (or more) than they should.
Interestingly, I only see this for RHEL5.2 as the host OS; the
accounting is fine for Fedora 9 as the host OS. In both cases it's
kvm-72 with just this patch set.
Any ideas on where the problem could be?
david
Marcelo Tosatti wrote:
> The PIT injection logic is problematic under the following cases:
>
> 1) If there is a higher priority vector to be delivered by the time
> kvm_pit_timer_intr_post is invoked ps->inject_pending won't be set.
> This opens the possibility for missing many PIT event injections (say if
> guest executes hlt at this point).
>
> 2) ps->inject_pending is racy with more than two vcpus. Since there's no locking
> around read/dec of pt->pending, two vcpu's can inject two interrupts for a single
> pt->pending count.
>
> Fix 1 by using an irq ack notifier: only reinject when the previous irq
> has been acked. Fix 2 with appropriate locking around manipulation of
> pending count and irq_ack by the injection / ack paths.
>
> Also, count_load_time should be set at the time the count is reloaded,
> not when the interrupt is injected (BTW, LAPIC uses the same apparently
> broken scheme, could someone explain what was the reasoning behind
> that? kvm_apic_timer_intr_post).
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/arch/x86/kvm/i8254.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/i8254.c
> +++ kvm/arch/x86/kvm/i8254.c
> @@ -207,6 +207,8 @@ static int __pit_timer_fn(struct kvm_kpi
>
> pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
> pt->scheduled = ktime_to_ns(pt->timer.expires);
> + if (pt->period)
> + ps->channels[0].count_load_time = pt->timer.expires;
>
> return (pt->period == 0 ? 0 : 1);
> }
> @@ -215,12 +217,22 @@ int pit_has_pending_timer(struct kvm_vcp
> {
> struct kvm_pit *pit = vcpu->kvm->arch.vpit;
>
> - if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending)
> + if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack)
> return atomic_read(&pit->pit_state.pit_timer.pending);
> -
> return 0;
> }
>
> +void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
> +{
> + struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
> + irq_ack_notifier);
> + spin_lock(&ps->inject_lock);
> + if (atomic_dec_return(&ps->pit_timer.pending) < 0)
> + WARN_ON(1);
> + ps->irq_ack = 1;
> + spin_unlock(&ps->inject_lock);
> +}
> +
> static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
> {
> struct kvm_kpit_state *ps;
> @@ -255,8 +267,9 @@ static void destroy_pit_timer(struct kvm
> hrtimer_cancel(&pt->timer);
> }
>
> -static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
> +static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
> {
> + struct kvm_kpit_timer *pt = &ps->pit_timer;
> s64 interval;
>
> interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
> @@ -268,6 +281,7 @@ static void create_pit_timer(struct kvm_
> pt->period = (is_period == 0) ? 0 : interval;
> pt->timer.function = pit_timer_fn;
> atomic_set(&pt->pending, 0);
> + ps->irq_ack = 1;
>
> hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
> HRTIMER_MODE_ABS);
> @@ -302,11 +316,11 @@ static void pit_load_count(struct kvm *k
> case 1:
> /* FIXME: enhance mode 4 precision */
> case 4:
> - create_pit_timer(&ps->pit_timer, val, 0);
> + create_pit_timer(ps, val, 0);
> break;
> case 2:
> case 3:
> - create_pit_timer(&ps->pit_timer, val, 1);
> + create_pit_timer(ps, val, 1);
> break;
> default:
> destroy_pit_timer(&ps->pit_timer);
> @@ -520,7 +534,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
> mutex_unlock(&pit->pit_state.lock);
>
> atomic_set(&pit->pit_state.pit_timer.pending, 0);
> - pit->pit_state.inject_pending = 1;
> + pit->pit_state.irq_ack = 1;
> }
>
> struct kvm_pit *kvm_create_pit(struct kvm *kvm)
> @@ -534,6 +548,7 @@ struct kvm_pit *kvm_create_pit(struct kv
>
> mutex_init(&pit->pit_state.lock);
> mutex_lock(&pit->pit_state.lock);
> + spin_lock_init(&pit->pit_state.inject_lock);
>
> /* Initialize PIO device */
> pit->dev.read = pit_ioport_read;
> @@ -555,6 +570,9 @@ struct kvm_pit *kvm_create_pit(struct kv
> pit_state->pit = pit;
> hrtimer_init(&pit_state->pit_timer.timer,
> CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
> + pit_state->irq_ack_notifier.gsi = 0;
> + pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
> + kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
> mutex_unlock(&pit->pit_state.lock);
>
> kvm_pit_reset(pit);
> @@ -592,37 +610,19 @@ void kvm_inject_pit_timer_irqs(struct kv
> struct kvm_kpit_state *ps;
>
> if (vcpu && pit) {
> + int inject = 0;
> ps = &pit->pit_state;
>
> - /* Try to inject pending interrupts when:
> - * 1. Pending exists
> - * 2. Last interrupt was accepted or waited for too long time*/
> - if (atomic_read(&ps->pit_timer.pending) &&
> - (ps->inject_pending ||
> - (jiffies - ps->last_injected_time
> - >= KVM_MAX_PIT_INTR_INTERVAL))) {
> - ps->inject_pending = 0;
> - __inject_pit_timer_intr(kvm);
> - ps->last_injected_time = jiffies;
> - }
> - }
> -}
> -
> -void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
> -{
> - struct kvm_arch *arch = &vcpu->kvm->arch;
> - struct kvm_kpit_state *ps;
> -
> - if (vcpu && arch->vpit) {
> - ps = &arch->vpit->pit_state;
> - if (atomic_read(&ps->pit_timer.pending) &&
> - (((arch->vpic->pics[0].imr & 1) == 0 &&
> - arch->vpic->pics[0].irq_base == vec) ||
> - (arch->vioapic->redirtbl[0].fields.vector == vec &&
> - arch->vioapic->redirtbl[0].fields.mask != 1))) {
> - ps->inject_pending = 1;
> - atomic_dec(&ps->pit_timer.pending);
> - ps->channels[0].count_load_time = ktime_get();
> + /* Try to inject pending interrupts when
> + * last one has been acked.
> + */
> + spin_lock(&ps->inject_lock);
> + if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
> + ps->irq_ack = 0;
> + inject = 1;
> }
> + spin_unlock(&ps->inject_lock);
> + if (inject)
> + __inject_pit_timer_intr(kvm);
> }
> }
> Index: kvm/arch/x86/kvm/i8254.h
> ===================================================================
> --- kvm.orig/arch/x86/kvm/i8254.h
> +++ kvm/arch/x86/kvm/i8254.h
> @@ -8,7 +8,6 @@ struct kvm_kpit_timer {
> int irq;
> s64 period; /* unit: ns */
> s64 scheduled;
> - ktime_t last_update;
> atomic_t pending;
> };
>
> @@ -34,8 +33,9 @@ struct kvm_kpit_state {
> u32 speaker_data_on;
> struct mutex lock;
> struct kvm_pit *pit;
> - bool inject_pending; /* if inject pending interrupts */
> - unsigned long last_injected_time;
> + spinlock_t inject_lock;
> + unsigned long irq_ack;
> + struct kvm_irq_ack_notifier irq_ack_notifier;
> };
>
> struct kvm_pit {
> @@ -54,7 +54,6 @@ struct kvm_pit {
> #define KVM_PIT_CHANNEL_MASK 0x3
>
> void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
> -void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
> void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
> struct kvm_pit *kvm_create_pit(struct kvm *kvm);
> void kvm_free_pit(struct kvm *kvm);
> Index: kvm/arch/x86/kvm/irq.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/irq.c
> +++ kvm/arch/x86/kvm/irq.c
> @@ -90,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_tim
> void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
> {
> kvm_apic_timer_intr_post(vcpu, vec);
> - kvm_pit_timer_intr_post(vcpu, vec);
> /* TODO: PIT, RTC etc. */
> }
> EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
>
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2008-08-12 15:35 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-26 20:00 [patch 0/3] fix PIT injection Marcelo Tosatti
2008-07-26 20:00 ` [patch 1/3] KVM: Add irq ack notifier list Marcelo Tosatti
2008-07-26 20:01 ` [patch 2/3] KVM: irq ack notification Marcelo Tosatti
2008-07-26 20:01 ` [patch 3/3] KVM: PIT: fix injection logic and count Marcelo Tosatti
2008-07-28 5:56 ` Yang, Sheng
2008-07-29 14:29 ` Marcelo Tosatti
2008-07-29 12:55 ` Avi Kivity
2008-07-29 14:42 ` Marcelo Tosatti
2008-08-12 15:35 ` David S. Ahern
2008-07-28 4:31 ` [patch 0/3] fix PIT injection David S. Ahern
2008-07-29 12:50 ` Avi Kivity
2008-07-29 21:50 ` Dor Laor
2008-07-30 14:15 ` Marcelo Tosatti
2008-07-30 14:56 ` Sheng Yang
2008-07-30 21:48 ` Dor Laor
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox