public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] Fix guest time drift under heavy load.
@ 2008-06-29 13:59 Gleb Natapov
  2008-06-29 13:59 ` [PATCH 1/3] Change qemu_set_irq() to return status information Gleb Natapov
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Gleb Natapov @ 2008-06-29 13:59 UTC (permalink / raw)
  To: gleb; +Cc: kvm

Resending one more time. There was no response last time I've sent it.

Qemu device emulation for timers might be inaccurate and
causes coalescing of several IRQs into one. It happens when the
load on the host is high and the guest did not manage to ack the
previous IRQ. The first patch in the series fixes this by changing
qemu_irq subsystem to return IRQ delivery status information.
If device is notified that IRQs where lost it can regenerate them
as needed. The following two patches add IRQ regeneration to PIC and RTC
devices.

---

Gleb Natapov (3):
      Fix time drift problem under high load when RTC is in use.
      Fix time drift problem under high load when PIT is in use.
      Change qemu_set_irq() to return status information.


 hw/apic.c        |  103 +++++++++++++++++++++++++++++++++++-------------------
 hw/esp.c         |    4 ++
 hw/fdc.c         |    4 ++
 hw/i8254.c       |   20 ++++++++++
 hw/i8259.c       |   19 +++++++---
 hw/ide.c         |    8 +++-
 hw/irq.c         |   10 +++--
 hw/irq.h         |   22 +++++++-----
 hw/max7310.c     |    4 ++
 hw/mc146818rtc.c |   11 +++++-
 hw/pc.c          |    6 ++-
 hw/pc.h          |    2 +
 hw/pci.c         |    8 +++-
 hw/ssd0323.c     |    4 ++
 hw/twl92230.c    |    8 +++-
 15 files changed, 159 insertions(+), 74 deletions(-)

-- 
		Gleb.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] Change qemu_set_irq() to return status information.
  2008-06-29 13:59 [PATCH 0/3] Fix guest time drift under heavy load Gleb Natapov
@ 2008-06-29 13:59 ` Gleb Natapov
  2008-06-29 13:59 ` [PATCH 2/3] Fix time drift problem under high load when PIT is in use Gleb Natapov
  2008-06-29 13:59 ` [PATCH 3/3] Fix time drift problem under high load when RTC " Gleb Natapov
  2 siblings, 0 replies; 5+ messages in thread
From: Gleb Natapov @ 2008-06-29 13:59 UTC (permalink / raw)
  To: gleb; +Cc: kvm

The return value is less then zero if interrupt is masked, zero if it
is known that interrupt is lost (due to coalescing) or greater then zero
if interrupt is delivered or was successfully queued for delivery by
interrupt controller. Device emulation can use this info as it pleases.
Included patch adds detection of interrupt coalescing into PIC and APIC
code for edge triggered interrupts.

Signed-off-by: Gleb Natapov <gleb@qumranet.com>
---

 hw/apic.c     |  103 +++++++++++++++++++++++++++++++++++++--------------------
 hw/esp.c      |    4 ++
 hw/fdc.c      |    4 ++
 hw/i8259.c    |   19 +++++++----
 hw/ide.c      |    8 +++-
 hw/irq.c      |   10 +++---
 hw/irq.h      |   22 +++++++-----
 hw/max7310.c  |    4 ++
 hw/pc.c       |    6 ++-
 hw/pc.h       |    2 +
 hw/pci.c      |    8 +++-
 hw/ssd0323.c  |    4 ++
 hw/twl92230.c |    8 +++-
 13 files changed, 131 insertions(+), 71 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 6fd0160..9bdaed2 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -101,7 +101,7 @@ static APICState *local_apics[MAX_APICS + 1];
 static int last_apic_id = 0;
 
 static void apic_init_ipi(APICState *s);
-static void apic_set_irq(APICState *s, int vector_num, int trigger_mode);
+static int apic_set_irq(APICState *s, int vector_num, int trigger_mode);
 static void apic_update_irq(APICState *s);
 
 /* Find first bit starting from msb. Return 0 if value = 0 */
@@ -166,6 +166,14 @@ static inline void reset_bit(uint32_t *tab, int index)
     tab[i] &= ~mask;
 }
 
+static inline int get_bit(uint32_t *tab, int index)
+{
+    int i, mask;
+    i = index >> 5;
+    mask = 1 << (index & 0x1f);
+    return !!(tab[i] & mask);
+}
+
 void apic_local_deliver(CPUState *env, int vector)
 {
     APICState *s = env->apic_state;
@@ -215,12 +223,13 @@ void apic_local_deliver(CPUState *env, int vector)
     }\
 }
 
-static void apic_bus_deliver(const uint32_t *deliver_bitmask,
+static int apic_bus_deliver(const uint32_t *deliver_bitmask,
                              uint8_t delivery_mode,
                              uint8_t vector_num, uint8_t polarity,
                              uint8_t trigger_mode)
 {
     APICState *apic_iter;
+    int ret = 0;
 
     switch (delivery_mode) {
         case APIC_DM_LOWPRI:
@@ -237,11 +246,12 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
                 if (d >= 0) {
                     apic_iter = local_apics[d];
                     if (apic_iter) {
-                        apic_set_irq(apic_iter, vector_num, trigger_mode);
+                        ret = apic_set_irq(apic_iter, vector_num, trigger_mode);
                     }
-                }
+                } else
+                    ret = 1;
             }
-            return;
+            return ret;
 
         case APIC_DM_FIXED:
             break;
@@ -249,29 +259,33 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
         case APIC_DM_SMI:
             foreach_apic(apic_iter, deliver_bitmask,
                 cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_SMI) );
-            return;
+            return 1;
 
         case APIC_DM_NMI:
             foreach_apic(apic_iter, deliver_bitmask,
                 cpu_interrupt(apic_iter->cpu_env, CPU_INTERRUPT_NMI) );
-            return;
+            return 1;
 
         case APIC_DM_INIT:
             /* normal INIT IPI sent to processors */
             foreach_apic(apic_iter, deliver_bitmask,
                          apic_init_ipi(apic_iter) );
-            return;
+            return 1;
 
         case APIC_DM_EXTINT:
             /* handled in I/O APIC code */
             break;
 
         default:
-            return;
+            return ret;
     }
 
+    /* if interrupt should be delivered to more then one apic report it as
+     * delivered if at least one apic accepts it */
     foreach_apic(apic_iter, deliver_bitmask,
-                 apic_set_irq(apic_iter, vector_num, trigger_mode) );
+                 ret += apic_set_irq(apic_iter, vector_num, trigger_mode) );
+
+    return ret;
 }
 
 void cpu_set_apic_base(CPUState *env, uint64_t val)
@@ -361,14 +375,18 @@ static void apic_update_irq(APICState *s)
     cpu_interrupt(s->cpu_env, CPU_INTERRUPT_HARD);
 }
 
-static void apic_set_irq(APICState *s, int vector_num, int trigger_mode)
+static int apic_set_irq(APICState *s, int vector_num, int trigger_mode)
 {
+    int ret = !get_bit(s->irr, vector_num);
+
     set_bit(s->irr, vector_num);
     if (trigger_mode)
         set_bit(s->tmr, vector_num);
     else
         reset_bit(s->tmr, vector_num);
     apic_update_irq(s);
+
+    return ret;
 }
 
 static void apic_eoi(APICState *s)
@@ -906,9 +924,8 @@ int apic_init(CPUState *env)
     return 0;
 }
 
-static void ioapic_service(IOAPICState *s)
+static int ioapic_service_one(IOAPICState *s, uint8_t pin)
 {
-    uint8_t i;
     uint8_t trig_mode;
     uint8_t vector;
     uint8_t delivery_mode;
@@ -919,34 +936,44 @@ static void ioapic_service(IOAPICState *s)
     uint8_t polarity;
     uint32_t deliver_bitmask[MAX_APIC_WORDS];
 
+    mask = 1 << pin;
+    if (!(s->irr & mask))
+        return 1;
+    
+    entry = s->ioredtbl[pin];
+    if ((entry & APIC_LVT_MASKED))
+        return -1;
+
+    trig_mode = ((entry >> 15) & 1);
+    dest = entry >> 56;
+    dest_mode = (entry >> 11) & 1;
+    delivery_mode = (entry >> 8) & 7;
+    polarity = (entry >> 13) & 1;
+    if (trig_mode == APIC_TRIGGER_EDGE)
+        s->irr &= ~mask;
+    if (delivery_mode == APIC_DM_EXTINT)
+        vector = pic_read_irq(isa_pic);
+    else
+        vector = entry & 0xff;
+
+    apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);
+    return apic_bus_deliver(deliver_bitmask, delivery_mode, vector, polarity,
+            trig_mode);
+}
+
+static void ioapic_service(IOAPICState *s)
+{
+    uint8_t i;
+
     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-        mask = 1 << i;
-        if (s->irr & mask) {
-            entry = s->ioredtbl[i];
-            if (!(entry & APIC_LVT_MASKED)) {
-                trig_mode = ((entry >> 15) & 1);
-                dest = entry >> 56;
-                dest_mode = (entry >> 11) & 1;
-                delivery_mode = (entry >> 8) & 7;
-                polarity = (entry >> 13) & 1;
-                if (trig_mode == APIC_TRIGGER_EDGE)
-                    s->irr &= ~mask;
-                if (delivery_mode == APIC_DM_EXTINT)
-                    vector = pic_read_irq(isa_pic);
-                else
-                    vector = entry & 0xff;
-
-                apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode);
-                apic_bus_deliver(deliver_bitmask, delivery_mode,
-                                 vector, polarity, trig_mode);
-            }
-        }
+        ioapic_service_one(s, i);
     }
 }
 
-void ioapic_set_irq(void *opaque, int vector, int level)
+int ioapic_set_irq(void *opaque, int vector, int level)
 {
     IOAPICState *s = opaque;
+    int ret = 1;
 
     if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
         uint32_t mask = 1 << vector;
@@ -956,7 +983,7 @@ void ioapic_set_irq(void *opaque, int vector, int level)
             /* level triggered */
             if (level) {
                 s->irr |= mask;
-                ioapic_service(s);
+                ioapic_service_one(s, vector);
             } else {
                 s->irr &= ~mask;
             }
@@ -964,10 +991,12 @@ void ioapic_set_irq(void *opaque, int vector, int level)
             /* edge triggered */
             if (level) {
                 s->irr |= mask;
-                ioapic_service(s);
+                ret = ioapic_service_one(s, vector);
             }
         }
     }
+
+    return ret;
 }
 
 static uint32_t ioapic_mem_readl(void *opaque, target_phys_addr_t addr)
diff --git a/hw/esp.c b/hw/esp.c
index 1843fb8..aa5e546 100644
--- a/hw/esp.c
+++ b/hw/esp.c
@@ -409,10 +409,12 @@ static void esp_reset(void *opaque)
     s->do_cmd = 0;
 }
 
-static void parent_esp_reset(void *opaque, int irq, int level)
+static int parent_esp_reset(void *opaque, int irq, int level)
 {
     if (level)
         esp_reset(opaque);
+
+    return 1;
 }
 
 static uint32_t esp_mem_readb(void *opaque, target_phys_addr_t addr)
diff --git a/hw/fdc.c b/hw/fdc.c
index cd00420..b212d89 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -713,7 +713,7 @@ static void fdctrl_external_reset(void *opaque)
     fdctrl_reset(s, 0);
 }
 
-static void fdctrl_handle_tc(void *opaque, int irq, int level)
+static int fdctrl_handle_tc(void *opaque, int irq, int level)
 {
     //fdctrl_t *s = opaque;
 
@@ -721,6 +721,8 @@ static void fdctrl_handle_tc(void *opaque, int irq, int level)
         // XXX
         FLOPPY_DPRINTF("TC pulsed\n");
     }
+
+    return 1;
 }
 
 /* XXX: may change if moved to bdrv */
diff --git a/hw/i8259.c b/hw/i8259.c
index add6345..ab7eb2d 100644
--- a/hw/i8259.c
+++ b/hw/i8259.c
@@ -72,9 +72,9 @@ static uint64_t irq_count[16];
 #endif
 
 /* set irq level. If an edge is detected, then the IRR is set to 1 */
-static inline void pic_set_irq1(PicState *s, int irq, int level)
+static inline int pic_set_irq1(PicState *s, int irq, int level)
 {
-    int mask;
+    int mask, ret = 1;
     mask = 1 << irq;
     if (s->elcr & mask) {
         /* level triggered */
@@ -88,13 +88,18 @@ static inline void pic_set_irq1(PicState *s, int irq, int level)
     } else {
         /* edge triggered */
         if (level) {
-            if ((s->last_irr & mask) == 0)
+            if ((s->last_irr & mask) == 0) {
+                if((s->irr & mask))
+                    ret = 0;
                 s->irr |= mask;
+            }
             s->last_irr |= mask;
         } else {
             s->last_irr &= ~mask;
         }
     }
+
+    return (s->imr & mask) ? -1 : ret;
 }
 
 /* return the highest priority found in mask (highest = smallest
@@ -178,9 +183,10 @@ void pic_update_irq(PicState2 *s)
 int64_t irq_time[16];
 #endif
 
-static void i8259_set_irq(void *opaque, int irq, int level)
+static int i8259_set_irq(void *opaque, int irq, int level)
 {
     PicState2 *s = opaque;
+    int pic_ret, ioapic_ret;
 
 #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT)
     if (level != irq_level[irq]) {
@@ -199,11 +205,12 @@ static void i8259_set_irq(void *opaque, int irq, int level)
         irq_time[irq] = qemu_get_clock(vm_clock);
     }
 #endif
-    pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+    pic_ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
     /* used for IOAPIC irqs */
     if (s->alt_irq_func)
-        s->alt_irq_func(s->alt_irq_opaque, irq, level);
+        ioapic_ret = s->alt_irq_func(s->alt_irq_opaque, irq, level);
     pic_update_irq(s);
+    return MAX(pic_ret, ioapic_ret);
 }
 
 /* acknowledge interrupt 'irq' */
diff --git a/hw/ide.c b/hw/ide.c
index dc41982..d155bae 100644
--- a/hw/ide.c
+++ b/hw/ide.c
@@ -2942,7 +2942,7 @@ static void cmd646_update_irq(PCIIDEState *d)
 }
 
 /* the PCI irq level is the logical OR of the two channels */
-static void cmd646_set_irq(void *opaque, int channel, int level)
+static int cmd646_set_irq(void *opaque, int channel, int level)
 {
     PCIIDEState *d = opaque;
     int irq_mask;
@@ -2953,6 +2953,8 @@ static void cmd646_set_irq(void *opaque, int channel, int level)
     else
         d->dev.config[MRDMODE] &= ~irq_mask;
     cmd646_update_irq(d);
+
+    return 1;
 }
 
 /* CMD646 PCI IDE controller */
@@ -3345,7 +3347,7 @@ static inline void md_interrupt_update(struct md_s *s)
                     !(s->opt & OPT_SRESET));
 }
 
-static void md_set_irq(void *opaque, int irq, int level)
+static int md_set_irq(void *opaque, int irq, int level)
 {
     struct md_s *s = (struct md_s *) opaque;
     if (level)
@@ -3354,6 +3356,8 @@ static void md_set_irq(void *opaque, int irq, int level)
         s->stat &= ~STAT_INT;
 
     md_interrupt_update(s);
+
+    return 1;
 }
 
 static void md_reset(struct md_s *s)
diff --git a/hw/irq.c b/hw/irq.c
index eca707d..78ece4c 100644
--- a/hw/irq.c
+++ b/hw/irq.c
@@ -30,12 +30,12 @@ struct IRQState {
     int n;
 };
 
-void qemu_set_irq(qemu_irq irq, int level)
+int qemu_set_irq(qemu_irq irq, int level)
 {
     if (!irq)
-        return;
+        return 1;
 
-    irq->handler(irq->opaque, irq->n, level);
+    return irq->handler(irq->opaque, irq->n, level);
 }
 
 qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n)
@@ -56,11 +56,11 @@ qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n)
     return s;
 }
 
-static void qemu_notirq(void *opaque, int line, int level)
+static int qemu_notirq(void *opaque, int line, int level)
 {
     struct IRQState *irq = opaque;
 
-    irq->handler(irq->opaque, irq->n, !level);
+    return irq->handler(irq->opaque, irq->n, !level);
 }
 
 qemu_irq qemu_irq_invert(qemu_irq irq)
diff --git a/hw/irq.h b/hw/irq.h
index 0880ad2..36abdae 100644
--- a/hw/irq.h
+++ b/hw/irq.h
@@ -4,25 +4,29 @@
 /* Generic IRQ/GPIO pin infrastructure.  */
 
 /* FIXME: Rmove one of these.  */
-typedef void (*qemu_irq_handler)(void *opaque, int n, int level);
-typedef void SetIRQFunc(void *opaque, int irq_num, int level);
+typedef int (*qemu_irq_handler)(void *opaque, int n, int level);
+typedef int SetIRQFunc(void *opaque, int irq_num, int level);
 
-void qemu_set_irq(qemu_irq irq, int level);
+int qemu_set_irq(qemu_irq irq, int level);
 
-static inline void qemu_irq_raise(qemu_irq irq)
+static inline int qemu_irq_raise(qemu_irq irq)
 {
-    qemu_set_irq(irq, 1);
+    return qemu_set_irq(irq, 1);
 }
 
-static inline void qemu_irq_lower(qemu_irq irq)
+static inline int qemu_irq_lower(qemu_irq irq)
 {
-    qemu_set_irq(irq, 0);
+    return qemu_set_irq(irq, 0);
 }
 
-static inline void qemu_irq_pulse(qemu_irq irq)
+static inline int qemu_irq_pulse(qemu_irq irq)
 {
-    qemu_set_irq(irq, 1);
+    int ret;
+
+    ret = qemu_set_irq(irq, 1);
     qemu_set_irq(irq, 0);
+
+    return ret;
 }
 
 /* Returns an array of N IRQs.  */
diff --git a/hw/max7310.c b/hw/max7310.c
index 397950a..6d498d5 100644
--- a/hw/max7310.c
+++ b/hw/max7310.c
@@ -179,7 +179,7 @@ static int max7310_load(QEMUFile *f, void *opaque, int version_id)
 
 static int max7310_iid = 0;
 
-static void max7310_gpio_set(void *opaque, int line, int level)
+static int max7310_gpio_set(void *opaque, int line, int level)
 {
     struct max7310_s *s = (struct max7310_s *) opaque;
     if (line >= sizeof(s->handler) / sizeof(*s->handler) || line  < 0)
@@ -189,6 +189,8 @@ static void max7310_gpio_set(void *opaque, int line, int level)
         s->level |= s->direction & (1 << line);
     else
         s->level &= ~(s->direction & (1 << line));
+
+    return 1;
 }
 
 /* MAX7310 is SMBus-compatible (can be used with only SMBus protocols),
diff --git a/hw/pc.c b/hw/pc.c
index 4c5e1c3..8e52041 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -114,18 +114,20 @@ int cpu_get_pic_interrupt(CPUState *env)
     return intno;
 }
 
-static void pic_irq_request(void *opaque, int irq, int level)
+static int pic_irq_request(void *opaque, int irq, int level)
 {
     CPUState *env = first_cpu;
 
     if (!level)
-        return;
+        return 1;
 
     while (env) {
         if (apic_accept_pic_intr(env))
             apic_local_deliver(env, APIC_LINT0);
         env = env->next_cpu;
     }
+
+    return 1;
 }
 
 /* PC cmos mappings */
diff --git a/hw/pc.h b/hw/pc.h
index 0c98ff0..b789f98 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -47,7 +47,7 @@ int apic_accept_pic_intr(CPUState *env);
 void apic_local_deliver(CPUState *env, int vector);
 int apic_get_interrupt(CPUState *env);
 IOAPICState *ioapic_init(void);
-void ioapic_set_irq(void *opaque, int vector, int level);
+int ioapic_set_irq(void *opaque, int vector, int level);
 
 /* i8254.c */
 
diff --git a/hw/pci.c b/hw/pci.c
index bc55989..6d85ccb 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -47,7 +47,7 @@ struct PCIBus {
 };
 
 static void pci_update_mappings(PCIDevice *d);
-static void pci_set_irq(void *opaque, int irq_num, int level);
+static int pci_set_irq(void *opaque, int irq_num, int level);
 
 target_phys_addr_t pci_mem_base;
 static int pci_irq_index;
@@ -485,7 +485,7 @@ uint32_t pci_data_read(void *opaque, uint32_t addr, int len)
 /* generic PCI irq support */
 
 /* 0 <= irq_num <= 3. level must be 0 or 1 */
-static void pci_set_irq(void *opaque, int irq_num, int level)
+static int pci_set_irq(void *opaque, int irq_num, int level)
 {
     PCIDevice *pci_dev = (PCIDevice *)opaque;
     PCIBus *bus;
@@ -493,7 +493,7 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
 
     change = level - pci_dev->irq_state[irq_num];
     if (!change)
-        return;
+        return 1;
 
     pci_dev->irq_state[irq_num] = level;
     for (;;) {
@@ -505,6 +505,8 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
     }
     bus->irq_count[irq_num] += change;
     bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0);
+
+    return 1;
 }
 
 /***********************************************************/
diff --git a/hw/ssd0323.c b/hw/ssd0323.c
index e2e619f..dc5eb88 100644
--- a/hw/ssd0323.c
+++ b/hw/ssd0323.c
@@ -265,11 +265,13 @@ static void ssd0323_invalidate_display(void * opaque)
 }
 
 /* Command/data input.  */
-static void ssd0323_cd(void *opaque, int n, int level)
+static int ssd0323_cd(void *opaque, int n, int level)
 {
     ssd0323_state *s = (ssd0323_state *)opaque;
     DPRINTF("%s mode\n", level ? "Data" : "Command");
     s->mode = level ? SSD0323_DATA : SSD0323_CMD;
+
+    return 1;
 }
 
 void *ssd0323_init(DisplayState *ds, qemu_irq *cmd_p)
diff --git a/hw/twl92230.c b/hw/twl92230.c
index 8d0ce5d..6d49bea 100644
--- a/hw/twl92230.c
+++ b/hw/twl92230.c
@@ -195,16 +195,18 @@ static inline int from_bcd(uint8_t val)
     return ((val >> 4) * 10) + (val & 0x0f);
 }
 
-static void menelaus_gpio_set(void *opaque, int line, int level)
+static int menelaus_gpio_set(void *opaque, int line, int level)
 {
     struct menelaus_s *s = (struct menelaus_s *) opaque;
 
     /* No interrupt generated */
     s->inputs &= ~(1 << line);
     s->inputs |= level << line;
+
+    return 1;
 }
 
-static void menelaus_pwrbtn_set(void *opaque, int line, int level)
+static int menelaus_pwrbtn_set(void *opaque, int line, int level)
 {
     struct menelaus_s *s = (struct menelaus_s *) opaque;
 
@@ -213,6 +215,8 @@ static void menelaus_pwrbtn_set(void *opaque, int line, int level)
         menelaus_update(s);
     }
     s->pwrbtn_state = level;
+
+    return 1;
 }
 
 #define MENELAUS_REV		0x01


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] Fix time drift problem under high load when PIT is in use.
  2008-06-29 13:59 [PATCH 0/3] Fix guest time drift under heavy load Gleb Natapov
  2008-06-29 13:59 ` [PATCH 1/3] Change qemu_set_irq() to return status information Gleb Natapov
@ 2008-06-29 13:59 ` Gleb Natapov
  2008-06-29 20:56   ` Dor Laor
  2008-06-29 13:59 ` [PATCH 3/3] Fix time drift problem under high load when RTC " Gleb Natapov
  2 siblings, 1 reply; 5+ messages in thread
From: Gleb Natapov @ 2008-06-29 13:59 UTC (permalink / raw)
  To: gleb; +Cc: kvm

Count the number of interrupts that was lost due to interrupt coalescing
and re-inject them back when possible. This fixes time drift problem when
pit is used as a time source.

Signed-off-by: Gleb Natapov <gleb@qumranet.com>
---

 hw/i8254.c |   20 +++++++++++++++++++-
 1 files changed, 19 insertions(+), 1 deletions(-)

diff --git a/hw/i8254.c b/hw/i8254.c
index 4813b03..c4f0f46 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -61,6 +61,8 @@ static PITState pit_state;
 
 static void pit_irq_timer_update(PITChannelState *s, int64_t current_time);
 
+static uint32_t pit_irq_coalesced;
+
 static int pit_get_count(PITChannelState *s)
 {
     uint64_t d;
@@ -369,12 +371,28 @@ static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
         return;
     expire_time = pit_get_next_transition_time(s, current_time);
     irq_level = pit_get_out1(s, current_time);
-    qemu_set_irq(s->irq, irq_level);
+    if(irq_level) {
+        if(!qemu_irq_raise(s->irq))
+            pit_irq_coalesced++;
+    } else {
+        qemu_irq_lower(s->irq);
+        if(pit_irq_coalesced > 0) {
+            if(qemu_irq_raise(s->irq))
+                pit_irq_coalesced--;
+            qemu_irq_lower(s->irq);
+        }
+    }
+
 #ifdef DEBUG_PIT
     printf("irq_level=%d next_delay=%f\n",
            irq_level,
            (double)(expire_time - current_time) / ticks_per_sec);
 #endif
+    if(pit_irq_coalesced && expire_time != -1) {
+        uint32_t div = ((pit_irq_coalesced >> 10) & 0x7f) + 2;
+        expire_time -= ((expire_time - current_time) / div);
+    }
+
     s->next_transition_time = expire_time;
     if (expire_time != -1)
         qemu_mod_timer(s->irq_timer, expire_time);


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] Fix time drift problem under high load when RTC is in use.
  2008-06-29 13:59 [PATCH 0/3] Fix guest time drift under heavy load Gleb Natapov
  2008-06-29 13:59 ` [PATCH 1/3] Change qemu_set_irq() to return status information Gleb Natapov
  2008-06-29 13:59 ` [PATCH 2/3] Fix time drift problem under high load when PIT is in use Gleb Natapov
@ 2008-06-29 13:59 ` Gleb Natapov
  2 siblings, 0 replies; 5+ messages in thread
From: Gleb Natapov @ 2008-06-29 13:59 UTC (permalink / raw)
  To: gleb; +Cc: kvm

Count the number of interrupts that was lost due to interrupt coalescing
and re-inject them back when possible. This fixes time drift problem when
RTC is used as a time source.

Signed-off-by: Gleb Natapov <gleb@qumranet.com>
---

 hw/mc146818rtc.c |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index 30bb044..64e4c11 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -72,6 +72,7 @@ struct RTCState {
 
 static void rtc_set_time(RTCState *s);
 static void rtc_copy_date(RTCState *s);
+static uint32_t rtc_irq_coalesced;
 
 static void rtc_timer_update(RTCState *s, int64_t current_time)
 {
@@ -101,7 +102,8 @@ static void rtc_periodic_timer(void *opaque)
 
     rtc_timer_update(s, s->next_periodic_time);
     s->cmos_data[RTC_REG_C] |= 0xc0;
-    qemu_irq_raise(s->irq);
+    if(!qemu_irq_raise(s->irq))
+        rtc_irq_coalesced++;
 }
 
 static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t data)
@@ -360,7 +362,12 @@ static uint32_t cmos_ioport_read(void *opaque, uint32_t addr)
         case RTC_REG_C:
             ret = s->cmos_data[s->cmos_index];
             qemu_irq_lower(s->irq);
-            s->cmos_data[RTC_REG_C] = 0x00;
+            if(!rtc_irq_coalesced) {
+                s->cmos_data[RTC_REG_C] = 0x00;
+            } else {
+                if(qemu_irq_raise(s->irq))
+                    rtc_irq_coalesced--;
+            }
             break;
         default:
             ret = s->cmos_data[s->cmos_index];


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/3] Fix time drift problem under high load when PIT is in use.
  2008-06-29 13:59 ` [PATCH 2/3] Fix time drift problem under high load when PIT is in use Gleb Natapov
@ 2008-06-29 20:56   ` Dor Laor
  0 siblings, 0 replies; 5+ messages in thread
From: Dor Laor @ 2008-06-29 20:56 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, qemu-devel

On Sun, 2008-06-29 at 16:59 +0300, Gleb Natapov wrote:
> Count the number of interrupts that was lost due to interrupt coalescing
> and re-inject them back when possible. This fixes time drift problem when
> pit is used as a time source.
> 
> Signed-off-by: Gleb Natapov <gleb@qumranet.com>
> ---
> 
>  hw/i8254.c |   20 +++++++++++++++++++-
>  1 files changed, 19 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/i8254.c b/hw/i8254.c
> index 4813b03..c4f0f46 100644
> --- a/hw/i8254.c
> +++ b/hw/i8254.c
> @@ -61,6 +61,8 @@ static PITState pit_state;
>  
>  static void pit_irq_timer_update(PITChannelState *s, int64_t current_time);
>  
> +static uint32_t pit_irq_coalesced;

The pit has 3 channels, it should be a channel field.

Also every time the pit frequency changes the above field should be
compensated with * (new_freq/old_freq). 
For example, if the guest was running with 1000hz clock and the
pit_irq_coalesced value is 100 currently, a frequency change to 100hz
should reduce pit_irq_coalesced to 10.

Except that, its high time we stop drifting :)

> +
>  static int pit_get_count(PITChannelState *s)
>  {
>      uint64_t d;
> @@ -369,12 +371,28 @@ static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
>          return;
>      expire_time = pit_get_next_transition_time(s, current_time);
>      irq_level = pit_get_out1(s, current_time);
> -    qemu_set_irq(s->irq, irq_level);
> +    if(irq_level) {
> +        if(!qemu_irq_raise(s->irq))
> +            pit_irq_coalesced++;
> +    } else {
> +        qemu_irq_lower(s->irq);
> +        if(pit_irq_coalesced > 0) {
> +            if(qemu_irq_raise(s->irq))
> +                pit_irq_coalesced--;
> +            qemu_irq_lower(s->irq);
> +        }
> +    }
> +
>  #ifdef DEBUG_PIT
>      printf("irq_level=%d next_delay=%f\n",
>             irq_level,
>             (double)(expire_time - current_time) / ticks_per_sec);
>  #endif
> +    if(pit_irq_coalesced && expire_time != -1) {
> +        uint32_t div = ((pit_irq_coalesced >> 10) & 0x7f) + 2;
> +        expire_time -= ((expire_time - current_time) / div);
> +    }
> +
>      s->next_transition_time = expire_time;
>      if (expire_time != -1)
>          qemu_mod_timer(s->irq_timer, expire_time);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-06-29 20:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-29 13:59 [PATCH 0/3] Fix guest time drift under heavy load Gleb Natapov
2008-06-29 13:59 ` [PATCH 1/3] Change qemu_set_irq() to return status information Gleb Natapov
2008-06-29 13:59 ` [PATCH 2/3] Fix time drift problem under high load when PIT is in use Gleb Natapov
2008-06-29 20:56   ` Dor Laor
2008-06-29 13:59 ` [PATCH 3/3] Fix time drift problem under high load when RTC " Gleb Natapov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox