qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
@ 2008-10-17 12:17 Beth Kon
  2008-10-17 13:14 ` rinku buragohain
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Beth Kon @ 2008-10-17 12:17 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, kvm

[-- Attachment #1: Type: text/plain, Size: 97 bytes --]


-- 
Elizabeth Kon (Beth)
IBM Linux Technology Center
Open Hypervisor Team
email: eak@us.ibm.com

[-- Attachment #2: qemu_hpet_v3.patch --]
[-- Type: text/x-patch, Size: 23942 bytes --]

This version contains many miscellaneous changes, including incorporation 
of comments received, addition of save/restore support and a reset handler, 
and a couple of bugfixes.

I've booted Linux and Win2k832 guests on QEMU. Win2k832 still looks shaky
on QEMU even without the hpet - I've gotten intermittent blue screens. But 
I did get it to boot at least once both with and without hpet. 

Clock drift on Linux is in the range of .017% - .019%, loaded and unloaded. I
haven't found a straightforward way to test on Windows and would appreciate
any pointers to existing approaches.


The second patch in this series contains the needed bochs bios changes.


Signed-off-by: Beth Kon <eak@us.ibm.com>
---
 Makefile.target  |    2 +-
 hw/hpet.c        |  572 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/i8254.c       |   11 +
 hw/mc146818rtc.c |   30 +++-
 hw/pc.c          |    2 +
 5 files changed, 614 insertions(+), 3 deletions(-)

diff --git a/Makefile.target b/Makefile.target
index e2edf9d..9e80b3d 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -545,7 +545,7 @@ ifeq ($(TARGET_BASE_ARCH), i386)
 OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
 OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
 OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
-OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o
+OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o hpet.o
 CPPFLAGS += -DHAS_AUDIO -DHAS_AUDIO_CHOICE
 endif
 ifeq ($(TARGET_BASE_ARCH), ppc)
diff --git a/hw/hpet.c b/hw/hpet.c
new file mode 100644
index 0000000..61fbaaf
--- /dev/null
+++ b/hw/hpet.c
@@ -0,0 +1,572 @@
+/*
+ *  High Precisition Event Timer emulation
+ *
+ *  Copyright (c) 2007 Alexander Graf
+ *  Copyright (c) 2008 IBM Corporation
+ *
+ *  Authors: Beth Kon <bkon@us.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * *****************************************************************
+ *
+ * This driver attempts to emulate an HPET device in software.
+ *
+ */
+#include "hw.h"
+#include "console.h"
+#include "qemu-timer.h"
+
+//#define HPET_DEBUG
+
+#define HPET_BASE		0xfed00000
+#define HPET_CLK_PERIOD         10000000ULL /* 10000000 femtoseconds == 10ns*/
+
+#define FS_PER_NS 1000000
+#define HPET_NUM_TIMERS 3
+#define HPET_TIMER_TYPE_LEVEL 1
+#define HPET_TIMER_TYPE_EDGE 0
+#define HPET_TIMER_DELIVERY_APIC 0
+#define HPET_TIMER_DELIVERY_FSB 1
+#define HPET_TIMER_CAP_FSB_INT_DEL (1 << 15)
+#define HPET_TIMER_CAP_PER_INT (1 << 4)
+
+#define HPET_CFG_ENABLE 0x001
+#define HPET_CFG_LEGACY 0x002
+
+#define HPET_ID         0x000
+#define HPET_PERIOD     0x004
+#define HPET_CFG        0x010
+#define HPET_STATUS     0x020
+#define HPET_COUNTER    0x0f0
+#define HPET_TN_REGS    0x100 ... 0x3ff /*address range of all TN regs*/
+#define HPET_TN_CFG     0x000
+#define HPET_TN_CMP     0x008
+#define HPET_TN_ROUTE   0x010
+
+
+#define HPET_TN_ENABLE           0x004
+#define HPET_TN_PERIODIC         0x008
+#define HPET_TN_PERIODIC_CAP     0x010
+#define HPET_TN_SIZE_CAP         0x020
+#define HPET_TN_SETVAL           0x040
+#define HPET_TN_32BIT            0x100
+#define HPET_TN_INT_ROUTE_MASK  0x3e00
+#define HPET_TN_INT_ROUTE_SHIFT      9
+#define HPET_TN_INT_ROUTE_CAP_SHIFT 32
+#define HPET_TN_CFG_BITS_READONLY_OR_RESERVED 0xffff80b1U
+
+#define timer_int_route(timer)   \
+    ((timer->config & HPET_TN_INT_ROUTE_MASK) >> HPET_TN_INT_ROUTE_SHIFT)
+
+#define hpet_enabled(s)          (s->config & HPET_CFG_ENABLE)
+#define timer_is_periodic(t)     (t->config & HPET_TN_PERIODIC)
+#define timer_enabled(t)         (t->config & HPET_TN_ENABLE)
+
+#define hpet_time_after(a, b)   ((int32_t)(b) - (int32_t)(a) < 0)
+#define hpet_time_after64(a, b) ((int64_t)(b) - (int64_t)(a) < 0)
+
+
+/*indicator hpet is operating in legacy mode */
+int hpet_legacy=0;
+
+struct HPETState;
+typedef struct HPETTimer {  /* timers */
+    uint8_t tn;             /*timer number*/
+    QEMUTimer *qemu_timer;
+    struct HPETState *state;
+    /* Memory-mapped, software visible timer registers */
+    uint64_t config;        /* configuration/cap */
+    uint64_t cmp;           /* comparator */
+    uint64_t fsb;           /* FSB route, not supported now */
+    /* Hidden register state */
+    uint64_t period;        /* Last value written to comparator */
+} HPETTimer;
+
+
+typedef struct HPETState {
+    uint64_t hpet_offset;
+    qemu_irq *irqs;
+    HPETTimer timer[HPET_NUM_TIMERS];
+    /* Memory-mapped, software visible registers */
+    uint64_t capability;        /* capabilities */
+    uint64_t config;            /* configuration */
+    uint64_t isr;               /* interrupt status reg */
+    uint64_t hpet_counter;      /* main counter */
+} HPETState;
+
+static inline uint64_t ticks_to_ns(uint64_t value) 
+{
+    return (muldiv64(value, HPET_CLK_PERIOD, FS_PER_NS));
+}
+
+static inline uint64_t ns_to_ticks(uint64_t value) 
+{
+    return (muldiv64(value, FS_PER_NS, HPET_CLK_PERIOD));
+}
+
+static inline uint64_t hpet_fixup_reg(
+    uint64_t new, uint64_t old, uint64_t mask)
+{
+    new &= mask;
+    new |= old & ~mask;
+    return new;
+}
+
+static inline uint64_t hpet_get_ticks(HPETState *s) {
+    uint64_t ticks;
+    ticks = ns_to_ticks(qemu_get_clock(vm_clock) + s->hpet_offset);
+    return ticks;
+}
+
+
+/* 
+ * calculate diff between comparator value and current ticks  
+ */
+
+static inline uint64_t hpet_calculate_diff(HPETTimer *t, uint64_t current)
+{
+    
+    if (t->config & HPET_TN_32BIT) {
+        uint32_t diff, cmp;
+        cmp = (uint32_t)t->cmp;
+        diff = cmp - (uint32_t)current;
+        diff = (int32_t)diff > 0 ? diff : (uint32_t)0;
+        return (uint64_t)diff;
+    } else {
+        uint64_t diff, cmp;
+        cmp = t->cmp;
+        diff = cmp - current;
+        diff = (int64_t)diff > 0 ? diff : (uint64_t)0;
+        return diff;
+    }
+    
+}
+
+static void update_irq(struct HPETTimer *timer)
+{
+    qemu_irq irq;
+    int route, do_ioapic = 0;
+
+    if ( (timer->tn <= 1) && (timer->state->config & HPET_CFG_LEGACY) ) {
+
+        /* if LegacyReplacementRoute bit is set, HPET specification requires
+         * timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
+         * timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. 
+         */
+        if (timer->tn == 0) {
+            irq=timer->state->irqs[0];
+            do_ioapic = 1;
+        } else
+            irq=timer->state->irqs[8];
+    }else{
+        route=timer_int_route(timer);
+        irq=timer->state->irqs[route];
+    }
+
+    if(timer_enabled(timer) && hpet_enabled(timer->state)) {
+        qemu_irq_pulse(irq);
+        /* windows wants timer0 on irq2 and linux wants irq0, 
+         * so we pulse both 
+         */
+        if (do_ioapic)
+            qemu_irq_pulse(timer->state->irqs[2]);
+    }
+}
+
+static void hpet_save(QEMUFile *f, void *opaque)
+{
+    HPETState *s = opaque;
+    int i;
+    qemu_put_be64s(f, &s->config);
+    qemu_put_be64s(f, &s->isr);
+    /* save current counter value */
+    s->hpet_counter = hpet_get_ticks(s); 
+    qemu_put_be64s(f, &s->hpet_counter);
+
+    for(i = 0; i < HPET_NUM_TIMERS; i++) {
+        qemu_put_8s(f, &s->timer[i].tn);
+        qemu_put_be64s(f, &s->timer[i].config);
+        qemu_put_be64s(f, &s->timer[i].cmp);
+        qemu_put_be64s(f, &s->timer[i].fsb);
+        qemu_put_be64s(f, &s->timer[i].period);
+        if (s->timer[i].qemu_timer) {
+            qemu_put_timer(f, s->timer[i].qemu_timer);
+        }
+    }
+}
+
+static int hpet_load(QEMUFile *f, void *opaque, int version_id)
+{
+    HPETState *s = opaque;
+    int i;
+ 
+    if (version_id != 1)
+        return -EINVAL;
+
+    qemu_get_be64s(f, &s->config);
+    qemu_get_be64s(f, &s->isr);
+    qemu_get_be64s(f, &s->hpet_counter);
+    /* Recalculate the offset between the main counter and guest time */
+    s->hpet_offset = ticks_to_ns(s->hpet_counter) - qemu_get_clock(vm_clock);
+
+    for(i = 0; i < HPET_NUM_TIMERS; i++) {
+        qemu_get_8s(f, &s->timer[i].tn);
+        qemu_get_be64s(f, &s->timer[i].config);
+        qemu_get_be64s(f, &s->timer[i].cmp);
+        qemu_get_be64s(f, &s->timer[i].fsb);
+        qemu_get_be64s(f, &s->timer[i].period);
+        if (s->timer[i].qemu_timer) {
+            qemu_get_timer(f, s->timer[i].qemu_timer);
+        }
+    }
+    return 0;
+}
+
+
+/* 
+ * timer expiration callback
+ */
+
+static void hpet_timer(void *opaque)
+{
+    HPETTimer *t = (HPETTimer*)opaque;
+    uint64_t diff;
+
+    uint64_t period = t->period;
+    uint64_t cur_tick = hpet_get_ticks(t->state);
+
+    if (timer_is_periodic(t) && (t->period != 0))
+    {
+        if (t->config & HPET_TN_32BIT) {
+            while (hpet_time_after(cur_tick, t->cmp))
+                t->cmp = (uint32_t)(t->cmp + t->period);
+        } else
+            while (hpet_time_after64(cur_tick, t->cmp))
+                t->cmp += period;
+
+        diff = hpet_calculate_diff(t, cur_tick);
+        qemu_mod_timer(t->qemu_timer, qemu_get_clock(vm_clock) 
+                        + (int64_t)ticks_to_ns(diff));
+    }
+    update_irq(t);
+}
+
+static void hpet_set_timer(HPETTimer *t)
+{
+    uint64_t diff;
+    uint64_t cur_tick = hpet_get_ticks(t->state);
+
+    diff = hpet_calculate_diff(t, cur_tick);
+    qemu_mod_timer(t->qemu_timer, qemu_get_clock(vm_clock) + (int64_t)ticks_to_ns(diff));
+}
+
+static void hpet_del_timer(HPETTimer *t)
+{
+    qemu_del_timer(t->qemu_timer);
+}
+
+#ifdef HPET_DEBUG
+static uint32_t hpet_ram_readb(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "qemu: hpet_read b at %" PRIx64 "\n", addr);
+    return 10;
+}
+
+static uint32_t hpet_ram_readw(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "qemu: hpet_read w at %" PRIx64 "\n", addr);
+    return 10;
+}
+#endif
+
+static uint32_t hpet_ram_readl(void *opaque, target_phys_addr_t addr)
+{
+    HPETState *s = (HPETState *)opaque;
+    uint64_t cur_tick;
+#ifdef HPET_DEBUG
+    fprintf(stderr, "qemu: hpet_read l at %" PRIx64 "\n", addr);
+#endif
+    switch(addr - HPET_BASE) {
+        case HPET_ID:
+            return s->capability;
+        case HPET_PERIOD:
+            return s->capability >> 32; 
+        case HPET_CFG:
+            return s->config;
+        case HPET_CFG + 4:
+#ifdef HPET_DEBUG
+            fprintf(stderr, "qemu: invalid hpet_read l at %" PRIx64 "\n", addr);
+#endif
+            return 0;
+        case HPET_COUNTER: 
+            if (hpet_enabled(s))
+                cur_tick = hpet_get_ticks(s);
+            else 
+                cur_tick = s->hpet_counter;
+#ifdef HPET_DEBUG
+            fprintf(stderr, "qemu: reading counter %" PRIx64 "\n", cur_tick);
+#endif
+            return cur_tick;
+        case HPET_COUNTER + 4:
+            return 0;
+        case HPET_STATUS:
+            return s->isr;
+        case HPET_TN_REGS:
+            {
+                uint8_t timer_id = (addr - HPET_BASE - 0x100) / 0x20;
+                if (timer_id > HPET_NUM_TIMERS - 1) {
+                    fprintf(stderr, "qemu: timer id out of range\n");
+                    return 0;
+                }
+                HPETTimer *timer = &s->timer[timer_id];
+
+                switch((addr - HPET_BASE - 0x100) % 0x20) {
+                    case HPET_TN_CFG:
+                        return timer->config;
+                    case HPET_TN_CFG + 4: // Interrupt capabilities
+                        return timer->config >> 32;
+                    case HPET_TN_CMP: // comparator register
+                        return timer->cmp;
+                    case HPET_TN_CMP + 4:
+                        return timer->cmp >> 32;
+                    case HPET_TN_ROUTE:
+                        return timer->fsb >> 32;
+                }
+            }
+            break;
+    }
+
+#ifdef HPET_DEBUG
+    fprintf(stderr, "qemu: invalid hpet_read l at %" PRIx64 "\n", addr);
+#endif
+    return 10;
+}
+
+#ifdef HPET_DEBUG
+static void hpet_ram_writeb(void *opaque, target_phys_addr_t addr, 
+                            uint32_t value)
+{
+    fprintf(stderr, "qemu: invalid hpet_write b at %" PRIx64 " = %#x\n", 
+                     addr, value);
+}
+
+static void hpet_ram_writew(void *opaque, target_phys_addr_t addr,
+                            uint32_t value)
+{
+    fprintf(stderr, "qemu: invalid hpet_write w at %" PRIx64 " = %#x\n", 
+                     addr, value);
+}
+#endif
+
+static void hpet_ram_writel(void *opaque, target_phys_addr_t addr,
+                            uint32_t value)
+{
+    int i;
+    HPETState *s = (HPETState *)opaque;
+    uint64_t old_val, new_val;
+
+#ifdef HPET_DEBUG
+    fprintf(stderr, "qemu: hpet_write l at %" PRIx64 " = %#x\n", addr, value);
+#endif
+    old_val = hpet_ram_readl(opaque, addr);
+    new_val = value;
+
+
+    switch(addr - HPET_BASE) {
+        case HPET_ID:
+            return;
+        case HPET_CFG:
+            s->config = hpet_fixup_reg(new_val, old_val, 0x3);
+            if (!(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE)) {
+                /* Enable main counter and interrupt generation. */
+                s->hpet_offset = ticks_to_ns(s->hpet_counter)
+                                        - qemu_get_clock(vm_clock);
+                for (i = 0; i < HPET_NUM_TIMERS; i++)
+                    if ((&s->timer[i])->cmp != ~0ULL)
+                        hpet_set_timer(&s->timer[i]);
+            }
+            else if ( (old_val & HPET_CFG_ENABLE) && 
+                                           !(new_val & HPET_CFG_ENABLE)) {
+                /* Halt main counter and disable interrupt generation. */
+                s->hpet_counter = hpet_get_ticks(s); 
+                for (i = 0; i < HPET_NUM_TIMERS; i++)
+                    hpet_del_timer(&s->timer[i]);
+            }
+            hpet_legacy = s->config & HPET_CFG_LEGACY;
+            break;
+        case HPET_CFG + 4: 
+#ifdef HPET_DEBUG
+            fprintf(stderr, "qemu:   invalid hpet_writel at %" PRIx64 "= %#x\n",
+                             addr, value);
+#endif
+            break;
+	case HPET_STATUS:
+            /* FIXME: need to handle level-triggered interrupts */
+            break;
+        case HPET_COUNTER:
+           if (hpet_enabled(s)) 
+               fprintf(stderr, "qemu: Writing counter while HPET enabled!\n"); 
+           s->hpet_counter =  value;
+           break;
+        case HPET_COUNTER + 4:
+           s->hpet_counter = (s->hpet_counter & 0xffffffffULL) 
+                             | (((uint64_t)value) << 32);
+#ifdef HPET_DEBUG
+           fprintf(stderr, "qemu:   HPET ctr set to %#x -> %" PRIx64 "\n",
+                    value, s->hpet_counter);
+#endif
+           break;
+        case HPET_TN_REGS:
+            {
+                uint8_t timer_id = (addr - HPET_BASE - 0x100) / 0x20;
+#ifdef HPET_DEBUG
+                fprintf(stderr, 
+                         "qemu:    hpet_write l timer_id = %#x \n", timer_id);
+#endif
+                HPETTimer *timer = &s->timer[timer_id];
+                
+                switch((addr - HPET_BASE - 0x100) % 0x20) {
+                    case HPET_TN_CFG:
+#ifdef HPET_DEBUG
+                        fprintf(stderr, 
+                                "qemu:   hpet_write l TN config value %#x\n", 
+                                value);
+#endif
+                        timer->config = 
+                                    hpet_fixup_reg(new_val, old_val, 0x3e4e);
+                        if (new_val & HPET_TN_32BIT) {
+                            timer->cmp = (uint32_t)timer->cmp;
+                            timer->period = (uint32_t)timer->period;
+                        }
+                        if (new_val & HPET_TIMER_TYPE_LEVEL){
+                            fprintf(stderr, 
+                                "qemu: level-triggered hpet not supported\n");
+                            exit (-1);
+                        }
+
+                        break;
+#ifdef HPET_DEBUG
+                    case HPET_TN_CFG + 4: // Interrupt capabilities
+                        fprintf(stderr, 
+                                "qemu:   invalid write at %" PRIx64 "=%#x\n", 
+                                 addr, value);
+                        break;
+#endif
+                    case HPET_TN_CMP: // comparator register
+#ifdef HPET_DEBUG
+                        fprintf(stderr, 
+                                "qemu:   hpet_writel TN cmp value %#x\n",
+                                    value);
+#endif
+                        if ( timer->config & HPET_TN_32BIT)
+                            new_val = (uint32_t)new_val;
+                        if ( !timer_is_periodic(timer) ||
+                                   (timer->config & HPET_TN_SETVAL) )
+                            timer->cmp = new_val;
+                        else {
+                            /*
+                             * FIXME: Clamp period to reasonable min value?
+                             * Clamp period to reasonable max value
+                             */
+                             new_val &= (timer->config & HPET_TN_32BIT ? ~0u : ~0ull) >> 1;
+                             timer->period = new_val;
+                        }
+                        timer->config &= ~HPET_TN_SETVAL;
+                        if (hpet_enabled(s))
+                            hpet_set_timer(timer);
+                        break;
+
+                    case HPET_TN_ROUTE + 4:
+#ifdef HPET_DEBUG
+                        fprintf(stderr, 
+                                "qemu:   invalid write at %" PRIx64 " = %#x\n", 
+                                 addr, value);
+#endif
+                        break;
+                }
+            }
+            break;
+	default:
+            fprintf(stderr, "qemu:  invalid write at %" PRIx64 " = %#x\n",
+                    addr, value);
+    }
+
+}
+
+static CPUReadMemoryFunc *hpet_ram_read[] = {
+#ifdef HPET_DEBUG
+    hpet_ram_readb,
+    hpet_ram_readw,
+#else
+    NULL,
+    NULL,
+#endif
+    hpet_ram_readl,
+};
+
+static CPUWriteMemoryFunc *hpet_ram_write[] = {
+#ifdef HPET_DEBUG
+    hpet_ram_writeb,
+    hpet_ram_writew,
+#else
+    NULL,
+    NULL,
+#endif
+    hpet_ram_writel,
+};
+
+static void hpet_reset(void *opaque) {
+    HPETState *s = opaque;
+    int i;
+
+    /* 32-bit main counter; 3 timers supported; LegacyReplacementRoute. 
+     * 64-bit counter cannot be supported until 64 bit atomic reads are 
+     * possible, since reading counter involves grabbing current qemu 
+     * clock, can't be done in 2 32-bit chunks.
+     */
+    s->capability = 0x80868201ULL;
+    s->capability |= ((HPET_CLK_PERIOD) << 32);
+
+    for(i=0; i<HPET_NUM_TIMERS; i++) {
+        HPETTimer *timer = &s->timer[i];
+        timer->tn = i;
+        timer->cmp = ~0ULL;
+        timer->config =  HPET_TN_PERIODIC_CAP;
+        timer->config |=  0x0000ffffULL << 32;
+        timer->state = s;
+        timer->qemu_timer = qemu_new_timer(vm_clock, hpet_timer, timer);
+    }
+}
+
+
+void hpet_init(qemu_irq *irq) {
+    int iomemtype;
+    HPETState *s;
+    
+    fprintf (stderr, "hpet_init\n");
+
+    /* XXX this is a dirty hack for HPET support w/o LPC
+           Actually this is a config descriptor for the RCBA */
+    s = qemu_mallocz(sizeof(HPETState));
+    s->irqs = irq;
+    hpet_reset(s);
+    register_savevm("hpet", -1, 1, hpet_save, hpet_load, s);
+    qemu_register_reset(hpet_reset, s);
+    /* HPET Area */
+    iomemtype = cpu_register_io_memory(0, hpet_ram_read,
+                    hpet_ram_write, s);
+    cpu_register_physical_memory(HPET_BASE, 0x400, iomemtype);
+}
diff --git a/hw/i8254.c b/hw/i8254.c
index 4813b03..7ffdf61 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -26,6 +26,9 @@
 #include "isa.h"
 #include "qemu-timer.h"
 
+#if defined TARGET_I386 || defined TARGET_X86_64
+extern int hpet_legacy;
+#endif
 //#define DEBUG_PIT
 
 #define RW_STATE_LSB 1
@@ -367,6 +370,14 @@ static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
 
     if (!s->irq_timer)
         return;
+
+#if defined TARGET_I386 || defined TARGET_X86_64
+    if (hpet_legacy) {
+        qemu_del_timer(s->irq_timer);
+        return;
+    }
+#endif
+
     expire_time = pit_get_next_transition_time(s, current_time);
     irq_level = pit_get_out1(s, current_time);
     qemu_set_irq(s->irq, irq_level);
diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index 30bb044..ffb7a48 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -27,6 +27,10 @@
 #include "pc.h"
 #include "isa.h"
 
+#if defined TARGET_I386 || defined TARGET_X86_64
+extern int hpet_legacy;
+#endif
+
 //#define DEBUG_CMOS
 
 #define RTC_SECONDS             0
@@ -80,7 +84,11 @@ static void rtc_timer_update(RTCState *s, int64_t current_time)
 
     period_code = s->cmos_data[RTC_REG_A] & 0x0f;
     if (period_code != 0 &&
+#if defined TARGET_I386 || defined TARGET_X86_64
+        (s->cmos_data[RTC_REG_B] & REG_B_PIE) && !hpet_legacy) {
+#else
         (s->cmos_data[RTC_REG_B] & REG_B_PIE)) {
+#endif
         if (period_code <= 2)
             period_code += 7;
         /* period in 32 Khz cycles */
@@ -101,7 +109,10 @@ static void rtc_periodic_timer(void *opaque)
 
     rtc_timer_update(s, s->next_periodic_time);
     s->cmos_data[RTC_REG_C] |= 0xc0;
-    qemu_irq_raise(s->irq);
+#if defined TARGET_I386 || defined TARGET_X86_64
+    if (!hpet_legacy)
+#endif
+        qemu_irq_raise(s->irq);
 }
 
 static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t data)
@@ -281,6 +292,12 @@ static void rtc_update_second(void *opaque)
     RTCState *s = opaque;
     int64_t delay;
 
+#if defined TARGET_I386 || defined TARGET_X86_64
+    if (hpet_legacy) {
+        qemu_del_timer(s->second_timer2);
+        return;
+    }
+#endif
     /* if the oscillator is not in normal operation, we do not update */
     if ((s->cmos_data[RTC_REG_A] & 0x70) != 0x20) {
         s->next_second_time += ticks_per_sec;
@@ -306,6 +323,12 @@ static void rtc_update_second2(void *opaque)
 {
     RTCState *s = opaque;
 
+#if defined TARGET_I386 || defined TARGET_X86_64
+    if (hpet_legacy) {
+        qemu_del_timer(s->second_timer);
+        return;
+    }
+#endif
     if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
         rtc_copy_date(s);
     }
@@ -359,7 +382,10 @@ static uint32_t cmos_ioport_read(void *opaque, uint32_t addr)
             break;
         case RTC_REG_C:
             ret = s->cmos_data[s->cmos_index];
-            qemu_irq_lower(s->irq);
+#if defined TARGET_I386 || defined TARGET_X86_64
+            if (!hpet_legacy)
+#endif
+                qemu_irq_lower(s->irq);
             s->cmos_data[RTC_REG_C] = 0x00;
             break;
         default:
diff --git a/hw/pc.c b/hw/pc.c
index 34683e7..d2452e0 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -48,6 +48,7 @@
 #define BIOS_CFG_IOPORT 0x510
 
 #define MAX_IDE_BUS 2
+void hpet_init(qemu_irq irq);
 
 static fdctrl_t *floppy_controller;
 static RTCState *rtc_state;
@@ -949,6 +950,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
     }
     pit = pit_init(0x40, i8259[0]);
     pcspk_init(pit);
+    hpet_init(i8259);
     if (pci_enabled) {
         pic_set_alt_irq_func(isa_pic, ioapic_set_irq, ioapic);
     }

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-17 12:17 [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3) Beth Kon
@ 2008-10-17 13:14 ` rinku buragohain
  2008-10-17 15:49 ` Jamie Lokier
  2008-10-21 15:21 ` [Qemu-devel] " Anthony Liguori
  2 siblings, 0 replies; 11+ messages in thread
From: rinku buragohain @ 2008-10-17 13:14 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 270 bytes --]

  sir thank you for your reply .. but you first tell me how to open a .patch
file. thank you

On Fri, Oct 17, 2008 at 5:17 AM, Beth Kon <eak@us.ibm.com> wrote:

>
> --
> Elizabeth Kon (Beth)
> IBM Linux Technology Center
> Open Hypervisor Team
> email: eak@us.ibm.com
>

[-- Attachment #2: Type: text/html, Size: 624 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-17 12:17 [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3) Beth Kon
  2008-10-17 13:14 ` rinku buragohain
@ 2008-10-17 15:49 ` Jamie Lokier
  2008-10-20 19:08   ` Beth Kon
  2008-10-21 15:21 ` [Qemu-devel] " Anthony Liguori
  2 siblings, 1 reply; 11+ messages in thread
From: Jamie Lokier @ 2008-10-17 15:49 UTC (permalink / raw)
  To: qemu-devel; +Cc: Alexander Graf, kvm

Beth Kon wrote:
> Clock drift on Linux is in the range of .017% - .019%, loaded and unloaded. I
> haven't found a straightforward way to test on Windows and would appreciate
> any pointers to existing approaches.

Is there any reason why there should be any clock drift, when the
guest is using a non-PIT clock?

I'm probably being naive, but with 32-bit or 64-bit HPET counters
available to the guest, and accurate values from the CMOS clock
emulation, I don't see why drift would accumulate over the long term
relative to the host clock.

-- Jamie

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-17 15:49 ` Jamie Lokier
@ 2008-10-20 19:08   ` Beth Kon
  2008-10-27 10:49     ` Dor Laor
  0 siblings, 1 reply; 11+ messages in thread
From: Beth Kon @ 2008-10-20 19:08 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: qemu-devel, kvm, Alexander Graf

On Fri, 2008-10-17 at 16:49 +0100, Jamie Lokier wrote:
> Beth Kon wrote:
> > Clock drift on Linux is in the range of .017% - .019%, loaded and unloaded. I
> > haven't found a straightforward way to test on Windows and would appreciate
> > any pointers to existing approaches.
> 
> Is there any reason why there should be any clock drift, when the
> guest is using a non-PIT clock?
> 
> I'm probably being naive, but with 32-bit or 64-bit HPET counters
> available to the guest, and accurate values from the CMOS clock
> emulation, I don't see why drift would accumulate over the long term
> relative to the host clock.

I was measuring with ntpdate, so the drift is with respect to the ntp
server pool, not the host clock. But in any case, since timer interrupts
and reads of the hpet counter are at the mercy of the host scheduler
(i.e., the qemu process can be swapped out at any time during hpet read
or timer expiration), I'd guess there would always be some amount of
inaccuracy. Also, qemu checks for timer expiration (qemu_run_timers) as
part of a bigger loop (main_loop_wait), so the varying amounts of work
to do elsewhere in the loop from iteration to iteration would also
introduce irregular delays.
> 
> -- Jamie
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-- 
Elizabeth Kon (Beth)
IBM Linux Technology Center
Open Hypervisor Team
email: eak@us.ibm.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Qemu-devel] Re: [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-17 12:17 [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3) Beth Kon
  2008-10-17 13:14 ` rinku buragohain
  2008-10-17 15:49 ` Jamie Lokier
@ 2008-10-21 15:21 ` Anthony Liguori
  2008-10-27 14:07   ` Beth Kon
  2 siblings, 1 reply; 11+ messages in thread
From: Anthony Liguori @ 2008-10-21 15:21 UTC (permalink / raw)
  To: Beth Kon; +Cc: qemu-devel, kvm, Alexander Graf

Beth Kon wrote:
> This version contains many miscellaneous changes, including incorporation 
> of comments received, addition of save/restore support and a reset handler, 
> and a couple of bugfixes.
>
> I've booted Linux and Win2k832 guests on QEMU. Win2k832 still looks shaky
> on QEMU even without the hpet - I've gotten intermittent blue screens. But 
> I did get it to boot at least once both with and without hpet. 
>
> Clock drift on Linux is in the range of .017% - .019%, loaded and unloaded. I
> haven't found a straightforward way to test on Windows and would appreciate
> any pointers to existing approaches.
>
>
> The second patch in this series contains the needed bochs bios changes.
>   

We need a --disable-hpet option at least to be able to disable the hpet 
when doing migration from an older QEMU to a newer one.  An info hpet 
would also be nice to be able to probe as to whether the hpet was active.

> +#define FS_PER_NS 1000000
> +#define HPET_NUM_TIMERS 3
> +#define HPET_TIMER_TYPE_LEVEL 1
> +#define HPET_TIMER_TYPE_EDGE 0
> +#define HPET_TIMER_DELIVERY_APIC 0
> +#define HPET_TIMER_DELIVERY_FSB 1
> +#define HPET_TIMER_CAP_FSB_INT_DEL (1 << 15)
> +#define HPET_TIMER_CAP_PER_INT (1 << 4)
> +
> +#define HPET_CFG_ENABLE 0x001
> +#define HPET_CFG_LEGACY 0x002
> +
> +#define HPET_ID         0x000
> +#define HPET_PERIOD     0x004
> +#define HPET_CFG        0x010
> +#define HPET_STATUS     0x020
> +#define HPET_COUNTER    0x0f0
> +#define HPET_TN_REGS    0x100 ... 0x3ff /*address range of all TN regs*/
>   

Most of this stuff could go into a header file.  This particular define 
uses a GCCism (range case statements).  I would object to this sort of 
define in principle but regardless, we shouldn't introduce this GCCism 
to QEMU since it only serves cosmetic function.  A simple if() before 
the switch statement would work just as well.

> +#define HPET_TN_CFG     0x000
> +#define HPET_TN_CMP     0x008
> +#define HPET_TN_ROUTE   0x010
> +
> +
> +#define HPET_TN_ENABLE           0x004
> +#define HPET_TN_PERIODIC         0x008
> +#define HPET_TN_PERIODIC_CAP     0x010
> +#define HPET_TN_SIZE_CAP         0x020
> +#define HPET_TN_SETVAL           0x040
> +#define HPET_TN_32BIT            0x100
> +#define HPET_TN_INT_ROUTE_MASK  0x3e00
> +#define HPET_TN_INT_ROUTE_SHIFT      9
> +#define HPET_TN_INT_ROUTE_CAP_SHIFT 32
> +#define HPET_TN_CFG_BITS_READONLY_OR_RESERVED 0xffff80b1U
> +
> +#define timer_int_route(timer)   \
> +    ((timer->config & HPET_TN_INT_ROUTE_MASK) >> HPET_TN_INT_ROUTE_SHIFT)
> +#define hpet_enabled(s)          (s->config & HPET_CFG_ENABLE)
> +#define timer_is_periodic(t)     (t->config & HPET_TN_PERIODIC)
> +#define timer_enabled(t)         (t->config & HPET_TN_ENABLE)
> +
> +#define hpet_time_after(a, b)   ((int32_t)(b) - (int32_t)(a) < 0)
> +#define hpet_time_after64(a, b) ((int64_t)(b) - (int64_t)(a) < 0)
>
>   

These should all be static functions.

> +
> +/*indicator hpet is operating in legacy mode */
> +int hpet_legacy=0;
>   

This should be part of the HPETState, and we should use an function to 
access it from other parts of the code.

> +static inline uint64_t ticks_to_ns(uint64_t value) 
> +{
> +    return (muldiv64(value, HPET_CLK_PERIOD, FS_PER_NS));
> +}
>   

inline is unnecessary for all of these.

> +static inline uint64_t hpet_get_ticks(HPETState *s) {
>   

We need to be consistent in our formatting.  { should go on a new line.

> +static void update_irq(struct HPETTimer *timer)
> +{
> +    qemu_irq irq;
> +    int route, do_ioapic = 0;
> +
> +    if ( (timer->tn <= 1) && (timer->state->config & HPET_CFG_LEGACY) ) {
>   

Whitespace is unnecessary here.

> +
> +        /* if LegacyReplacementRoute bit is set, HPET specification requires
> +         * timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
> +         * timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. 
> +         */
> +        if (timer->tn == 0) {
> +            irq=timer->state->irqs[0];
>   

But necessary here.

> +            do_ioapic = 1;
> +        } else
> +            irq=timer->state->irqs[8];
> +    }else{
>   
And here and in lots of other places.  Please try to make the whitespace 
consistent in this file as with the rest of QEMU.
> +        route=timer_int_route(timer);
> +        irq=timer->state->irqs[route];
> +    }
> +
> +    if(timer_enabled(timer) && hpet_enabled(timer->state)) {
> +        qemu_irq_pulse(irq);
> +        /* windows wants timer0 on irq2 and linux wants irq0, 
> +         * so we pulse both 
> +         */
> +        if (do_ioapic)
> +            qemu_irq_pulse(timer->state->irqs[2]);
>   

This seems curious and not quite right.  We should be able to detect 
whether the HPET is being used in IO APIC mode and raise the appropriate 
interrupt instead of generating a spurious irq0 interrupt.

> +    }
> +}
> +
> +static void hpet_save(QEMUFile *f, void *opaque)
> +{
> +    HPETState *s = opaque;
> +    int i;
> +    qemu_put_be64s(f, &s->config);
> +    qemu_put_be64s(f, &s->isr);
> +    /* save current counter value */
> +    s->hpet_counter = hpet_get_ticks(s); 
> +    qemu_put_be64s(f, &s->hpet_counter);
> +
> +    for(i = 0; i < HPET_NUM_TIMERS; i++) {
> +        qemu_put_8s(f, &s->timer[i].tn);
> +        qemu_put_be64s(f, &s->timer[i].config);
> +        qemu_put_be64s(f, &s->timer[i].cmp);
> +        qemu_put_be64s(f, &s->timer[i].fsb);
> +        qemu_put_be64s(f, &s->timer[i].period);
> +        if (s->timer[i].qemu_timer) {
> +            qemu_put_timer(f, s->timer[i].qemu_timer);
> +        }
>   

Would qemu_timer ever be NULL?

> +/* 
> + * timer expiration callback
> + */
> +
> +static void hpet_timer(void *opaque)
> +{
> +    HPETTimer *t = (HPETTimer*)opaque;
>   

The cast is unnecessary.

> +    uint64_t diff;
> +
> +    uint64_t period = t->period;
> +    uint64_t cur_tick = hpet_get_ticks(t->state);
> +
> +    if (timer_is_periodic(t) && (t->period != 0))
> +    {
>   

Bad formatting.

> +        if (t->config & HPET_TN_32BIT) {
> +            while (hpet_time_after(cur_tick, t->cmp))
> +                t->cmp = (uint32_t)(t->cmp + t->period);
> +        } else
> +            while (hpet_time_after64(cur_tick, t->cmp))
> +                t->cmp += period;
> +
> +        diff = hpet_calculate_diff(t, cur_tick);
> +        qemu_mod_timer(t->qemu_timer, qemu_get_clock(vm_clock) 
> +                        + (int64_t)ticks_to_ns(diff));
>   

May want to convert ticks_to_ns to take and return an int64_t.  The 
explicit casting could introduce very subtle bugs.

> +    }
> +    update_irq(t);
> +}
> +
> +static void hpet_set_timer(HPETTimer *t)
> +{
> +    uint64_t diff;
> +    uint64_t cur_tick = hpet_get_ticks(t->state);
> +
> +    diff = hpet_calculate_diff(t, cur_tick);
> +    qemu_mod_timer(t->qemu_timer, qemu_get_clock(vm_clock) + (int64_t)ticks_to_ns(diff));
> +}
> +
> +static void hpet_del_timer(HPETTimer *t)
> +{
> +    qemu_del_timer(t->qemu_timer);
> +}
> +
> +#ifdef HPET_DEBUG
> +static uint32_t hpet_ram_readb(void *opaque, target_phys_addr_t addr)
> +{
> +    fprintf(stderr, "qemu: hpet_read b at %" PRIx64 "\n", addr);
> +    return 10;
> +}
> +
> +static uint32_t hpet_ram_readw(void *opaque, target_phys_addr_t addr)
> +{
> +    fprintf(stderr, "qemu: hpet_read w at %" PRIx64 "\n", addr);
> +    return 10;
> +}
> +#endif
> +
> +static uint32_t hpet_ram_readl(void *opaque, target_phys_addr_t addr)
> +{
> +    HPETState *s = (HPETState *)opaque;
> +    uint64_t cur_tick;
> +#ifdef HPET_DEBUG
> +    fprintf(stderr, "qemu: hpet_read l at %" PRIx64 "\n", addr);
> +#endif
>   

I think it would be better to introduce a dprintf() so we could avoid 
having so many open coded #ifdefs.

> +    switch(addr - HPET_BASE) {
> +        case HPET_ID:
> +            return s->capability;
> +        case HPET_PERIOD:
> +            return s->capability >> 32; 
> +        case HPET_CFG:
> +            return s->config;
> +        case HPET_CFG + 4:
> +#ifdef HPET_DEBUG
> +            fprintf(stderr, "qemu: invalid hpet_read l at %" PRIx64 "\n", addr);
> +#endif
> +            return 0;
> +        case HPET_COUNTER: 
> +            if (hpet_enabled(s))
> +                cur_tick = hpet_get_ticks(s);
>   

Any reason for hpet_get_ticks(s) to not have this check integrated into it?

> +            else 
> +                cur_tick = s->hpet_counter;
> +#ifdef HPET_DEBUG
> +            fprintf(stderr, "qemu: reading counter %" PRIx64 "\n", cur_tick);
> +#endif
> +            return cur_tick;
> +        case HPET_COUNTER + 4:
> +            return 0;
> +        case HPET_STATUS:
> +            return s->isr;
> +        case HPET_TN_REGS:
> +            {
> +                uint8_t timer_id = (addr - HPET_BASE - 0x100) / 0x20;
> +                if (timer_id > HPET_NUM_TIMERS - 1) {
> +                    fprintf(stderr, "qemu: timer id out of range\n");
> +                    return 0;
> +                }
> +                HPETTimer *timer = &s->timer[timer_id];
> +
> +                switch((addr - HPET_BASE - 0x100) % 0x20) {
> +                    case HPET_TN_CFG:
> +                        return timer->config;
> +                    case HPET_TN_CFG + 4: // Interrupt capabilities
> +                        return timer->config >> 32;
> +                    case HPET_TN_CMP: // comparator register
> +                        return timer->cmp;
> +                    case HPET_TN_CMP + 4:
> +                        return timer->cmp >> 32;
> +                    case HPET_TN_ROUTE:
> +                        return timer->fsb >> 32;
> +                }
> +            }
> +            break;
> +    }
> +
> +#ifdef HPET_DEBUG
> +    fprintf(stderr, "qemu: invalid hpet_read l at %" PRIx64 "\n", addr);
> +#endif
> +    return 10;
> +}
> +
> +#ifdef HPET_DEBUG
> +static void hpet_ram_writeb(void *opaque, target_phys_addr_t addr, 
> +                            uint32_t value)
> +{
> +    fprintf(stderr, "qemu: invalid hpet_write b at %" PRIx64 " = %#x\n", 
> +                     addr, value);
> +}
> +
> +static void hpet_ram_writew(void *opaque, target_phys_addr_t addr,
> +                            uint32_t value)
> +{
> +    fprintf(stderr, "qemu: invalid hpet_write w at %" PRIx64 " = %#x\n", 
> +                     addr, value);
> +}
> +#endif
> +
> +static void hpet_ram_writel(void *opaque, target_phys_addr_t addr,
> +                            uint32_t value)
> +{
> +    int i;
> +    HPETState *s = (HPETState *)opaque;
> +    uint64_t old_val, new_val;
> +
> +#ifdef HPET_DEBUG
> +    fprintf(stderr, "qemu: hpet_write l at %" PRIx64 " = %#x\n", addr, value);
> +#endif
> +    old_val = hpet_ram_readl(opaque, addr);
> +    new_val = value;
> +
> +
> +    switch(addr - HPET_BASE) {
> +        case HPET_ID:
> +            return;
> +        case HPET_CFG:
> +            s->config = hpet_fixup_reg(new_val, old_val, 0x3);
> +            if (!(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE)) {
> +                /* Enable main counter and interrupt generation. */
> +                s->hpet_offset = ticks_to_ns(s->hpet_counter)
> +                                        - qemu_get_clock(vm_clock);
> +                for (i = 0; i < HPET_NUM_TIMERS; i++)
> +                    if ((&s->timer[i])->cmp != ~0ULL)
> +                        hpet_set_timer(&s->timer[i]);
> +            }
> +            else if ( (old_val & HPET_CFG_ENABLE) && 
> +                                           !(new_val & HPET_CFG_ENABLE)) {
> +                /* Halt main counter and disable interrupt generation. */
> +                s->hpet_counter = hpet_get_ticks(s); 
> +                for (i = 0; i < HPET_NUM_TIMERS; i++)
> +                    hpet_del_timer(&s->timer[i]);
> +            }
> +            hpet_legacy = s->config & HPET_CFG_LEGACY;
> +            break;
> +        case HPET_CFG + 4: 
> +#ifdef HPET_DEBUG
> +            fprintf(stderr, "qemu:   invalid hpet_writel at %" PRIx64 "= %#x\n",
> +                             addr, value);
> +#endif
> +            break;
> +	case HPET_STATUS:
> +            /* FIXME: need to handle level-triggered interrupts */
> +            break;
> +        case HPET_COUNTER:
> +           if (hpet_enabled(s)) 
> +               fprintf(stderr, "qemu: Writing counter while HPET enabled!\n"); 
> +           s->hpet_counter =  value;
> +           break;
> +        case HPET_COUNTER + 4:
> +           s->hpet_counter = (s->hpet_counter & 0xffffffffULL) 
> +                             | (((uint64_t)value) << 32);
> +#ifdef HPET_DEBUG
> +           fprintf(stderr, "qemu:   HPET ctr set to %#x -> %" PRIx64 "\n",
> +                    value, s->hpet_counter);
> +#endif
> +           break;
> +        case HPET_TN_REGS:
> +            {
> +                uint8_t timer_id = (addr - HPET_BASE - 0x100) / 0x20;
> +#ifdef HPET_DEBUG
> +                fprintf(stderr, 
> +                         "qemu:    hpet_write l timer_id = %#x \n", timer_id);
> +#endif
> +                HPETTimer *timer = &s->timer[timer_id];
> +                
> +                switch((addr - HPET_BASE - 0x100) % 0x20) {
> +                    case HPET_TN_CFG:
> +#ifdef HPET_DEBUG
> +                        fprintf(stderr, 
> +                                "qemu:   hpet_write l TN config value %#x\n", 
> +                                value);
> +#endif
> +                        timer->config = 
> +                                    hpet_fixup_reg(new_val, old_val, 0x3e4e);
> +                        if (new_val & HPET_TN_32BIT) {
> +                            timer->cmp = (uint32_t)timer->cmp;
> +                            timer->period = (uint32_t)timer->period;
> +                        }
> +                        if (new_val & HPET_TIMER_TYPE_LEVEL){
> +                            fprintf(stderr, 
> +                                "qemu: level-triggered hpet not supported\n");
> +                            exit (-1);
> +                        }
> +
> +                        break;
> +#ifdef HPET_DEBUG
> +                    case HPET_TN_CFG + 4: // Interrupt capabilities
> +                        fprintf(stderr, 
> +                                "qemu:   invalid write at %" PRIx64 "=%#x\n", 
> +                                 addr, value);
> +                        break;
> +#endif
> +                    case HPET_TN_CMP: // comparator register
> +#ifdef HPET_DEBUG
> +                        fprintf(stderr, 
> +                                "qemu:   hpet_writel TN cmp value %#x\n",
> +                                    value);
> +#endif
> +                        if ( timer->config & HPET_TN_32BIT)
> +                            new_val = (uint32_t)new_val;
> +                        if ( !timer_is_periodic(timer) ||
> +                                   (timer->config & HPET_TN_SETVAL) )
> +                            timer->cmp = new_val;
> +                        else {
> +                            /*
> +                             * FIXME: Clamp period to reasonable min value?
> +                             * Clamp period to reasonable max value
> +                             */
> +                             new_val &= (timer->config & HPET_TN_32BIT ? ~0u : ~0ull) >> 1;
> +                             timer->period = new_val;
> +                        }
> +                        timer->config &= ~HPET_TN_SETVAL;
> +                        if (hpet_enabled(s))
> +                            hpet_set_timer(timer);
> +                        break;
> +
> +                    case HPET_TN_ROUTE + 4:
> +#ifdef HPET_DEBUG
> +                        fprintf(stderr, 
> +                                "qemu:   invalid write at %" PRIx64 " = %#x\n", 
> +                                 addr, value);
> +#endif
> +                        break;
> +                }
> +            }
> +            break;
> +	default:
> +            fprintf(stderr, "qemu:  invalid write at %" PRIx64 " = %#x\n",
> +                    addr, value);
> +    }
> +
> +}
> +
> +static CPUReadMemoryFunc *hpet_ram_read[] = {
> +#ifdef HPET_DEBUG
> +    hpet_ram_readb,
> +    hpet_ram_readw,
> +#else
> +    NULL,
> +    NULL,
> +#endif
> +    hpet_ram_readl,
> +};
> +
> +static CPUWriteMemoryFunc *hpet_ram_write[] = {
> +#ifdef HPET_DEBUG
> +    hpet_ram_writeb,
> +    hpet_ram_writew,
> +#else
> +    NULL,
> +    NULL,
> +#endif
> +    hpet_ram_writel,
> +};
> +
> +static void hpet_reset(void *opaque) {
> +    HPETState *s = opaque;
> +    int i;
> +
> +    /* 32-bit main counter; 3 timers supported; LegacyReplacementRoute. 
> +     * 64-bit counter cannot be supported until 64 bit atomic reads are 
> +     * possible, since reading counter involves grabbing current qemu 
> +     * clock, can't be done in 2 32-bit chunks.
> +     */
> +    s->capability = 0x80868201ULL;
> +    s->capability |= ((HPET_CLK_PERIOD) << 32);
> +
> +    for(i=0; i<HPET_NUM_TIMERS; i++) {
> +        HPETTimer *timer = &s->timer[i];
> +        timer->tn = i;
> +        timer->cmp = ~0ULL;
> +        timer->config =  HPET_TN_PERIODIC_CAP;
> +        timer->config |=  0x0000ffffULL << 32;
> +        timer->state = s;
> +        timer->qemu_timer = qemu_new_timer(vm_clock, hpet_timer, timer);
> +    }
> +}
> +
> +
> +void hpet_init(qemu_irq *irq) {
> +    int iomemtype;
> +    HPETState *s;
> +    
> +    fprintf (stderr, "hpet_init\n");
>   

Don't unconditionally print to stderr.

> +
> +    /* XXX this is a dirty hack for HPET support w/o LPC
> +           Actually this is a config descriptor for the RCBA */
>   

What's the dirty hack?

> +    s = qemu_mallocz(sizeof(HPETState));
> +    s->irqs = irq;
> +    hpet_reset(s);
> +    register_savevm("hpet", -1, 1, hpet_save, hpet_load, s);
> +    qemu_register_reset(hpet_reset, s);
> +    /* HPET Area */
> +    iomemtype = cpu_register_io_memory(0, hpet_ram_read,
> +                    hpet_ram_write, s);
> +    cpu_register_physical_memory(HPET_BASE, 0x400, iomemtype);
> +}
> diff --git a/hw/i8254.c b/hw/i8254.c
> index 4813b03..7ffdf61 100644
> --- a/hw/i8254.c
> +++ b/hw/i8254.c
> @@ -26,6 +26,9 @@
>  #include "isa.h"
>  #include "qemu-timer.h"
>  
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +extern int hpet_legacy;
> +#endif
>  //#define DEBUG_PIT
>  
>  #define RW_STATE_LSB 1
> @@ -367,6 +370,14 @@ static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
>  
>      if (!s->irq_timer)
>          return;
> +
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +    if (hpet_legacy) {
> +        qemu_del_timer(s->irq_timer);
> +        return;
> +    }
> +#endif
>   

This should have a comment.

>      expire_time = pit_get_next_transition_time(s, current_time);
>      irq_level = pit_get_out1(s, current_time);
>      qemu_set_irq(s->irq, irq_level);
> diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
> index 30bb044..ffb7a48 100644
> --- a/hw/mc146818rtc.c
> +++ b/hw/mc146818rtc.c
> @@ -27,6 +27,10 @@
>  #include "pc.h"
>  #include "isa.h"
>  
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +extern int hpet_legacy;
> +#endif
> +
>  //#define DEBUG_CMOS
>  
>  #define RTC_SECONDS             0
> @@ -80,7 +84,11 @@ static void rtc_timer_update(RTCState *s, int64_t current_time)
>  
>      period_code = s->cmos_data[RTC_REG_A] & 0x0f;
>      if (period_code != 0 &&
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +        (s->cmos_data[RTC_REG_B] & REG_B_PIE) && !hpet_legacy) {
> +#else
>          (s->cmos_data[RTC_REG_B] & REG_B_PIE)) {
> +#endif
>   

This needs a comment.

>          if (period_code <= 2)
>              period_code += 7;
>          /* period in 32 Khz cycles */
> @@ -101,7 +109,10 @@ static void rtc_periodic_timer(void *opaque)
>  
>      rtc_timer_update(s, s->next_periodic_time);
>      s->cmos_data[RTC_REG_C] |= 0xc0;
> -    qemu_irq_raise(s->irq);
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +    if (!hpet_legacy)
> +#endif
> +        qemu_irq_raise(s->irq);
>  }
>  
>  static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t data)
> @@ -281,6 +292,12 @@ static void rtc_update_second(void *opaque)
>      RTCState *s = opaque;
>      int64_t delay;
>  
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +    if (hpet_legacy) {
> +        qemu_del_timer(s->second_timer2);
> +        return;
> +    }
> +#endif
>      /* if the oscillator is not in normal operation, we do not update */
>      if ((s->cmos_data[RTC_REG_A] & 0x70) != 0x20) {
>          s->next_second_time += ticks_per_sec;
> @@ -306,6 +323,12 @@ static void rtc_update_second2(void *opaque)
>  {
>      RTCState *s = opaque;
>  
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +    if (hpet_legacy) {
> +        qemu_del_timer(s->second_timer);
> +        return;
> +    }
> +#endif
>      if (!(s->cmos_data[RTC_REG_B] & REG_B_SET)) {
>          rtc_copy_date(s);
>      }
> @@ -359,7 +382,10 @@ static uint32_t cmos_ioport_read(void *opaque, uint32_t addr)
>              break;
>          case RTC_REG_C:
>              ret = s->cmos_data[s->cmos_index];
> -            qemu_irq_lower(s->irq);
> +#if defined TARGET_I386 || defined TARGET_X86_64
> +            if (!hpet_legacy)
> +#endif
> +                qemu_irq_lower(s->irq);
>              s->cmos_data[RTC_REG_C] = 0x00;
>              break;
>          default:
> diff --git a/hw/pc.c b/hw/pc.c
> index 34683e7..d2452e0 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -48,6 +48,7 @@
>  #define BIOS_CFG_IOPORT 0x510
>  
>  #define MAX_IDE_BUS 2
> +void hpet_init(qemu_irq irq);
>  
>  static fdctrl_t *floppy_controller;
>  static RTCState *rtc_state;
> @@ -949,6 +950,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>      }
>      pit = pit_init(0x40, i8259[0]);
>      pcspk_init(pit);
> +    hpet_init(i8259);
>      if (pci_enabled) {
>          pic_set_alt_irq_func(isa_pic, ioapic_set_irq, ioapic);
>      }
>   

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-20 19:08   ` Beth Kon
@ 2008-10-27 10:49     ` Dor Laor
  2008-10-30 17:57       ` Beth Kon
  0 siblings, 1 reply; 11+ messages in thread
From: Dor Laor @ 2008-10-27 10:49 UTC (permalink / raw)
  To: Beth Kon; +Cc: qemu-devel, kvm, Alexander Graf

[-- Attachment #1: Type: text/plain, Size: 1546 bytes --]

Beth Kon wrote:
> On Fri, 2008-10-17 at 16:49 +0100, Jamie Lokier wrote:
>   
>> Beth Kon wrote:
>>     
>>> Clock drift on Linux is in the range of .017% - .019%, loaded and unloaded. I
>>> haven't found a straightforward way to test on Windows and would appreciate
>>> any pointers to existing approaches.
>>>       
>> Is there any reason why there should be any clock drift, when the
>> guest is using a non-PIT clock?
>>
>> I'm probably being naive, but with 32-bit or 64-bit HPET counters
>> available to the guest, and accurate values from the CMOS clock
>> emulation, I don't see why drift would accumulate over the long term
>> relative to the host clock.
>>     
>
> I was measuring with ntpdate, so the drift is with respect to the ntp
> server pool, not the host clock. But in any case, since timer interrupts
> and reads of the hpet counter are at the mercy of the host scheduler
> (i.e., the qemu process can be swapped out at any time during hpet read
> or timer expiration), I'd guess there would always be some amount of
> inaccuracy. Also, qemu checks for timer expiration (qemu_run_timers) as
> part of a bigger loop (main_loop_wait), so the varying amounts of work
> to do elsewhere in the loop from iteration to iteration would also
> introduce irregular delays.
>   
This is exactly why hpet as the other clock emulation in qemu (pit, rtc, 
pm?) need
to check whether their irq was really injected. Gleb sent patches for 
the rtc, pit.
The idea is to check with the irq chip if the injected irq was really 
successful.

Dor

[-- Attachment #2: Type: text/html, Size: 2002 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Qemu-devel] Re: [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-21 15:21 ` [Qemu-devel] " Anthony Liguori
@ 2008-10-27 14:07   ` Beth Kon
  2008-10-29  3:41     ` Alexander Graf
  0 siblings, 1 reply; 11+ messages in thread
From: Beth Kon @ 2008-10-27 14:07 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel, kvm, Alexander Graf

On Tue, 2008-10-21 at 10:21 -0500, Anthony Liguori wrote: 
> Beth Kon wrote:
<snip>
Thanks for the feedback, Anthony. I'll only respond where I have
specific comments. Otherwise, I agree to your suggestions and will make
the changes. 
<snip> 
> > +    if(timer_enabled(timer) && hpet_enabled(timer->state)) {
> > +        qemu_irq_pulse(irq);
> > +        /* windows wants timer0 on irq2 and linux wants irq0, 
> > +         * so we pulse both 
> > +         */
> > +        if (do_ioapic)
> > +            qemu_irq_pulse(timer->state->irqs[2]);
> >   
> 
> This seems curious and not quite right.  We should be able to detect 
> whether the HPET is being used in IO APIC mode and raise the appropriate 
> interrupt instead of generating a spurious irq0 interrupt.
> 
After digging further on this, it turns out that the need for the 2
interrupts was caused by what looks like a problem with the way qemu is
generating interrupts for the ioapic. I will send out a separate patch
for that issue, and make the necessary changes in this hpet code.
> > +    }
> > +}
> > +
> > +static void hpet_save(QEMUFile *f, void *opaque)
> > +{
> > +    HPETState *s = opaque;
> > +    int i;
> > +    qemu_put_be64s(f, &s->config);
> > +    qemu_put_be64s(f, &s->isr);
> > +    /* save current counter value */
> > +    s->hpet_counter = hpet_get_ticks(s); 
> > +    qemu_put_be64s(f, &s->hpet_counter);
> > +
> > +    for(i = 0; i < HPET_NUM_TIMERS; i++) {
> > +        qemu_put_8s(f, &s->timer[i].tn);
> > +        qemu_put_be64s(f, &s->timer[i].config);
> > +        qemu_put_be64s(f, &s->timer[i].cmp);
> > +        qemu_put_be64s(f, &s->timer[i].fsb);
> > +        qemu_put_be64s(f, &s->timer[i].period);
> > +        if (s->timer[i].qemu_timer) {
> > +            qemu_put_timer(f, s->timer[i].qemu_timer);
> > +        }
> >   
> 
> Would qemu_timer ever be NULL?

You're right... the answer is no. I'll fix that.
<snip>
> > +        
> > +
> > +        diff = hpet_calculate_diff(t, cur_tick);
> > +        qemu_mod_timer(t->qemu_timer, qemu_get_clock(vm_clock) 
> > +                        + (int64_t)ticks_to_ns(diff));
> >   
> 
> May want to convert ticks_to_ns to take and return an int64_t.  The 
> explicit casting could introduce very subtle bugs.
> 
It seems better this way to me, since muldiv64 in ticks_to_ns takes uint64_t. 
The likelihood of diff being big enough to create a problem seems small enough. Am I 
missing something?
> > +        case HPET_COUNTER: 
> > +            if (hpet_enabled(s))
> > +                cur_tick = hpet_get_ticks(s);
> >   
> 
> Any reason for hpet_get_ticks(s) to not have this check integrated into it?
When the hpet is being disabled, we need to get the actual count, even though the 
hpet_enabled check would return false. So if I made this change it would introduce an 
ordering issue in the disable code (i.e., get the ticks before setting the hpet to
disabled)

<snip>
> > +
> > +    /* XXX this is a dirty hack for HPET support w/o LPC
> > +           Actually this is a config descriptor for the RCBA */
> >   
> 
> What's the dirty hack?
This comment is left over from Alexander Graf's code. I'm not sure why it is in this location and will I'll remove it. But
in comments on the first version of hpet code I produced, Alexander said, regarding the fixed assignment of HPET_BASE:

"This is a dirty hack that I did to make Mac OS X happy. Actually the HPET base address gets specified in the RCBA on the
 LPC and is configured by the BIOS to point to a valid address, with 0xfed00000 being the default (IIRC if you write 0 to 
 the fields you end up with that address)."

But in other areas of qemu code I see base addresses being hardcoded and am not sure anything different needs to be done 
here. Comments?


<snip>
> 
> Regards,
> 
> Anthony Liguori
> 
-- 
Elizabeth Kon (Beth)
IBM Linux Technology Center
Open Hypervisor Team
email: eak@us.ibm.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Qemu-devel] Re: [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-27 14:07   ` Beth Kon
@ 2008-10-29  3:41     ` Alexander Graf
  0 siblings, 0 replies; 11+ messages in thread
From: Alexander Graf @ 2008-10-29  3:41 UTC (permalink / raw)
  To: Beth Kon; +Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org





Am 27.10.2008 um 08:07 schrieb Beth Kon <eak@us.ibm.com>:

> On Tue, 2008-10-21 at 10:21 -0500, Anthony Liguori wrote:
>> Beth Kon wrote:
> <snip>
> Thanks for the feedback, Anthony. I'll only respond where I have
> specific comments. Otherwise, I agree to your suggestions and will  
> make
> the changes.
> <snip>
>>> +    if(timer_enabled(timer) && hpet_enabled(timer->state)) {
>>> +        qemu_irq_pulse(irq);
>>> +        /* windows wants timer0 on irq2 and linux wants irq0,
>>> +         * so we pulse both
>>> +         */
>>> +        if (do_ioapic)
>>> +            qemu_irq_pulse(timer->state->irqs[2]);
>>>
>>
>> This seems curious and not quite right.  We should be able to detect
>> whether the HPET is being used in IO APIC mode and raise the  
>> appropriate
>> interrupt instead of generating a spurious irq0 interrupt.
>>
> After digging further on this, it turns out that the need for the 2
> interrupts was caused by what looks like a problem with the way qemu  
> is
> generating interrupts for the ioapic. I will send out a separate patch
> for that issue, and make the necessary changes in this hpet code.
>>> +    }
>>> +}
>>> +
>>> +static void hpet_save(QEMUFile *f, void *opaque)
>>> +{
>>> +    HPETState *s = opaque;
>>> +    int i;
>>> +    qemu_put_be64s(f, &s->config);
>>> +    qemu_put_be64s(f, &s->isr);
>>> +    /* save current counter value */
>>> +    s->hpet_counter = hpet_get_ticks(s);
>>> +    qemu_put_be64s(f, &s->hpet_counter);
>>> +
>>> +    for(i = 0; i < HPET_NUM_TIMERS; i++) {
>>> +        qemu_put_8s(f, &s->timer[i].tn);
>>> +        qemu_put_be64s(f, &s->timer[i].config);
>>> +        qemu_put_be64s(f, &s->timer[i].cmp);
>>> +        qemu_put_be64s(f, &s->timer[i].fsb);
>>> +        qemu_put_be64s(f, &s->timer[i].period);
>>> +        if (s->timer[i].qemu_timer) {
>>> +            qemu_put_timer(f, s->timer[i].qemu_timer);
>>> +        }
>>>
>>
>> Would qemu_timer ever be NULL?
>
> You're right... the answer is no. I'll fix that.
> <snip>
>>> +
>>> +
>>> +        diff = hpet_calculate_diff(t, cur_tick);
>>> +        qemu_mod_timer(t->qemu_timer, qemu_get_clock(vm_clock)
>>> +                        + (int64_t)ticks_to_ns(diff));
>>>
>>
>> May want to convert ticks_to_ns to take and return an int64_t.  The
>> explicit casting could introduce very subtle bugs.
>>
> It seems better this way to me, since muldiv64 in ticks_to_ns takes  
> uint64_t.
> The likelihood of diff being big enough to create a problem seems  
> small enough. Am I
> missing something?
>>> +        case HPET_COUNTER:
>>> +            if (hpet_enabled(s))
>>> +                cur_tick = hpet_get_ticks(s);
>>>
>>
>> Any reason for hpet_get_ticks(s) to not have this check integrated  
>> into it?
> When the hpet is being disabled, we need to get the actual count,  
> even though the
> hpet_enabled check would return false. So if I made this change it  
> would introduce an
> ordering issue in the disable code (i.e., get the ticks before  
> setting the hpet to
> disabled)
>
> <snip>
>>> +
>>> +    /* XXX this is a dirty hack for HPET support w/o LPC
>>> +           Actually this is a config descriptor for the RCBA */
>>>
>>
>> What's the dirty hack?
> This comment is left over from Alexander Graf's code. I'm not sure  
> why it is in this location and will I'll remove it. But
> in comments on the first version of hpet code I produced, Alexander  
> said, regarding the fixed assignment of HPET_BASE:
>
> "This is a dirty hack that I did to make Mac OS X happy. Actually  
> the HPET base address gets specified in the RCBA on the
> LPC and is configured by the BIOS to point to a valid address, with  
> 0xfed00000 being the default (IIRC if you write 0 to
> the fields you end up with that address)."

Basically IIRC on the ICH-7 the HPET base address is configured  
indirectly by writing an address to the RCBA, which is mmio based  
space configured in the LPC pci device config space.
Since we don't have an LPC device, but a PIIX ISA bridge, there was no  
space to configure this on. That's why I faked and hardcoded some  
parts here, as the OS should read the acpi tables to get the address  
anyways.

Please double-check that information please, as I don't have the specs  
with me atm.

Alex

>
>
> But in other areas of qemu code I see base addresses being hardcoded  
> and am not sure anything different needs to be done
> here. Comments?
>
>
> <snip>
>>
>> Regards,
>>
>> Anthony Liguori
>>
> -- 
> Elizabeth Kon (Beth)
> IBM Linux Technology Center
> Open Hypervisor Team
> email: eak@us.ibm.com
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-27 10:49     ` Dor Laor
@ 2008-10-30 17:57       ` Beth Kon
  2008-11-05 12:55         ` Dor Laor
  0 siblings, 1 reply; 11+ messages in thread
From: Beth Kon @ 2008-10-30 17:57 UTC (permalink / raw)
  To: Dor Laor; +Cc: qemu-devel, kvm, Alexander Graf

On Mon, 2008-10-27 at 12:49 +0200, Dor Laor wrote:
> Beth Kon wrote: 
> > On Fri, 2008-10-17 at 16:49 +0100, Jamie Lokier wrote:
> >   
> > > Beth Kon wrote:
> > >     
> > > > Clock drift on Linux is in the range of .017% - .019%, loaded and unloaded. I
> > > > haven't found a straightforward way to test on Windows and would appreciate
> > > > any pointers to existing approaches.
> > > >       
> > > Is there any reason why there should be any clock drift, when the
> > > guest is using a non-PIT clock?
> > > 
> > > I'm probably being naive, but with 32-bit or 64-bit HPET counters
> > > available to the guest, and accurate values from the CMOS clock
> > > emulation, I don't see why drift would accumulate over the long term
> > > relative to the host clock.
> > >     
> > 
> > I was measuring with ntpdate, so the drift is with respect to the ntp
> > server pool, not the host clock. But in any case, since timer interrupts
> > and reads of the hpet counter are at the mercy of the host scheduler
> > (i.e., the qemu process can be swapped out at any time during hpet read
> > or timer expiration), I'd guess there would always be some amount of
> > inaccuracy. Also, qemu checks for timer expiration (qemu_run_timers) as
> > part of a bigger loop (main_loop_wait), so the varying amounts of work
> > to do elsewhere in the loop from iteration to iteration would also
> > introduce irregular delays.
> >   
> This is exactly why hpet as the other clock emulation in qemu (pit,
> rtc, pm?) need
> to check whether their irq was really injected. Gleb sent patches for
> the rtc, pit.
> The idea is to check with the irq chip if the injected irq was really
> successful.
> 
I assume these are the patches you're referring to?
http://thread.gmane.org/gmane.comp.emulators.kvm.devel/18974/focus=18977

Looks like they were never merged. Does anyone know the history on that?
Also, HPET generates edge-triggered interrupts (as dictated by Linux and
Windows) so I'm not sure if this scheme could work for it.
> Dor
-- 
Elizabeth Kon (Beth)
IBM Linux Technology Center
Open Hypervisor Team
email: eak@us.ibm.com

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-10-30 17:57       ` Beth Kon
@ 2008-11-05 12:55         ` Dor Laor
  2008-11-05 14:48           ` Jamie Lokier
  0 siblings, 1 reply; 11+ messages in thread
From: Dor Laor @ 2008-11-05 12:55 UTC (permalink / raw)
  To: Beth Kon; +Cc: qemu-devel, kvm, Alexander Graf

[-- Attachment #1: Type: text/plain, Size: 2279 bytes --]

Beth Kon wrote:
> On Mon, 2008-10-27 at 12:49 +0200, Dor Laor wrote:
>   
>> Beth Kon wrote: 
>>     
>>> On Fri, 2008-10-17 at 16:49 +0100, Jamie Lokier wrote:
>>>   
>>>       
>>>> Beth Kon wrote:
>>>>     
>>>>         
>>>>> Clock drift on Linux is in the range of .017% - .019%, loaded and unloaded. I
>>>>> haven't found a straightforward way to test on Windows and would appreciate
>>>>> any pointers to existing approaches.
>>>>>       
>>>>>           
>>>> Is there any reason why there should be any clock drift, when the
>>>> guest is using a non-PIT clock?
>>>>
>>>> I'm probably being naive, but with 32-bit or 64-bit HPET counters
>>>> available to the guest, and accurate values from the CMOS clock
>>>> emulation, I don't see why drift would accumulate over the long term
>>>> relative to the host clock.
>>>>     
>>>>         
>>> I was measuring with ntpdate, so the drift is with respect to the ntp
>>> server pool, not the host clock. But in any case, since timer interrupts
>>> and reads of the hpet counter are at the mercy of the host scheduler
>>> (i.e., the qemu process can be swapped out at any time during hpet read
>>> or timer expiration), I'd guess there would always be some amount of
>>> inaccuracy. Also, qemu checks for timer expiration (qemu_run_timers) as
>>> part of a bigger loop (main_loop_wait), so the varying amounts of work
>>> to do elsewhere in the loop from iteration to iteration would also
>>> introduce irregular delays.
>>>   
>>>       
>> This is exactly why hpet as the other clock emulation in qemu (pit,
>> rtc, pm?) need
>> to check whether their irq was really injected. Gleb sent patches for
>> the rtc, pit.
>> The idea is to check with the irq chip if the injected irq was really
>> successful.
>>
>>     
> I assume these are the patches you're referring to?
> http://thread.gmane.org/gmane.comp.emulators.kvm.devel/18974/focus=18977
>
>   
Yap, Gleb just resent it.
> Looks like they were never merged. Does anyone know the history on that?
> Also, HPET generates edge-triggered interrupts (as dictated by Linux and
> Windows) so I'm not sure if this scheme could work for it.
>   
Right, I think if this time drift fix approach is accepted, it should 
also be implemented
for qemu_irq_pulse too.
>> Dor
>>     


[-- Attachment #2: Type: text/html, Size: 3197 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3)
  2008-11-05 12:55         ` Dor Laor
@ 2008-11-05 14:48           ` Jamie Lokier
  0 siblings, 0 replies; 11+ messages in thread
From: Jamie Lokier @ 2008-11-05 14:48 UTC (permalink / raw)
  To: Dor Laor; +Cc: Beth Kon, qemu-devel, kvm, Alexander Graf

Dor Laor wrote:
> Right, I think if this time drift fix approach is accepted, it should also be
> implemented for qemu_irq_pulse too.

I don't think simply injecting timer interrupts is the right approach.

I suspect doing that to compensate for lost ticks can _cause_ drift in
some guests.

Some guest kernels have lost-timer-interrupt detection.  For example,
by reading the local TSC, local APIC timer, PM timer, and/or HPET
counter, they can determine (on a real machine) when some timer
interrupts were missed, and compensate for it.

If a burst of timer interrupts are sent by QEMU to compensate for lost
ones due to host scheduling delays, on servicing the first of those
interrupts the guest may read a timer value which indicates time has
jumped forward, and the guest may adjust it's clock to compensate for
missing interrupts.  On servicing the remaining ones injected by QEMU,
the guest will read timer values which haven't increased by much, but
since _extra_ timer interrupts aren't expected on real hardware, the
guest may not implement reverse compensation.

The result will be QEMU sends a burst of timer interrupts, and the
guest clock moves _forward_ by a few ticks.

I think a better to handle this in QEMU is to still inject those
"lost" interrupts but also modify the values returned when the guest
reads timers, so they appear to increment by a normal amount between
each interrupt.

In other words, rather than counting ticks, modify the flow of virtual
time - stretch it when the virtual CPU is supposed to run but is
delayed, and compress it afterwards to resync with real time.

On some architectures, this may be even more important with "tickless"
guest kernels which are busily running high resolution timers with
different delays between each event.  Simply counting missed
interrupts and injecting a burst doesn't work if the guest depends on
reprogramming a timer chip to a different delay between each interrupt
event (like x86 PIT), but it's fine with timer chips (like HPET) which
have a free-running counter and alarm register.

That method should work with all guests, whatever timers they use and
whatever their interrupt handler does, and result in _zero_ long term
drift as long as the virtual CPU is not persistantly starved.  In the
case of timer chips like HPET, which have specified nominal frequency,
it may mean you don't have to run NTP on the guest at all.

-- Jamie

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-11-05 14:49 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-17 12:17 [Qemu-devel] [PATCH 1/2] Add HPET emulation to qemu (v3) Beth Kon
2008-10-17 13:14 ` rinku buragohain
2008-10-17 15:49 ` Jamie Lokier
2008-10-20 19:08   ` Beth Kon
2008-10-27 10:49     ` Dor Laor
2008-10-30 17:57       ` Beth Kon
2008-11-05 12:55         ` Dor Laor
2008-11-05 14:48           ` Jamie Lokier
2008-10-21 15:21 ` [Qemu-devel] " Anthony Liguori
2008-10-27 14:07   ` Beth Kon
2008-10-29  3:41     ` Alexander Graf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).