* [PATCH 1/6] KVM: In kernel pit model
@ 2008-03-04 10:22 Yang, Sheng
2008-03-05 6:54 ` Avi Kivity
0 siblings, 1 reply; 19+ messages in thread
From: Yang, Sheng @ 2008-03-04 10:22 UTC (permalink / raw)
To: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 21644 bytes --]
From f389ff3bc388888b1684f15d8073c6d9abd2f2c9 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Mon, 28 Jan 2008 05:10:22 +0800
Subject: [PATCH] KVM: In kernel pit model
The patch moved PIT from userspace to kernel, and increase the timer accuracy
greatly.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 583
++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 59 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 659 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o
ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..6bbb254
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,583 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the
rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#if 0
+#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
+#else
+#define pit_debug(fmt, arg...)
+#endif
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, 1e9);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pit_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int
is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, 1e9, PIT_FREQ);
+
+ pit_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ ASSERT(mutex_is_locked(&ps->lock));
+
+ pit_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= 3;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pit_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & 3;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= 3;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= PIT_BASE_ADDRESS) &&
+ (addr < PIT_BASE_ADDRESS + PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time >= MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..2f6645d
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,59 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define PIT_BASE_ADDRESS 0x40
+#define SPEAKER_BASE_ADDRESS 0x61
+#define PIT_MEM_LENGTH 4
+#define PIT_FREQ 1193181
+#define MAX_PIT_INTR_INTERVAL HZ / 100
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f09840..a79b221 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -805,6 +806,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1581,6 +1583,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3359,6 +3367,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #2: 0001-In-kernel-pit-model.patch --]
[-- Type: text/x-diff, Size: 21634 bytes --]
From f389ff3bc388888b1684f15d8073c6d9abd2f2c9 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Mon, 28 Jan 2008 05:10:22 +0800
Subject: [PATCH] In kernel pit model
The patch moved PIT from userspace to kernel, and increase the timer accuracy greatly.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 583 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 59 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 659 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..6bbb254
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,583 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#if 0
+#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
+#else
+#define pit_debug(fmt, arg...)
+#endif
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, 1e9);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pit_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, 1e9, PIT_FREQ);
+
+ pit_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ ASSERT(mutex_is_locked(&ps->lock));
+
+ pit_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= 3;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pit_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & 3;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= 3;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= PIT_BASE_ADDRESS) &&
+ (addr < PIT_BASE_ADDRESS + PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time >= MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..2f6645d
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,59 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define PIT_BASE_ADDRESS 0x40
+#define SPEAKER_BASE_ADDRESS 0x61
+#define PIT_MEM_LENGTH 4
+#define PIT_FREQ 1193181
+#define MAX_PIT_INTR_INTERVAL HZ / 100
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f09840..a79b221 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -805,6 +806,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1581,6 +1583,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3359,6 +3367,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #3: Type: text/plain, Size: 228 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
[-- Attachment #4: Type: text/plain, Size: 158 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-04 10:22 [PATCH 1/6] KVM: In kernel pit model Yang, Sheng
@ 2008-03-05 6:54 ` Avi Kivity
2008-03-05 7:04 ` Yang, Sheng
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2008-03-05 6:54 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel
Yang, Sheng wrote:
> +
> +static int pit_get_out(struct kvm *kvm, int channel)
> +{
> + struct kvm_kpit_channel_state *c =
> + &kvm->arch.vpit->pit_state.channels[channel];
> + s64 d, t;
> + int out;
> +
> + ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
> +
> + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
> + d = muldiv64(t, PIT_FREQ, 1e9);
>
NSECS_PER_SEC to avoid people jumping on you saying you can't use
floating point in the kernel (yes, the compiler converts it at
compile-time, but they'll still say it).
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-05 6:54 ` Avi Kivity
@ 2008-03-05 7:04 ` Yang, Sheng
2008-03-05 9:15 ` Ingo Molnar
0 siblings, 1 reply; 19+ messages in thread
From: Yang, Sheng @ 2008-03-05 7:04 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 21261 bytes --]
On Wednesday 05 March 2008 14:54:06 Avi Kivity wrote:
> Yang, Sheng wrote:
> > +
> > +static int pit_get_out(struct kvm *kvm, int channel)
> > +{
> > + struct kvm_kpit_channel_state *c =
> > + &kvm->arch.vpit->pit_state.channels[channel];
> > + s64 d, t;
> > + int out;
> > +
> > + ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
> > +
> > + t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
> > + d = muldiv64(t, PIT_FREQ, 1e9);
>
> NSECS_PER_SEC to avoid people jumping on you saying you can't use
> floating point in the kernel (yes, the compiler converts it at
> compile-time, but they'll still say it).
Sorry... I remembered I've modified it, seems something got wrong...
Here is the updated patch:
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 583
++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 59 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 659 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o
ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..84fb3d9
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,583 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the
rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#if 1
+#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
+#else
+#define pit_debug(fmt, arg...)
+#endif
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pit_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int
is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, NSEC_PER_SEC, PIT_FREQ);
+
+ pit_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ ASSERT(mutex_is_locked(&ps->lock));
+
+ pit_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= 3;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pit_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & 3;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= 3;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= PIT_BASE_ADDRESS) &&
+ (addr < PIT_BASE_ADDRESS + PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time >= MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..2f6645d
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,59 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define PIT_BASE_ADDRESS 0x40
+#define SPEAKER_BASE_ADDRESS 0x61
+#define PIT_MEM_LENGTH 4
+#define PIT_FREQ 1193181
+#define MAX_PIT_INTR_INTERVAL HZ / 100
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f09840..a79b221 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -805,6 +806,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1581,6 +1583,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3359,6 +3367,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #2: 0001-In-kernel-pit-model.patch --]
[-- Type: text/x-diff, Size: 21652 bytes --]
From 14e4b71fb765c8243d968c4ad8e3271c3188c469 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Mon, 28 Jan 2008 05:10:22 +0800
Subject: [PATCH] In kernel pit model
The patch moved PIT from userspace to kernel, and increase the timer accuracy greatly.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 583 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 59 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 659 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..84fb3d9
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,583 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#if 1
+#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
+#else
+#define pit_debug(fmt, arg...)
+#endif
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pit_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, NSEC_PER_SEC, PIT_FREQ);
+
+ pit_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ ASSERT(mutex_is_locked(&ps->lock));
+
+ pit_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= 3;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pit_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & 3;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= 3;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= PIT_BASE_ADDRESS) &&
+ (addr < PIT_BASE_ADDRESS + PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time >= MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..2f6645d
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,59 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define PIT_BASE_ADDRESS 0x40
+#define SPEAKER_BASE_ADDRESS 0x61
+#define PIT_MEM_LENGTH 4
+#define PIT_FREQ 1193181
+#define MAX_PIT_INTR_INTERVAL HZ / 100
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f09840..a79b221 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -805,6 +806,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1581,6 +1583,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3359,6 +3367,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #3: Type: text/plain, Size: 228 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
[-- Attachment #4: Type: text/plain, Size: 158 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-05 7:04 ` Yang, Sheng
@ 2008-03-05 9:15 ` Ingo Molnar
2008-03-05 11:35 ` Yang, Sheng
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2008-03-05 9:15 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel, Avi Kivity
* Yang, Sheng <sheng.yang@intel.com> wrote:
> +#if 1
> +#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
> +#else
> +#define pit_debug(fmt, arg...)
> +#endif
this should use pr_debug() instead i guess.
> +#ifndef CONFIG_X86_64
> +#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
> +#else
> +#define mod_64(x, y) ((x) % (y))
> +#endif
> +/* Compute with 96 bit intermediate result: (a*b)/c */
> +static u64 muldiv64(u64 a, u32 b, u32 c)
> +{
> + union {
> + u64 ll;
> + struct {
> + u32 low, high;
> + } l;
> + } u, res;
> + u64 rl, rh;
> +
> + u.ll = a;
> + rl = (u64)u.l.low * (u64)b;
> + rh = (u64)u.l.high * (u64)b;
> + rh += (rl >> 32);
> + res.l.high = div64_64(rh, c);
> + res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
> + return res.ll;
> +}
eventually these should move into a generic file, for example
lib/div64.c.
> + ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
could we please standardize on WARN_ON(!(x)) instead?
> +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
> +{
> + struct kvm_kpit_state *ps;
> + int restart_timer = 0;
> +
> + ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
> +
> + restart_timer = __pit_timer_fn(ps);
> +
> + if (restart_timer)
> + return HRTIMER_RESTART;
> + else
> + return HRTIMER_NORESTART;
> +}
elegant use of hrtimers! :-)
> + if (val == 0)
> + val = 0x10000;
magic constant.
> + val &= 0xff;
> + addr &= 3;
magic constants.
Ingo
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-05 9:15 ` Ingo Molnar
@ 2008-03-05 11:35 ` Yang, Sheng
2008-03-05 11:43 ` Avi Kivity
` (2 more replies)
0 siblings, 3 replies; 19+ messages in thread
From: Yang, Sheng @ 2008-03-05 11:35 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel, Avi Kivity
Thanks for comments!
On Wednesday 05 March 2008 17:15:29 Ingo Molnar wrote:
> * Yang, Sheng <sheng.yang@intel.com> wrote:
> > +#if 1
> > +#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
> > +#else
> > +#define pit_debug(fmt, arg...)
> > +#endif
>
> this should use pr_debug() instead i guess.
Um... I followed example on ./virt/kvm/ioapic.c here. Though I think it's good
to substitute all self defined debug printk with pr_debug, why KVM have
little pr_xxx(the only ones are in x86.c)? Maybe for KVM is acting more like
a separate driver, and using printk is easier for separate debug? I really
don't know...
> > +#ifndef CONFIG_X86_64
> > +#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
> > +#else
> > +#define mod_64(x, y) ((x) % (y))
> > +#endif
> >
> > +/* Compute with 96 bit intermediate result: (a*b)/c */
> > +static u64 muldiv64(u64 a, u32 b, u32 c)
> > +{
> > + union {
> > + u64 ll;
> > + struct {
> > + u32 low, high;
> > + } l;
> > + } u, res;
> > + u64 rl, rh;
> > +
> > + u.ll = a;
> > + rl = (u64)u.l.low * (u64)b;
> > + rh = (u64)u.l.high * (u64)b;
> > + rh += (rl >> 32);
> > + res.l.high = div64_64(rh, c);
> > + res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
> > + return res.ll;
> > +}
>
> eventually these should move into a generic file, for example
> lib/div64.c.
That's my hope (of course with big endian support). But is there any user
outside KVM? I think it may not easy to get into the generic part.
> > + ASSERT(mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
>
> could we please standardize on WARN_ON(!(x)) instead?
Sure. :)
> > +static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
> > +{
> > + struct kvm_kpit_state *ps;
> > + int restart_timer = 0;
> > +
> > + ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
> > +
> > + restart_timer = __pit_timer_fn(ps);
> > +
> > + if (restart_timer)
> > + return HRTIMER_RESTART;
> > + else
> > + return HRTIMER_NORESTART;
> > +}
>
> elegant use of hrtimers! :-)
>
> > + if (val == 0)
> > + val = 0x10000;
>
> magic constant.
>
> > + val &= 0xff;
> > + addr &= 3;
>
> magic constants.
I will update these constants. :) In fact, I have thought of these before, but
not insist...
Thanks!
Yang, Sheng
>
> Ingo
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-05 11:35 ` Yang, Sheng
@ 2008-03-05 11:43 ` Avi Kivity
2008-03-05 12:24 ` Ingo Molnar
2008-03-06 7:56 ` Yang, Sheng
2 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2008-03-05 11:43 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel
Yang, Sheng wrote:
> Thanks for comments!
>
> On Wednesday 05 March 2008 17:15:29 Ingo Molnar wrote:
>
>> * Yang, Sheng <sheng.yang@intel.com> wrote:
>>
>>> +#if 1
>>> +#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
>>> +#else
>>> +#define pit_debug(fmt, arg...)
>>> +#endif
>>>
>> this should use pr_debug() instead i guess.
>>
>
> Um... I followed example on ./virt/kvm/ioapic.c here. Though I think it's good
> to substitute all self defined debug printk with pr_debug, why KVM have
> little pr_xxx(the only ones are in x86.c)? Maybe for KVM is acting more like
> a separate driver, and using printk is easier for separate debug? I really
> don't know...
>
>
It's mostly due to lack of knowledge about pr_debug(); it wasn't
intentional.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-05 11:35 ` Yang, Sheng
2008-03-05 11:43 ` Avi Kivity
@ 2008-03-05 12:24 ` Ingo Molnar
2008-03-06 7:56 ` Yang, Sheng
2 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-03-05 12:24 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel, Avi Kivity
* Yang, Sheng <sheng.yang@intel.com> wrote:
> > * Yang, Sheng <sheng.yang@intel.com> wrote:
> > > +#if 1
> > > +#define pit_debug(fmt, arg...) printk(KERN_WARNING fmt, ##arg)
> > > +#else
> > > +#define pit_debug(fmt, arg...)
> > > +#endif
> >
> > this should use pr_debug() instead i guess.
>
> Um... I followed example on ./virt/kvm/ioapic.c here. Though I think
> it's good to substitute all self defined debug printk with pr_debug,
> why KVM have little pr_xxx(the only ones are in x86.c)? Maybe for KVM
> is acting more like a separate driver, and using printk is easier for
> separate debug? I really don't know...
it's just a small detail - it's really not a big issue and my suggestion
should be functionally equivalent. What i meant is that with pr_debug()
you can remove the pit_debug() define altogether (and change all
pit_debug's to pr_debug). In that case all you need to do is to get the
printks is to stick this to the head of the source code file (to before
the include files):
#define DEBUG
and the pr_debug() calls turn from a NOP into a printk(KERN_DEBUG,...).
Ingo
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-05 11:35 ` Yang, Sheng
2008-03-05 11:43 ` Avi Kivity
2008-03-05 12:24 ` Ingo Molnar
@ 2008-03-06 7:56 ` Yang, Sheng
2008-03-06 8:06 ` Avi Kivity
2008-03-06 9:14 ` Ingo Molnar
2 siblings, 2 replies; 19+ messages in thread
From: Yang, Sheng @ 2008-03-06 7:56 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 21262 bytes --]
On Wednesday 05 March 2008 19:35:40 Yang, Sheng wrote:
> On Wednesday 05 March 2008 17:15:29 Ingo Molnar wrote:
> > * Yang, Sheng <sheng.yang@intel.com> wrote:
> > > + val &= 0xff;
> > > + addr &= 3;
> >
> > magic constants.
>
> I will update these constants. :) In fact, I have thought of these before,
> but not insist...
Here is the updated patch. I kept 0xff because I think it's OK for understand
easily. :)
Thanks.
Yang, Sheng
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 583
++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 60 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 660 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o
ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..3a7e9da
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,583 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the
rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pr_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int
is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
+
+ pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ WARN_ON(!mutex_is_locked(&ps->lock));
+
+ pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ /* Though spec said the state of 8254 is undefined after power-up,
+ * seems some tricky OS like Windows XP depends on IRQ0 interrupt
+ * when booting up.
+ * So here setting initialize rate for it, and not a specific number */
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= KVM_PIT_CHANNEL_MASK;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= KVM_PIT_CHANNEL_MASK;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+ (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == KVM_SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time
+ >= KVM_MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..38184d5
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,60 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define KVM_PIT_BASE_ADDRESS 0x40
+#define KVM_SPEAKER_BASE_ADDRESS 0x61
+#define KVM_PIT_MEM_LENGTH 4
+#define KVM_PIT_FREQ 1193181
+#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
+#define KVM_PIT_CHANNEL_MASK 0x3
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e65a9d6..963c093 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -805,6 +806,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1581,6 +1583,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3359,6 +3367,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #2: 0001-In-kernel-pit-model.patch --]
[-- Type: text/x-diff, Size: 21944 bytes --]
From 7064ec4c1afd2c04acdbcda82986befc70241ae9 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Mon, 28 Jan 2008 05:10:22 +0800
Subject: [PATCH] In kernel pit model
The patch moved PIT from userspace to kernel, and increase the timer accuracy greatly.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 583 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 60 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 660 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..3a7e9da
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,583 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pr_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
+
+ pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ WARN_ON(!mutex_is_locked(&ps->lock));
+
+ pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ /* Though spec said the state of 8254 is undefined after power-up,
+ * seems some tricky OS like Windows XP depends on IRQ0 interrupt
+ * when booting up.
+ * So here setting initialize rate for it, and not a specific number */
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= KVM_PIT_CHANNEL_MASK;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= KVM_PIT_CHANNEL_MASK;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+ (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == KVM_SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time
+ >= KVM_MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..38184d5
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,60 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define KVM_PIT_BASE_ADDRESS 0x40
+#define KVM_SPEAKER_BASE_ADDRESS 0x61
+#define KVM_PIT_MEM_LENGTH 4
+#define KVM_PIT_FREQ 1193181
+#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
+#define KVM_PIT_CHANNEL_MASK 0x3
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e65a9d6..963c093 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -805,6 +806,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1581,6 +1583,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3359,6 +3367,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #3: Type: text/plain, Size: 228 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
[-- Attachment #4: Type: text/plain, Size: 158 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-06 7:56 ` Yang, Sheng
@ 2008-03-06 8:06 ` Avi Kivity
2008-03-06 8:43 ` Yang, Sheng
2008-03-06 9:14 ` Ingo Molnar
1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2008-03-06 8:06 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel
Yang, Sheng wrote:
> Here is the updated patch. I kept 0xff because I think it's OK for understand
> easily. :)
>
>
Any news on the regression with older Linux guests? That's the only
thing keeping my from applying the patchset.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-06 8:06 ` Avi Kivity
@ 2008-03-06 8:43 ` Yang, Sheng
2008-03-06 9:41 ` Yang, Sheng
0 siblings, 1 reply; 19+ messages in thread
From: Yang, Sheng @ 2008-03-06 8:43 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
On Thursday 06 March 2008 16:06:51 Avi Kivity wrote:
> Yang, Sheng wrote:
> > Here is the updated patch. I kept 0xff because I think it's OK for
> > understand easily. :)
>
> Any news on the regression with older Linux guests? That's the only
> thing keeping my from applying the patchset.
Not much. PIT interrupts injection is all right, 1000 per second. Just found
two clock source in guest got problem: PM timer and TSC. Seems both due
to compensate for lost ticks. Get rid of something like "jiffies_64 +=
lost -1" get the time ok.
And I think it's a exist bug. As you see, in most condition, userspace pit +
in kernel irqchip resulted in time flow slowly, due to the lost of
interrupts. But RHEL4 runs even faster than host...
I will do more investigate.
--
Thanks
Yang, Sheng
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-06 7:56 ` Yang, Sheng
2008-03-06 8:06 ` Avi Kivity
@ 2008-03-06 9:14 ` Ingo Molnar
2008-03-06 9:25 ` Yang, Sheng
1 sibling, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2008-03-06 9:14 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel, Avi Kivity
* Yang, Sheng <sheng.yang@intel.com> wrote:
> + /* Though spec said the state of 8254 is undefined after power-up,
> + * seems some tricky OS like Windows XP depends on IRQ0 interrupt
> + * when booting up.
> + * So here setting initialize rate for it, and not a specific number */
another silly style nit, the canonical comment style is:
> + /*
> + * Though spec said the state of 8254 is undefined after power-up,
> + * seems some tricky OS like Windows XP depends on IRQ0 interrupt
> + * when booting up.
> + * So here setting initialize rate for it, and not a specific number
> + */
we are standardizing on that in other areas of arch/x86 and KVM uses
that too.
but your patch looks good otherwise :)
/me goes back into lurker mode
Ingo
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-06 9:14 ` Ingo Molnar
@ 2008-03-06 9:25 ` Yang, Sheng
2008-03-06 12:33 ` Ingo Molnar
0 siblings, 1 reply; 19+ messages in thread
From: Yang, Sheng @ 2008-03-06 9:25 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel, Avi Kivity
On Thursday 06 March 2008 17:14:34 Ingo Molnar wrote:
> * Yang, Sheng <sheng.yang@intel.com> wrote:
> > + /* Though spec said the state of 8254 is undefined after power-up,
> > + * seems some tricky OS like Windows XP depends on IRQ0 interrupt
> > + * when booting up.
> > + * So here setting initialize rate for it, and not a specific number */
>
> another silly style nit, the canonical comment style is:
> > + /*
> > + * Though spec said the state of 8254 is undefined after power-up,
> > + * seems some tricky OS like Windows XP depends on IRQ0 interrupt
> > + * when booting up.
> > + * So here setting initialize rate for it, and not a specific number
> > + */
>
> we are standardizing on that in other areas of arch/x86 and KVM uses
> that too.
... Seems I still can't fully depend on script/checkpatch.pl to correct my
coding style error...
> but your patch looks good otherwise :)
>
> /me goes back into lurker mode
>
> Ingo
Thanks for review! :)
Yang, Sheng
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-06 8:43 ` Yang, Sheng
@ 2008-03-06 9:41 ` Yang, Sheng
2008-03-07 8:12 ` Yang, Sheng
0 siblings, 1 reply; 19+ messages in thread
From: Yang, Sheng @ 2008-03-06 9:41 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
On Thursday 06 March 2008 16:43:18 Yang, Sheng wrote:
> On Thursday 06 March 2008 16:06:51 Avi Kivity wrote:
> > Yang, Sheng wrote:
> > > Here is the updated patch. I kept 0xff because I think it's OK for
> > > understand easily. :)
> >
> > Any news on the regression with older Linux guests? That's the only
> > thing keeping my from applying the patchset.
>
> Not much. PIT interrupts injection is all right, 1000 per second. Just
> found two clock source in guest got problem: PM timer and TSC. Seems both
> due to compensate for lost ticks. Get rid of something like "jiffies_64 +=
> lost -1" get the time ok.
>
> And I think it's a exist bug. As you see, in most condition, userspace pit
> + in kernel irqchip resulted in time flow slowly, due to the lost of
> interrupts. But RHEL4 runs even faster than host...
>
> I will do more investigate.
Get some clues.
It seems like for the kernel which is active to inject lost interrupt, when
some PIT interrupts were pending, the TSC/other clocksource got the wrong
impression that some PIT interrupt lost, then using itself's counter to
adjust the jiffies. So the problem occurs.
Xen adjust TSC to fit this mode. It pull TSC backward to get the correct value
when injecting one PIT interrupt. But this would causing trouble on some
Windows. Then, it got the "time mode" concept...
--
Thanks
Yang, Sheng
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-06 9:25 ` Yang, Sheng
@ 2008-03-06 12:33 ` Ingo Molnar
0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2008-03-06 12:33 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel, Avi Kivity
* Yang, Sheng <sheng.yang@intel.com> wrote:
> > another silly style nit, the canonical comment style is:
> > > + /*
> > > + * Though spec said the state of 8254 is undefined after power-up,
> > > + * seems some tricky OS like Windows XP depends on IRQ0 interrupt
> > > + * when booting up.
> > > + * So here setting initialize rate for it, and not a specific number
> > > + */
> >
> > we are standardizing on that in other areas of arch/x86 and KVM uses
> > that too.
>
> ... Seems I still can't fully depend on script/checkpatch.pl to
> correct my coding style error...
yeah - checkpatch.pl tries to err on the side of caution, so that it can
be used in automated setups. (And your patch was pretty clean already so
this was mostly a sidenote.)
Ingo
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-06 9:41 ` Yang, Sheng
@ 2008-03-07 8:12 ` Yang, Sheng
2008-03-07 8:53 ` Avi Kivity
0 siblings, 1 reply; 19+ messages in thread
From: Yang, Sheng @ 2008-03-07 8:12 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
On Thursday 06 March 2008 17:41:03 Yang, Sheng wrote:
> On Thursday 06 March 2008 16:43:18 Yang, Sheng wrote:
> > On Thursday 06 March 2008 16:06:51 Avi Kivity wrote:
> > > Yang, Sheng wrote:
> > > > Here is the updated patch. I kept 0xff because I think it's OK for
> > > > understand easily. :)
> > >
> > > Any news on the regression with older Linux guests? That's the only
> > > thing keeping my from applying the patchset.
> >
> > Not much. PIT interrupts injection is all right, 1000 per second. Just
> > found two clock source in guest got problem: PM timer and TSC. Seems both
> > due to compensate for lost ticks. Get rid of something like "jiffies_64
> > += lost -1" get the time ok.
> >
> > And I think it's a exist bug. As you see, in most condition, userspace
> > pit + in kernel irqchip resulted in time flow slowly, due to the lost of
> > interrupts. But RHEL4 runs even faster than host...
> >
> > I will do more investigate.
>
> Get some clues.
>
> It seems like for the kernel which is active to inject lost interrupt, when
> some PIT interrupts were pending, the TSC/other clocksource got the wrong
> impression that some PIT interrupt lost, then using itself's counter to
> adjust the jiffies. So the problem occurs.
>
> Xen adjust TSC to fit this mode. It pull TSC backward to get the correct
> value when injecting one PIT interrupt. But this would causing trouble on
> some Windows. Then, it got the "time mode" concept...
Found more complex for KVM. Xen pulled pm timer down to kernel part, and used
the guest TSC as source. So only adjust TSC is OK for it. But we are still
using pm timer in QEmu, which using host time as source. So even we pull back
TSC, the problem still exists, for 2.6.9 prefer to pm timer by default.
--
Thanks
Yang, Sheng
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-07 8:12 ` Yang, Sheng
@ 2008-03-07 8:53 ` Avi Kivity
2008-03-07 9:14 ` Yang, Sheng
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2008-03-07 8:53 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel
Yang, Sheng wrote:
> Found more complex for KVM. Xen pulled pm timer down to kernel part, and used
> the guest TSC as source. So only adjust TSC is OK for it. But we are still
> using pm timer in QEmu, which using host time as source. So even we pull back
> TSC, the problem still exists, for 2.6.9 prefer to pm timer by default
Interesting. I guess we should pull the pm timer into the kernel as
well. Timing is too tricky for userspace.
--
Any sufficiently difficult bug is indistinguishable from a feature.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-07 8:53 ` Avi Kivity
@ 2008-03-07 9:14 ` Yang, Sheng
2008-03-07 9:57 ` Avi Kivity
0 siblings, 1 reply; 19+ messages in thread
From: Yang, Sheng @ 2008-03-07 9:14 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
On Friday 07 March 2008 16:53:40 Avi Kivity wrote:
> Yang, Sheng wrote:
> > Found more complex for KVM. Xen pulled pm timer down to kernel part, and
> > used the guest TSC as source. So only adjust TSC is OK for it. But we are
> > still using pm timer in QEmu, which using host time as source. So even we
> > pull back TSC, the problem still exists, for 2.6.9 prefer to pm timer by
> > default
>
> Interesting. I guess we should pull the pm timer into the kernel as
> well. Timing is too tricky for userspace.
... Should we suggest using "clock=pit" on pae 2.6.9 at first?
--
Thanks
Yang, Sheng
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH 1/6] KVM: In kernel pit model
2008-03-07 9:14 ` Yang, Sheng
@ 2008-03-07 9:57 ` Avi Kivity
0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2008-03-07 9:57 UTC (permalink / raw)
To: Yang, Sheng; +Cc: kvm-devel
Yang, Sheng wrote:
> On Friday 07 March 2008 16:53:40 Avi Kivity wrote:
>
>> Yang, Sheng wrote:
>>
>>> Found more complex for KVM. Xen pulled pm timer down to kernel part, and
>>> used the guest TSC as source. So only adjust TSC is OK for it. But we are
>>> still using pm timer in QEmu, which using host time as source. So even we
>>> pull back TSC, the problem still exists, for 2.6.9 prefer to pm timer by
>>> default
>>>
>> Interesting. I guess we should pull the pm timer into the kernel as
>> well. Timing is too tricky for userspace.
>>
>
> ... Should we suggest using "clock=pit" on pae 2.6.9 at first?
>
>
While it is hardly a lovely solution (things should work out of the box)
it is reasonable as a temporary measure.
Can you repost your patchset? If you're quick I can apply it today,
otherwise it will have to wait until next week.
--
Any sufficiently difficult bug is indistinguishable from a feature.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH 1/6] KVM: In kernel PIT model
@ 2008-03-07 12:52 Yang, Sheng
0 siblings, 0 replies; 19+ messages in thread
From: Yang, Sheng @ 2008-03-07 12:52 UTC (permalink / raw)
To: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 21964 bytes --]
From 19bc8000b0ed1c2021ddb509a3d923e1cd8d53ec Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Mon, 28 Jan 2008 05:10:22 +0800
Subject: [PATCH] KVM: In kernel PIT model
The patch moved PIT from userspace to kernel, and increase the timer accuracy
greatly.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 585
++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 60 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 662 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o
ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..1031901
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,585 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the
rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pr_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int
is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
+
+ pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ WARN_ON(!mutex_is_locked(&ps->lock));
+
+ pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ /*
+ * Though spec said the state of 8254 is undefined after power-up,
+ * seems some tricky OS like Windows XP depends on IRQ0 interrupt
+ * when booting up.
+ * So here setting initialize rate for it, and not a specific number
+ */
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= KVM_PIT_CHANNEL_MASK;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= KVM_PIT_CHANNEL_MASK;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+ (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == KVM_SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time
+ >= KVM_MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..38184d5
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,60 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define KVM_PIT_BASE_ADDRESS 0x40
+#define KVM_SPEAKER_BASE_ADDRESS 0x61
+#define KVM_PIT_MEM_LENGTH 4
+#define KVM_PIT_FREQ 1193181
+#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
+#define KVM_PIT_CHANNEL_MASK 0x3
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 64beff6..421b2b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -815,6 +816,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1591,6 +1593,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3369,6 +3377,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #2: 0001-In-kernel-pit-model.patch --]
[-- Type: text/x-diff, Size: 21959 bytes --]
From 19bc8000b0ed1c2021ddb509a3d923e1cd8d53ec Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Mon, 28 Jan 2008 05:10:22 +0800
Subject: [PATCH] KVM: In kernel PIT model
The patch moved PIT from userspace to kernel, and increase the timer accuracy greatly.
Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
arch/x86/kvm/Makefile | 3 +-
arch/x86/kvm/i8254.c | 585 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/i8254.h | 60 +++++
arch/x86/kvm/irq.c | 3 +
arch/x86/kvm/x86.c | 9 +
include/asm-x86/kvm_host.h | 1 +
include/linux/kvm.h | 2 +
7 files changed, 662 insertions(+), 1 deletions(-)
create mode 100644 arch/x86/kvm/i8254.c
create mode 100644 arch/x86/kvm/i8254.h
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b3..4d0c22e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -6,7 +6,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 0000000..1031901
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,585 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pr_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
+
+ pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ WARN_ON(!mutex_is_locked(&ps->lock));
+
+ pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ /*
+ * Though spec said the state of 8254 is undefined after power-up,
+ * seems some tricky OS like Windows XP depends on IRQ0 interrupt
+ * when booting up.
+ * So here setting initialize rate for it, and not a specific number
+ */
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= KVM_PIT_CHANNEL_MASK;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= KVM_PIT_CHANNEL_MASK;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+ (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == KVM_SPEAKER_BASE_ADDRESS);
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ int i;
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+ struct kvm_kpit_channel_state *c;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ atomic_set(&pit_state->pit_timer.pending, 0);
+ for (i = 0; i < 3; i++) {
+ c = &pit_state->channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(kvm, i, 0);
+ }
+
+ mutex_unlock(&pit->pit_state.lock);
+
+ pit->pit_state.inject_pending = 1;
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+ static unsigned long last_injected_time;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - last_injected_time
+ >= KVM_MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 0000000..38184d5
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,60 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define KVM_PIT_BASE_ADDRESS 0x40
+#define KVM_SPEAKER_BASE_ADDRESS 0x61
+#define KVM_PIT_MEM_LENGTH 4
+#define KVM_PIT_FREQ 1193181
+#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
+#define KVM_PIT_CHANNEL_MASK 0x3
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e571475..dbfe21c 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
/*
* check if there is pending interrupt without
@@ -66,6 +67,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +75,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 64beff6..421b2b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -17,6 +17,7 @@
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -815,6 +816,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
r = 1;
break;
case KVM_CAP_VAPIC:
@@ -1591,6 +1593,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -3369,6 +3377,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 024b57c..12932bb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -297,6 +297,7 @@ struct kvm_arch{
struct list_head active_mmu_pages;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct kvm_pit *vpit;
int round_robin_prev_vcpu;
unsigned int tss_addr;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index e92e703..cefa9a2 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -236,6 +236,7 @@ struct kvm_vapic_addr {
#define KVM_CAP_CLOCKSOURCE 8
#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */
#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
/*
* ioctls for VM fds
@@ -258,6 +259,7 @@ struct kvm_vapic_addr {
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
/*
* ioctls for vcpu fds
--
debian.1.5.3.7.1-dirty
[-- Attachment #3: Type: text/plain, Size: 228 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
[-- Attachment #4: Type: text/plain, Size: 158 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 19+ messages in thread
end of thread, other threads:[~2008-03-07 12:52 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-04 10:22 [PATCH 1/6] KVM: In kernel pit model Yang, Sheng
2008-03-05 6:54 ` Avi Kivity
2008-03-05 7:04 ` Yang, Sheng
2008-03-05 9:15 ` Ingo Molnar
2008-03-05 11:35 ` Yang, Sheng
2008-03-05 11:43 ` Avi Kivity
2008-03-05 12:24 ` Ingo Molnar
2008-03-06 7:56 ` Yang, Sheng
2008-03-06 8:06 ` Avi Kivity
2008-03-06 8:43 ` Yang, Sheng
2008-03-06 9:41 ` Yang, Sheng
2008-03-07 8:12 ` Yang, Sheng
2008-03-07 8:53 ` Avi Kivity
2008-03-07 9:14 ` Yang, Sheng
2008-03-07 9:57 ` Avi Kivity
2008-03-06 9:14 ` Ingo Molnar
2008-03-06 9:25 ` Yang, Sheng
2008-03-06 12:33 ` Ingo Molnar
-- strict thread matches above, loose matches on Subject: below --
2008-03-07 12:52 [PATCH 1/6] KVM: In kernel PIT model Yang, Sheng
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox