* apic+ioapiuc patch
@ 2007-07-13 9:29 Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C2514E-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-13 9:29 UTC (permalink / raw)
To: avi-atKUWr5tajBWk0Htik3J/w; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Avi:
Per our discussion, we will only support all user level irqchip
or all kernel level irqchip.
Here is the patch against lapic2 that passed RHEL5U test. Please give
comments.
thx,eddie
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 952dff3..b29651b 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o kvm_irq.o lapic.o
ioapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
new file mode 100644
index 0000000..3a0fcc6
--- /dev/null
+++ b/drivers/kvm/ioapic.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright (C) 2001 MandrakeSoft S.A.
+ *
+ * MandrakeSoft S.A.
+ * 43, rue d'Aboukir
+ * 75002 Paris - France
+ * http://www.linux-mandrake.com/
+ * http://www.mandrakesoft.com/
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA
+ *
+ * Yunhong Jiang <yunhong.jiang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+ * Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include "irq.h"
+/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define ioapic_debug(fmt, arg...)
+static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+ unsigned long addr,
+ unsigned long length)
+{
+ unsigned long result = 0;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+ | (IOAPIC_VERSION_ID & 0xff));
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ case IOAPIC_REG_ARB_ID:
+ result = ((ioapic->id & 0xf) << 24);
+ break;
+
+ default:
+ {
+ u32 redir_index = (ioapic->ioregsel - 0x10) >>
1;
+ u64 redir_content;
+
+ ASSERT(redir_index < IOAPIC_NUM_PINS)
+
+ redir_content =
ioapic->redirtbl[redir_index].bits;
+ result = (ioapic->ioregsel & 0x1) ?
+ (redir_content >> 32) & 0xffffffff :
+ redir_content & 0xffffffff;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+{
+ union ioapic_redir_entry *pent;
+
+ pent = &ioapic->redirtbl[idx];
+
+ if (!pent->fields.mask) {
+ ioapic_deliver(ioapic, idx);
+ if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+ pent->fields.remote_irr = 1;
+ }
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+ int index;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ /* Writes are ignored. */
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ ioapic->id = (val >> 24) & 0xf;
+ break;
+
+ case IOAPIC_REG_ARB_ID:
+ break;
+
+ default:
+ index = (ioapic->ioregsel - 0x10) >> 1;
+
+ ioapic_debug("change redir index %x val %x", index,
val);
+ ASSERT(irq < IOAPIC_NUM_PINS);
+ if (ioapic->ioregsel & 1) {
+ ioapic->redirtbl[index].bits &= 0xffffffff;
+ ioapic->redirtbl[index].bits |= (u64) val << 32;
+ } else {
+ ioapic->redirtbl[index].bits &= ~0xffffffffULL;
+ ioapic->redirtbl[index].bits |= (u32) val;
+ ioapic->redirtbl[index].fields.remote_irr = 0;
+ }
+
+ ioapic_service(ioapic, index);
+ break;
+ }
+}
+
+static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
+ struct kvm_lapic *target,
+ u8 vector, u8 trig_mode, u8 delivery_mode)
+{
+ ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+ delivery_mode);
+
+ ASSERT((delivery_mode == dest_Fixed) ||
+ (delivery_mode == dest_LowestPrio));
+
+ if (kvm_apic_set_irq(target, vector, trig_mode))
+ kvm_vcpu_kick(target->vcpu);
+}
+
+static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8
dest,
+ u8 dest_mode)
+{
+ u32 mask = 0;
+ int i;
+ struct kvm *kvm = ioapic->kvm;
+ struct kvm_vcpu *vcpu;
+
+ ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+
+ if (dest_mode == 0) { /* Physical mode. */
+ if (dest == 0xFF) { /* Broadcast. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i)
+ mask |= 1 << i;
+ return mask;
+ }
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = &kvm->vcpus[i];
+ if (kvm_apic_match_physical_addr(vcpu->apic,
dest)) {
+ mask = 1 << vcpu->vcpu_id;
+ break;
+ }
+ }
+ } else if (dest != 0) { /* Logical mode, MDA non-zero. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = &kvm->vcpus[i];
+ if (kvm_apic_match_logical_addr(vcpu->apic,
dest))
+ mask |= 1 << vcpu->vcpu_id;
+ }
+ }
+
+ ioapic_debug("mask %x", mask);
+ return mask;
+}
+
+static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+ u8 dest = ioapic->redirtbl[irq].fields.dest_id;
+ u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
+ u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
+ u8 vector = ioapic->redirtbl[irq].fields.vector;
+ u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
+ u32 deliver_bitmask;
+ struct kvm_lapic *target;
+ struct kvm_vcpu *vcpu;
+ int vcpu_id;
+
+ ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+ "vector=%x trig_mode=%x",
+ dest, dest_mode, delivery_mode, vector, trig_mode);
+
+ deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest,
dest_mode);
+ if (!deliver_bitmask) {
+ ioapic_debug("no target on destination");
+ return;
+ }
+
+ switch (delivery_mode) {
+ case dest_LowestPrio:
+ target =
+ kvm_apic_round_robin(ioapic->kvm, vector,
deliver_bitmask);
+ if (target != NULL) {
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode, delivery_mode);
+ } else {
+ ioapic_debug("null round robin: "
+ "mask=%x vector=%x
delivery_mode=%x",
+ deliver_bitmask, vector,
dest_LowestPrio);
+ }
+ break;
+ case dest_Fixed:
+ for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+ if (!(deliver_bitmask & (1 << vcpu_id)))
+ continue;
+ deliver_bitmask &= ~(1 << vcpu_id);
+
+ vcpu = &ioapic->kvm->vcpus[vcpu_id];
+ if (vcpu != NULL) {
+ target = vcpu->apic;
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode,
delivery_mode);
+ }
+ }
+ break;
+
+ /* TODO: NMI */
+ default:
+ printk(KERN_WARNING "Unsupported delivery mode %d\n",
+ delivery_mode);
+ break;
+ }
+}
+
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+{
+ u32 mask = 1 << irq;
+ union ioapic_redir_entry entry;
+
+ if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
+ entry = ioapic->redirtbl[irq];
+ if (!level)
+ ioapic->irr &= ~mask;
+ if (entry.fields.trig_mode) { /* level triggered */
+ if (level && !entry.fields.remote_irr) {
+ ioapic->irr |= mask;
+ ioapic_service(ioapic, irq);
+ }
+ } else if (level && !(ioapic->irr & mask)) {
+ /*
+ * edge triggered
+ */
+ ioapic->irr |= mask;
+ ioapic_service(ioapic, irq);
+ }
+ }
+}
+
+static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+{
+ int i;
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ if (ioapic->redirtbl[i].fields.vector == vector)
+ return i;
+
+ return -1;
+}
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->vioapic;
+ union ioapic_redir_entry *ent;
+ int gsi;
+
+ gsi = get_eoi_gsi(ioapic, vector);
+ if (gsi == -1) {
+ printk(KERN_WARNING "Can't find redir item for %d
EOI\n",
+ vector);
+ return;
+ }
+
+ ent = &ioapic->redirtbl[gsi];
+ ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+
+ ent->fields.remote_irr = 0;
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) {
+ ioapic_deliver(ioapic, gsi);
+ }
+
+}
+
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+
+ return ((addr >= ioapic->base_address &&
+ (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr,
int len,
+ void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 result;
+
+ ioapic_debug("addr %lx", (unsigned long)addr);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+
+ addr &= 0xff;
+
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ result = ioapic->ioregsel;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ result = ioapic_read_indirect(ioapic, addr, len);
+ break;
+
+ default:
+ result = 0;
+ break;
+ }
+ switch (len) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ memcpy(val, (char *)&result, len);
+ break;
+ default:
+ printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+ }
+}
+
+static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr,
int len,
+ const void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 data;
+
+ ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
+ addr, len, val);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+ if (len == 4 || len == 8)
+ data = *(u32 *) val;
+ else {
+ printk(KERN_WARNING "ioapic: Unsupported size %d\n",
len);
+ return;
+ }
+ addr &= 0xff;
+
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ ioapic->ioregsel = data;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ ioapic_write_indirect(ioapic, data);
+ break;
+
+ default:
+ break;
+ }
+}
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+ struct kvm_ioapic *ioapic;
+ int i;
+
+ ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+ if (!ioapic)
+ return -ENOMEM;
+ kvm->vioapic = ioapic;
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ ioapic->redirtbl[i].fields.mask = 1;
+ ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+
+ ioapic->dev.read = ioapic_mmio_read;
+ ioapic->dev.write = ioapic_mmio_write;
+ ioapic->dev.in_range = ioapic_in_range;
+ ioapic->dev.private = ioapic;
+ ioapic->kvm = kvm;
+ kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
+ return 0;
+}
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index b08005c..5265f82 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -30,14 +30,13 @@
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s = pic_irqchip(v->kvm);
-
- if (s->output) /* PIC */
- return 1;
- /*
- * TODO: APIC
- */
- return 0;
+ struct kvm_pic *s;
+
+ if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
+ s = pic_irqchip(v->kvm); /* PIC */
+ return s->output;
+ }
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
@@ -46,16 +45,32 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s = pic_irqchip(v->kvm);
+ struct kvm_pic *s;
int vector;
- s->output = 0;
- vector = kvm_pic_read_irq(s);
- if (vector != -1)
- return vector;
- /*
- * TODO: APIC
- */
- return -1;
+ vector = kvm_get_apic_interrupt(v); /* APIC */
+ if (vector == -1) {
+ s = pic_irqchip(v->kvm);
+ s->output = 0; /* PIC */
+ vector = kvm_pic_read_irq(s);
+ }
+ return vector;
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+ printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int ipi_pcpu = vcpu->cpu;
+
+ if (vcpu->guest_mode)
+ smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
0, 0);
+}
+
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index a6b3869..4b1b4b7 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -26,12 +26,11 @@
typedef void irq_request_func(void *opaque, int level);
-struct kvm_pic;
struct kvm_pic_state {
- u8 last_irr; /* edge detection */
- u8 irr; /* interrupt request register */
- u8 imr; /* interrupt mask register */
- u8 isr; /* interrupt service register */
+ u8 last_irr; /* edge detection */
+ u8 irr; /* interrupt request register */
+ u8 imr; /* interrupt mask register */
+ u8 isr; /* interrupt service register */
u8 priority_add; /* highest irq priority */
u8 irq_base;
u8 read_reg_select;
@@ -48,7 +47,7 @@ struct kvm_pic_state {
};
struct kvm_pic {
- struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave pic
*/
+ struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave
pic */
irq_request_func *irq_request;
void *irq_request_opaque;
int output; /* intr from master PIC */
@@ -61,4 +60,94 @@ int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+#define IOAPIC_NUM_PINS 24
+#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
+#define IOAPIC_EDGE_TRIG 0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
+#define IOAPIC_MEM_LENGTH 0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT 0x00
+#define IOAPIC_REG_WINDOW 0x10
+#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
+
+struct kvm_ioapic {
+ struct kvm_io_device dev;
+ unsigned long base_address;
+ struct kvm *kvm;
+ u32 ioregsel;
+ u32 id;
+ u32 irr;
+ union ioapic_redir_entry {
+ u64 bits;
+ struct {
+ u8 vector;
+ u8 delivery_mode:3;
+ u8 dest_mode:1;
+ u8 delivery_status:1;
+ u8 polarity:1;
+ u8 remote_irr:1;
+ u8 trig_mode:1;
+ u8 mask:1;
+ u8 reserve:7;
+ u8 reserved[4];
+ u8 dest_id;
+ } fields;
+ } redirtbl[IOAPIC_NUM_PINS];
+};
+
+struct kvm_lapic {
+ spinlock_t lock; /* TODO: need? */
+ u32 status;
+ u64 base_msr;
+ unsigned long base_address;
+ struct kvm_io_device dev;
+ struct {
+ unsigned long pending;
+ u32 divide_count;
+ ktime_t last_update;
+ struct hrtimer dev;
+ } timer;
+ struct kvm_vcpu *vcpu;
+ struct page *regs_page;
+ void *regs;
+};
+
+#ifdef DEBUG
+#define ASSERT(x)
\
+ if (!(x)) {
\
+ printk(KERN_EMERG "assertion failed %s: %d: %s\n",
\
+ __FILE__, __LINE__, #x);
\
+ BUG();
\
+ }
+#else
+#define ASSERT(x)
+#endif
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_apic(struct kvm_lapic *apic);
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, u32
bitmap);
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
+
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
+
#endif
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index f1a6773..a886ba9 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -334,15 +334,13 @@ struct kvm_vcpu {
};
struct mutex mutex;
int cpu;
- int launched;
+ char vcpu_id;
+ char launched;
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
int guest_mode;
unsigned long requests;
- unsigned long irq_summary; /* bit vector: 1 per word in
irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
- unsigned long irq_pending[NR_IRQ_WORDS];
unsigned long regs[NR_VCPU_REGS]; /* for rsp:
vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
@@ -353,10 +351,23 @@ struct kvm_vcpu {
struct page *para_state_page;
gpa_t hypercall_gpa;
unsigned long cr4;
- unsigned long cr8;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
- u64 apic_base;
+ union {
+ struct { /* user irqchip context */
+ /*
+ * bit vector: 1 per word in irq_pending
+ */
+ unsigned long irq_summary;
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+ unsigned long irq_pending[NR_IRQ_WORDS];
+ unsigned long cr8;
+ u64 apic_base;
+ };
+ struct { /* kernel irqchip context */
+ struct kvm_lapic *apic;
+ };
+ };
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
@@ -435,7 +446,6 @@ struct kvm_memory_slot {
unsigned long *dirty_bitmap;
};
-struct kvm_pic;
struct kvm {
spinlock_t lock; /* protects everything except vcpus */
int naliases;
@@ -458,6 +468,7 @@ struct kvm {
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
struct kvm_pic *vpic;
+ struct kvm_ioapic *vioapic;
};
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
@@ -600,6 +611,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long
cr0);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
+unsigned long get_cr8(struct kvm_vcpu *vcpu);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index d785b2e..9a7003b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -324,6 +324,7 @@ static struct kvm *kvm_create_vm(void)
mutex_init(&vcpu->mutex);
vcpu->cpu = -1;
+ vcpu->vcpu_id = i;
vcpu->kvm = kvm;
vcpu->mmu.root_hpa = INVALID_PAGE;
}
@@ -395,6 +396,7 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
vcpu_load(vcpu);
kvm_mmu_destroy(vcpu);
+ kvm_free_apic(vcpu->apic);
vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu);
free_page((unsigned long)vcpu->run);
@@ -430,6 +432,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kfree(kvm->vpic);
kvm_io_bus_destroy(&kvm->pio_bus);
kvm_io_bus_destroy(&kvm->mmio_bus);
+ kfree(kvm->vioapic);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
kfree(kvm);
@@ -630,15 +633,46 @@ EXPORT_SYMBOL_GPL(set_cr3);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
- if ( cr8 & CR8_RESEVED_BITS) {
+ if (cr8 & CR8_RESEVED_BITS) {
printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n",
cr8);
inject_gp(vcpu);
return;
}
- vcpu->cr8 = cr8;
+ if (irqchip_in_kernel(vcpu->kvm))
+ kvm_lapic_set_tpr(vcpu, cr8);
+ else
+ vcpu->cr8 = cr8;
}
EXPORT_SYMBOL_GPL(set_cr8);
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+ if (irqchip_in_kernel(vcpu->kvm))
+ return kvm_lapic_get_cr8(vcpu);
+ else
+ return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
+{
+ if (irqchip_in_kernel(vcpu->kvm))
+ return vcpu->apic->base_msr;
+ else
+ return vcpu->apic_base;
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
+
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+{
+ /* TODO: reserve bits check */
+ if (irqchip_in_kernel(vcpu->kvm))
+ kvm_lapic_set_base(vcpu, data);
+ else
+ vcpu->apic_base = data;
+}
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+
void fx_init(struct kvm_vcpu *vcpu)
{
struct __attribute__ ((__packed__)) fx_image_s {
@@ -1053,15 +1087,31 @@ static int emulator_write_std(unsigned long
addr,
return X86EMUL_UNHANDLEABLE;
}
+/*
+ * Only apic need an MMIO device hook, so shortcut now..
+ */
+static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu
*vcpu,
+ gpa_t addr)
+{
+ struct kvm_io_device *dev;
+
+ if (vcpu->apic) {
+ dev = &vcpu->apic->dev;
+ if (dev->in_range(dev, addr))
+ return dev;
+ }
+ return NULL;
+}
+
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
gpa_t addr)
{
- /*
- * Note that its important to have this wrapper function because
- * in the very near future we will be checking for MMIOs against
- * the LAPIC as well as the general MMIO bus
- */
- return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+ struct kvm_io_device *dev;
+
+ dev = vcpu_find_pervcpu_dev(vcpu, addr);
+ if (dev == NULL)
+ dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+ return dev;
}
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -1546,7 +1596,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32
msr, u64 *pdata)
data = 3;
break;
case MSR_IA32_APICBASE:
- data = vcpu->apic_base;
+ data = kvm_get_apic_base(vcpu);
break;
case MSR_IA32_MISC_ENABLE:
data = vcpu->ia32_misc_enable_msr;
@@ -1624,7 +1674,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32
msr, u64 data)
case 0x200 ... 0x2ff: /* MTRRs */
break;
case MSR_IA32_APICBASE:
- vcpu->apic_base = data;
+ kvm_set_apic_base(vcpu, data);
break;
case MSR_IA32_MISC_ENABLE:
vcpu->ia32_misc_enable_msr = data;
@@ -1914,7 +1964,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu
*vcpu, struct kvm_run *kvm_run)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
/* re-sync apic's tpr */
- vcpu->cr8 = kvm_run->cr8;
+ set_cr8(vcpu, kvm_run->cr8);
if (vcpu->pio.cur_count) {
r = complete_pio(vcpu);
@@ -2063,9 +2113,9 @@ static int kvm_vcpu_ioctl_get_sregs(struct
kvm_vcpu *vcpu,
sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3;
sregs->cr4 = vcpu->cr4;
- sregs->cr8 = vcpu->cr8;
+ sregs->cr8 = get_cr8(vcpu);
sregs->efer = vcpu->shadow_efer;
- sregs->apic_base = vcpu->apic_base;
+ sregs->apic_base = kvm_get_apic_base(vcpu);
memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
sizeof sregs->interrupt_bitmap);
@@ -2101,13 +2151,13 @@ static int kvm_vcpu_ioctl_set_sregs(struct
kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
vcpu->cr3 = sregs->cr3;
- vcpu->cr8 = sregs->cr8;
+ set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
kvm_arch_ops->set_efer(vcpu, sregs->efer);
#endif
- vcpu->apic_base = sregs->apic_base;
+ kvm_set_apic_base(vcpu, sregs->apic_base);
kvm_arch_ops->decache_cr4_guest_bits(vcpu);
@@ -2421,6 +2471,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm
*kvm, int n)
if (r < 0)
goto out_free_vcpus;
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ r = kvm_create_lapic(vcpu);
+ if (r < 0)
+ goto out_free_vcpus;
+ }
kvm_arch_ops->vcpu_load(vcpu);
r = kvm_mmu_setup(vcpu);
if (r >= 0)
@@ -2791,10 +2846,14 @@ static long kvm_vm_ioctl(struct file *filp,
case KVM_CREATE_PIC:
r = -ENOMEM;
kvm->vpic = kvm_create_pic(kvm);
- if (kvm->vpic)
- r = 0;
- else
- goto out;
+ if (kvm->vpic) {
+ r = kvm_ioapic_init(kvm);
+ if (r) {
+ kfree(kvm->vpic);
+ kvm->vpic = NULL;
+ goto out;
+ }
+ }
break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -2807,7 +2866,9 @@ static long kvm_vm_ioctl(struct file *filp,
kvm_pic_set_irq(pic_irqchip(kvm),
irq_event.irq,
irq_event.level);
- /* TODO: IOAPIC */
+ kvm_ioapic_set_irq(kvm->vioapic,
+ irq_event.irq,
+ irq_event.level);
r = 0;
}
break;
@@ -2924,7 +2985,8 @@ static long kvm_dev_ioctl(struct file *filp,
int ext = (long)argp;
switch (ext) {
- case KVM_CAP_PIC:
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_HLT:
r = 1;
break;
default:
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
new file mode 100644
index 0000000..3096c07
--- /dev/null
+++ b/drivers/kvm/lapic.c
@@ -0,0 +1,946 @@
+
+/*
+ * Local APIC virtualization
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2007 Novell
+ * Copyright (C) 2007 Intel
+ *
+ * Authors:
+ * Dor Laor <dor.laor-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
+ * Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ * Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+ *
+ * Based on Xen 3.0 code, Copyright (c) 2004, Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include "irq.h"
+
+#define PRId64 "d"
+#define PRIx64 "llx"
+#define PRIu64 "u"
+#define PRIo64 "o"
+
+#define APIC_BUS_CYCLE_NS 1
+
+/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define apic_debug(fmt, arg...)
+
+#define APIC_LVT_NUM 6
+/* 14 is the version for Xeon and Pentium 8.4.8*/
+#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) <<
16))
+#define LAPIC_MMIO_LENGTH (1 << 12)
+/* followed define is not in apicdef.h */
+#define APIC_SHORT_MASK 0xc0000
+#define APIC_DEST_NOSHORT 0x0
+#define APIC_DEST_MASK 0x800
+#define _APIC_SW_DISABLE 0
+#define APIC_SW_DISABLE (1 << _APIC_SW_DISABLE)
+#define MAX_APIC_VECTOR 256
+
+#define VEC_POS(v) ((v) & (32 - 1))
+#define REG_POS(v) (((v) >> 5) << 4)
+#define apic_test_and_set_vector(vec, bitmap) \
+ test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
+#define apic_test_and_clear_vector(vec, bitmap) \
+ test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
+#define apic_set_vector(vec, bitmap) \
+ set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
+#define apic_clear_vector(vec, bitmap) \
+ clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
+
+#define apic_hw_enabled(apic) ((apic)->base_msr &
MSR_IA32_APICBASE_ENABLE)
+#define apic_sw_enabled(apic) (!((apic)->status & APIC_SW_DISABLE))
+#define apic_enabled(apic) (apic_sw_enabled(apic) && \
+ apic_hw_enabled(apic))
+
+#define LVT_MASK \
+ (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
+
+#define LINT_MASK \
+ LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
+ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER
+
+#define KVM_APIC_ID(apic) \
+ (GET_APIC_ID(apic_get_reg(apic, APIC_ID)))
+
+#define apic_lvt_enabled(apic, lvt_type) \
+ (!(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED))
+
+#define apic_lvt_vector(apic, lvt_type) \
+ (apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK)
+
+#define apic_lvt_dm(apic, lvt_type) \
+ (apic_get_reg(apic, lvt_type) & APIC_MODE_MASK)
+
+#define apic_lvtt_period(apic) \
+ (apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC)
+
+static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+ return *((u32 *) (apic->regs + reg_off));
+}
+
+static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off,
u32 val)
+{
+ *((u32 *) (apic->regs + reg_off)) = val;
+}
+
+static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+ LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
+ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
+ LVT_MASK | APIC_MODE_MASK, /* LVTPC */
+ LINT_MASK, LINT_MASK, /* LVT0-1 */
+ LVT_MASK /* LVTERR */
+};
+
+static int find_highest_vector(void *bitmap)
+{
+ u32 *word = bitmap;
+ int word_offset = MAX_APIC_VECTOR >> 5;
+
+ while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
+ continue;
+
+ if (likely(!word_offset && !word[0]))
+ return -1;
+ else
+ return fls(word[word_offset << 2]) - 1 + (word_offset <<
5);
+}
+
+static inline int apic_test_and_set_irr(int vec, struct kvm_lapic
*apic)
+{
+ return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
+}
+
+static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
+{
+ apic_clear_vector(vec, apic->regs + APIC_IRR);
+}
+
+static inline int apic_find_highest_irr(struct kvm_lapic *apic)
+{
+ int result;
+
+ result = find_highest_vector(apic->regs + APIC_IRR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
+{
+ if (!apic_test_and_set_irr(vec, apic)) {
+ /* a new pending irq is set in IRR */
+ if (trig)
+ apic_set_vector(vec, apic->regs + APIC_TMR);
+ else
+ apic_clear_vector(vec, apic->regs + APIC_TMR);
+ kvm_vcpu_kick(apic->vcpu);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+ int result;
+
+ result = find_highest_vector(apic->regs + APIC_ISR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
+static void apic_update_ppr(struct kvm_lapic *apic)
+{
+ u32 tpr, isrv, ppr;
+ int isr;
+
+ tpr = apic_get_reg(apic, APIC_TASKPRI);
+ isr = apic_find_highest_isr(apic);
+ isrv = (isr != -1) ? isr : 0;
+
+ if ((tpr & 0xf0) >= (isrv & 0xf0))
+ ppr = tpr & 0xff;
+ else
+ ppr = isrv & 0xf0;
+
+ apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
+ apic, ppr, isr, isrv);
+
+ apic_set_reg(apic, APIC_PROCPRI, ppr);
+}
+
+static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
+{
+ apic_set_reg(apic, APIC_TASKPRI, tpr);
+ apic_update_ppr(apic);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+ return KVM_APIC_ID(apic) == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+ int result = 0;
+ u8 logical_id;
+
+ logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
+
+ switch (apic_get_reg(apic, APIC_DFR)) {
+ case APIC_DFR_FLAT:
+ if (logical_id & mda)
+ result = 1;
+ break;
+ case APIC_DFR_CLUSTER:
+ if (((logical_id >> 4) == (mda >> 0x4))
+ && (logical_id & mda & 0xf))
+ result = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
+ apic->vcpu->vcpu_id, apic_get_reg(apic,
APIC_DFR));
+ break;
+ }
+
+ return result;
+}
+
+static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic
*source,
+ int short_hand, int dest, int dest_mode)
+{
+ int result = 0;
+ struct kvm_lapic *target = vcpu->apic;
+
+ apic_debug("target %p, source %p, dest 0x%x, "
+ "dest_mode 0x%x, short_hand 0x%x",
+ target, source, dest, dest_mode, short_hand);
+
+ if (!target)
+ return 0;
+ switch (short_hand) {
+ case APIC_DEST_NOSHORT:
+ if (dest_mode == 0) {
+ /* Physical mode. */
+ if ((dest == 0xFF) || (dest ==
KVM_APIC_ID(target)))
+ result = 1;
+ } else {
+ /* Logical mode. */
+ result = kvm_apic_match_logical_addr(target,
dest);
+ }
+ break;
+ case APIC_DEST_SELF:
+ if (target == source)
+ result = 1;
+ break;
+ case APIC_DEST_ALLINC:
+ result = 1;
+ break;
+ case APIC_DEST_ALLBUT:
+ if (target != source)
+ result = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Bad dest shorthand value %x\n",
+ short_hand);
+ break;
+ }
+
+ return result;
+}
+
+/*
+ * Add a pending IRQ into lapic.
+ * Return 1 if successfully added and 0 if discarded.
+ */
+static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
+ int vector, int level, int trig_mode)
+{
+ int result = 0;
+
+ switch (delivery_mode) {
+ case APIC_DM_FIXED:
+ case APIC_DM_LOWEST:
+ /* FIXME add logic for vcpu on reset */
+ if (unlikely(!apic_enabled(apic)))
+ break;
+
+ if (apic_test_and_set_irr(vector, apic) && trig_mode) {
+ apic_debug("level trig mode repeatedly for
vector %d",
+ vector);
+ break;
+ }
+
+ if (trig_mode) {
+ apic_debug("level trig mode for vector %d",
vector);
+ apic_set_vector(vector, apic->regs + APIC_TMR);
+ } else
+ apic_clear_vector(vector, apic->regs +
APIC_TMR);
+
+ kvm_vcpu_kick(apic->vcpu);
+
+ result = 1;
+ break;
+ case APIC_DM_REMRD:
+ printk(KERN_DEBUG "Ignoring delivery mode 3\n");
+ break;
+ case APIC_DM_SMI:
+ printk(KERN_DEBUG "Ignoring guest SMI\n");
+ break;
+ case APIC_DM_NMI:
+ printk(KERN_DEBUG "Ignoring guest NMI\n");
+ break;
+ case APIC_DM_INIT:
+ printk(KERN_DEBUG "Ignoring guest INIT\n");
+ break;
+ case APIC_DM_STARTUP:
+ printk(KERN_DEBUG "Ignoring guest STARTUP\n");
+ break;
+ default:
+ printk(KERN_WARNING "TODO: unsupported delivery mode
%x\n",
+ delivery_mode);
+ break;
+ }
+ return result;
+}
+
+static inline int apic_accept_irq(struct kvm_lapic *apic, int
delivery_mode,
+ int vector, int level, int trig_mode)
+{
+ int result = 0;
+
+ spin_lock_bh(&apic->lock);
+ result = __apic_accept_irq(apic, delivery_mode,
+ vector, level, trig_mode);
+ spin_unlock_bh(&apic->lock);
+ return result;
+}
+
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, u32
bitmap)
+{
+ int vcpu_id;
+
+ /* TODO for real round robin */
+ vcpu_id = fls(bitmap) - 1;
+ ASSERT(vcpu_id >= 0);
+ return kvm->vcpus[vcpu_id].apic;
+}
+
+static void apic_set_eoi(struct kvm_lapic *apic)
+{
+ int vector = apic_find_highest_isr(apic);
+
+ /*
+ * Not every write EOI will has corresponding ISR,
+ * one example is when Kernel check timer on setup_IO_APIC
+ */
+ if (vector == -1)
+ return;
+
+ apic_clear_vector(vector, apic->regs + APIC_ISR);
+ apic_update_ppr(apic);
+
+ if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
+ kvm_ioapic_update_eoi(apic->vcpu->kvm, vector);
+}
+
+static void apic_send_ipi(struct kvm_lapic *apic)
+{
+ u32 icr_low = apic_get_reg(apic, APIC_ICR);
+ u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+
+ unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
+ unsigned int short_hand = icr_low & APIC_SHORT_MASK;
+ unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
+ unsigned int level = icr_low & APIC_INT_ASSERT;
+ unsigned int dest_mode = icr_low & APIC_DEST_MASK;
+ unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
+ unsigned int vector = icr_low & APIC_VECTOR_MASK;
+
+ struct kvm_lapic *target;
+ struct kvm_vcpu *vcpu;
+ u32 lpr_map = 0;
+ int i;
+
+ apic_debug("icr_high 0x%x, icr_low 0x%x, "
+ "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level
0x%x, "
+ "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
+ icr_high, icr_low, short_hand, dest,
+ trig_mode, level, dest_mode, delivery_mode, vector);
+
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ vcpu = &apic->vcpu->kvm->vcpus[i];
+
+ if (apic_match_dest(vcpu, apic, short_hand, dest,
dest_mode)) {
+ if (delivery_mode == APIC_DM_LOWEST)
+ set_bit(vcpu->vcpu_id, &lpr_map);
+ else
+ __apic_accept_irq(vcpu->apic,
delivery_mode,
+ vector, level,
trig_mode);
+ }
+ }
+
+ if (delivery_mode == APIC_DM_LOWEST) {
+ target = kvm_apic_round_robin(vcpu->kvm, vector,
lpr_map);
+ if (target != NULL)
+ __apic_accept_irq(target, delivery_mode,
+ vector, level, trig_mode);
+ }
+}
+
+static u32 apic_get_tmcct(struct kvm_lapic *apic)
+{
+ u32 counter_passed;
+ ktime_t passed, now = apic->timer.dev.base->get_time();
+ u32 tmcct = apic_get_reg(apic, APIC_TMCCT);
+
+ ASSERT(apic != NULL);
+
+ if (unlikely(ktime_to_ns(now) <=
+ ktime_to_ns(apic->timer.last_update))) {
+ /* Wrap around */
+ passed = ktime_add(( {
+ (ktime_t) {
+ .tv64 = KTIME_MAX -
+ (apic->timer.last_update).tv64};
+ }
+ ), now);
+ apic_debug("time elapsed\n");
+ } else
+ passed = ktime_sub(now, apic->timer.last_update);
+
+ counter_passed = ktime_to_ns(passed) /
+ (APIC_BUS_CYCLE_NS * apic->timer.divide_count);
+ tmcct -= counter_passed;
+
+ if (tmcct <= 0) {
+ if (unlikely(!apic_lvtt_period(apic))) {
+ tmcct = 0;
+ } else {
+ do {
+ tmcct += apic_get_reg(apic, APIC_TMICT);
+ } while (tmcct <= 0);
+ }
+ }
+
+ apic->timer.last_update = now;
+ apic_set_reg(apic, APIC_TMCCT, tmcct);
+
+ return tmcct;
+}
+
+static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
+{
+ u32 val = 0;
+
+ if (offset >= LAPIC_MMIO_LENGTH)
+ return 0;
+
+ switch (offset) {
+ case APIC_ARBPRI:
+ printk(KERN_WARNING "Access APIC ARBPRI register "
+ "which is for P6\n");
+ break;
+
+ case APIC_TMCCT: /* Timer CCR */
+ val = apic_get_tmcct(apic);
+ break;
+
+ default:
+ val = apic_get_reg(apic, offset);
+ break;
+ }
+
+ return val;
+}
+
+static void apic_mmio_read(struct kvm_io_device *this,
+ gpa_t address, int len, void *data)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ unsigned int offset = address - apic->base_address;
+ unsigned char alignment = offset & 0xf;
+ u32 result;
+
+ if ((alignment + len) > 4) {
+ if (printk_ratelimit())
+ printk(KERN_ERR "Bad alignment %lx %d",
+ (unsigned long)address, len);
+ return;
+ }
+ spin_lock_bh(&apic->lock);
+ result = __apic_read(apic, offset & ~0xf);
+ spin_unlock_bh(&apic->lock);
+
+ switch (len) {
+ case 1:
+ case 2:
+ case 4:
+ memcpy(data, (char *)&result + alignment, len);
+ break;
+ default:
+ if (printk_ratelimit())
+ printk(KERN_ERR "Local APIC read with len = %x,
"
+ "should be 1,2, or 4 instead\n", len);
+ break;
+ }
+}
+
+static void apic_mmio_write(struct kvm_io_device *this,
+ gpa_t address, int len, const void *data)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ unsigned int offset = address - apic->base_address;
+ unsigned char alignment = offset & 0xf;
+ u32 val;
+
+ /*
+ * APIC register must be aligned on 128-bits boundary.
+ * 32/64/128 bits registers must be accessed thru 32 bits.
+ * Refer SDM 8.4.1
+ */
+ if (len != 4 || alignment) {
+ if (printk_ratelimit())
+ printk(KERN_ERR "apic write: bad size=%d %lx\n",
+ len, (long)address);
+ return;
+ }
+
+ val = *(u32 *) data;
+
+ /* too common printing */
+ if (offset != APIC_EOI)
+ apic_debug("%s: offset 0x%x with length 0x%x, and value
is "
+ "0x%x\n", __FUNCTION__, offset, len, val);
+
+ offset &= 0xff0;
+
+ spin_lock_bh(&apic->lock);
+ switch (offset) {
+ case APIC_ID: /* Local APIC ID */
+ apic_set_reg(apic, APIC_ID, val);
+ break;
+
+ case APIC_TASKPRI:
+ apic_set_tpr(apic, val & 0xff);
+ break;
+
+ case APIC_EOI:
+ apic_set_eoi(apic);
+ break;
+
+ case APIC_LDR:
+ apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+ break;
+
+ case APIC_DFR:
+ apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+ break;
+
+ case APIC_SPIV:
+ apic_set_reg(apic, APIC_SPIV, val & 0x3ff);
+ if (!(val & APIC_SPIV_APIC_ENABLED)) {
+ int i;
+ u32 lvt_val;
+
+ apic->status |= APIC_SW_DISABLE;
+ for (i = 0; i < APIC_LVT_NUM; i++) {
+ lvt_val = apic_get_reg(apic,
+ APIC_LVTT + 0x10
* i);
+ apic_set_reg(apic, APIC_LVTT + 0x10 * i,
+ lvt_val | APIC_LVT_MASKED);
+ }
+
+ } else {
+ apic->status &= ~APIC_SW_DISABLE;
+ }
+ break;
+
+ case APIC_ICR:
+ /* No delay here, so we always clear the pending bit */
+ apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
+ apic_send_ipi(apic);
+ break;
+
+ case APIC_ICR2:
+ apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
+ break;
+
+ case APIC_LVTT:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ /* TODO: Check vector */
+ if (!apic_sw_enabled(apic))
+ val |= APIC_LVT_MASKED;
+
+ val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
+ apic_set_reg(apic, offset, val);
+
+ break;
+
+ case APIC_TMICT:
+ {
+ ktime_t now = apic->timer.dev.base->get_time();
+ u32 offset;
+
+ apic_set_reg(apic, APIC_TMICT, val);
+ apic_set_reg(apic, APIC_TMCCT, val);
+ apic->timer.last_update = now;
+ offset =
+ APIC_BUS_CYCLE_NS * apic->timer.divide_count
* val;
+
+ /* Make sure the lock ordering is coherent */
+ spin_unlock_bh(&apic->lock);
+ hrtimer_cancel(&apic->timer.dev);
+ hrtimer_start(&apic->timer.dev,
+ ktime_add_ns(now, offset),
+ HRTIMER_MODE_ABS);
+
+ apic_debug("%s: bus cycle is %" PRId64 "ns, now
0x%016"
+ PRIx64 ", "
+ "timer initial count 0x%x, offset
0x%x, "
+ "expire @ 0x%016" PRIx64 ".\n",
__FUNCTION__,
+ APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+ apic_get_reg(apic, APIC_TMICT),
offset,
+ ktime_to_ns(ktime_add_ns(now,
offset)));
+ }
+ return;
+
+ case APIC_TDCR:
+ {
+ unsigned int tmp1, tmp2;
+
+ if (val & 4)
+ printk(KERN_ERR "KVM_WRITE:TDCR %x\n",
val);
+ tmp1 = val & 0xf;
+ tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
+ apic->timer.divide_count = 0x1 << (tmp2 & 0x7);
+
+ apic_set_reg(apic, APIC_TDCR, val);
+
+ apic_debug("timer divide count is 0x%x\n",
+ apic->timer.divide_count);
+ }
+ break;
+
+ default:
+ apic_debug("Local APIC Write to read-only register
%x\n",
+ offset);
+ break;
+ }
+
+ spin_unlock_bh(&apic->lock);
+}
+
+static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ int ret = 0;
+
+ spin_lock_bh(&apic->lock);
+
+ if (apic_hw_enabled(apic) &&
+ (addr >= apic->base_address) &&
+ (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
+ ret = 1;
+
+ spin_unlock_bh(&apic->lock);
+
+ return ret;
+}
+
+void kvm_free_apic(struct kvm_lapic *apic)
+{
+ if (!apic)
+ return;
+ spin_lock_bh(&apic->lock);
+
+ hrtimer_cancel(&apic->timer.dev);
+
+ if (apic->regs_page) {
+ __free_page(apic->regs_page);
+ apic->regs_page = 0;
+ }
+
+ spin_unlock_bh(&apic->lock);
+
+ kfree(apic);
+}
+
+/*
+
*----------------------------------------------------------------------
+ * LAPIC interface
+
*----------------------------------------------------------------------
+ */
+
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+
+ spin_lock_bh(&apic->lock);
+ apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
+ spin_unlock_bh(&apic->lock);
+}
+
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ u64 tpr;
+
+ spin_lock_bh(&apic->lock);
+ tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
+ spin_unlock_bh(&apic->lock);
+
+ return (tpr & 0xf0) >> 4;
+}
+
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+
+ spin_lock_bh(&apic->lock);
+ if (apic->vcpu->vcpu_id)
+ value &= ~MSR_IA32_APICBASE_BSP;
+
+ apic->base_msr = value;
+ apic->base_address = apic->base_msr & MSR_IA32_APICBASE_BASE;
+
+ /* with FSB delivery interrupt, we can restart APIC
functionality */
+ apic_debug("apic base msr is 0x%016" PRIx64 ", and base address
is "
+ "0x%lx.\n", apic->base_msr, apic->base_address);
+
+ spin_unlock_bh(&apic->lock);
+}
+
+u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ u64 base;
+
+ spin_lock_bh(&apic->lock);
+ base = apic->base_msr;
+ spin_unlock_bh(&apic->lock);
+
+ return base;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
+
+static void lapic_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic;
+ int i;
+
+ apic_debug("%s\n", __FUNCTION__);
+
+ ASSERT(vcpu);
+ apic = vcpu->apic;
+ ASSERT(apic != NULL);
+
+ /* Stop the timer in case it's a reset to an active apic */
+ hrtimer_cancel(&apic->timer.dev);
+
+ spin_lock_bh(&apic->lock);
+
+ apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+ apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+
+ for (i = 0; i < APIC_LVT_NUM; i++)
+ apic_set_reg(apic, APIC_LVTT + 0x10 * i,
APIC_LVT_MASKED);
+
+ apic_set_reg(apic, APIC_DFR, 0xffffffffU);
+ apic_set_reg(apic, APIC_SPIV, 0xff);
+ apic_set_reg(apic, APIC_TASKPRI, 0);
+ apic_set_reg(apic, APIC_LDR, 0);
+ apic_set_reg(apic, APIC_ESR, 0);
+ apic_set_reg(apic, APIC_ICR, 0);
+ apic_set_reg(apic, APIC_ICR2, 0);
+ apic_set_reg(apic, APIC_TDCR, 0);
+ apic_set_reg(apic, APIC_TMICT, 0);
+ apic_set_reg(apic, APIC_TMCCT, 0);
+ for (i = 0; i < 8; i++) {
+ apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
+ apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
+ apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
+ }
+ apic->timer.divide_count = 0;
+ apic->timer.pending = 0;
+ apic->status = APIC_SW_DISABLE;
+ if (vcpu->vcpu_id == 0) {
+ apic->base_msr |= MSR_IA32_APICBASE_BSP;
+ }
+ apic_update_ppr(apic);
+
+ spin_unlock_bh(&apic->lock);
+
+ apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
+ "0x%016" PRIx64 ", base_address=0x%0lx.\n",
__FUNCTION__,
+ vcpu, GET_APIC_ID(apic_get_reg(apic, APIC_ID)),
+ apic->base_msr, apic->base_address);
+}
+
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ int ret = 0;
+
+ spin_lock_bh(&apic->lock);
+ ret = apic_enabled(apic);
+ spin_unlock_bh(&apic->lock);
+
+ return ret;
+}
+
+void *kvm_lapic_get_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ return apic->regs;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_get_regs);
+
+/*
+
*----------------------------------------------------------------------
+ * timer interface
+
*----------------------------------------------------------------------
+ */
+static int __apic_timer_fn(struct kvm_lapic *apic)
+{
+ u32 vector;
+ ktime_t now;
+ int result = 0;
+
+ if (unlikely(!apic_enabled(apic) ||
+ !apic_lvt_enabled(apic, APIC_LVTT))) {
+ apic_debug("%s: time interrupt although apic is down\n",
+ __FUNCTION__);
+ return 0;
+ }
+
+ vector = apic_lvt_vector(apic, APIC_LVTT);
+ now = apic->timer.dev.base->get_time();
+ apic->timer.last_update = now;
+ apic->timer.pending++;
+ __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+
+ if (apic_lvtt_period(apic)) {
+ u32 offset;
+ u32 tmict = apic_get_reg(apic, APIC_TMICT);
+
+ apic_set_reg(apic, APIC_TMCCT, tmict);
+ offset = APIC_BUS_CYCLE_NS * apic->timer.divide_count *
tmict;
+
+ result = 1;
+ apic->timer.dev.expires = ktime_add_ns(now, offset);
+ } else {
+ apic_set_reg(apic, APIC_TMCCT, 0);
+ }
+
+ return result;
+}
+
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
+{
+ struct kvm_lapic *apic;
+ int restart_timer = 0;
+
+ apic = container_of(data, struct kvm_lapic, timer.dev);
+
+ spin_lock_bh(&apic->lock);
+ restart_timer = __apic_timer_fn(apic);
+ spin_unlock_bh(&apic->lock);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic;
+
+ ASSERT(vcpu != NULL);
+ apic_debug("apic_init %d\n", vcpu->vcpu_id);
+
+ apic = kzalloc(sizeof(*apic), GFP_KERNEL);
+ if (!apic)
+ goto nomem;
+
+ vcpu->apic = apic;
+ spin_lock_init(&apic->lock);
+
+ apic->regs_page = alloc_page(GFP_KERNEL);
+ if (apic->regs_page == NULL) {
+ printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
+ vcpu->vcpu_id);
+ goto nomem;
+ }
+ apic->regs = page_address(apic->regs_page);
+ memset(apic->regs, 0, PAGE_SIZE);
+ apic->vcpu = vcpu;
+
+ hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
+ apic->timer.dev.function = apic_timer_fn;
+ apic->base_address = APIC_DEFAULT_PHYS_BASE;
+ apic->base_msr = APIC_DEFAULT_PHYS_BASE;
+
+ lapic_reset(vcpu);
+ apic->dev.read = apic_mmio_read;
+ apic->dev.write = apic_mmio_write;
+ apic->dev.in_range = apic_mmio_range;
+ apic->dev.private = apic;
+
+ return 0;
+nomem:
+ kvm_free_apic(apic);
+ return -ENOMEM;
+}
+
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->apic;
+ int highest_irr;
+
+ if (!apic || !apic_enabled(apic))
+ return -1;
+
+ highest_irr = apic_find_highest_irr(apic);
+ if ((highest_irr == -1) ||
+ ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+ return -1;
+ return highest_irr;
+}
+
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
+{
+ int vector = kvm_apic_has_interrupt(vcpu);
+ struct kvm_lapic *apic = vcpu->apic;
+
+ if (vector == -1)
+ return -1;
+
+ apic_set_vector(vector, apic->regs + APIC_ISR);
+ apic_update_ppr(apic);
+ apic_clear_irr(vector, apic);
+ return vector;
+}
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index f614800..732ebe1 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -460,6 +460,12 @@ static void init_sys_seg(struct vmcb_seg *seg,
uint32_t type)
static int svm_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ u64 msr;
+
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ if (vcpu == &vcpu->kvm->vcpus[0])
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
return 0;
}
@@ -590,9 +596,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->fpu_active = 1;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
- if (vcpu == &vcpu->kvm->vcpus[0])
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
return 0;
@@ -1472,8 +1475,8 @@ static void post_kvm_run_save(struct kvm_vcpu
*vcpu,
kvm_run->ready_for_interrupt_injection =
(vcpu->interrupt_window_open &&
vcpu->irq_summary ==
0);
kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags &
X86_EFLAGS_IF) != 0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
}
/*
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 598b2b2..853ad2d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1243,6 +1243,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
int i;
int ret = 0;
unsigned long kvm_vmx_return;
+ u64 msr;
if (!init_rmode_tss(vcpu->kvm)) {
ret = -ENOMEM;
@@ -1251,10 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
memset(vcpu->regs, 0, sizeof(vcpu->regs));
vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
- vcpu->cr8 = 0;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ set_cr8(vcpu, 0);
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (vcpu == &vcpu->kvm->vcpus[0])
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
fx_init(vcpu);
@@ -1797,7 +1799,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- vcpu->regs[reg] = vcpu->cr8;
+ vcpu->regs[reg] = get_cr8(vcpu);
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
return 1;
@@ -1894,8 +1896,8 @@ static void post_kvm_run_save(struct kvm_vcpu
*vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) !=
0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
kvm_run->ready_for_interrupt_injection =
(vcpu->interrupt_window_open &&
vcpu->irq_summary ==
0);
}
@@ -2057,7 +2059,6 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
preempted:
if (vcpu->guest_debug.enabled)
kvm_guest_debug_pre(vcpu);
-
again:
vmx_save_host_state(vcpu);
kvm_load_guest_fpu(vcpu);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 979cb3d..e7a2e96 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -41,7 +41,7 @@ struct kvm_memory_alias {
__u64 target_phys_addr;
};
-/* for KVM_SET_IRQ_LEVEL */
+/* for KVM_IRQ_LINE */
struct kvm_irq_level {
/*
* ACPI gsi notion of irq.
@@ -285,7 +285,8 @@ struct kvm_signal_mask {
/*
* Extension capability list.
*/
-#define KVM_CAP_PIC 0
+#define KVM_CAP_IRQCHIP 0
+#define KVM_CAP_HLT 1
/*
* ioctls for VM fds
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C2514E-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-13 12:51 ` Avi Kivity
[not found] ` <4697754D.7000003-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2007-07-13 12:51 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> Avi:
> Per our discussion, we will only support all user level irqchip
> or all kernel level irqchip.
> Here is the patch against lapic2 that passed RHEL5U test. Please give
> comments.
>
>
> thx,eddie
>
>
General comments:
- please split into a hlt patch, lapic patch, and ioapic patch. the
last patch can enable the irqchip capability, but intermediate results
have to be compilable.
- document files that were taken from Xen or qemu; specify what
revision was used as base
More comments below.
>
>
>
> diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
> index 952dff3..b29651b 100644
> --- a/drivers/kvm/Makefile
> +++ b/drivers/kvm/Makefile
> @@ -2,7 +2,7 @@
> # Makefile for Kernel-based Virtual Machine module
> #
>
> -kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o
> +kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o kvm_irq.o lapic.o
> ioapic.o
>
irq.c was renamed to kvm_irq.c? why?
> +
> +static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
> + struct kvm_lapic *target,
> + u8 vector, u8 trig_mode, u8 delivery_mode)
> +{
> + ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
> + delivery_mode);
> +
> + ASSERT((delivery_mode == dest_Fixed) ||
> + (delivery_mode == dest_LowestPrio));
> +
> + if (kvm_apic_set_irq(target, vector, trig_mode))
> + kvm_vcpu_kick(target->vcpu);
> +}
>
Put kvm_vcpu_kick() into kvm_apic_set_irq() so that callers need not do
that themselves.
> +
> + switch (delivery_mode) {
> + case dest_LowestPrio:
>
Wierd constant. How about IOAPIC_DEST_LOWEST_PRIO?
> + target =
> + kvm_apic_round_robin(ioapic->kvm, vector,
> deliver_bitmask);
> + if (target != NULL) {
> + ioapic_inj_irq(ioapic, target, vector,
> + trig_mode, delivery_mode);
> + } else {
> + ioapic_debug("null round robin: "
> + "mask=%x vector=%x
> delivery_mode=%x",
> + deliver_bitmask, vector,
> dest_LowestPrio);
> + }
>
Unnecessary {}.
> +
> +static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
> +{
> + int i;
> +
> + for (i = 0; i < IOAPIC_NUM_PINS; i++)
> + if (ioapic->redirtbl[i].fields.vector == vector)
> + return i;
> +
> + return -1;
> +}
>
blank line.
> +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
> +{
> + struct kvm_ioapic *ioapic = kvm->vioapic;
> + union ioapic_redir_entry *ent;
> + int gsi;
> +
> + gsi = get_eoi_gsi(ioapic, vector);
> + if (gsi == -1) {
> + printk(KERN_WARNING "Can't find redir item for %d
> EOI\n",
> + vector);
> + return;
> + }
> +
> + ent = &ioapic->redirtbl[gsi];
> + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
> +
> + ent->fields.remote_irr = 0;
> + if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) {
> + ioapic_deliver(ioapic, gsi);
> + }
>
excess braces.
> +
> +}
> +
> +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
> +{
> + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
> +
> + return ((addr >= ioapic->base_address &&
> + (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
> +}
> +
> +static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr,
> int len,
> + void *val)
> +{
> + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
> + u32 result;
> +
> + ioapic_debug("addr %lx", (unsigned long)addr);
> + ASSERT(!(addr & 0xf)); /* check alignment */
> +
> + addr &= 0xff;
> +
> + switch (addr) {
> + case IOAPIC_REG_SELECT:
> + result = ioapic->ioregsel;
> + break;
> +
> + case IOAPIC_REG_WINDOW:
> + result = ioapic_read_indirect(ioapic, addr, len);
> + break;
> +
> + default:
> + result = 0;
> + break;
> + }
> + switch (len) {
> + case 1:
> + case 2:
> + case 4:
> + case 8:
> + memcpy(val, (char *)&result, len);
>
If len == 8, you're copying a bit of kernel stack into the guest. While
it's hardly a security hole, we'd better not do that.
> +static void vcpu_kick_intr(void *info)
> +{
> +#ifdef DEBUG
> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
> + printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
> +#endif
> +}
> +
> +void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
> +{
> + int ipi_pcpu = vcpu->cpu;
> +
> + if (vcpu->guest_mode)
> + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
> 0, 0);
> +}
>
What if it's in hlt state?
> +
> diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
> index a6b3869..4b1b4b7 100644
> --- a/drivers/kvm/irq.h
> +++ b/drivers/kvm/irq.h
> @@ -26,12 +26,11 @@
>
> typedef void irq_request_func(void *opaque, int level);
>
> -struct kvm_pic;
> struct kvm_pic_state {
> - u8 last_irr; /* edge detection */
> - u8 irr; /* interrupt request register */
> - u8 imr; /* interrupt mask register */
> - u8 isr; /* interrupt service register */
> + u8 last_irr; /* edge detection */
> + u8 irr; /* interrupt request register */
> + u8 imr; /* interrupt mask register */
> + u8 isr; /* interrupt service register */
> u8 priority_add; /* highest irq priority */
> u8 irq_base;
> u8 read_reg_select;
> @@ -48,7 +47,7 @@ struct kvm_pic_state {
> };
>
> struct kvm_pic {
> - struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave pic
> */
> + struct kvm_pic_state pics[2]; /* 0 is master pic, 1 is slave
> pic */
> irq_request_func *irq_request;
> void *irq_request_opaque;
> int output; /* intr from master PIC */
>
Please separate the pic changes so I can fold them into the existing pic
patch.
> +
> +struct kvm_ioapic {
> + struct kvm_io_device dev;
> + unsigned long base_address;
> + struct kvm *kvm;
> + u32 ioregsel;
> + u32 id;
> + u32 irr;
> + union ioapic_redir_entry {
> + u64 bits;
> + struct {
> + u8 vector;
> + u8 delivery_mode:3;
> + u8 dest_mode:1;
> + u8 delivery_status:1;
> + u8 polarity:1;
> + u8 remote_irr:1;
> + u8 trig_mode:1;
> + u8 mask:1;
> + u8 reserve:7;
> + u8 reserved[4];
> + u8 dest_id;
> + } fields;
> + } redirtbl[IOAPIC_NUM_PINS];
> +};
>
Which lock protects this?
> +
> +struct kvm_lapic {
> + spinlock_t lock; /* TODO: need? */
>
I think not. Maybe when we have msi support?
> +
> +#ifdef DEBUG
> +#define ASSERT(x)
> \
> + if (!(x)) {
> \
> + printk(KERN_EMERG "assertion failed %s: %d: %s\n",
> \
> + __FILE__, __LINE__, #x);
> \
> + BUG();
> \
> + }
>
Wrap in a do { } while (0) to avoid surprises.
> #endif
> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
> index f1a6773..a886ba9 100644
> --- a/drivers/kvm/kvm.h
> +++ b/drivers/kvm/kvm.h
> @@ -334,15 +334,13 @@ struct kvm_vcpu {
> };
> struct mutex mutex;
> int cpu;
> - int launched;
> + char vcpu_id;
>
There is already a vcpu_id in kvm.git...
> + char launched;
>
???
>
> +unsigned long get_cr8(struct kvm_vcpu *vcpu)
> +{
> + if (irqchip_in_kernel(vcpu->kvm))
> + return kvm_lapic_get_cr8(vcpu);
> + else
> + return vcpu->cr8;
> +}
> +EXPORT_SYMBOL_GPL(get_cr8);
>
How about keep vcpu->cr8 even with kernel lapic? then we don't need this.
> +
> +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
> +{
> + if (irqchip_in_kernel(vcpu->kvm))
> + return vcpu->apic->base_msr;
> + else
> + return vcpu->apic_base;
> +}
> +EXPORT_SYMBOL_GPL(kvm_get_apic_base);
>
ditto.
> +
> +#define VEC_POS(v) ((v) & (32 - 1))
> +#define REG_POS(v) (((v) >> 5) << 4)
> +#define apic_test_and_set_vector(vec, bitmap) \
> + test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_test_and_clear_vector(vec, bitmap) \
> + test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_set_vector(vec, bitmap) \
> + set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +#define apic_clear_vector(vec, bitmap) \
> + clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec))
> +
> +#define apic_hw_enabled(apic) ((apic)->base_msr &
> MSR_IA32_APICBASE_ENABLE)
> +#define apic_sw_enabled(apic) (!((apic)->status & APIC_SW_DISABLE))
> +#define apic_enabled(apic) (apic_sw_enabled(apic) && \
> + apic_hw_enabled(apic))
>
These would be better as inline functions (type checking, etc.)
> +
> +#define KVM_APIC_ID(apic) \
> + (GET_APIC_ID(apic_get_reg(apic, APIC_ID)))
> +
> +#define apic_lvt_enabled(apic, lvt_type) \
> + (!(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED))
> +
> +#define apic_lvt_vector(apic, lvt_type) \
> + (apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK)
> +
> +#define apic_lvt_dm(apic, lvt_type) \
> + (apic_get_reg(apic, lvt_type) & APIC_MODE_MASK)
> +
> +#define apic_lvtt_period(apic) \
> + (apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC)
>
more functions
Where are the state save/restore? they can be added later, so long as
the capability is enabled only after everything is working.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <4697754D.7000003-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-17 3:37 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C7272B-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-17 3:37 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Avi:
Most of the comments are fixed, but need to have a double check
on several point of them. Can u have a look?
Eddie
>> +
>> + switch (delivery_mode) {
>> + case dest_LowestPrio:
>>
>
>Wierd constant. How about IOAPIC_DEST_LOWEST_PRIO?
dest_LowestPrio is defined in native Linux asm-i386/io_apic.h &
asm-x86_64/io_apic.h. Do u want to add new definition?
>
>> +static void vcpu_kick_intr(void *info)
>> +{
>> +#ifdef DEBUG
>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
>> + printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
>> +#endif
>> +}
>> +
>> +void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
>> +{
>> + int ipi_pcpu = vcpu->cpu;
>> +
>> + if (vcpu->guest_mode)
>> + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
>> 0, 0);
>> +}
>>
>
>What if it's in hlt state?
If guest is in hlt time, vcpu->guest_mode=0. (VM Exit always clear this
one).
BTW, even we give a redunadant IPI, it is just a minor performance issue
only.
>> +
>> +struct kvm_ioapic {
>> + struct kvm_io_device dev;
>> + unsigned long base_address;
>> + struct kvm *kvm;
>> + u32 ioregsel;
>> + u32 id;
>> + u32 irr;
>> + union ioapic_redir_entry {
>> + u64 bits;
>> + struct {
>> + u8 vector;
>> + u8 delivery_mode:3;
>> + u8 dest_mode:1;
>> + u8 delivery_status:1;
>> + u8 polarity:1;
>> + u8 remote_irr:1;
>> + u8 trig_mode:1;
>> + u8 mask:1;
>> + u8 reserve:7;
>> + u8 reserved[4];
>> + u8 dest_id;
>> + } fields;
>> + } redirtbl[IOAPIC_NUM_PINS];
>> +};
>>
>
>Which lock protects this?
kvm->lock.
When guest do ioapic ops, it is in shadow page fault handler,
and can take kvm->lock for page fault.
If asynchronize Qemu do ioapic ops, it will take this lock too.
>
>> +
>> +struct kvm_lapic {
>> + spinlock_t lock; /* TODO: need? */
>>
>
>I think not. Maybe when we have msi support?
I want to use kvm->lock for lapic too in future. But leave as it is now.
The key thing is to cancel hrtimer when VP migrates.
>> --- a/drivers/kvm/kvm.h
>> +++ b/drivers/kvm/kvm.h
>> @@ -334,15 +334,13 @@ struct kvm_vcpu {
>> };
>> struct mutex mutex;
>> int cpu;
>> - int launched;
>> + char vcpu_id;
>>
>
>There is already a vcpu_id in kvm.git...
>
>> + char launched;
>>
>
>
>???
I saw you added vcpu_id in main steam, can u pull this to lapic2 too?
So I don't need to add redundantly.
>
>>
>> +unsigned long get_cr8(struct kvm_vcpu *vcpu)
>> +{
>> + if (irqchip_in_kernel(vcpu->kvm))
>> + return kvm_lapic_get_cr8(vcpu);
>> + else
>> + return vcpu->cr8;
>> +}
>> +EXPORT_SYMBOL_GPL(get_cr8);
>>
>
>How about keep vcpu->cr8 even with kernel lapic? then we
>don't need this.
We need to sync cr8 with vTPR, Are u suggesting to sync them every
VM_EXIT?
That means we sparse apic registers in different place and extra sync
issue.
I can seperate the patch as a preparation patch to wrap all cr8 access.
Which one is prefered?
>
>> +
>> +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
>> +{
>> + if (irqchip_in_kernel(vcpu->kvm))
>> + return vcpu->apic->base_msr;
>> + else
>> + return vcpu->apic_base;
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_get_apic_base);
>>
>
>ditto.
>
>Where are the state save/restore? they can be added later, so long as
>the capability is enabled only after everything is working.
>
this is only for branch check-in and we need to stay in branch for one
more week.
Eddie
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C7272B-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-17 6:46 ` Dor Laor
[not found] ` <64F9B87B6B770947A9F8391472E032160CC162E8-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
2007-07-17 8:03 ` Avi Kivity
1 sibling, 1 reply; 20+ messages in thread
From: Dor Laor @ 2007-07-17 6:46 UTC (permalink / raw)
To: Dong, Eddie, Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
>>> +static void vcpu_kick_intr(void *info)
>>> +{
>>> +#ifdef DEBUG
>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
>>> + printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
>>> +#endif
>>> +}
>>> +
>>> +void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
>>> +{
>>> + int ipi_pcpu = vcpu->cpu;
>>> +
>>> + if (vcpu->guest_mode)
>>> + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
>>> 0, 0);
>>> +}
>>>
>>
>>What if it's in hlt state?
>
>If guest is in hlt time, vcpu->guest_mode=0. (VM Exit always clear this
>one).
>BTW, even we give a redunadant IPI, it is just a minor performance
issue
>only.
I think that Avi was looking for the call to
wake_up_interruptible(&vcpu->wq);
Maybe it's hard to realize the apic code path where you call it.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <64F9B87B6B770947A9F8391472E032160CC162E8-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
@ 2007-07-17 7:36 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72915-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-17 7:36 UTC (permalink / raw)
To: Dor Laor, Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
>>
>>If guest is in hlt time, vcpu->guest_mode=0. (VM Exit always
>clear this
>>one).
>>BTW, even we give a redunadant IPI, it is just a minor performance
>issue
>>only.
>
>I think that Avi was looking for the call to
>wake_up_interruptible(&vcpu->wq);
>Maybe it's hard to realize the apic code path where you call it.
>
O, yes, that is should be in hlt emulation code.
Eddie
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C7272B-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-17 6:46 ` Dor Laor
@ 2007-07-17 8:03 ` Avi Kivity
[not found] ` <469C77D1.6010003-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
1 sibling, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2007-07-17 8:03 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> Avi:
> Most of the comments are fixed, but need to have a double check
> on several point of them. Can u have a look?
> Eddie
>
>
>
>
>>> +
>>> + switch (delivery_mode) {
>>> + case dest_LowestPrio:
>>>
>>>
>> Wierd constant. How about IOAPIC_DEST_LOWEST_PRIO?
>>
>
> dest_LowestPrio is defined in native Linux asm-i386/io_apic.h &
> asm-x86_64/io_apic.h. Do u want to add new definition?
>
>
No, wierd constants from Linux are okay.
>>> +
>>> +struct kvm_ioapic {
>>> + struct kvm_io_device dev;
>>> + unsigned long base_address;
>>> + struct kvm *kvm;
>>> + u32 ioregsel;
>>> + u32 id;
>>> + u32 irr;
>>> + union ioapic_redir_entry {
>>> + u64 bits;
>>> + struct {
>>> + u8 vector;
>>> + u8 delivery_mode:3;
>>> + u8 dest_mode:1;
>>> + u8 delivery_status:1;
>>> + u8 polarity:1;
>>> + u8 remote_irr:1;
>>> + u8 trig_mode:1;
>>> + u8 mask:1;
>>> + u8 reserve:7;
>>> + u8 reserved[4];
>>> + u8 dest_id;
>>> + } fields;
>>> + } redirtbl[IOAPIC_NUM_PINS];
>>> +};
>>>
>>>
>> Which lock protects this?
>>
>
> kvm->lock.
> When guest do ioapic ops, it is in shadow page fault handler,
> and can take kvm->lock for page fault.
> If asynchronize Qemu do ioapic ops, it will take this lock too.
>
>
>
Okay.
>>> +
>>> +struct kvm_lapic {
>>> + spinlock_t lock; /* TODO: need? */
>>>
>>>
>> I think not. Maybe when we have msi support?
>>
>
> I want to use kvm->lock for lapic too in future. But leave as it is now.
> The key thing is to cancel hrtimer when VP migrates.
>
>
Okay.
>>> --- a/drivers/kvm/kvm.h
>>> +++ b/drivers/kvm/kvm.h
>>> @@ -334,15 +334,13 @@ struct kvm_vcpu {
>>> };
>>> struct mutex mutex;
>>> int cpu;
>>> - int launched;
>>> + char vcpu_id;
>>>
>>>
>> There is already a vcpu_id in kvm.git...
>>
>>
>>> + char launched;
>>>
>>>
>> ???
>>
>
> I saw you added vcpu_id in main steam, can u pull this to lapic2 too?
> So I don't need to add redundantly.
>
>
Rebased and pushed.
>>>
>>> +unsigned long get_cr8(struct kvm_vcpu *vcpu)
>>> +{
>>> + if (irqchip_in_kernel(vcpu->kvm))
>>> + return kvm_lapic_get_cr8(vcpu);
>>> + else
>>> + return vcpu->cr8;
>>> +}
>>> +EXPORT_SYMBOL_GPL(get_cr8);
>>>
>>>
>> How about keep vcpu->cr8 even with kernel lapic? then we
>> don't need this.
>>
>
> We need to sync cr8 with vTPR, Are u suggesting to sync them every
> VM_EXIT?
> That means we sparse apic registers in different place and extra sync
> issue.
> I can seperate the patch as a preparation patch to wrap all cr8 access.
> Which one is prefered?
>
A separate patch, please.
>
>>> +
>>> +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
>>> +{
>>> + if (irqchip_in_kernel(vcpu->kvm))
>>> + return vcpu->apic->base_msr;
>>> + else
>>> + return vcpu->apic_base;
>>> +}
>>> +EXPORT_SYMBOL_GPL(kvm_get_apic_base);
>>>
>>>
>> ditto.
>>
>> Where are the state save/restore? they can be added later, so long as
>> the capability is enabled only after everything is working.
>>
>>
> this is only for branch check-in and we need to stay in branch for one
> more week.
Sure.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72915-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-17 8:04 ` Avi Kivity
0 siblings, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2007-07-17 8:04 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
>
>
>>> If guest is in hlt time, vcpu->guest_mode=0. (VM Exit always
>>>
>> clear this
>>
>>> one).
>>> BTW, even we give a redunadant IPI, it is just a minor performance
>>>
>> issue
>>
>>> only.
>>>
>> I think that Avi was looking for the call to
>> wake_up_interruptible(&vcpu->wq);
>> Maybe it's hard to realize the apic code path where you call it.
>>
>>
> O, yes, that is should be in hlt emulation code.
>
That depends on the order of patches... if hlt goes in first, this needs
to be fixed. (I don't mind the order here really)
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <469C77D1.6010003-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-17 15:15 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72AAE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-17 15:15 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1: Type: text/plain, Size: 7271 bytes --]
>>>>
>>>> +unsigned long get_cr8(struct kvm_vcpu *vcpu)
>>>> +{
>>>> + if (irqchip_in_kernel(vcpu->kvm))
>>>> + return kvm_lapic_get_cr8(vcpu);
>>>> + else
>>>> + return vcpu->cr8;
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(get_cr8);
>>>>
>>>>
>>> How about keep vcpu->cr8 even with kernel lapic? then we don't
>>> need this.
>>>
>>
>> We need to sync cr8 with vTPR, Are u suggesting to sync them every
>> VM_EXIT? That means we sparse apic registers in different place and
>> extra sync issue. I can seperate the patch as a preparation patch to
>> wrap all cr8 access. Which one is prefered?
>>
>
> A separate patch, please.
>
This patch is to wrap APIC base register and CR8 operation which can
provide a unique API for user level irqchip and kernel irqchip.
This is a preparation of merging lapic/ioapic patch.
against 0dfb860def58bfb2daa000af490ed1986373fea5 / lapic2 merged patch.
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index f02fcda..caa7c97 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -573,6 +573,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long
cr0);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
+unsigned long get_cr8(struct kvm_vcpu *vcpu);
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 34c3572..b40a315 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(set_cr3);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
- if ( cr8 & CR8_RESEVED_BITS) {
+ if (cr8 & CR8_RESEVED_BITS) {
printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n",
cr8);
inject_gp(vcpu);
return;
@@ -634,6 +634,24 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long
cr8)
}
EXPORT_SYMBOL_GPL(set_cr8);
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+ return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
+{
+ return vcpu->apic_base;
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
+
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+{
+ vcpu->apic_base = data;
+}
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+
void fx_init(struct kvm_vcpu *vcpu)
{
struct __attribute__ ((__packed__)) fx_image_s {
@@ -1508,7 +1526,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32
msr, u64 *pdata)
data = 3;
break;
case MSR_IA32_APICBASE:
- data = vcpu->apic_base;
+ data = kvm_get_apic_base(vcpu);
break;
case MSR_IA32_MISC_ENABLE:
data = vcpu->ia32_misc_enable_msr;
@@ -1586,7 +1604,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32
msr, u64 data)
case 0x200 ... 0x2ff: /* MTRRs */
break;
case MSR_IA32_APICBASE:
- vcpu->apic_base = data;
+ kvm_set_apic_base(vcpu, data);
break;
case MSR_IA32_MISC_ENABLE:
vcpu->ia32_misc_enable_msr = data;
@@ -1902,7 +1920,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu
*vcpu, struct kvm_run *kvm_run)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
/* re-sync apic's tpr */
- vcpu->cr8 = kvm_run->cr8;
+ set_cr8(vcpu, kvm_run->cr8);
if (vcpu->pio.cur_count) {
r = complete_pio(vcpu);
@@ -2051,9 +2069,9 @@ static int kvm_vcpu_ioctl_get_sregs(struct
kvm_vcpu *vcpu,
sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3;
sregs->cr4 = vcpu->cr4;
- sregs->cr8 = vcpu->cr8;
+ sregs->cr8 = get_cr8(vcpu);
sregs->efer = vcpu->shadow_efer;
- sregs->apic_base = vcpu->apic_base;
+ sregs->apic_base = kvm_get_apic_base(vcpu);
memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
sizeof sregs->interrupt_bitmap);
@@ -2089,13 +2107,13 @@ static int kvm_vcpu_ioctl_set_sregs(struct
kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
vcpu->cr3 = sregs->cr3;
- vcpu->cr8 = sregs->cr8;
+ set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
kvm_arch_ops->set_efer(vcpu, sregs->efer);
#endif
- vcpu->apic_base = sregs->apic_base;
+ kvm_set_apic_base(vcpu, sregs->apic_base);
kvm_arch_ops->decache_cr4_guest_bits(vcpu);
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 5c058fa..8d1ad61 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -459,6 +459,12 @@ static void init_sys_seg(struct vmcb_seg *seg,
uint32_t type)
static int svm_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ u64 msr;
+
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ if (vcpu->vcpu_id == 0)
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
return 0;
}
@@ -589,9 +595,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->fpu_active = 1;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
- if (vcpu->vcpu_id == 0)
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
return 0;
@@ -1435,8 +1438,8 @@ static void post_kvm_run_save(struct kvm_vcpu
*vcpu,
kvm_run->ready_for_interrupt_injection =
(vcpu->interrupt_window_open &&
vcpu->irq_summary ==
0);
kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags &
X86_EFLAGS_IF) != 0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
}
/*
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index f3e7818..663894d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1243,6 +1243,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
int i;
int ret = 0;
unsigned long kvm_vmx_return;
+ u64 msr;
if (!init_rmode_tss(vcpu->kvm)) {
ret = -ENOMEM;
@@ -1251,10 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
memset(vcpu->regs, 0, sizeof(vcpu->regs));
vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
- vcpu->cr8 = 0;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ set_cr8(vcpu, 0);
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (vcpu->vcpu_id == 0)
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
fx_init(vcpu);
@@ -1793,7 +1795,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- vcpu->regs[reg] = vcpu->cr8;
+ vcpu->regs[reg] = get_cr8(vcpu);
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
return 1;
@@ -1890,8 +1892,8 @@ static void post_kvm_run_save(struct kvm_vcpu
*vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) !=
0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
kvm_run->ready_for_interrupt_injection =
(vcpu->interrupt_window_open &&
vcpu->irq_summary ==
0);
}
[-- Attachment #2: cr8.patch --]
[-- Type: application/octet-stream, Size: 6050 bytes --]
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index f02fcda..caa7c97 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -573,6 +573,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
+unsigned long get_cr8(struct kvm_vcpu *vcpu);
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 34c3572..b40a315 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(set_cr3);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
- if ( cr8 & CR8_RESEVED_BITS) {
+ if (cr8 & CR8_RESEVED_BITS) {
printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
inject_gp(vcpu);
return;
@@ -634,6 +634,24 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
}
EXPORT_SYMBOL_GPL(set_cr8);
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+ return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
+{
+ return vcpu->apic_base;
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
+
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+{
+ vcpu->apic_base = data;
+}
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+
void fx_init(struct kvm_vcpu *vcpu)
{
struct __attribute__ ((__packed__)) fx_image_s {
@@ -1508,7 +1526,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = 3;
break;
case MSR_IA32_APICBASE:
- data = vcpu->apic_base;
+ data = kvm_get_apic_base(vcpu);
break;
case MSR_IA32_MISC_ENABLE:
data = vcpu->ia32_misc_enable_msr;
@@ -1586,7 +1604,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case 0x200 ... 0x2ff: /* MTRRs */
break;
case MSR_IA32_APICBASE:
- vcpu->apic_base = data;
+ kvm_set_apic_base(vcpu, data);
break;
case MSR_IA32_MISC_ENABLE:
vcpu->ia32_misc_enable_msr = data;
@@ -1902,7 +1920,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
/* re-sync apic's tpr */
- vcpu->cr8 = kvm_run->cr8;
+ set_cr8(vcpu, kvm_run->cr8);
if (vcpu->pio.cur_count) {
r = complete_pio(vcpu);
@@ -2051,9 +2069,9 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3;
sregs->cr4 = vcpu->cr4;
- sregs->cr8 = vcpu->cr8;
+ sregs->cr8 = get_cr8(vcpu);
sregs->efer = vcpu->shadow_efer;
- sregs->apic_base = vcpu->apic_base;
+ sregs->apic_base = kvm_get_apic_base(vcpu);
memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
sizeof sregs->interrupt_bitmap);
@@ -2089,13 +2107,13 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
vcpu->cr3 = sregs->cr3;
- vcpu->cr8 = sregs->cr8;
+ set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
kvm_arch_ops->set_efer(vcpu, sregs->efer);
#endif
- vcpu->apic_base = sregs->apic_base;
+ kvm_set_apic_base(vcpu, sregs->apic_base);
kvm_arch_ops->decache_cr4_guest_bits(vcpu);
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 5c058fa..8d1ad61 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -459,6 +459,12 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
static int svm_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ u64 msr;
+
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ if (vcpu->vcpu_id == 0)
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
return 0;
}
@@ -589,9 +595,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->fpu_active = 1;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
- if (vcpu->vcpu_id == 0)
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
return 0;
@@ -1435,8 +1438,8 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
vcpu->irq_summary == 0);
kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
}
/*
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index f3e7818..663894d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1243,6 +1243,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
int i;
int ret = 0;
unsigned long kvm_vmx_return;
+ u64 msr;
if (!init_rmode_tss(vcpu->kvm)) {
ret = -ENOMEM;
@@ -1251,10 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
memset(vcpu->regs, 0, sizeof(vcpu->regs));
vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
- vcpu->cr8 = 0;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ set_cr8(vcpu, 0);
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (vcpu->vcpu_id == 0)
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
fx_init(vcpu);
@@ -1793,7 +1795,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- vcpu->regs[reg] = vcpu->cr8;
+ vcpu->regs[reg] = get_cr8(vcpu);
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
return 1;
@@ -1890,8 +1892,8 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
vcpu->irq_summary == 0);
}
[-- Attachment #3: Type: text/plain, Size: 286 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
[-- Attachment #4: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72AAE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-18 2:24 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CD4-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 2:27 ` Dong, Eddie
2007-07-18 8:36 ` Avi Kivity
2 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-18 2:24 UTC (permalink / raw)
To: Dong, Eddie, Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1: Type: text/plain, Size: 39908 bytes --]
Avi:
This is the cleaned and branch check-in ready candidate patch
for lapic based on previous cr8 patch.
It needs the later ioapic patch to fully function.
thx,eddie
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 952dff3..3bf7276 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index b08005c..0b4430a 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -30,14 +30,13 @@
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s = pic_irqchip(v->kvm);
-
- if (s->output) /* PIC */
- return 1;
- /*
- * TODO: APIC
- */
- return 0;
+ struct kvm_pic *s;
+
+ if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
+ s = pic_irqchip(v->kvm); /* PIC */
+ return s->output;
+ }
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
@@ -46,16 +45,36 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s = pic_irqchip(v->kvm);
+ struct kvm_pic *s;
int vector;
- s->output = 0;
- vector = kvm_pic_read_irq(s);
- if (vector != -1)
- return vector;
- /*
- * TODO: APIC
- */
- return -1;
+ vector = kvm_get_apic_interrupt(v); /* APIC */
+ if (vector == -1) {
+ s = pic_irqchip(v->kvm);
+ s->output = 0; /* PIC */
+ vector = kvm_pic_read_irq(s);
+ }
+ return vector;
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+ printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int ipi_pcpu = vcpu->cpu;
+
+ if (vcpu->guest_mode)
+ smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
0, 0);
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+ /* TODO: for kernel IOAPIC */
+}
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index a6b3869..24f8b31 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -26,7 +26,6 @@
typedef void irq_request_func(void *opaque, int level);
-struct kvm_pic;
struct kvm_pic_state {
u8 last_irr; /* edge detection */
u8 irr; /* interrupt request register */
@@ -61,4 +60,46 @@ int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+struct kvm_lapic {
+ spinlock_t lock; /* TODO for revise */
+ u32 status;
+ u64 base_msr;
+ unsigned long base_address;
+ struct kvm_io_device dev;
+ struct {
+ unsigned long pending;
+ u32 divide_count;
+ ktime_t last_update;
+ struct hrtimer dev;
+ } timer;
+ struct kvm_vcpu *vcpu;
+ struct page *regs_page;
+ void *regs;
+};
+
+#ifdef DEBUG
+#define ASSERT(x)
\
+do {
\
+ if (!(x)) {
\
+ printk(KERN_EMERG "assertion failed %s: %d: %s\n",
\
+ __FILE__, __LINE__, #x);
\
+ BUG();
\
+ }
\
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_apic(struct kvm_lapic *apic);
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+
#endif
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 1d1ee4f..99f9440 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -335,15 +335,13 @@ struct kvm_vcpu {
};
struct mutex mutex;
int cpu;
- int launched;
+ char vcpu_id;
+ char launched;
u64 host_tsc;
struct kvm_run *run;
int interrupt_window_open;
int guest_mode;
unsigned long requests;
- unsigned long irq_summary; /* bit vector: 1 per word in
irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
- unsigned long irq_pending[NR_IRQ_WORDS];
unsigned long regs[NR_VCPU_REGS]; /* for rsp:
vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
@@ -354,10 +352,23 @@ struct kvm_vcpu {
struct page *para_state_page;
gpa_t hypercall_gpa;
unsigned long cr4;
- unsigned long cr8;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
- u64 apic_base;
+ union {
+ struct { /* user irqchip context */
+ /*
+ * bit vector: 1 per word in irq_pending
+ */
+ unsigned long irq_summary;
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+ unsigned long irq_pending[NR_IRQ_WORDS];
+ unsigned long cr8;
+ u64 apic_base;
+ };
+ struct { /* kernel irqchip context */
+ struct kvm_lapic *apic;
+ };
+ };
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
@@ -602,6 +613,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long
cr0);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
+unsigned long get_cr8(struct kvm_vcpu *vcpu);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 98d9f32..987dbb2 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -325,6 +325,7 @@ static struct kvm *kvm_create_vm(void)
mutex_init(&vcpu->mutex);
vcpu->cpu = -1;
+ vcpu->vcpu_id = i;
vcpu->kvm = kvm;
vcpu->mmu.root_hpa = INVALID_PAGE;
init_waitqueue_head(&vcpu->wq);
@@ -397,6 +398,7 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
vcpu_load(vcpu);
kvm_mmu_destroy(vcpu);
+ kvm_free_apic(vcpu->apic);
vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu);
free_page((unsigned long)vcpu->run);
@@ -632,15 +634,46 @@ EXPORT_SYMBOL_GPL(set_cr3);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
- if ( cr8 & CR8_RESEVED_BITS) {
+ if (cr8 & CR8_RESEVED_BITS) {
printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n",
cr8);
inject_gp(vcpu);
return;
}
- vcpu->cr8 = cr8;
+ if (irqchip_in_kernel(vcpu->kvm))
+ kvm_lapic_set_tpr(vcpu, cr8);
+ else
+ vcpu->cr8 = cr8;
}
EXPORT_SYMBOL_GPL(set_cr8);
+unsigned long get_cr8(struct kvm_vcpu *vcpu)
+{
+ if (irqchip_in_kernel(vcpu->kvm))
+ return kvm_lapic_get_cr8(vcpu);
+ else
+ return vcpu->cr8;
+}
+EXPORT_SYMBOL_GPL(get_cr8);
+
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
+{
+ if (irqchip_in_kernel(vcpu->kvm))
+ return vcpu->apic->base_msr;
+ else
+ return vcpu->apic_base;
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
+
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+{
+ /* TODO: reserve bits check */
+ if (irqchip_in_kernel(vcpu->kvm))
+ kvm_lapic_set_base(vcpu, data);
+ else
+ vcpu->apic_base = data;
+}
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+
void fx_init(struct kvm_vcpu *vcpu)
{
struct __attribute__ ((__packed__)) fx_image_s {
@@ -1055,15 +1088,31 @@ static int emulator_write_std(unsigned long
addr,
return X86EMUL_UNHANDLEABLE;
}
+/*
+ * Only apic need an MMIO device hook, so shortcut now..
+ */
+static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu
*vcpu,
+ gpa_t addr)
+{
+ struct kvm_io_device *dev;
+
+ if (vcpu->apic) {
+ dev = &vcpu->apic->dev;
+ if (dev->in_range(dev, addr))
+ return dev;
+ }
+ return NULL;
+}
+
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
gpa_t addr)
{
- /*
- * Note that its important to have this wrapper function because
- * in the very near future we will be checking for MMIOs against
- * the LAPIC as well as the general MMIO bus
- */
- return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+ struct kvm_io_device *dev;
+
+ dev = vcpu_find_pervcpu_dev(vcpu, addr);
+ if (dev == NULL)
+ dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+ return dev;
}
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -1574,7 +1623,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32
msr, u64 *pdata)
data = 3;
break;
case MSR_IA32_APICBASE:
- data = vcpu->apic_base;
+ data = kvm_get_apic_base(vcpu);
break;
case MSR_IA32_MISC_ENABLE:
data = vcpu->ia32_misc_enable_msr;
@@ -1652,7 +1701,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32
msr, u64 data)
case 0x200 ... 0x2ff: /* MTRRs */
break;
case MSR_IA32_APICBASE:
- vcpu->apic_base = data;
+ kvm_set_apic_base(vcpu, data);
break;
case MSR_IA32_MISC_ENABLE:
vcpu->ia32_misc_enable_msr = data;
@@ -1942,7 +1991,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu
*vcpu, struct kvm_run *kvm_run)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
/* re-sync apic's tpr */
- vcpu->cr8 = kvm_run->cr8;
+ set_cr8(vcpu, kvm_run->cr8);
if (vcpu->pio.cur_count) {
r = complete_pio(vcpu);
@@ -2091,9 +2140,9 @@ static int kvm_vcpu_ioctl_get_sregs(struct
kvm_vcpu *vcpu,
sregs->cr2 = vcpu->cr2;
sregs->cr3 = vcpu->cr3;
sregs->cr4 = vcpu->cr4;
- sregs->cr8 = vcpu->cr8;
+ sregs->cr8 = get_cr8(vcpu);
sregs->efer = vcpu->shadow_efer;
- sregs->apic_base = vcpu->apic_base;
+ sregs->apic_base = kvm_get_apic_base(vcpu);
memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
sizeof sregs->interrupt_bitmap);
@@ -2129,13 +2178,13 @@ static int kvm_vcpu_ioctl_set_sregs(struct
kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->cr3 != sregs->cr3;
vcpu->cr3 = sregs->cr3;
- vcpu->cr8 = sregs->cr8;
+ set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->shadow_efer != sregs->efer;
#ifdef CONFIG_X86_64
kvm_arch_ops->set_efer(vcpu, sregs->efer);
#endif
- vcpu->apic_base = sregs->apic_base;
+ kvm_set_apic_base(vcpu, sregs->apic_base);
kvm_arch_ops->decache_cr4_guest_bits(vcpu);
@@ -2449,6 +2498,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm
*kvm, int n)
if (r < 0)
goto out_free_vcpus;
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ r = kvm_create_lapic(vcpu);
+ if (r < 0)
+ goto out_free_vcpus;
+ }
kvm_arch_ops->vcpu_load(vcpu);
r = kvm_mmu_setup(vcpu);
if (r >= 0)
@@ -2821,8 +2875,6 @@ static long kvm_vm_ioctl(struct file *filp,
kvm->vpic = kvm_create_pic(kvm);
if (kvm->vpic)
r = 0;
- else
- goto out;
break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -2952,7 +3004,7 @@ static long kvm_dev_ioctl(struct file *filp,
int ext = (long)argp;
switch (ext) {
- case KVM_CAP_PIC:
+ case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
r = 1;
break;
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
new file mode 100644
index 0000000..7408b75
--- /dev/null
+++ b/drivers/kvm/lapic.c
@@ -0,0 +1,971 @@
+
+/*
+ * Local APIC virtualization
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2007 Novell
+ * Copyright (C) 2007 Intel
+ *
+ * Authors:
+ * Dor Laor <dor.laor-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
+ * Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
+ * Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+ *
+ * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include "irq.h"
+
+#define PRId64 "d"
+#define PRIx64 "llx"
+#define PRIu64 "u"
+#define PRIo64 "o"
+
+#define APIC_BUS_CYCLE_NS 1
+
+/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define apic_debug(fmt, arg...)
+
+#define APIC_LVT_NUM 6
+/* 14 is the version for Xeon and Pentium 8.4.8*/
+#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) <<
16))
+#define LAPIC_MMIO_LENGTH (1 << 12)
+/* followed define is not in apicdef.h */
+#define APIC_SHORT_MASK 0xc0000
+#define APIC_DEST_NOSHORT 0x0
+#define APIC_DEST_MASK 0x800
+#define _APIC_SW_DISABLE 0
+#define APIC_SW_DISABLE (1 << _APIC_SW_DISABLE)
+#define MAX_APIC_VECTOR 256
+
+#define VEC_POS(v) ((v) & (32 - 1))
+#define REG_POS(v) (((v) >> 5) << 4)
+static inline int apic_test_and_set_vector(int vec, void *bitmap)
+{
+ return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int apic_test_and_clear_vector(int vec, void *bitmap)
+{
+ return test_and_clear_bit(VEC_POS(vec), (bitmap) +
REG_POS(vec));
+}
+
+static inline void apic_set_vector(int vec, void *bitmap)
+{
+ set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline void apic_clear_vector(int vec, void *bitmap)
+{
+ clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int apic_hw_enabled(struct kvm_lapic *apic)
+{
+ return (apic)->base_msr & MSR_IA32_APICBASE_ENABLE;
+}
+
+static inline int apic_sw_enabled(struct kvm_lapic *apic)
+{
+ return !((apic)->status & APIC_SW_DISABLE);
+}
+
+static inline int apic_enabled(struct kvm_lapic *apic)
+{
+ return apic_sw_enabled(apic) && apic_hw_enabled(apic);
+}
+
+#define LVT_MASK \
+ (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
+
+#define LINT_MASK \
+ LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
+ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER
+
+static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+ return *((u32 *) (apic->regs + reg_off));
+}
+
+static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off,
u32 val)
+{
+ *((u32 *) (apic->regs + reg_off)) = val;
+}
+
+static inline int kvm_apic_id(struct kvm_lapic *apic)
+{
+ return GET_APIC_ID(apic_get_reg(apic, APIC_ID));
+}
+
+static inline int apic_lvt_enabled(struct kvm_lapic *apic, int
lvt_type)
+{
+ return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
+}
+
+static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
+{
+ return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
+}
+
+static inline int apic_lvtt_period(struct kvm_lapic *apic)
+{
+ return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
+}
+
+static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+ LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
+ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
+ LVT_MASK | APIC_MODE_MASK, /* LVTPC */
+ LINT_MASK, LINT_MASK, /* LVT0-1 */
+ LVT_MASK /* LVTERR */
+};
+
+static int find_highest_vector(void *bitmap)
+{
+ u32 *word = bitmap;
+ int word_offset = MAX_APIC_VECTOR >> 5;
+
+ while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
+ continue;
+
+ if (likely(!word_offset && !word[0]))
+ return -1;
+ else
+ return fls(word[word_offset << 2]) - 1 + (word_offset <<
5);
+}
+
+static inline int apic_test_and_set_irr(int vec, struct kvm_lapic
*apic)
+{
+ return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
+}
+
+static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
+{
+ apic_clear_vector(vec, apic->regs + APIC_IRR);
+}
+
+static inline int apic_find_highest_irr(struct kvm_lapic *apic)
+{
+ int result;
+
+ result = find_highest_vector(apic->regs + APIC_IRR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
+{
+ if (!apic_test_and_set_irr(vec, apic)) {
+ /* a new pending irq is set in IRR */
+ if (trig)
+ apic_set_vector(vec, apic->regs + APIC_TMR);
+ else
+ apic_clear_vector(vec, apic->regs + APIC_TMR);
+ kvm_vcpu_kick(apic->vcpu);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+ int result;
+
+ result = find_highest_vector(apic->regs + APIC_ISR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
+static void apic_update_ppr(struct kvm_lapic *apic)
+{
+ u32 tpr, isrv, ppr;
+ int isr;
+
+ tpr = apic_get_reg(apic, APIC_TASKPRI);
+ isr = apic_find_highest_isr(apic);
+ isrv = (isr != -1) ? isr : 0;
+
+ if ((tpr & 0xf0) >= (isrv & 0xf0))
+ ppr = tpr & 0xff;
+ else
+ ppr = isrv & 0xf0;
+
+ apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
+ apic, ppr, isr, isrv);
+
+ apic_set_reg(apic, APIC_PROCPRI, ppr);
+}
+
+static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
+{
+ apic_set_reg(apic, APIC_TASKPRI, tpr);
+ apic_update_ppr(apic);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+ return kvm_apic_id(apic) == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+ int result = 0;
+ u8 logical_id;
+
+ logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
+
+ switch (apic_get_reg(apic, APIC_DFR)) {
+ case APIC_DFR_FLAT:
+ if (logical_id & mda)
+ result = 1;
+ break;
+ case APIC_DFR_CLUSTER:
+ if (((logical_id >> 4) == (mda >> 0x4))
+ && (logical_id & mda & 0xf))
+ result = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
+ apic->vcpu->vcpu_id, apic_get_reg(apic,
APIC_DFR));
+ break;
+ }
+
+ return result;
+}
+
+static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic
*source,
+ int short_hand, int dest, int dest_mode)
+{
+ int result = 0;
+ struct kvm_lapic *target = vcpu->apic;
+
+ apic_debug("target %p, source %p, dest 0x%x, "
+ "dest_mode 0x%x, short_hand 0x%x",
+ target, source, dest, dest_mode, short_hand);
+
+ if (!target)
+ return 0;
+ switch (short_hand) {
+ case APIC_DEST_NOSHORT:
+ if (dest_mode == 0) {
+ /* Physical mode. */
+ if ((dest == 0xFF) || (dest ==
kvm_apic_id(target)))
+ result = 1;
+ } else
+ /* Logical mode. */
+ result = kvm_apic_match_logical_addr(target,
dest);
+ break;
+ case APIC_DEST_SELF:
+ if (target == source)
+ result = 1;
+ break;
+ case APIC_DEST_ALLINC:
+ result = 1;
+ break;
+ case APIC_DEST_ALLBUT:
+ if (target != source)
+ result = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Bad dest shorthand value %x\n",
+ short_hand);
+ break;
+ }
+
+ return result;
+}
+
+/*
+ * Add a pending IRQ into lapic.
+ * Return 1 if successfully added and 0 if discarded.
+ */
+static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
+ int vector, int level, int trig_mode)
+{
+ int result = 0;
+
+ switch (delivery_mode) {
+ case APIC_DM_FIXED:
+ case APIC_DM_LOWEST:
+ /* FIXME add logic for vcpu on reset */
+ if (unlikely(!apic_enabled(apic)))
+ break;
+
+ if (apic_test_and_set_irr(vector, apic) && trig_mode) {
+ apic_debug("level trig mode repeatedly for
vector %d",
+ vector);
+ break;
+ }
+
+ if (trig_mode) {
+ apic_debug("level trig mode for vector %d",
vector);
+ apic_set_vector(vector, apic->regs + APIC_TMR);
+ } else
+ apic_clear_vector(vector, apic->regs +
APIC_TMR);
+
+ kvm_vcpu_kick(apic->vcpu);
+
+ result = 1;
+ break;
+
+ case APIC_DM_REMRD:
+ printk(KERN_DEBUG "Ignoring delivery mode 3\n");
+ break;
+
+ case APIC_DM_SMI:
+ printk(KERN_DEBUG "Ignoring guest SMI\n");
+ break;
+ case APIC_DM_NMI:
+ printk(KERN_DEBUG "Ignoring guest NMI\n");
+ break;
+
+ case APIC_DM_INIT:
+ printk(KERN_DEBUG "Ignoring guest INIT\n");
+ break;
+
+ case APIC_DM_STARTUP:
+ printk(KERN_DEBUG "Ignoring guest STARTUP\n");
+ break;
+
+ default:
+ printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
+ delivery_mode);
+ break;
+ }
+ return result;
+}
+
+static inline int apic_accept_irq(struct kvm_lapic *apic, int
delivery_mode,
+ int vector, int level, int trig_mode)
+{
+ int result = 0;
+
+ spin_lock_bh(&apic->lock);
+ result = __apic_accept_irq(apic, delivery_mode,
+ vector, level, trig_mode);
+ spin_unlock_bh(&apic->lock);
+ return result;
+}
+
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, u32
bitmap)
+{
+ int vcpu_id;
+
+ /* TODO for real round robin */
+ vcpu_id = fls(bitmap) - 1;
+ if (vcpu_id < 0)
+ printk(KERN_DEBUG "vcpu not ready for
apic_round_robin\n");
+ return kvm->vcpus[vcpu_id].apic;
+}
+
+static void apic_set_eoi(struct kvm_lapic *apic)
+{
+ int vector = apic_find_highest_isr(apic);
+
+ /*
+ * Not every write EOI will has corresponding ISR,
+ * one example is when Kernel check timer on setup_IO_APIC
+ */
+ if (vector == -1)
+ return;
+
+ apic_clear_vector(vector, apic->regs + APIC_ISR);
+ apic_update_ppr(apic);
+
+ if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
+ kvm_ioapic_update_eoi(apic->vcpu->kvm, vector);
+}
+
+static void apic_send_ipi(struct kvm_lapic *apic)
+{
+ u32 icr_low = apic_get_reg(apic, APIC_ICR);
+ u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+
+ unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
+ unsigned int short_hand = icr_low & APIC_SHORT_MASK;
+ unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
+ unsigned int level = icr_low & APIC_INT_ASSERT;
+ unsigned int dest_mode = icr_low & APIC_DEST_MASK;
+ unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
+ unsigned int vector = icr_low & APIC_VECTOR_MASK;
+
+ struct kvm_lapic *target;
+ struct kvm_vcpu *vcpu;
+ u32 lpr_map = 0;
+ int i;
+
+ apic_debug("icr_high 0x%x, icr_low 0x%x, "
+ "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level
0x%x, "
+ "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
+ icr_high, icr_low, short_hand, dest,
+ trig_mode, level, dest_mode, delivery_mode, vector);
+
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ vcpu = &apic->vcpu->kvm->vcpus[i];
+
+ if (apic_match_dest(vcpu, apic, short_hand, dest,
dest_mode)) {
+ if (delivery_mode == APIC_DM_LOWEST)
+ set_bit(vcpu->vcpu_id, &lpr_map);
+ else
+ __apic_accept_irq(vcpu->apic,
delivery_mode,
+ vector, level,
trig_mode);
+ }
+ }
+
+ if (delivery_mode == APIC_DM_LOWEST) {
+ target = kvm_apic_round_robin(vcpu->kvm, vector,
lpr_map);
+ if (target != NULL)
+ __apic_accept_irq(target, delivery_mode,
+ vector, level, trig_mode);
+ }
+}
+
+static u32 apic_get_tmcct(struct kvm_lapic *apic)
+{
+ u32 counter_passed;
+ ktime_t passed, now = apic->timer.dev.base->get_time();
+ u32 tmcct = apic_get_reg(apic, APIC_TMCCT);
+
+ ASSERT(apic != NULL);
+
+ if (unlikely(ktime_to_ns(now) <=
+ ktime_to_ns(apic->timer.last_update))) {
+ /* Wrap around */
+ passed = ktime_add(( {
+ (ktime_t) {
+ .tv64 = KTIME_MAX -
+ (apic->timer.last_update).tv64};}
+ ), now);
+ apic_debug("time elapsed\n");
+ } else
+ passed = ktime_sub(now, apic->timer.last_update);
+
+ counter_passed = ktime_to_ns(passed) /
+ (APIC_BUS_CYCLE_NS * apic->timer.divide_count);
+ tmcct -= counter_passed;
+
+ if (tmcct <= 0) {
+ if (unlikely(!apic_lvtt_period(apic)))
+ tmcct = 0;
+ else
+ do {
+ tmcct += apic_get_reg(apic, APIC_TMICT);
+ } while (tmcct <= 0);
+ }
+
+ apic->timer.last_update = now;
+ apic_set_reg(apic, APIC_TMCCT, tmcct);
+
+ return tmcct;
+}
+
+static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
+{
+ u32 val = 0;
+
+ if (offset >= LAPIC_MMIO_LENGTH)
+ return 0;
+
+ switch (offset) {
+ case APIC_ARBPRI:
+ printk(KERN_WARNING "Access APIC ARBPRI register "
+ "which is for P6\n");
+ break;
+
+ case APIC_TMCCT: /* Timer CCR */
+ val = apic_get_tmcct(apic);
+ break;
+
+ default:
+ val = apic_get_reg(apic, offset);
+ break;
+ }
+
+ return val;
+}
+
+static void apic_mmio_read(struct kvm_io_device *this,
+ gpa_t address, int len, void *data)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ unsigned int offset = address - apic->base_address;
+ unsigned char alignment = offset & 0xf;
+ u32 result;
+
+ if ((alignment + len) > 4) {
+ printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
+ (unsigned long)address, len);
+ return;
+ }
+ spin_lock_bh(&apic->lock);
+ result = __apic_read(apic, offset & ~0xf);
+ spin_unlock_bh(&apic->lock);
+
+ switch (len) {
+ case 1:
+ case 2:
+ case 4:
+ memcpy(data, (char *)&result + alignment, len);
+ break;
+ default:
+ printk(KERN_ERR "Local APIC read with len = %x, "
+ "should be 1,2, or 4 instead\n", len);
+ break;
+ }
+}
+
+static void apic_mmio_write(struct kvm_io_device *this,
+ gpa_t address, int len, const void *data)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ unsigned int offset = address - apic->base_address;
+ unsigned char alignment = offset & 0xf;
+ u32 val;
+
+ /*
+ * APIC register must be aligned on 128-bits boundary.
+ * 32/64/128 bits registers must be accessed thru 32 bits.
+ * Refer SDM 8.4.1
+ */
+ if (len != 4 || alignment) {
+ if (printk_ratelimit())
+ printk(KERN_ERR "apic write: bad size=%d %lx\n",
+ len, (long)address);
+ return;
+ }
+
+ val = *(u32 *) data;
+
+ /* too common printing */
+ if (offset != APIC_EOI)
+ apic_debug("%s: offset 0x%x with length 0x%x, and value
is "
+ "0x%x\n", __FUNCTION__, offset, len, val);
+
+ offset &= 0xff0;
+
+ spin_lock_bh(&apic->lock);
+ switch (offset) {
+ case APIC_ID: /* Local APIC ID */
+ apic_set_reg(apic, APIC_ID, val);
+ break;
+
+ case APIC_TASKPRI:
+ apic_set_tpr(apic, val & 0xff);
+ break;
+
+ case APIC_EOI:
+ apic_set_eoi(apic);
+ break;
+
+ case APIC_LDR:
+ apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+ break;
+
+ case APIC_DFR:
+ apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+ break;
+
+ case APIC_SPIV:
+ apic_set_reg(apic, APIC_SPIV, val & 0x3ff);
+ if (!(val & APIC_SPIV_APIC_ENABLED)) {
+ int i;
+ u32 lvt_val;
+
+ apic->status |= APIC_SW_DISABLE;
+ for (i = 0; i < APIC_LVT_NUM; i++) {
+ lvt_val = apic_get_reg(apic,
+ APIC_LVTT + 0x10
* i);
+ apic_set_reg(apic, APIC_LVTT + 0x10 * i,
+ lvt_val | APIC_LVT_MASKED);
+ }
+
+ } else
+ apic->status &= ~APIC_SW_DISABLE;
+ break;
+
+ case APIC_ICR:
+ /* No delay here, so we always clear the pending bit */
+ apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
+ apic_send_ipi(apic);
+ break;
+
+ case APIC_ICR2:
+ apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
+ break;
+
+ case APIC_LVTT:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ /* TODO: Check vector */
+ if (!apic_sw_enabled(apic))
+ val |= APIC_LVT_MASKED;
+
+ val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
+ apic_set_reg(apic, offset, val);
+
+ break;
+
+ case APIC_TMICT:
+ {
+ ktime_t now = apic->timer.dev.base->get_time();
+ u32 offset;
+
+ apic_set_reg(apic, APIC_TMICT, val);
+ apic_set_reg(apic, APIC_TMCCT, val);
+ apic->timer.last_update = now;
+ offset =
+ APIC_BUS_CYCLE_NS * apic->timer.divide_count
* val;
+
+ /* Make sure the lock ordering is coherent */
+ spin_unlock_bh(&apic->lock);
+ hrtimer_cancel(&apic->timer.dev);
+ hrtimer_start(&apic->timer.dev,
+ ktime_add_ns(now, offset),
+ HRTIMER_MODE_ABS);
+
+ apic_debug("%s: bus cycle is %" PRId64 "ns, now
0x%016"
+ PRIx64 ", "
+ "timer initial count 0x%x, offset
0x%x, "
+ "expire @ 0x%016" PRIx64 ".\n",
__FUNCTION__,
+ APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+ apic_get_reg(apic, APIC_TMICT),
offset,
+ ktime_to_ns(ktime_add_ns(now,
offset)));
+ }
+ return;
+
+ case APIC_TDCR:
+ {
+ unsigned int tmp1, tmp2;
+
+ if (val & 4)
+ printk(KERN_ERR "KVM_WRITE:TDCR %x\n",
val);
+ tmp1 = val & 0xf;
+ tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
+ apic->timer.divide_count = 0x1 << (tmp2 & 0x7);
+
+ apic_set_reg(apic, APIC_TDCR, val);
+
+ apic_debug("timer divide count is 0x%x\n",
+ apic->timer.divide_count);
+ }
+ break;
+
+ default:
+ apic_debug("Local APIC Write to read-only register
%x\n",
+ offset);
+ break;
+ }
+
+ spin_unlock_bh(&apic->lock);
+}
+
+static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ int ret = 0;
+
+ spin_lock_bh(&apic->lock);
+
+ if (apic_hw_enabled(apic) &&
+ (addr >= apic->base_address) &&
+ (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
+ ret = 1;
+
+ spin_unlock_bh(&apic->lock);
+
+ return ret;
+}
+
+void kvm_free_apic(struct kvm_lapic *apic)
+{
+ if (!apic)
+ return;
+ spin_lock_bh(&apic->lock);
+
+ hrtimer_cancel(&apic->timer.dev);
+
+ if (apic->regs_page) {
+ __free_page(apic->regs_page);
+ apic->regs_page = 0;
+ }
+
+ spin_unlock_bh(&apic->lock);
+
+ kfree(apic);
+}
+
+/*
+
*----------------------------------------------------------------------
+ * LAPIC interface
+
*----------------------------------------------------------------------
+ */
+
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+
+ spin_lock_bh(&apic->lock);
+ apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
+ spin_unlock_bh(&apic->lock);
+}
+
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ u64 tpr;
+
+ spin_lock_bh(&apic->lock);
+ tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
+ spin_unlock_bh(&apic->lock);
+
+ return (tpr & 0xf0) >> 4;
+}
+
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+
+ spin_lock_bh(&apic->lock);
+ if (apic->vcpu->vcpu_id)
+ value &= ~MSR_IA32_APICBASE_BSP;
+
+ apic->base_msr = value;
+ apic->base_address = apic->base_msr & MSR_IA32_APICBASE_BASE;
+
+ /* with FSB delivery interrupt, we can restart APIC
functionality */
+ apic_debug("apic base msr is 0x%016" PRIx64 ", and base address
is "
+ "0x%lx.\n", apic->base_msr, apic->base_address);
+
+ spin_unlock_bh(&apic->lock);
+}
+
+u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ u64 base;
+
+ spin_lock_bh(&apic->lock);
+ base = apic->base_msr;
+ spin_unlock_bh(&apic->lock);
+
+ return base;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
+
+static void lapic_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic;
+ int i;
+
+ apic_debug("%s\n", __FUNCTION__);
+
+ ASSERT(vcpu);
+ apic = vcpu->apic;
+ ASSERT(apic != NULL);
+
+ /* Stop the timer in case it's a reset to an active apic */
+ hrtimer_cancel(&apic->timer.dev);
+
+ spin_lock_bh(&apic->lock);
+
+ apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+ apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+
+ for (i = 0; i < APIC_LVT_NUM; i++)
+ apic_set_reg(apic, APIC_LVTT + 0x10 * i,
APIC_LVT_MASKED);
+
+ apic_set_reg(apic, APIC_DFR, 0xffffffffU);
+ apic_set_reg(apic, APIC_SPIV, 0xff);
+ apic_set_reg(apic, APIC_TASKPRI, 0);
+ apic_set_reg(apic, APIC_LDR, 0);
+ apic_set_reg(apic, APIC_ESR, 0);
+ apic_set_reg(apic, APIC_ICR, 0);
+ apic_set_reg(apic, APIC_ICR2, 0);
+ apic_set_reg(apic, APIC_TDCR, 0);
+ apic_set_reg(apic, APIC_TMICT, 0);
+ apic_set_reg(apic, APIC_TMCCT, 0);
+ for (i = 0; i < 8; i++) {
+ apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
+ apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
+ apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
+ }
+ apic->timer.divide_count = 0;
+ apic->timer.pending = 0;
+ apic->status = APIC_SW_DISABLE;
+ if (vcpu->vcpu_id == 0)
+ apic->base_msr |= MSR_IA32_APICBASE_BSP;
+ apic_update_ppr(apic);
+
+ spin_unlock_bh(&apic->lock);
+
+ apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
+ "0x%016" PRIx64 ", base_address=0x%0lx.\n",
__FUNCTION__,
+ vcpu, GET_APIC_ID(apic_get_reg(apic, APIC_ID)),
+ apic->base_msr, apic->base_address);
+}
+
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ int ret = 0;
+
+ spin_lock_bh(&apic->lock);
+ ret = apic_enabled(apic);
+ spin_unlock_bh(&apic->lock);
+
+ return ret;
+}
+
+void *kvm_lapic_get_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ return apic->regs;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_get_regs);
+
+/*
+
*----------------------------------------------------------------------
+ * timer interface
+
*----------------------------------------------------------------------
+ */
+static int __apic_timer_fn(struct kvm_lapic *apic)
+{
+ u32 vector;
+ ktime_t now;
+ int result = 0;
+
+ if (unlikely(!apic_enabled(apic) ||
+ !apic_lvt_enabled(apic, APIC_LVTT))) {
+ apic_debug("%s: time interrupt although apic is down\n",
+ __FUNCTION__);
+ return 0;
+ }
+
+ vector = apic_lvt_vector(apic, APIC_LVTT);
+ now = apic->timer.dev.base->get_time();
+ apic->timer.last_update = now;
+ apic->timer.pending++;
+ __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+
+ if (apic_lvtt_period(apic)) {
+ u32 offset;
+ u32 tmict = apic_get_reg(apic, APIC_TMICT);
+
+ apic_set_reg(apic, APIC_TMCCT, tmict);
+ offset = APIC_BUS_CYCLE_NS * apic->timer.divide_count *
tmict;
+
+ result = 1;
+ apic->timer.dev.expires = ktime_add_ns(now, offset);
+ } else {
+ apic_set_reg(apic, APIC_TMCCT, 0);
+ }
+
+ return result;
+}
+
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
+{
+ struct kvm_lapic *apic;
+ int restart_timer = 0;
+
+ apic = container_of(data, struct kvm_lapic, timer.dev);
+
+ spin_lock_bh(&apic->lock);
+ restart_timer = __apic_timer_fn(apic);
+ spin_unlock_bh(&apic->lock);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic;
+
+ ASSERT(vcpu != NULL);
+ apic_debug("apic_init %d\n", vcpu->vcpu_id);
+
+ apic = kzalloc(sizeof(*apic), GFP_KERNEL);
+ if (!apic)
+ goto nomem;
+
+ vcpu->apic = apic;
+ spin_lock_init(&apic->lock);
+
+ apic->regs_page = alloc_page(GFP_KERNEL);
+ if (apic->regs_page == NULL) {
+ printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
+ vcpu->vcpu_id);
+ goto nomem;
+ }
+ apic->regs = page_address(apic->regs_page);
+ memset(apic->regs, 0, PAGE_SIZE);
+ apic->vcpu = vcpu;
+
+ hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
+ apic->timer.dev.function = apic_timer_fn;
+ apic->base_address = APIC_DEFAULT_PHYS_BASE;
+ apic->base_msr = APIC_DEFAULT_PHYS_BASE;
+
+ lapic_reset(vcpu);
+ apic->dev.read = apic_mmio_read;
+ apic->dev.write = apic_mmio_write;
+ apic->dev.in_range = apic_mmio_range;
+ apic->dev.private = apic;
+
+ return 0;
+nomem:
+ kvm_free_apic(apic);
+ return -ENOMEM;
+}
+
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->apic;
+ int highest_irr;
+
+ if (!apic || !apic_enabled(apic))
+ return -1;
+
+ highest_irr = apic_find_highest_irr(apic);
+ if ((highest_irr == -1) ||
+ ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+ return -1;
+ return highest_irr;
+}
+
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
+{
+ int vector = kvm_apic_has_interrupt(vcpu);
+ struct kvm_lapic *apic = vcpu->apic;
+
+ if (vector == -1)
+ return -1;
+
+ apic_set_vector(vector, apic->regs + APIC_ISR);
+ apic_update_ppr(apic);
+ apic_clear_irr(vector, apic);
+ return vector;
+}
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 7150f05..a65a2b3 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -460,6 +460,12 @@ static void init_sys_seg(struct vmcb_seg *seg,
uint32_t type)
static int svm_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ u64 msr;
+
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ if (vcpu == &vcpu->kvm->vcpus[0])
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
return 0;
}
@@ -590,9 +596,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->fpu_active = 1;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
- if (vcpu == &vcpu->kvm->vcpus[0])
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
return 0;
@@ -1476,8 +1479,8 @@ static void post_kvm_run_save(struct kvm_vcpu
*vcpu,
(vcpu->interrupt_window_open &&
vcpu->irq_summary == 0);
kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags &
X86_EFLAGS_IF) != 0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
}
/*
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index ece7f86..36d7134 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1243,6 +1243,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
int i;
int ret = 0;
unsigned long kvm_vmx_return;
+ u64 msr;
if (!init_rmode_tss(vcpu->kvm)) {
ret = -ENOMEM;
@@ -1251,10 +1252,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
memset(vcpu->regs, 0, sizeof(vcpu->regs));
vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val();
- vcpu->cr8 = 0;
- vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ set_cr8(vcpu, 0);
+ msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (vcpu == &vcpu->kvm->vcpus[0])
- vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
+ msr |= MSR_IA32_APICBASE_BSP;
+ kvm_set_apic_base(vcpu, msr);
fx_init(vcpu);
@@ -1797,7 +1799,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct
kvm_run *kvm_run)
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- vcpu->regs[reg] = vcpu->cr8;
+ vcpu->regs[reg] = get_cr8(vcpu);
vcpu_put_rsp_rip(vcpu);
skip_emulated_instruction(vcpu);
return 1;
@@ -1894,8 +1896,8 @@ static void post_kvm_run_save(struct kvm_vcpu
*vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) !=
0;
- kvm_run->cr8 = vcpu->cr8;
- kvm_run->apic_base = vcpu->apic_base;
+ kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
kvm_run->ready_for_interrupt_injection = 1;
else
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ac1c4ee..5b96079 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -41,7 +41,7 @@ struct kvm_memory_alias {
__u64 target_phys_addr;
};
-/* for KVM_SET_IRQ_LEVEL */
+/* for KVM_IRQ_LINE */
struct kvm_irq_level {
/*
* ACPI gsi notion of irq.
@@ -285,7 +285,7 @@ struct kvm_signal_mask {
/*
* Extension capability list.
*/
-#define KVM_CAP_PIC 0
+#define KVM_CAP_IRQCHIP 0
#define KVM_CAP_HLT 1
/*
@@ -301,7 +301,7 @@ struct kvm_signal_mask {
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct
kvm_memory_alias)
/* Device model IOC */
#define KVM_CREATE_PIC _IO(KVMIO, 0x60)
-#define KVM_IRQ_LINE _IO(KVMIO, 0x61)
+#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct
kvm_irq_level)
/*
* ioctls for vcpu fds
[-- Attachment #2: apic-30-4.patch --]
[-- Type: application/octet-stream, Size: 32727 bytes --]
Add kernel lapic
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 952dff3..3bf7276 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index b08005c..0b4430a 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -30,14 +30,13 @@
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s = pic_irqchip(v->kvm);
-
- if (s->output) /* PIC */
- return 1;
- /*
- * TODO: APIC
- */
- return 0;
+ struct kvm_pic *s;
+
+ if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
+ s = pic_irqchip(v->kvm); /* PIC */
+ return s->output;
+ }
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
@@ -46,16 +45,36 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
- struct kvm_pic *s = pic_irqchip(v->kvm);
+ struct kvm_pic *s;
int vector;
- s->output = 0;
- vector = kvm_pic_read_irq(s);
- if (vector != -1)
- return vector;
- /*
- * TODO: APIC
- */
- return -1;
+ vector = kvm_get_apic_interrupt(v); /* APIC */
+ if (vector == -1) {
+ s = pic_irqchip(v->kvm);
+ s->output = 0; /* PIC */
+ vector = kvm_pic_read_irq(s);
+ }
+ return vector;
}
EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+ printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int ipi_pcpu = vcpu->cpu;
+
+ if (vcpu->guest_mode)
+ smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+ /* TODO: for kernel IOAPIC */
+}
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index a6b3869..24f8b31 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -26,7 +26,6 @@
typedef void irq_request_func(void *opaque, int level);
-struct kvm_pic;
struct kvm_pic_state {
u8 last_irr; /* edge detection */
u8 irr; /* interrupt request register */
@@ -61,4 +60,46 @@ int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+struct kvm_lapic {
+ spinlock_t lock; /* TODO for revise */
+ u32 status;
+ u64 base_msr;
+ unsigned long base_address;
+ struct kvm_io_device dev;
+ struct {
+ unsigned long pending;
+ u32 divide_count;
+ ktime_t last_update;
+ struct hrtimer dev;
+ } timer;
+ struct kvm_vcpu *vcpu;
+ struct page *regs_page;
+ void *regs;
+};
+
+#ifdef DEBUG
+#define ASSERT(x) \
+do { \
+ if (!(x)) { \
+ printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
+ __FILE__, __LINE__, #x); \
+ BUG(); \
+ } \
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_apic(struct kvm_lapic *apic);
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+
#endif
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 1e0d2a6..a5107b5 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -323,9 +323,6 @@ struct kvm_vcpu {
int interrupt_window_open;
int guest_mode;
unsigned long requests;
- unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
- unsigned long irq_pending[NR_IRQ_WORDS];
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
@@ -336,10 +333,23 @@ struct kvm_vcpu {
struct page *para_state_page;
gpa_t hypercall_gpa;
unsigned long cr4;
- unsigned long cr8;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
- u64 apic_base;
+ union {
+ struct { /* user irqchip context */
+ /*
+ * bit vector: 1 per word in irq_pending
+ */
+ unsigned long irq_summary;
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
+ unsigned long irq_pending[NR_IRQ_WORDS];
+ unsigned long cr8;
+ u64 apic_base;
+ };
+ struct { /* kernel irqchip context */
+ struct kvm_lapic *apic;
+ };
+ };
u64 ia32_misc_enable_msr;
int nmsrs;
int save_nmsrs;
@@ -586,8 +596,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
unsigned long get_cr8(struct kvm_vcpu *vcpu);
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ae1895a..0d7c3e6 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -383,6 +383,7 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
vcpu_load(vcpu);
kvm_mmu_destroy(vcpu);
+ kvm_free_apic(vcpu->apic);
vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu);
free_page((unsigned long)vcpu->run);
@@ -632,25 +633,38 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
inject_gp(vcpu);
return;
}
- vcpu->cr8 = cr8;
+ if (irqchip_in_kernel(vcpu->kvm))
+ kvm_lapic_set_tpr(vcpu, cr8);
+ else
+ vcpu->cr8 = cr8;
}
EXPORT_SYMBOL_GPL(set_cr8);
unsigned long get_cr8(struct kvm_vcpu *vcpu)
{
- return vcpu->cr8;
+ if (irqchip_in_kernel(vcpu->kvm))
+ return kvm_lapic_get_cr8(vcpu);
+ else
+ return vcpu->cr8;
}
EXPORT_SYMBOL_GPL(get_cr8);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
- return vcpu->apic_base;
+ if (irqchip_in_kernel(vcpu->kvm))
+ return vcpu->apic->base_msr;
+ else
+ return vcpu->apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
- vcpu->apic_base = data;
+ /* TODO: reserve bits check */
+ if (irqchip_in_kernel(vcpu->kvm))
+ kvm_lapic_set_base(vcpu, data);
+ else
+ vcpu->apic_base = data;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);
@@ -1035,15 +1049,31 @@ static int emulator_write_std(unsigned long addr,
return X86EMUL_UNHANDLEABLE;
}
+/*
+ * Only apic need an MMIO device hook, so shortcut now..
+ */
+static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
+ gpa_t addr)
+{
+ struct kvm_io_device *dev;
+
+ if (vcpu->apic) {
+ dev = &vcpu->apic->dev;
+ if (dev->in_range(dev, addr))
+ return dev;
+ }
+ return NULL;
+}
+
static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
gpa_t addr)
{
- /*
- * Note that its important to have this wrapper function because
- * in the very near future we will be checking for MMIOs against
- * the LAPIC as well as the general MMIO bus
- */
- return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+ struct kvm_io_device *dev;
+
+ dev = vcpu_find_pervcpu_dev(vcpu, addr);
+ if (dev == NULL)
+ dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+ return dev;
}
static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -2433,6 +2463,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
if (r < 0)
goto out_free_vcpus;
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ r = kvm_create_lapic(vcpu);
+ if (r < 0)
+ goto out_free_vcpus;
+ }
kvm_arch_ops->vcpu_load(vcpu);
r = kvm_mmu_setup(vcpu);
if (r >= 0)
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
new file mode 100644
index 0000000..cf2af18
--- /dev/null
+++ b/drivers/kvm/lapic.c
@@ -0,0 +1,971 @@
+
+/*
+ * Local APIC virtualization
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2007 Novell
+ * Copyright (C) 2007 Intel
+ *
+ * Authors:
+ * Dor Laor <dor.laor@qumranet.com>
+ * Gregory Haskins <ghaskins@novell.com>
+ * Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *
+ * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include "irq.h"
+
+#define PRId64 "d"
+#define PRIx64 "llx"
+#define PRIu64 "u"
+#define PRIo64 "o"
+
+#define APIC_BUS_CYCLE_NS 1
+
+/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define apic_debug(fmt, arg...)
+
+#define APIC_LVT_NUM 6
+/* 14 is the version for Xeon and Pentium 8.4.8*/
+#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16))
+#define LAPIC_MMIO_LENGTH (1 << 12)
+/* followed define is not in apicdef.h */
+#define APIC_SHORT_MASK 0xc0000
+#define APIC_DEST_NOSHORT 0x0
+#define APIC_DEST_MASK 0x800
+#define _APIC_SW_DISABLE 0
+#define APIC_SW_DISABLE (1 << _APIC_SW_DISABLE)
+#define MAX_APIC_VECTOR 256
+
+#define VEC_POS(v) ((v) & (32 - 1))
+#define REG_POS(v) (((v) >> 5) << 4)
+static inline int apic_test_and_set_vector(int vec, void *bitmap)
+{
+ return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int apic_test_and_clear_vector(int vec, void *bitmap)
+{
+ return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline void apic_set_vector(int vec, void *bitmap)
+{
+ set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline void apic_clear_vector(int vec, void *bitmap)
+{
+ clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int apic_hw_enabled(struct kvm_lapic *apic)
+{
+ return (apic)->base_msr & MSR_IA32_APICBASE_ENABLE;
+}
+
+static inline int apic_sw_enabled(struct kvm_lapic *apic)
+{
+ return !((apic)->status & APIC_SW_DISABLE);
+}
+
+static inline int apic_enabled(struct kvm_lapic *apic)
+{
+ return apic_sw_enabled(apic) && apic_hw_enabled(apic);
+}
+
+#define LVT_MASK \
+ (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
+
+#define LINT_MASK \
+ (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
+ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
+
+static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+ return *((u32 *) (apic->regs + reg_off));
+}
+
+static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
+{
+ *((u32 *) (apic->regs + reg_off)) = val;
+}
+
+static inline int kvm_apic_id(struct kvm_lapic *apic)
+{
+ return GET_APIC_ID(apic_get_reg(apic, APIC_ID));
+}
+
+static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
+{
+ return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
+}
+
+static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
+{
+ return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
+}
+
+static inline int apic_lvtt_period(struct kvm_lapic *apic)
+{
+ return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
+}
+
+static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+ LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
+ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
+ LVT_MASK | APIC_MODE_MASK, /* LVTPC */
+ LINT_MASK, LINT_MASK, /* LVT0-1 */
+ LVT_MASK /* LVTERR */
+};
+
+static int find_highest_vector(void *bitmap)
+{
+ u32 *word = bitmap;
+ int word_offset = MAX_APIC_VECTOR >> 5;
+
+ while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
+ continue;
+
+ if (likely(!word_offset && !word[0]))
+ return -1;
+ else
+ return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
+}
+
+static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
+{
+ return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
+}
+
+static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
+{
+ apic_clear_vector(vec, apic->regs + APIC_IRR);
+}
+
+static inline int apic_find_highest_irr(struct kvm_lapic *apic)
+{
+ int result;
+
+ result = find_highest_vector(apic->regs + APIC_IRR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig)
+{
+ if (!apic_test_and_set_irr(vec, apic)) {
+ /* a new pending irq is set in IRR */
+ if (trig)
+ apic_set_vector(vec, apic->regs + APIC_TMR);
+ else
+ apic_clear_vector(vec, apic->regs + APIC_TMR);
+ kvm_vcpu_kick(apic->vcpu);
+ return 1;
+ }
+ return 0;
+}
+
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+ int result;
+
+ result = find_highest_vector(apic->regs + APIC_ISR);
+ ASSERT(result == -1 || result >= 16);
+
+ return result;
+}
+
+static void apic_update_ppr(struct kvm_lapic *apic)
+{
+ u32 tpr, isrv, ppr;
+ int isr;
+
+ tpr = apic_get_reg(apic, APIC_TASKPRI);
+ isr = apic_find_highest_isr(apic);
+ isrv = (isr != -1) ? isr : 0;
+
+ if ((tpr & 0xf0) >= (isrv & 0xf0))
+ ppr = tpr & 0xff;
+ else
+ ppr = isrv & 0xf0;
+
+ apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
+ apic, ppr, isr, isrv);
+
+ apic_set_reg(apic, APIC_PROCPRI, ppr);
+}
+
+static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
+{
+ apic_set_reg(apic, APIC_TASKPRI, tpr);
+ apic_update_ppr(apic);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+ return kvm_apic_id(apic) == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+ int result = 0;
+ u8 logical_id;
+
+ logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
+
+ switch (apic_get_reg(apic, APIC_DFR)) {
+ case APIC_DFR_FLAT:
+ if (logical_id & mda)
+ result = 1;
+ break;
+ case APIC_DFR_CLUSTER:
+ if (((logical_id >> 4) == (mda >> 0x4))
+ && (logical_id & mda & 0xf))
+ result = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n",
+ apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
+ break;
+ }
+
+ return result;
+}
+
+static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+ int short_hand, int dest, int dest_mode)
+{
+ int result = 0;
+ struct kvm_lapic *target = vcpu->apic;
+
+ apic_debug("target %p, source %p, dest 0x%x, "
+ "dest_mode 0x%x, short_hand 0x%x",
+ target, source, dest, dest_mode, short_hand);
+
+ if (!target)
+ return 0;
+ switch (short_hand) {
+ case APIC_DEST_NOSHORT:
+ if (dest_mode == 0) {
+ /* Physical mode. */
+ if ((dest == 0xFF) || (dest == kvm_apic_id(target)))
+ result = 1;
+ } else
+ /* Logical mode. */
+ result = kvm_apic_match_logical_addr(target, dest);
+ break;
+ case APIC_DEST_SELF:
+ if (target == source)
+ result = 1;
+ break;
+ case APIC_DEST_ALLINC:
+ result = 1;
+ break;
+ case APIC_DEST_ALLBUT:
+ if (target != source)
+ result = 1;
+ break;
+ default:
+ printk(KERN_WARNING "Bad dest shorthand value %x\n",
+ short_hand);
+ break;
+ }
+
+ return result;
+}
+
+/*
+ * Add a pending IRQ into lapic.
+ * Return 1 if successfully added and 0 if discarded.
+ */
+static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
+ int vector, int level, int trig_mode)
+{
+ int result = 0;
+
+ switch (delivery_mode) {
+ case APIC_DM_FIXED:
+ case APIC_DM_LOWEST:
+ /* FIXME add logic for vcpu on reset */
+ if (unlikely(!apic_enabled(apic)))
+ break;
+
+ if (apic_test_and_set_irr(vector, apic) && trig_mode) {
+ apic_debug("level trig mode repeatedly for vector %d",
+ vector);
+ break;
+ }
+
+ if (trig_mode) {
+ apic_debug("level trig mode for vector %d", vector);
+ apic_set_vector(vector, apic->regs + APIC_TMR);
+ } else
+ apic_clear_vector(vector, apic->regs + APIC_TMR);
+
+ kvm_vcpu_kick(apic->vcpu);
+
+ result = 1;
+ break;
+
+ case APIC_DM_REMRD:
+ printk(KERN_DEBUG "Ignoring delivery mode 3\n");
+ break;
+
+ case APIC_DM_SMI:
+ printk(KERN_DEBUG "Ignoring guest SMI\n");
+ break;
+ case APIC_DM_NMI:
+ printk(KERN_DEBUG "Ignoring guest NMI\n");
+ break;
+
+ case APIC_DM_INIT:
+ printk(KERN_DEBUG "Ignoring guest INIT\n");
+ break;
+
+ case APIC_DM_STARTUP:
+ printk(KERN_DEBUG "Ignoring guest STARTUP\n");
+ break;
+
+ default:
+ printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
+ delivery_mode);
+ break;
+ }
+ return result;
+}
+
+static inline int apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
+ int vector, int level, int trig_mode)
+{
+ int result = 0;
+
+ spin_lock_bh(&apic->lock);
+ result = __apic_accept_irq(apic, delivery_mode,
+ vector, level, trig_mode);
+ spin_unlock_bh(&apic->lock);
+ return result;
+}
+
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, u32 bitmap)
+{
+ int vcpu_id;
+
+ /* TODO for real round robin */
+ vcpu_id = fls(bitmap) - 1;
+ if (vcpu_id < 0)
+ printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
+ return kvm->vcpus[vcpu_id].apic;
+}
+
+static void apic_set_eoi(struct kvm_lapic *apic)
+{
+ int vector = apic_find_highest_isr(apic);
+
+ /*
+ * Not every write EOI will has corresponding ISR,
+ * one example is when Kernel check timer on setup_IO_APIC
+ */
+ if (vector == -1)
+ return;
+
+ apic_clear_vector(vector, apic->regs + APIC_ISR);
+ apic_update_ppr(apic);
+
+ if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR))
+ kvm_ioapic_update_eoi(apic->vcpu->kvm, vector);
+}
+
+static void apic_send_ipi(struct kvm_lapic *apic)
+{
+ u32 icr_low = apic_get_reg(apic, APIC_ICR);
+ u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+
+ unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
+ unsigned int short_hand = icr_low & APIC_SHORT_MASK;
+ unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
+ unsigned int level = icr_low & APIC_INT_ASSERT;
+ unsigned int dest_mode = icr_low & APIC_DEST_MASK;
+ unsigned int delivery_mode = icr_low & APIC_MODE_MASK;
+ unsigned int vector = icr_low & APIC_VECTOR_MASK;
+
+ struct kvm_lapic *target;
+ struct kvm_vcpu *vcpu;
+ u32 lpr_map = 0;
+ int i;
+
+ apic_debug("icr_high 0x%x, icr_low 0x%x, "
+ "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
+ "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
+ icr_high, icr_low, short_hand, dest,
+ trig_mode, level, dest_mode, delivery_mode, vector);
+
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ vcpu = &apic->vcpu->kvm->vcpus[i];
+
+ if (apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) {
+ if (delivery_mode == APIC_DM_LOWEST)
+ set_bit(vcpu->vcpu_id, &lpr_map);
+ else
+ __apic_accept_irq(vcpu->apic, delivery_mode,
+ vector, level, trig_mode);
+ }
+ }
+
+ if (delivery_mode == APIC_DM_LOWEST) {
+ target = kvm_apic_round_robin(vcpu->kvm, vector, lpr_map);
+ if (target != NULL)
+ __apic_accept_irq(target, delivery_mode,
+ vector, level, trig_mode);
+ }
+}
+
+static u32 apic_get_tmcct(struct kvm_lapic *apic)
+{
+ u32 counter_passed;
+ ktime_t passed, now = apic->timer.dev.base->get_time();
+ u32 tmcct = apic_get_reg(apic, APIC_TMCCT);
+
+ ASSERT(apic != NULL);
+
+ if (unlikely(ktime_to_ns(now) <=
+ ktime_to_ns(apic->timer.last_update))) {
+ /* Wrap around */
+ passed = ktime_add(( {
+ (ktime_t) {
+ .tv64 = KTIME_MAX -
+ (apic->timer.last_update).tv64}; }
+ ), now);
+ apic_debug("time elapsed\n");
+ } else
+ passed = ktime_sub(now, apic->timer.last_update);
+
+ counter_passed = ktime_to_ns(passed) /
+ (APIC_BUS_CYCLE_NS * apic->timer.divide_count);
+ tmcct -= counter_passed;
+
+ if (tmcct <= 0) {
+ if (unlikely(!apic_lvtt_period(apic)))
+ tmcct = 0;
+ else
+ do {
+ tmcct += apic_get_reg(apic, APIC_TMICT);
+ } while (tmcct <= 0);
+ }
+
+ apic->timer.last_update = now;
+ apic_set_reg(apic, APIC_TMCCT, tmcct);
+
+ return tmcct;
+}
+
+static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
+{
+ u32 val = 0;
+
+ if (offset >= LAPIC_MMIO_LENGTH)
+ return 0;
+
+ switch (offset) {
+ case APIC_ARBPRI:
+ printk(KERN_WARNING "Access APIC ARBPRI register "
+ "which is for P6\n");
+ break;
+
+ case APIC_TMCCT: /* Timer CCR */
+ val = apic_get_tmcct(apic);
+ break;
+
+ default:
+ val = apic_get_reg(apic, offset);
+ break;
+ }
+
+ return val;
+}
+
+static void apic_mmio_read(struct kvm_io_device *this,
+ gpa_t address, int len, void *data)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ unsigned int offset = address - apic->base_address;
+ unsigned char alignment = offset & 0xf;
+ u32 result;
+
+ if ((alignment + len) > 4) {
+ printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d",
+ (unsigned long)address, len);
+ return;
+ }
+ spin_lock_bh(&apic->lock);
+ result = __apic_read(apic, offset & ~0xf);
+ spin_unlock_bh(&apic->lock);
+
+ switch (len) {
+ case 1:
+ case 2:
+ case 4:
+ memcpy(data, (char *)&result + alignment, len);
+ break;
+ default:
+ printk(KERN_ERR "Local APIC read with len = %x, "
+ "should be 1,2, or 4 instead\n", len);
+ break;
+ }
+}
+
+static void apic_mmio_write(struct kvm_io_device *this,
+ gpa_t address, int len, const void *data)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ unsigned int offset = address - apic->base_address;
+ unsigned char alignment = offset & 0xf;
+ u32 val;
+
+ /*
+ * APIC register must be aligned on 128-bits boundary.
+ * 32/64/128 bits registers must be accessed thru 32 bits.
+ * Refer SDM 8.4.1
+ */
+ if (len != 4 || alignment) {
+ if (printk_ratelimit())
+ printk(KERN_ERR "apic write: bad size=%d %lx\n",
+ len, (long)address);
+ return;
+ }
+
+ val = *(u32 *) data;
+
+ /* too common printing */
+ if (offset != APIC_EOI)
+ apic_debug("%s: offset 0x%x with length 0x%x, and value is "
+ "0x%x\n", __FUNCTION__, offset, len, val);
+
+ offset &= 0xff0;
+
+ spin_lock_bh(&apic->lock);
+ switch (offset) {
+ case APIC_ID: /* Local APIC ID */
+ apic_set_reg(apic, APIC_ID, val);
+ break;
+
+ case APIC_TASKPRI:
+ apic_set_tpr(apic, val & 0xff);
+ break;
+
+ case APIC_EOI:
+ apic_set_eoi(apic);
+ break;
+
+ case APIC_LDR:
+ apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+ break;
+
+ case APIC_DFR:
+ apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
+ break;
+
+ case APIC_SPIV:
+ apic_set_reg(apic, APIC_SPIV, val & 0x3ff);
+ if (!(val & APIC_SPIV_APIC_ENABLED)) {
+ int i;
+ u32 lvt_val;
+
+ apic->status |= APIC_SW_DISABLE;
+ for (i = 0; i < APIC_LVT_NUM; i++) {
+ lvt_val = apic_get_reg(apic,
+ APIC_LVTT + 0x10 * i);
+ apic_set_reg(apic, APIC_LVTT + 0x10 * i,
+ lvt_val | APIC_LVT_MASKED);
+ }
+
+ } else
+ apic->status &= ~APIC_SW_DISABLE;
+ break;
+
+ case APIC_ICR:
+ /* No delay here, so we always clear the pending bit */
+ apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
+ apic_send_ipi(apic);
+ break;
+
+ case APIC_ICR2:
+ apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
+ break;
+
+ case APIC_LVTT:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ /* TODO: Check vector */
+ if (!apic_sw_enabled(apic))
+ val |= APIC_LVT_MASKED;
+
+ val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4];
+ apic_set_reg(apic, offset, val);
+
+ break;
+
+ case APIC_TMICT:
+ {
+ ktime_t now = apic->timer.dev.base->get_time();
+ u32 offset;
+
+ apic_set_reg(apic, APIC_TMICT, val);
+ apic_set_reg(apic, APIC_TMCCT, val);
+ apic->timer.last_update = now;
+ offset =
+ APIC_BUS_CYCLE_NS * apic->timer.divide_count * val;
+
+ /* Make sure the lock ordering is coherent */
+ spin_unlock_bh(&apic->lock);
+ hrtimer_cancel(&apic->timer.dev);
+ hrtimer_start(&apic->timer.dev,
+ ktime_add_ns(now, offset),
+ HRTIMER_MODE_ABS);
+
+ apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+ PRIx64 ", "
+ "timer initial count 0x%x, offset 0x%x, "
+ "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__,
+ APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+ apic_get_reg(apic, APIC_TMICT), offset,
+ ktime_to_ns(ktime_add_ns(now, offset)));
+ }
+ return;
+
+ case APIC_TDCR:
+ {
+ unsigned int tmp1, tmp2;
+
+ if (val & 4)
+ printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val);
+ tmp1 = val & 0xf;
+ tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
+ apic->timer.divide_count = 0x1 << (tmp2 & 0x7);
+
+ apic_set_reg(apic, APIC_TDCR, val);
+
+ apic_debug("timer divide count is 0x%x\n",
+ apic->timer.divide_count);
+ }
+ break;
+
+ default:
+ apic_debug("Local APIC Write to read-only register %x\n",
+ offset);
+ break;
+ }
+
+ spin_unlock_bh(&apic->lock);
+}
+
+static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
+ int ret = 0;
+
+ spin_lock_bh(&apic->lock);
+
+ if (apic_hw_enabled(apic) &&
+ (addr >= apic->base_address) &&
+ (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
+ ret = 1;
+
+ spin_unlock_bh(&apic->lock);
+
+ return ret;
+}
+
+void kvm_free_apic(struct kvm_lapic *apic)
+{
+ if (!apic)
+ return;
+ spin_lock_bh(&apic->lock);
+
+ hrtimer_cancel(&apic->timer.dev);
+
+ if (apic->regs_page) {
+ __free_page(apic->regs_page);
+ apic->regs_page = 0;
+ }
+
+ spin_unlock_bh(&apic->lock);
+
+ kfree(apic);
+}
+
+/*
+ *----------------------------------------------------------------------
+ * LAPIC interface
+ *----------------------------------------------------------------------
+ */
+
+void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+
+ spin_lock_bh(&apic->lock);
+ apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
+ spin_unlock_bh(&apic->lock);
+}
+
+u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ u64 tpr;
+
+ spin_lock_bh(&apic->lock);
+ tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
+ spin_unlock_bh(&apic->lock);
+
+ return (tpr & 0xf0) >> 4;
+}
+
+void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+
+ spin_lock_bh(&apic->lock);
+ if (apic->vcpu->vcpu_id)
+ value &= ~MSR_IA32_APICBASE_BSP;
+
+ apic->base_msr = value;
+ apic->base_address = apic->base_msr & MSR_IA32_APICBASE_BASE;
+
+ /* with FSB delivery interrupt, we can restart APIC functionality */
+ apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
+ "0x%lx.\n", apic->base_msr, apic->base_address);
+
+ spin_unlock_bh(&apic->lock);
+}
+
+u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ u64 base;
+
+ spin_lock_bh(&apic->lock);
+ base = apic->base_msr;
+ spin_unlock_bh(&apic->lock);
+
+ return base;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
+
+static void lapic_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic;
+ int i;
+
+ apic_debug("%s\n", __FUNCTION__);
+
+ ASSERT(vcpu);
+ apic = vcpu->apic;
+ ASSERT(apic != NULL);
+
+ /* Stop the timer in case it's a reset to an active apic */
+ hrtimer_cancel(&apic->timer.dev);
+
+ spin_lock_bh(&apic->lock);
+
+ apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+ apic_set_reg(apic, APIC_LVR, APIC_VERSION);
+
+ for (i = 0; i < APIC_LVT_NUM; i++)
+ apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+
+ apic_set_reg(apic, APIC_DFR, 0xffffffffU);
+ apic_set_reg(apic, APIC_SPIV, 0xff);
+ apic_set_reg(apic, APIC_TASKPRI, 0);
+ apic_set_reg(apic, APIC_LDR, 0);
+ apic_set_reg(apic, APIC_ESR, 0);
+ apic_set_reg(apic, APIC_ICR, 0);
+ apic_set_reg(apic, APIC_ICR2, 0);
+ apic_set_reg(apic, APIC_TDCR, 0);
+ apic_set_reg(apic, APIC_TMICT, 0);
+ apic_set_reg(apic, APIC_TMCCT, 0);
+ for (i = 0; i < 8; i++) {
+ apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
+ apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
+ apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
+ }
+ apic->timer.divide_count = 0;
+ apic->timer.pending = 0;
+ apic->status = APIC_SW_DISABLE;
+ if (vcpu->vcpu_id == 0)
+ apic->base_msr |= MSR_IA32_APICBASE_BSP;
+ apic_update_ppr(apic);
+
+ spin_unlock_bh(&apic->lock);
+
+ apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
+ "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
+ vcpu, GET_APIC_ID(apic_get_reg(apic, APIC_ID)),
+ apic->base_msr, apic->base_address);
+}
+
+int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ int ret = 0;
+
+ spin_lock_bh(&apic->lock);
+ ret = apic_enabled(apic);
+ spin_unlock_bh(&apic->lock);
+
+ return ret;
+}
+
+void *kvm_lapic_get_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+ return apic->regs;
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_get_regs);
+
+/*
+ *----------------------------------------------------------------------
+ * timer interface
+ *----------------------------------------------------------------------
+ */
+static int __apic_timer_fn(struct kvm_lapic *apic)
+{
+ u32 vector;
+ ktime_t now;
+ int result = 0;
+
+ if (unlikely(!apic_enabled(apic) ||
+ !apic_lvt_enabled(apic, APIC_LVTT))) {
+ apic_debug("%s: time interrupt although apic is down\n",
+ __FUNCTION__);
+ return 0;
+ }
+
+ vector = apic_lvt_vector(apic, APIC_LVTT);
+ now = apic->timer.dev.base->get_time();
+ apic->timer.last_update = now;
+ apic->timer.pending++;
+ __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
+
+ if (apic_lvtt_period(apic)) {
+ u32 offset;
+ u32 tmict = apic_get_reg(apic, APIC_TMICT);
+
+ apic_set_reg(apic, APIC_TMCCT, tmict);
+ offset = APIC_BUS_CYCLE_NS * apic->timer.divide_count * tmict;
+
+ result = 1;
+ apic->timer.dev.expires = ktime_add_ns(now, offset);
+ } else {
+ apic_set_reg(apic, APIC_TMCCT, 0);
+ }
+
+ return result;
+}
+
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
+{
+ struct kvm_lapic *apic;
+ int restart_timer = 0;
+
+ apic = container_of(data, struct kvm_lapic, timer.dev);
+
+ spin_lock_bh(&apic->lock);
+ restart_timer = __apic_timer_fn(apic);
+ spin_unlock_bh(&apic->lock);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic;
+
+ ASSERT(vcpu != NULL);
+ apic_debug("apic_init %d\n", vcpu->vcpu_id);
+
+ apic = kzalloc(sizeof(*apic), GFP_KERNEL);
+ if (!apic)
+ goto nomem;
+
+ vcpu->apic = apic;
+ spin_lock_init(&apic->lock);
+
+ apic->regs_page = alloc_page(GFP_KERNEL);
+ if (apic->regs_page == NULL) {
+ printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
+ vcpu->vcpu_id);
+ goto nomem;
+ }
+ apic->regs = page_address(apic->regs_page);
+ memset(apic->regs, 0, PAGE_SIZE);
+ apic->vcpu = vcpu;
+
+ hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ apic->timer.dev.function = apic_timer_fn;
+ apic->base_address = APIC_DEFAULT_PHYS_BASE;
+ apic->base_msr = APIC_DEFAULT_PHYS_BASE;
+
+ lapic_reset(vcpu);
+ apic->dev.read = apic_mmio_read;
+ apic->dev.write = apic_mmio_write;
+ apic->dev.in_range = apic_mmio_range;
+ apic->dev.private = apic;
+
+ return 0;
+nomem:
+ kvm_free_apic(apic);
+ return -ENOMEM;
+}
+
+int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->apic;
+ int highest_irr;
+
+ if (!apic || !apic_enabled(apic))
+ return -1;
+
+ highest_irr = apic_find_highest_irr(apic);
+ if ((highest_irr == -1) ||
+ ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+ return -1;
+ return highest_irr;
+}
+
+int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
+{
+ int vector = kvm_apic_has_interrupt(vcpu);
+ struct kvm_lapic *apic = vcpu->apic;
+
+ if (vector == -1)
+ return -1;
+
+ apic_set_vector(vector, apic->regs + APIC_ISR);
+ apic_update_ppr(apic);
+ apic_clear_irr(vector, apic);
+ return vector;
+}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2f0dc42..61cc2b0 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -40,7 +40,7 @@ struct kvm_memory_alias {
__u64 target_phys_addr;
};
-/* for KVM_SET_IRQ_LEVEL */
+/* for KVM_IRQ_LINE */
struct kvm_irq_level {
/*
* ACPI gsi notion of irq.
@@ -302,7 +302,7 @@ struct kvm_signal_mask {
#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
/* Device model IOC */
#define KVM_CREATE_PIC _IO(KVMIO, 0x60)
-#define KVM_IRQ_LINE _IO(KVMIO, 0x61)
+#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
/*
* ioctls for vcpu fds
[-- Attachment #3: apic-30-4.user.patch --]
[-- Type: application/octet-stream, Size: 661 bytes --]
commit 4d217d13d4a47b2b7796ff152c0560dd68102feb
Author: root <root@vt32-pae.(none)>
Date: Wed Jul 18 10:06:27 2007 +0800
Add kernel lapic
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/kernel/Kbuild b/kernel/Kbuild
index 77141a5..ceb6e4b 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1,5 +1,5 @@
EXTRA_CFLAGS := -I$(src)/include -include $(src)/external-module-compat.h
obj-m := kvm.o kvm-intel.o kvm-amd.o
-kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o lapic.o
kvm-intel-objs := vmx.o vmx-debug.o
kvm-amd-objs := svm.o
[-- Attachment #4: Type: text/plain, Size: 286 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
[-- Attachment #5: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72AAE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 2:24 ` Dong, Eddie
@ 2007-07-18 2:27 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CDD-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 8:36 ` Avi Kivity
2 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-18 2:27 UTC (permalink / raw)
To: Dong, Eddie, Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
This is the ioapic patch. With this on rebased lapic2 branch, Both
Win2003/Xp and Rhel5 works well.
Appreciate if this can be in the branch so that I can fully move to live
migration support.
thx,eddie
kernel apic/ioapic support
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 3bf7276..e5a8f4d 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
ioapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
new file mode 100644
index 0000000..7c5a6ea
--- /dev/null
+++ b/drivers/kvm/ioapic.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2001 MandrakeSoft S.A.
+ *
+ * MandrakeSoft S.A.
+ * 43, rue d'Aboukir
+ * 75002 Paris - France
+ * http://www.linux-mandrake.com/
+ * http://www.mandrakesoft.com/
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA
+ *
+ * Yunhong Jiang <yunhong.jiang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+ * Yaozu (Eddie) Dong <eddie.dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
+ * Based on Xen 3.1 code.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include "irq.h"
+/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define ioapic_debug(fmt, arg...)
+static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+ unsigned long addr,
+ unsigned long length)
+{
+ unsigned long result = 0;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+ | (IOAPIC_VERSION_ID & 0xff));
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ case IOAPIC_REG_ARB_ID:
+ result = ((ioapic->id & 0xf) << 24);
+ break;
+
+ default:
+ {
+ u32 redir_index = (ioapic->ioregsel - 0x10) >>
1;
+ u64 redir_content;
+
+ ASSERT(redir_index < IOAPIC_NUM_PINS);
+
+ redir_content =
ioapic->redirtbl[redir_index].bits;
+ result = (ioapic->ioregsel & 0x1) ?
+ (redir_content >> 32) & 0xffffffff :
+ redir_content & 0xffffffff;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+{
+ union ioapic_redir_entry *pent;
+
+ pent = &ioapic->redirtbl[idx];
+
+ if (!pent->fields.mask) {
+ ioapic_deliver(ioapic, idx);
+ if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+ pent->fields.remote_irr = 1;
+ }
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+ int index;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ /* Writes are ignored. */
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ ioapic->id = (val >> 24) & 0xf;
+ break;
+
+ case IOAPIC_REG_ARB_ID:
+ break;
+
+ default:
+ index = (ioapic->ioregsel - 0x10) >> 1;
+
+ ioapic_debug("change redir index %x val %x", index,
val);
+ ASSERT(irq < IOAPIC_NUM_PINS);
+ if (ioapic->ioregsel & 1) {
+ ioapic->redirtbl[index].bits &= 0xffffffff;
+ ioapic->redirtbl[index].bits |= (u64) val << 32;
+ } else {
+ ioapic->redirtbl[index].bits &= ~0xffffffffULL;
+ ioapic->redirtbl[index].bits |= (u32) val;
+ ioapic->redirtbl[index].fields.remote_irr = 0;
+ }
+ ioapic_service(ioapic, index);
+ break;
+ }
+}
+
+static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
+ struct kvm_lapic *target,
+ u8 vector, u8 trig_mode, u8 delivery_mode)
+{
+ ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+ delivery_mode);
+
+ ASSERT((delivery_mode == dest_Fixed) ||
+ (delivery_mode == dest_LowestPrio));
+
+ kvm_apic_set_irq(target, vector, trig_mode);
+}
+
+static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8
dest,
+ u8 dest_mode)
+{
+ u32 mask = 0;
+ int i;
+ struct kvm *kvm = ioapic->kvm;
+ struct kvm_vcpu *vcpu;
+
+ ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+
+ if (dest_mode == 0) { /* Physical mode. */
+ if (dest == 0xFF) { /* Broadcast. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i)
+ mask |= 1 << i;
+ return mask;
+ }
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = &kvm->vcpus[i];
+ if (kvm_apic_match_physical_addr(vcpu->apic,
dest)) {
+ mask = 1 << vcpu->vcpu_id;
+ break;
+ }
+ }
+ } else if (dest != 0) /* Logical mode, MDA non-zero. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = &kvm->vcpus[i];
+ if (kvm_apic_match_logical_addr(vcpu->apic,
dest))
+ mask |= 1 << vcpu->vcpu_id;
+ }
+ ioapic_debug("mask %x", mask);
+ return mask;
+}
+
+static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+ u8 dest = ioapic->redirtbl[irq].fields.dest_id;
+ u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
+ u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
+ u8 vector = ioapic->redirtbl[irq].fields.vector;
+ u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
+ u32 deliver_bitmask;
+ struct kvm_lapic *target;
+ struct kvm_vcpu *vcpu;
+ int vcpu_id;
+
+ ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+ "vector=%x trig_mode=%x",
+ dest, dest_mode, delivery_mode, vector, trig_mode);
+
+ deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest,
dest_mode);
+ if (!deliver_bitmask) {
+ ioapic_debug("no target on destination");
+ return;
+ }
+
+ switch (delivery_mode) {
+ case dest_LowestPrio:
+ target =
+ kvm_apic_round_robin(ioapic->kvm, vector,
deliver_bitmask);
+ if (target != NULL)
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode, delivery_mode);
+ else
+ ioapic_debug("null round robin: "
+ "mask=%x vector=%x
delivery_mode=%x",
+ deliver_bitmask, vector,
dest_LowestPrio);
+ break;
+ case dest_Fixed:
+ for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+ if (!(deliver_bitmask & (1 << vcpu_id)))
+ continue;
+ deliver_bitmask &= ~(1 << vcpu_id);
+ vcpu = &ioapic->kvm->vcpus[vcpu_id];
+ if (vcpu != NULL) {
+ target = vcpu->apic;
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode,
delivery_mode);
+ }
+ }
+ break;
+
+ /* TODO: NMI */
+ default:
+ printk(KERN_WARNING "Unsupported delivery mode %d\n",
+ delivery_mode);
+ break;
+ }
+}
+
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+{
+ u32 mask = 1 << irq;
+ union ioapic_redir_entry entry;
+
+ if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
+ entry = ioapic->redirtbl[irq];
+ if (!level)
+ ioapic->irr &= ~mask;
+ if (entry.fields.trig_mode) { /* level triggered */
+ if (level && !entry.fields.remote_irr) {
+ ioapic->irr |= mask;
+ ioapic_service(ioapic, irq);
+ }
+ } else if (level && !(ioapic->irr & mask)) {
+ /*
+ * edge triggered
+ */
+ ioapic->irr |= mask;
+ ioapic_service(ioapic, irq);
+ }
+ }
+}
+
+static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+{
+ int i;
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ if (ioapic->redirtbl[i].fields.vector == vector)
+ return i;
+ return -1;
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->vioapic;
+ union ioapic_redir_entry *ent;
+ int gsi;
+
+ gsi = get_eoi_gsi(ioapic, vector);
+ if (gsi == -1) {
+ printk(KERN_WARNING "Can't find redir item for %d
EOI\n",
+ vector);
+ return;
+ }
+
+ ent = &ioapic->redirtbl[gsi];
+ ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+
+ ent->fields.remote_irr = 0;
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+ ioapic_deliver(ioapic, gsi);
+}
+
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+
+ return ((addr >= ioapic->base_address &&
+ (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr,
int len,
+ void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 result;
+
+ ioapic_debug("addr %lx", (unsigned long)addr);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+
+ addr &= 0xff;
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ result = ioapic->ioregsel;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ result = ioapic_read_indirect(ioapic, addr, len);
+ break;
+
+ default:
+ result = 0;
+ break;
+ }
+ switch (len) {
+ case 8:
+ *(u64 *) val = result;
+ break;
+ case 1:
+ case 2:
+ case 4:
+ memcpy(val, (char *)&result, len);
+ break;
+ default:
+ printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+ }
+}
+
+static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr,
int len,
+ const void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 data;
+
+ ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
+ addr, len, val);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+ if (len == 4 || len == 8)
+ data = *(u32 *) val;
+ else {
+ printk(KERN_WARNING "ioapic: Unsupported size %d\n",
len);
+ return;
+ }
+
+ addr &= 0xff;
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ ioapic->ioregsel = data;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ ioapic_write_indirect(ioapic, data);
+ break;
+
+ default:
+ break;
+ }
+}
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+ struct kvm_ioapic *ioapic;
+ int i;
+
+ ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+ if (!ioapic)
+ return -ENOMEM;
+ kvm->vioapic = ioapic;
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ ioapic->redirtbl[i].fields.mask = 1;
+ ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+ ioapic->dev.read = ioapic_mmio_read;
+ ioapic->dev.write = ioapic_mmio_write;
+ ioapic->dev.in_range = ioapic_in_range;
+ ioapic->dev.private = ioapic;
+ ioapic->kvm = kvm;
+ kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
+ return 0;
+}
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index 0b4430a..5265f82 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -74,7 +74,3 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu,
0, 0);
}
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
-{
- /* TODO: for kernel IOAPIC */
-}
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index 24f8b31..a74d2e9 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -60,6 +60,49 @@ int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+#define IOAPIC_NUM_PINS 24
+#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
+#define IOAPIC_EDGE_TRIG 0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
+#define IOAPIC_MEM_LENGTH 0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT 0x00
+#define IOAPIC_REG_WINDOW 0x10
+#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
+
+struct kvm_ioapic {
+ struct kvm_io_device dev;
+ unsigned long base_address;
+ struct kvm *kvm;
+ u32 ioregsel;
+ u32 id;
+ u32 irr;
+ union ioapic_redir_entry {
+ u64 bits;
+ struct {
+ u8 vector;
+ u8 delivery_mode:3;
+ u8 dest_mode:1;
+ u8 delivery_status:1;
+ u8 polarity:1;
+ u8 remote_irr:1;
+ u8 trig_mode:1;
+ u8 mask:1;
+ u8 reserve:7;
+ u8 reserved[4];
+ u8 dest_id;
+ } fields;
+ } redirtbl[IOAPIC_NUM_PINS];
+};
+
struct kvm_lapic {
spinlock_t lock; /* TODO for revise */
u32 status;
@@ -98,8 +141,14 @@ void kvm_free_apic(struct kvm_lapic *apic);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, u32
bitmap);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
#endif
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index a5107b5..9dbdf04 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -451,6 +451,7 @@ struct kvm {
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
struct kvm_pic *vpic;
+ struct kvm_ioapic *vioapic;
};
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 0d7c3e6..864c251 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -419,6 +419,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_io_bus_destroy(&kvm->pio_bus);
kvm_io_bus_destroy(&kvm->mmio_bus);
kfree(kvm->vpic);
+ kfree(kvm->vioapic);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
kfree(kvm);
@@ -2838,8 +2839,14 @@ static long kvm_vm_ioctl(struct file *filp,
case KVM_CREATE_PIC:
r = -ENOMEM;
kvm->vpic = kvm_create_pic(kvm);
- if (kvm->vpic)
- r = 0;
+ if (kvm->vpic) {
+ r = kvm_ioapic_init(kvm);
+ if (r) {
+ kfree(kvm->vpic);
+ kvm->vpic = NULL;
+ goto out;
+ }
+ }
else
goto out;
break;
@@ -2854,7 +2861,9 @@ static long kvm_vm_ioctl(struct file *filp,
kvm_pic_set_irq(pic_irqchip(kvm),
irq_event.irq,
irq_event.level);
- /* TODO: IOAPIC */
+ kvm_ioapic_set_irq(kvm->vioapic,
+ irq_event.irq,
+ irq_event.level);
r = 0;
}
break;
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CDD-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-18 2:28 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CE1-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-18 2:28 UTC (permalink / raw)
To: Dong, Eddie, Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1: Type: text/plain, Size: 32 bytes --]
Missed attachment. Here it is.
[-- Attachment #2: ioapic-30-4.user.patch --]
[-- Type: application/octet-stream, Size: 1739 bytes --]
commit 990a3885946f11df2706de87ef53cfbcd447a32c
Author: root <root@vt32-pae.(none)>
Date: Wed Jul 18 10:12:59 2007 +0800
kernel apic/ioapic support
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/kernel/Kbuild b/kernel/Kbuild
index ceb6e4b..a502c02 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1,5 +1,5 @@
EXTRA_CFLAGS := -I$(src)/include -include $(src)/external-module-compat.h
obj-m := kvm.o kvm-intel.o kvm-amd.o
-kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o lapic.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o lapic.o ioapic.o
kvm-intel-objs := vmx.o vmx-debug.o
kvm-amd-objs := svm.o
diff --git a/user/kvmctl.c b/user/kvmctl.c
index d610111..51ab82d 100644
--- a/user/kvmctl.c
+++ b/user/kvmctl.c
@@ -1048,25 +1048,6 @@ int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
return ioctl(kvm->vcpu_fd[vcpu], KVM_DEBUG_GUEST, dbg);
}
-static void cpuid_remove_apic(struct kvm_cpuid *cpuid)
-{
- int i;
- struct kvm_cpuid_entry *e, *entry;
-
- entry = NULL;
- for (i = 0; i < cpuid->nent; ++i) {
- e = &cpuid->entries[i];
- if (e->function == 1) {
- entry = e;
- break;
- }
- }
- if (entry) {
- entry->edx &= ~(1 << 9);
- printf("Guest APIC capibility removed\n");
- }
-}
-
int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
struct kvm_cpuid_entry *entries)
{
@@ -1079,8 +1060,6 @@ int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
cpuid->nent = nent;
memcpy(cpuid->entries, entries, nent * sizeof(*entries));
- /* temply walkaround before merge of in-kernel APIC */
- cpuid_remove_apic(cpuid);
r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_CPUID, cpuid);
free(cpuid);
[-- Attachment #3: ioapic-30-4.patch --]
[-- Type: application/octet-stream, Size: 14587 bytes --]
commit 56933b5ce9c73bb514826f83ff01bd1b21765eb2
Author: root <root@vt32-pae.(none)>
Date: Wed Jul 18 10:05:33 2007 +0800
kernel apic/ioapic support
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index 3bf7276..e5a8f4d 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
new file mode 100644
index 0000000..7c5a6ea
--- /dev/null
+++ b/drivers/kvm/ioapic.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2001 MandrakeSoft S.A.
+ *
+ * MandrakeSoft S.A.
+ * 43, rue d'Aboukir
+ * 75002 Paris - France
+ * http://www.linux-mandrake.com/
+ * http://www.mandrakesoft.com/
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Yunhong Jiang <yunhong.jiang@intel.com>
+ * Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ * Based on Xen 3.1 code.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include "irq.h"
+/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define ioapic_debug(fmt, arg...)
+static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+ unsigned long addr,
+ unsigned long length)
+{
+ unsigned long result = 0;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+ | (IOAPIC_VERSION_ID & 0xff));
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ case IOAPIC_REG_ARB_ID:
+ result = ((ioapic->id & 0xf) << 24);
+ break;
+
+ default:
+ {
+ u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
+ u64 redir_content;
+
+ ASSERT(redir_index < IOAPIC_NUM_PINS);
+
+ redir_content = ioapic->redirtbl[redir_index].bits;
+ result = (ioapic->ioregsel & 0x1) ?
+ (redir_content >> 32) & 0xffffffff :
+ redir_content & 0xffffffff;
+ break;
+ }
+ }
+
+ return result;
+}
+
+static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+{
+ union ioapic_redir_entry *pent;
+
+ pent = &ioapic->redirtbl[idx];
+
+ if (!pent->fields.mask) {
+ ioapic_deliver(ioapic, idx);
+ if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+ pent->fields.remote_irr = 1;
+ }
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+ int index;
+
+ switch (ioapic->ioregsel) {
+ case IOAPIC_REG_VERSION:
+ /* Writes are ignored. */
+ break;
+
+ case IOAPIC_REG_APIC_ID:
+ ioapic->id = (val >> 24) & 0xf;
+ break;
+
+ case IOAPIC_REG_ARB_ID:
+ break;
+
+ default:
+ index = (ioapic->ioregsel - 0x10) >> 1;
+
+ ioapic_debug("change redir index %x val %x", index, val);
+ ASSERT(irq < IOAPIC_NUM_PINS);
+ if (ioapic->ioregsel & 1) {
+ ioapic->redirtbl[index].bits &= 0xffffffff;
+ ioapic->redirtbl[index].bits |= (u64) val << 32;
+ } else {
+ ioapic->redirtbl[index].bits &= ~0xffffffffULL;
+ ioapic->redirtbl[index].bits |= (u32) val;
+ ioapic->redirtbl[index].fields.remote_irr = 0;
+ }
+ ioapic_service(ioapic, index);
+ break;
+ }
+}
+
+static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
+ struct kvm_lapic *target,
+ u8 vector, u8 trig_mode, u8 delivery_mode)
+{
+ ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+ delivery_mode);
+
+ ASSERT((delivery_mode == dest_Fixed) ||
+ (delivery_mode == dest_LowestPrio));
+
+ kvm_apic_set_irq(target, vector, trig_mode);
+}
+
+static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+ u8 dest_mode)
+{
+ u32 mask = 0;
+ int i;
+ struct kvm *kvm = ioapic->kvm;
+ struct kvm_vcpu *vcpu;
+
+ ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+
+ if (dest_mode == 0) { /* Physical mode. */
+ if (dest == 0xFF) { /* Broadcast. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i)
+ mask |= 1 << i;
+ return mask;
+ }
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = &kvm->vcpus[i];
+ if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
+ mask = 1 << vcpu->vcpu_id;
+ break;
+ }
+ }
+ } else if (dest != 0) /* Logical mode, MDA non-zero. */
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = &kvm->vcpus[i];
+ if (kvm_apic_match_logical_addr(vcpu->apic, dest))
+ mask |= 1 << vcpu->vcpu_id;
+ }
+ ioapic_debug("mask %x", mask);
+ return mask;
+}
+
+static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+ u8 dest = ioapic->redirtbl[irq].fields.dest_id;
+ u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
+ u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
+ u8 vector = ioapic->redirtbl[irq].fields.vector;
+ u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
+ u32 deliver_bitmask;
+ struct kvm_lapic *target;
+ struct kvm_vcpu *vcpu;
+ int vcpu_id;
+
+ ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+ "vector=%x trig_mode=%x",
+ dest, dest_mode, delivery_mode, vector, trig_mode);
+
+ deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
+ if (!deliver_bitmask) {
+ ioapic_debug("no target on destination");
+ return;
+ }
+
+ switch (delivery_mode) {
+ case dest_LowestPrio:
+ target =
+ kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
+ if (target != NULL)
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode, delivery_mode);
+ else
+ ioapic_debug("null round robin: "
+ "mask=%x vector=%x delivery_mode=%x",
+ deliver_bitmask, vector, dest_LowestPrio);
+ break;
+ case dest_Fixed:
+ for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+ if (!(deliver_bitmask & (1 << vcpu_id)))
+ continue;
+ deliver_bitmask &= ~(1 << vcpu_id);
+ vcpu = &ioapic->kvm->vcpus[vcpu_id];
+ if (vcpu != NULL) {
+ target = vcpu->apic;
+ ioapic_inj_irq(ioapic, target, vector,
+ trig_mode, delivery_mode);
+ }
+ }
+ break;
+
+ /* TODO: NMI */
+ default:
+ printk(KERN_WARNING "Unsupported delivery mode %d\n",
+ delivery_mode);
+ break;
+ }
+}
+
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+{
+ u32 mask = 1 << irq;
+ union ioapic_redir_entry entry;
+
+ if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
+ entry = ioapic->redirtbl[irq];
+ if (!level)
+ ioapic->irr &= ~mask;
+ if (entry.fields.trig_mode) { /* level triggered */
+ if (level && !entry.fields.remote_irr) {
+ ioapic->irr |= mask;
+ ioapic_service(ioapic, irq);
+ }
+ } else if (level && !(ioapic->irr & mask)) {
+ /*
+ * edge triggered
+ */
+ ioapic->irr |= mask;
+ ioapic_service(ioapic, irq);
+ }
+ }
+}
+
+static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+{
+ int i;
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ if (ioapic->redirtbl[i].fields.vector == vector)
+ return i;
+ return -1;
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->vioapic;
+ union ioapic_redir_entry *ent;
+ int gsi;
+
+ gsi = get_eoi_gsi(ioapic, vector);
+ if (gsi == -1) {
+ printk(KERN_WARNING "Can't find redir item for %d EOI\n",
+ vector);
+ return;
+ }
+
+ ent = &ioapic->redirtbl[gsi];
+ ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+
+ ent->fields.remote_irr = 0;
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+ ioapic_deliver(ioapic, gsi);
+}
+
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+
+ return ((addr >= ioapic->base_address &&
+ (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+ void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 result;
+
+ ioapic_debug("addr %lx", (unsigned long)addr);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+
+ addr &= 0xff;
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ result = ioapic->ioregsel;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ result = ioapic_read_indirect(ioapic, addr, len);
+ break;
+
+ default:
+ result = 0;
+ break;
+ }
+ switch (len) {
+ case 8:
+ *(u64 *) val = result;
+ break;
+ case 1:
+ case 2:
+ case 4:
+ memcpy(val, (char *)&result, len);
+ break;
+ default:
+ printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+ }
+}
+
+static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+ const void *val)
+{
+ struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+ u32 data;
+
+ ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
+ addr, len, val);
+ ASSERT(!(addr & 0xf)); /* check alignment */
+ if (len == 4 || len == 8)
+ data = *(u32 *) val;
+ else {
+ printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
+ return;
+ }
+
+ addr &= 0xff;
+ switch (addr) {
+ case IOAPIC_REG_SELECT:
+ ioapic->ioregsel = data;
+ break;
+
+ case IOAPIC_REG_WINDOW:
+ ioapic_write_indirect(ioapic, data);
+ break;
+
+ default:
+ break;
+ }
+}
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+ struct kvm_ioapic *ioapic;
+ int i;
+
+ ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+ if (!ioapic)
+ return -ENOMEM;
+ kvm->vioapic = ioapic;
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ ioapic->redirtbl[i].fields.mask = 1;
+ ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+ ioapic->dev.read = ioapic_mmio_read;
+ ioapic->dev.write = ioapic_mmio_write;
+ ioapic->dev.in_range = ioapic_in_range;
+ ioapic->dev.private = ioapic;
+ ioapic->kvm = kvm;
+ kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
+ return 0;
+}
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index 0b4430a..5265f82 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -74,7 +74,3 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
}
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
-{
- /* TODO: for kernel IOAPIC */
-}
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index 24f8b31..a74d2e9 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -60,6 +60,49 @@ int kvm_pic_read_irq(struct kvm_pic *s);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
+#define IOAPIC_NUM_PINS 24
+#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
+#define IOAPIC_EDGE_TRIG 0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
+#define IOAPIC_MEM_LENGTH 0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT 0x00
+#define IOAPIC_REG_WINDOW 0x10
+#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
+
+struct kvm_ioapic {
+ struct kvm_io_device dev;
+ unsigned long base_address;
+ struct kvm *kvm;
+ u32 ioregsel;
+ u32 id;
+ u32 irr;
+ union ioapic_redir_entry {
+ u64 bits;
+ struct {
+ u8 vector;
+ u8 delivery_mode:3;
+ u8 dest_mode:1;
+ u8 delivery_status:1;
+ u8 polarity:1;
+ u8 remote_irr:1;
+ u8 trig_mode:1;
+ u8 mask:1;
+ u8 reserve:7;
+ u8 reserved[4];
+ u8 dest_id;
+ } fields;
+ } redirtbl[IOAPIC_NUM_PINS];
+};
+
struct kvm_lapic {
spinlock_t lock; /* TODO for revise */
u32 status;
@@ -98,8 +141,14 @@ void kvm_free_apic(struct kvm_lapic *apic);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, u32 bitmap);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
#endif
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index a5107b5..9dbdf04 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -451,6 +451,7 @@ struct kvm {
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
struct kvm_pic *vpic;
+ struct kvm_ioapic *vioapic;
};
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 0d7c3e6..864c251 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -419,6 +419,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_io_bus_destroy(&kvm->pio_bus);
kvm_io_bus_destroy(&kvm->mmio_bus);
kfree(kvm->vpic);
+ kfree(kvm->vioapic);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
kfree(kvm);
@@ -2838,8 +2839,14 @@ static long kvm_vm_ioctl(struct file *filp,
case KVM_CREATE_PIC:
r = -ENOMEM;
kvm->vpic = kvm_create_pic(kvm);
- if (kvm->vpic)
- r = 0;
+ if (kvm->vpic) {
+ r = kvm_ioapic_init(kvm);
+ if (r) {
+ kfree(kvm->vpic);
+ kvm->vpic = NULL;
+ goto out;
+ }
+ }
else
goto out;
break;
@@ -2854,7 +2861,9 @@ static long kvm_vm_ioctl(struct file *filp,
kvm_pic_set_irq(pic_irqchip(kvm),
irq_event.irq,
irq_event.level);
- /* TODO: IOAPIC */
+ kvm_ioapic_set_irq(kvm->vioapic,
+ irq_event.irq,
+ irq_event.level);
r = 0;
}
break;
[-- Attachment #4: Type: text/plain, Size: 286 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
[-- Attachment #5: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72AAE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 2:24 ` Dong, Eddie
2007-07-18 2:27 ` Dong, Eddie
@ 2007-07-18 8:36 ` Avi Kivity
2 siblings, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2007-07-18 8:36 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> This patch is to wrap APIC base register and CR8 operation which can
> provide a unique API for user level irqchip and kernel irqchip.
> This is a preparation of merging lapic/ioapic patch.
>
>
Applied and pushed, thanks.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CD4-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-18 9:01 ` Avi Kivity
2007-07-18 9:07 ` Avi Kivity
1 sibling, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2007-07-18 9:01 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> Avi:
> This is the cleaned and branch check-in ready candidate patch
> for lapic based on previous cr8 patch.
> It needs the later ioapic patch to fully function.
>
>
Applied. It misses a description and sign-off line. I invented a
description, but please supply a signed-off-by line.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CE1-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-18 9:04 ` Avi Kivity
2007-07-18 9:07 ` Avi Kivity
1 sibling, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2007-07-18 9:04 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> Missed attachment. Here it is.
>
Applied & pushed, thanks.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CD4-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 9:01 ` Avi Kivity
@ 2007-07-18 9:07 ` Avi Kivity
[not found] ` <469DD843.4040904-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
1 sibling, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2007-07-18 9:07 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> Avi:
> This is the cleaned and branch check-in ready candidate patch
> for lapic based on previous cr8 patch.
> It needs the later ioapic patch to fully function.
>
>
Most of userspace part is missing.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CE1-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 9:04 ` Avi Kivity
@ 2007-07-18 9:07 ` Avi Kivity
1 sibling, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2007-07-18 9:07 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> Missed attachment. Here it is.
>
The userspace part doesn't apply, it needs the missing lapic userspace
patch.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <469DD843.4040904-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-18 9:38 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C73018-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-18 9:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Avi Kivity wrote:
> Dong, Eddie wrote:
>> Avi:
>> This is the cleaned and branch check-in ready candidate patch
>> for lapic based on previous cr8 patch.
>> It needs the later ioapic patch to fully function.
>>
>>
>
> Most of userspace part is missing.
>
??? I don't need the user side change for this one since it
is just for compile purpose. Only add the file to KBuild.
thx,eddie
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C73018-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-18 9:43 ` Avi Kivity
[not found] ` <469DE0B1.6020101-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2007-07-18 9:43 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
> Avi Kivity wrote:
>
>> Dong, Eddie wrote:
>>
>>> Avi:
>>> This is the cleaned and branch check-in ready candidate patch
>>> for lapic based on previous cr8 patch.
>>> It needs the later ioapic patch to fully function.
>>>
>>>
>>>
>> Most of userspace part is missing.
>>
>>
> ??? I don't need the user side change for this one since it
> is just for compile purpose. Only add the file to KBuild.
> thx,eddie
>
Well, the ioapic userspace patch removes some nonexistent code that I
assume the lapic userspace patch should have added.
Please send a new userspace patch series just to avoid confusion.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <469DE0B1.6020101-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-18 9:46 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C7301C-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
0 siblings, 1 reply; 20+ messages in thread
From: Dong, Eddie @ 2007-07-18 9:46 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
[-- Attachment #1: Type: text/plain, Size: 290 bytes --]
> Well, the ioapic userspace patch removes some nonexistent code that I
> assume the lapic userspace patch should have added.
>
O, this is a temp hack added by in kernel PIC. Maybe the branch
mismatch.
Here is the resent one. The sequence is:
lapic
ioapic
hlt
thx,eddie
[-- Attachment #2: pic-hlt5.user.patch --]
[-- Type: application/octet-stream, Size: 664 bytes --]
Remove temp hack introduced by kernel PIC.
With full kernel hlt emulation, this hack
is not necessary.
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 1c7c14e..a5f3492 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -622,8 +622,7 @@ static int kvm_halt(void *opaque, int vcpu)
env = envs[0];
if (!((env->interrupt_request & CPU_INTERRUPT_HARD) &&
(env->eflags & IF_MASK))) {
- /* TODO: for halt emulation, temply walkaround now */
- /* env->hflags |= HF_HALTED_MASK; */
+ env->hflags |= HF_HALTED_MASK;
env->exception_index = EXCP_HLT;
}
[-- Attachment #3: apic-30-4.user.patch --]
[-- Type: application/octet-stream, Size: 673 bytes --]
commit 4d217d13d4a47b2b7796ff152c0560dd68102feb
Author: root <root@vt32-pae.(none)>
Date: Wed Jul 18 10:06:27 2007 +0800
Add kernel lapic for compile
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/kernel/Kbuild b/kernel/Kbuild
index 77141a5..ceb6e4b 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1,5 +1,5 @@
EXTRA_CFLAGS := -I$(src)/include -include $(src)/external-module-compat.h
obj-m := kvm.o kvm-intel.o kvm-amd.o
-kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o lapic.o
kvm-intel-objs := vmx.o vmx-debug.o
kvm-amd-objs := svm.o
[-- Attachment #4: ioapic-30-4.user.patch --]
[-- Type: application/octet-stream, Size: 1746 bytes --]
commit 990a3885946f11df2706de87ef53cfbcd447a32c
Author: root <root@vt32-pae.(none)>
Date: Wed Jul 18 10:12:59 2007 +0800
Enable kernel apic/ioapic support
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
diff --git a/kernel/Kbuild b/kernel/Kbuild
index ceb6e4b..a502c02 100644
--- a/kernel/Kbuild
+++ b/kernel/Kbuild
@@ -1,5 +1,5 @@
EXTRA_CFLAGS := -I$(src)/include -include $(src)/external-module-compat.h
obj-m := kvm.o kvm-intel.o kvm-amd.o
-kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o lapic.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o anon_inodes.o irq.o i8259.o lapic.o ioapic.o
kvm-intel-objs := vmx.o vmx-debug.o
kvm-amd-objs := svm.o
diff --git a/user/kvmctl.c b/user/kvmctl.c
index d610111..51ab82d 100644
--- a/user/kvmctl.c
+++ b/user/kvmctl.c
@@ -1048,25 +1048,6 @@ int kvm_guest_debug(kvm_context_t kvm, int vcpu, struct kvm_debug_guest *dbg)
return ioctl(kvm->vcpu_fd[vcpu], KVM_DEBUG_GUEST, dbg);
}
-static void cpuid_remove_apic(struct kvm_cpuid *cpuid)
-{
- int i;
- struct kvm_cpuid_entry *e, *entry;
-
- entry = NULL;
- for (i = 0; i < cpuid->nent; ++i) {
- e = &cpuid->entries[i];
- if (e->function == 1) {
- entry = e;
- break;
- }
- }
- if (entry) {
- entry->edx &= ~(1 << 9);
- printf("Guest APIC capibility removed\n");
- }
-}
-
int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
struct kvm_cpuid_entry *entries)
{
@@ -1079,8 +1060,6 @@ int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent,
cpuid->nent = nent;
memcpy(cpuid->entries, entries, nent * sizeof(*entries));
- /* temply walkaround before merge of in-kernel APIC */
- cpuid_remove_apic(cpuid);
r = ioctl(kvm->vcpu_fd[vcpu], KVM_SET_CPUID, cpuid);
free(cpuid);
[-- Attachment #5: Type: text/plain, Size: 286 bytes --]
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
[-- Attachment #6: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: apic+ioapiuc patch
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C7301C-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-18 10:29 ` Avi Kivity
0 siblings, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2007-07-18 10:29 UTC (permalink / raw)
To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Dong, Eddie wrote:
>> Well, the ioapic userspace patch removes some nonexistent code that I
>> assume the lapic userspace patch should have added.
>>
>>
> O, this is a temp hack added by in kernel PIC. Maybe the branch
> mismatch.
>
> Here is the resent one. The sequence is:
> lapic
> ioapic
> hlt
>
>
All applied and pushed.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] 20+ messages in thread
end of thread, other threads:[~2007-07-18 10:29 UTC | newest]
Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-13 9:29 apic+ioapiuc patch Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C2514E-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-13 12:51 ` Avi Kivity
[not found] ` <4697754D.7000003-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-07-17 3:37 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C7272B-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-17 6:46 ` Dor Laor
[not found] ` <64F9B87B6B770947A9F8391472E032160CC162E8-yEcIvxbTEBqsx+V+t5oei8rau4O3wl8o3fe8/T/H7NteoWH0uzbU5w@public.gmane.org>
2007-07-17 7:36 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72915-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-17 8:04 ` Avi Kivity
2007-07-17 8:03 ` Avi Kivity
[not found] ` <469C77D1.6010003-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-07-17 15:15 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72AAE-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 2:24 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CD4-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 9:01 ` Avi Kivity
2007-07-18 9:07 ` Avi Kivity
[not found] ` <469DD843.4040904-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-07-18 9:38 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C73018-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 9:43 ` Avi Kivity
[not found] ` <469DE0B1.6020101-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-07-18 9:46 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C7301C-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 10:29 ` Avi Kivity
2007-07-18 2:27 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CDD-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 2:28 ` Dong, Eddie
[not found] ` <10EA09EFD8728347A513008B6B0DA77A01C72CE1-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-18 9:04 ` Avi Kivity
2007-07-18 9:07 ` Avi Kivity
2007-07-18 8:36 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox