* PCI device assignment to guests
@ 2008-06-27 12:55 Amit Shah
2008-06-27 12:55 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Amit Shah
0 siblings, 1 reply; 15+ messages in thread
From: Amit Shah @ 2008-06-27 12:55 UTC (permalink / raw)
To: kvm; +Cc: muli, benami, allen.m.kay, chrisw, weidong.han, avi,
virtualization
The main change from the patches I sent out earlier this week is support for guests that use the PIC. A callback for PIC irq ack handling is also introduced.
Currently, there is no mechanism for callers to register or unregister for the irq ack callbacks, but one can be added once there is more than one user of the functionality.
Please review.
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests
2008-06-27 12:55 PCI device assignment to guests Amit Shah
@ 2008-06-27 12:55 ` Amit Shah
2008-06-27 12:55 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Amit Shah
2008-07-05 11:05 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Avi Kivity
0 siblings, 2 replies; 15+ messages in thread
From: Amit Shah @ 2008-06-27 12:55 UTC (permalink / raw)
To: kvm
Cc: muli, benami, allen.m.kay, chrisw, weidong.han, avi,
virtualization, Amit Shah
This function injects an interrupt into the guest given the kvm struct,
the (guest) irq number and the interrupt level.
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
arch/x86/kvm/irq.c | 11 +++++++++++
arch/x86/kvm/irq.h | 2 ++
2 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 76d736b..0d9e552 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -100,3 +100,14 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
}
+
+/*
+ * kvm_set_irq - set a guest interrupt line to the given level.
+ * @kvm:   VM whose in-kernel irqchip models are updated
+ * @irq:   guest irq number
+ * @level: level handed unchanged to both irqchip models
+ *
+ * Must be called with the kvm->lock mutex held.
+ */
+void kvm_set_irq(struct kvm *kvm, int irq, int level)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	struct kvm_pic *pic = pic_irqchip(kvm);
+
+	/*
+	 * There is no way to tell whether the guest is using the PIC or
+	 * the IOAPIC, so the line is set in both; the guest ignores the
+	 * one it does not use.
+	 */
+	kvm_ioapic_set_irq(ioapic, irq, level);
+	kvm_pic_set_irq(pic, irq, level);
+}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2a15be2..ba4e3bf 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -80,6 +80,8 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s);
+void kvm_set_irq(struct kvm *kvm, int irq, int level);
+
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
--
1.5.4.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling
2008-06-27 12:55 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Amit Shah
@ 2008-06-27 12:55 ` Amit Shah
2008-06-27 12:55 ` [PATCH 3/4] KVM: Introduce a callback routine for PIC " Amit Shah
2008-07-07 10:08 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Avi Kivity
2008-07-05 11:05 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Avi Kivity
1 sibling, 2 replies; 15+ messages in thread
From: Amit Shah @ 2008-06-27 12:55 UTC (permalink / raw)
To: kvm
Cc: muli, benami, allen.m.kay, chrisw, weidong.han, avi,
virtualization, Amit Shah
This will be useful for acking irqs of assigned devices
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
virt/kvm/ioapic.c | 3 +++
virt/kvm/ioapic.h | 1 +
2 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 9d02136..6d99a35 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -295,6 +295,9 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
ent->fields.remote_irr = 0;
if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
ioapic_deliver(ioapic, gsi);
+
+ if (ioapic->ack_notifier)
+ ioapic->ack_notifier(ioapic->kvm, gsi);
}
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7f16675..a42743f 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -58,6 +58,7 @@ struct kvm_ioapic {
} redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
+ void (*ack_notifier)(void *opaque, int irq);
};
#ifdef DEBUG
--
1.5.4.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 3/4] KVM: Introduce a callback routine for PIC ack handling
2008-06-27 12:55 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Amit Shah
@ 2008-06-27 12:55 ` Amit Shah
2008-06-27 12:55 ` [PATCH 4/4] KVM: Handle device assignment to guests Amit Shah
2008-07-07 10:08 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Avi Kivity
1 sibling, 1 reply; 15+ messages in thread
From: Amit Shah @ 2008-06-27 12:55 UTC (permalink / raw)
To: kvm
Cc: muli, benami, allen.m.kay, chrisw, weidong.han, avi,
virtualization, Amit Shah
This is useful for acking irqs of assigned devices
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
arch/x86/kvm/i8259.c | 6 +++++-
arch/x86/kvm/irq.c | 2 +-
arch/x86/kvm/irq.h | 3 ++-
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 5857f59..3ba5e5c 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -151,9 +151,10 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
s->irr &= ~(1 << irq);
}
-int kvm_pic_read_irq(struct kvm_pic *s)
+int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
+ struct kvm_pic *s = pic_irqchip(kvm);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
@@ -178,6 +179,9 @@ int kvm_pic_read_irq(struct kvm_pic *s)
irq = 7;
intno = s->pics[0].irq_base + irq;
}
+ if (kvm->arch.vpic->ack_notifier)
+ kvm->arch.vpic->ack_notifier(kvm, irq);
+
pic_update_irq(s);
return intno;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 0d9e552..3529620 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm);
s->output = 0; /* PIC */
- vector = kvm_pic_read_irq(s);
+ vector = kvm_pic_read_irq(v->kvm);
}
}
return vector;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index ba4e3bf..bef9127 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -61,11 +61,12 @@ struct kvm_pic {
void *irq_request_opaque;
int output; /* intr from master PIC */
struct kvm_io_device dev;
+ void (*ack_notifier)(void *opaque, int irq);
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
-int kvm_pic_read_irq(struct kvm_pic *s);
+int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
--
1.5.4.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 4/4] KVM: Handle device assignment to guests
2008-06-27 12:55 ` [PATCH 3/4] KVM: Introduce a callback routine for PIC " Amit Shah
@ 2008-06-27 12:55 ` Amit Shah
2008-07-02 10:25 ` Amit Shah
0 siblings, 1 reply; 15+ messages in thread
From: Amit Shah @ 2008-06-27 12:55 UTC (permalink / raw)
To: kvm
Cc: muli, benami, allen.m.kay, chrisw, weidong.han, avi,
virtualization, Amit Shah
From: Amit Shah <amit.shah@qumranet.com>
From: Ben-Ami Yassour <benami@il.ibm.com>
From: Han, Weidong <weidong.han@intel.com>
This patch adds support for handling PCI devices that are assigned to the
guest ("PCI passthrough").
The device to be assigned to the guest is registered in the host kernel and
its interrupts are forwarded to the guest. If the device is already assigned,
or a host driver is still bound to it, the assignment fails and -EBUSY is
returned to userspace.
Devices that share their interrupt line are not supported at the moment.
By itself, this patch will not make devices work within the guest. There has
to be some mechanism of translating guest DMA addresses into machine addresses.
This support comes from one of three approaches:
1. If you have recent Intel hardware with VT-d support, you can use the patches
in
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm.git vtd
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm-userspace.git vtd
These patches are for the host kernel.
2. For paravirtualised Linux guests, you can use the patches in
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm.git pvdma
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm-userspace.git pvdma
This kernel tree has patches for host as well as guest kernels.
3. 1-1 mapping of guest in host address space
The patches to do this are available on the kvm / lkml list archives:
http://thread.gmane.org/gmane.comp.emulators.kvm.devel/18722/focus=18753
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
arch/x86/kvm/x86.c | 293 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/kvm_host.h | 38 ++++++
include/asm-x86/kvm_para.h | 16 +++-
include/linux/kvm.h | 3 +
virt/kvm/ioapic.c | 12 ++-
5 files changed, 359 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0fbc032..bef3706 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4,10 +4,12 @@
* derived from drivers/kvm/kvm_main.c
*
* Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
+ * Amit Shah <amit.shah@qumranet.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
@@ -21,8 +23,10 @@
#include "tss.h"
#include <linux/clocksource.h>
+#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
+#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
@@ -95,6 +99,282 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+DEFINE_RWLOCK(kvm_pci_pt_lock);
+
+/*
+ * Look up an assigned ("passthrough") PCI device on the list at @head.
+ *
+ * @source selects which key is compared:
+ *   KVM_PT_SOURCE_IRQ     - @irq against the host irq (host interrupt path)
+ *   KVM_PT_SOURCE_IRQ_ACK - @irq against the guest irq (guest ack path)
+ *   KVM_PT_SOURCE_UPDATE  - @pt_pci_info busnr/devfn against the host
+ *                           busnr/devfn (ioctl path); @irq is not used
+ * Any other @source matches nothing.
+ *
+ * Returns the first matching entry, or NULL if there is none.  No lock is
+ * taken here; the callers in this patch hold kvm_pci_pt_lock around the
+ * call.
+ */
+struct kvm_pci_pt_dev_list *
+kvm_find_pci_pt_dev(struct list_head *head,
+		    struct kvm_pci_pt_info *pt_pci_info, int irq, int source)
+{
+	struct kvm_pci_pt_dev_list *entry;
+
+	list_for_each_entry(entry, head, list) {
+		if (source == KVM_PT_SOURCE_IRQ) {
+			if (entry->pt_dev.host.irq == irq)
+				return entry;
+		} else if (source == KVM_PT_SOURCE_IRQ_ACK) {
+			if (entry->pt_dev.guest.irq == irq)
+				return entry;
+		} else if (source == KVM_PT_SOURCE_UPDATE) {
+			if ((entry->pt_dev.host.busnr == pt_pci_info->busnr) &&
+			    (entry->pt_dev.host.devfn == pt_pci_info->devfn))
+				return entry;
+		}
+	}
+	return NULL;
+}
+
+static DECLARE_BITMAP(pt_irq_handled, NR_IRQS);
+
+/*
+ * Workqueue handler shared by the int_work and ack_work items of an
+ * assigned device.
+ *
+ * For a host interrupt (source == 0) the guest irq line is raised.  For
+ * a guest ack (source != 0) the guest irq line is lowered and the host
+ * irq, which the interrupt handler disabled, is enabled again.  The VM
+ * reference taken when the work was queued is dropped at the end.
+ */
+static void kvm_pci_pt_work_fn(struct work_struct *work)
+{
+	struct kvm_pci_pt_work *pt_work =
+		container_of(work, struct kvm_pci_pt_work, work);
+	struct kvm *kvm = pt_work->kvm;
+	struct kvm_pci_pt_dev_list *entry;
+	unsigned long irqflags;
+	int lookup;
+	int guest_irq = 0;
+	int host_irq = 0;
+
+	if (pt_work->source)
+		lookup = KVM_PT_SOURCE_IRQ_ACK;
+	else
+		lookup = KVM_PT_SOURCE_IRQ;
+
+	/* This is taken to safely inject irq inside the guest. When
+	 * the interrupt injection (or the ioapic code) uses a
+	 * finer-grained lock, update this
+	 */
+	mutex_lock(&kvm->lock);
+
+	/* Copy what we need while the list is stable */
+	read_lock_irqsave(&kvm_pci_pt_lock, irqflags);
+	entry = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
+				    pt_work->irq, lookup);
+	if (entry) {
+		guest_irq = entry->pt_dev.guest.irq;
+		host_irq = entry->pt_dev.host.irq;
+	}
+	read_unlock_irqrestore(&kvm_pci_pt_lock, irqflags);
+
+	/* entry is only tested, never dereferenced, outside the lock */
+	if (!entry) {
+		printk(KERN_ERR "%s: no matching device assigned to guest "
+		       "found for irq %d, source = %d!\n",
+		       __func__, pt_work->irq, pt_work->source);
+	} else if (lookup == KVM_PT_SOURCE_IRQ) {
+		kvm_set_irq(kvm, guest_irq, 1);
+	} else {
+		kvm_set_irq(kvm, pt_work->irq, 0);
+		enable_irq(host_irq);
+	}
+
+	mutex_unlock(&kvm->lock);
+	kvm_put_kvm(kvm);
+}
+
+/*
+ * Host interrupt handler for an assigned device; @dev_id is the struct kvm
+ * the device belongs to.
+ *
+ * When the irq belongs to a device on the VM's list, the device's
+ * int_work is queued with a VM reference held, and the host irq is
+ * disabled without waiting (disable_irq_nosync).
+ *
+ * FIXME: Implement the OR logic needed to make shared interrupts on
+ * this line behave properly
+ */
+static irqreturn_t kvm_pci_pt_dev_intr(int irq, void *dev_id)
+{
+	struct kvm *kvm = dev_id;
+	struct kvm_pci_pt_dev_list *entry;
+	struct kvm_pci_pt_work *pt_work;
+	irqreturn_t ret = IRQ_NONE;
+
+	if (!test_bit(irq, pt_irq_handled))
+		return ret;
+
+	read_lock(&kvm_pci_pt_lock);
+	entry = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
+				    irq, KVM_PT_SOURCE_IRQ);
+	if (entry) {
+		pt_work = &entry->pt_dev.int_work;
+		pt_work->irq = irq;
+		pt_work->kvm = kvm;
+		pt_work->source = 0;
+
+		/* Reference is dropped by the work function */
+		kvm_get_kvm(kvm);
+		schedule_work(&pt_work->work);
+		ret = IRQ_HANDLED;
+	}
+	read_unlock(&kvm_pci_pt_lock);
+
+	if (ret == IRQ_HANDLED)
+		disable_irq_nosync(irq);
+	return ret;
+}
+
+/*
+ * Ack the irq line for a passthrough device.
+ *
+ * Installed as the ack_notifier of the virtual IOAPIC and PIC.  @opaque is
+ * the struct kvm and @irq the guest irq that was acked; -1 is ignored.
+ * If an assigned device uses that guest irq, its ack_work is queued.  A
+ * queued work item owns one VM reference, which the work function drops.
+ */
+static void kvm_pci_pt_ack_irq(void *opaque, int irq)
+{
+	struct kvm *kvm = opaque;
+	struct kvm_pci_pt_dev_list *pci_pt_dev;
+	unsigned long flags;
+	int queued = 1;
+
+	if (irq == -1)
+		return;
+
+	read_lock_irqsave(&kvm_pci_pt_lock, flags);
+	pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL, irq,
+					 KVM_PT_SOURCE_IRQ_ACK);
+	if (pci_pt_dev) {
+		pci_pt_dev->pt_dev.ack_work.irq = irq;
+		pci_pt_dev->pt_dev.ack_work.kvm = kvm;
+		pci_pt_dev->pt_dev.ack_work.source = 1;
+
+		kvm_get_kvm(kvm);
+		queued = schedule_work(&pci_pt_dev->pt_dev.ack_work.work);
+	}
+	read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+	/*
+	 * schedule_work() returns 0 when the work was already pending.  In
+	 * that case the pending instance already owns a reference and the
+	 * one just taken would never be dropped, so release it here.
+	 * NOTE(review): assumes the caller holds its own reference on the
+	 * VM, so this put is never the final one -- confirm.
+	 */
+	if (!queued)
+		kvm_put_kvm(kvm);
+}
+
+/*
+ * kvm_vm_ioctl_pci_pt_dev - back end of the KVM_UPDATE_PCI_PT_DEV ioctl.
+ * @kvm:        VM the device is assigned to
+ * @pci_pt_dev: guest/host busnr, devfn and irq, copied in from userspace
+ *
+ * If a device with the same host busnr/devfn is already on the VM's list,
+ * only its guest irq is updated.  Otherwise the host device is looked up,
+ * enabled and its regions are claimed; when the irqchip is in the kernel
+ * its irq is requested and the ack notifiers are installed.  Finally the
+ * new entry is added to the list.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, -EINVAL
+ * if the host device is not found, -EBUSY if it cannot be enabled, the
+ * pci_request_regions() error if the regions cannot be claimed, or -EIO
+ * if the irq cannot be requested.  Each failure path undoes exactly the
+ * steps that had already succeeded.
+ */
+static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
+				   struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+	int r = 0;
+	struct kvm_pci_pt_dev_list *match;
+	unsigned long flags;
+	struct pci_dev *dev;
+
+	write_lock_irqsave(&kvm_pci_pt_lock, flags);
+
+	/* Check if this is a request to update the irq of the device
+	 * in the guest (BIOS/ kernels can dynamically reprogram irq
+	 * numbers). This also protects us from adding the same
+	 * device twice.
+	 */
+	match = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head,
+				    &pci_pt_dev->host, 0, KVM_PT_SOURCE_UPDATE);
+	if (match) {
+		match->pt_dev.guest.irq = pci_pt_dev->guest.irq;
+		write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+		goto out;
+	}
+	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+	match = kzalloc(sizeof(struct kvm_pci_pt_dev_list), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	/* Takes a reference on the device; dropped with pci_dev_put() */
+	dev = pci_get_bus_and_slot(pci_pt_dev->host.busnr,
+				   pci_pt_dev->host.devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_pt_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		/* The device was enabled above; undo that as well */
+		goto out_disable;
+	}
+	match->pt_dev.guest.busnr = pci_pt_dev->guest.busnr;
+	match->pt_dev.guest.devfn = pci_pt_dev->guest.devfn;
+	match->pt_dev.host.busnr = pci_pt_dev->host.busnr;
+	match->pt_dev.host.devfn = pci_pt_dev->host.devfn;
+	match->pt_dev.dev = dev;
+
+	if (irqchip_in_kernel(kvm)) {
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(dev->irq, kvm_pci_pt_dev_intr, 0,
+				"kvm_pt_device", (void *)kvm)) {
+			printk(KERN_INFO "%s: couldn't allocate irq for pv "
+			       "device\n", __func__);
+			r = -EIO;
+			/* Regions are claimed and the device is enabled */
+			goto out_release;
+		}
+		match->pt_dev.guest.irq = pci_pt_dev->guest.irq;
+		match->pt_dev.host.irq = dev->irq;
+		if (kvm->arch.vioapic)
+			kvm->arch.vioapic->ack_notifier = kvm_pci_pt_ack_irq;
+		if (kvm->arch.vpic)
+			kvm->arch.vpic->ack_notifier = kvm_pci_pt_ack_irq;
+	}
+	write_lock_irqsave(&kvm_pci_pt_lock, flags);
+
+	INIT_WORK(&match->pt_dev.int_work.work, kvm_pci_pt_work_fn);
+	INIT_WORK(&match->pt_dev.ack_work.work, kvm_pci_pt_work_fn);
+
+	list_add(&match->list, &kvm->arch.pci_pt_dev_head);
+
+	/* The interrupt handler ignores the irq until this bit is set */
+	if (irqchip_in_kernel(kvm))
+		set_bit(dev->irq, pt_irq_handled);
+	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+out:
+	return r;
+out_release:
+	pci_release_regions(dev);
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	goto out;
+}
+
+/*
+ * Release every device assigned to @kvm: cancel its pending work, free
+ * its host irq, unlink it from the VM's list and give the PCI device
+ * back (regions released, device disabled, reference dropped).
+ *
+ * cancel_work_sync() and free_irq() may sleep, so they are called without
+ * kvm_pci_pt_lock held; the lock is taken only around list_del(), which
+ * is the one step that readers of the list must not observe half-done.
+ * NOTE(review): walking the list unlocked assumes nothing else can add
+ * to or remove from this VM's list at this point; the caller shown in
+ * this patch is kvm_arch_destroy_vm() -- confirm.
+ */
+static void kvm_free_pci_passthrough(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_pci_pt_dev_list *pci_pt_dev;
+	unsigned long flags;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) {
+		pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list);
+		if (cancel_work_sync(&pci_pt_dev->pt_dev.int_work.work))
+			/* We had pending work. That means we will have to take
+			 * care of kvm_put_kvm.
+			 */
+			kvm_put_kvm(kvm);
+
+		if (cancel_work_sync(&pci_pt_dev->pt_dev.ack_work.work))
+			/* We had pending work. That means we will have to take
+			 * care of kvm_put_kvm.
+			 */
+			kvm_put_kvm(kvm);
+	}
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) {
+		pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list);
+
+		/* Once free_irq() returns the handler no longer runs */
+		if (irqchip_in_kernel(kvm) && pci_pt_dev->pt_dev.host.irq)
+			free_irq(pci_pt_dev->pt_dev.host.irq, kvm);
+
+		write_lock_irqsave(&kvm_pci_pt_lock, flags);
+		list_del(&pci_pt_dev->list);
+		write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+		pci_release_regions(pci_pt_dev->pt_dev.dev);
+		pci_disable_device(pci_pt_dev->pt_dev.dev);
+		/* Search for this device got us a refcount; drop it only
+		 * after the last use of the device above.
+		 */
+		pci_dev_put(pci_pt_dev->pt_dev.dev);
+
+		kfree(pci_pt_dev);
+	}
+}
unsigned long segment_base(u16 selector)
{
@@ -1743,6 +2023,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_UPDATE_PCI_PT_DEV: {
+ struct kvm_pci_passthrough_dev pci_pt_dev;
+
+ r = -EFAULT;
+ if (copy_from_user(&pci_pt_dev, argp, sizeof pci_pt_dev))
+ goto out;
+ r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev);
+ if (r)
+ goto out;
+ break;
+ }
case KVM_GET_PIT: {
struct kvm_pit_state ps;
r = -EFAULT;
@@ -3938,6 +4229,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.pci_pt_dev_head);
return kvm;
}
@@ -3970,6 +4262,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pci_passthrough(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index c64d124..c969120 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -83,6 +83,7 @@
#define KVM_NR_VAR_MTRR 8
extern spinlock_t kvm_lock;
+extern rwlock_t kvm_pci_pt_lock;
extern struct list_head vm_list;
struct kvm_vcpu;
@@ -302,6 +303,38 @@ struct kvm_mem_alias {
gfn_t target_gfn;
};
+/* Some definitions for devices assigned to the guest by the host */
+#define KVM_PT_SOURCE_IRQ 1
+#define KVM_PT_SOURCE_IRQ_ACK 2
+#define KVM_PT_SOURCE_UPDATE 3
+
+/* For assigned devices, we schedule work in the system workqueue to
+ * inject interrupts into the guest when an interrupt occurs on the
+ * physical device and also when the guest acks the interrupt.
+ * Both cases are handled by kvm_pci_pt_work_fn(), which uses 'source'
+ * to tell them apart.
+ */
+struct kvm_pci_pt_work {
+ struct work_struct work; /* initialised with kvm_pci_pt_work_fn */
+ struct kvm *kvm; /* VM the work acts on; a reference is held while queued */
+ int irq; /* host irq for int_work, acked guest irq for ack_work */
+ bool source; /* 0: host interrupt, 1: guest ack */
+};
+
+struct kvm_pci_passthrough_dev_kernel { /* kernel-side state of one assigned device */
+ struct kvm_pci_pt_info guest; /* busnr/devfn/irq as seen by the guest */
+ struct kvm_pci_pt_info host; /* busnr/devfn/irq of the device on the host */
+ struct kvm_pci_pt_work int_work; /* queued by the host interrupt handler */
+ struct kvm_pci_pt_work ack_work; /* queued when the guest acks the irq */
+ struct pci_dev *dev; /* host device; a reference is held on it */
+};
+
+/* This list is to store the guest bus:device:function-irq and host
+ * bus:device:function-irq mapping for assigned devices.
+ * Entries are linked on kvm->arch.pci_pt_dev_head and searched with
+ * kvm_find_pci_pt_dev().
+ */
+struct kvm_pci_pt_dev_list {
+ struct list_head list; /* link in kvm->arch.pci_pt_dev_head */
+ struct kvm_pci_passthrough_dev_kernel pt_dev; /* the device's state */
+};
+
struct kvm_arch{
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -314,6 +347,7 @@ struct kvm_arch{
* Hash table of struct kvm_mmu_page.
*/
struct list_head active_mmu_pages;
+ struct list_head pci_pt_dev_head;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
@@ -560,6 +594,10 @@ void kvm_enable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
+struct kvm_pci_pt_dev_list *
+kvm_find_pci_pt_dev(struct list_head *head,
+ struct kvm_pci_pt_info *pt_pci_info, int irq, int source);
+
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index bfd9900..1bed3f3 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -137,6 +137,20 @@ static inline unsigned int kvm_arch_para_features(void)
return cpuid_eax(KVM_CPUID_FEATURES);
}
-#endif
+#endif /* KERNEL */
+/* Stores information for identifying host PCI devices assigned to the
+ * guest: this is used in the host kernel and in the userspace.
+ * The same layout describes both the host and the guest side of a
+ * device in struct kvm_pci_passthrough_dev.
+ */
+struct kvm_pci_pt_info {
+ unsigned char busnr; /* PCI bus number */
+ unsigned int devfn; /* PCI device/function number */
+ __u32 irq; /* irq number of the device */
+};
+
+/* Mapping between host and guest PCI device.
+ * This is the argument of the KVM_UPDATE_PCI_PT_DEV ioctl.
+ */
+struct kvm_pci_passthrough_dev {
+ struct kvm_pci_pt_info guest; /* device as the guest sees it */
+ struct kvm_pci_pt_info host; /* device on the host */
+};
#endif
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0ea064c..d700bac 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -371,6 +371,7 @@ struct kvm_trace_rec {
#define KVM_CAP_PV_MMU 13
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
+#define KVM_CAP_PCI_PASSTHROUGH 16
/*
* ioctls for VM fds
@@ -400,6 +401,8 @@ struct kvm_trace_rec {
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
#define KVM_UNREGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
+#define KVM_UPDATE_PCI_PT_DEV _IOR(KVMIO, 0x69, \
+ struct kvm_pci_passthrough_dev)
/*
* ioctls for vcpu fds
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 6d99a35..c580d59 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -288,13 +288,21 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
{
union ioapic_redir_entry *ent;
+ struct kvm_pci_pt_dev_list *match;
+ unsigned long flags;
ent = &ioapic->redirtbl[gsi];
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
- ioapic_deliver(ioapic, gsi);
+
+ read_lock_irqsave(&kvm_pci_pt_lock, flags);
+ match = kvm_find_pci_pt_dev(&ioapic->kvm->arch.pci_pt_dev_head, NULL,
+ gsi, KVM_PT_SOURCE_IRQ_ACK);
+ read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+ if (!match)
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+ ioapic_deliver(ioapic, gsi);
if (ioapic->ack_notifier)
ioapic->ack_notifier(ioapic->kvm, gsi);
--
1.5.4.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 4/4] KVM: Handle device assignment to guests
2008-06-27 12:55 ` [PATCH 4/4] KVM: Handle device assignment to guests Amit Shah
@ 2008-07-02 10:25 ` Amit Shah
0 siblings, 0 replies; 15+ messages in thread
From: Amit Shah @ 2008-07-02 10:25 UTC (permalink / raw)
To: kvm
Cc: muli, benami, allen.m.kay, chrisw, weidong.han, Avi Kivity,
virtualization
Added locking (kvm->lock) around the initialization of function pointers for
irq structures.
So the new 4/4 patch is:
Subject: [PATCH] KVM: Handle device assignment to guests
From: Amit Shah <amit.shah@qumranet.com>
From: Ben-Ami Yassour <benami@il.ibm.com>
From: Han, Weidong <weidong.han@intel.com>
This patch adds support for handling PCI devices that are assigned to
the guest ("PCI passthrough").
The device to be assigned to the guest is registered in the host kernel
and its interrupts are forwarded to the guest. If the device is already
assigned, or a host driver is still bound to it, the assignment fails
and -EBUSY is returned to userspace.
Devices that share their interrupt line are not supported at the moment.
By itself, this patch will not make devices work within the guest. There
has to be some mechanism of translating guest DMA addresses into machine
addresses. This support comes from one of three approaches:
1. If you have recent Intel hardware with VT-d support, you can use the
patches in
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm.git vtd
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm-userspace.git vtd
These patches are for the host kernel.
2. For paravirtualised Linux guests, you can use the patches in
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm.git pvdma
git.kernel.org/pub/scm/linux/kernel/git/amit/kvm-userspace.git pvdma
This kernel tree has patches for host as well as guest kernels.
3. 1-1 mapping of guest in host address space
The patches to do this are available on the kvm / lkml list archives:
http://thread.gmane.org/gmane.comp.emulators.kvm.devel/18722/focus=18753
Signed-off-by: Amit Shah <amit.shah@qumranet.com>
---
arch/x86/kvm/x86.c | 295
++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/kvm_host.h | 38 ++++++
include/asm-x86/kvm_para.h | 16 +++-
include/linux/kvm.h | 3 +
virt/kvm/ioapic.c | 12 ++-
5 files changed, 361 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5a83c3b..d4d4e0c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4,10 +4,12 @@
* derived from drivers/kvm/kvm_main.c
*
* Copyright (C) 2006 Qumranet, Inc.
+ * Copyright (C) 2008 Qumranet, Inc.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
+ * Amit Shah <amit.shah@qumranet.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
@@ -22,8 +24,10 @@
#include "kvm_cache_regs.h"
#include <linux/clocksource.h>
+#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
+#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
@@ -97,6 +101,284 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+DEFINE_RWLOCK(kvm_pci_pt_lock);
+
+/*
+ * Look up an assigned ("passthrough") PCI device on the list at @head.
+ *
+ * @source selects which key is compared:
+ *   KVM_PT_SOURCE_IRQ     - @irq against the host irq (host interrupt path)
+ *   KVM_PT_SOURCE_IRQ_ACK - @irq against the guest irq (guest ack path)
+ *   KVM_PT_SOURCE_UPDATE  - @pt_pci_info busnr/devfn against the host
+ *                           busnr/devfn (ioctl path); @irq is not used
+ * Any other @source matches nothing.
+ *
+ * Returns the first matching entry, or NULL if there is none.  No lock is
+ * taken here; the callers in this patch hold kvm_pci_pt_lock around the
+ * call.
+ */
+struct kvm_pci_pt_dev_list *
+kvm_find_pci_pt_dev(struct list_head *head,
+		    struct kvm_pci_pt_info *pt_pci_info, int irq, int source)
+{
+	struct kvm_pci_pt_dev_list *entry;
+
+	list_for_each_entry(entry, head, list) {
+		if (source == KVM_PT_SOURCE_IRQ) {
+			if (entry->pt_dev.host.irq == irq)
+				return entry;
+		} else if (source == KVM_PT_SOURCE_IRQ_ACK) {
+			if (entry->pt_dev.guest.irq == irq)
+				return entry;
+		} else if (source == KVM_PT_SOURCE_UPDATE) {
+			if ((entry->pt_dev.host.busnr == pt_pci_info->busnr) &&
+			    (entry->pt_dev.host.devfn == pt_pci_info->devfn))
+				return entry;
+		}
+	}
+	return NULL;
+}
+
+static DECLARE_BITMAP(pt_irq_handled, NR_IRQS);
+
+/*
+ * Workqueue handler shared by the int_work and ack_work items of an
+ * assigned device.
+ *
+ * For a host interrupt (source == 0) the guest irq line is raised.  For
+ * a guest ack (source != 0) the guest irq line is lowered and the host
+ * irq, which the interrupt handler disabled, is enabled again.  The VM
+ * reference taken when the work was queued is dropped at the end.
+ */
+static void kvm_pci_pt_work_fn(struct work_struct *work)
+{
+	struct kvm_pci_pt_work *pt_work =
+		container_of(work, struct kvm_pci_pt_work, work);
+	struct kvm *kvm = pt_work->kvm;
+	struct kvm_pci_pt_dev_list *entry;
+	unsigned long irqflags;
+	int lookup;
+	int guest_irq = 0;
+	int host_irq = 0;
+
+	if (pt_work->source)
+		lookup = KVM_PT_SOURCE_IRQ_ACK;
+	else
+		lookup = KVM_PT_SOURCE_IRQ;
+
+	/* This is taken to safely inject irq inside the guest. When
+	 * the interrupt injection (or the ioapic code) uses a
+	 * finer-grained lock, update this
+	 */
+	mutex_lock(&kvm->lock);
+
+	/* Copy what we need while the list is stable */
+	read_lock_irqsave(&kvm_pci_pt_lock, irqflags);
+	entry = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
+				    pt_work->irq, lookup);
+	if (entry) {
+		guest_irq = entry->pt_dev.guest.irq;
+		host_irq = entry->pt_dev.host.irq;
+	}
+	read_unlock_irqrestore(&kvm_pci_pt_lock, irqflags);
+
+	/* entry is only tested, never dereferenced, outside the lock */
+	if (!entry) {
+		printk(KERN_ERR "%s: no matching device assigned to guest "
+		       "found for irq %d, source = %d!\n",
+		       __func__, pt_work->irq, pt_work->source);
+	} else if (lookup == KVM_PT_SOURCE_IRQ) {
+		kvm_set_irq(kvm, guest_irq, 1);
+	} else {
+		kvm_set_irq(kvm, pt_work->irq, 0);
+		enable_irq(host_irq);
+	}
+
+	mutex_unlock(&kvm->lock);
+	kvm_put_kvm(kvm);
+}
+
+/*
+ * Host interrupt handler for an assigned device; @dev_id is the struct kvm
+ * the device belongs to.
+ *
+ * When the irq belongs to a device on the VM's list, the device's
+ * int_work is queued with a VM reference held, and the host irq is
+ * disabled without waiting (disable_irq_nosync).
+ *
+ * FIXME: Implement the OR logic needed to make shared interrupts on
+ * this line behave properly
+ */
+static irqreturn_t kvm_pci_pt_dev_intr(int irq, void *dev_id)
+{
+	struct kvm *kvm = dev_id;
+	struct kvm_pci_pt_dev_list *entry;
+	struct kvm_pci_pt_work *pt_work;
+	irqreturn_t ret = IRQ_NONE;
+
+	if (!test_bit(irq, pt_irq_handled))
+		return ret;
+
+	read_lock(&kvm_pci_pt_lock);
+	entry = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
+				    irq, KVM_PT_SOURCE_IRQ);
+	if (entry) {
+		pt_work = &entry->pt_dev.int_work;
+		pt_work->irq = irq;
+		pt_work->kvm = kvm;
+		pt_work->source = 0;
+
+		/* Reference is dropped by the work function */
+		kvm_get_kvm(kvm);
+		schedule_work(&pt_work->work);
+		ret = IRQ_HANDLED;
+	}
+	read_unlock(&kvm_pci_pt_lock);
+
+	if (ret == IRQ_HANDLED)
+		disable_irq_nosync(irq);
+	return ret;
+}
+
+/*
+ * Ack the irq line for a passthrough device.
+ *
+ * Installed as the ack_notifier of the virtual IOAPIC and PIC.  @opaque is
+ * the struct kvm and @irq the guest irq that was acked; -1 is ignored.
+ * If an assigned device uses that guest irq, its ack_work is queued.  A
+ * queued work item owns one VM reference, which the work function drops.
+ */
+static void kvm_pci_pt_ack_irq(void *opaque, int irq)
+{
+	struct kvm *kvm = opaque;
+	struct kvm_pci_pt_dev_list *pci_pt_dev;
+	unsigned long flags;
+	int queued = 1;
+
+	if (irq == -1)
+		return;
+
+	read_lock_irqsave(&kvm_pci_pt_lock, flags);
+	pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL, irq,
+					 KVM_PT_SOURCE_IRQ_ACK);
+	if (pci_pt_dev) {
+		pci_pt_dev->pt_dev.ack_work.irq = irq;
+		pci_pt_dev->pt_dev.ack_work.kvm = kvm;
+		pci_pt_dev->pt_dev.ack_work.source = 1;
+
+		kvm_get_kvm(kvm);
+		queued = schedule_work(&pci_pt_dev->pt_dev.ack_work.work);
+	}
+	read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+	/*
+	 * schedule_work() returns 0 when the work was already pending.  In
+	 * that case the pending instance already owns a reference and the
+	 * one just taken would never be dropped, so release it here.
+	 * NOTE(review): assumes the caller holds its own reference on the
+	 * VM, so this put is never the final one -- confirm.
+	 */
+	if (!queued)
+		kvm_put_kvm(kvm);
+}
+
+/*
+ * kvm_vm_ioctl_pci_pt_dev - back end of the KVM_UPDATE_PCI_PT_DEV ioctl.
+ * @kvm:        VM the device is assigned to
+ * @pci_pt_dev: guest/host busnr, devfn and irq, copied in from userspace
+ *
+ * If a device with the same host busnr/devfn is already on the VM's list,
+ * only its guest irq is updated.  Otherwise the host device is looked up,
+ * enabled and its regions are claimed; when the irqchip is in the kernel
+ * its irq is requested and the ack notifiers are installed under
+ * kvm->lock.  Finally the new entry is added to the list.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, -EINVAL
+ * if the host device is not found, -EBUSY if it cannot be enabled, the
+ * pci_request_regions() error if the regions cannot be claimed, or -EIO
+ * if the irq cannot be requested.  Each failure path undoes exactly the
+ * steps that had already succeeded.
+ */
+static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
+				   struct kvm_pci_passthrough_dev *pci_pt_dev)
+{
+	int r = 0;
+	struct kvm_pci_pt_dev_list *match;
+	unsigned long flags;
+	struct pci_dev *dev;
+
+	write_lock_irqsave(&kvm_pci_pt_lock, flags);
+
+	/* Check if this is a request to update the irq of the device
+	 * in the guest (BIOS/ kernels can dynamically reprogram irq
+	 * numbers). This also protects us from adding the same
+	 * device twice.
+	 */
+	match = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head,
+				    &pci_pt_dev->host, 0, KVM_PT_SOURCE_UPDATE);
+	if (match) {
+		match->pt_dev.guest.irq = pci_pt_dev->guest.irq;
+		write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+		goto out;
+	}
+	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+	match = kzalloc(sizeof(struct kvm_pci_pt_dev_list), GFP_KERNEL);
+	if (match == NULL) {
+		printk(KERN_INFO "%s: Couldn't allocate memory\n",
+		       __func__);
+		r = -ENOMEM;
+		goto out;
+	}
+	/* Takes a reference on the device; dropped with pci_dev_put() */
+	dev = pci_get_bus_and_slot(pci_pt_dev->host.busnr,
+				   pci_pt_dev->host.devfn);
+	if (!dev) {
+		printk(KERN_INFO "%s: host device not found\n", __func__);
+		r = -EINVAL;
+		goto out_free;
+	}
+	if (pci_enable_device(dev)) {
+		printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+		r = -EBUSY;
+		goto out_put;
+	}
+	r = pci_request_regions(dev, "kvm_pt_device");
+	if (r) {
+		printk(KERN_INFO "%s: Could not get access to device regions\n",
+		       __func__);
+		/* The device was enabled above; undo that as well */
+		goto out_disable;
+	}
+	match->pt_dev.guest.busnr = pci_pt_dev->guest.busnr;
+	match->pt_dev.guest.devfn = pci_pt_dev->guest.devfn;
+	match->pt_dev.host.busnr = pci_pt_dev->host.busnr;
+	match->pt_dev.host.devfn = pci_pt_dev->host.devfn;
+	match->pt_dev.dev = dev;
+
+	if (irqchip_in_kernel(kvm)) {
+		/* Even though this is PCI, we don't want to use shared
+		 * interrupts. Sharing host devices with guest-assigned devices
+		 * on the same interrupt line is not a happy situation: there
+		 * are going to be long delays in accepting, acking, etc.
+		 */
+		if (request_irq(dev->irq, kvm_pci_pt_dev_intr, 0,
+				"kvm_pt_device", (void *)kvm)) {
+			printk(KERN_INFO "%s: couldn't allocate irq for pv "
+			       "device\n", __func__);
+			r = -EIO;
+			/* Regions are claimed and the device is enabled */
+			goto out_release;
+		}
+		match->pt_dev.guest.irq = pci_pt_dev->guest.irq;
+		match->pt_dev.host.irq = dev->irq;
+		mutex_lock(&kvm->lock);
+		if (kvm->arch.vioapic)
+			kvm->arch.vioapic->ack_notifier = kvm_pci_pt_ack_irq;
+		if (kvm->arch.vpic)
+			kvm->arch.vpic->ack_notifier = kvm_pci_pt_ack_irq;
+		mutex_unlock(&kvm->lock);
+	}
+	write_lock_irqsave(&kvm_pci_pt_lock, flags);
+
+	INIT_WORK(&match->pt_dev.int_work.work, kvm_pci_pt_work_fn);
+	INIT_WORK(&match->pt_dev.ack_work.work, kvm_pci_pt_work_fn);
+
+	list_add(&match->list, &kvm->arch.pci_pt_dev_head);
+
+	/* The interrupt handler ignores the irq until this bit is set */
+	if (irqchip_in_kernel(kvm))
+		set_bit(dev->irq, pt_irq_handled);
+	write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+out:
+	return r;
+out_release:
+	pci_release_regions(dev);
+out_disable:
+	pci_disable_device(dev);
+out_put:
+	pci_dev_put(dev);
+out_free:
+	kfree(match);
+	goto out;
+}
+
+/*
+ * Release every device assigned to @kvm: cancel its pending work, free
+ * its host irq, unlink it from the VM's list and give the PCI device
+ * back (regions released, device disabled, reference dropped).
+ *
+ * cancel_work_sync() and free_irq() may sleep, so they are called without
+ * kvm_pci_pt_lock held; the lock is taken only around list_del(), which
+ * is the one step that readers of the list must not observe half-done.
+ * NOTE(review): walking the list unlocked assumes nothing else can add
+ * to or remove from this VM's list at this point; the caller shown in
+ * this patch is kvm_arch_destroy_vm() -- confirm.
+ */
+static void kvm_free_pci_passthrough(struct kvm *kvm)
+{
+	struct list_head *ptr, *ptr2;
+	struct kvm_pci_pt_dev_list *pci_pt_dev;
+	unsigned long flags;
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) {
+		pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list);
+		if (cancel_work_sync(&pci_pt_dev->pt_dev.int_work.work))
+			/* We had pending work. That means we will have to take
+			 * care of kvm_put_kvm.
+			 */
+			kvm_put_kvm(kvm);
+
+		if (cancel_work_sync(&pci_pt_dev->pt_dev.ack_work.work))
+			/* We had pending work. That means we will have to take
+			 * care of kvm_put_kvm.
+			 */
+			kvm_put_kvm(kvm);
+	}
+
+	list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) {
+		pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list);
+
+		/* Once free_irq() returns the handler no longer runs */
+		if (irqchip_in_kernel(kvm) && pci_pt_dev->pt_dev.host.irq)
+			free_irq(pci_pt_dev->pt_dev.host.irq, kvm);
+
+		write_lock_irqsave(&kvm_pci_pt_lock, flags);
+		list_del(&pci_pt_dev->list);
+		write_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+		pci_release_regions(pci_pt_dev->pt_dev.dev);
+		pci_disable_device(pci_pt_dev->pt_dev.dev);
+		/* Search for this device got us a refcount; drop it only
+		 * after the last use of the device above.
+		 */
+		pci_dev_put(pci_pt_dev->pt_dev.dev);
+
+		kfree(pci_pt_dev);
+	}
+}
unsigned long segment_base(u16 selector)
{
@@ -1745,6 +2027,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_UPDATE_PCI_PT_DEV: {
+ struct kvm_pci_passthrough_dev pci_pt_dev;
+
+ r = -EFAULT;
+ if (copy_from_user(&pci_pt_dev, argp, sizeof pci_pt_dev))
+ goto out;
+ r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev);
+ if (r)
+ goto out;
+ break;
+ }
case KVM_GET_PIT: {
struct kvm_pit_state ps;
r = -EFAULT;
@@ -3946,6 +4239,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.pci_pt_dev_head);
return kvm;
}
@@ -3978,6 +4272,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pci_passthrough(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index bae1b76..0c6699f 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -83,6 +83,7 @@
#define KVM_NR_VAR_MTRR 8
extern spinlock_t kvm_lock;
+extern rwlock_t kvm_pci_pt_lock;
extern struct list_head vm_list;
struct kvm_vcpu;
@@ -308,6 +309,38 @@ struct kvm_mem_alias {
gfn_t target_gfn;
};
+/* Some definitions for devices assigned to the guest by the host */
+#define KVM_PT_SOURCE_IRQ 1
+#define KVM_PT_SOURCE_IRQ_ACK 2
+#define KVM_PT_SOURCE_UPDATE 3
+
+/* For assigned devices, we schedule work in the system workqueue to
+ * inject interrupts into the guest when an interrupt occurs on the
+ * physical device and also when the guest acks the interrupt.
+ */
+struct kvm_pci_pt_work {
+ struct work_struct work;
+ struct kvm *kvm;
+ int irq;
+ bool source;
+};
+
+struct kvm_pci_passthrough_dev_kernel {
+ struct kvm_pci_pt_info guest;
+ struct kvm_pci_pt_info host;
+ struct kvm_pci_pt_work int_work;
+ struct kvm_pci_pt_work ack_work;
+ struct pci_dev *dev;
+};
+
+/* This list is to store the guest bus:device:function-irq and host
+ * bus:device:function-irq mapping for assigned devices.
+ */
+struct kvm_pci_pt_dev_list {
+ struct list_head list;
+ struct kvm_pci_passthrough_dev_kernel pt_dev;
+};
+
struct kvm_arch{
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -320,6 +353,7 @@ struct kvm_arch{
* Hash table of struct kvm_mmu_page.
*/
struct list_head active_mmu_pages;
+ struct list_head pci_pt_dev_head;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
@@ -565,6 +599,10 @@ void kvm_enable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
+struct kvm_pci_pt_dev_list *
+kvm_find_pci_pt_dev(struct list_head *head,
+ struct kvm_pci_pt_info *pt_pci_info, int irq, int source);
+
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index bfd9900..1bed3f3 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -137,6 +137,20 @@ static inline unsigned int kvm_arch_para_features(void)
return cpuid_eax(KVM_CPUID_FEATURES);
}
-#endif
+#endif /* KERNEL */
+/* Stores information for identifying host PCI devices assigned to the
+ * guest: this is used in the host kernel and in the userspace.
+ */
+struct kvm_pci_pt_info {
+ unsigned char busnr;
+ unsigned int devfn;
+ __u32 irq;
+};
+
+/* Mapping between host and guest PCI device */
+struct kvm_pci_passthrough_dev {
+ struct kvm_pci_pt_info guest;
+ struct kvm_pci_pt_info host;
+};
#endif
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 0ea064c..d700bac 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -371,6 +371,7 @@ struct kvm_trace_rec {
#define KVM_CAP_PV_MMU 13
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
+#define KVM_CAP_PCI_PASSTHROUGH 16
/*
* ioctls for VM fds
@@ -400,6 +401,8 @@ struct kvm_trace_rec {
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
#define KVM_UNREGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
+#define KVM_UPDATE_PCI_PT_DEV _IOR(KVMIO, 0x69, \
+ struct kvm_pci_passthrough_dev)
/*
* ioctls for vcpu fds
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 6d99a35..c580d59 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -288,13 +288,21 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
{
union ioapic_redir_entry *ent;
+ struct kvm_pci_pt_dev_list *match;
+ unsigned long flags;
ent = &ioapic->redirtbl[gsi];
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
- ioapic_deliver(ioapic, gsi);
+
+ read_lock_irqsave(&kvm_pci_pt_lock, flags);
+ match = kvm_find_pci_pt_dev(&ioapic->kvm->arch.pci_pt_dev_head, NULL,
+ gsi, KVM_PT_SOURCE_IRQ_ACK);
+ read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+ if (!match)
+ if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+ ioapic_deliver(ioapic, gsi);
if (ioapic->ack_notifier)
ioapic->ack_notifier(ioapic->kvm, gsi);
--
1.5.4.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests
2008-06-27 12:55 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Amit Shah
2008-06-27 12:55 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Amit Shah
@ 2008-07-05 11:05 ` Avi Kivity
2008-07-06 13:50 ` Ben-Ami Yassour
1 sibling, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2008-07-05 11:05 UTC (permalink / raw)
To: Amit Shah
Cc: kvm, muli, benami, allen.m.kay, chrisw, weidong.han,
virtualization
Amit Shah wrote:
> This function injects an interrupt into the guest given the kvm struct,
> the (guest) irq number and the interrupt level.
>
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> ---
> arch/x86/kvm/irq.c | 11 +++++++++++
> arch/x86/kvm/irq.h | 2 ++
> 2 files changed, 13 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 76d736b..0d9e552 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -100,3 +100,14 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
> __kvm_migrate_apic_timer(vcpu);
> __kvm_migrate_pit_timer(vcpu);
> }
> +
> +/* This should be called with the kvm->lock mutex held */
> +void kvm_set_irq(struct kvm *kvm, int irq, int level)
> +{
> + /* Not possible to detect if the guest uses the PIC or the
> + * IOAPIC. So set the bit in both. The guest will ignore
> + * writes to the unused one.
> + */
> + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
> + kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
> +}
>
pic supports only irqs 0-15.
I guess instead of adding the check here, better move it into i8259.c
and ioapic.c.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests
2008-07-05 11:05 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Avi Kivity
@ 2008-07-06 13:50 ` Ben-Ami Yassour
2008-07-06 13:59 ` Avi Kivity
0 siblings, 1 reply; 15+ messages in thread
From: Ben-Ami Yassour @ 2008-07-06 13:50 UTC (permalink / raw)
To: Avi Kivity
Cc: Amit Shah, kvm, Muli Ben-Yehuda, allen.m.kay, chrisw, weidong.han,
virtualization
On Sat, 2008-07-05 at 14:05 +0300, Avi Kivity wrote:
> Amit Shah wrote:
> > This function injects an interrupt into the guest given the kvm struct,
> > the (guest) irq number and the interrupt level.
> >
> > Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> > ---
> > arch/x86/kvm/irq.c | 11 +++++++++++
> > arch/x86/kvm/irq.h | 2 ++
> > 2 files changed, 13 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > index 76d736b..0d9e552 100644
> > --- a/arch/x86/kvm/irq.c
> > +++ b/arch/x86/kvm/irq.c
> > @@ -100,3 +100,14 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
> > __kvm_migrate_apic_timer(vcpu);
> > __kvm_migrate_pit_timer(vcpu);
> > }
> > +
> > +/* This should be called with the kvm->lock mutex held */
> > +void kvm_set_irq(struct kvm *kvm, int irq, int level)
> > +{
> > + /* Not possible to detect if the guest uses the PIC or the
> > + * IOAPIC. So set the bit in both. The guest will ignore
> > + * writes to the unused one.
> > + */
> > + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
> > + kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
> > +}
> >
>
> pic supports only irqs 0-15.
>
> I guess instead of adding the check here, better move it into i8259.c
> and ioapic.c.
>
>
Do you mean something like the patch below?
( in the ioapic code there is already a check for the irq: if (irq >= 0 && irq < IOAPIC_NUM_PINS) )
Thanks,
Ben
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 3ba5e5c..c2c4884 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -130,8 +130,10 @@ void kvm_pic_set_irq(void *opaque, int irq, int level)
{
struct kvm_pic *s = opaque;
- pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
- pic_update_irq(s);
+ if (irq >= 0 && irq < PIC_NUM_PINS) {
+ pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
+ pic_update_irq(s);
+ }
}
/*
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index bef9127..e9c7b3f 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -30,6 +30,8 @@
#include "ioapic.h"
#include "lapic.h"
+#define PIC_NUM_PINS 15
+
struct kvm;
struct kvm_vcpu;
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests
2008-07-06 13:50 ` Ben-Ami Yassour
@ 2008-07-06 13:59 ` Avi Kivity
0 siblings, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2008-07-06 13:59 UTC (permalink / raw)
To: Ben-Ami Yassour
Cc: Amit Shah, kvm, Muli Ben-Yehuda, allen.m.kay, chrisw, weidong.han,
virtualization
Ben-Ami Yassour wrote:
> On Sat, 2008-07-05 at 14:05 +0300, Avi Kivity wrote:
>
>> Amit Shah wrote:
>>
>>> This function injects an interrupt into the guest given the kvm struct,
>>> the (guest) irq number and the interrupt level.
>>>
>>> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
>>> ---
>>> arch/x86/kvm/irq.c | 11 +++++++++++
>>> arch/x86/kvm/irq.h | 2 ++
>>> 2 files changed, 13 insertions(+), 0 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
>>> index 76d736b..0d9e552 100644
>>> --- a/arch/x86/kvm/irq.c
>>> +++ b/arch/x86/kvm/irq.c
>>> @@ -100,3 +100,14 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
>>> __kvm_migrate_apic_timer(vcpu);
>>> __kvm_migrate_pit_timer(vcpu);
>>> }
>>> +
>>> +/* This should be called with the kvm->lock mutex held */
>>> +void kvm_set_irq(struct kvm *kvm, int irq, int level)
>>> +{
>>> + /* Not possible to detect if the guest uses the PIC or the
>>> + * IOAPIC. So set the bit in both. The guest will ignore
>>> + * writes to the unused one.
>>> + */
>>> + kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
>>> + kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
>>> +}
>>>
>>>
>> pic supports only irqs 0-15.
>>
>> I guess instead of adding the check here, better move it into i8259.c
>> and ioapic.c.
>>
>>
>>
>
> Do you mean something like the patch below?
> ( in the ioapic code there is already a check for the irq: if (irq >= 0 && irq < IOAPIC_NUM_PINS) )
>
> Thanks,
> Ben
>
> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
> index 3ba5e5c..c2c4884 100644
> --- a/arch/x86/kvm/i8259.c
> +++ b/arch/x86/kvm/i8259.c
> @@ -130,8 +130,10 @@ void kvm_pic_set_irq(void *opaque, int irq, int level)
> {
> struct kvm_pic *s = opaque;
>
> - pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
> - pic_update_irq(s);
> + if (irq >= 0 && irq < PIC_NUM_PINS) {
> + pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
> + pic_update_irq(s);
> + }
> }
>
> /*
> diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> index bef9127..e9c7b3f 100644
> --- a/arch/x86/kvm/irq.h
> +++ b/arch/x86/kvm/irq.h
> @@ -30,6 +30,8 @@
> #include "ioapic.h"
> #include "lapic.h"
>
> +#define PIC_NUM_PINS 15
> +
>
16
Also, needs signoff and changelog entry.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling
2008-06-27 12:55 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Amit Shah
2008-06-27 12:55 ` [PATCH 3/4] KVM: Introduce a callback routine for PIC " Amit Shah
@ 2008-07-07 10:08 ` Avi Kivity
2008-07-08 10:34 ` Ben-Ami Yassour
1 sibling, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2008-07-07 10:08 UTC (permalink / raw)
To: Amit Shah
Cc: kvm, muli, benami, allen.m.kay, chrisw, weidong.han,
virtualization
Amit Shah wrote:
> This will be useful for acking irqs of assigned devices
>
>
And also for improving time drift tracking.
Please make this more generic by having a list of callbacks. There
could also be just one list, rather than one for the ioapic and one for
the pic as implemented now.
It may also make sense to filter the irq number before calling the
callback rather than relying on the callback to ignore uninteresting irqs.
> Signed-off-by: Amit Shah <amit.shah@qumranet.com>
> ---
> virt/kvm/ioapic.c | 3 +++
> virt/kvm/ioapic.h | 1 +
> 2 files changed, 4 insertions(+), 0 deletions(-)
>
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index 9d02136..6d99a35 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -295,6 +295,9 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
> ent->fields.remote_irr = 0;
> if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
> ioapic_deliver(ioapic, gsi);
> +
> + if (ioapic->ack_notifier)
> + ioapic->ack_notifier(ioapic->kvm, gsi);
> }
>
> void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
> diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
> index 7f16675..a42743f 100644
> --- a/virt/kvm/ioapic.h
> +++ b/virt/kvm/ioapic.h
> @@ -58,6 +58,7 @@ struct kvm_ioapic {
> } redirtbl[IOAPIC_NUM_PINS];
> struct kvm_io_device dev;
> struct kvm *kvm;
> + void (*ack_notifier)(void *opaque, int irq);
> };
>
> #ifdef DEBUG
>
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling
2008-07-07 10:08 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Avi Kivity
@ 2008-07-08 10:34 ` Ben-Ami Yassour
2008-07-10 13:57 ` Avi Kivity
0 siblings, 1 reply; 15+ messages in thread
From: Ben-Ami Yassour @ 2008-07-08 10:34 UTC (permalink / raw)
To: Avi Kivity
Cc: Amit Shah, kvm, Muli Ben-Yehuda, allen.m.kay, chrisw, weidong.han,
virtualization
On Mon, 2008-07-07 at 13:08 +0300, Avi Kivity wrote:
> Amit Shah wrote:
> > This will be useful for acking irqs of assigned devices
> >
> >
>
> And also for improving time drift tracking.
>
> Please make this more generic by having a list of callbacks. There
> could also be just one list, rather than one for the ioapic and one for
> the pic as implemented now.
>
> It may also make sense to filter the irq number before calling the
> callback rather than relying on the callback to ignore uninteresting irqs.
>
Avi,
Did you mean something like the patch below?
How should we protect list accesses, should it be a new lock or an existing one?
Regards,
Ben
From 102013bdd75e8141027e461ae5834138f561a3c3 Mon Sep 17 00:00:00 2001
From: Ben-Ami Yassour <benami@il.ibm.com>
Date: Tue, 8 Jul 2008 13:30:01 +0300
Subject: [PATCH] KVM: interrupt ack notifier list
API to get notification when a guest acks an interrupt.
Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
---
arch/x86/kvm/i8259.c | 4 ++-
arch/x86/kvm/irq.c | 2 +-
arch/x86/kvm/irq.h | 2 +-
arch/x86/kvm/x86.c | 84 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86/kvm_host.h | 10 +++++
5 files changed, 99 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 5857f59..9160343 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -151,9 +151,10 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
s->irr &= ~(1 << irq);
}
-int kvm_pic_read_irq(struct kvm_pic *s)
+int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
+ struct kvm_pic *s = pic_irqchip(kvm);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
@@ -178,6 +179,7 @@ int kvm_pic_read_irq(struct kvm_pic *s)
irq = 7;
intno = s->pics[0].irq_base + irq;
}
+ notify_interrupt_ack(kvm, irq);
pic_update_irq(s);
return intno;
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 76d736b..cf29c02 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm);
s->output = 0; /* PIC */
- vector = kvm_pic_read_irq(s);
+ vector = kvm_pic_read_irq(v->kvm);
}
}
return vector;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2a15be2..7657654 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -65,7 +65,7 @@ struct kvm_pic {
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_pic_set_irq(void *opaque, int irq, int level);
-int kvm_pic_read_irq(struct kvm_pic *s);
+int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5a83c3b..6d33f00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -199,6 +199,88 @@ static void __queue_exception(struct kvm_vcpu *vcpu)
vcpu->arch.exception.error_code);
}
+void notify_interrupt_ack(struct kvm *kvm, int irq)
+{
+ struct list_head *ptr;
+ struct interrupt_ack_notifier *notifier;
+
+ list_for_each(ptr, &kvm->arch.interrupt_ack_notifier) {
+ notifier = list_entry(ptr, struct interrupt_ack_notifier, list);
+ if (irq == notifier->irq)
+ notifier->callback(notifier->opaque, irq);
+ }
+}
+
+static int
+register_interrupt_ack_notifier(struct kvm *kvm,
+ int irq,
+ void (*callback)(void *opaque, int irq),
+ void *opaque)
+{
+ struct interrupt_ack_notifier *notifier;
+
+ notifier = kzalloc(sizeof(struct interrupt_ack_notifier), GFP_KERNEL);
+ if (notifier == NULL) {
+ printk(KERN_INFO "%s: Couldn't allocate memory\n", __func__);
+ return -ENOMEM;
+ }
+
+ notifier->callback = callback;
+ notifier->opaque = opaque;
+ notifier->irq = irq;
+ list_add(&notifier->list, &kvm->arch.interrupt_ack_notifier);
+
+ return 0;
+}
+
+static struct interrupt_ack_notifier *
+find_interrupt_ack_notifier(struct kvm *kvm,
+ int irq,
+ void (*callback)(void *opaque, int irq),
+ void *opaque)
+{
+ struct list_head *ptr;
+ struct interrupt_ack_notifier *notifier;
+
+ list_for_each(ptr, &kvm->arch.interrupt_ack_notifier) {
+ notifier = list_entry(ptr, struct interrupt_ack_notifier, list);
+ if ((irq == notifier->irq) &&
+ (callback == notifier->callback) &&
+ (opaque == notifier->opaque)) {
+ return notifier;
+ }
+ }
+ return NULL;
+}
+
+static void
+unregister_interrupt_ack_notifier(struct kvm *kvm,
+ int irq,
+ void (*callback)(void *opaque, int irq),
+ void *opaque)
+{
+ struct interrupt_ack_notifier *notifier;
+
+ notifier = find_interrupt_ack_notifier(kvm, irq, callback, opaque);
+ if (notifier) {
+ list_del(&notifier->list);
+ kfree(notifier);
+ }
+}
+
+static void free_interrupt_ack_notifier_list(struct kvm *kvm)
+{
+ struct list_head *ptr, *ptr2;
+ struct interrupt_ack_notifier *notifier;
+
+ list_for_each_safe(ptr, ptr2, &kvm->arch.interrupt_ack_notifier) {
+ notifier = list_entry(ptr,
+ struct interrupt_ack_notifier, list);
+ list_del(&notifier->list);
+ kfree(notifier);
+ }
+}
+
/*
* Load the pae pdptrs. Return true is they are all valid.
*/
@@ -3946,6 +4028,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.interrupt_ack_notifier);
return kvm;
}
@@ -3981,6 +4064,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
+ free_interrupt_ack_notifier_list(kvm);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
if (kvm->arch.apic_access_page)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 9391e57..fe35b15 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -302,6 +302,13 @@ struct kvm_vcpu_arch {
u64 mtrr[0x100];
};
+struct interrupt_ack_notifier {
+ struct list_head list;
+ int irq;
+ void (*callback)(void *opaque, int irq);
+ void *opaque;
+};
+
struct kvm_mem_alias {
gfn_t base_gfn;
unsigned long npages;
@@ -339,6 +346,7 @@ struct kvm_arch{
struct dmar_domain *intel_iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
+ struct list_head interrupt_ack_notifier;
struct kvm_pit *vpit;
int round_robin_prev_vcpu;
@@ -582,6 +590,8 @@ void kvm_enable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
+void notify_interrupt_ack(struct kvm *kvm, int irq);
+
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
--
1.5.5.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling
2008-07-08 10:34 ` Ben-Ami Yassour
@ 2008-07-10 13:57 ` Avi Kivity
2008-07-17 16:56 ` Ben-Ami Yassour
0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2008-07-10 13:57 UTC (permalink / raw)
To: Ben-Ami Yassour
Cc: Amit Shah, kvm, Muli Ben-Yehuda, allen.m.kay, chrisw, weidong.han,
virtualization
Ben-Ami Yassour wrote:
> On Mon, 2008-07-07 at 13:08 +0300, Avi Kivity wrote:
>
>> Amit Shah wrote:
>>
>>> This will be useful for acking irqs of assigned devices
>>>
>>>
>>>
>> And also for improving time drift tracking.
>>
>> Please make this more generic by having a list of callbacks. There
>> could also be just one list, rather than one for the ioapic and one for
>> the pic as implemented now.
>>
>> It may also make sense to filter the irq number before calling the
>> callback rather than relying on the callback to ignore uninteresting irqs.
>>
>>
> Avi,
>
> Did you mean something like the patch below?
>
I did, and have something very similar queued.
> How should we protect list accesses, should it be a new lock or an existing one?
>
>
kvm->lock for now.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling
2008-07-10 13:57 ` Avi Kivity
@ 2008-07-17 16:56 ` Ben-Ami Yassour
2008-07-17 17:16 ` Avi Kivity
0 siblings, 1 reply; 15+ messages in thread
From: Ben-Ami Yassour @ 2008-07-17 16:56 UTC (permalink / raw)
To: Avi Kivity
Cc: Amit Shah, kvm, Muli Ben-Yehuda, allen.m.kay, chrisw, weidong.han,
virtualization
On Thu, 2008-07-10 at 16:57 +0300, Avi Kivity wrote:
> Ben-Ami Yassour wrote:
> > On Mon, 2008-07-07 at 13:08 +0300, Avi Kivity wrote:
> >
> >> Amit Shah wrote:
> >>
> >>> This will be useful for acking irqs of assigned devices
> >>>
> >>>
> >>>
> >> And also for improving time drift tracking.
> >>
> >> Please make this more generic by having a list of callbacks. There
> >> could also be just one list, rather than one for the ioapic and one for
> >> the pic as implemented now.
> >>
> >> It may also make sense to filter the irq number before calling the
> >> callback rather than relying on the callback to ignore uninteresting irqs.
> >>
> >>
> > Avi,
> >
> > Did you mean something like the patch below?
> >
>
> I did, and have something very similar queued.
The notification list might help simplify the assigned device code.
Are you planning to merge the patch you have queued, or should I use the
one that I sent you?
Thanks,
Ben
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling
2008-07-17 16:56 ` Ben-Ami Yassour
@ 2008-07-17 17:16 ` Avi Kivity
2008-07-17 17:25 ` Avi Kivity
0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2008-07-17 17:16 UTC (permalink / raw)
To: Ben-Ami Yassour
Cc: Amit Shah, kvm, Muli Ben-Yehuda, allen.m.kay, chrisw, weidong.han,
virtualization
Ben-Ami Yassour wrote:
>>>
>>>
>> I did, and have something very similar queued.
>>
>
> The notification list might help simplify the assigned device code.
> Are you planning to merge the patch you have queued, or should I use the
> one that I sent you?
>
>
I'll dig mine up tomorrow and post it, so you can tell if it fits your
needs.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling
2008-07-17 17:16 ` Avi Kivity
@ 2008-07-17 17:25 ` Avi Kivity
0 siblings, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2008-07-17 17:25 UTC (permalink / raw)
To: Ben-Ami Yassour
Cc: Amit Shah, kvm, Muli Ben-Yehuda, allen.m.kay, chrisw, weidong.han,
virtualization
[-- Attachment #1: Type: text/plain, Size: 684 bytes --]
Avi Kivity wrote:
> Ben-Ami Yassour wrote:
>>>>
>>> I did, and have something very similar queued.
>>>
>>
>> The notification list might help simplify the assigned device code.
>> Are you planning to merge the patch you have queued, or should I use the
>> one that I sent you?
>>
>>
>
> I'll dig mine up tomorrow and post it, so you can tell if it fits your
> needs.
>
Since the chances of me remembering to do this tomorrow are slim, here
it is. Note that it hasn't been wired to the pic and ioapic yet, but
that should be easy. Hasn't been tested either.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
[-- Attachment #2: ack-notifier-list.patch --]
[-- Type: text/plain, Size: 2929 bytes --]
From ed7fe282c23ab47a5a306277ea1010039d094ea6 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Mon, 7 Jul 2008 14:48:46 +0300
Subject: [PATCH] KVM: Add irq ack notifier list
This can be used by kvm subsystems that are interested in when interrupts
are acked, for example time drift compensation.
Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 0d9e552..9091195 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -111,3 +111,25 @@ void kvm_set_irq(struct kvm *kvm, int irq, int level)
kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
+{
+ struct kvm_irq_ack_notifier *kian;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link)
+ if (kian->gsi == gsi)
+ kian->irq_acked(kian);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ hlist_del(&kian->link);
+}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 07ff2ae..95fe718 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -83,6 +83,11 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_set_irq(struct kvm *kvm, int irq, int level);
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c7bad53..a016900 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3948,6 +3948,7 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+ INIT_HLIST_HEAD(&kvm->arch.irq_ack_notifier_list);
return kvm;
}
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index b893a85..73756e5 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -314,6 +314,12 @@ struct kvm_mem_alias {
gfn_t target_gfn;
};
+struct kvm_irq_ack_notifier {
+ struct hlist_node link;
+ unsigned gsi;
+ void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
+};
+
struct kvm_arch{
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
@@ -329,6 +335,7 @@ struct kvm_arch{
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
+ struct hlist_head irq_ack_notifier_list;
int round_robin_prev_vcpu;
unsigned int tss_addr;
^ permalink raw reply related [flat|nested] 15+ messages in thread
end of thread, other threads:[~2008-07-17 17:25 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2008-06-27 12:55 PCI device assignment to guests Amit Shah
2008-06-27 12:55 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Amit Shah
2008-06-27 12:55 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Amit Shah
2008-06-27 12:55 ` [PATCH 3/4] KVM: Introduce a callback routine for PIC " Amit Shah
2008-06-27 12:55 ` [PATCH 4/4] KVM: Handle device assignment to guests Amit Shah
2008-07-02 10:25 ` Amit Shah
2008-07-07 10:08 ` [PATCH 2/4] KVM: Introduce a callback routine for IOAPIC ack handling Avi Kivity
2008-07-08 10:34 ` Ben-Ami Yassour
2008-07-10 13:57 ` Avi Kivity
2008-07-17 16:56 ` Ben-Ami Yassour
2008-07-17 17:16 ` Avi Kivity
2008-07-17 17:25 ` Avi Kivity
2008-07-05 11:05 ` [PATCH 1/4] KVM: Introduce kvm_set_irq to inject interrupts in guests Avi Kivity
2008-07-06 13:50 ` Ben-Ami Yassour
2008-07-06 13:59 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).