* [PATCH 4/4] KVM: Enable MSI-X for KVM assigned device
2008-12-23 8:12 [PATCH 0/4] MSI-X Enabling Sheng Yang
@ 2008-12-23 8:12 ` Sheng Yang
0 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2008-12-23 8:12 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Sheng Yang
This patch finally enables MSI-X.
What we need for MSI-X:
1. Intercept one page in the MMIO region of the device, so that we can get the
guest's desired MSI-X table and set up the real one.
2. IRQ fifo. One device can now have more than one interrupt, and they are all
handled by one workqueue structure, so we need to identify them. irq_fifo
provides a mechanism to handle more than one interrupt at a time.
3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X
message address/data. We use the same entry number for the host and guest here, so
that it's easy to find the correlated guest gsi.
What we lack for now:
1. The PCI spec says nothing can coexist with the MSI-X table in the same page of
the MMIO region, except the pending bits. The patch ignores the pending bits as a
first step (so they are always 0 - no pending).
2. The PCI spec allows the MSI-X table to be changed dynamically. That means the OS
can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch
doesn't support this, and Linux doesn't work this way either.
3. The patch doesn't implement MSI-X mask-all and single-entry masking. I will
implement the former in drivers/pci/msi.c later. For single-entry masking, I will
add a hook in the intercepted MMIO read/write handler later.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
include/linux/kvm.h | 4 +
include/linux/kvm_host.h | 12 ++-
virt/kvm/kvm_main.c | 246 +++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 255 insertions(+), 7 deletions(-)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c45b08d..0531838 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -397,6 +397,7 @@ struct kvm_trace_rec {
#define KVM_CAP_GSI_MSG 24
#if defined(CONFIG_X86)
#define KVM_CAP_INTERCEPTED_MMIO 25
+#define KVM_CAP_DEVICE_MSIX 26
#endif
/*
@@ -552,6 +553,9 @@ struct kvm_assigned_irq {
#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION (1 << 0)
#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 1)
+#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION (1 << 2)
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX (1 << 3)
+#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX (1 << 4)
struct kvm_assigned_gsi_msg {
__u32 gsi;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e4d6b99..c0d29aa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -319,16 +319,24 @@ struct kvm_assigned_dev_kernel {
int assigned_dev_id;
int host_busnr;
int host_devfn;
- int host_irq;
bool host_irq_disabled;
- int guest_irq;
#define KVM_ASSIGNED_DEV_IRQ_FIFO_LEN 0x100
struct kfifo *irq_fifo;
spinlock_t irq_fifo_lock;
+ int entries_nr;
+ union {
+ int host_irq;
+ struct msix_entry *host_msix_entries;
+ };
+ union {
+ int guest_irq;
+ struct msix_entry *guest_msix_entries;
+ };
#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
+#define KVM_ASSIGNED_DEV_MSIX ((1 << 2) | (1 << 10))
unsigned long irq_requested_type;
int irq_source_id;
struct kvm_intercepted_mmio msix_mmio;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a5a9763..b453279 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,6 +100,41 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
return NULL;
}
+static u32 find_gsi_from_host_irq(struct kvm_assigned_dev_kernel *assigned_dev,
+ int irq)
+{
+ int i;
+ int entry;
+ u32 gsi;
+ struct msix_entry *host_msix_entries, *guest_msix_entries;
+
+ host_msix_entries = assigned_dev->host_msix_entries;
+ guest_msix_entries = assigned_dev->guest_msix_entries;
+
+ entry = -1;
+ gsi = 0;
+ for (i = 0; i < assigned_dev->entries_nr; i++)
+ if (irq == (host_msix_entries + i)->vector) {
+ entry = (host_msix_entries + i)->entry;
+ break;
+ }
+ if (entry < 0) {
+ printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+ return 0;
+ }
+ for (i = 0; i < assigned_dev->entries_nr; i++)
+ if (entry == (guest_msix_entries + i)->entry) {
+ gsi = (guest_msix_entries + i)->vector;
+ break;
+ }
+ if (gsi == 0) {
+ printk(KERN_WARNING "Fail to find correlated MSI-X gsi!\n");
+ return 0;
+ }
+
+ return gsi;
+}
+
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
struct kvm_assigned_dev_kernel *assigned_dev;
@@ -119,12 +154,16 @@ handle_irq:
kfifo_get(assigned_dev->irq_fifo,
(unsigned char *)&irq, sizeof(int));
- gsi = assigned_dev->guest_irq;
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
+ gsi = find_gsi_from_host_irq(assigned_dev, irq);
+ else
+ gsi = assigned_dev->guest_irq;
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, gsi, 1);
- if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
- enable_irq(assigned_dev->host_irq);
+ if ((assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) ||
+ (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)) {
+ enable_irq(irq);
assigned_dev->host_irq_disabled = false;
}
@@ -196,11 +235,23 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
*/
kvm_put_kvm(kvm);
- free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+ int i;
+ for (i = 0; i < assigned_dev->entries_nr; i++)
+ free_irq((assigned_dev->host_msix_entries + i)->vector,
+ (void *)assigned_dev);
+ } else
+ free_irq(assigned_dev->host_irq, (void *)assigned_dev);
if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
pci_disable_msi(assigned_dev->dev);
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+ kfree(assigned_dev->host_msix_entries);
+ kfree(assigned_dev->guest_msix_entries);
+ pci_disable_msix(assigned_dev->dev);
+ }
+
assigned_dev->irq_requested_type = 0;
}
@@ -325,6 +376,144 @@ static int assigned_device_update_msi(struct kvm *kvm,
adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
return 0;
}
+
+static int assigned_device_update_msix_mmio(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *adev)
+{
+ struct kvm_intercepted_mmio *mmio = &adev->msix_mmio;
+ void * va;
+ u16 entries_nr = 0, entries_max_nr;
+ int pos, i, r = 0;
+ u32 msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+ struct kvm_gsi_msg gsi_msg;
+
+ pos = pci_find_capability(adev->dev, PCI_CAP_ID_MSIX);
+ if (!pos)
+ return -EINVAL;
+
+ pci_read_config_word(adev->dev, pos + PCI_MSIX_FLAGS, &entries_max_nr);
+ entries_max_nr &= PCI_MSIX_FLAGS_QSIZE;
+
+ va = kmap(mmio->page);
+ /* Get the usable entry number for allocating */
+ for (i = 0; i < entries_max_nr; i++) {
+ memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+ if (msg_ctrl & PCI_MSIX_FLAGS_BITMASK)
+ continue;
+ memcpy(&msg_data, va + i * 16 + 8, 4);
+ /* Ignore unused entry even it's unmasked */
+ if (msg_data == 0)
+ continue;
+ entries_nr ++;
+ }
+
+ adev->entries_nr = entries_nr;
+ adev->host_msix_entries = kmalloc(sizeof(struct msix_entry) * entries_nr,
+ GFP_KERNEL);
+ if (!adev->host_msix_entries) {
+ printk(KERN_ERR "no memory for host msix entries!\n");
+ return -ENOMEM;
+ }
+ adev->guest_msix_entries = kmalloc(sizeof(struct msix_entry) * entries_nr,
+ GFP_KERNEL);
+ if (!adev->guest_msix_entries) {
+ printk(KERN_ERR "no memory for guest msix entries!\n");
+ return -ENOMEM;
+ }
+
+ entries_nr = 0;
+ for (i = 0; i < entries_max_nr; i++) {
+ if (entries_nr >= adev->entries_nr)
+ break;
+ memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+ if (msg_ctrl & PCI_MSIX_FLAGS_BITMASK)
+ continue;
+ memcpy(&msg_addr, va + i * 16, 4);
+ memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
+ memcpy(&msg_data, va + i * 16 + 8, 4);
+ if (msg_data == 0)
+ continue;
+
+ gsi_msg.gsi = 0;
+ gsi_msg.msg.address_lo = msg_addr;
+ gsi_msg.msg.address_hi = msg_upper_addr;
+ gsi_msg.msg.data = msg_data;
+ r = kvm_update_gsi_msg(kvm, &gsi_msg);
+ if (r) {
+ printk(KERN_ERR "Fail to update gsi_msg for MSIX!");
+ break;
+ }
+ (adev->guest_msix_entries + entries_nr)->entry = i;
+ (adev->guest_msix_entries + entries_nr)->vector = gsi_msg.gsi;
+ (adev->host_msix_entries + entries_nr)->entry = i;
+ entries_nr ++;
+ }
+ kunmap(mmio->page);
+
+ return r;
+}
+
+static int assigned_device_update_msix(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *adev,
+ struct kvm_assigned_irq *airq)
+{
+ /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+ int i, r;
+
+ adev->ack_notifier.gsi = -1;
+
+ if (irqchip_in_kernel(kvm)) {
+ if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX) {
+ printk(KERN_WARNING
+ "kvm: unsupported mask MSI-X, flags 0x%x!\n",
+ airq->flags);
+ return 0;
+ }
+
+ if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
+ /* Guest disable MSI-X */
+ kvm_free_assigned_irq(kvm, adev);
+ if (msi2intx) {
+ pci_enable_msi(adev->dev);
+ if (adev->dev->msi_enabled)
+ return assigned_device_update_msi(kvm,
+ adev, airq);
+ }
+ return assigned_device_update_intx(kvm, adev, airq);
+ }
+
+ kvm_free_assigned_irq(kvm, adev);
+
+ /*
+ * We only scan device (emulated) MMIO when guest want to enable
+ * MSI-X, and don't support dynamically add MSI-X entry for now
+ */
+ r = assigned_device_update_msix_mmio(kvm, adev);
+ if (r)
+ return r;
+
+ r = pci_enable_msix(adev->dev, adev->host_msix_entries,
+ adev->entries_nr);
+ if (r) {
+ printk(KERN_ERR "Fail to enable MSI-X feature!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->entries_nr; i++) {
+ r = request_irq((adev->host_msix_entries + i)->vector,
+ kvm_assigned_dev_intr, 0,
+ "kvm_assigned_msix_device",
+ (void *)adev);
+ if (r)
+ return r;
+ }
+ }
+
+ adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+
+ return 0;
+}
+
#endif
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
@@ -370,6 +559,16 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
}
}
+#ifdef CONFIG_X86
+ if (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
+ r = assigned_device_update_msix(kvm, match, assigned_irq);
+ if (r) {
+ printk(KERN_WARNING "kvm: failed to execute "
+ "MSI-X action!\n");
+ goto out_release;
+ }
+ } else
+#endif
if ((!msi2intx &&
(assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION)) ||
(msi2intx && match->dev->msi_enabled)) {
@@ -413,6 +612,33 @@ out_release:
return r;
}
+static int assigned_dev_register_msix_mmio(struct kvm_assigned_dev_kernel *adev)
+{
+ int pos = pci_find_capability(adev->dev, PCI_CAP_ID_MSIX);
+ u32 msix_table_entry;
+ int bar_nr;
+
+ adev->msix_mmio.dev = adev;
+ INIT_HLIST_NODE(&adev->msix_mmio.link);
+
+ if (!pos)
+ return 0;
+
+ if (pci_read_config_dword(adev->dev, pos + 4,
+ &msix_table_entry) != PCIBIOS_SUCCESSFUL)
+ return -EFAULT;
+
+ bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK;
+
+ /* Get table offset */
+ msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK;
+ adev->msix_mmio.pfn = (pci_resource_start(adev->dev, bar_nr) +
+ msix_table_entry) >> PAGE_SHIFT;
+
+ kvm_register_intercept_mmio(adev->kvm, &adev->msix_mmio);
+ return 0;
+}
+
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
@@ -475,15 +701,25 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
if (!match->irq_fifo)
goto out_list_del;
+ /*
+ * Check for MSI-X capability, if device got, we need to intercept
+ * its MSI-X table accessing
+ */
+ if (assigned_dev_register_msix_mmio(match))
+ goto out_fifo_del;
+
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
r = kvm_iommu_map_guest(kvm, match);
if (r)
- goto out_fifo_del;
+ goto out_unregister;
}
out:
mutex_unlock(&kvm->lock);
return r;
+out_unregister:
+ if (pci_find_capability(dev, PCI_CAP_ID_MSIX))
+ kvm_unregister_intercept_mmio(&match->msix_mmio);
out_fifo_del:
kfifo_free(match->irq_fifo);
out_list_del:
--
1.5.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/3 v4] MSI-X enabling
@ 2009-02-18 14:26 Sheng Yang
2009-02-18 14:26 ` [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface Sheng Yang
` (3 more replies)
0 siblings, 4 replies; 8+ messages in thread
From: Sheng Yang @ 2009-02-18 14:26 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface
2009-02-18 14:26 [PATCH 0/3 v4] MSI-X enabling Sheng Yang
@ 2009-02-18 14:26 ` Sheng Yang
2009-02-18 14:44 ` Avi Kivity
2009-02-18 14:26 ` [PATCH 2/4] KVM: Ioctls for init MSI-X entry Sheng Yang
` (2 subsequent siblings)
3 siblings, 1 reply; 8+ messages in thread
From: Sheng Yang @ 2009-02-18 14:26 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Sheng Yang
_IOR for copy_to_user and _IOW for copy_from_user...
Noticed by Avi.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
include/linux/kvm.h | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2163b3d..d742cbf 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -469,10 +469,10 @@ struct kvm_irq_routing {
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
#define KVM_UNREGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
-#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
+#define KVM_ASSIGN_PCI_DEVICE _IOW(KVMIO, 0x69, \
struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
-#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
+#define KVM_ASSIGN_IRQ _IOW(KVMIO, 0x70, \
struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
--
1.5.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/4] KVM: Ioctls for init MSI-X entry
2009-02-18 14:26 [PATCH 0/3 v4] MSI-X enabling Sheng Yang
2009-02-18 14:26 ` [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface Sheng Yang
@ 2009-02-18 14:26 ` Sheng Yang
2009-02-18 14:26 ` [PATCH 3/4] KVM: Add MSI-X interrupt injection logic Sheng Yang
2009-02-18 14:26 ` [PATCH 4/4] KVM: Enable MSI-X for KVM assigned device Sheng Yang
3 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2009-02-18 14:26 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Sheng Yang
Introduce two ioctls, KVM_ASSIGN_SET_MSIX_NR and KVM_ASSIGN_SET_MSIX_ENTRY.
These two ioctls are used by userspace to specify the number of MSI-X entries of
the guest device and to correlate each MSI-X entry with a GSI during the initialization stage.
MSI-X should be fully initialized before it is enabled.
Changing the number of MSI-X entries is not supported for now.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
include/linux/kvm.h | 18 ++++++++
include/linux/kvm_host.h | 10 ++++
virt/kvm/kvm_main.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 132 insertions(+), 0 deletions(-)
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d742cbf..8e14629 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -475,6 +475,10 @@ struct kvm_irq_routing {
#define KVM_ASSIGN_IRQ _IOW(KVMIO, 0x70, \
struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
+#define KVM_ASSIGN_SET_MSIX_NR \
+ _IOW(KVMIO, 0x72, struct kvm_assigned_msix_nr)
+#define KVM_ASSIGN_SET_MSIX_ENTRY \
+ _IOW(KVMIO, 0x73, struct kvm_assigned_msix_entry)
/*
* ioctls for vcpu fds
@@ -595,4 +599,18 @@ struct kvm_assigned_irq {
#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
+struct kvm_assigned_msix_nr {
+ __u32 assigned_dev_id;
+ __u16 entry_nr;
+ __u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV 512
+struct kvm_assigned_msix_entry {
+ __u32 assigned_dev_id;
+ __u32 gsi;
+ __u16 entry; /* The index of entry in the MSI-X table */
+ __u16 padding[3];
+};
+
#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c7096d..33ed9f8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -319,6 +319,12 @@ struct kvm_irq_ack_notifier {
void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};
+struct kvm_guest_msix_entry {
+ u32 vector;
+ u16 entry;
+ u16 flags;
+};
+
struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
struct work_struct interrupt_work;
@@ -326,13 +332,17 @@ struct kvm_assigned_dev_kernel {
int assigned_dev_id;
int host_busnr;
int host_devfn;
+ unsigned int entries_nr;
int host_irq;
bool host_irq_disabled;
+ struct msix_entry *host_msix_entries;
int guest_irq;
+ struct kvm_guest_msix_entry *guest_msix_entries;
#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
+#define KVM_ASSIGNED_DEV_MSIX ((1 << 2) | (1 << 10))
unsigned long irq_requested_type;
int irq_source_id;
int flags;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 266bdaf..b373466 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1593,6 +1593,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
return 0;
}
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+ struct kvm_assigned_msix_nr *entry_nr)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *adev;
+
+ mutex_lock(&kvm->lock);
+
+ adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ entry_nr->assigned_dev_id);
+ if (!adev) {
+ r = -EINVAL;
+ goto msix_nr_out;
+ }
+
+ if (adev->entries_nr == 0) {
+ adev->entries_nr = entry_nr->entry_nr;
+ if (adev->entries_nr == 0 ||
+ adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) {
+ r = -EINVAL;
+ goto msix_nr_out;
+ }
+
+ adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+ entry_nr->entry_nr,
+ GFP_KERNEL);
+ if (!adev->host_msix_entries) {
+ r = -ENOMEM;
+ goto msix_nr_out;
+ }
+ adev->guest_msix_entries = kzalloc(
+ sizeof(struct kvm_guest_msix_entry) *
+ entry_nr->entry_nr, GFP_KERNEL);
+ if (!adev->guest_msix_entries) {
+ kfree(adev->host_msix_entries);
+ r = -ENOMEM;
+ goto msix_nr_out;
+ }
+ } else /* Not allowed set MSI-X number twice */
+ r = -EINVAL;
+msix_nr_out:
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+ struct kvm_assigned_msix_entry *entry)
+{
+ int r = 0, i;
+ struct kvm_assigned_dev_kernel *adev;
+
+ mutex_lock(&kvm->lock);
+
+ adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ entry->assigned_dev_id);
+
+ if (!adev) {
+ r = -EINVAL;
+ goto msix_entry_out;
+ }
+
+ for (i = 0; i < adev->entries_nr; i++)
+ if (adev->guest_msix_entries[i].vector == 0 ||
+ adev->guest_msix_entries[i].entry == entry->entry) {
+ adev->guest_msix_entries[i].entry = entry->entry;
+ adev->guest_msix_entries[i].vector = entry->gsi;
+ adev->host_msix_entries[i].entry = entry->entry;
+ break;
+ }
+ if (i == adev->entries_nr) {
+ r = -ENOSPC;
+ goto msix_entry_out;
+ }
+
+msix_entry_out:
+ mutex_unlock(&kvm->lock);
+
+ return r;
+}
+#endif
+
static long kvm_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1917,7 +1999,29 @@ static long kvm_vm_ioctl(struct file *filp,
vfree(entries);
break;
}
+#ifdef __KVM_HAVE_MSIX
+ case KVM_ASSIGN_SET_MSIX_NR: {
+ struct kvm_assigned_msix_nr entry_nr;
+ r = -EFAULT;
+ if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+ goto out;
+ r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+ if (r)
+ goto out;
+ break;
+ }
+ case KVM_ASSIGN_SET_MSIX_ENTRY: {
+ struct kvm_assigned_msix_entry entry;
+ r = -EFAULT;
+ if (copy_from_user(&entry, argp, sizeof entry))
+ goto out;
+ r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+ if (r)
+ goto out;
+ break;
+ }
#endif
+#endif /* KVM_CAP_IRQ_ROUTING */
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
}
--
1.5.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/4] KVM: Add MSI-X interrupt injection logic
2009-02-18 14:26 [PATCH 0/3 v4] MSI-X enabling Sheng Yang
2009-02-18 14:26 ` [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface Sheng Yang
2009-02-18 14:26 ` [PATCH 2/4] KVM: Ioctls for init MSI-X entry Sheng Yang
@ 2009-02-18 14:26 ` Sheng Yang
2009-02-18 14:26 ` [PATCH 4/4] KVM: Enable MSI-X for KVM assigned device Sheng Yang
3 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2009-02-18 14:26 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Sheng Yang
We have to handle more than one interrupt with one handler for MSI-X. Avi
suggested using a flag to indicate a pending interrupt, so here it is.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 66 +++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 60 insertions(+), 7 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 33ed9f8..5aad46a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -319,6 +319,7 @@ struct kvm_irq_ack_notifier {
void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};
+#define KVM_ASSIGNED_MSIX_PENDING 0x1
struct kvm_guest_msix_entry {
u32 vector;
u16 entry;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b373466..1e80b6e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,25 +95,69 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h
return NULL;
}
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+ *assigned_dev, int irq)
+{
+ int i, index;
+ struct msix_entry *host_msix_entries;
+
+ host_msix_entries = assigned_dev->host_msix_entries;
+
+ index = -1;
+ for (i = 0; i < assigned_dev->entries_nr; i++)
+ if (irq == host_msix_entries[i].vector) {
+ index = i;
+ break;
+ }
+ if (index < 0) {
+ printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+ return 0;
+ }
+
+ return index;
+}
+
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
struct kvm_assigned_dev_kernel *assigned_dev;
+ struct kvm *kvm;
+ int irq, i;
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
interrupt_work);
+ kvm = assigned_dev->kvm;
/* This is taken to safely inject irq inside the guest. When
* the interrupt injection (or the ioapic code) uses a
* finer-grained lock, update this
*/
- mutex_lock(&assigned_dev->kvm->lock);
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
-
- if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) {
- enable_irq(assigned_dev->host_irq);
- assigned_dev->host_irq_disabled = false;
+ mutex_lock(&kvm->lock);
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+ struct kvm_guest_msix_entry *guest_entries =
+ assigned_dev->guest_msix_entries;
+ for (i = 0; i < assigned_dev->entries_nr; i++) {
+ if (!(guest_entries[i].flags &
+ KVM_ASSIGNED_MSIX_PENDING))
+ continue;
+ guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING;
+ kvm_set_irq(assigned_dev->kvm,
+ assigned_dev->irq_source_id,
+ guest_entries[i].vector, 1);
+ irq = assigned_dev->host_msix_entries[i].vector;
+ if (irq != 0)
+ enable_irq(irq);
+ assigned_dev->host_irq_disabled = false;
+ }
+ } else {
+ kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+ assigned_dev->guest_irq, 1);
+ if (assigned_dev->irq_requested_type &
+ KVM_ASSIGNED_DEV_GUEST_MSI) {
+ enable_irq(assigned_dev->host_irq);
+ assigned_dev->host_irq_disabled = false;
+ }
}
+
mutex_unlock(&assigned_dev->kvm->lock);
}
@@ -122,6 +166,14 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
+ if (assigned_dev->irq_requested_type == KVM_ASSIGNED_DEV_MSIX) {
+ int index = find_index_from_host_irq(assigned_dev, irq);
+ if (index < 0)
+ return IRQ_HANDLED;
+ assigned_dev->guest_msix_entries[index].flags |=
+ KVM_ASSIGNED_MSIX_PENDING;
+ }
+
schedule_work(&assigned_dev->interrupt_work);
disable_irq_nosync(irq);
--
1.5.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/4] KVM: Enable MSI-X for KVM assigned device
2009-02-18 14:26 [PATCH 0/3 v4] MSI-X enabling Sheng Yang
` (2 preceding siblings ...)
2009-02-18 14:26 ` [PATCH 3/4] KVM: Add MSI-X interrupt injection logic Sheng Yang
@ 2009-02-18 14:26 ` Sheng Yang
3 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2009-02-18 14:26 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Sheng Yang
This patch finally enables MSI-X.
What we need for MSI-X:
1. Intercept one page in the MMIO region of the device, so that we can get the
guest's desired MSI-X table and set up the real one. This is now done by the guest, and
passed to the kernel using the ioctls KVM_ASSIGN_SET_MSIX_NR and KVM_ASSIGN_SET_MSIX_ENTRY.
2. Information for incoming interrupts. One device can now have more than one
interrupt, and they are all handled by one workqueue structure, so we need to
identify them. The previous patch adds a per-entry pending flag (KVM_ASSIGNED_MSIX_PENDING) to get this done.
3. Mapping from host IRQ to guest gsi as well as guest gsi to real MSI/MSI-X
message address/data. We use the same entry number for the host and guest here, so
that it's easy to find the correlated guest gsi.
What we lack for now:
1. The PCI spec says nothing can coexist with the MSI-X table in the same page of
the MMIO region, except the pending bits. The patch ignores the pending bits as a
first step (so they are always 0 - no pending).
2. The PCI spec allows the MSI-X table to be changed dynamically. That means the OS
can enable MSI-X, then mask one MSI-X entry, modify it, and unmask it. The patch
doesn't support this, and Linux doesn't work this way either.
3. The patch doesn't implement MSI-X mask-all and single-entry masking. I will
implement the former in drivers/pci/msi.c later. For single-entry masking, userspace
should have the responsibility to handle it.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
arch/x86/include/asm/kvm.h | 1 +
include/linux/kvm.h | 8 ++++
virt/kvm/kvm_main.c | 98 +++++++++++++++++++++++++++++++++++++++++---
3 files changed, 101 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index dc3f6cf..125be8b 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -16,6 +16,7 @@
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_MSIX
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 8e14629..470a43c 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -402,6 +402,9 @@ struct kvm_trace_rec {
#ifdef __KVM_HAVE_IOAPIC
#define KVM_CAP_IRQ_ROUTING 25
#endif
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_DEVICE_MSIX 26
+#endif
#ifdef KVM_CAP_IRQ_ROUTING
@@ -599,6 +602,11 @@ struct kvm_assigned_irq {
#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
+#define KVM_DEV_IRQ_ASSIGN_MSIX_ACTION (KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX |\
+ KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
+#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX (1 << 1)
+#define KVM_DEV_IRQ_ASSIGN_MASK_MSIX (1 << 2)
+
struct kvm_assigned_msix_nr {
__u32 assigned_dev_id;
__u16 entry_nr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1e80b6e..b1f2399 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -236,13 +236,33 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
* now, the kvm state is still legal for probably we also have to wait
* interrupt_work done.
*/
- disable_irq_nosync(assigned_dev->host_irq);
- cancel_work_sync(&assigned_dev->interrupt_work);
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_MSIX) {
+ int i;
+ for (i = 0; i < assigned_dev->entries_nr; i++)
+ disable_irq_nosync(assigned_dev->
+ host_msix_entries[i].vector);
+
+ cancel_work_sync(&assigned_dev->interrupt_work);
- free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+ for (i = 0; i < assigned_dev->entries_nr; i++)
+ free_irq(assigned_dev->host_msix_entries[i].vector,
+ (void *)assigned_dev);
- if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
- pci_disable_msi(assigned_dev->dev);
+ assigned_dev->entries_nr = 0;
+ kfree(assigned_dev->host_msix_entries);
+ kfree(assigned_dev->guest_msix_entries);
+ pci_disable_msix(assigned_dev->dev);
+ } else {
+ /* Deal with MSI and INTx */
+ disable_irq_nosync(assigned_dev->host_irq);
+ cancel_work_sync(&assigned_dev->interrupt_work);
+
+ free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+ if (assigned_dev->irq_requested_type &
+ KVM_ASSIGNED_DEV_HOST_MSI)
+ pci_disable_msi(assigned_dev->dev);
+ }
assigned_dev->irq_requested_type = 0;
}
@@ -373,6 +393,60 @@ static int assigned_device_update_msi(struct kvm *kvm,
}
#endif
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_update_msix(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *adev,
+ struct kvm_assigned_irq *airq)
+{
+ /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+ int i, r;
+
+ adev->ack_notifier.gsi = -1;
+
+ if (irqchip_in_kernel(kvm)) {
+ if (airq->flags & KVM_DEV_IRQ_ASSIGN_MASK_MSIX)
+ return -ENOTTY;
+
+ if (!(airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX)) {
+ /* Guest disable MSI-X */
+ kvm_free_assigned_irq(kvm, adev);
+ if (msi2intx) {
+ pci_enable_msi(adev->dev);
+ if (adev->dev->msi_enabled)
+ return assigned_device_update_msi(kvm,
+ adev, airq);
+ }
+ return assigned_device_update_intx(kvm, adev, airq);
+ }
+
+ /* host_msix_entries and guest_msix_entries should have been
+ * initialized */
+ if (adev->entries_nr == 0)
+ return -EINVAL;
+
+ kvm_free_assigned_irq(kvm, adev);
+
+ r = pci_enable_msix(adev->dev, adev->host_msix_entries,
+ adev->entries_nr);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->entries_nr; i++) {
+ r = request_irq((adev->host_msix_entries + i)->vector,
+ kvm_assigned_dev_intr, 0,
+ "kvm_assigned_msix_device",
+ (void *)adev);
+ if (r)
+ return r;
+ }
+ }
+
+ adev->irq_requested_type |= KVM_ASSIGNED_DEV_MSIX;
+
+ return 0;
+}
+#endif
+
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
@@ -417,12 +491,24 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
}
}
- if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+ if (match->irq_requested_type & KVM_ASSIGNED_DEV_MSIX)
+ current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSIX;
+ else if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
(match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
changed_flags = assigned_irq->flags ^ current_flags;
+#ifdef __KVM_HAVE_MSIX
+ if (changed_flags & KVM_DEV_IRQ_ASSIGN_MSIX_ACTION) {
+ r = assigned_device_update_msix(kvm, match, assigned_irq);
+ if (r) {
+ printk(KERN_WARNING "kvm: failed to execute "
+ "MSI-X action!\n");
+ goto out_release;
+ }
+ } else
+#endif
if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
(msi2intx && match->dev->msi_enabled)) {
#ifdef CONFIG_X86
--
1.5.4.5
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface
2009-02-18 14:26 ` [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface Sheng Yang
@ 2009-02-18 14:44 ` Avi Kivity
2009-02-20 7:02 ` Sheng Yang
0 siblings, 1 reply; 8+ messages in thread
From: Avi Kivity @ 2009-02-18 14:44 UTC (permalink / raw)
To: Sheng Yang; +Cc: Marcelo Tosatti, kvm
Sheng Yang wrote:
> _IOR for copy_to_user and _IOW for copy_from_user...
>
> Noticed by Avi.
>
> Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> ---
> include/linux/kvm.h | 4 ++--
> 1 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index 2163b3d..d742cbf 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -469,10 +469,10 @@ struct kvm_irq_routing {
> _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
> #define KVM_UNREGISTER_COALESCED_MMIO \
> _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
> -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
> +#define KVM_ASSIGN_PCI_DEVICE _IOW(KVMIO, 0x69, \
> struct kvm_assigned_pci_dev)
> #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
> -#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
> +#define KVM_ASSIGN_IRQ _IOW(KVMIO, 0x70, \
> struct kvm_assigned_irq)
> #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
>
>
KVM_ASSIGN_PCI_DEVICE was introduced in 2.6.28. We can't fix it since
it's part of the ABI.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface
2009-02-18 14:44 ` Avi Kivity
@ 2009-02-20 7:02 ` Sheng Yang
0 siblings, 0 replies; 8+ messages in thread
From: Sheng Yang @ 2009-02-20 7:02 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm
On Wednesday 18 February 2009 22:44:15 Avi Kivity wrote:
> Sheng Yang wrote:
> > _IOR for copy_to_user and _IOW for copy_from_user...
> >
> > Noticed by Avi.
> >
> > Signed-off-by: Sheng Yang <sheng@linux.intel.com>
> > ---
> > include/linux/kvm.h | 4 ++--
> > 1 files changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> > index 2163b3d..d742cbf 100644
> > --- a/include/linux/kvm.h
> > +++ b/include/linux/kvm.h
> > @@ -469,10 +469,10 @@ struct kvm_irq_routing {
> > _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
> > #define KVM_UNREGISTER_COALESCED_MMIO \
> > _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
> > -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
> > +#define KVM_ASSIGN_PCI_DEVICE _IOW(KVMIO, 0x69, \
> > struct kvm_assigned_pci_dev)
> > #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct
> > kvm_irq_routing) -#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
> > +#define KVM_ASSIGN_IRQ _IOW(KVMIO, 0x70, \
> > struct kvm_assigned_irq)
> > #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
>
> KVM_ASSIGN_PCI_DEVICE was introduced in 2.6.28. We can't fix it since
> it's part of the ABI.
OK... So please take this one off.
--
regards
Yang, Sheng
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2009-02-20 7:02 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-18 14:26 [PATCH 0/3 v4] MSI-X enabling Sheng Yang
2009-02-18 14:26 ` [PATCH 1/4] KVM: Fix wrong usage of _IOR in assigned device interface Sheng Yang
2009-02-18 14:44 ` Avi Kivity
2009-02-20 7:02 ` Sheng Yang
2009-02-18 14:26 ` [PATCH 2/4] KVM: Ioctls for init MSI-X entry Sheng Yang
2009-02-18 14:26 ` [PATCH 3/4] KVM: Add MSI-X interrupt injection logic Sheng Yang
2009-02-18 14:26 ` [PATCH 4/4] KVM: Enable MSI-X for KVM assigned device Sheng Yang
-- strict thread matches above, loose matches on Subject: below --
2008-12-23 8:12 [PATCH 0/4] MSI-X Enabling Sheng Yang
2008-12-23 8:12 ` [PATCH 4/4] KVM: Enable MSI-X for KVM assigned device Sheng Yang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox