* [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit
2010-12-22 9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
@ 2010-12-22 9:25 ` Sheng Yang
2010-12-22 9:25 ` [PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support Sheng Yang
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Sheng Yang @ 2010-12-22 9:25 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang
The old MSI-X enabling method assumes that the entries are written before
MSI-X is enabled, but some OSes, e.g. FreeBSD, do not follow this order.
This patch fixes that by enabling entries according to their mask bits.
Also, according to the PCI spec, the mask bit of each MSI-X table entry
should be set after reset.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
hw/device-assignment.c | 188 +++++++++++++++++++++++++++++++++++++++++-------
hw/device-assignment.h | 2 +-
2 files changed, 162 insertions(+), 28 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 832c236..ed0b491 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1111,15 +1111,12 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
#endif
#ifdef KVM_CAP_DEVICE_MSIX
-static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+
+#define PCI_MSIX_CTRL_MASKBIT 1ul
+static int get_msix_entries_max_nr(AssignedDevice *adev)
{
- AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
- uint16_t entries_nr = 0, entries_max_nr;
- int pos = 0, i, r = 0;
- uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
- struct kvm_assigned_msix_nr msix_nr;
- struct kvm_assigned_msix_entry msix_entry;
- void *va = adev->msix_table_page;
+ int pos, entries_max_nr;
+ PCIDevice *pci_dev = &adev->dev;
pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
@@ -1127,20 +1124,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
entries_max_nr &= PCI_MSIX_TABSIZE;
entries_max_nr += 1;
+ return entries_max_nr;
+}
+
+static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
+{
+ uint32_t msg_ctrl;
+ void *va = adev->msix_table_page;
+
+ memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
+ return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
+}
+
+static int get_msix_valid_entries_nr(AssignedDevice *adev,
+ uint16_t entries_max_nr)
+{
+ void *va = adev->msix_table_page;
+ uint32_t msg_ctrl;
+ uint16_t entries_nr = 0;
+ int i;
+
/* Get the usable entry number for allocating */
for (i = 0; i < entries_max_nr; i++) {
memcpy(&msg_ctrl, va + i * 16 + 12, 4);
- memcpy(&msg_data, va + i * 16 + 8, 4);
/* Ignore unused entry even it's unmasked */
- if (msg_data == 0)
+ if (assigned_dev_msix_entry_masked(adev, i))
continue;
entries_nr ++;
}
+ return entries_nr;
+}
+
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
+ uint16_t entries_nr,
+ uint16_t entries_max_nr)
+{
+ AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+ int i, r = 0;
+ uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+ struct kvm_assigned_msix_nr msix_nr;
+ struct kvm_assigned_msix_entry msix_entry;
+ void *va = adev->msix_table_page;
- if (entries_nr == 0) {
- fprintf(stderr, "MSI-X entry number is zero!\n");
- return -EINVAL;
- }
msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
(uint8_t)adev->h_devfn);
msix_nr.entry_nr = entries_nr;
@@ -1152,6 +1177,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
}
free_dev_irq_entries(adev);
+ memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+ sizeof(*pci_dev->msix_entry_used));
adev->irq_entries_nr = entries_nr;
adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
if (!adev->entry) {
@@ -1165,10 +1192,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
if (entries_nr >= msix_nr.entry_nr)
break;
memcpy(&msg_ctrl, va + i * 16 + 12, 4);
- memcpy(&msg_data, va + i * 16 + 8, 4);
- if (msg_data == 0)
+ if (assigned_dev_msix_entry_masked(adev, i))
continue;
+ memcpy(&msg_data, va + i * 16 + 8, 4);
memcpy(&msg_addr, va + i * 16, 4);
memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
@@ -1182,17 +1209,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
adev->entry[entries_nr].u.msi.address_lo = msg_addr;
adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
adev->entry[entries_nr].u.msi.data = msg_data;
- DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
- kvm_add_routing_entry(&adev->entry[entries_nr]);
+ DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
+ kvm_add_routing_entry(&adev->entry[entries_nr]);
msix_entry.gsi = adev->entry[entries_nr].gsi;
msix_entry.entry = i;
+ pci_dev->msix_entry_used[i] = 1;
r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
if (r) {
fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
break;
}
- DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
+ DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
msix_entry.gsi, msix_entry.entry);
entries_nr ++;
}
@@ -1209,20 +1237,24 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
{
struct kvm_assigned_irq assigned_irq_data;
AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
- uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
+ uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos);
int r;
+ uint16_t entries_nr, entries_max_nr;
+ int enable_msix;
memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
assigned_irq_data.assigned_dev_id =
calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
(uint8_t)assigned_dev->h_devfn);
+ enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) &&
+ !(ctrl_word & PCI_MSIX_MASK));
+
/* Some guests gratuitously disable MSIX even if they're not using it,
* try to catch this by only deassigning irqs if the guest is using
* MSIX or intends to start. */
if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
- (*ctrl_word & PCI_MSIX_ENABLE)) {
-
+ enable_msix) {
assigned_irq_data.flags = assigned_dev->irq_requested_type;
free_dev_irq_entries(assigned_dev);
r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
@@ -1231,16 +1263,30 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
perror("assigned_dev_update_msix: deassign irq");
assigned_dev->irq_requested_type = 0;
+ memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
+ sizeof(*pci_dev->msix_entry_used));
}
- if (*ctrl_word & PCI_MSIX_ENABLE) {
- assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
- KVM_DEV_IRQ_GUEST_MSIX;
-
- if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+ entries_max_nr = assigned_dev->max_msix_entries_nr;
+ if (entries_max_nr == 0) {
+ fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr == 0");
+ return;
+ }
+ /*
+ * Guest may try to enable MSI-X before setting MSI-X entry done, so
+ * let's wait until guest unmask the entries.
+ */
+ entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
+ if (entries_nr == 0)
+ return;
+ if (enable_msix) {
+ if (assigned_dev_update_msix_mmio(pci_dev,
+ entries_nr, entries_max_nr) < 0) {
perror("assigned_dev_update_msix_mmio");
return;
}
+ assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
+ KVM_DEV_IRQ_GUEST_MSIX;
if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
perror("assigned_dev_enable_msix: assign irq");
return;
@@ -1341,6 +1387,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
bar_nr = msix_table_entry & PCI_MSIX_BIR;
msix_table_entry &= ~PCI_MSIX_BIR;
dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+ dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
}
#endif
#endif
@@ -1378,10 +1425,90 @@ static void msix_mmio_writel(void *opaque,
AssignedDevice *adev = opaque;
unsigned int offset = addr & 0xfff;
void *page = adev->msix_table_page;
+ int ctrl_word, index;
+ struct kvm_irq_routing_entry new_entry = {};
+ int entry_idx, entries_max_nr, r = 0, i;
+ uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
+ struct PCIDevice *pci_dev = &adev->dev;
+ uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
addr, val);
memcpy((void *)((char *)page + offset), &val, 4);
+
+ index = offset / 16;
+
+ /* Check if mask bit is being accessed */
+ memcpy(&msg_addr, (char *)page + index * 16, 4);
+ memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
+ memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
+ memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
+ DEBUG("MSI-X entries index %d: "
+ "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl 0x%x\n",
+ index, msg_addr, msg_upper_addr, msg_data, msg_ctrl);
+
+ ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS);
+
+ if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
+ return;
+
+ if (!assigned_dev_msix_entry_masked(adev, index)) {
+ if (!adev->dev.msix_entry_used[index]) {
+ DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
+ "Reenable MSI-X.\n",
+ index);
+ assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS);
+ }
+ return;
+ }
+
+ if (!adev->dev.msix_entry_used[index])
+ return;
+
+ entries_max_nr = adev->max_msix_entries_nr;
+
+ /*
+ * Find the index of routing entry, it can be different from 'index' if
+ * empty entry existed in between
+ */
+ entry_idx = -1;
+ for (i = 0; i <= index; i++) {
+ if (adev->dev.msix_entry_used[i])
+ entry_idx ++;
+ }
+ if (entry_idx >= entries_max_nr || entry_idx == -1) {
+ fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
+ entry_idx);
+ return;
+ }
+
+ if (!assigned_dev_msix_entry_masked(adev, index)) {
+ fprintf(stderr, "msix_mmio_writel: Trying write to unmasked entry!\n");
+ return;
+ }
+
+ new_entry.gsi = adev->entry[entry_idx].gsi;
+ new_entry.type = KVM_IRQ_ROUTING_MSI;
+ new_entry.flags = 0;
+ new_entry.u.msi.address_lo = msg_addr;
+ new_entry.u.msi.address_hi = msg_upper_addr;
+ new_entry.u.msi.data = msg_data;
+ if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
+ sizeof new_entry.u.msi)) {
+ r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
+ if (r) {
+ perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
+ return;
+ }
+ r = kvm_commit_irq_routes();
+ if (r) {
+ perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
+ return;
+ }
+ }
+ adev->entry[entry_idx].u.msi.address_lo = msg_addr;
+ adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
+ adev->entry[entry_idx].u.msi.data = msg_data;
}
static void msix_mmio_writew(void *opaque,
@@ -1408,6 +1535,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
{
+ int i;
dev->msix_table_page = mmap(NULL, 0x1000,
PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
@@ -1417,8 +1545,12 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
return -EFAULT;
}
memset(dev->msix_table_page, 0, 0x1000);
+ for (i = 0; i < 0x1000; i += 0x10)
+ *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
dev->mmio_index = cpu_register_io_memory(
msix_mmio_read, msix_mmio_write, dev);
+ dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
+ sizeof *dev->dev.msix_entry_used);
return 0;
}
@@ -1435,6 +1567,8 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
strerror(errno));
}
dev->msix_table_page = NULL;
+ free(dev->dev.msix_entry_used);
+ dev->dev.msix_entry_used = NULL;
}
static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index c94a730..754e5c0 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -104,7 +104,7 @@ typedef struct AssignedDevice {
#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
uint32_t state;
} cap;
- int irq_entries_nr;
+ int irq_entries_nr, max_msix_entries_nr;
struct kvm_irq_routing_entry *entry;
void *msix_table_page;
target_phys_addr_t msix_table_addr;
--
1.7.0.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support
2010-12-22 9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
2010-12-22 9:25 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
@ 2010-12-22 9:25 ` Sheng Yang
2010-12-22 9:25 ` [PATCH 3/4] qemu-kvm: Header file update for MSI-X " Sheng Yang
2010-12-22 9:25 ` [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
3 siblings, 0 replies; 5+ messages in thread
From: Sheng Yang @ 2010-12-22 9:25 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
qemu-kvm.c | 14 ++++++++++++++
qemu-kvm.h | 7 +++++++
2 files changed, 21 insertions(+), 0 deletions(-)
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 471306b..956b62a 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1050,6 +1050,20 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm,
}
#endif
+#ifdef KVM_CAP_MSIX_MMIO
+int kvm_register_msix_mmio(kvm_context_t kvm,
+ struct kvm_msix_mmio_user *mmio_user)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_REGISTER_MSIX_MMIO, mmio_user);
+}
+
+int kvm_unregister_msix_mmio(kvm_context_t kvm,
+ struct kvm_msix_mmio_user *mmio_user)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_UNREGISTER_MSIX_MMIO, mmio_user);
+}
+#endif
+
#if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)
#include <sys/eventfd.h>
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 7e6edfb..86799e6 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -602,6 +602,13 @@ int kvm_assign_set_msix_entry(kvm_context_t kvm,
struct kvm_assigned_msix_entry *entry);
#endif
+#ifdef KVM_CAP_MSIX_MMIO
+int kvm_register_msix_mmio(kvm_context_t kvm,
+ struct kvm_msix_mmio_user *mmio_user);
+int kvm_unregister_msix_mmio(kvm_context_t kvm,
+ struct kvm_msix_mmio_user *mmio_user);
+#endif
+
#else /* !CONFIG_KVM */
typedef struct kvm_context *kvm_context_t;
--
1.7.0.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 3/4] qemu-kvm: Header file update for MSI-X MMIO support
2010-12-22 9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
2010-12-22 9:25 ` [PATCH 1/4] qemu-kvm: device assignment: Enabling MSI-X according to the entries' mask bit Sheng Yang
2010-12-22 9:25 ` [PATCH 2/4] qemu-kvm: Ioctl for MSIX MMIO support Sheng Yang
@ 2010-12-22 9:25 ` Sheng Yang
2010-12-22 9:25 ` [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device Sheng Yang
3 siblings, 0 replies; 5+ messages in thread
From: Sheng Yang @ 2010-12-22 9:25 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
kvm/include/linux/kvm.h | 22 ++++++++++++++++++++++
1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index e46729e..e11d2b2 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -161,6 +161,7 @@ struct kvm_pit_config {
#define KVM_EXIT_NMI 16
#define KVM_EXIT_INTERNAL_ERROR 17
#define KVM_EXIT_OSI 18
+#define KVM_EXIT_MSIX_ROUTING_UPDATE 19
/* For KVM_EXIT_INTERNAL_ERROR */
#define KVM_INTERNAL_ERROR_EMULATION 1
@@ -530,6 +531,7 @@ struct kvm_enable_cap {
#ifdef __KVM_HAVE_XCRS
#define KVM_CAP_XCRS 56
#endif
+#define KVM_CAP_MSIX_MMIO 60
#ifdef KVM_CAP_IRQ_ROUTING
@@ -660,6 +662,9 @@ struct kvm_clock_data {
#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_MSIX_MMIO */
+#define KVM_REGISTER_MSIX_MMIO _IOW(KVMIO, 0x7d, struct kvm_msix_mmio_user)
+#define KVM_UNREGISTER_MSIX_MMIO _IOW(KVMIO, 0x7e, struct kvm_msix_mmio_user)
/* Available with KVM_CAP_PIT_STATE2 */
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
@@ -781,4 +786,21 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};
+#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV (1 << 0)
+
+#define KVM_MSIX_MMIO_TYPE_BASE_TABLE (1 << 8)
+#define KVM_MSIX_MMIO_TYPE_BASE_PBA (1 << 9)
+
+#define KVM_MSIX_MMIO_TYPE_DEV_MASK 0x00ff
+#define KVM_MSIX_MMIO_TYPE_BASE_MASK 0xff00
+struct kvm_msix_mmio_user {
+ __u32 dev_id;
+ __u16 type;
+ __u16 max_entries_nr;
+ __u64 base_addr;
+ __u64 base_va;
+ __u64 flags;
+ __u64 reserved[4];
+};
+
#endif /* __LINUX_KVM_H */
--
1.7.0.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device
2010-12-22 9:25 [PATCH 0/4 v6] MSI-X MMIO support in userspace for assigned devices Sheng Yang
` (2 preceding siblings ...)
2010-12-22 9:25 ` [PATCH 3/4] qemu-kvm: Header file update for MSI-X " Sheng Yang
@ 2010-12-22 9:25 ` Sheng Yang
3 siblings, 0 replies; 5+ messages in thread
From: Sheng Yang @ 2010-12-22 9:25 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti; +Cc: Michael S. Tsirkin, kvm, Sheng Yang
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
hw/device-assignment.c | 143 +++++++++++++++++++++++++++++++++++++++++++-----
hw/device-assignment.h | 7 ++-
qemu-kvm.c | 36 ++++++++++++
qemu-kvm.h | 11 ++++
4 files changed, 180 insertions(+), 17 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index ed0b491..0aec1f4 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -67,6 +67,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
uint32_t address,
uint32_t val, int len);
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{
+ return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
+}
+
static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
uint32_t addr, int len, uint32_t *val)
{
@@ -269,6 +274,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
AssignedDevRegion *region = &r_dev->v_addrs[region_num];
PCIRegion *real_region = &r_dev->real_device.regions[region_num];
int ret = 0;
+#ifdef KVM_CAP_MSIX_MMIO
+ int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+ struct kvm_msix_mmio_user msix_mmio;
+#endif
DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
e_phys, region->u.r_virtbase, type, e_size, region_num);
@@ -287,6 +296,45 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
cpu_register_physical_memory(e_phys + offset,
TARGET_PAGE_SIZE, r_dev->mmio_index);
+#ifdef KVM_CAP_MSIX_MMIO
+ if (cap_mask) {
+ r_dev->guest_msix_table_addr = e_phys + offset;
+ memset(&msix_mmio, 0, sizeof msix_mmio);
+ msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+ r_dev->h_busnr, r_dev->h_devfn);
+ msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+ KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+ msix_mmio.base_addr = e_phys + offset;
+ msix_mmio.base_va = (unsigned long)r_dev->msix_table_page;
+ msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+ msix_mmio.flags = 0;
+ ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+ if (ret)
+ fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+ }
+#endif
+ if (real_region->base_addr <= r_dev->msix_pba_addr &&
+ real_region->base_addr + real_region->size >=
+ r_dev->msix_pba_addr) {
+#ifdef KVM_CAP_MSIX_MMIO
+ int offset = r_dev->msix_pba_addr - real_region->base_addr;
+ if (cap_mask) {
+ r_dev->guest_msix_pba_addr = e_phys + offset;
+ memset(&msix_mmio, 0, sizeof msix_mmio);
+ msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+ r_dev->h_busnr, r_dev->h_devfn);
+ msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+ KVM_MSIX_MMIO_TYPE_BASE_PBA;
+ msix_mmio.base_addr = e_phys + offset;
+ msix_mmio.base_va = (unsigned long)r_dev->msix_pba_page;
+ msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+ msix_mmio.flags = 0;
+ ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+ if (ret)
+ fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+ }
+#endif
+ }
}
}
@@ -822,11 +870,6 @@ static void free_assigned_device(AssignedDevice *dev)
}
}
-static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
-{
- return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
-}
-
static void assign_failed_examine(AssignedDevice *dev)
{
char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
@@ -1233,6 +1276,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
return r;
}
+static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr);
+
static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
{
struct kvm_assigned_irq assigned_irq_data;
@@ -1368,8 +1413,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
#ifdef KVM_CAP_DEVICE_MSIX
/* Expose MSI-X capability */
if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX))) {
- int bar_nr;
- uint32_t msix_table_entry;
+ int table_bar_nr, pba_bar_nr;
+ uint32_t msix_table_entry, msix_pba_entry;
dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
pci_add_capability(pci_dev, PCI_CAP_ID_MSIX, pos,
@@ -1384,9 +1429,17 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
- bar_nr = msix_table_entry & PCI_MSIX_BIR;
+ table_bar_nr = msix_table_entry & PCI_MSIX_BIR;
msix_table_entry &= ~PCI_MSIX_BIR;
- dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+ dev->msix_table_addr = pci_region[table_bar_nr].base_addr +
+ msix_table_entry;
+
+ msix_pba_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_PBA);
+ pba_bar_nr = msix_pba_entry & PCI_MSIX_BIR;
+ msix_pba_entry &= ~PCI_MSIX_BIR;
+ dev->msix_pba_addr = pci_region[pba_bar_nr].base_addr +
+ msix_pba_entry;
+
dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
}
#endif
@@ -1419,8 +1472,7 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
(8 * (addr & 3))) & 0xffff;
}
-static void msix_mmio_writel(void *opaque,
- target_phys_addr_t addr, uint32_t val)
+static void assigned_dev_update_routing(void *opaque, unsigned long addr)
{
AssignedDevice *adev = opaque;
unsigned int offset = addr & 0xfff;
@@ -1432,10 +1484,6 @@ static void msix_mmio_writel(void *opaque,
struct PCIDevice *pci_dev = &adev->dev;
uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
- DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
- addr, val);
- memcpy((void *)((char *)page + offset), &val, 4);
-
index = offset / 16;
/* Check if mask bit is being accessed */
@@ -1511,6 +1559,41 @@ static void msix_mmio_writel(void *opaque,
adev->entry[entry_idx].u.msi.data = msg_data;
}
+static int assigned_dev_update_routing_handler(void *opaque, unsigned long addr)
+{
+ AssignedDevice *adev = opaque;
+
+ if (addr >= adev->guest_msix_table_addr &&
+ addr < adev->guest_msix_table_addr + adev->max_msix_entries_nr * 16) {
+ assigned_dev_update_routing(opaque, addr);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static void msix_mmio_writel(void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevice *adev = opaque;
+ void *page = adev->msix_table_page;
+ unsigned int offset = addr & 0xfff;
+#ifdef KVM_CAP_MSIX_MMIO
+ int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+#else
+ int cap_mask = 0;
+#endif
+
+ DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
+ addr, val);
+ if (!cap_mask) {
+ memcpy((void *)((char *)page + offset), &val, 4);
+ } else {
+ fprintf(stderr, "msix_mmio_writel: shouldn't be here with KVM_CAP_MSIX_MMIO!\n");
+ }
+
+ assigned_dev_update_routing(opaque, addr);
+}
+
static void msix_mmio_writew(void *opaque,
target_phys_addr_t addr, uint32_t val)
{
@@ -1547,11 +1630,32 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
memset(dev->msix_table_page, 0, 0x1000);
for (i = 0; i < 0x1000; i += 0x10)
*(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
+ dev->msix_pba_page = mmap(NULL, 0x1000,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+ if (dev->msix_pba_page == MAP_FAILED) {
+ fprintf(stderr, "fail allocate msix_table_page! %s\n",
+ strerror(errno));
+ goto out;
+ }
+ memset(dev->msix_pba_page, 0, 0x1000);
dev->mmio_index = cpu_register_io_memory(
msix_mmio_read, msix_mmio_write, dev);
dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
sizeof *dev->dev.msix_entry_used);
+ dev->routing_updater_entry =
+ kvm_add_routing_updater(assigned_dev_update_routing_handler, dev);
+ if (!dev->routing_updater_entry) {
+ perror("kvm_add_routing_updater");
+ goto out2;
+ }
return 0;
+out2:
+ free(dev->dev.msix_entry_used);
+ munmap(dev->msix_pba_page, 0x1000);
+out:
+ munmap(dev->msix_table_page, 0x1000);
+ return -EFAULT;
}
static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
@@ -1567,6 +1671,15 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
strerror(errno));
}
dev->msix_table_page = NULL;
+ if (munmap(dev->msix_pba_page, 0x1000) == -1) {
+ fprintf(stderr, "error unmapping msix_table_page! %s\n",
+ strerror(errno));
+ }
+ if (dev->routing_updater_entry) {
+ kvm_del_routing_updater(dev->routing_updater_entry);
+ dev->routing_updater_entry = NULL;
+ }
+ dev->msix_pba_page = NULL;
free(dev->dev.msix_entry_used);
dev->dev.msix_entry_used = NULL;
}
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 754e5c0..9288753 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -32,6 +32,7 @@
#include "qemu-common.h"
#include "qemu-queue.h"
#include "pci.h"
+#include "qemu-kvm.h"
/* From include/linux/pci.h in the kernel sources */
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
@@ -106,11 +107,13 @@ typedef struct AssignedDevice {
} cap;
int irq_entries_nr, max_msix_entries_nr;
struct kvm_irq_routing_entry *entry;
- void *msix_table_page;
- target_phys_addr_t msix_table_addr;
+ void *msix_table_page, *msix_pba_page;
+ target_phys_addr_t msix_table_addr, msix_pba_addr;
+ target_phys_addr_t guest_msix_table_addr, guest_msix_pba_addr;
int mmio_index;
int need_emulate_cmd;
char *configfd_name;
+ KVMRoutingUpdateEntry *routing_updater_entry;
QLIST_ENTRY(AssignedDevice) next;
} AssignedDevice;
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 956b62a..bee398c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -511,6 +511,38 @@ static int handle_mmio(CPUState *env)
return 0;
}
+static QLIST_HEAD(kvm_routing_update_entry_head, kvm_routing_update_entry) kvm_routing_update_entry_head;
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque)
+{
+ KVMRoutingUpdateEntry *e;
+
+ e = qemu_mallocz(sizeof (*e));
+
+ e->cb = cb;
+ e->opaque = opaque;
+ QLIST_INSERT_HEAD(&kvm_routing_update_entry_head, e, entries);
+ return e;
+}
+
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e)
+{
+ QLIST_REMOVE(e, entries);
+ qemu_free(e);
+}
+
+static void kvm_update_msix_routing(CPUState *env)
+{
+ unsigned long addr = env->kvm_run->mmio.phys_addr;
+ KVMRoutingUpdateEntry *e;
+
+ for (e = kvm_routing_update_entry_head.lh_first; e; e = e->entries.le_next) {
+ if (e->cb(e->opaque, addr) == 0)
+ return;
+ }
+ fprintf(stderr, "unhandled MSI-X routing update addr: 0x%lx\n", addr);
+}
+
int handle_io_window(kvm_context_t kvm)
{
return 1;
@@ -647,6 +679,10 @@ int kvm_run(CPUState *env)
kvm_handle_internal_error(env, run);
r = 1;
break;
+ case KVM_EXIT_MSIX_ROUTING_UPDATE:
+ kvm_update_msix_routing(env);
+ r = 1;
+ break;
default:
if (kvm_arch_run(env)) {
fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 86799e6..21a3274 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -772,6 +772,17 @@ int kvm_tpr_enable_vapic(CPUState *env);
unsigned long kvm_get_thread_id(void);
int kvm_cpu_is_stopped(CPUState *env);
+typedef struct kvm_routing_update_entry KVMRoutingUpdateEntry;
+typedef int KVMRoutingUpdateHandler(void *opaque, unsigned long addr);
+
+struct kvm_routing_update_entry {
+ KVMRoutingUpdateHandler *cb;
+ void *opaque;
+ QLIST_ENTRY (kvm_routing_update_entry) entries;
+};
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque);
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e);
#endif
#endif
--
1.7.0.1
^ permalink raw reply related [flat|nested] 5+ messages in thread