* [PATCH v2 1/8] pci-assign: Fix warnings with DEBUG enabled
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
@ 2012-02-01 5:32 ` Alex Williamson
2012-02-01 5:32 ` [PATCH v2 2/8] pci-assign: Update MSI-X MMIO to Memory API Alex Williamson
` (7 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:32 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 7f4a5ec..d82f274 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -74,7 +74,7 @@ static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
if (fd >= 0) {
if (data) {
- DEBUG("pwrite data=%x, size=%d, e_phys=%x, addr=%x\n",
+ DEBUG("pwrite data=%lx, size=%d, e_phys=%lx, addr=%lx\n",
*data, size, addr, addr);
if (pwrite(fd, data, size, addr) != size) {
fprintf(stderr, "%s - pwrite failed %s\n",
@@ -86,14 +86,14 @@ static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
__func__, strerror(errno));
val = (1UL << (size * 8)) - 1;
}
- DEBUG("pread val=%x, size=%d, e_phys=%x, addr=%x\n",
+ DEBUG("pread val=%lx, size=%d, e_phys=%lx, addr=%lx\n",
val, size, addr, addr);
}
} else {
uint32_t port = addr + dev_region->u.r_baseport;
if (data) {
- DEBUG("out data=%x, size=%d, e_phys=%x, host=%x\n",
+ DEBUG("out data=%lx, size=%d, e_phys=%lx, host=%x\n",
*data, size, addr, port);
switch (size) {
case 1:
@@ -118,7 +118,7 @@ static uint64_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
val = inl(port);
break;
}
- DEBUG("in data=%x, size=%d, e_phys=%x, host=%x\n",
+ DEBUG("in data=%lx, size=%d, e_phys=%lx, host=%x\n",
val, size, addr, port);
}
}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 2/8] pci-assign: Update MSI-X MMIO to Memory API
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
2012-02-01 5:32 ` [PATCH v2 1/8] pci-assign: Fix warnings with DEBUG enabled Alex Williamson
@ 2012-02-01 5:32 ` Alex Williamson
2012-02-01 5:32 ` [PATCH v2 3/8] pci-assign: Use struct for MSI-X table Alex Williamson
` (6 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:32 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
Stop using compatibility mode and at the same time fix available
access sizes. The PCI spec indicates that the MSI-X table may
only be accessed as DWORD or QWORD. 8-byte accesses are still
getting split in exec.c, but this will pre-enable it.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 60 +++++++++++++++---------------------------------
1 files changed, 19 insertions(+), 41 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index d82f274..67c417b 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1430,62 +1430,40 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
return 0;
}
-static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
+ unsigned size)
{
AssignedDevice *adev = opaque;
- unsigned int offset = addr & 0xfff;
- void *page = adev->msix_table_page;
- uint32_t val = 0;
+ uint64_t val;
- memcpy(&val, (void *)((char *)page + offset), 4);
+ memcpy(&val, (void *)((uint8_t *)adev->msix_table_page + addr), size);
return val;
}
-static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
-{
- return ((msix_mmio_readl(opaque, addr & ~3)) >>
- (8 * (addr & 3))) & 0xff;
-}
-
-static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
-{
- return ((msix_mmio_readl(opaque, addr & ~3)) >>
- (8 * (addr & 3))) & 0xffff;
-}
-
-static void msix_mmio_writel(void *opaque,
- target_phys_addr_t addr, uint32_t val)
+static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
+ uint64_t val, unsigned size)
{
AssignedDevice *adev = opaque;
- unsigned int offset = addr & 0xfff;
- void *page = adev->msix_table_page;
- DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
- addr, val);
- memcpy((void *)((char *)page + offset), &val, 4);
-}
+ DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
+ addr, val);
-static void msix_mmio_writew(void *opaque,
- target_phys_addr_t addr, uint32_t val)
-{
- msix_mmio_writel(opaque, addr & ~3,
- (val & 0xffff) << (8*(addr & 3)));
-}
-
-static void msix_mmio_writeb(void *opaque,
- target_phys_addr_t addr, uint32_t val)
-{
- msix_mmio_writel(opaque, addr & ~3,
- (val & 0xff) << (8*(addr & 3)));
+ memcpy((void *)((uint8_t *)adev->msix_table_page + addr), &val, size);
}
static const MemoryRegionOps msix_mmio_ops = {
- .old_mmio = {
- .read = { msix_mmio_readb, msix_mmio_readw, msix_mmio_readl, },
- .write = { msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel, },
- },
+ .read = msix_mmio_read,
+ .write = msix_mmio_write,
.endianness = DEVICE_NATIVE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 8,
+ },
};
static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 3/8] pci-assign: Use struct for MSI-X table
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
2012-02-01 5:32 ` [PATCH v2 1/8] pci-assign: Fix warnings with DEBUG enabled Alex Williamson
2012-02-01 5:32 ` [PATCH v2 2/8] pci-assign: Update MSI-X MMIO to Memory API Alex Williamson
@ 2012-02-01 5:32 ` Alex Williamson
2012-02-01 5:32 ` [PATCH v2 4/8] pci-assign: Only calculate maximum MSI-X vector entries once Alex Williamson
` (5 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:32 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
This makes access much easier.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 55 ++++++++++++++++++++++--------------------------
hw/device-assignment.h | 9 +++++++-
2 files changed, 33 insertions(+), 31 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 67c417b..53c843e 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -966,10 +966,9 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
uint16_t entries_nr = 0, entries_max_nr;
int pos = 0, i, r = 0;
- uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
struct kvm_assigned_msix_nr msix_nr;
struct kvm_assigned_msix_entry msix_entry;
- void *va = adev->msix_table_page;
+ MSIXTableEntry *entry = adev->msix_table;
pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
@@ -978,12 +977,11 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
entries_max_nr += 1;
/* Get the usable entry number for allocating */
- for (i = 0; i < entries_max_nr; i++) {
- memcpy(&msg_ctrl, va + i * 16 + 12, 4);
- memcpy(&msg_data, va + i * 16 + 8, 4);
+ for (i = 0; i < entries_max_nr; i++, entry++) {
/* Ignore unused entry even it's unmasked */
- if (msg_data == 0)
+ if (entry->data == 0) {
continue;
+ }
entries_nr ++;
}
@@ -1006,16 +1004,13 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
entries_nr = 0;
- for (i = 0; i < entries_max_nr; i++) {
+ entry = adev->msix_table;
+ for (i = 0; i < entries_max_nr; i++, entry++) {
if (entries_nr >= msix_nr.entry_nr)
break;
- memcpy(&msg_ctrl, va + i * 16 + 12, 4);
- memcpy(&msg_data, va + i * 16 + 8, 4);
- if (msg_data == 0)
+ if (entry->data == 0) {
continue;
-
- memcpy(&msg_addr, va + i * 16, 4);
- memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
+ }
r = kvm_get_irq_route_gsi();
if (r < 0)
@@ -1024,10 +1019,11 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
adev->entry[entries_nr].gsi = r;
adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
adev->entry[entries_nr].flags = 0;
- adev->entry[entries_nr].u.msi.address_lo = msg_addr;
- adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
- adev->entry[entries_nr].u.msi.data = msg_data;
- DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr);
+ adev->entry[entries_nr].u.msi.address_lo = entry->addr_lo;
+ adev->entry[entries_nr].u.msi.address_hi = entry->addr_hi;
+ adev->entry[entries_nr].u.msi.data = entry->data;
+ DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!",
+ entry->data, entry->addr_lo);
kvm_add_routing_entry(&adev->entry[entries_nr]);
msix_entry.gsi = adev->entry[entries_nr].gsi;
@@ -1436,7 +1432,7 @@ static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
AssignedDevice *adev = opaque;
uint64_t val;
- memcpy(&val, (void *)((uint8_t *)adev->msix_table_page + addr), size);
+ memcpy(&val, (void *)((uint8_t *)adev->msix_table + addr), size);
return val;
}
@@ -1449,7 +1445,7 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
addr, val);
- memcpy((void *)((uint8_t *)adev->msix_table_page + addr), &val, size);
+ memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
}
static const MemoryRegionOps msix_mmio_ops = {
@@ -1468,15 +1464,13 @@ static const MemoryRegionOps msix_mmio_ops = {
static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
{
- dev->msix_table_page = mmap(NULL, 0x1000,
- PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
- if (dev->msix_table_page == MAP_FAILED) {
- fprintf(stderr, "fail allocate msix_table_page! %s\n",
- strerror(errno));
+ dev->msix_table = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+ if (dev->msix_table == MAP_FAILED) {
+ fprintf(stderr, "fail allocate msix_table! %s\n", strerror(errno));
return -EFAULT;
}
- memset(dev->msix_table_page, 0, 0x1000);
+ memset(dev->msix_table, 0, 0x1000);
memory_region_init_io(&dev->mmio, &msix_mmio_ops, dev,
"assigned-dev-msix", MSIX_PAGE_SIZE);
return 0;
@@ -1484,16 +1478,17 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
{
- if (!dev->msix_table_page)
+ if (!dev->msix_table) {
return;
+ }
memory_region_destroy(&dev->mmio);
- if (munmap(dev->msix_table_page, 0x1000) == -1) {
- fprintf(stderr, "error unmapping msix_table_page! %s\n",
+ if (munmap(dev->msix_table, 0x1000) == -1) {
+ fprintf(stderr, "error unmapping msix_table! %s\n",
strerror(errno));
}
- dev->msix_table_page = NULL;
+ dev->msix_table = NULL;
}
static const VMStateDescription vmstate_assigned_device = {
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 1b4aecc..2e06ebb 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -80,6 +80,13 @@ typedef struct {
#define ASSIGNED_DEVICE_USE_IOMMU_MASK (1 << ASSIGNED_DEVICE_USE_IOMMU_BIT)
#define ASSIGNED_DEVICE_PREFER_MSI_MASK (1 << ASSIGNED_DEVICE_PREFER_MSI_BIT)
+typedef struct {
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t data;
+ uint32_t ctrl;
+} MSIXTableEntry;
+
typedef struct AssignedDevice {
PCIDevice dev;
PCIHostDevice host;
@@ -108,7 +115,7 @@ typedef struct AssignedDevice {
uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE];
int irq_entries_nr;
struct kvm_irq_routing_entry *entry;
- void *msix_table_page;
+ MSIXTableEntry *msix_table;
target_phys_addr_t msix_table_addr;
MemoryRegion mmio;
char *configfd_name;
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 4/8] pci-assign: Only calculate maximum MSI-X vector entries once
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
` (2 preceding siblings ...)
2012-02-01 5:32 ` [PATCH v2 3/8] pci-assign: Use struct for MSI-X table Alex Williamson
@ 2012-02-01 5:32 ` Alex Williamson
2012-02-01 5:32 ` [PATCH v2 5/8] pci-assign: Proper initialization for MSI-X table Alex Williamson
` (4 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:32 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
We'll use this in a few more places for reseting the MSI-X
table and ignoring certain accesses, so it seems worth two
bytes to not recalculate all the time.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 17 +++++++----------
hw/device-assignment.h | 1 +
2 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 53c843e..25dff2c 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -964,20 +964,14 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
{
AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
- uint16_t entries_nr = 0, entries_max_nr;
- int pos = 0, i, r = 0;
+ uint16_t entries_nr = 0;
+ int i, r = 0;
struct kvm_assigned_msix_nr msix_nr;
struct kvm_assigned_msix_entry msix_entry;
MSIXTableEntry *entry = adev->msix_table;
- pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
-
- entries_max_nr = *(uint16_t *)(pci_dev->config + pos + 2);
- entries_max_nr &= PCI_MSIX_FLAGS_QSIZE;
- entries_max_nr += 1;
-
/* Get the usable entry number for allocating */
- for (i = 0; i < entries_max_nr; i++, entry++) {
+ for (i = 0; i < adev->msix_max; i++, entry++) {
/* Ignore unused entry even it's unmasked */
if (entry->data == 0) {
continue;
@@ -1005,7 +999,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
entries_nr = 0;
entry = adev->msix_table;
- for (i = 0; i < entries_max_nr; i++, entry++) {
+ for (i = 0; i < adev->msix_max; i++, entry++) {
if (entries_nr >= msix_nr.entry_nr)
break;
if (entry->data == 0) {
@@ -1218,6 +1212,9 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK;
msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK;
dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+ dev->msix_max = pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS);
+ dev->msix_max &= PCI_MSIX_FLAGS_QSIZE;
+ dev->msix_max += 1;
}
/* Minimal PM support, nothing writable, device appears to NAK changes */
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 2e06ebb..b4bcfa6 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -117,6 +117,7 @@ typedef struct AssignedDevice {
struct kvm_irq_routing_entry *entry;
MSIXTableEntry *msix_table;
target_phys_addr_t msix_table_addr;
+ uint16_t msix_max;
MemoryRegion mmio;
char *configfd_name;
int32_t bootindex;
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 5/8] pci-assign: Proper initialization for MSI-X table
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
` (3 preceding siblings ...)
2012-02-01 5:32 ` [PATCH v2 4/8] pci-assign: Only calculate maximum MSI-X vector entries once Alex Williamson
@ 2012-02-01 5:32 ` Alex Williamson
2012-02-01 5:32 ` [PATCH v2 6/8] pci-assign: Allocate entries for all MSI-X vectors Alex Williamson
` (3 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:32 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
Per the PCI spec, all vectors should be masked at handoff.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 20 +++++++++++++++++++-
1 files changed, 19 insertions(+), 1 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 25dff2c..a33e5b9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1459,6 +1459,22 @@ static const MemoryRegionOps msix_mmio_ops = {
},
};
+static void msix_reset(AssignedDevice *dev)
+{
+ MSIXTableEntry *entry;
+ int i;
+
+ if (!dev->msix_table) {
+ return;
+ }
+
+ memset(dev->msix_table, 0, 0x1000);
+
+ for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
+ entry->ctrl = cpu_to_le32(0x1); /* Masked */
+ }
+}
+
static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
{
dev->msix_table = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
@@ -1467,7 +1483,9 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
fprintf(stderr, "fail allocate msix_table! %s\n", strerror(errno));
return -EFAULT;
}
- memset(dev->msix_table, 0, 0x1000);
+
+ msix_reset(dev);
+
memory_region_init_io(&dev->mmio, &msix_mmio_ops, dev,
"assigned-dev-msix", MSIX_PAGE_SIZE);
return 0;
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 6/8] pci-assign: Allocate entries for all MSI-X vectors
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
` (4 preceding siblings ...)
2012-02-01 5:32 ` [PATCH v2 5/8] pci-assign: Proper initialization for MSI-X table Alex Williamson
@ 2012-02-01 5:32 ` Alex Williamson
2012-02-01 5:33 ` [PATCH v2 7/8] pci-assign: Use MSIX_PAGE_SIZE Alex Williamson
` (2 subsequent siblings)
8 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:32 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
We still only initialize the number used in the host. This lets
us do direct access based on MSI-X table offset on write without
needing to translate between physical vector space and initalized
vector space. It's expected that guests will typically use the
majority of the available vectors, so we're likely not allocating
significantly more entires than are used.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 44 ++++++++++++++++++++++----------------------
1 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index a33e5b9..fe10a23 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -627,8 +627,11 @@ static void free_dev_irq_entries(AssignedDevice *dev)
{
int i;
- for (i = 0; i < dev->irq_entries_nr; i++)
- kvm_del_routing_entry(&dev->entry[i]);
+ for (i = 0; i < dev->irq_entries_nr; i++) {
+ if (dev->entry[i].type) {
+ kvm_del_routing_entry(&dev->entry[i]);
+ }
+ }
g_free(dev->entry);
dev->entry = NULL;
dev->irq_entries_nr = 0;
@@ -976,7 +979,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
if (entry->data == 0) {
continue;
}
- entries_nr ++;
+ entries_nr++;
}
if (entries_nr == 0) {
@@ -993,15 +996,13 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
}
free_dev_irq_entries(adev);
- adev->irq_entries_nr = entries_nr;
- adev->entry = g_malloc0(entries_nr * sizeof(*(adev->entry)));
+
+ adev->irq_entries_nr = adev->msix_max;
+ adev->entry = g_malloc0(adev->msix_max * sizeof(*(adev->entry)));
msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
- entries_nr = 0;
entry = adev->msix_table;
for (i = 0; i < adev->msix_max; i++, entry++) {
- if (entries_nr >= msix_nr.entry_nr)
- break;
if (entry->data == 0) {
continue;
}
@@ -1010,26 +1011,25 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
if (r < 0)
return r;
- adev->entry[entries_nr].gsi = r;
- adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
- adev->entry[entries_nr].flags = 0;
- adev->entry[entries_nr].u.msi.address_lo = entry->addr_lo;
- adev->entry[entries_nr].u.msi.address_hi = entry->addr_hi;
- adev->entry[entries_nr].u.msi.data = entry->data;
- DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!",
- entry->data, entry->addr_lo);
- kvm_add_routing_entry(&adev->entry[entries_nr]);
-
- msix_entry.gsi = adev->entry[entries_nr].gsi;
+ adev->entry[i].gsi = r;
+ adev->entry[i].type = KVM_IRQ_ROUTING_MSI;
+ adev->entry[i].flags = 0;
+ adev->entry[i].u.msi.address_lo = entry->addr_lo;
+ adev->entry[i].u.msi.address_hi = entry->addr_hi;
+ adev->entry[i].u.msi.data = entry->data;
+
+ DEBUG("MSI-X vector %d, gsi %d, addr %08x_%08x, data %08x\n", i,
+ r, entry->addr_hi, entry->addr_lo, entry->data);
+
+ kvm_add_routing_entry(&adev->entry[i]);
+
+ msix_entry.gsi = adev->entry[i].gsi;
msix_entry.entry = i;
r = kvm_assign_set_msix_entry(kvm_state, &msix_entry);
if (r) {
fprintf(stderr, "fail to set MSI-X entry! %s\n", strerror(-r));
break;
}
- DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
- msix_entry.gsi, msix_entry.entry);
- entries_nr ++;
}
if (r == 0 && kvm_commit_irq_routes() < 0) {
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 7/8] pci-assign: Use MSIX_PAGE_SIZE
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
` (5 preceding siblings ...)
2012-02-01 5:32 ` [PATCH v2 6/8] pci-assign: Allocate entries for all MSI-X vectors Alex Williamson
@ 2012-02-01 5:33 ` Alex Williamson
2012-02-01 5:33 ` [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes Alex Williamson
2012-02-07 20:04 ` [PATCH v2 0/8] pci-assign: better MSI-X table support Marcelo Tosatti
8 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:33 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
We've already got it defined, use it.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index fe10a23..71685ee 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -1468,7 +1468,7 @@ static void msix_reset(AssignedDevice *dev)
return;
}
- memset(dev->msix_table, 0, 0x1000);
+ memset(dev->msix_table, 0, MSIX_PAGE_SIZE);
for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
entry->ctrl = cpu_to_le32(0x1); /* Masked */
@@ -1477,7 +1477,7 @@ static void msix_reset(AssignedDevice *dev)
static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
{
- dev->msix_table = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
+ dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
if (dev->msix_table == MAP_FAILED) {
fprintf(stderr, "fail allocate msix_table! %s\n", strerror(errno));
@@ -1499,7 +1499,7 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
memory_region_destroy(&dev->mmio);
- if (munmap(dev->msix_table, 0x1000) == -1) {
+ if (munmap(dev->msix_table, MSIX_PAGE_SIZE) == -1) {
fprintf(stderr, "error unmapping msix_table! %s\n",
strerror(errno));
}
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
` (6 preceding siblings ...)
2012-02-01 5:33 ` [PATCH v2 7/8] pci-assign: Use MSIX_PAGE_SIZE Alex Williamson
@ 2012-02-01 5:33 ` Alex Williamson
2012-02-01 7:22 ` Michael S. Tsirkin
2012-02-01 7:25 ` Michael S. Tsirkin
2012-02-07 20:04 ` [PATCH v2 0/8] pci-assign: better MSI-X table support Marcelo Tosatti
8 siblings, 2 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 5:33 UTC (permalink / raw)
To: kvm; +Cc: alex.williamson, avi, mst, jan.kiszka, shashidhar.patil
When a guest enables MSI-X in PCI configuration space we walk
through the MSI-X vector table trying to guess what might get
used. We have to guess because we don't do anything with
writes to the vector table, so we look for clues ahead of time
to pre-enable the vectors we think will be used. This means
that instead of doing the sane thing and test the mask bit, we
test whether the data field contains a non-zero value. It's
amazing this works as well as it does.
However, two key things missed by doing this is that we don't
catch vector changes after enabling (ex. setting smp_affinity
on an irq) and we don't support guests that don't touch the
vector table prior to enabling the MSI-X capability (ex.
freebsd). This patch fixes both of these problems.
NB we're not actually masking vectors yet with this patch as
it's unclear whether we really have the ability to do this
without losing interrupts.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/device-assignment.c | 92 ++++++++++++++++++++++++++++++++++++++++++------
1 files changed, 81 insertions(+), 11 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 71685ee..9791ec9 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -964,6 +964,11 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
}
}
+static bool msix_masked(MSIXTableEntry *entry)
+{
+ return (entry->ctrl & cpu_to_le32(0x1)) != 0;
+}
+
static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
{
AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
@@ -975,17 +980,19 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
/* Get the usable entry number for allocating */
for (i = 0; i < adev->msix_max; i++, entry++) {
- /* Ignore unused entry even it's unmasked */
- if (entry->data == 0) {
+ if (msix_masked(entry)) {
continue;
}
entries_nr++;
}
- if (entries_nr == 0) {
- fprintf(stderr, "MSI-X entry number is zero!\n");
- return -EINVAL;
+ DEBUG("MSI-X entries: %d\n", entries_nr);
+
+ /* It's valid to enable MSI-X with all entries masked */
+ if (!entries_nr) {
+ return 0;
}
+
msix_nr.assigned_dev_id = calc_assigned_dev_id(adev);
msix_nr.entry_nr = entries_nr;
r = kvm_assign_set_msix_nr(kvm_state, &msix_nr);
@@ -1003,7 +1010,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
entry = adev->msix_table;
for (i = 0; i < adev->msix_max; i++, entry++) {
- if (entry->data == 0) {
+ if (msix_masked(entry)) {
continue;
}
@@ -1075,9 +1082,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
perror("assigned_dev_update_msix_mmio");
return;
}
- if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
- perror("assigned_dev_enable_msix: assign irq");
- return;
+
+ if (assigned_dev->irq_entries_nr) {
+ if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
+ perror("assigned_dev_enable_msix: assign irq");
+ return;
+ }
}
assigned_dev->girq = -1;
assigned_dev->irq_requested_type = assigned_irq_data.flags;
@@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
uint64_t val, unsigned size)
{
AssignedDevice *adev = opaque;
+ PCIDevice *pdev = &adev->dev;
+ uint16_t ctrl;
+ MSIXTableEntry orig;
+ int i = addr >> 4;
+
+ if (i >= adev->msix_max) {
+ return; /* Drop write */
+ }
- DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
- addr, val);
+ ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
+
+ DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
+
+ if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
+ orig = adev->msix_table[i];
+ }
memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
+
+ if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
+ MSIXTableEntry *entry = &adev->msix_table[i];
+
+ if (!msix_masked(&orig) && msix_masked(entry)) {
+ /*
+ * Vector masked, disable it
+ *
+ * XXX It's not clear if we can or should actually attempt
+ * to mask or disable the interrupt. KVM doesn't have
+ * support for pending bits and kvm_assign_set_msix_entry
+ * doesn't modify the device hardware mask. Interrupts
+ * while masked are simply not injected to the guest, so
+ * are lost. Can we get away with always injecting an
+ * interrupt on unmask?
+ */
+ } else if (msix_masked(&orig) && !msix_masked(entry)) {
+ /* Vector unmasked */
+ if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
+ /* Previously unassigned vector, start from scratch */
+ assigned_dev_update_msix(pdev);
+ return;
+ } else {
+ /* Update an existing, previously masked vector */
+ struct kvm_irq_routing_entry orig = adev->entry[i];
+ int ret;
+
+ adev->entry[i].u.msi.address_lo = entry->addr_lo;
+ adev->entry[i].u.msi.address_hi = entry->addr_hi;
+ adev->entry[i].u.msi.data = entry->data;
+
+ ret = kvm_update_routing_entry(&orig, &adev->entry[i]);
+ if (ret) {
+ fprintf(stderr,
+ "Error updating irq routing entry (%d)\n", ret);
+ return;
+ }
+
+ ret = kvm_commit_irq_routes();
+ if (ret) {
+ fprintf(stderr,
+ "Error committing irq routes (%d)\n", ret);
+ return;
+ }
+ }
+ }
+ }
}
static const MemoryRegionOps msix_mmio_ops = {
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes
2012-02-01 5:33 ` [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes Alex Williamson
@ 2012-02-01 7:22 ` Michael S. Tsirkin
2012-02-01 13:48 ` Alex Williamson
2012-02-01 7:25 ` Michael S. Tsirkin
1 sibling, 1 reply; 16+ messages in thread
From: Michael S. Tsirkin @ 2012-02-01 7:22 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm, avi, jan.kiszka, shashidhar.patil
On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote:
> When a guest enables MSI-X in PCI configuration space we walk
> through the MSI-X vector table trying to guess what might get
> used. We have to guess because we don't do anything with
> writes to the vector table, so we look for clues ahead of time
> to pre-enable the vectors we think will be used. This means
> that instead of doing the sane thing and test the mask bit, we
> test whether the data field contains a non-zero value. It's
> amazing this works as well as it does.
>
> However, two key things missed by doing this is that we don't
> catch vector changes after enabling (ex. setting smp_affinity
> on an irq) and we don't support guests that don't touch the
> vector table prior to enabling the MSI-X capability (ex.
> freebsd). This patch fixes both of these problems.
>
> NB we're not actually masking vectors yet with this patch as
> it's unclear whether we really have the ability to do this
> without losing interrupts.
>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Overall I think this is better than what we have now.
One question though: would it make sense to use msix mask notifiers
instead of parsing msix tables on your own?
> ---
>
> hw/device-assignment.c | 92 ++++++++++++++++++++++++++++++++++++++++++------
> 1 files changed, 81 insertions(+), 11 deletions(-)
>
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index 71685ee..9791ec9 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -964,6 +964,11 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
> }
> }
>
> +static bool msix_masked(MSIXTableEntry *entry)
> +{
> + return (entry->ctrl & cpu_to_le32(0x1)) != 0;
> +}
> +
> static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> {
> AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> @@ -975,17 +980,19 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
>
> /* Get the usable entry number for allocating */
> for (i = 0; i < adev->msix_max; i++, entry++) {
> - /* Ignore unused entry even it's unmasked */
> - if (entry->data == 0) {
> + if (msix_masked(entry)) {
> continue;
> }
> entries_nr++;
> }
>
> - if (entries_nr == 0) {
> - fprintf(stderr, "MSI-X entry number is zero!\n");
> - return -EINVAL;
> + DEBUG("MSI-X entries: %d\n", entries_nr);
> +
> + /* It's valid to enable MSI-X with all entries masked */
> + if (!entries_nr) {
> + return 0;
> }
> +
> msix_nr.assigned_dev_id = calc_assigned_dev_id(adev);
> msix_nr.entry_nr = entries_nr;
> r = kvm_assign_set_msix_nr(kvm_state, &msix_nr);
> @@ -1003,7 +1010,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
> entry = adev->msix_table;
> for (i = 0; i < adev->msix_max; i++, entry++) {
> - if (entry->data == 0) {
> + if (msix_masked(entry)) {
> continue;
> }
>
> @@ -1075,9 +1082,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
> perror("assigned_dev_update_msix_mmio");
> return;
> }
> - if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> - perror("assigned_dev_enable_msix: assign irq");
> - return;
> +
> + if (assigned_dev->irq_entries_nr) {
> + if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> + perror("assigned_dev_enable_msix: assign irq");
> + return;
> + }
> }
> assigned_dev->girq = -1;
> assigned_dev->irq_requested_type = assigned_irq_data.flags;
> @@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> uint64_t val, unsigned size)
> {
> AssignedDevice *adev = opaque;
> + PCIDevice *pdev = &adev->dev;
> + uint16_t ctrl;
> + MSIXTableEntry orig;
> + int i = addr >> 4;
> +
> + if (i >= adev->msix_max) {
> + return; /* Drop write */
> + }
>
> - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
> - addr, val);
> + ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> +
> + DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> +
> + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> + orig = adev->msix_table[i];
> + }
>
> memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
> +
> + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> + MSIXTableEntry *entry = &adev->msix_table[i];
> +
> + if (!msix_masked(&orig) && msix_masked(entry)) {
> + /*
> + * Vector masked, disable it
> + *
> + * XXX It's not clear if we can or should actually attempt
> + * to mask or disable the interrupt. KVM doesn't have
> + * support for pending bits and kvm_assign_set_msix_entry
> + * doesn't modify the device hardware mask. Interrupts
> + * while masked are simply not injected to the guest, so
> + * are lost. Can we get away with always injecting an
> + * interrupt on unmask?
> + */
> + } else if (msix_masked(&orig) && !msix_masked(entry)) {
> + /* Vector unmasked */
> + if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
> + /* Previously unassigned vector, start from scratch */
> + assigned_dev_update_msix(pdev);
> + return;
> + } else {
> + /* Update an existing, previously masked vector */
> + struct kvm_irq_routing_entry orig = adev->entry[i];
> + int ret;
> +
> + adev->entry[i].u.msi.address_lo = entry->addr_lo;
> + adev->entry[i].u.msi.address_hi = entry->addr_hi;
> + adev->entry[i].u.msi.data = entry->data;
> +
> + ret = kvm_update_routing_entry(&orig, &adev->entry[i]);
> + if (ret) {
> + fprintf(stderr,
> + "Error updating irq routing entry (%d)\n", ret);
> + return;
> + }
> +
> + ret = kvm_commit_irq_routes();
> + if (ret) {
> + fprintf(stderr,
> + "Error committing irq routes (%d)\n", ret);
> + return;
> + }
> + }
> + }
> + }
> }
>
> static const MemoryRegionOps msix_mmio_ops = {
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes
2012-02-01 7:22 ` Michael S. Tsirkin
@ 2012-02-01 13:48 ` Alex Williamson
2012-02-01 14:13 ` Jan Kiszka
2012-02-01 15:13 ` Michael S. Tsirkin
0 siblings, 2 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 13:48 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: kvm, avi, jan.kiszka, shashidhar.patil
On Wed, 2012-02-01 at 09:22 +0200, Michael S. Tsirkin wrote:
> On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote:
> > When a guest enables MSI-X in PCI configuration space we walk
> > through the MSI-X vector table trying to guess what might get
> > used. We have to guess because we don't do anything with
> > writes to the vector table, so we look for clues ahead of time
> > to pre-enable the vectors we think will be used. This means
> > that instead of doing the sane thing and test the mask bit, we
> > test whether the data field contains a non-zero value. It's
> > amazing this works as well as it does.
> >
> > However, two key things missed by doing this is that we don't
> > catch vector changes after enabling (ex. setting smp_affinity
> > on an irq) and we don't support guests that don't touch the
> > vector table prior to enabling the MSI-X capability (ex.
> > freebsd). This patch fixes both of these problems.
> >
> > NB we're not actually masking vectors yet with this patch as
> > it's unclear whether we really have the ability to do this
> > without losing interrupts.
> >
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>
> Overall I think this is better than what we have now.
> One question though: would it make sense to use msix mask notifiers
> instead of parsing msix tables on your own?
Based on vfio, I think some minor changes would be necessary to the msix
core to port to that infrastructure. I don't think I'd want to use this
code to push those changes since it likely won't get ported to qemu.git.
The parsing is pretty trivial now that we're not counting bytes anyway.
Thanks,
Alex
> > ---
> >
> > hw/device-assignment.c | 92 ++++++++++++++++++++++++++++++++++++++++++------
> > 1 files changed, 81 insertions(+), 11 deletions(-)
> >
> > diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> > index 71685ee..9791ec9 100644
> > --- a/hw/device-assignment.c
> > +++ b/hw/device-assignment.c
> > @@ -964,6 +964,11 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
> > }
> > }
> >
> > +static bool msix_masked(MSIXTableEntry *entry)
> > +{
> > + return (entry->ctrl & cpu_to_le32(0x1)) != 0;
> > +}
> > +
> > static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > {
> > AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> > @@ -975,17 +980,19 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> >
> > /* Get the usable entry number for allocating */
> > for (i = 0; i < adev->msix_max; i++, entry++) {
> > - /* Ignore unused entry even it's unmasked */
> > - if (entry->data == 0) {
> > + if (msix_masked(entry)) {
> > continue;
> > }
> > entries_nr++;
> > }
> >
> > - if (entries_nr == 0) {
> > - fprintf(stderr, "MSI-X entry number is zero!\n");
> > - return -EINVAL;
> > + DEBUG("MSI-X entries: %d\n", entries_nr);
> > +
> > + /* It's valid to enable MSI-X with all entries masked */
> > + if (!entries_nr) {
> > + return 0;
> > }
> > +
> > msix_nr.assigned_dev_id = calc_assigned_dev_id(adev);
> > msix_nr.entry_nr = entries_nr;
> > r = kvm_assign_set_msix_nr(kvm_state, &msix_nr);
> > @@ -1003,7 +1010,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
> > entry = adev->msix_table;
> > for (i = 0; i < adev->msix_max; i++, entry++) {
> > - if (entry->data == 0) {
> > + if (msix_masked(entry)) {
> > continue;
> > }
> >
> > @@ -1075,9 +1082,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
> > perror("assigned_dev_update_msix_mmio");
> > return;
> > }
> > - if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> > - perror("assigned_dev_enable_msix: assign irq");
> > - return;
> > +
> > + if (assigned_dev->irq_entries_nr) {
> > + if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> > + perror("assigned_dev_enable_msix: assign irq");
> > + return;
> > + }
> > }
> > assigned_dev->girq = -1;
> > assigned_dev->irq_requested_type = assigned_irq_data.flags;
> > @@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> > uint64_t val, unsigned size)
> > {
> > AssignedDevice *adev = opaque;
> > + PCIDevice *pdev = &adev->dev;
> > + uint16_t ctrl;
> > + MSIXTableEntry orig;
> > + int i = addr >> 4;
> > +
> > + if (i >= adev->msix_max) {
> > + return; /* Drop write */
> > + }
> >
> > - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
> > - addr, val);
> > + ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> > +
> > + DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> > +
> > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > + orig = adev->msix_table[i];
> > + }
> >
> > memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
> > +
> > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > + MSIXTableEntry *entry = &adev->msix_table[i];
> > +
> > + if (!msix_masked(&orig) && msix_masked(entry)) {
> > + /*
> > + * Vector masked, disable it
> > + *
> > + * XXX It's not clear if we can or should actually attempt
> > + * to mask or disable the interrupt. KVM doesn't have
> > + * support for pending bits and kvm_assign_set_msix_entry
> > + * doesn't modify the device hardware mask. Interrupts
> > + * while masked are simply not injected to the guest, so
> > + * are lost. Can we get away with always injecting an
> > + * interrupt on unmask?
> > + */
> > + } else if (msix_masked(&orig) && !msix_masked(entry)) {
> > + /* Vector unmasked */
> > + if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
> > + /* Previously unassigned vector, start from scratch */
> > + assigned_dev_update_msix(pdev);
> > + return;
> > + } else {
> > + /* Update an existing, previously masked vector */
> > + struct kvm_irq_routing_entry orig = adev->entry[i];
> > + int ret;
> > +
> > + adev->entry[i].u.msi.address_lo = entry->addr_lo;
> > + adev->entry[i].u.msi.address_hi = entry->addr_hi;
> > + adev->entry[i].u.msi.data = entry->data;
> > +
> > + ret = kvm_update_routing_entry(&orig, &adev->entry[i]);
> > + if (ret) {
> > + fprintf(stderr,
> > + "Error updating irq routing entry (%d)\n", ret);
> > + return;
> > + }
> > +
> > + ret = kvm_commit_irq_routes();
> > + if (ret) {
> > + fprintf(stderr,
> > + "Error committing irq routes (%d)\n", ret);
> > + return;
> > + }
> > + }
> > + }
> > + }
> > }
> >
> > static const MemoryRegionOps msix_mmio_ops = {
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes
2012-02-01 13:48 ` Alex Williamson
@ 2012-02-01 14:13 ` Jan Kiszka
2012-02-01 15:13 ` Michael S. Tsirkin
1 sibling, 0 replies; 16+ messages in thread
From: Jan Kiszka @ 2012-02-01 14:13 UTC (permalink / raw)
To: Alex Williamson
Cc: Michael S. Tsirkin, kvm@vger.kernel.org, avi@redhat.com,
shashidhar.patil@gmail.com
On 2012-02-01 14:48, Alex Williamson wrote:
> On Wed, 2012-02-01 at 09:22 +0200, Michael S. Tsirkin wrote:
>> On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote:
>>> When a guest enables MSI-X in PCI configuration space we walk
>>> through the MSI-X vector table trying to guess what might get
>>> used. We have to guess because we don't do anything with
>>> writes to the vector table, so we look for clues ahead of time
>>> to pre-enable the vectors we think will be used. This means
>>> that instead of doing the sane thing and test the mask bit, we
>>> test whether the data field contains a non-zero value. It's
>>> amazing this works as well as it does.
>>>
>>> However, two key things missed by doing this is that we don't
>>> catch vector changes after enabling (ex. setting smp_affinity
>>> on an irq) and we don't support guests that don't touch the
>>> vector table prior to enabling the MSI-X capability (ex.
>>> freebsd). This patch fixes both of these problems.
>>>
>>> NB we're not actually masking vectors yet with this patch as
>>> it's unclear whether we really have the ability to do this
>>> without losing interrupts.
>>>
>>> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>
>> Overall I think this is better than what we have now.
>> One question though: would it make sense to use msix mask notifiers
>> instead of parsing msix tables on your own?
>
> Based on vfio, I think some minor changes would be necessary to the msix
> core to port to that infrastructure.
The core will require more changes as the current way of dealing with
KVM support for MSI[-X] is not upstreamable. This will affect the mask
notifier concept more or less as well.
> I don't think I'd want to use this
> code to push those changes since it likely won't get ported to qemu.git.
I'm pretty sure we will have pci assignment based on the KVM kernel
support upstream as well, simply because VFIO will take a while to show
up fully functional in distro kernels and because there is not that much
work required in addition to the core refactoring that we need to do anyway.
Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes
2012-02-01 13:48 ` Alex Williamson
2012-02-01 14:13 ` Jan Kiszka
@ 2012-02-01 15:13 ` Michael S. Tsirkin
1 sibling, 0 replies; 16+ messages in thread
From: Michael S. Tsirkin @ 2012-02-01 15:13 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm, avi, jan.kiszka, shashidhar.patil
On Wed, Feb 01, 2012 at 06:48:32AM -0700, Alex Williamson wrote:
> On Wed, 2012-02-01 at 09:22 +0200, Michael S. Tsirkin wrote:
> > On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote:
> > > When a guest enables MSI-X in PCI configuration space we walk
> > > through the MSI-X vector table trying to guess what might get
> > > used. We have to guess because we don't do anything with
> > > writes to the vector table, so we look for clues ahead of time
> > > to pre-enable the vectors we think will be used. This means
> > > that instead of doing the sane thing and test the mask bit, we
> > > test whether the data field contains a non-zero value. It's
> > > amazing this works as well as it does.
> > >
> > > However, two key things missed by doing this is that we don't
> > > catch vector changes after enabling (ex. setting smp_affinity
> > > on an irq) and we don't support guests that don't touch the
> > > vector table prior to enabling the MSI-X capability (ex.
> > > freebsd). This patch fixes both of these problems.
> > >
> > > NB we're not actually masking vectors yet with this patch as
> > > it's unclear whether we really have the ability to do this
> > > without losing interrupts.
> > >
> > > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> >
> > Overall I think this is better than what we have now.
> > One question though: would it make sense to use msix mask notifiers
> > instead of parsing msix tables on your own?
>
> Based on vfio, I think some minor changes would be necessary to the msix
> core to port to that infrastructure. I don't think I'd want to use this
> code to push those changes since it likely won't get ported to qemu.git.
> The parsing is pretty trivial now that we're not counting bytes anyway.
> Thanks,
>
> Alex
Well the notifiers are a small bit of code,
so I can see myself porting them forward even if
there are no users in qemu.git.
Let me know if you think it makes sense.
>
> > > ---
> > >
> > > hw/device-assignment.c | 92 ++++++++++++++++++++++++++++++++++++++++++------
> > > 1 files changed, 81 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> > > index 71685ee..9791ec9 100644
> > > --- a/hw/device-assignment.c
> > > +++ b/hw/device-assignment.c
> > > @@ -964,6 +964,11 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
> > > }
> > > }
> > >
> > > +static bool msix_masked(MSIXTableEntry *entry)
> > > +{
> > > + return (entry->ctrl & cpu_to_le32(0x1)) != 0;
> > > +}
> > > +
> > > static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > > {
> > > AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
> > > @@ -975,17 +980,19 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > >
> > > /* Get the usable entry number for allocating */
> > > for (i = 0; i < adev->msix_max; i++, entry++) {
> > > - /* Ignore unused entry even it's unmasked */
> > > - if (entry->data == 0) {
> > > + if (msix_masked(entry)) {
> > > continue;
> > > }
> > > entries_nr++;
> > > }
> > >
> > > - if (entries_nr == 0) {
> > > - fprintf(stderr, "MSI-X entry number is zero!\n");
> > > - return -EINVAL;
> > > + DEBUG("MSI-X entries: %d\n", entries_nr);
> > > +
> > > + /* It's valid to enable MSI-X with all entries masked */
> > > + if (!entries_nr) {
> > > + return 0;
> > > }
> > > +
> > > msix_nr.assigned_dev_id = calc_assigned_dev_id(adev);
> > > msix_nr.entry_nr = entries_nr;
> > > r = kvm_assign_set_msix_nr(kvm_state, &msix_nr);
> > > @@ -1003,7 +1010,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> > > msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
> > > entry = adev->msix_table;
> > > for (i = 0; i < adev->msix_max; i++, entry++) {
> > > - if (entry->data == 0) {
> > > + if (msix_masked(entry)) {
> > > continue;
> > > }
> > >
> > > @@ -1075,9 +1082,12 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
> > > perror("assigned_dev_update_msix_mmio");
> > > return;
> > > }
> > > - if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> > > - perror("assigned_dev_enable_msix: assign irq");
> > > - return;
> > > +
> > > + if (assigned_dev->irq_entries_nr) {
> > > + if (kvm_assign_irq(kvm_state, &assigned_irq_data) < 0) {
> > > + perror("assigned_dev_enable_msix: assign irq");
> > > + return;
> > > + }
> > > }
> > > assigned_dev->girq = -1;
> > > assigned_dev->irq_requested_type = assigned_irq_data.flags;
> > > @@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> > > uint64_t val, unsigned size)
> > > {
> > > AssignedDevice *adev = opaque;
> > > + PCIDevice *pdev = &adev->dev;
> > > + uint16_t ctrl;
> > > + MSIXTableEntry orig;
> > > + int i = addr >> 4;
> > > +
> > > + if (i >= adev->msix_max) {
> > > + return; /* Drop write */
> > > + }
> > >
> > > - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
> > > - addr, val);
> > > + ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> > > +
> > > + DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> > > +
> > > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > > + orig = adev->msix_table[i];
> > > + }
> > >
> > > memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
> > > +
> > > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > > + MSIXTableEntry *entry = &adev->msix_table[i];
> > > +
> > > + if (!msix_masked(&orig) && msix_masked(entry)) {
> > > + /*
> > > + * Vector masked, disable it
> > > + *
> > > + * XXX It's not clear if we can or should actually attempt
> > > + * to mask or disable the interrupt. KVM doesn't have
> > > + * support for pending bits and kvm_assign_set_msix_entry
> > > + * doesn't modify the device hardware mask. Interrupts
> > > + * while masked are simply not injected to the guest, so
> > > + * are lost. Can we get away with always injecting an
> > > + * interrupt on unmask?
> > > + */
> > > + } else if (msix_masked(&orig) && !msix_masked(entry)) {
> > > + /* Vector unmasked */
> > > + if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
> > > + /* Previously unassigned vector, start from scratch */
> > > + assigned_dev_update_msix(pdev);
> > > + return;
> > > + } else {
> > > + /* Update an existing, previously masked vector */
> > > + struct kvm_irq_routing_entry orig = adev->entry[i];
> > > + int ret;
> > > +
> > > + adev->entry[i].u.msi.address_lo = entry->addr_lo;
> > > + adev->entry[i].u.msi.address_hi = entry->addr_hi;
> > > + adev->entry[i].u.msi.data = entry->data;
> > > +
> > > + ret = kvm_update_routing_entry(&orig, &adev->entry[i]);
> > > + if (ret) {
> > > + fprintf(stderr,
> > > + "Error updating irq routing entry (%d)\n", ret);
> > > + return;
> > > + }
> > > +
> > > + ret = kvm_commit_irq_routes();
> > > + if (ret) {
> > > + fprintf(stderr,
> > > + "Error committing irq routes (%d)\n", ret);
> > > + return;
> > > + }
> > > + }
> > > + }
> > > + }
> > > }
> > >
> > > static const MemoryRegionOps msix_mmio_ops = {
>
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes
2012-02-01 5:33 ` [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes Alex Williamson
2012-02-01 7:22 ` Michael S. Tsirkin
@ 2012-02-01 7:25 ` Michael S. Tsirkin
2012-02-01 13:51 ` Alex Williamson
1 sibling, 1 reply; 16+ messages in thread
From: Michael S. Tsirkin @ 2012-02-01 7:25 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm, avi, jan.kiszka, shashidhar.patil
On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote:
> @@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> uint64_t val, unsigned size)
> {
> AssignedDevice *adev = opaque;
> + PCIDevice *pdev = &adev->dev;
> + uint16_t ctrl;
> + MSIXTableEntry orig;
> + int i = addr >> 4;
> +
> + if (i >= adev->msix_max) {
> + return; /* Drop write */
> + }
>
> - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
> - addr, val);
> + ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> +
> + DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> +
> + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> + orig = adev->msix_table[i];
> + }
>
> memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
Does the core guarantee alignment even if guest violates the rules?
I ask because we validate i above but don't check the low bits
of addr, so a misaligned call can access beyond the array boundary.
If core does not ensure alignment we must check here ourselves.
> +
> + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> + MSIXTableEntry *entry = &adev->msix_table[i];
> +
> + if (!msix_masked(&orig) && msix_masked(entry)) {
> + /*
> + * Vector masked, disable it
> + *
> + * XXX It's not clear if we can or should actually attempt
> + * to mask or disable the interrupt. KVM doesn't have
> + * support for pending bits and kvm_assign_set_msix_entry
> + * doesn't modify the device hardware mask. Interrupts
> + * while masked are simply not injected to the guest, so
> + * are lost. Can we get away with always injecting an
> + * interrupt on unmask?
> + */
> + } else if (msix_masked(&orig) && !msix_masked(entry)) {
> + /* Vector unmasked */
> + if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
> + /* Previously unassigned vector, start from scratch */
> + assigned_dev_update_msix(pdev);
> + return;
> + } else {
> + /* Update an existing, previously masked vector */
> + struct kvm_irq_routing_entry orig = adev->entry[i];
> + int ret;
> +
> + adev->entry[i].u.msi.address_lo = entry->addr_lo;
> + adev->entry[i].u.msi.address_hi = entry->addr_hi;
> + adev->entry[i].u.msi.data = entry->data;
> +
> + ret = kvm_update_routing_entry(&orig, &adev->entry[i]);
> + if (ret) {
> + fprintf(stderr,
> + "Error updating irq routing entry (%d)\n", ret);
> + return;
> + }
> +
> + ret = kvm_commit_irq_routes();
> + if (ret) {
> + fprintf(stderr,
> + "Error committing irq routes (%d)\n", ret);
> + return;
> + }
> + }
> + }
> + }
> }
>
> static const MemoryRegionOps msix_mmio_ops = {
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes
2012-02-01 7:25 ` Michael S. Tsirkin
@ 2012-02-01 13:51 ` Alex Williamson
0 siblings, 0 replies; 16+ messages in thread
From: Alex Williamson @ 2012-02-01 13:51 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: kvm, avi, jan.kiszka, shashidhar.patil
On Wed, 2012-02-01 at 09:25 +0200, Michael S. Tsirkin wrote:
> On Tue, Jan 31, 2012 at 10:33:14PM -0700, Alex Williamson wrote:
> > @@ -1438,11 +1448,71 @@ static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
> > uint64_t val, unsigned size)
> > {
> > AssignedDevice *adev = opaque;
> > + PCIDevice *pdev = &adev->dev;
> > + uint16_t ctrl;
> > + MSIXTableEntry orig;
> > + int i = addr >> 4;
> > +
> > + if (i >= adev->msix_max) {
> > + return; /* Drop write */
> > + }
> >
> > - DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%lx\n",
> > - addr, val);
> > + ctrl = pci_get_word(pdev->config + pdev->msix_cap + PCI_MSIX_FLAGS);
> > +
> > + DEBUG("write to MSI-X table offset 0x%lx, val 0x%lx\n", addr, val);
> > +
> > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > + orig = adev->msix_table[i];
> > + }
> >
> > memcpy((void *)((uint8_t *)adev->msix_table + addr), &val, size);
>
> Does the core guarantee alignment even if guest violates the rules?
> I ask because we validate i above but don't check the low bits
> of addr, so a misaligned call can access beyond the array boundary.
>
> If core does not ensure alignment we must check here ourselves.
AIUI, if I don't specifically enable unaligned access in the memory
region .valid struct, the core will reject unaligned accesses. Thanks,
Alex
> > +
> > + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> > + MSIXTableEntry *entry = &adev->msix_table[i];
> > +
> > + if (!msix_masked(&orig) && msix_masked(entry)) {
> > + /*
> > + * Vector masked, disable it
> > + *
> > + * XXX It's not clear if we can or should actually attempt
> > + * to mask or disable the interrupt. KVM doesn't have
> > + * support for pending bits and kvm_assign_set_msix_entry
> > + * doesn't modify the device hardware mask. Interrupts
> > + * while masked are simply not injected to the guest, so
> > + * are lost. Can we get away with always injecting an
> > + * interrupt on unmask?
> > + */
> > + } else if (msix_masked(&orig) && !msix_masked(entry)) {
> > + /* Vector unmasked */
> > + if (i >= adev->irq_entries_nr || !adev->entry[i].type) {
> > + /* Previously unassigned vector, start from scratch */
> > + assigned_dev_update_msix(pdev);
> > + return;
> > + } else {
> > + /* Update an existing, previously masked vector */
> > + struct kvm_irq_routing_entry orig = adev->entry[i];
> > + int ret;
> > +
> > + adev->entry[i].u.msi.address_lo = entry->addr_lo;
> > + adev->entry[i].u.msi.address_hi = entry->addr_hi;
> > + adev->entry[i].u.msi.data = entry->data;
> > +
> > + ret = kvm_update_routing_entry(&orig, &adev->entry[i]);
> > + if (ret) {
> > + fprintf(stderr,
> > + "Error updating irq routing entry (%d)\n", ret);
> > + return;
> > + }
> > +
> > + ret = kvm_commit_irq_routes();
> > + if (ret) {
> > + fprintf(stderr,
> > + "Error committing irq routes (%d)\n", ret);
> > + return;
> > + }
> > + }
> > + }
> > + }
> > }
> >
> > static const MemoryRegionOps msix_mmio_ops = {
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 0/8] pci-assign: better MSI-X table support
2012-02-01 5:32 [PATCH v2 0/8] pci-assign: better MSI-X table support Alex Williamson
` (7 preceding siblings ...)
2012-02-01 5:33 ` [PATCH v2 8/8] pci-assign: Update MSI-X config based on table writes Alex Williamson
@ 2012-02-07 20:04 ` Marcelo Tosatti
8 siblings, 0 replies; 16+ messages in thread
From: Marcelo Tosatti @ 2012-02-07 20:04 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm, avi, mst, jan.kiszka, shashidhar.patil
On Tue, Jan 31, 2012 at 10:32:15PM -0700, Alex Williamson wrote:
> This series enables better runtime MSI-X table support so
> that we can track vector updates for routing, enabling
> guest interrupt smp_affinity, as well as vectors setup
> after the MSI-X PCI capability is enabled, allowing for
> MSI-X on devices assigned to FreeBSD guests. Thanks,
>
> Alex
>
> v2:
> - Dropping mem64, we can work on this elsewhere
> - Fixed MemoryRegionOps to use .valid, tested 8-byte access
> - Endian conversions when testing MSI-X vector mask bit
> - Patch description updates
> - Misc cleanups
>
> ---
>
> Alex Williamson (8):
> pci-assign: Update MSI-X config based on table writes
> pci-assign: Use MSIX_PAGE_SIZE
> pci-assign: Allocate entries for all MSI-X vectors
> pci-assign: Proper initialization for MSI-X table
> pci-assign: Only calculate maximum MSI-X vector entries once
> pci-assign: Use struct for MSI-X table
> pci-assign: Update MSI-X MMIO to Memory API
> pci-assign: Fix warnings with DEBUG enabled
>
>
> hw/device-assignment.c | 262 +++++++++++++++++++++++++++++-------------------
> hw/device-assignment.h | 10 ++
> 2 files changed, 169 insertions(+), 103 deletions(-)
Applied, thanks.
^ permalink raw reply [flat|nested] 16+ messages in thread