[Qemu-devel] [PATCHv2] qemu: make default_write

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table
       [not found] <cover.1243253205.git.mst@redhat.com>
@ 2009-05-25 12:12 ` Michael S. Tsirkin
  2009-05-25 12:24 ` [Qemu-devel] [PATCH 01/11] " Michael S. Tsirkin
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:12 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl,
	yamahata

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and readonly bits unset.
Detect change by comparing original and new registers.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

This fixes a minor bug in previous version of the patch:
pci config header size should be 0x40.
yamahata, you probably want to rebase your patch on top of this.

 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0ab5b94..1e70e46 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -239,6 +239,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_mask(PCIDevice *dev)
+{
+    int i;
+    dev->mask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->mask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->mask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                             | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->mask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -261,6 +272,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_mask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -334,6 +346,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t mask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -349,12 +362,17 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    mask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        mask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->mask + addr) = cpu_to_le32(mask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -463,118 +481,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t mask = d->mask[addr];
+        d->config[addr] = (d->config[addr] & ~mask) | (val & mask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -847,16 +768,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index 0405837..c0c2380 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -100,16 +100,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -139,10 +147,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t mask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 01/11] qemu: make default_write_config use mask table
       [not found] <cover.1243253205.git.mst@redhat.com>
  2009-05-25 12:12 ` [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table Michael S. Tsirkin
@ 2009-05-25 12:24 ` Michael S. Tsirkin
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 02/11] qemu: capability bits in pci save/restore Michael S. Tsirkin
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:24 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and readonly bits unset.
Detect change by comparing original and new registers.

This makes it easy to support capabilities where read-only/writeable
bit layout differs between devices, depending on capabilities present.

As a result, writing a single byte in BAR registers now works as
it should. Writing to upper limit registers in the bridge
also works as it should. Code is also shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

This is the same as v2 of the patch that I sent previously.
It is included here for completeness.

 hw/pci.c |  145 ++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 46 insertions(+), 117 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 0ab5b94..1e70e46 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -239,6 +239,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_mask(PCIDevice *dev)
+{
+    int i;
+    dev->mask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->mask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->mask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                             | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->mask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -261,6 +272,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_mask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -334,6 +346,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t mask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -349,12 +362,17 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    mask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        mask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->mask + addr) = cpu_to_le32(mask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -463,118 +481,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t mask = d->mask[addr];
+        d->config[addr] = (d->config[addr] & ~mask) | (val & mask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -847,16 +768,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index 0405837..c0c2380 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -100,16 +100,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -139,10 +147,18 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE 0x40
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t mask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 02/11] qemu: capability bits in pci save/restore
       [not found] <cover.1243253205.git.mst@redhat.com>
  2009-05-25 12:12 ` [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table Michael S. Tsirkin
  2009-05-25 12:24 ` [Qemu-devel] [PATCH 01/11] " Michael S. Tsirkin
@ 2009-05-25 12:25 ` Michael S. Tsirkin
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:25 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Add support for capability bits in save/restore for pci.
These will be used for MSI, where the capability might
be present or not as requested by user, which does not
map well into a single version number.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   14 ++++++++++++--
 hw/pci.h |    4 ++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 1e70e46..5dcfb4e 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -127,12 +127,15 @@ int pci_bus_num(PCIBus *s)
 
 void pci_device_save(PCIDevice *s, QEMUFile *f)
 {
+    int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, 2); /* PCI device version */
+    qemu_put_be32(f, version); /* PCI device version */
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -141,7 +144,7 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     int i;
 
     version_id = qemu_get_be32(f);
-    if (version_id > 2)
+    if (version_id > 3)
         return -EINVAL;
     qemu_get_buffer(f, s->config, 256);
     pci_update_mappings(s);
@@ -149,6 +152,13 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (version_id >= 2)
         for (i = 0; i < 4; i ++)
             s->irq_state[i] = qemu_get_be32(f);
+    if (version_id >= 3)
+        s->cap_present = qemu_get_be32(f);
+    else
+        s->cap_present = 0;
+
+    if (s->cap_present & ~s->cap_supported)
+        return -EINVAL;
 
     return 0;
 }
diff --git a/hw/pci.h b/hw/pci.h
index c0c2380..9139504 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -178,6 +178,10 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Capability bits for save/load */
+    uint32_t cap_supported;
+    uint32_t cap_present;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (2 preceding siblings ...)
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 02/11] qemu: capability bits in pci save/restore Michael S. Tsirkin
@ 2009-05-25 12:25 ` Michael S. Tsirkin
  2009-05-26  8:49   ` Isaku Yamahata
  2009-05-26  9:30   ` [Qemu-devel] [PATCH] qemu: fix pci_find_capability for multiple caps Michael S. Tsirkin
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access Michael S. Tsirkin
                   ` (7 subsequent siblings)
  11 siblings, 2 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:25 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Add routines to manage PCI capability list. First user will be MSI-X.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 hw/pci.h |   18 +++++++++++-
 2 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 5dcfb4e..6bc3819 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
     int version = s->cap_present ? 3 : 2;
     int i;
 
-    qemu_put_be32(f, version); /* PCI device version */
+    /* PCI device version and capabilities */
+    qemu_put_be32(f, version);
+    if (version >= 3)
+        qemu_put_be32(f, s->cap_present);
     qemu_put_buffer(f, s->config, 256);
     for (i = 0; i < 4; i++)
         qemu_put_be32(f, s->irq_state[i]);
-    if (version >= 3)
-        qemu_put_be32(f, s->cap_present);
 }
 
 int pci_device_load(PCIDevice *s, QEMUFile *f)
@@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     version_id = qemu_get_be32(f);
     if (version_id > 3)
         return -EINVAL;
-    qemu_get_buffer(f, s->config, 256);
-    pci_update_mappings(s);
-
-    if (version_id >= 2)
-        for (i = 0; i < 4; i ++)
-            s->irq_state[i] = qemu_get_be32(f);
     if (version_id >= 3)
         s->cap_present = qemu_get_be32(f);
     else
@@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     if (s->cap_present & ~s->cap_supported)
         return -EINVAL;
 
+    qemu_get_buffer(f, s->config, 256);
+    pci_update_mappings(s);
+
+    if (version_id >= 2)
+        for (i = 0; i < 4; i ++)
+            s->irq_state[i] = qemu_get_be32(f);
+    /* Clear mask and used bits for capabilities.
+       Must be restored separately, since capabilities can
+       be placed anywhere in config space. */
+    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        s->mask[i] = 0xff;
     return 0;
 }
 
@@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
 
     return (PCIDevice *)dev;
 }
+
+static int pci_find_space(PCIDevice *pdev, uint8_t size)
+{
+    int offset = PCI_CONFIG_HEADER_SIZE;
+    int i;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        if (pdev->used[i])
+            offset = i + 1;
+        else if (i - offset + 1 == size)
+            return offset;
+    return 0;
+}
+
+static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
+                                        uint8_t *prev_p)
+{
+    uint8_t next, prev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
+        return 0;
+
+    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
+         prev = next + PCI_CAP_LIST_NEXT)
+        if (pdev->config[next + PCI_CAP_LIST_ID] != cap_id)
+            break;
+
+    *prev_p = prev;
+    return next;
+}
+
+/* Reserve space and add capability to the linked list in pci config space */
+int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t offset = pci_find_space(pdev, size);
+    uint8_t *config = pdev->config + offset;
+    if (!offset)
+        return -ENOSPC;
+    config[PCI_CAP_LIST_ID] = cap_id;
+    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
+    pdev->config[PCI_CAPABILITY_LIST] = offset;
+    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
+    memset(pdev->used + offset, 0xFF, size);
+    /* Make capability read-only by default */
+    memset(pdev->mask + offset, 0, size);
+    return offset;
+}
+
+/* Unlink capability from the pci config space. */
+void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
+{
+    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
+    if (!offset)
+        return;
+    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
+    /* Make capability writeable again */
+    memset(pdev->mask + offset, 0xff, size);
+    memset(pdev->used + offset, 0, size);
+
+    if (!pdev->config[PCI_CAPABILITY_LIST])
+        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
+}
+
+/* Reserve space for capability at a known offset (to call after load). */
+void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
+{
+    memset(pdev->used + offset, 0xff, size);
+}
+
+uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
+{
+    uint8_t prev;
+    return pci_find_capability_list(pdev, cap_id, &prev);
+}
diff --git a/hw/pci.h b/hw/pci.h
index 9139504..40137c6 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -123,6 +123,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+/* Capability lists */
+#define PCI_CAP_LIST_ID		0	/* Capability ID */
+#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
+
 #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
@@ -130,7 +134,7 @@ typedef struct PCIIORegion {
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
-#define PCI_STATUS_CAPABILITIES	0x010
+#define PCI_STATUS_CAP_LIST	0x010
 #define PCI_STATUS_66MHZ	0x020
 #define PCI_STATUS_RESERVED2	0x040
 #define PCI_STATUS_FAST_BACK	0x080
@@ -160,6 +164,9 @@ struct PCIDevice {
     /* Used to implement R/W bytes */
     uint8_t mask[PCI_CONFIG_SPACE_SIZE];
 
+    /* Used to allocate config space for capabilities. */
+    uint8_t used[PCI_CONFIG_SPACE_SIZE];
+
     /* the following fields are read only */
     PCIBus *bus;
     int devfn;
@@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
+
+void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
+
+uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
+
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
 void pci_default_write_config(PCIDevice *d,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
@ 2009-05-26  8:49   ` Isaku Yamahata
  2009-05-26  9:22     ` Michael S. Tsirkin
  2009-05-26  9:30   ` [Qemu-devel] [PATCH] qemu: fix pci_find_capability for multiple caps Michael S. Tsirkin
  1 sibling, 1 reply; 37+ messages in thread
From: Isaku Yamahata @ 2009-05-26  8:49 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Mon, May 25, 2009 at 03:25:20PM +0300, Michael S. Tsirkin wrote:
> Add routines to manage PCI capability list. First user will be MSI-X.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  hw/pci.h |   18 +++++++++++-
>  2 files changed, 106 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index 5dcfb4e..6bc3819 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
>      int version = s->cap_present ? 3 : 2;
>      int i;
>  
> -    qemu_put_be32(f, version); /* PCI device version */
> +    /* PCI device version and capabilities */
> +    qemu_put_be32(f, version);
> +    if (version >= 3)
> +        qemu_put_be32(f, s->cap_present);
>      qemu_put_buffer(f, s->config, 256);
>      for (i = 0; i < 4; i++)
>          qemu_put_be32(f, s->irq_state[i]);
> -    if (version >= 3)
> -        qemu_put_be32(f, s->cap_present);
>  }
>  
>  int pci_device_load(PCIDevice *s, QEMUFile *f)
> @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      version_id = qemu_get_be32(f);
>      if (version_id > 3)
>          return -EINVAL;
> -    qemu_get_buffer(f, s->config, 256);
> -    pci_update_mappings(s);
> -
> -    if (version_id >= 2)
> -        for (i = 0; i < 4; i ++)
> -            s->irq_state[i] = qemu_get_be32(f);
>      if (version_id >= 3)
>          s->cap_present = qemu_get_be32(f);
>      else
> @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
>      if (s->cap_present & ~s->cap_supported)
>          return -EINVAL;
>  
> +    qemu_get_buffer(f, s->config, 256);
> +    pci_update_mappings(s);
> +
> +    if (version_id >= 2)
> +        for (i = 0; i < 4; i ++)
> +            s->irq_state[i] = qemu_get_be32(f);
> +    /* Clear mask and used bits for capabilities.
> +       Must be restored separately, since capabilities can
> +       be placed anywhere in config space. */
> +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        s->mask[i] = 0xff;
>      return 0;
>  }
>  
> @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
>  
>      return (PCIDevice *)dev;
>  }
> +
> +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> +{
> +    int offset = PCI_CONFIG_HEADER_SIZE;
> +    int i;
> +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> +        if (pdev->used[i])
> +            offset = i + 1;
> +        else if (i - offset + 1 == size)
> +            return offset;
> +    return 0;
> +}
> +
> +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> +                                        uint8_t *prev_p)
> +{
> +    uint8_t next, prev;
> +
> +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> +        return 0;
> +
> +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> +         prev = next + PCI_CAP_LIST_NEXT)
> +        if (pdev->config[next + PCI_CAP_LIST_ID] != cap_id)

typo? ==


> +            break;
> +
> +    *prev_p = prev;
> +    return next;
> +}
> +
> +/* Reserve space and add capability to the linked list in pci config space */
> +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t offset = pci_find_space(pdev, size);
> +    uint8_t *config = pdev->config + offset;
> +    if (!offset)
> +        return -ENOSPC;
> +    config[PCI_CAP_LIST_ID] = cap_id;
> +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> +    memset(pdev->used + offset, 0xFF, size);
> +    /* Make capability read-only by default */
> +    memset(pdev->mask + offset, 0, size);
> +    return offset;
> +}
> +
> +/* Unlink capability from the pci config space. */
> +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> +{
> +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> +    if (!offset)
> +        return;
> +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> +    /* Make capability writeable again */
> +    memset(pdev->mask + offset, 0xff, size);
> +    memset(pdev->used + offset, 0, size);
> +
> +    if (!pdev->config[PCI_CAPABILITY_LIST])
> +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> +}
> +
> +/* Reserve space for capability at a known offset (to call after load). */
> +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> +{
> +    memset(pdev->used + offset, 0xff, size);
> +}
> +
> +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> +{
> +    uint8_t prev;
> +    return pci_find_capability_list(pdev, cap_id, &prev);
> +}
> diff --git a/hw/pci.h b/hw/pci.h
> index 9139504..40137c6 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
>  #define PCI_MIN_GNT		0x3e	/* 8 bits */
>  #define PCI_MAX_LAT		0x3f	/* 8 bits */
>  
> +/* Capability lists */
> +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> +
>  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
>  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
>  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
>  /* Bits in the PCI Status Register (PCI 2.3 spec) */
>  #define PCI_STATUS_RESERVED1	0x007
>  #define PCI_STATUS_INT_STATUS	0x008
> -#define PCI_STATUS_CAPABILITIES	0x010
> +#define PCI_STATUS_CAP_LIST	0x010
>  #define PCI_STATUS_66MHZ	0x020
>  #define PCI_STATUS_RESERVED2	0x040
>  #define PCI_STATUS_FAST_BACK	0x080
> @@ -160,6 +164,9 @@ struct PCIDevice {
>      /* Used to implement R/W bytes */
>      uint8_t mask[PCI_CONFIG_SPACE_SIZE];
>  
> +    /* Used to allocate config space for capabilities. */
> +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> +

bitmap? Or will be used for other purpose?

>      /* the following fields are read only */
>      PCIBus *bus;
>      int devfn;
> @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
>                              uint32_t size, int type,
>                              PCIMapIORegionFunc *map_func);
>  
> +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> +
> +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> +
> +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> +
> +
>  uint32_t pci_default_read_config(PCIDevice *d,
>                                   uint32_t address, int len);
>  void pci_default_write_config(PCIDevice *d,

-- 
yamahata

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities
  2009-05-26  8:49   ` Isaku Yamahata
@ 2009-05-26  9:22     ` Michael S. Tsirkin
  0 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-26  9:22 UTC (permalink / raw)
  To: Isaku Yamahata
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, May 26, 2009 at 05:49:26PM +0900, Isaku Yamahata wrote:
> On Mon, May 25, 2009 at 03:25:20PM +0300, Michael S. Tsirkin wrote:
> > Add routines to manage PCI capability list. First user will be MSI-X.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >  hw/pci.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
> >  hw/pci.h |   18 +++++++++++-
> >  2 files changed, 106 insertions(+), 10 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index 5dcfb4e..6bc3819 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -130,12 +130,13 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
> >      int version = s->cap_present ? 3 : 2;
> >      int i;
> >  
> > -    qemu_put_be32(f, version); /* PCI device version */
> > +    /* PCI device version and capabilities */
> > +    qemu_put_be32(f, version);
> > +    if (version >= 3)
> > +        qemu_put_be32(f, s->cap_present);
> >      qemu_put_buffer(f, s->config, 256);
> >      for (i = 0; i < 4; i++)
> >          qemu_put_be32(f, s->irq_state[i]);
> > -    if (version >= 3)
> > -        qemu_put_be32(f, s->cap_present);
> >  }
> >  
> >  int pci_device_load(PCIDevice *s, QEMUFile *f)
> > @@ -146,12 +147,6 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      version_id = qemu_get_be32(f);
> >      if (version_id > 3)
> >          return -EINVAL;
> > -    qemu_get_buffer(f, s->config, 256);
> > -    pci_update_mappings(s);
> > -
> > -    if (version_id >= 2)
> > -        for (i = 0; i < 4; i ++)
> > -            s->irq_state[i] = qemu_get_be32(f);
> >      if (version_id >= 3)
> >          s->cap_present = qemu_get_be32(f);
> >      else
> > @@ -160,6 +155,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
> >      if (s->cap_present & ~s->cap_supported)
> >          return -EINVAL;
> >  
> > +    qemu_get_buffer(f, s->config, 256);
> > +    pci_update_mappings(s);
> > +
> > +    if (version_id >= 2)
> > +        for (i = 0; i < 4; i ++)
> > +            s->irq_state[i] = qemu_get_be32(f);
> > +    /* Clear mask and used bits for capabilities.
> > +       Must be restored separately, since capabilities can
> > +       be placed anywhere in config space. */
> > +    memset(s->used, 0, PCI_CONFIG_SPACE_SIZE);
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        s->mask[i] = 0xff;
> >      return 0;
> >  }
> >  
> > @@ -870,3 +877,76 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> >  
> >      return (PCIDevice *)dev;
> >  }
> > +
> > +static int pci_find_space(PCIDevice *pdev, uint8_t size)
> > +{
> > +    int offset = PCI_CONFIG_HEADER_SIZE;
> > +    int i;
> > +    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
> > +        if (pdev->used[i])
> > +            offset = i + 1;
> > +        else if (i - offset + 1 == size)
> > +            return offset;
> > +    return 0;
> > +}
> > +
> > +static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
> > +                                        uint8_t *prev_p)
> > +{
> > +    uint8_t next, prev;
> > +
> > +    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST))
> > +        return 0;
> > +
> > +    for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
> > +         prev = next + PCI_CAP_LIST_NEXT)
> > +        if (pdev->config[next + PCI_CAP_LIST_ID] != cap_id)
> 
> typo? ==


Fixed. Thanks.

> > +            break;
> > +
> > +    *prev_p = prev;
> > +    return next;
> > +}
> > +
> > +/* Reserve space and add capability to the linked list in pci config space */
> > +int pci_add_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t offset = pci_find_space(pdev, size);
> > +    uint8_t *config = pdev->config + offset;
> > +    if (!offset)
> > +        return -ENOSPC;
> > +    config[PCI_CAP_LIST_ID] = cap_id;
> > +    config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
> > +    pdev->config[PCI_CAPABILITY_LIST] = offset;
> > +    pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
> > +    memset(pdev->used + offset, 0xFF, size);
> > +    /* Make capability read-only by default */
> > +    memset(pdev->mask + offset, 0, size);
> > +    return offset;
> > +}
> > +
> > +/* Unlink capability from the pci config space. */
> > +void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
> > +{
> > +    uint8_t prev, offset = pci_find_capability_list(pdev, cap_id, &prev);
> > +    if (!offset)
> > +        return;
> > +    pdev->config[prev] = pdev->config[offset + PCI_CAP_LIST_NEXT];
> > +    /* Make capability writeable again */
> > +    memset(pdev->mask + offset, 0xff, size);
> > +    memset(pdev->used + offset, 0, size);
> > +
> > +    if (!pdev->config[PCI_CAPABILITY_LIST])
> > +        pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
> > +}
> > +
> > +/* Reserve space for capability at a known offset (to call after load). */
> > +void pci_reserve_capability(PCIDevice *pdev, uint8_t offset, uint8_t size)
> > +{
> > +    memset(pdev->used + offset, 0xff, size);
> > +}
> > +
> > +uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
> > +{
> > +    uint8_t prev;
> > +    return pci_find_capability_list(pdev, cap_id, &prev);
> > +}
> > diff --git a/hw/pci.h b/hw/pci.h
> > index 9139504..40137c6 100644
> > --- a/hw/pci.h
> > +++ b/hw/pci.h
> > @@ -123,6 +123,10 @@ typedef struct PCIIORegion {
> >  #define PCI_MIN_GNT		0x3e	/* 8 bits */
> >  #define PCI_MAX_LAT		0x3f	/* 8 bits */
> >  
> > +/* Capability lists */
> > +#define PCI_CAP_LIST_ID		0	/* Capability ID */
> > +#define PCI_CAP_LIST_NEXT	1	/* Next capability in the list */
> > +
> >  #define PCI_REVISION            0x08    /* obsolete, use PCI_REVISION_ID */
> >  #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
> >  #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
> > @@ -130,7 +134,7 @@ typedef struct PCIIORegion {
> >  /* Bits in the PCI Status Register (PCI 2.3 spec) */
> >  #define PCI_STATUS_RESERVED1	0x007
> >  #define PCI_STATUS_INT_STATUS	0x008
> > -#define PCI_STATUS_CAPABILITIES	0x010
> > +#define PCI_STATUS_CAP_LIST	0x010
> >  #define PCI_STATUS_66MHZ	0x020
> >  #define PCI_STATUS_RESERVED2	0x040
> >  #define PCI_STATUS_FAST_BACK	0x080
> > @@ -160,6 +164,9 @@ struct PCIDevice {
> >      /* Used to implement R/W bytes */
> >      uint8_t mask[PCI_CONFIG_SPACE_SIZE];
> >  
> > +    /* Used to allocate config space for capabilities. */
> > +    uint8_t used[PCI_CONFIG_SPACE_SIZE];
> > +
> 
> bitmap?

Could be a bitmap. But byte for byte is much easier to code for.
compare
	used[offset] = 1
with
	used[offset / 8] = (1 << (offset & 0x7));

> Or will be used for other purpose?

We could thinkably mark separate bits as used/unused.
It's not useful for capabilities which are always byte
aligned, but who knows ...

> >      /* the following fields are read only */
> >      PCIBus *bus;
> >      int devfn;
> > @@ -194,6 +201,15 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
> >                              uint32_t size, int type,
> >                              PCIMapIORegionFunc *map_func);
> >  
> > +int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
> > +
> > +void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
> > +
> > +uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
> > +
> > +
> >  uint32_t pci_default_read_config(PCIDevice *d,
> >                                   uint32_t address, int len);
> >  void pci_default_write_config(PCIDevice *d,
> 
> -- 
> yamahata
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH] qemu: fix pci_find_capability for multiple caps
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
  2009-05-26  8:49   ` Isaku Yamahata
@ 2009-05-26  9:30   ` Michael S. Tsirkin
  1 sibling, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-26  9:30 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

pci_find_capability_list has a bug so it'd stop at the first
capability. This only happens to work as we only support
a single capability (MSI-X). Here's a fix.

Found-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---

This is a fixup for my patch "qemu: add routines to manage PCI
capabilities".

 hw/pci.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 19905b9..a63d988 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -918,7 +918,7 @@ static uint8_t pci_find_capability_list(PCIDevice *pdev, uint8_t cap_id,
 
     for (prev = PCI_CAPABILITY_LIST; (next = pdev->config[prev]);
          prev = next + PCI_CAP_LIST_NEXT)
-        if (pdev->config[next + PCI_CAP_LIST_ID] != cap_id)
+        if (pdev->config[next + PCI_CAP_LIST_ID] == cap_id)
             break;
 
     *prev_p = prev;

-- 
MST

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access.
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (3 preceding siblings ...)
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
@ 2009-05-25 12:25 ` Michael S. Tsirkin
  2009-05-26  2:33   ` Isaku Yamahata
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions Michael S. Tsirkin
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:25 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Add inline routines for convenient access to pci devices
with correct (little) endianness. Will be used by MSI-X support.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.h |   30 +++++++++++++++++++++++++++---
 1 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/pci.h b/hw/pci.h
index 40137c6..4e112a3 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -240,21 +240,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_word(uint8_t *config, uint16_t val)
+{
+    cpu_to_le16wu((uint16_t *)config, val);
+}
+
+static inline uint16_t
+pci_get_word(uint8_t *config)
+{
+    return le16_to_cpupu((uint16_t *)config);
+}
+
+static inline void
+pci_set_long(uint8_t *config, uint32_t val)
+{
+    cpu_to_le32wu((uint32_t *)config, val);
+}
+
+static inline uint32_t
+pci_get_long(uint8_t *config)
+{
+    return le32_to_cpupu((uint32_t *)config);
+}
+
+static inline void
 pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
+    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
 }
 
 static inline void
 pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
+    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
 }
 
 static inline void
 pci_config_set_class(uint8_t *pci_config, uint16_t val)
 {
-    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
+    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
 }
 
 typedef void (*pci_qdev_initfn)(PCIDevice *dev);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access.
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access Michael S. Tsirkin
@ 2009-05-26  2:33   ` Isaku Yamahata
  2009-05-26  6:41     ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Isaku Yamahata @ 2009-05-26  2:33 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

Just a minor comment.
How about to add pci_[sg]et_byte() for consistency?

On Mon, May 25, 2009 at 03:25:33PM +0300, Michael S. Tsirkin wrote:
> Add inline routines for convenient access to pci devices
> with correct (little) endianness. Will be used by MSI-X support.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci.h |   30 +++++++++++++++++++++++++++---
>  1 files changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/pci.h b/hw/pci.h
> index 40137c6..4e112a3 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -240,21 +240,45 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
>                          pci_map_irq_fn map_irq, const char *name);
>  
>  static inline void
> +pci_set_word(uint8_t *config, uint16_t val)
> +{
> +    cpu_to_le16wu((uint16_t *)config, val);
> +}
> +
> +static inline uint16_t
> +pci_get_word(uint8_t *config)
> +{
> +    return le16_to_cpupu((uint16_t *)config);
> +}
> +
> +static inline void
> +pci_set_long(uint8_t *config, uint32_t val)
> +{
> +    cpu_to_le32wu((uint32_t *)config, val);
> +}
> +
> +static inline uint32_t
> +pci_get_long(uint8_t *config)
> +{
> +    return le32_to_cpupu((uint32_t *)config);
> +}
> +
> +static inline void
>  pci_config_set_vendor_id(uint8_t *pci_config, uint16_t val)
>  {
> -    cpu_to_le16wu((uint16_t *)&pci_config[PCI_VENDOR_ID], val);
> +    pci_set_word(&pci_config[PCI_VENDOR_ID], val);
>  }
>  
>  static inline void
>  pci_config_set_device_id(uint8_t *pci_config, uint16_t val)
>  {
> -    cpu_to_le16wu((uint16_t *)&pci_config[PCI_DEVICE_ID], val);
> +    pci_set_word(&pci_config[PCI_DEVICE_ID], val);
>  }
>  
>  static inline void
>  pci_config_set_class(uint8_t *pci_config, uint16_t val)
>  {
> -    cpu_to_le16wu((uint16_t *)&pci_config[PCI_CLASS_DEVICE], val);
> +    pci_set_word(&pci_config[PCI_CLASS_DEVICE], val);
>  }
>  
>  typedef void (*pci_qdev_initfn)(PCIDevice *dev);

-- 
yamahata

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access.
  2009-05-26  2:33   ` Isaku Yamahata
@ 2009-05-26  6:41     ` Michael S. Tsirkin
  2009-05-26  8:07       ` Avi Kivity
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-26  6:41 UTC (permalink / raw)
  To: Isaku Yamahata
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Paul Brook, Avi Kivity

On Tue, May 26, 2009 at 11:33:37AM +0900, Isaku Yamahata wrote:
> On Mon, May 25, 2009 at 03:25:33PM +0300, Michael S. Tsirkin wrote:
> > Add inline routines for convenient access to pci devices
> > with correct (little) endianness. Will be used by MSI-X support.
>
> Just a minor comment.
> How about to add pci_[sg]et_byte() for consistency?

I don't see that it makes sense - pci_set_long(config, value)
is shorter than *((uint32_t *)config) = cpu_to_le32(value),
but single bytes don't have endianness, and *config = value
is shorter.

-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access.
  2009-05-26  6:41     ` Michael S. Tsirkin
@ 2009-05-26  8:07       ` Avi Kivity
  2009-05-26  8:49         ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Avi Kivity @ 2009-05-26  8:07 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Christian Borntraeger, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Isaku Yamahata,
	Paul Brook

Michael S. Tsirkin wrote:
> On Tue, May 26, 2009 at 11:33:37AM +0900, Isaku Yamahata wrote:
>   
>> On Mon, May 25, 2009 at 03:25:33PM +0300, Michael S. Tsirkin wrote:
>>     
>>> Add inline routines for convenient access to pci devices
>>> with correct (little) endianness. Will be used by MSI-X support.
>>>       
>> Just a minor comment.
>> How about to add pci_[sg]et_byte() for consistency?
>>     
>
> I don't see that it makes sense - pci_set_long(config, value)
> is shorter than *((uint32_t *)config) = cpu_to_le32(value),
> but single bytes don't have endianness, and *config = value
> is shorter.
>   

It's nice to have consistent APIs though.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access.
  2009-05-26  8:07       ` Avi Kivity
@ 2009-05-26  8:49         ` Michael S. Tsirkin
  0 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-26  8:49 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Carsten Otte, kvm, Christian Borntraeger, Rusty Russell,
	qemu-devel, virtualization, Blue Swirl, Isaku Yamahata,
	Paul Brook

On Tue, May 26, 2009 at 11:07:33AM +0300, Avi Kivity wrote:
> Michael S. Tsirkin wrote:
>> On Tue, May 26, 2009 at 11:33:37AM +0900, Isaku Yamahata wrote:
>>   
>>> On Mon, May 25, 2009 at 03:25:33PM +0300, Michael S. Tsirkin wrote:
>>>     
>>>> Add inline routines for convenient access to pci devices
>>>> with correct (little) endianness. Will be used by MSI-X support.
>>>>       
>>> Just a minor comment.
>>> How about to add pci_[sg]et_byte() for consistency?
>>>     
>>
>> I don't see that it makes sense - pci_set_long(config, value)
>> is shorter than *((uint32_t *)config) = cpu_to_le32(value),
>> but single bytes don't have endianness, and *config = value
>> is shorter.
>>   
>
> It's nice to have consistent APIs though.

Well, if enough people feel so ...



qemu: add pci_get/set_byte

Add pci_get/set_byte to keep *_word and *_long access functions company.
They are unused for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---

diff --git a/hw/pci.h b/hw/pci.h
index 4072f16..e1e4fb4 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -263,6 +263,18 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name);
 
 static inline void
+pci_set_byte(uint8_t *config, uint8_t val)
+{
+    *config = val;
+}
+
+static inline uint8_t
+pci_get_byte(uint8_t *config)
+{
+    return *config;
+}
+
+static inline void
 pci_set_word(uint8_t *config, uint16_t val)
 {
     cpu_to_le16wu((uint16_t *)config, val);

-- 
MST

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (4 preceding siblings ...)
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access Michael S. Tsirkin
@ 2009-05-25 12:25 ` Michael S. Tsirkin
  2009-06-09 23:19   ` Paul Brook
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 06/11] qemu: add flag to disable MSI-X by default Michael S. Tsirkin
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:25 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Add functions implementing MSI-X support. First user will be virtio-pci.
Note that platform must set a flag to declare MSI supported.
For PC this will be set by APIC.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Makefile.target |    2 +-
 hw/msix.c       |  423 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/msix.h       |   35 +++++
 hw/pci.h        |   20 +++
 4 files changed, 479 insertions(+), 1 deletions(-)
 create mode 100644 hw/msix.c
 create mode 100644 hw/msix.h

diff --git a/Makefile.target b/Makefile.target
index 664a1e3..87b2859 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -486,7 +486,7 @@ endif #CONFIG_BSD_USER
 ifndef CONFIG_USER_ONLY
 
 OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o \
-     gdbstub.o gdbstub-xml.o
+     gdbstub.o gdbstub-xml.o msix.o
 # virtio has to be here due to weird dependency between PCI and virtio-net.
 # need to fix this properly
 OBJS+=virtio-blk.o virtio-balloon.o virtio-net.o virtio-console.o
diff --git a/hw/msix.c b/hw/msix.c
new file mode 100644
index 0000000..4db2fc1
--- /dev/null
+++ b/hw/msix.c
@@ -0,0 +1,423 @@
+/*
+ * MSI-X device support
+ *
+ * This module includes support for MSI-X in pci devices.
+ *
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ *
+ *  Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "hw.h"
+#include "msix.h"
+#include "pci.h"
+
+/* Declaration from linux/pci_regs.h */
+#define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define  PCI_MSIX_FLAGS 2     /* Table at lower 11 bits */
+#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
+#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+
+/* MSI-X capability structure */
+#define MSIX_TABLE_OFFSET 4
+#define MSIX_PBA_OFFSET 8
+#define MSIX_CAP_LENGTH 12
+
+/* MSI enable bit is in byte 1 in FLAGS register */
+#define MSIX_ENABLE_OFFSET (PCI_MSIX_FLAGS + 1)
+#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
+
+/* MSI-X table format */
+#define MSIX_MSG_ADDR 0
+#define MSIX_MSG_UPPER_ADDR 4
+#define MSIX_MSG_DATA 8
+#define MSIX_VECTOR_CTRL 12
+#define MSIX_ENTRY_SIZE 16
+#define MSIX_VECTOR_MASK 0x1
+
+/* How much space does an MSIX table need. */
+/* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+#define MSIX_PAGE_SIZE TARGET_PAGE_ALIGN(0x1000)
+/* Reserve second half of the page for pending bits */
+#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
+#define MSIX_MAX_ENTRIES 32
+
+
+#ifdef MSIX_DEBUG
+#define DEBUG(fmt, ...)                                       \
+    do {                                                      \
+      fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__);    \
+    } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+/* Flag to globally disable MSI-X support */
+int msix_disable;
+
+/* Flag for interrupt controller to declare MSI-X support */
+int msix_supported;
+
+/* Add MSI-X capability to the config space for the device. */
+/* Given a bar and its size, add MSI-X table on top of it
+ * and fill MSI-X capability in the config space.
+ * Original bar size must be a power of 2 or 0.
+ * New bar size is returned. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           unsigned bar_nr, unsigned bar_size)
+{
+    int config_offset;
+    uint8_t *config;
+    uint32_t new_size;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
+        return -EINVAL;
+    if (bar_size > 0x80000000)
+        return -ENOSPC;
+
+    /* Add space for MSI-X structures */
+    if (!bar_size)
+        new_size = MSIX_PAGE_SIZE;
+    else if (bar_size < MSIX_PAGE_SIZE) {
+        bar_size = MSIX_PAGE_SIZE;
+        new_size = MSIX_PAGE_SIZE * 2;
+    } else
+        new_size = bar_size * 2;
+
+    pdev->msix_bar_size = new_size;
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    if (config_offset < 0)
+        return config_offset;
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    /* Table on top of BAR */
+    pci_set_long(config + MSIX_TABLE_OFFSET, bar_size | bar_nr);
+    /* Pending bits on top of that */
+    pci_set_long(config + MSIX_PBA_OFFSET, (bar_size + MSIX_PAGE_PENDING) |
+                 bar_nr);
+    pdev->msix_cap = config_offset;
+    /* Make flags bit writeable. */
+    pdev->mask[config_offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    return 0;
+}
+
+static void msix_free_irq_entries(PCIDevice *dev)
+{
+    int vector;
+
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+        dev->msix_entry_used[vector] = 0;
+}
+
+/* Handle MSI-X capability config write. */
+void msix_write_config(PCIDevice *dev, uint32_t addr,
+                       uint32_t val, int len)
+{
+    unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (addr + len <= enable_pos || addr > enable_pos)
+        return;
+
+    if (msix_enabled(dev))
+        qemu_set_irq(dev->irq[0], 0);
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    void *page = dev->msix_table_page;
+    uint32_t val = 0;
+
+    memcpy(&val, (void *)((char *)page + offset), 4);
+
+    return val;
+}
+
+static uint32_t msix_mmio_read_unallowed(void *opaque, target_phys_addr_t addr)
+{
+    fprintf(stderr, "MSI-X: only dword read is allowed!\n");
+    return 0;
+}
+
+static uint8_t msix_pending_mask(int vector)
+{
+    return 1 << (vector % 8);
+}
+
+static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
+{
+    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+}
+
+static int msix_is_pending(PCIDevice *dev, int vector)
+{
+    return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
+}
+
+static void msix_set_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) |= msix_pending_mask(vector);
+}
+
+static void msix_clr_pending(PCIDevice *dev, int vector)
+{
+    *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector);
+}
+
+static int msix_is_masked(PCIDevice *dev, int vector)
+{
+    unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+    return dev->msix_table_page[offset] & MSIX_VECTOR_MASK;
+}
+
+static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
+                             uint32_t val)
+{
+    PCIDevice *dev = opaque;
+    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
+    int vector = offset / MSIX_ENTRY_SIZE;
+    memcpy(dev->msix_table_page + offset, &val, 4);
+    if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
+        msix_clr_pending(dev, vector);
+        msix_notify(dev, vector);
+    }
+}
+
+static void msix_mmio_write_unallowed(void *opaque, target_phys_addr_t addr,
+                                      uint32_t val)
+{
+    fprintf(stderr, "MSI-X: only dword write is allowed!\n");
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+    msix_mmio_write_unallowed, msix_mmio_write_unallowed, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+    msix_mmio_read_unallowed, msix_mmio_read_unallowed, msix_mmio_readl
+};
+
+/* Should be called from device's map method. */
+void msix_mmio_map(PCIDevice *d, int region_num,
+                   uint32_t addr, uint32_t size, int type)
+{
+    uint8_t *config = d->config + d->msix_cap;
+    uint32_t table = pci_get_long(config + MSIX_TABLE_OFFSET);
+    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
+    /* TODO: for assigned devices, we'll want to make it possible to map
+     * pending bits separately in case they are in a separate bar. */
+    int table_bir = table & PCI_MSIX_FLAGS_BIRMASK;
+
+    if (table_bir != region_num)
+        return;
+    if (size <= offset)
+        return;
+    cpu_register_physical_memory(addr + offset, size - offset,
+                                 d->msix_mmio_index);
+}
+
+/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
+ * modified, it should be retrieved with msix_bar_size. */
+int msix_init(struct PCIDevice *dev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size)
+{
+    int ret = -ENOMEM;
+    /* Nothing to do if MSI is not supported by interrupt controller */
+    if (!msix_supported)
+        return -ENOTTY;
+
+    if (nentries > MSIX_MAX_ENTRIES)
+        return -EINVAL;
+
+    dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
+                                        sizeof *dev->msix_entry_used);
+    if (!dev->msix_entry_used)
+        goto err_used;
+
+    dev->msix_table_page = qemu_mallocz(MSIX_PAGE_SIZE);
+    if (!dev->msix_table_page)
+        goto err_page;
+
+    dev->msix_mmio_index = cpu_register_io_memory(0, msix_mmio_read,
+                                                  msix_mmio_write, dev);
+    if (dev->msix_mmio_index == -1) {
+        ret = -EBUSY;
+        goto err_index;
+    }
+
+    dev->msix_entries_nr = nentries;
+    dev->cap_supported |= QEMU_PCI_CAP_MSIX;
+    /* If disabled, stop here. User can later load confiuration with MSI-X
+     * enabled. */
+    if (msix_disable)
+        return 0;
+
+    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
+    if (ret)
+        goto err_config;
+
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
+    return 0;
+
+err_config:
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+err_index:
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+err_page:
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+err_used:
+    return ret;
+}
+
+/* Clean up resources for the device. */
+int msix_uninit(PCIDevice *dev)
+{
+    if (!(dev->cap_supported & QEMU_PCI_CAP_MSIX))
+        return 0;
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    msix_free_irq_entries(dev);
+    dev->msix_entries_nr = 0;
+    cpu_unregister_io_memory(dev->msix_mmio_index);
+    qemu_free(dev->msix_table_page);
+    dev->msix_table_page = NULL;
+    qemu_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+    dev->cap_supported &= ~QEMU_PCI_CAP_MSIX;
+    return 0;
+}
+
+void msix_save(PCIDevice *dev, QEMUFile *f)
+{
+    unsigned nentries = (pci_get_word(dev->config + PCI_MSIX_FLAGS) &
+                         PCI_MSIX_FLAGS_QSIZE) + 1;
+    qemu_put_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                    (nentries + 7) / 8);
+}
+
+/* Should be called after restoring the config space. */
+int msix_load(PCIDevice *dev, QEMUFile *f)
+{
+    uint8_t offset = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+    unsigned nentries;
+
+    if (!!(dev->cap_present & QEMU_PCI_CAP_MSIX) == !!offset) {
+        fprintf(stderr, "MSI-X bit set but no capability is present\n");
+        return -EINVAL;
+    }
+
+    msix_free_irq_entries(dev);
+
+    if (!dev->cap_present & QEMU_PCI_CAP_MSIX)
+        return 0;
+
+    /* Sanity check: we probably could add more of these. */
+    nentries = (pci_get_word(dev->config + PCI_MSIX_FLAGS) &
+                PCI_MSIX_FLAGS_QSIZE) + 1;
+    if (nentries > MSIX_MAX_ENTRIES) {
+        fprintf(stderr, "msix_load: nentries mismatch: %d > %d\n",
+                nentries, dev->msix_entries_nr);
+        return -EINVAL;
+    }
+
+    /* Make flags bit writeable. */
+    dev->mask[offset + MSIX_ENABLE_OFFSET] |= MSIX_ENABLE_MASK;
+    /* Reserve space used by this capability */
+    pci_reserve_capability(dev, offset, MSIX_CAP_LENGTH);
+    /* Store the new offset */
+    dev->msix_cap = offset;
+
+    dev->msix_entries_nr = nentries;
+
+    qemu_get_buffer(f, dev->msix_table_page, nentries * MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING,
+                   (nentries + 7) / 8);
+
+    return 0;
+}
+
+/* Does device support MSI-X? */
+int msix_present(PCIDevice *dev)
+{
+    return dev->cap_present & QEMU_PCI_CAP_MSIX;
+}
+
+/* Is MSI-X enabled? */
+int msix_enabled(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) &&
+        (dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &
+         MSIX_ENABLE_MASK);
+}
+
+/* Size of bar where MSI-X table resides, or 0 if MSI-X not supported. */
+uint32_t msix_bar_size(PCIDevice *dev)
+{
+    return (dev->cap_present & QEMU_PCI_CAP_MSIX) ?
+        dev->msix_bar_size : 0;
+}
+
+/* Send an MSI-X message */
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    uint64_t address;
+    uint32_t data;
+
+    if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+        return;
+    if (msix_is_masked(dev, vector)) {
+        msix_set_pending(dev, vector);
+        return;
+    }
+
+    address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
+    data = pci_get_long(table_entry + MSIX_MSG_DATA);
+    stl_phys(address, data);
+}
+
+void msix_reset(PCIDevice *dev)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+    msix_free_irq_entries(dev);
+    dev->config[dev->msix_cap + MSIX_ENABLE_OFFSET] &= MSIX_ENABLE_MASK;
+    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+}
+
+/* PCI spec suggests that devices make it possible for software to configure
+ * less vectors than supported by the device, but does not specify a standard
+ * mechanism for devices to do so.
+ *
+ * We support this by asking devices to declare vectors software is going to
+ * actually use, and checking this on the notification path. Devices that
+ * don't want to follow the spec suggestion can declare all vectors as used. */
+
+/* Mark vector as used. */
+int msix_vector_use(PCIDevice *dev, unsigned vector)
+{
+    if (vector >= dev->msix_entries_nr)
+        return -EINVAL;
+    dev->msix_entry_used[vector]++;
+    return 0;
+}
+
+/* Mark vector as unused. */
+void msix_vector_unuse(PCIDevice *dev, unsigned vector)
+{
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+        --dev->msix_entry_used[vector];
+}
diff --git a/hw/msix.h b/hw/msix.h
new file mode 100644
index 0000000..79e84a3
--- /dev/null
+++ b/hw/msix.h
@@ -0,0 +1,35 @@
+#ifndef QEMU_MSIX_H
+#define QEMU_MSIX_H
+
+#include "qemu-common.h"
+
+int msix_init(PCIDevice *pdev, unsigned short nentries,
+              unsigned bar_nr, unsigned bar_size);
+
+void msix_write_config(PCIDevice *pci_dev, uint32_t address,
+                       uint32_t val, int len);
+
+void msix_mmio_map(PCIDevice *pci_dev, int region_num,
+                   uint32_t addr, uint32_t size, int type);
+
+int msix_uninit(PCIDevice *d);
+
+void msix_save(PCIDevice *dev, QEMUFile *f);
+int msix_load(PCIDevice *dev, QEMUFile *f);
+
+int msix_enabled(PCIDevice *dev);
+int msix_present(PCIDevice *dev);
+
+uint32_t msix_bar_size(PCIDevice *dev);
+
+int msix_vector_use(PCIDevice *dev, unsigned vector);
+void msix_vector_unuse(PCIDevice *dev, unsigned vector);
+
+void msix_notify(PCIDevice *dev, unsigned vector);
+
+void msix_reset(PCIDevice *dev);
+
+extern int msix_disable;
+extern int msix_supported;
+
+#endif
diff --git a/hw/pci.h b/hw/pci.h
index 4e112a3..6da626b 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -156,6 +156,11 @@ typedef struct PCIIORegion {
 /* Size of the standard PCI config space */
 #define PCI_CONFIG_SPACE_SIZE 0x100
 
+/* Bits in cap_supported/cap_present fields. */
+enum {
+    QEMU_PCI_CAP_MSIX = 0x1,
+};
+
 struct PCIDevice {
     DeviceState qdev;
     /* PCI config space */
@@ -189,6 +194,21 @@ struct PCIDevice {
     /* Capability bits for save/load */
     uint32_t cap_supported;
     uint32_t cap_present;
+
+    /* Offset of MSI-X capability in config space */
+    uint8_t msix_cap;
+
+    /* MSI-X entries */
+    int msix_entries_nr;
+
+    /* Space to store MSIX table */
+    uint8_t *msix_table_page;
+    /* MMIO index used to map MSIX table and pending bit entries. */
+    int msix_mmio_index;
+    /* Reference-count for entries actually in use by driver. */
+    unsigned *msix_entry_used;
+    /* Region including the MSI-X table */
+    uint32_t msix_bar_size;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions Michael S. Tsirkin
@ 2009-06-09 23:19   ` Paul Brook
  2009-06-10  9:46     ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-09 23:19 UTC (permalink / raw)
  To: qemu-devel
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Rusty Russell,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity

On Monday 25 May 2009, Michael S. Tsirkin wrote:
> Add functions implementing MSI-X support. First user will be virtio-pci.
> Note that platform must set a flag to declare MSI supported.
> For PC this will be set by APIC.

This sounds wrong. The device shouldn't know or care whether the system has a 
MSI capable interrupt controller. That's for the guest OS to figure out.

Paul 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-09 23:19   ` Paul Brook
@ 2009-06-10  9:46     ` Michael S. Tsirkin
  2009-06-10 14:07       ` Paul Brook
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10  9:46 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 12:19:42AM +0100, Paul Brook wrote:
> On Monday 25 May 2009, Michael S. Tsirkin wrote:
> > Add functions implementing MSI-X support. First user will be virtio-pci.
> > Note that platform must set a flag to declare MSI supported.
> > For PC this will be set by APIC.
> 
> This sounds wrong. The device shouldn't know or care whether the system has a 
> MSI capable interrupt controller. That's for the guest OS to figure out.
> 
> Paul 

You are right of course. In theory there's nothing that breaks if I
set this flag to on, on all platforms. OTOH if qemu emulates some
controller incorrectly, guest might misdetect MSI support in the
controller, and things will break horribly.

It seems safer to have a flag that can be enabled by people
that know about a specific platform.

What do you think?

-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10  9:46     ` Michael S. Tsirkin
@ 2009-06-10 14:07       ` Paul Brook
  2009-06-10 14:25         ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-10 14:07 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

> > > Note that platform must set a flag to declare MSI supported.
> > > For PC this will be set by APIC.
> >
> > This sounds wrong. The device shouldn't know or care whether the system
> > has a MSI capable interrupt controller. That's for the guest OS to figure
> > out.
>
> You are right of course. In theory there's nothing that breaks if I
> set this flag to on, on all platforms. OTOH if qemu emulates some
> controller incorrectly, guest might misdetect MSI support in the
> controller, and things will break horribly.
>
> It seems safer to have a flag that can be enabled by people
> that know about a specific platform.

No. The solution is to fix whatever is broken.

If we really need to avoid MSI-X capable devices then that should be done 
explicity per-device. i.e. you have a different virtio-net device that does 
not use MSI-X.

Paul

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 14:07       ` Paul Brook
@ 2009-06-10 14:25         ` Michael S. Tsirkin
  2009-06-10 14:39           ` Paul Brook
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 14:25 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 03:07:34PM +0100, Paul Brook wrote:
> > > > Note that platform must set a flag to declare MSI supported.
> > > > For PC this will be set by APIC.
> > >
> > > This sounds wrong. The device shouldn't know or care whether the system
> > > has a MSI capable interrupt controller. That's for the guest OS to figure
> > > out.
> >
> > You are right of course. In theory there's nothing that breaks if I
> > set this flag to on, on all platforms. OTOH if qemu emulates some
> > controller incorrectly, guest might misdetect MSI support in the
> > controller, and things will break horribly.
> >
> > It seems safer to have a flag that can be enabled by people
> > that know about a specific platform.
> 
> No. The solution is to fix whatever is broken.

That's easy enough then. Patch below.

> 
> If we really need to avoid MSI-X capable devices then that should be done 
> explicity per-device. i.e. you have a different virtio-net device that does 
> not use MSI-X.
> 
> Paul

Why should it be done per-device?

--->

Don't add an option for platforms to disable MSI-X in all devices.
Paul Brook will find and fix all platforms that have broken MSI-X
emulation.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---

diff --git a/hw/apic.c b/hw/apic.c
index ed03a36..9d44061 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -945,7 +945,6 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
-    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
diff --git a/hw/msix.c b/hw/msix.c
index ce4e6ba..16efb27 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -62,9 +62,6 @@
 /* Flag to globally disable MSI-X support */
 int msix_disable;
 
-/* Flag for interrupt controller to declare MSI-X support */
-int msix_supported;
-
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
  * and fill MSI-X capability in the config space.
@@ -232,10 +229,7 @@ void msix_mmio_map(PCIDevice *d, int region_num,
 int msix_init(struct PCIDevice *dev, unsigned short nentries,
               unsigned bar_nr, unsigned bar_size)
 {
-    int ret = -ENOMEM;
-    /* Nothing to do if MSI is not supported by interrupt controller */
-    if (!msix_supported)
-        return -ENOTTY;
+    int ret;
 
     if (nentries > MSIX_MAX_ENTRIES)
         return -EINVAL;
diff --git a/hw/msix.h b/hw/msix.h
index 79e84a3..2fcadd3 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -30,6 +30,5 @@ void msix_notify(PCIDevice *dev, unsigned vector);
 void msix_reset(PCIDevice *dev);
 
 extern int msix_disable;
-extern int msix_supported;
 
 #endif
-- 
MST

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 14:25         ` Michael S. Tsirkin
@ 2009-06-10 14:39           ` Paul Brook
  2009-06-10 14:47             ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-10 14:39 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

> > If we really need to avoid MSI-X capable devices then that should be done
> > explicity per-device. i.e. you have a different virtio-net device that
> > does not use MSI-X.
> >
> > Paul
>
> Why should it be done per-device?


Because otherwise you end up with the horrible hacks that you're currently 
tripping over: devices have to magically morph into a different device when 
you load a VM. That's seems just plain wrong to me. Loading a VM shouldn't not 
do anything that can't happen during normal operation.

Paul

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 14:39           ` Paul Brook
@ 2009-06-10 14:47             ` Michael S. Tsirkin
  2009-06-10 15:15               ` Paul Brook
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 14:47 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 03:39:05PM +0100, Paul Brook wrote:
> > > If we really need to avoid MSI-X capable devices then that should be done
> > > explicity per-device. i.e. you have a different virtio-net device that
> > > does not use MSI-X.
> > >
> > > Paul
> >
> > Why should it be done per-device?
> 
> 
> Because otherwise you end up with the horrible hacks that you're currently 
> tripping over: devices have to magically morph into a different device when 
> you load a VM.

No, the hacks are there so I that I can support loading and saving from
non-MSI setups in a backward-compatible way.

The flag we are discussing is set at qemu startup and can't change
across load/store.

> That's seems just plain wrong to me.
> Loading a VM shouldn't not 
> do anything that can't happen during normal operation.

At least wrt pci, we are very far from this state: load just overwrites
all registers, readonly or not, which can never happen during normal
operation. And if we "fix" it, and only edit BAR registers, the happy
result will be that we can't add functionality to PCI devices without
breaking guests and/or breaking loading from old images each time.

> Paul

-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 14:47             ` Michael S. Tsirkin
@ 2009-06-10 15:15               ` Paul Brook
  2009-06-10 15:52                 ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-10 15:15 UTC (permalink / raw)
  To: qemu-devel
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Rusty Russell,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity

> > That's seems just plain wrong to me.
> > Loading a VM shouldn't not
> > do anything that can't happen during normal operation.
>
> At least wrt pci, we are very far from this state: load just overwrites
> all registers, readonly or not, which can never happen during normal
> operation.

IMO that code is wrong. We should only be loading things that the guest can 
change (directly or indirectly).

Paul

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 15:15               ` Paul Brook
@ 2009-06-10 15:52                 ` Michael S. Tsirkin
  2009-06-10 16:08                   ` Paul Brook
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 15:52 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 04:15:04PM +0100, Paul Brook wrote:
> > > That's seems just plain wrong to me.
> > > Loading a VM shouldn't not
> > > do anything that can't happen during normal operation.
> >
> > At least wrt pci, we are very far from this state: load just overwrites
> > all registers, readonly or not, which can never happen during normal
> > operation.
> 
> IMO that code is wrong. We should only be loading things that the guest can 
> change (directly or indirectly).
> 
> Paul

Making it work this way will mean that minor changes to a device can
break backwards compatibility with old images, often in surprising ways.
What are the advantages?

-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 15:52                 ` Michael S. Tsirkin
@ 2009-06-10 16:08                   ` Paul Brook
  2009-06-10 16:26                     ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-10 16:08 UTC (permalink / raw)
  To: qemu-devel
  Cc: Carsten Otte, kvm, Michael S. Tsirkin, Rusty Russell,
	virtualization, Blue Swirl, Christian Borntraeger, Avi Kivity

On Wednesday 10 June 2009, Michael S. Tsirkin wrote:
> On Wed, Jun 10, 2009 at 04:15:04PM +0100, Paul Brook wrote:
> > > > That's seems just plain wrong to me.
> > > > Loading a VM shouldn't not
> > > > do anything that can't happen during normal operation.
> > >
> > > At least wrt pci, we are very far from this state: load just overwrites
> > > all registers, readonly or not, which can never happen during normal
> > > operation.
> >
> > IMO that code is wrong. We should only be loading things that the guest
> > can change (directly or indirectly).
>
> Making it work this way will mean that minor changes to a device can
> break backwards compatibility with old images, often in surprising ways.
> What are the advantages?

If you can't create an identical machine from scratch then I don't consider 
snapshot/migration to be a useful feature. i.e. as soon as you shutdown and 
restart the guest it is liable to break anyway.

It may be that the snapshot/migration code wants to include a machine config, 
and create a new machine from that. However this is a separate issue, and 
arguably something your VM manager should be handling for you.

Paul

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 16:08                   ` Paul Brook
@ 2009-06-10 16:26                     ` Michael S. Tsirkin
  2009-06-10 16:46                       ` Paul Brook
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 16:26 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 05:08:15PM +0100, Paul Brook wrote:
> On Wednesday 10 June 2009, Michael S. Tsirkin wrote:
> > On Wed, Jun 10, 2009 at 04:15:04PM +0100, Paul Brook wrote:
> > > > > That's seems just plain wrong to me.
> > > > > Loading a VM shouldn't not
> > > > > do anything that can't happen during normal operation.
> > > >
> > > > At least wrt pci, we are very far from this state: load just overwrites
> > > > all registers, readonly or not, which can never happen during normal
> > > > operation.
> > >
> > > IMO that code is wrong. We should only be loading things that the guest
> > > can change (directly or indirectly).
> >
> > Making it work this way will mean that minor changes to a device can
> > break backwards compatibility with old images, often in surprising ways.
> > What are the advantages?
> 
> If you can't create an identical machine from scratch then I don't consider 
> snapshot/migration to be a useful feature. i.e. as soon as you shutdown and 
> restart the guest it is liable to break anyway.

Why is liable to break?
Configuration does not change until you load another image.
Look here:

void msix_reset(PCIDevice *dev)
{
    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
        return;

...
}

So once you load and image with MSIX capability off,
it will stay off across guest restarts.

> It may be that the snapshot/migration code wants to include a machine config, 
> and create a new machine from that. However this is a separate issue, and 
> arguably something your VM manager should be handling for you.
> 
> Paul

Since the image already has a necessary information, duplicating
it in a separate machine config will just lead to errors.

-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 16:26                     ` Michael S. Tsirkin
@ 2009-06-10 16:46                       ` Paul Brook
  2009-06-10 17:03                         ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-10 16:46 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

> > If you can't create an identical machine from scratch then I don't
> > consider snapshot/migration to be a useful feature. i.e. as soon as you
> > shutdown and restart the guest it is liable to break anyway.
>
> Why is liable to break?

A VM booted on an old version of qemu and migrated to a new version will 
behave differently to a the same VM booted on a new version of qemu.
I hope I don't need to explain why this is bad.

As previously discussed, any guest visible changes are liable to break a guest 
OS, particularly guests like Windows which deliberately break when run on 
"different" hardware. Personally I don't particularly care, but if we support 
live migration we also need to support "cold" migration - i.e. shutdown and 
restart.

>So once you load and image with MSIX capability off,
>it will stay off across guest restarts.

I'm assuming guest restart includes restarting qemu.

Paul

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 16:46                       ` Paul Brook
@ 2009-06-10 17:03                         ` Michael S. Tsirkin
  2009-06-10 17:30                           ` Paul Brook
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 17:03 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 05:46:03PM +0100, Paul Brook wrote:
> > > If you can't create an identical machine from scratch then I don't
> > > consider snapshot/migration to be a useful feature. i.e. as soon as you
> > > shutdown and restart the guest it is liable to break anyway.
> >
> > Why is liable to break?
> 
> A VM booted on an old version of qemu and migrated to a new version will 
> behave differently to a the same VM booted on a new version of qemu.

It will behave identically. That's what the patch does: discover
how did the device behave on old qemu, and make it behave the same way
on new qemu.

> I hope I don't need to explain why this is bad.
> 
> As previously discussed, any guest visible changes are liable to break a guest 
> OS, particularly guests like Windows which deliberately break when run on 
> "different" hardware. Personally I don't particularly care, but if we support 
> live migration we also need to support "cold" migration - i.e. shutdown and 
> restart.
> 
> >So once you load and image with MSIX capability off,
> >it will stay off across guest restarts.
> 
> I'm assuming guest restart includes restarting qemu.
> 
> Paul

If you restart qemu, and load an image, what we should do is look at the
image and behave in a way consistent with how the qemu that created the
image behaved. If you load an image, you switch to a VM, and should be
consistent with the VM you just loaded. And we do not need a flag or a
machine description file to tell us this.


-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 17:03                         ` Michael S. Tsirkin
@ 2009-06-10 17:30                           ` Paul Brook
  2009-06-10 18:07                             ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-10 17:30 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wednesday 10 June 2009, Michael S. Tsirkin wrote:
> On Wed, Jun 10, 2009 at 05:46:03PM +0100, Paul Brook wrote:
> > > > If you can't create an identical machine from scratch then I don't
> > > > consider snapshot/migration to be a useful feature. i.e. as soon as
> > > > you shutdown and restart the guest it is liable to break anyway.
> > >
> > > Why is liable to break?
> >
> > A VM booted on an old version of qemu and migrated to a new version will
> > behave differently to a the same VM booted on a new version of qemu.
>
> It will behave identically. That's what the patch does: discover
> how did the device behave on old qemu, and make it behave the same way
> on new qemu.

You're missing the point.  After doing a live migration from old-qemu to new-
qemu, there is no snapshot to load.  We need to be able to shutdown the guest, 
kill qemu (without saving a snapshot), then start qemu with the exact same 
hardware.

If we can't start a new qemu with the same hardware configuration then we 
should not be allowing migration or loading of snapshots.

Paul

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 17:30                           ` Paul Brook
@ 2009-06-10 18:07                             ` Michael S. Tsirkin
  2009-06-10 19:04                               ` Paul Brook
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-10 18:07 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 06:30:16PM +0100, Paul Brook wrote:
> On Wednesday 10 June 2009, Michael S. Tsirkin wrote:
> > On Wed, Jun 10, 2009 at 05:46:03PM +0100, Paul Brook wrote:
> > > > > If you can't create an identical machine from scratch then I don't
> > > > > consider snapshot/migration to be a useful feature. i.e. as soon as
> > > > > you shutdown and restart the guest it is liable to break anyway.
> > > >
> > > > Why is liable to break?
> > >
> > > A VM booted on an old version of qemu and migrated to a new version will
> > > behave differently to a the same VM booted on a new version of qemu.
> >
> > It will behave identically. That's what the patch does: discover
> > how did the device behave on old qemu, and make it behave the same way
> > on new qemu.
> 
> You're missing the point.  After doing a live migration from old-qemu to new-
> qemu, there is no snapshot to load.  We need to be able to shutdown the guest, 
> kill qemu (without saving a snapshot), then start qemu with the exact same 
> hardware.

Yes, I see how this would sometimes be useful. So this feature request
would mean that we need a flag to disable msix in virtio net.

I think we can agree that hardware can change across reboots. It can
with real hardware and it happens without guest doing anything.

> If we can't start a new qemu with the same hardware configuration then we 
> should not be allowing migration or loading of snapshots.
> 
> Paul

OK, so I'll add an option in virtio-net to disable msi-x, and such
an option will be added in any device with msi-x support.
Will that address your concern?

-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 18:07                             ` Michael S. Tsirkin
@ 2009-06-10 19:04                               ` Paul Brook
  2009-06-11  8:29                                 ` Michael S. Tsirkin
  0 siblings, 1 reply; 37+ messages in thread
From: Paul Brook @ 2009-06-10 19:04 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

> > If we can't start a new qemu with the same hardware configuration then we
> > should not be allowing migration or loading of snapshots.
>
> OK, so I'll add an option in virtio-net to disable msi-x, and such
> an option will be added in any device with msi-x support.
> Will that address your concern?

Yes, as long as migration fails when you try to migrate to the wrong kind of 
device.

Paul

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions
  2009-06-10 19:04                               ` Paul Brook
@ 2009-06-11  8:29                                 ` Michael S. Tsirkin
  0 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-06-11  8:29 UTC (permalink / raw)
  To: Paul Brook
  Cc: Carsten Otte, kvm, Rusty Russell, qemu-devel, virtualization,
	Blue Swirl, Christian Borntraeger, Avi Kivity

On Wed, Jun 10, 2009 at 08:04:13PM +0100, Paul Brook wrote:
> > > If we can't start a new qemu with the same hardware configuration then we
> > > should not be allowing migration or loading of snapshots.
> >
> > OK, so I'll add an option in virtio-net to disable msi-x, and such
> > an option will be added in any device with msi-x support.
> > Will that address your concern?
> 
> Yes, as long as migration fails when you try to migrate to the wrong kind of 
> device.
> 
> Paul

I think the right way to do this, is to make sure that standard
read-only registers in PCI config space are not modified in migration
(device-specific registers could have changed as a result of guest
actions, so we can't make assumptions).
-- 
MST

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 06/11] qemu: add flag to disable MSI-X by default
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (5 preceding siblings ...)
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions Michael S. Tsirkin
@ 2009-05-25 12:25 ` Michael S. Tsirkin
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 07/11] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:25 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Add global flag to disable MSI-X by default.  This is useful primarily
to make images loadable by older qemu (without msix).  Even when MSI-X
is disabled by flag, you can still load images that have MSI-X enabled.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/msix.c       |    3 +++
 qemu-options.hx |    2 ++
 vl.c            |    3 +++
 3 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/hw/msix.c b/hw/msix.c
index 4db2fc1..970a8ca 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -122,6 +122,9 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
                        uint32_t val, int len)
 {
     unsigned enable_pos = dev->msix_cap + MSIX_ENABLE_OFFSET;
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX))
+        return;
+
     if (addr + len <= enable_pos || addr > enable_pos)
         return;
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 87af798..fd041a4 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1575,3 +1575,5 @@ DEF("semihosting", 0, QEMU_OPTION_semihosting,
 DEF("old-param", 0, QEMU_OPTION_old_param,
     "-old-param      old param mode\n")
 #endif
+DEF("disable-msix", 0, QEMU_OPTION_disable_msix,
+    "-disable-msix disable msix support for PCI devices (enabled by default)\n")
diff --git a/vl.c b/vl.c
index 2c1f0e0..2757d4f 100644
--- a/vl.c
+++ b/vl.c
@@ -134,6 +134,7 @@ int main(int argc, char **argv)
 #include "hw/usb.h"
 #include "hw/pcmcia.h"
 #include "hw/pc.h"
+#include "hw/msix.h"
 #include "hw/audiodev.h"
 #include "hw/isa.h"
 #include "hw/baum.h"
@@ -5557,6 +5558,8 @@ int main(int argc, char **argv, char **envp)
                 xen_mode = XEN_ATTACH;
                 break;
 #endif
+            case QEMU_OPTION_disable_msix:
+                msix_disable = 1;
             }
         }
     }
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 07/11] qemu: minimal MSI/MSI-X implementation for PC
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (6 preceding siblings ...)
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 06/11] qemu: add flag to disable MSI-X by default Michael S. Tsirkin
@ 2009-05-25 12:25 ` Michael S. Tsirkin
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 08/11] qemu: add support for resizing regions Michael S. Tsirkin
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:25 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Implement MSI support in APIC. Note that MSI and MMIO APIC registers
are at the same memory location, but actually not on the global bus: MSI
is on PCI bus, APIC is connected directly to the CPU. We map them on the
global bus at the same address which happens to work because MSI
registers are reserved in APIC MMIO and vice versa.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apic.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 8c8b2de..d831709 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -19,6 +19,8 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "pci.h"
+#include "msix.h"
 #include "qemu-timer.h"
 #include "host-utils.h"
 
@@ -63,6 +65,19 @@
 #define MAX_APICS 255
 #define MAX_APIC_WORDS 8
 
+/* Intel APIC constants: from include/asm/msidef.h */
+#define MSI_DATA_VECTOR_SHIFT		0
+#define MSI_DATA_VECTOR_MASK		0x000000ff
+#define MSI_DATA_DELIVERY_MODE_SHIFT	8
+#define MSI_DATA_TRIGGER_SHIFT		15
+#define MSI_DATA_LEVEL_SHIFT		14
+#define MSI_ADDR_DEST_MODE_SHIFT	2
+#define MSI_ADDR_DEST_ID_SHIFT		12
+#define	MSI_ADDR_DEST_ID_MASK		0x00ffff0
+
+#define MSI_ADDR_BASE                   0xfee00000
+#define MSI_ADDR_SIZE                   0x100000
+
 typedef struct APICState {
     CPUState *cpu_env;
     uint32_t apicbase;
@@ -86,6 +101,13 @@ typedef struct APICState {
     QEMUTimer *timer;
 } APICState;
 
+struct msi_state {
+    uint64_t addr;
+    uint32_t data;
+    int mask;
+    int pending;
+};
+
 static int apic_io_memory;
 static APICState *local_apics[MAX_APICS + 1];
 static int last_apic_id = 0;
@@ -712,11 +734,31 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
     return val;
 }
 
+static void apic_send_msi(target_phys_addr_t addr, uint32 data)
+{
+    uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+    uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+    uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    /* XXX: Ignore redirection hint. */
+    apic_deliver_irq(dest, dest_mode, delivery, vector, 0, trigger_mode);
+}
+
 static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
 {
     CPUState *env;
     APICState *s;
-    int index;
+    int index = (addr >> 4) & 0xff;
+    if (addr > 0xfff || !index) {
+        /* MSI and MMIO APIC are at the same memory location,
+         * but actually not on the global bus: MSI is on PCI bus
+         * APIC is connected directly to the CPU.
+         * Mapping them on the global bus happens to work because
+         * MSI registers are reserved in APIC MMIO and vice versa. */
+        apic_send_msi(addr, val);
+        return;
+    }
 
     env = cpu_single_env;
     if (!env)
@@ -727,7 +769,6 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
     printf("APIC write: %08x = %08x\n", (uint32_t)addr, val);
 #endif
 
-    index = (addr >> 4) & 0xff;
     switch(index) {
     case 0x02:
         s->id = (val >> 24);
@@ -911,6 +952,7 @@ int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
@@ -918,7 +960,8 @@ int apic_init(CPUState *env)
            on the global memory bus. */
         apic_io_memory = cpu_register_io_memory(0, apic_mem_read,
                                                 apic_mem_write, NULL);
-        cpu_register_physical_memory(s->apicbase & ~0xfff, 0x1000,
+        /* XXX: what if the base changes? */
+        cpu_register_physical_memory(MSI_ADDR_BASE, MSI_ADDR_SIZE,
                                      apic_io_memory);
     }
     s->timer = qemu_new_timer(vm_clock, apic_timer, s);
@@ -929,4 +972,3 @@ int apic_init(CPUState *env)
     local_apics[s->id] = s;
     return 0;
 }
-
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 08/11] qemu: add support for resizing regions
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (7 preceding siblings ...)
  2009-05-25 12:25 ` [Qemu-devel] [PATCH 07/11] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
@ 2009-05-25 12:26 ` Michael S. Tsirkin
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 09/11] qemu: virtio support for many interrupt vectors Michael S. Tsirkin
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:26 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Make it possible to resize PCI regions.  This will be used by virtio
with MSI-X, where the region size depends on whether MSI-X is enabled,
and can change across load/save.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci.c |   54 ++++++++++++++++++++++++++++++++++++------------------
 hw/pci.h |    3 +++
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 6bc3819..19905b9 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -392,6 +392,41 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     *(uint32_t *)(pci_dev->mask + addr) = cpu_to_le32(mask);
 }
 
+static void pci_unmap_region(PCIDevice *d, PCIIORegion *r)
+{
+    if (r->addr == -1)
+        return;
+    if (r->type & PCI_ADDRESS_SPACE_IO) {
+        int class;
+        /* NOTE: specific hack for IDE in PC case:
+           only one byte must be mapped. */
+        class = pci_get_word(d->config + PCI_CLASS_DEVICE);
+        if (class == 0x0101 && r->size == 4) {
+            isa_unassign_ioport(r->addr + 2, 1);
+        } else {
+            isa_unassign_ioport(r->addr, r->size);
+        }
+    } else {
+        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
+                                     r->size,
+                                     IO_MEM_UNASSIGNED);
+        qemu_unregister_coalesced_mmio(r->addr, r->size);
+    }
+}
+
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size)
+{
+
+    PCIIORegion *r = &pci_dev->io_regions[region_num];
+    if (r->size == size)
+        return;
+    r->size = size;
+    pci_unmap_region(pci_dev, r);
+    r->addr = -1;
+    pci_update_mappings(pci_dev);
+}
+
 static void pci_update_mappings(PCIDevice *d)
 {
     PCIIORegion *r;
@@ -445,24 +480,7 @@ static void pci_update_mappings(PCIDevice *d)
             }
             /* now do the real mapping */
             if (new_addr != r->addr) {
-                if (r->addr != -1) {
-                    if (r->type & PCI_ADDRESS_SPACE_IO) {
-                        int class;
-                        /* NOTE: specific hack for IDE in PC case:
-                           only one byte must be mapped. */
-                        class = d->config[0x0a] | (d->config[0x0b] << 8);
-                        if (class == 0x0101 && r->size == 4) {
-                            isa_unassign_ioport(r->addr + 2, 1);
-                        } else {
-                            isa_unassign_ioport(r->addr, r->size);
-                        }
-                    } else {
-                        cpu_register_physical_memory(pci_to_cpu_addr(r->addr),
-                                                     r->size,
-                                                     IO_MEM_UNASSIGNED);
-                        qemu_unregister_coalesced_mmio(r->addr, r->size);
-                    }
-                }
+                pci_unmap_region(d, r);
                 r->addr = new_addr;
                 if (r->addr != -1) {
                     r->map_func(d, i, r->addr, r->size, r->type);
diff --git a/hw/pci.h b/hw/pci.h
index 6da626b..4072f16 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -221,6 +221,9 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+void pci_resize_io_region(PCIDevice *pci_dev, int region_num,
+                          uint32_t size);
+
 int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
 
 void pci_del_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 09/11] qemu: virtio support for many interrupt vectors
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (8 preceding siblings ...)
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 08/11] qemu: add support for resizing regions Michael S. Tsirkin
@ 2009-05-25 12:26 ` Michael S. Tsirkin
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 10/11] qemu: MSI-X support in virtio PCI Michael S. Tsirkin
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 11/11] qemu: request 3 vectors in virtio-net Michael S. Tsirkin
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:26 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Extend virtio to support many interrupt vectors, and rearrange code in
preparation for multi-vector support (mostly move reset out to bindings,
because we will have to reset the vectors in transport-specific code).
Actual bindings in pci, and use in net, to follow.
Load and save are not connected to bindings yet, so they are left
stubbed out for now.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/syborg_virtio.c |   13 ++++++++--
 hw/virtio-pci.c    |   24 +++++++++++++++----
 hw/virtio.c        |   63 ++++++++++++++++++++++++++++++++++++++-------------
 hw/virtio.h        |   10 ++++++-
 4 files changed, 84 insertions(+), 26 deletions(-)

diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c
index 37c219c..d8c978a 100644
--- a/hw/syborg_virtio.c
+++ b/hw/syborg_virtio.c
@@ -134,7 +134,10 @@ static void syborg_virtio_writel(void *opaque, target_phys_addr_t offset,
         vdev->features = value;
         break;
     case SYBORG_VIRTIO_QUEUE_BASE:
-        virtio_queue_set_addr(vdev, vdev->queue_sel, value);
+        if (value == 0)
+            virtio_reset(vdev);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, value);
         break;
     case SYBORG_VIRTIO_QUEUE_SEL:
         if (value < VIRTIO_PCI_QUEUE_MAX)
@@ -228,7 +231,7 @@ static CPUWriteMemoryFunc *syborg_virtio_writefn[] = {
      syborg_virtio_writel
 };
 
-static void syborg_virtio_update_irq(void *opaque)
+static void syborg_virtio_update_irq(void *opaque, uint16_t vector)
 {
     SyborgVirtIOProxy *proxy = opaque;
     int level;
@@ -239,7 +242,7 @@ static void syborg_virtio_update_irq(void *opaque)
 }
 
 static VirtIOBindings syborg_virtio_bindings = {
-    .update_irq = syborg_virtio_update_irq
+    .notify = syborg_virtio_update_irq
 };
 
 static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
@@ -248,6 +251,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->vdev = vdev;
 
+    /* Don't support multiple vectors */
+    proxy->vdev->nvectors = 0;
     sysbus_init_irq(&proxy->busdev, &proxy->irq);
     iomemtype = cpu_register_io_memory(0, syborg_virtio_readfn,
                                        syborg_virtio_writefn, proxy);
@@ -255,6 +260,8 @@ static void syborg_virtio_init(SyborgVirtIOProxy *proxy, VirtIODevice *vdev)
 
     proxy->id = ((uint32_t)0x1af4 << 16) | vdev->device_id;
 
+    qemu_register_reset(virtio_reset, 0, vdev);
+
     virtio_bind_device(vdev, &syborg_virtio_bindings, proxy);
 }
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c072423..7dfdd80 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -78,13 +78,19 @@ typedef struct {
 
 /* virtio device */
 
-static void virtio_pci_update_irq(void *opaque)
+static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
 
     qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
+static void virtio_pci_reset(void *opaque)
+{
+    VirtIOPCIProxy *proxy = opaque;
+    virtio_reset(proxy->vdev);
+}
+
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
@@ -108,7 +114,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         break;
     case VIRTIO_PCI_QUEUE_PFN:
         pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-        virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
+        if (pa == 0)
+            virtio_pci_reset(proxy);
+        else
+            virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
         break;
     case VIRTIO_PCI_QUEUE_SEL:
         if (val < VIRTIO_PCI_QUEUE_MAX)
@@ -120,7 +129,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     case VIRTIO_PCI_STATUS:
         vdev->status = val & 0xFF;
         if (vdev->status == 0)
-            virtio_reset(vdev);
+            virtio_pci_reset(proxy);
         break;
     }
 }
@@ -158,7 +167,7 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         /* reading from the ISR also clears it. */
         ret = vdev->isr;
         vdev->isr = 0;
-        virtio_update_irq(vdev);
+        qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
     default:
         break;
@@ -243,7 +252,7 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
-    .update_irq = virtio_pci_update_irq
+    .notify = virtio_pci_notify
 };
 
 static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
@@ -255,6 +264,9 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
+    /* No support for multiple vectors yet. */
+    proxy->vdev->nvectors = 0;
+
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -279,6 +291,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
     pci_register_io_region(&proxy->pci_dev, 0, size, PCI_ADDRESS_SPACE_IO,
                            virtio_map);
 
+    qemu_register_reset(virtio_pci_reset, 0, proxy);
+
     virtio_bind_device(vdev, &virtio_pci_bindings, proxy);
 }
 
diff --git a/hw/virtio.c b/hw/virtio.c
index 45a49fa..63ffcff 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -68,6 +68,7 @@ struct VirtQueue
     target_phys_addr_t pa;
     uint16_t last_avail_idx;
     int inuse;
+    uint16_t vector;
     void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
 };
 
@@ -373,12 +374,16 @@ int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
 }
 
 /* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+    if (vdev->binding->notify) {
+        vdev->binding->notify(vdev->binding_opaque, vector);
+    }
+}
 
 void virtio_update_irq(VirtIODevice *vdev)
 {
-    if (vdev->binding->update_irq) {
-        vdev->binding->update_irq(vdev->binding_opaque);
-    }
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
 }
 
 void virtio_reset(void *opaque)
@@ -393,7 +398,8 @@ void virtio_reset(void *opaque)
     vdev->queue_sel = 0;
     vdev->status = 0;
     vdev->isr = 0;
-    virtio_update_irq(vdev);
+    vdev->config_vector = VIRTIO_NO_VECTOR;
+    virtio_notify_vector(vdev, vdev->config_vector);
 
     for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         vdev->vq[i].vring.desc = 0;
@@ -401,6 +407,7 @@ void virtio_reset(void *opaque)
         vdev->vq[i].vring.used = 0;
         vdev->vq[i].last_avail_idx = 0;
         vdev->vq[i].pa = 0;
+        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
     }
 }
 
@@ -484,12 +491,8 @@ void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
 
 void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
 {
-    if (addr == 0) {
-        virtio_reset(vdev);
-    } else {
-        vdev->vq[n].pa = addr;
-        virtqueue_init(&vdev->vq[n]);
-    }
+    vdev->vq[n].pa = addr;
+    virtqueue_init(&vdev->vq[n]);
 }
 
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
@@ -509,6 +512,18 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
     }
 }
 
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+    return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+        VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+    if (n < VIRTIO_PCI_QUEUE_MAX)
+        vdev->vq[n].vector = vector;
+}
+
 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                             void (*handle_output)(VirtIODevice *, VirtQueue *))
 {
@@ -537,7 +552,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
         return;
 
     vdev->isr |= 0x01;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vq->vector);
 }
 
 void virtio_notify_config(VirtIODevice *vdev)
@@ -546,7 +561,7 @@ void virtio_notify_config(VirtIODevice *vdev)
         return;
 
     vdev->isr |= 0x03;
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, vdev->config_vector);
 }
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
@@ -555,6 +570,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
 
     /* FIXME: load/save binding.  */
     //pci_device_save(&vdev->pci_dev, f);
+    //msix_save(&vdev->pci_dev, f);
 
     qemu_put_8s(f, &vdev->status);
     qemu_put_8s(f, &vdev->isr);
@@ -563,6 +579,9 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
     qemu_put_be32(f, vdev->config_len);
     qemu_put_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors)
+        qemu_put_be16s(f, &vdev->config_vector);
+
     for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
         if (vdev->vq[i].vring.num == 0)
             break;
@@ -577,15 +596,19 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f)
         qemu_put_be32(f, vdev->vq[i].vring.num);
         qemu_put_be64(f, vdev->vq[i].pa);
         qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+        if (vdev->nvectors)
+            qemu_put_be16s(f, &vdev->vq[i].vector);
     }
 }
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
 {
     int num, i;
 
     /* FIXME: load/save binding.  */
     //pci_device_load(&vdev->pci_dev, f);
+    //r = msix_load(&vdev->pci_dev, f);
+    //pci_resize_io_region(&vdev->pci_dev, 1, msix_bar_size(&vdev->pci_dev));
 
     qemu_get_8s(f, &vdev->status);
     qemu_get_8s(f, &vdev->isr);
@@ -594,6 +617,10 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
     vdev->config_len = qemu_get_be32(f);
     qemu_get_buffer(f, vdev->config, vdev->config_len);
 
+    if (vdev->nvectors) {
+        qemu_get_be16s(f, &vdev->config_vector);
+        //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+    }
     num = qemu_get_be32(f);
 
     for (i = 0; i < num; i++) {
@@ -604,9 +631,14 @@ void virtio_load(VirtIODevice *vdev, QEMUFile *f)
         if (vdev->vq[i].pa) {
             virtqueue_init(&vdev->vq[i]);
         }
+        if (vdev->nvectors) {
+            qemu_get_be16s(f, &vdev->vq[i].vector);
+            //msix_vector_use(&vdev->pci_dev, vdev->config_vector);
+        }
     }
 
-    virtio_update_irq(vdev);
+    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+    return 0;
 }
 
 void virtio_cleanup(VirtIODevice *vdev)
@@ -627,6 +659,7 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     vdev->status = 0;
     vdev->isr = 0;
     vdev->queue_sel = 0;
+    vdev->config_vector = VIRTIO_NO_VECTOR;
     vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
 
     vdev->name = name;
@@ -636,8 +669,6 @@ VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
     else
         vdev->config = NULL;
 
-    qemu_register_reset(virtio_reset, 0, vdev);
-
     return vdev;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 425727e..04a3c3d 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -71,11 +71,13 @@ typedef struct VirtQueueElement
 } VirtQueueElement;
 
 typedef struct {
-    void (*update_irq)(void * opaque);
+    void (*notify)(void * opaque, uint16_t vector);
 } VirtIOBindings;
 
 #define VIRTIO_PCI_QUEUE_MAX 16
 
+#define VIRTIO_NO_VECTOR 0xffff
+
 struct VirtIODevice
 {
     const char *name;
@@ -85,6 +87,8 @@ struct VirtIODevice
     uint32_t features;
     size_t config_len;
     void *config;
+    uint16_t config_vector;
+    int nvectors;
     uint32_t (*get_features)(VirtIODevice *vdev);
     uint32_t (*bad_features)(VirtIODevice *vdev);
     void (*set_features)(VirtIODevice *vdev, uint32_t val);
@@ -114,7 +118,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
 void virtio_save(VirtIODevice *vdev, QEMUFile *f);
 
-void virtio_load(VirtIODevice *vdev, QEMUFile *f);
+int virtio_load(VirtIODevice *vdev, QEMUFile *f);
 
 void virtio_cleanup(VirtIODevice *vdev);
 
@@ -140,6 +144,8 @@ void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr);
 target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n);
 int virtio_queue_get_num(VirtIODevice *vdev, int n);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
 
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 10/11] qemu: MSI-X support in virtio PCI
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (9 preceding siblings ...)
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 09/11] qemu: virtio support for many interrupt vectors Michael S. Tsirkin
@ 2009-05-25 12:26 ` Michael S. Tsirkin
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 11/11] qemu: request 3 vectors in virtio-net Michael S. Tsirkin
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:26 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

This enables actual support for MSI-X in virtio PCI.
First user will be virtio-net.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-pci.c |  152 ++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 112 insertions(+), 40 deletions(-)

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 7dfdd80..294f4c7 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -18,6 +18,7 @@
 #include "virtio.h"
 #include "pci.h"
 //#include "sysemu.h"
+#include "msix.h"
 
 /* from Linux's linux/virtio_pci.h */
 
@@ -47,7 +48,24 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR                  19
 
-#define VIRTIO_PCI_CONFIG               20
+/* MSI-X registers: only enabled if MSI-X is enabled. */
+/* A 16-bit vector for configuration changes. */
+#define VIRTIO_MSI_CONFIG_VECTOR        20
+/* A 16-bit vector for selected queue notifications. */
+#define VIRTIO_MSI_QUEUE_VECTOR         22
+
+/* Config space size */
+#define VIRTIO_PCI_CONFIG_NOMSI         20
+#define VIRTIO_PCI_CONFIG_MSI           24
+#define VIRTIO_PCI_REGION_SIZE(dev)     (msix_present(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
+
+/* The remaining space is defined by each driver as the per-driver
+ * configuration space */
+#define VIRTIO_PCI_CONFIG(dev)          (msix_enabled(dev) ? \
+                                         VIRTIO_PCI_CONFIG_MSI : \
+                                         VIRTIO_PCI_CONFIG_NOMSI)
 
 /* Virtio ABI version, if we increment this, we break the guest driver. */
 #define VIRTIO_PCI_ABI_VERSION          0
@@ -81,14 +99,17 @@ typedef struct {
 static void virtio_pci_notify(void *opaque, uint16_t vector)
 {
     VirtIOPCIProxy *proxy = opaque;
-
-    qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
+    if (msix_enabled(&proxy->pci_dev))
+        msix_notify(&proxy->pci_dev, vector);
+    else
+        qemu_set_irq(proxy->pci_dev.irq[0], proxy->vdev->isr & 1);
 }
 
 static void virtio_pci_reset(void *opaque)
 {
     VirtIOPCIProxy *proxy = opaque;
     virtio_reset(proxy->vdev);
+    msix_reset(&proxy->pci_dev);
 }
 
 static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
@@ -97,8 +118,6 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VirtIODevice *vdev = proxy->vdev;
     target_phys_addr_t pa;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_GUEST_FEATURES:
 	/* Guest does not negotiate properly?  We have to assume nothing. */
@@ -131,17 +150,33 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
         if (vdev->status == 0)
             virtio_pci_reset(proxy);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev, vdev->config_vector);
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        vdev->config_vector = val;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        msix_vector_unuse(&proxy->pci_dev,
+                          virtio_queue_vector(vdev, vdev->queue_sel));
+        /* Make it possible for guest to discover an error took place. */
+        if (msix_vector_use(&proxy->pci_dev, val) < 0)
+            val = VIRTIO_NO_VECTOR;
+        virtio_queue_set_vector(vdev, vdev->queue_sel, val);
+        break;
+    default:
+        fprintf(stderr, "%s: unexpected address 0x%x value 0x%x\n",
+                __func__, addr, val);
+        break;
     }
 }
 
-static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
+static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
 {
-    VirtIOPCIProxy *proxy = opaque;
     VirtIODevice *vdev = proxy->vdev;
     uint32_t ret = 0xFFFFFFFF;
 
-    addr -= proxy->addr;
-
     switch (addr) {
     case VIRTIO_PCI_HOST_FEATURES:
         ret = vdev->get_features(vdev);
@@ -169,6 +204,12 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
         vdev->isr = 0;
         qemu_set_irq(proxy->pci_dev.irq[0], 0);
         break;
+    case VIRTIO_MSI_CONFIG_VECTOR:
+        ret = vdev->config_vector;
+        break;
+    case VIRTIO_MSI_QUEUE_VECTOR:
+        ret = virtio_queue_vector(vdev, vdev->queue_sel);
+        break;
     default:
         break;
     }
@@ -179,42 +220,72 @@ static uint32_t virtio_ioport_read(void *opaque, uint32_t addr)
 static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readb(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readw(proxy->vdev, addr);
 }
 
 static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config)
+        return virtio_ioport_read(proxy, addr);
+    addr -= config;
     return virtio_config_readl(proxy->vdev, addr);
 }
 
 static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writeb(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writew(proxy->vdev, addr, val);
 }
 
 static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
 {
     VirtIOPCIProxy *proxy = opaque;
-    addr -= proxy->addr + VIRTIO_PCI_CONFIG;
+    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    addr -= proxy->addr;
+    if (addr < config) {
+        virtio_ioport_write(proxy, addr, val);
+        return;
+    }
+    addr -= config;
     virtio_config_writel(proxy->vdev, addr, val);
 }
 
@@ -223,32 +294,26 @@ static void virtio_map(PCIDevice *pci_dev, int region_num,
 {
     VirtIOPCIProxy *proxy = container_of(pci_dev, VirtIOPCIProxy, pci_dev);
     VirtIODevice *vdev = proxy->vdev;
-    int i;
+    unsigned config_len = VIRTIO_PCI_REGION_SIZE(pci_dev) + vdev->config_len;
 
     proxy->addr = addr;
-    for (i = 0; i < 3; i++) {
-        register_ioport_write(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                              virtio_ioport_write, proxy);
-        register_ioport_read(addr, VIRTIO_PCI_CONFIG, 1 << i,
-                             virtio_ioport_read, proxy);
-    }
 
-    if (vdev->config_len) {
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                              virtio_pci_config_writeb, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                              virtio_pci_config_writew, proxy);
-        register_ioport_write(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                              virtio_pci_config_writel, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 1,
-                             virtio_pci_config_readb, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 2,
-                             virtio_pci_config_readw, proxy);
-        register_ioport_read(addr + VIRTIO_PCI_CONFIG, vdev->config_len, 4,
-                             virtio_pci_config_readl, proxy);
+    register_ioport_write(addr, config_len, 1, virtio_pci_config_writeb, proxy);
+    register_ioport_write(addr, config_len, 2, virtio_pci_config_writew, proxy);
+    register_ioport_write(addr, config_len, 4, virtio_pci_config_writel, proxy);
+    register_ioport_read(addr, config_len, 1, virtio_pci_config_readb, proxy);
+    register_ioport_read(addr, config_len, 2, virtio_pci_config_readw, proxy);
+    register_ioport_read(addr, config_len, 4, virtio_pci_config_readl, proxy);
 
+    if (vdev->config_len)
         vdev->get_config(vdev, vdev->config);
-    }
+}
+
+static void virtio_write_config(PCIDevice *pci_dev, uint32_t address,
+                                uint32_t val, int len)
+{
+    pci_default_write_config(pci_dev, address, val, len);
+    msix_write_config(pci_dev, address, val, len);
 }
 
 static const VirtIOBindings virtio_pci_bindings = {
@@ -264,9 +329,6 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     proxy->vdev = vdev;
 
-    /* No support for multiple vectors yet. */
-    proxy->vdev->nvectors = 0;
-
     config = proxy->pci_dev.config;
     pci_config_set_vendor_id(config, vendor);
     pci_config_set_device_id(config, device);
@@ -284,7 +346,17 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
 
     config[0x3d] = 1;
 
-    size = 20 + vdev->config_len;
+    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors, 1, 0)) {
+        pci_register_io_region(&proxy->pci_dev, 1,
+                               msix_bar_size(&proxy->pci_dev),
+                               PCI_ADDRESS_SPACE_MEM,
+                               msix_mmio_map);
+        proxy->pci_dev.config_write = virtio_write_config;
+        proxy->pci_dev.unregister = msix_uninit;
+    } else
+        vdev->nvectors = 0;
+
+    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
     if (size & (size-1))
         size = 1 << qemu_fls(size);
 
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCH 11/11] qemu: request 3 vectors in virtio-net
       [not found] <cover.1243253205.git.mst@redhat.com>
                   ` (10 preceding siblings ...)
  2009-05-25 12:26 ` [Qemu-devel] [PATCH 10/11] qemu: MSI-X support in virtio PCI Michael S. Tsirkin
@ 2009-05-25 12:26 ` Michael S. Tsirkin
  11 siblings, 0 replies; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-25 12:26 UTC (permalink / raw)
  To: Paul Brook, Avi Kivity, qemu-devel, Carsten Otte, kvm,
	Rusty Russell, virtualization, Christian Borntraeger, Blue Swirl

Request up to 3 vectors in virtio-net. Actual bindings might supply
less.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio-net.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 60aa6da..6118fe3 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -621,6 +621,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev)
     n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);
 
     n->vlans = qemu_mallocz(MAX_VLAN >> 3);
+    n->vdev.nvectors = 3;
 
     register_savevm("virtio-net", virtio_net_id++, VIRTIO_NET_VM_VERSION,
                     virtio_net_save, virtio_net_load, n);
-- 
1.6.3.1.56.g79e1.dirty

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table
@ 2009-05-11 10:01 Michael S. Tsirkin
  2009-05-11 10:21 ` Isaku Yamahata
  0 siblings, 1 reply; 37+ messages in thread
From: Michael S. Tsirkin @ 2009-05-11 10:01 UTC (permalink / raw)
  To: qemu-devel, Anthony Liguori, Blue Swirl, mtosatti, armbru, avi,
	Isaku Yamahata

Change much of hw/pci to use symbolic constants and a table-driven
design: add a mask table with writable bits set and readonly bits unset.
Detect change by comparing original and new registers.

As a result, writing a single byte in BAR registers now works as it
should. Writing to upper limit registers in the bridge also works as it
should. Writes to BAR registers trigger mapping update.  Code is also
shorter.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

Anthony, could you comment on this patch please?

Changelog since v1
- simplify the code some more
- use PCI_CONFIG_SPACE_SIZE instead of 0x100
- only trigger pci update when IO/MEM bits in command register have changed

 hw/pci.c |  147 +++++++++++++-------------------------------------------------
 hw/pci.h |   18 +++++++-
 2 files changed, 47 insertions(+), 118 deletions(-)

diff --git a/hw/pci.c b/hw/pci.c
index 8278894..b8186f6 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -235,6 +235,17 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
+static void pci_init_mask(PCIDevice *dev)
+{
+    int i;
+    dev->mask[PCI_CACHE_LINE_SIZE] = 0xff;
+    dev->mask[PCI_INTERRUPT_LINE] = 0xff;
+    dev->mask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
+                             | PCI_COMMAND_MASTER;
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
+        dev->mask[i] = 0xff;
+}
+
 /* -1 for devfn means auto assign */
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
                                int instance_size, int devfn,
@@ -260,6 +271,7 @@ PCIDevice *pci_register_device(PCIBus *bus, const char *name,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
+    pci_init_mask(pci_dev);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -321,6 +333,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
+    uint32_t mask;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -336,12 +349,17 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
+
+    mask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
+        /* ROM enable bit is writeable */
+        mask |= 1;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
+    *(uint32_t *)(pci_dev->mask + addr) = cpu_to_le32(mask);
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -450,118 +468,21 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d,
-                              uint32_t address, uint32_t val, int len)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
 {
-    int can_write, i;
-    uint32_t end, addr;
-
-    if (len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) ||
-                     (address >= 0x30 && address < 0x34))) {
-        PCIIORegion *r;
-        int reg;
+    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
+    int i;
 
-        if ( address >= 0x30 ) {
-            reg = PCI_ROM_SLOT;
-        }else{
-            reg = (address - 0x10) >> 2;
-        }
-        r = &d->io_regions[reg];
-        if (r->size == 0)
-            goto default_config;
-        /* compute the stored value */
-        if (reg == PCI_ROM_SLOT) {
-            /* keep ROM enable bit */
-            val &= (~(r->size - 1)) | 1;
-        } else {
-            val &= ~(r->size - 1);
-            val |= r->type;
-        }
-        *(uint32_t *)(d->config + address) = cpu_to_le32(val);
-        pci_update_mappings(d);
-        return;
-    }
- default_config:
     /* not efficient, but simple */
-    addr = address;
-    for(i = 0; i < len; i++) {
-        /* default read/write accesses */
-        switch(d->config[0x0e]) {
-        case 0x00:
-        case 0x80:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x10 ... 0x27: /* base */
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x30 ... 0x33: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        default:
-        case 0x01:
-            switch(addr) {
-            case 0x00:
-            case 0x01:
-            case 0x02:
-            case 0x03:
-            case 0x06:
-            case 0x07:
-            case 0x08:
-            case 0x09:
-            case 0x0a:
-            case 0x0b:
-            case 0x0e:
-            case 0x2c ... 0x2f: /* read-only subsystem ID & vendor ID */
-            case 0x38 ... 0x3b: /* rom */
-            case 0x3d:
-                can_write = 0;
-                break;
-            default:
-                can_write = 1;
-                break;
-            }
-            break;
-        }
-        if (can_write) {
-            /* Mask out writes to reserved bits in registers */
-            switch (addr) {
-	    case 0x05:
-                val &= ~PCI_COMMAND_RESERVED_MASK_HI;
-                break;
-            case 0x06:
-                val &= ~PCI_STATUS_RESERVED_MASK_LO;
-                break;
-            case 0x07:
-                val &= ~PCI_STATUS_RESERVED_MASK_HI;
-                break;
-            }
-            d->config[addr] = val;
-        }
-        if (++addr > 0xff)
-        	break;
-        val >>= 8;
+    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
+    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
+        uint8_t mask = d->mask[addr];
+        d->config[addr] = (d->config[addr] & ~mask) | (val & mask);
     }
-
-    end = address + len;
-    if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
-        /* if the command register is modified, we must modify the mappings */
+    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
+        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
+            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
         pci_update_mappings(d);
-    }
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -833,16 +754,8 @@ static void pci_bridge_write_config(PCIDevice *d,
 {
     PCIBridge *s = (PCIBridge *)d;
 
-    if (address == 0x19 || (address == 0x18 && len > 1)) {
-        if (address == 0x19)
-            s->bus->bus_num = val & 0xff;
-        else
-            s->bus->bus_num = (val >> 8) & 0xff;
-#if defined(DEBUG_PCI)
-        printf ("pci-bridge: %s: Assigned bus %d\n", d->name, s->bus->bus_num);
-#endif
-    }
     pci_default_write_config(d, address, val, len);
+    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
 }
 
 PCIBus *pci_find_bus(int bus_num)
diff --git a/hw/pci.h b/hw/pci.h
index ff858a1..a629e60 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -98,16 +98,24 @@ typedef struct PCIIORegion {
 #define PCI_COMMAND		0x04	/* 16 bits */
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
+#define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
+#define PCI_CACHE_LINE_SIZE	0x0c	/* 8 bits */
+#define PCI_LATENCY_TIMER	0x0d	/* 8 bits */
 #define PCI_HEADER_TYPE         0x0e    /* 8 bits */
 #define  PCI_HEADER_TYPE_NORMAL		0
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
+#define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
 #define PCI_INTERRUPT_LINE	0x3c	/* 8 bits */
 #define PCI_INTERRUPT_PIN	0x3d	/* 8 bits */
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
@@ -137,9 +145,17 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Size of the standard PCI config header */
+#define PCI_CONFIG_HEADER_SIZE (PCI_INTERRUPT_PIN + 1)
+/* Size of the standard PCI config space */
+#define PCI_CONFIG_SPACE_SIZE 0x100
+
 struct PCIDevice {
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[PCI_CONFIG_SPACE_SIZE];
+
+    /* Used to implement R/W bytes */
+    uint8_t mask[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
-- 
1.6.3.rc3.1.g830204

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table
  2009-05-11 10:01 [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table Michael S. Tsirkin
@ 2009-05-11 10:21 ` Isaku Yamahata
  0 siblings, 0 replies; 37+ messages in thread
From: Isaku Yamahata @ 2009-05-11 10:21 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: mtosatti, qemu-devel, armbru, Blue Swirl, avi

Here is the created patch on top of your patch.

commit a415d5e07de7ca9dc213d0c70cbd84d67d6c05b5
Author: Isaku Yamahata <yamahata@valinux.co.jp>
Date:   Mon May 11 19:16:55 2009 +0900

    pci: pci_default_config_write() clean up.
    
    clean up of pci_default_config_write() with callback.
    
    Changes v3
    - recreated on top of Michael's patch.
    
    Changes v2
    - converted static table into dynamic initialization.
    - changed callback signature.
    
    Cc: Michael S. Tsirkin <mst@redhat.com>
    Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 32cf3e4..8c5d1fc 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -180,7 +180,7 @@
 #define PCI_COMMAND_PALETTESNOOPING         0x0020
 #define PCI_COMMAND_PARITYDETECTION         0x0040
 #define PCI_COMMAND_ADDRESSDATASTEPPING     0x0080
-#define PCI_COMMAND_SERR                    0x0100
+//#define PCI_COMMAND_SERR                    0x0100	/* duplicated */
 #define PCI_COMMAND_BACKTOBACKTRANS         0x0200
 // PCI 0x08, 0xff000000 (0x09-0x0b:class,0x08:rev)
 #define PCI_CLASS_BASE_DISPLAY        0x03
diff --git a/hw/pci.c b/hw/pci.c
index 99d2afe..ef5318d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -21,6 +21,9 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+
+#include <assert.h>
+
 #include "hw.h"
 #include "pci.h"
 #include "monitor.h"
@@ -236,22 +239,122 @@ int pci_assign_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
     return pci_parse_devaddr(devaddr, domp, busp, slotp);
 }
 
-static void pci_init_mask(PCIDevice *dev)
+static void pci_conf_init(struct PCIConfigReg *config_regs,
+                          uint32_t addr, pci_config_written_t callback,
+                          uint32_t wmask, int len)
 {
     int i;
-    dev->mask[PCI_CACHE_LINE_SIZE] = 0xff;
-    dev->mask[PCI_INTERRUPT_LINE] = 0xff;
-    dev->mask[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY
-                             | PCI_COMMAND_MASTER;
-    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i)
-        dev->mask[i] = 0xff;
+
+    for (i = 0; i < len; i++) {
+        config_regs[addr].wmask = wmask & 0xff;
+        config_regs[addr].shift = 8 * i;
+        config_regs[addr].callback = callback;
+
+        wmask >>= 8;
+    }
+}
+
+static void pci_conf_initb(struct PCIConfigReg *config_regs,
+                           uint32_t addr, pci_config_written_t callback,
+                           uint32_t wmask)
+{
+    pci_conf_init(config_regs, addr, callback,  wmask, 1);
+}
+
+static void pci_conf_initw(struct PCIConfigReg *config_regs,
+                           uint32_t addr, pci_config_written_t callback,
+                           uint32_t wmask)
+{
+    pci_conf_init(config_regs, addr, callback,  wmask, 2);
+}
+
+static void pci_conf_initl(struct PCIConfigReg *config_regs,
+                           uint32_t addr, pci_config_written_t callback,
+                           uint32_t wmask)
+{
+    pci_conf_init(config_regs, addr, callback,  wmask, 4);
+}
+
+static void pci_conf_update_mappings(struct PCIDevice *d, uint32_t old,
+                                    uint32_t written, uint32_t mask)
+{
+    pci_update_mappings(d);
+}
+
+static void pci_conf_init_type_00_default(struct PCIConfigReg *config_regs)
+{
+    uint32_t addr;
+
+    /* Vendor ID, Device ID: read only */
+
+    pci_conf_initw(config_regs, PCI_COMMAND, pci_conf_update_mappings,
+                   PCI_COMMAND_IO |
+                   PCI_COMMAND_MEMORY |
+                   PCI_COMMAND_MASTER |
+                   PCI_COMMAND_SPECIAL |
+                   PCI_COMMAND_INVALIDATE |
+                   PCI_COMMAND_VGA_PALETTE |
+                   PCI_COMMAND_PARITY |
+                   PCI_COMMAND_WAIT |
+                   PCI_COMMAND_SERR |
+                   PCI_COMMAND_FAST_BACK |
+                   PCI_COMMAND_INTX_DISABLE);
+
+    /* nothing is emulated at this moment */
+    pci_conf_initw(config_regs, PCI_STATUS, NULL, 0);
+
+    /* revision id, class code: read only */
+
+    pci_conf_initb(config_regs, PCI_CACHE_LINE_SIZE, NULL, ~0);
+    pci_conf_initb(config_regs, PCI_LATENCY_TIMER, NULL, ~0);
+
+    /* header type: read only */
+
+    /* BIST emulation isn't implemented */
+    pci_conf_initb(config_regs, PCI_BIST, NULL, 0);
+
+    /* bar:wmask will be updated by pci_register_io_region() */
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_0,
+                   pci_conf_update_mappings, 0);
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_1,
+                   pci_conf_update_mappings, 0);
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_2,
+                   pci_conf_update_mappings, 0);
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_3,
+                   pci_conf_update_mappings, 0);
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_4,
+                   pci_conf_update_mappings, 0);
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_5,
+                   pci_conf_update_mappings, 0);
+
+    /* not card bus*/
+    pci_conf_initl(config_regs, PCI_CARDBUS_CIS, NULL, 0);
+
+    /* Subsystem ID, Subsystem Vendor ID: read only*/
+
+    /* mask will be updated by pci_register_io_region() */
+    pci_conf_initl(config_regs, PCI_ROM_ADDRESS, pci_conf_update_mappings, 0);
+
+    pci_conf_initb(config_regs, PCI_CAPABILITY_LIST, NULL, 0);
+
+    /* offset 0x35 ... 0x3d are reserved so is read only */
+
+    pci_conf_initb(config_regs, PCI_INTERRUPT_LINE, NULL, ~0);
+    pci_conf_initb(config_regs, PCI_INTERRUPT_PIN, NULL, 0);
+    pci_conf_initb(config_regs, PCI_MIN_GNT, NULL, 0);
+    pci_conf_initb(config_regs, PCI_MAX_LAT, NULL, 0);
+
+    /* device dependent part */
+    for (addr = PCI_CONFIG_HEADER_SIZE; addr < PCI_CONFIG_SPACE_SIZE; addr++)
+        pci_conf_initb(config_regs, addr, NULL, ~0);
 }
 
 /* -1 for devfn means auto assign */
-PCIDevice *pci_register_device(PCIBus *bus, const char *name,
-                               int instance_size, int devfn,
-                               PCIConfigReadFunc *config_read,
-                               PCIConfigWriteFunc *config_write)
+PCIDevice *pci_register_device_confreg(PCIBus *bus, const char *name,
+				       int instance_size, int devfn,
+				       PCIConfigReadFunc *config_read,
+				       PCIConfigWriteFunc *config_write,
+                                       pci_conf_init_t conf_init)
 {
     PCIDevice *pci_dev;
 
@@ -272,7 +375,7 @@ PCIDevice *pci_register_device(PCIBus *bus, const char *name,
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     memset(pci_dev->irq_state, 0, sizeof(pci_dev->irq_state));
     pci_set_default_subsystem_id(pci_dev);
-    pci_init_mask(pci_dev);
+    conf_init(pci_dev->config_regs);
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -286,6 +389,16 @@ PCIDevice *pci_register_device(PCIBus *bus, const char *name,
     return pci_dev;
 }
 
+PCIDevice *pci_register_device(PCIBus *bus, const char *name,
+                               int instance_size, int devfn,
+                               PCIConfigReadFunc *config_read,
+                               PCIConfigWriteFunc *config_write)
+{
+    return pci_register_device_confreg(bus, name, instance_size, devfn,
+                                       config_read, config_write,
+                                       pci_conf_init_type_00_default);
+}
+
 static target_phys_addr_t pci_to_cpu_addr(target_phys_addr_t addr)
 {
     return addr + pci_mem_base;
@@ -334,7 +447,8 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
 {
     PCIIORegion *r;
     uint32_t addr;
-    uint32_t mask;
+    uint32_t wmask;
+    int i;
 
     if ((unsigned int)region_num >= PCI_NUM_REGIONS)
         return;
@@ -350,17 +464,18 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
     r->size = size;
     r->type = type;
     r->map_func = map_func;
-
-    mask = ~(size - 1);
+    wmask = ~(size - 1);
     if (region_num == PCI_ROM_SLOT) {
         addr = 0x30;
         /* ROM enable bit is writeable */
-        mask |= 1;
+        wmask |= PCI_ROM_ADDRESS_ENABLE;
     } else {
         addr = 0x10 + region_num * 4;
     }
     *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type);
-    *(uint32_t *)(pci_dev->mask + addr) = cpu_to_le32(mask);
+
+    for (i = 0; i < 4; i++)
+        pci_dev->config_regs[addr + i].wmask = (wmask >> (8 * i)) && 0xff;
 }
 
 static void pci_update_mappings(PCIDevice *d)
@@ -469,21 +584,48 @@ uint32_t pci_default_read_config(PCIDevice *d,
     return val;
 }
 
-void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
+void pci_default_write_config(PCIDevice *d,
+                              uint32_t addr, uint32_t val, int len)
 {
-    uint8_t orig[PCI_CONFIG_SPACE_SIZE];
     int i;
 
-    /* not efficient, but simple */
-    memcpy(orig, d->config, PCI_CONFIG_SPACE_SIZE);
-    for(i = 0; i < l && addr < PCI_CONFIG_SPACE_SIZE; val >>= 8, ++i, ++addr) {
-        uint8_t mask = d->mask[addr];
-        d->config[addr] = (d->config[addr] & ~mask) | (val & mask);
+    uint32_t old[4] = {0, 0, 0, 0};
+    uint32_t written[4] = {0, 0, 0, 0};
+    uint32_t mask[4] = {0, 0, 0, 0};
+    pci_config_written_t callback[4] = {NULL, NULL, NULL, NULL};
+    int nb_written = 0;
+
+    assert(len == 1 || len == 2 || len == 4);
+    for (i = 0; i < len; val >>= 8, ++i) {
+        uint8_t wmask = d->config_regs[addr].wmask;
+        uint8_t shift = d->config_regs[addr].shift;
+        pci_config_written_t c = d->config_regs[addr].callback;
+
+        uint8_t old_val = d->config[addr];
+        uint8_t new_val = (old_val & ~wmask) | (val & wmask);
+        d->config[addr] = new_val;
+
+        if (c != NULL) {
+            if (callback[nb_written] != c) {
+                if (nb_written != 0)
+                    nb_written++;
+                callback[nb_written] = c;
+            }
+
+            old[nb_written] |= ((uint32_t)old_val) << shift;
+            written[nb_written] |= ((uint32_t)val & 0xff) << shift;
+            mask[nb_written] |= 0xff << shift;
+        }
+
+        if (++addr >= PCI_CONFIG_SPACE_SIZE)
+             break;
     }
-    if (memcmp(orig + PCI_BASE_ADDRESS_0, d->config + PCI_BASE_ADDRESS_0, 24)
-        || ((orig[PCI_COMMAND] ^ d->config[PCI_COMMAND])
-            & (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)))
-        pci_update_mappings(d);
+
+    if (callback[nb_written] != NULL)
+        nb_written++;
+
+    for (i = 0; i < nb_written; i++)
+        (callback[i])(d, old[i], written[i], mask[i]);
 }
 
 void pci_data_write(void *opaque, uint32_t addr, uint32_t val, int len)
@@ -750,15 +892,6 @@ typedef struct {
     PCIBus *bus;
 } PCIBridge;
 
-static void pci_bridge_write_config(PCIDevice *d,
-                             uint32_t address, uint32_t val, int len)
-{
-    PCIBridge *s = (PCIBridge *)d;
-
-    pci_default_write_config(d, address, val, len);
-    s->bus->bus_num = d->config[PCI_SECONDARY_BUS];
-}
-
 PCIBus *pci_find_bus(int bus_num)
 {
     PCIBus *bus = first_bus;
@@ -779,12 +912,98 @@ PCIDevice *pci_find_device(int bus_num, int slot, int function)
     return bus->devices[PCI_DEVFN(slot, function)];
 }
 
+static void pci_conf_secondary_bus_changed(PCIDevice *d, uint32_t old,
+                                           uint32_t written, uint32_t mask)
+{
+    PCIBridge *s = (PCIBridge*) d;
+    s->bus->bus_num = s->dev.config[PCI_SECONDARY_BUS];
+}
+
+static void pci_conf_init_type_01_default(struct PCIConfigReg *config_regs)
+{
+    uint32_t addr;
+
+    /* Vendor ID, Device ID: read only */
+
+    pci_conf_initw(config_regs, PCI_COMMAND, pci_conf_update_mappings,
+                   PCI_COMMAND_IO |
+                   PCI_COMMAND_MEMORY |
+                   PCI_COMMAND_MASTER |
+                   PCI_COMMAND_SPECIAL |
+                   PCI_COMMAND_INVALIDATE |
+                   PCI_COMMAND_VGA_PALETTE |
+                   PCI_COMMAND_PARITY |
+                   PCI_COMMAND_WAIT |
+                   PCI_COMMAND_SERR |
+                   PCI_COMMAND_FAST_BACK |
+                   PCI_COMMAND_INTX_DISABLE);
+
+    /* nothing is emulated at this moment */
+    pci_conf_initw(config_regs, PCI_STATUS, NULL, 0);
+
+    /* revision id, class code: read only */
+
+    pci_conf_initb(config_regs, PCI_CACHE_LINE_SIZE, NULL, ~0);
+    pci_conf_initb(config_regs, PCI_LATENCY_TIMER, NULL, ~0);
+
+    /* header type: read only */
+
+    /* BIST emulation isn't implemented */
+    pci_conf_initb(config_regs, PCI_BIST, NULL, 0);
+
+    /* bar:wmask will be updated by pci_register_io_region() */
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_0,
+                   pci_conf_update_mappings, 0);
+    pci_conf_initl(config_regs, PCI_BASE_ADDRESS_1,
+                   pci_conf_update_mappings, 0);
+
+    pci_conf_initb(config_regs, PCI_PRIMARY_BUS, NULL, ~0);
+    pci_conf_initb(config_regs, PCI_SECONDARY_BUS,
+                   pci_conf_secondary_bus_changed, ~0);
+    pci_conf_initb(config_regs, PCI_SUBORDINATE_BUS, NULL, ~0);
+    pci_conf_initb(config_regs, PCI_SEC_LATENCY_TIMER, NULL, ~0);
+    pci_conf_initb(config_regs, PCI_IO_BASE, NULL, PCI_IO_RANGE_MASK & 0xff);
+    pci_conf_initb(config_regs, PCI_IO_LIMIT, NULL, PCI_IO_RANGE_MASK & 0xff);
+
+    /* sec status isn't emulated (yet) */
+    pci_conf_initw(config_regs, PCI_SEC_STATUS, NULL, 0);
+
+    pci_conf_initw(config_regs, PCI_MEMORY_BASE, NULL,
+                   PCI_MEMORY_RANGE_MASK & 0xffff);
+    pci_conf_initw(config_regs, PCI_MEMORY_LIMIT, NULL,
+                   PCI_MEMORY_RANGE_MASK & 0xffff);
+    pci_conf_initw(config_regs, PCI_PREF_MEMORY_BASE, NULL,
+                   PCI_PREF_RANGE_MASK & 0xffff);
+    pci_conf_initw(config_regs, PCI_PREF_MEMORY_LIMIT, NULL,
+                   PCI_PREF_RANGE_MASK & 0xffff);
+    pci_conf_initl(config_regs, PCI_PREF_BASE_UPPER32, NULL, ~0);
+    pci_conf_initl(config_regs, PCI_PREF_LIMIT_UPPER32, NULL, ~0);
+
+    /* only support 64K io port */
+    pci_conf_initw(config_regs, PCI_IO_BASE_UPPER16, NULL, 0);
+    pci_conf_initw(config_regs, PCI_IO_LIMIT_UPPER16, NULL, 0);
+
+    pci_conf_initb(config_regs, PCI_CAPABILITY_LIST, NULL, 0);
+
+    /* wmask will be updated by pci_register_io_region() */
+    pci_conf_initl(config_regs, PCI_ROM_ADDRESS1, pci_conf_update_mappings, 0);
+
+    pci_conf_initb(config_regs, PCI_INTERRUPT_LINE, NULL, ~0);
+    pci_conf_initb(config_regs, PCI_INTERRUPT_PIN, NULL, 0);
+    pci_conf_initw(config_regs, PCI_BRIDGE_CONTROL, NULL, ~0);
+
+    /* device dependent part */
+    for (addr = PCI_CONFIG_HEADER_SIZE; addr < PCI_CONFIG_SPACE_SIZE; addr++)
+        pci_conf_initb(config_regs, addr, NULL, ~0);
+}
+
 PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
                         pci_map_irq_fn map_irq, const char *name)
 {
     PCIBridge *s;
-    s = (PCIBridge *)pci_register_device(bus, name, sizeof(PCIBridge),
-                                         devfn, NULL, pci_bridge_write_config);
+    s = (PCIBridge *)pci_register_device_confreg(bus, name, sizeof(PCIBridge),
+                                                 devfn, NULL, NULL,
+                                                 pci_conf_init_type_01_default);
 
     pci_config_set_vendor_id(s->dev.config, vid);
     pci_config_set_device_id(s->dev.config, did);
diff --git a/hw/pci.h b/hw/pci.h
index a629e60..6bb4d66 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -99,6 +99,14 @@ typedef struct PCIIORegion {
 #define  PCI_COMMAND_IO		0x1	/* Enable response in I/O space */
 #define  PCI_COMMAND_MEMORY	0x2	/* Enable response in Memory space */
 #define  PCI_COMMAND_MASTER	0x4	/* Enable bus master */
+#define  PCI_COMMAND_SPECIAL	0x8	/* Enable response to special cycles */
+#define  PCI_COMMAND_INVALIDATE 0x10	/* Use memory write and invalidate */
+#define  PCI_COMMAND_VGA_PALETTE 0x20	/* Enable palette snooping */
+#define  PCI_COMMAND_PARITY	0x40	/* Enable parity checking */
+#define  PCI_COMMAND_WAIT	0x80	/* Enable address/data stepping */
+#define  PCI_COMMAND_SERR	0x100	/* Enable SERR */
+#define  PCI_COMMAND_FAST_BACK	0x200	/* Enable back-to-back writes */
+#define  PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
 #define PCI_STATUS              0x06    /* 16 bits */
 #define PCI_REVISION_ID         0x08    /* 8 bits  */
 #define PCI_CLASS_DEVICE        0x0a    /* Device class */
@@ -109,10 +117,30 @@ typedef struct PCIIORegion {
 #define  PCI_HEADER_TYPE_BRIDGE		1
 #define  PCI_HEADER_TYPE_CARDBUS	2
 #define  PCI_HEADER_TYPE_MULTI_FUNCTION 0x80
+#define PCI_BIST		0x0f	/* 8 bits */
+#define  PCI_BIST_CODE_MASK	0x0f	/* Return result */
+#define  PCI_BIST_START		0x40	/* 1 to start BIST, 2 secs or less */
+#define  PCI_BIST_CAPABLE	0x80	/* 1 if BIST capable */
 #define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
+#define PCI_BASE_ADDRESS_1	0x14	/* 32 bits [htype 0,1 only] */
+#define PCI_BASE_ADDRESS_2	0x18	/* 32 bits [htype 0 only] */
+#define PCI_BASE_ADDRESS_3	0x1c	/* 32 bits */
+#define PCI_BASE_ADDRESS_4	0x20	/* 32 bits */
+#define PCI_BASE_ADDRESS_5	0x24	/* 32 bits */
+#define  PCI_BASE_ADDRESS_SPACE		0x01	/* 0 = memory, 1 = I/O */
+#define  PCI_BASE_ADDRESS_SPACE_IO	0x01
+#define  PCI_BASE_ADDRESS_SPACE_MEMORY	0x00
+#define  PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
+#define  PCI_BASE_ADDRESS_MEM_TYPE_32	0x00	/* 32 bit address */
+#define  PCI_BASE_ADDRESS_MEM_TYPE_1M	0x02	/* Below 1M [obsolete] */
+#define  PCI_BASE_ADDRESS_MEM_TYPE_64	0x04	/* 64 bit address */
+#define  PCI_BASE_ADDRESS_MEM_PREFETCH	0x08	/* prefetchable? */
+#define  PCI_BASE_ADDRESS_MEM_MASK	(~0x0fUL)
+#define  PCI_BASE_ADDRESS_IO_MASK	(~0x03UL)
 #define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
 #define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
 #define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
+#define PCI_CARDBUS_CIS         0x28
 #define PCI_SUBSYSTEM_VENDOR_ID 0x2c    /* 16 bits */
 #define PCI_SUBSYSTEM_ID        0x2e    /* 16 bits */
 #define PCI_CAPABILITY_LIST	0x34	/* Offset of first capability list entry */
@@ -125,6 +153,10 @@ typedef struct PCIIORegion {
 #define PCI_SUBVENDOR_ID        0x2c    /* obsolete, use PCI_SUBSYSTEM_VENDOR_ID */
 #define PCI_SUBDEVICE_ID        0x2e    /* obsolete, use PCI_SUBSYSTEM_ID */
 
+#define PCI_ROM_ADDRESS         0x30    /* Bits 31..11 are address, 10..1 reserved */
+#define  PCI_ROM_ADDRESS_ENABLE 0x01
+#define PCI_ROM_ADDRESS_MASK    (~0x7ffUL)
+
 /* Bits in the PCI Status Register (PCI 2.3 spec) */
 #define PCI_STATUS_RESERVED1	0x007
 #define PCI_STATUS_INT_STATUS	0x008
@@ -145,17 +177,68 @@ typedef struct PCIIORegion {
 
 #define PCI_COMMAND_RESERVED_MASK_HI (PCI_COMMAND_RESERVED >> 8)
 
+/* Header type 1 (PCI-to-PCI bridges) */
+#define PCI_PRIMARY_BUS		0x18	/* Primary bus number */
+#define PCI_SECONDARY_BUS	0x19	/* Secondary bus number */
+#define PCI_SUBORDINATE_BUS	0x1a	/* Highest bus number behind the bridge */
+#define PCI_SEC_LATENCY_TIMER	0x1b	/* Latency timer for secondary interface */
+#define PCI_IO_BASE		0x1c	/* I/O range behind the bridge */
+#define PCI_IO_LIMIT		0x1d
+#define  PCI_IO_RANGE_TYPE_MASK	0x0fUL	/* I/O bridging type */
+#define  PCI_IO_RANGE_TYPE_16	0x00
+#define  PCI_IO_RANGE_TYPE_32	0x01
+#define  PCI_IO_RANGE_MASK	(~0x0fUL)
+#define PCI_SEC_STATUS		0x1e	/* Secondary status register, only bit 14 used */
+#define PCI_MEMORY_BASE		0x20	/* Memory range behind */
+#define PCI_MEMORY_LIMIT	0x22
+#define  PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define  PCI_MEMORY_RANGE_MASK	(~0x0fUL)
+#define PCI_PREF_MEMORY_BASE	0x24	/* Prefetchable memory range behind */
+#define PCI_PREF_MEMORY_LIMIT	0x26
+#define  PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define  PCI_PREF_RANGE_TYPE_32	0x00
+#define  PCI_PREF_RANGE_TYPE_64	0x01
+#define  PCI_PREF_RANGE_MASK	(~0x0fUL)
+#define PCI_PREF_BASE_UPPER32	0x28	/* Upper half of prefetchable memory range */
+#define PCI_PREF_LIMIT_UPPER32	0x2c
+#define PCI_IO_BASE_UPPER16	0x30	/* Upper half of I/O addresses */
+#define PCI_IO_LIMIT_UPPER16	0x32
+/* 0x34 same as for htype 0 */
+/* 0x35-0x3b is reserved */
+#define PCI_ROM_ADDRESS1	0x38	/* Same as PCI_ROM_ADDRESS, but for htype 1 */
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_BRIDGE_CONTROL	0x3e
+#define  PCI_BRIDGE_CTL_PARITY	0x01	/* Enable parity detection on secondary interface */
+#define  PCI_BRIDGE_CTL_SERR	0x02	/* The same for SERR forwarding */
+#define  PCI_BRIDGE_CTL_ISA	0x04	/* Enable ISA mode */
+#define  PCI_BRIDGE_CTL_VGA	0x08	/* Forward VGA addresses */
+#define  PCI_BRIDGE_CTL_MASTER_ABORT	0x20  /* Report master aborts */
+#define  PCI_BRIDGE_CTL_BUS_RESET	0x40	/* Secondary bus reset */
+#define  PCI_BRIDGE_CTL_FAST_BACK	0x80	/* Fast Back2Back enabled on secondary interface */
+
+/* Bits in the PCI Command Register (PCI 2.3 spec) */
+#define PCI_COMMAND_RESERVED_BRIDGE	0xf880
+
+#define PCI_COMMAND_RESERVED_MASK_HI_BRIDGE (PCI_COMMAND_RESERVED >> 8)
+
 /* Size of the standard PCI config header */
-#define PCI_CONFIG_HEADER_SIZE (PCI_INTERRUPT_PIN + 1)
+#define PCI_CONFIG_HEADER_SIZE  0x40
 /* Size of the standard PCI config space */
-#define PCI_CONFIG_SPACE_SIZE 0x100
+#define PCI_CONFIG_SPACE_SIZE   0x100
+
+typedef void (*pci_config_written_t)(struct PCIDevice *d,
+                                     uint32_t old, uint32_t written,
+                                     uint32_t mask);
+struct PCIConfigReg {
+    uint8_t wmask;
+    uint8_t shift;
+    pci_config_written_t callback;
+};
 
 struct PCIDevice {
     /* PCI config space */
     uint8_t config[PCI_CONFIG_SPACE_SIZE];
-
-    /* Used to implement R/W bytes */
-    uint8_t mask[PCI_CONFIG_SPACE_SIZE];
+    struct PCIConfigReg config_regs[PCI_CONFIG_SPACE_SIZE];
 
     /* the following fields are read only */
     PCIBus *bus;
@@ -177,6 +260,13 @@ struct PCIDevice {
     int irq_state[4];
 };
 
+typedef void(*pci_conf_init_t)(struct PCIConfigReg*);
+
+PCIDevice *pci_register_device_confreg(PCIBus *bus, const char *name,
+				       int instance_size, int devfn,
+				       PCIConfigReadFunc *config_read,
+				       PCIConfigWriteFunc *config_write,
+                                       pci_conf_init_t conf_init);
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
                                int instance_size, int devfn,
                                PCIConfigReadFunc *config_read,



-- 
yamahata

^ permalink raw reply related	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2009-06-11  8:32 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <cover.1243253205.git.mst@redhat.com>
2009-05-25 12:12 ` [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table Michael S. Tsirkin
2009-05-25 12:24 ` [Qemu-devel] [PATCH 01/11] " Michael S. Tsirkin
2009-05-25 12:25 ` [Qemu-devel] [PATCH 02/11] qemu: capability bits in pci save/restore Michael S. Tsirkin
2009-05-25 12:25 ` [Qemu-devel] [PATCH 03/11] qemu: add routines to manage PCI capabilities Michael S. Tsirkin
2009-05-26  8:49   ` Isaku Yamahata
2009-05-26  9:22     ` Michael S. Tsirkin
2009-05-26  9:30   ` [Qemu-devel] [PATCH] qemu: fix pci_find_capability for multiple caps Michael S. Tsirkin
2009-05-25 12:25 ` [Qemu-devel] [PATCH 04/11] qemu: helper routines for pci access Michael S. Tsirkin
2009-05-26  2:33   ` Isaku Yamahata
2009-05-26  6:41     ` Michael S. Tsirkin
2009-05-26  8:07       ` Avi Kivity
2009-05-26  8:49         ` Michael S. Tsirkin
2009-05-25 12:25 ` [Qemu-devel] [PATCH 05/11] qemu: MSI-X support functions Michael S. Tsirkin
2009-06-09 23:19   ` Paul Brook
2009-06-10  9:46     ` Michael S. Tsirkin
2009-06-10 14:07       ` Paul Brook
2009-06-10 14:25         ` Michael S. Tsirkin
2009-06-10 14:39           ` Paul Brook
2009-06-10 14:47             ` Michael S. Tsirkin
2009-06-10 15:15               ` Paul Brook
2009-06-10 15:52                 ` Michael S. Tsirkin
2009-06-10 16:08                   ` Paul Brook
2009-06-10 16:26                     ` Michael S. Tsirkin
2009-06-10 16:46                       ` Paul Brook
2009-06-10 17:03                         ` Michael S. Tsirkin
2009-06-10 17:30                           ` Paul Brook
2009-06-10 18:07                             ` Michael S. Tsirkin
2009-06-10 19:04                               ` Paul Brook
2009-06-11  8:29                                 ` Michael S. Tsirkin
2009-05-25 12:25 ` [Qemu-devel] [PATCH 06/11] qemu: add flag to disable MSI-X by default Michael S. Tsirkin
2009-05-25 12:25 ` [Qemu-devel] [PATCH 07/11] qemu: minimal MSI/MSI-X implementation for PC Michael S. Tsirkin
2009-05-25 12:26 ` [Qemu-devel] [PATCH 08/11] qemu: add support for resizing regions Michael S. Tsirkin
2009-05-25 12:26 ` [Qemu-devel] [PATCH 09/11] qemu: virtio support for many interrupt vectors Michael S. Tsirkin
2009-05-25 12:26 ` [Qemu-devel] [PATCH 10/11] qemu: MSI-X support in virtio PCI Michael S. Tsirkin
2009-05-25 12:26 ` [Qemu-devel] [PATCH 11/11] qemu: request 3 vectors in virtio-net Michael S. Tsirkin
2009-05-11 10:01 [Qemu-devel] [PATCHv2] qemu: make default_write_config use mask table Michael S. Tsirkin
2009-05-11 10:21 ` Isaku Yamahata

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).