All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5][v2] Userspace for MSI support of KVM
@ 2008-11-24 11:50 Sheng Yang
  2008-11-24 11:50 ` [PATCH 1/5] kvm: Replace force type convert with container_of() Sheng Yang
                   ` (5 more replies)
  0 siblings, 6 replies; 9+ messages in thread
From: Sheng Yang @ 2008-11-24 11:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, qemu-devel

Hi Avi & Anthony

Here is the userspace for MSI support of KVM.

Main change from v1:
Make device assignment depend on libpci.
Move the capability framework to pci.c (this patch may be acceptable to QEMU).

Thanks!
--
regards
Yang, Sheng

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/5] kvm: Replace force type convert with container_of()
  2008-11-24 11:50 [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
@ 2008-11-24 11:50 ` Sheng Yang
  2008-11-24 11:50 ` [PATCH 2/5] Make device assignment depend on libpci Sheng Yang
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-11-24 11:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, qemu-devel, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |   20 ++++++++++++--------
 1 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index 9a790c6..786b2f0 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -144,7 +144,7 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
 static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
                                    uint32_t e_phys, uint32_t e_size, int type)
 {
-    AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
+    AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     uint32_t old_ephys = region->e_physbase;
     uint32_t old_esize = region->e_size;
@@ -172,7 +172,7 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
                                     uint32_t addr, uint32_t size, int type)
 {
-    AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
+    AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     int first_map = (region->e_size == 0);
     CPUState *env;
@@ -221,6 +221,7 @@ static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
 {
     int fd;
     ssize_t ret;
+    AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
 
     DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
           ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
@@ -242,7 +243,7 @@ static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
           ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
           (uint16_t) address, val, len);
 
-    fd = ((AssignedDevice *)d)->real_device.config_fd;
+    fd = pci_dev->real_device.config_fd;
 
 again:
     ret = pwrite(fd, &val, len, address);
@@ -263,6 +264,7 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
     uint32_t val = 0;
     int fd;
     ssize_t ret;
+    AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
 
     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
         address == 0x3c || address == 0x3d) {
@@ -276,7 +278,7 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
     if (address == 0xFC)
         goto do_log;
 
-    fd = ((AssignedDevice *)d)->real_device.config_fd;
+    fd = pci_dev->real_device.config_fd;
 
 again:
     ret = pread(fd, &val, len, address);
@@ -489,16 +491,18 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
 {
     int r;
     AssignedDevice *dev;
+    PCIDevice *pci_dev;
     uint8_t e_device, e_intx;
     struct kvm_assigned_pci_dev assigned_dev_data;
 
     DEBUG("Registering real physical device %s (bus=%x dev=%x func=%x)\n",
           adev->name, adev->bus, adev->dev, adev->func);
 
-    dev = (AssignedDevice *)
-        pci_register_device(bus, adev->name, sizeof(AssignedDevice),
-                            -1, assigned_dev_pci_read_config,
-                            assigned_dev_pci_write_config);
+    pci_dev = pci_register_device(bus, adev->name,
+              sizeof(AssignedDevice), -1, assigned_dev_pci_read_config,
+              assigned_dev_pci_write_config);
+    dev = container_of(pci_dev, AssignedDevice, dev);
+
     if (NULL == dev) {
         fprintf(stderr, "%s: Error: Couldn't register real device %s\n",
                 __func__, adev->name);
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/5] Make device assignment depend on libpci
  2008-11-24 11:50 [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
  2008-11-24 11:50 ` [PATCH 1/5] kvm: Replace force type convert with container_of() Sheng Yang
@ 2008-11-24 11:50 ` Sheng Yang
  2008-11-24 11:50 ` [PATCH 3/5] Figure out device capability Sheng Yang
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-11-24 11:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, qemu-devel, Sheng Yang

libpci is used later for capability detection.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/Makefile.target |    1 +
 qemu/configure       |   20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/qemu/Makefile.target b/qemu/Makefile.target
index 05ace8e..59653ba 100644
--- a/qemu/Makefile.target
+++ b/qemu/Makefile.target
@@ -735,6 +735,7 @@ OBJS += device-hotplug.o
 
 ifeq ($(USE_KVM_DEVICE_ASSIGNMENT), 1)
 OBJS+= device-assignment.o
+LIBS+=-lpci
 endif
 
 ifeq ($(TARGET_BASE_ARCH), i386)
diff --git a/qemu/configure b/qemu/configure
index 18ef980..bdde5ed 100755
--- a/qemu/configure
+++ b/qemu/configure
@@ -808,6 +808,26 @@ EOF
     fi
 fi
 
+# libpci probe for kvm_cap_device_assignment
+if test $kvm_cap_device_assignment = "yes" ; then
+cat > $TMPC << EOF
+#include <pci/pci.h>
+#ifndef PCI_VENDOR_ID
+#error NO LIBPCI
+#endif
+int main(void) { return 0; }
+EOF
+    if $cc $ARCH_CFLAGS -o $TMPE ${OS_CFLAGS} $TMPC 2>/dev/null ; then
+        :
+    else
+        echo
+        echo "Error: libpci check failed"
+        echo "Disable KVM Device Assignment capability."
+        echo
+        kvm_cap_device_assignment="no"
+    fi
+fi
+
 ##########################################
 # zlib check
 
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/5] Figure out device capability
  2008-11-24 11:50 [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
  2008-11-24 11:50 ` [PATCH 1/5] kvm: Replace force type convert with container_of() Sheng Yang
  2008-11-24 11:50 ` [PATCH 2/5] Make device assignment depend on libpci Sheng Yang
@ 2008-11-24 11:50 ` Sheng Yang
  2008-11-25  6:06   ` Sheng Yang
  2008-11-24 11:50 ` [PATCH 4/5] Support for " Sheng Yang
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 9+ messages in thread
From: Sheng Yang @ 2008-11-24 11:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, qemu-devel, Sheng Yang, Allen Kay

Try to figure out the device's capabilities in update_dev_cap(). For now we
only care about the MSI capability.

The function pci_find_cap_offset() was originally written by Allen for Xen.
Note that it needs root privileges to work, and it depends on libpci.

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |   50 +++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/device-assignment.h |    5 ++++
 2 files changed, 55 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index 786b2f0..d3105bc 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -216,6 +216,35 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
                           (r_dev->v_addrs + region_num));
 }
 
+uint8_t pci_find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
+{
+    int id;
+    int max_cap = 48;
+    int pos = PCI_CAPABILITY_LIST;
+    int status;
+
+    status = pci_read_byte(pci_dev, PCI_STATUS);
+    if ((status & PCI_STATUS_CAP_LIST) == 0)
+        return 0;
+
+    while (max_cap--) {
+        pos = pci_read_byte(pci_dev, pos);
+        if (pos < 0x40)
+            break;
+
+        pos &= ~3;
+        id = pci_read_byte(pci_dev, pos + PCI_CAP_LIST_ID);
+
+        if (id == 0xff)
+            break;
+        if (id == cap)
+            return pos;
+
+        pos += PCI_CAP_LIST_NEXT;
+    }
+    return 0;
+}
+
 static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
                                           uint32_t val, int len)
 {
@@ -367,6 +396,25 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
     return 0;
 }
 
+static void update_dev_cap(AssignedDevice *pci_dev, uint8_t r_bus,
+                           uint8_t r_dev, uint8_t r_func)
+{
+#ifdef KVM_CAP_DEVICE_MSI
+    struct pci_access *pacc;
+    struct pci_dev *pdev;
+    int r;
+
+    pacc = pci_alloc();
+    pci_init(pacc);
+    pdev = pci_get_dev(pacc, 0, r_bus, r_dev, r_func);
+    pci_cleanup(pacc);
+    r = pci_find_cap_offset(pdev, PCI_CAP_ID_MSI);
+    if (r)
+        pci_dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
+    pci_free_dev(pdev);
+#endif
+}
+
 static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
                            uint8_t r_dev, uint8_t r_func)
 {
@@ -436,6 +484,8 @@ again:
     fclose(f);
 
     dev->region_number = r;
+
+    update_dev_cap(pci_dev, r_bus, r_dev, r_func);
     return 0;
 }
 
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index d6caa67..de60988 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -29,6 +29,7 @@
 #define __DEVICE_ASSIGNMENT_H__
 
 #include <sys/mman.h>
+#include <pci/pci.h>
 #include "qemu-common.h"
 #include "sys-queue.h"
 #include "pci.h"
@@ -80,6 +81,10 @@ typedef struct {
     unsigned char h_busnr;
     unsigned int h_devfn;
     int bound;
+    struct {
+#define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
+        int available;
+    } cap;
 } AssignedDevice;
 
 typedef struct AssignedDevInfo AssignedDevInfo;
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/5] Support for device capability
  2008-11-24 11:50 [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
                   ` (2 preceding siblings ...)
  2008-11-24 11:50 ` [PATCH 3/5] Figure out device capability Sheng Yang
@ 2008-11-24 11:50 ` Sheng Yang
  2008-11-24 11:50 ` [PATCH 5/5] kvm: expose MSI capability to guest Sheng Yang
  2008-11-27  2:12 ` [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
  5 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-11-24 11:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, qemu-devel, Sheng Yang

This framework can easily be extended to support device capabilities such as
MSI/MSI-X.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/pci.c |   85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/pci.h |   30 ++++++++++++++++++++
 2 files changed, 115 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index 75bc9a9..73f73da 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -339,11 +339,65 @@ static void pci_update_mappings(PCIDevice *d)
     }
 }
 
+int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len)
+{
+    if (pci_dev->cap.supported && address >= pci_dev->cap.start &&
+            (address + len) < pci_dev->cap.start + pci_dev->cap.length)
+        return 1;
+    return 0;
+}
+
+uint32_t pci_default_cap_read_config(PCIDevice *pci_dev,
+                                     uint32_t address, int len)
+{
+    uint32_t val = 0;
+
+    if (pci_access_cap_config(pci_dev, address, len)) {
+        switch(len) {
+        default:
+        case 4:
+            if (address < pci_dev->cap.start + pci_dev->cap.length - 4) {
+                val = le32_to_cpu(*(uint32_t *)(pci_dev->cap.config
+                            + address - pci_dev->cap.start));
+                break;
+            }
+            /* fall through */
+        case 2:
+            if (address < pci_dev->cap.start + pci_dev->cap.length - 2) {
+                val = le16_to_cpu(*(uint16_t *)(pci_dev->cap.config
+                            + address - pci_dev->cap.start));
+                break;
+            }
+            /* fall through */
+        case 1:
+            val = pci_dev->cap.config[address - pci_dev->cap.start];
+            break;
+        }
+    }
+    return val;
+}
+
+void pci_default_cap_write_config(PCIDevice *pci_dev,
+                                  uint32_t address, uint32_t val, int len)
+{
+    if (pci_access_cap_config(pci_dev, address, len)) {
+        int i;
+        for (i = 0; i < len; i++) {
+            pci_dev->cap.config[address + i - pci_dev->cap.start] = val;
+            val >>= 8;
+        }
+        return;
+    }
+}
+
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len)
 {
     uint32_t val;
 
+    if (pci_access_cap_config(d, address, len))
+        return d->cap.config_read(d, address, len);
+
     switch(len) {
     default:
     case 4:
@@ -397,6 +451,11 @@ void pci_default_write_config(PCIDevice *d,
         return;
     }
  default_config:
+    if (pci_access_cap_config(d, address, len)) {
+        d->cap.config_write(d, address, val, len);
+        return;
+    }
+
     /* not efficient, but simple */
     addr = address;
     for(i = 0; i < len; i++) {
@@ -802,3 +861,29 @@ PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id,
     s->bus = pci_register_secondary_bus(&s->dev, map_irq);
     return s->bus;
 }
+
+void pci_enable_capability_support(PCIDevice *pci_dev,
+                                   uint32_t config_start,
+                                   PCICapConfigReadFunc *config_read,
+                                   PCICapConfigWriteFunc *config_write,
+                                   PCICapConfigInitFunc *config_init)
+{
+    if (!pci_dev)
+        return;
+
+    if (config_start >= 0x40 && config_start < 0xff)
+        pci_dev->cap.start = config_start;
+    else
+        pci_dev->cap.start = PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR;
+    if (config_read)
+        pci_dev->cap.config_read = config_read;
+    else
+        pci_dev->cap.config_read = pci_default_cap_read_config;
+    if (config_write)
+        pci_dev->cap.config_write = config_write;
+    else
+        pci_dev->cap.config_write = pci_default_cap_write_config;
+    pci_dev->cap.supported = 1;
+    pci_dev->config[0x34] = pci_dev->cap.start;
+    config_init(pci_dev);
+}
diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
index e11fbbf..86b4ae5 100644
--- a/qemu/hw/pci.h
+++ b/qemu/hw/pci.h
@@ -19,6 +19,12 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
                                 uint32_t addr, uint32_t size, int type);
 typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
 
+typedef void PCICapConfigWriteFunc(PCIDevice *pci_dev,
+                                   uint32_t address, uint32_t val, int len);
+typedef uint32_t PCICapConfigReadFunc(PCIDevice *pci_dev,
+                                      uint32_t address, int len);
+typedef void PCICapConfigInitFunc(PCIDevice *pci_dev);
+
 #define PCI_ADDRESS_SPACE_MEM		0x00
 #define PCI_ADDRESS_SPACE_IO		0x01
 #define PCI_ADDRESS_SPACE_MEM_PREFETCH	0x08
@@ -46,6 +52,10 @@ typedef struct PCIIORegion {
 #define PCI_MIN_GNT		0x3e	/* 8 bits */
 #define PCI_MAX_LAT		0x3f	/* 8 bits */
 
+#define PCI_CAPABILITY_CONFIG_MAX_LENGTH 0x60
+#define PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR 0x40
+#define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10
+
 struct PCIDevice {
     /* PCI config space */
     uint8_t config[256];
@@ -68,6 +78,15 @@ struct PCIDevice {
 
     /* Current IRQ levels.  Used internally by the generic PCI code.  */
     int irq_state[4];
+
+    /* Device capability configuration space */
+    struct {
+        int supported;
+        uint8_t config[PCI_CAPABILITY_CONFIG_MAX_LENGTH];
+        unsigned int start, length;
+        PCICapConfigReadFunc *config_read;
+        PCICapConfigWriteFunc *config_write;
+    } cap;
 };
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
@@ -81,6 +100,12 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num,
                             uint32_t size, int type,
                             PCIMapIORegionFunc *map_func);
 
+void pci_enable_capability_support(PCIDevice *pci_dev,
+                                   uint32_t config_start,
+                                   PCICapConfigReadFunc *config_read,
+                                   PCICapConfigWriteFunc *config_write,
+                                   PCICapConfigInitFunc *config_init);
+
 int pci_map_irq(PCIDevice *pci_dev, int pin);
 uint32_t pci_default_read_config(PCIDevice *d,
                                  uint32_t address, int len);
@@ -88,6 +113,11 @@ void pci_default_write_config(PCIDevice *d,
                               uint32_t address, uint32_t val, int len);
 void pci_device_save(PCIDevice *s, QEMUFile *f);
 int pci_device_load(PCIDevice *s, QEMUFile *f);
+uint32_t pci_default_cap_read_config(PCIDevice *pci_dev,
+                                     uint32_t address, int len);
+void pci_default_cap_write_config(PCIDevice *pci_dev,
+                                  uint32_t address, uint32_t val, int len);
+int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len);
 
 typedef void (*pci_set_irq_fn)(qemu_irq *pic, int irq_num, int level);
 typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/5] kvm: expose MSI capability to guest
  2008-11-24 11:50 [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
                   ` (3 preceding siblings ...)
  2008-11-24 11:50 ` [PATCH 4/5] Support for " Sheng Yang
@ 2008-11-24 11:50 ` Sheng Yang
  2008-11-27  3:51   ` Sheng Yang
  2008-11-27  2:12 ` [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
  5 siblings, 1 reply; 9+ messages in thread
From: Sheng Yang @ 2008-11-24 11:50 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Anthony Liguori, kvm, qemu-devel, Sheng Yang


Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |   90 +++++++++++++++++++++++++++++++++++++++---
 qemu/hw/device-assignment.h |    2 +
 2 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index d3105bc..67bd6b3 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -262,7 +262,8 @@ static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
     }
 
     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
-        address == 0x3c || address == 0x3d) {
+        address == 0x3c || address == 0x3d ||
+        pci_access_cap_config(d, address, len)) {
         /* used for update-mappings (BAR emulation) */
         pci_default_write_config(d, address, val, len);
         return;
@@ -296,7 +297,8 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
     AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
 
     if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
-        address == 0x3c || address == 0x3d) {
+        address == 0x3c || address == 0x3d ||
+        pci_access_cap_config(d, address, len)) {
         val = pci_default_read_config(d, address, len);
         DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
               (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
@@ -325,11 +327,13 @@ do_log:
     DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
           (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
 
-    /* kill the special capabilities */
-    if (address == 4 && len == 4)
-        val &= ~0x100000;
-    else if (address == 6)
-        val &= ~0x10;
+    if (!pci_dev->cap.available) {
+        /* kill the special capabilities */
+        if (address == 4 && len == 4)
+            val &= ~0x100000;
+        else if (address == 6)
+            val &= ~0x10;
+    }
 
     return val;
 }
@@ -537,6 +541,73 @@ void assigned_dev_update_irq(PCIDevice *d)
     }
 }
 
+#ifdef KVM_CAP_DEVICE_MSI
+static void assigned_dev_enable_msi(PCIDevice *pci_dev)
+{
+    int r;
+    struct kvm_assigned_irq assigned_irq_data;
+    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+
+    memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
+    assigned_irq_data.assigned_dev_id  =
+            calc_assigned_dev_id(assigned_dev->h_busnr,
+                    (uint8_t)assigned_dev->h_devfn);
+    assigned_irq_data.guest_msi.addr_lo = *(uint32_t *)
+            (pci_dev->cap.config + 4);
+    assigned_irq_data.guest_msi.data = *(uint16_t *)
+            (pci_dev->cap.config + 8);
+    assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+    r = kvm_assign_irq(kvm_context, &assigned_irq_data);
+    if (r < 0) {
+        perror("assigned_dev_enable_msi");
+        assigned_dev->cap.enabled &= ~ASSIGNED_DEVICE_MSI_ENABLED;
+        /* Fail to enable MSI, enable INTx instead */
+        assigned_dev_update_irq(pci_dev);
+    }
+}
+#endif
+
+void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
+                                          uint32_t val, int len)
+{
+    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+    uint32_t pos = pci_dev->cap.start;
+    uint8_t target_byte, target_position;
+
+    pci_default_cap_write_config(pci_dev, address, val, len);
+#ifdef KVM_CAP_DEVICE_MSI
+    /* Check if guest want to enable MSI */
+    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
+        target_position = pos + 2;
+        if (address <= target_position && address + len > target_position) {
+            target_byte = (uint8_t)(val >> (target_position - address));
+            if (target_byte == 1) {
+                assigned_dev->cap.enabled |= ASSIGNED_DEVICE_MSI_ENABLED;
+                assigned_dev_enable_msi(pci_dev);
+                if (!assigned_dev->cap.enabled & ASSIGNED_DEVICE_MSI_ENABLED)
+                    pci_dev->cap.config[target_position - pos] = 0;
+            }
+        }
+        pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+    }
+#endif
+    return;
+}
+
+void assigned_device_pci_cap_init(PCIDevice *pci_dev)
+{
+    AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
+
+#ifdef KVM_CAP_DEVICE_MSI
+    /* Expose MSI capability
+     * MSI capability is the 1st capability in cap.config */
+    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
+        pci_dev->cap.config[0] = 0x5;
+        pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+    }
+#endif
+}
+
 struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
 {
     int r;
@@ -580,6 +651,11 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
     dev->h_busnr = adev->bus;
     dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
 
+    if (dev->cap.available)
+        pci_enable_capability_support(pci_dev, 0, NULL,
+                                      assigned_device_pci_cap_write_config,
+                                      assigned_device_pci_cap_init);
+
     memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
     assigned_dev_data.assigned_dev_id  =
 	calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index de60988..da8e7e1 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -84,6 +84,8 @@ typedef struct {
     struct {
 #define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
         int available;
+#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
+        int enabled;
     } cap;
 } AssignedDevice;
 
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/5] Figure out device capability
  2008-11-24 11:50 ` [PATCH 3/5] Figure out device capability Sheng Yang
@ 2008-11-25  6:06   ` Sheng Yang
  0 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-11-25  6:06 UTC (permalink / raw)
  To: Avi Kivity, Anthony Liguori; +Cc: kvm, Sheng Yang, Allen Kay

Try to figure out the device's capabilities in update_dev_cap(). For now we
only care about the MSI capability.

The function pci_find_cap_offset() was originally written by Allen for Xen.
Note that it needs root privileges to work, and it depends on libpci.

(Update: Make update_dev_cap() more generic.)

Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 qemu/hw/device-assignment.c |   50 +++++++++++++++++++++++++++++++++++++++++++
 qemu/hw/device-assignment.h |    5 ++++
 2 files changed, 55 insertions(+), 0 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index 786b2f0..f79cc67 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -216,6 +216,35 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
                           (r_dev->v_addrs + region_num));
 }
 
+uint8_t pci_find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
+{
+    int id;
+    int max_cap = 48;
+    int pos = PCI_CAPABILITY_LIST;
+    int status;
+
+    status = pci_read_byte(pci_dev, PCI_STATUS);
+    if ((status & PCI_STATUS_CAP_LIST) == 0)
+        return 0;
+
+    while (max_cap--) {
+        pos = pci_read_byte(pci_dev, pos);
+        if (pos < 0x40)
+            break;
+
+        pos &= ~3;
+        id = pci_read_byte(pci_dev, pos + PCI_CAP_LIST_ID);
+
+        if (id == 0xff)
+            break;
+        if (id == cap)
+            return pos;
+
+        pos += PCI_CAP_LIST_NEXT;
+    }
+    return 0;
+}
+
 static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
                                           uint32_t val, int len)
 {
@@ -367,6 +396,25 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
     return 0;
 }
 
+static void update_dev_cap(AssignedDevice *pci_dev, uint8_t r_bus,
+                           uint8_t r_dev, uint8_t r_func)
+{
+    struct pci_access *pacc;
+    struct pci_dev *pdev;
+    int r;
+
+    pacc = pci_alloc();
+    pci_init(pacc);
+    pdev = pci_get_dev(pacc, 0, r_bus, r_dev, r_func);
+    pci_cleanup(pacc);
+#ifdef KVM_CAP_DEVICE_MSI
+    r = pci_find_cap_offset(pdev, PCI_CAP_ID_MSI);
+    if (r)
+        pci_dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
+#endif
+    pci_free_dev(pdev);
+}
+
 static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
                            uint8_t r_dev, uint8_t r_func)
 {
@@ -436,6 +484,8 @@ again:
     fclose(f);
 
     dev->region_number = r;
+
+    update_dev_cap(pci_dev, r_bus, r_dev, r_func);
     return 0;
 }
 
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index d6caa67..de60988 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -29,6 +29,7 @@
 #define __DEVICE_ASSIGNMENT_H__
 
 #include <sys/mman.h>
+#include <pci/pci.h>
 #include "qemu-common.h"
 #include "sys-queue.h"
 #include "pci.h"
@@ -80,6 +81,10 @@ typedef struct {
     unsigned char h_busnr;
     unsigned int h_devfn;
     int bound;
+    struct {
+#define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
+        int available;
+    } cap;
 } AssignedDevice;
 
 typedef struct AssignedDevInfo AssignedDevInfo;
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/5][v2] Userspace for MSI support of KVM
  2008-11-24 11:50 [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
                   ` (4 preceding siblings ...)
  2008-11-24 11:50 ` [PATCH 5/5] kvm: expose MSI capability to guest Sheng Yang
@ 2008-11-27  2:12 ` Sheng Yang
  5 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-11-27  2:12 UTC (permalink / raw)
  To: kvm; +Cc: Avi Kivity, Anthony Liguori, qemu-devel

On Monday 24 November 2008 19:50:30 Sheng Yang wrote:
> Hi Avi & Anthony
>
> Here is the userspace for MSI support of KVM.
>
> Main change from v1:
> Make device assignment depends on libpci.
> Move capability framework to pci.c (this patch may can be accepted by
> QEmu).

Avi & Anthony

Any comments?

Can QEmu upstream accept "[PATCH 4/5] Support for device capability"?

Thanks!
-- 
regards
Yang, Sheng


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 5/5] kvm: expose MSI capability to guest
  2008-11-24 11:50 ` [PATCH 5/5] kvm: expose MSI capability to guest Sheng Yang
@ 2008-11-27  3:51   ` Sheng Yang
  0 siblings, 0 replies; 9+ messages in thread
From: Sheng Yang @ 2008-11-27  3:51 UTC (permalink / raw)
  To: kvm; +Cc: Avi Kivity, Anthony Liguori, qemu-devel

On Monday 24 November 2008 19:50:35 Sheng Yang wrote:
> Signed-off-by: Sheng Yang <sheng@linux.intel.com>

Oh, hold this one for a moment...

I don't want to deal with the compatibility problem of delivering msi_msg, so I
will send out a gsi->msi mapping patch and update the userspace patch.

-- 
regards
Yang, Sheng

> ---
>  qemu/hw/device-assignment.c |   90
> +++++++++++++++++++++++++++++++++++++++--- qemu/hw/device-assignment.h |   
> 2 +
>  2 files changed, 85 insertions(+), 7 deletions(-)
>
> diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
> index d3105bc..67bd6b3 100644
> --- a/qemu/hw/device-assignment.c
> +++ b/qemu/hw/device-assignment.c
> @@ -262,7 +262,8 @@ static void assigned_dev_pci_write_config(PCIDevice *d,
> uint32_t address, }
>
>      if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> -        address == 0x3c || address == 0x3d) {
> +        address == 0x3c || address == 0x3d ||
> +        pci_access_cap_config(d, address, len)) {
>          /* used for update-mappings (BAR emulation) */
>          pci_default_write_config(d, address, val, len);
>          return;
> @@ -296,7 +297,8 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice
> *d, uint32_t address, AssignedDevice *pci_dev = container_of(d,
> AssignedDevice, dev);
>
>      if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> -        address == 0x3c || address == 0x3d) {
> +        address == 0x3c || address == 0x3d ||
> +        pci_access_cap_config(d, address, len)) {
>          val = pci_default_read_config(d, address, len);
>          DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>                (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val,
> len); @@ -325,11 +327,13 @@ do_log:
>      DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>            (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
>
> -    /* kill the special capabilities */
> -    if (address == 4 && len == 4)
> -        val &= ~0x100000;
> -    else if (address == 6)
> -        val &= ~0x10;
> +    if (!pci_dev->cap.available) {
> +        /* kill the special capabilities */
> +        if (address == 4 && len == 4)
> +            val &= ~0x100000;
> +        else if (address == 6)
> +            val &= ~0x10;
> +    }
>
>      return val;
>  }
> @@ -537,6 +541,73 @@ void assigned_dev_update_irq(PCIDevice *d)
>      }
>  }
>
> +#ifdef KVM_CAP_DEVICE_MSI
> +static void assigned_dev_enable_msi(PCIDevice *pci_dev)
> +{
> +    int r;
> +    struct kvm_assigned_irq assigned_irq_data;
> +    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice,
> dev); +
> +    memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
> +    assigned_irq_data.assigned_dev_id  =
> +            calc_assigned_dev_id(assigned_dev->h_busnr,
> +                    (uint8_t)assigned_dev->h_devfn);
> +    assigned_irq_data.guest_msi.addr_lo = *(uint32_t *)
> +            (pci_dev->cap.config + 4);
> +    assigned_irq_data.guest_msi.data = *(uint16_t *)
> +            (pci_dev->cap.config + 8);
> +    assigned_irq_data.flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
> +    r = kvm_assign_irq(kvm_context, &assigned_irq_data);
> +    if (r < 0) {
> +        perror("assigned_dev_enable_msi");
> +        assigned_dev->cap.enabled &= ~ASSIGNED_DEVICE_MSI_ENABLED;
> +        /* Fail to enable MSI, enable INTx instead */
> +        assigned_dev_update_irq(pci_dev);
> +    }
> +}
> +#endif
> +
> +void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t
> address, +                                          uint32_t val, int len)
> +{
> +    AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice,
> dev); +    uint32_t pos = pci_dev->cap.start;
> +    uint8_t target_byte, target_position;
> +
> +    pci_default_cap_write_config(pci_dev, address, val, len);
> +#ifdef KVM_CAP_DEVICE_MSI
> +    /* Check if guest want to enable MSI */
> +    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +        target_position = pos + 2;
> +        if (address <= target_position && address + len > target_position)
> { +            target_byte = (uint8_t)(val >> (target_position - address));
> +            if (target_byte == 1) {
> +                assigned_dev->cap.enabled |= ASSIGNED_DEVICE_MSI_ENABLED;
> +                assigned_dev_enable_msi(pci_dev);
> +                if (!assigned_dev->cap.enabled &
> ASSIGNED_DEVICE_MSI_ENABLED) +                   
> pci_dev->cap.config[target_position - pos] = 0; +            }
> +        }
> +        pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
> +    }
> +#endif
> +    return;
> +}
> +
> +void assigned_device_pci_cap_init(PCIDevice *pci_dev)
> +{
> +    AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
> +
> +#ifdef KVM_CAP_DEVICE_MSI
> +    /* Expose MSI capability
> +     * MSI capability is the 1st capability in cap.config */
> +    if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
> +        pci_dev->cap.config[0] = 0x5;
> +        pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
> +    }
> +#endif
> +}
> +
>  struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
>  {
>      int r;
> @@ -580,6 +651,11 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo
> *adev, PCIBus *bus) dev->h_busnr = adev->bus;
>      dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
>
> +    if (dev->cap.available)
> +        pci_enable_capability_support(pci_dev, 0, NULL,
> +                                     
> assigned_device_pci_cap_write_config, +                                    
>  assigned_device_pci_cap_init); +
>      memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
>      assigned_dev_data.assigned_dev_id  =
>  	calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
> diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
> index de60988..da8e7e1 100644
> --- a/qemu/hw/device-assignment.h
> +++ b/qemu/hw/device-assignment.h
> @@ -84,6 +84,8 @@ typedef struct {
>      struct {
>  #define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
>          int available;
> +#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
> +        int enabled;
>      } cap;
>  } AssignedDevice;


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-11-27  3:51 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2008-11-24 11:50 [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang
2008-11-24 11:50 ` [PATCH 1/5] kvm: Replace force type convert with container_of() Sheng Yang
2008-11-24 11:50 ` [PATCH 2/5] Make device assignment depend on libpci Sheng Yang
2008-11-24 11:50 ` [PATCH 3/5] Figure out device capability Sheng Yang
2008-11-25  6:06   ` Sheng Yang
2008-11-24 11:50 ` [PATCH 4/5] Support for " Sheng Yang
2008-11-24 11:50 ` [PATCH 5/5] kvm: expose MSI capability to guest Sheng Yang
2008-11-27  3:51   ` Sheng Yang
2008-11-27  2:12 ` [PATCH 0/5][v2] Userspace for MSI support of KVM Sheng Yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.