- * [Qemu-devel] [PATCH v9 01/11] vfio: extract vfio_get_hot_reset_info as a single function
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 02/11] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Zhou Jie
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
The function is used to get affected devices by bus reset.
So here extract it, and can used for aer soon.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 66 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 18 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 44783c5..c1c7d31 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1693,6 +1693,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
     }
 }
 
+/*
+ * return negative with errno, return 0 on success.
+ * if success, the point of ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+                                   struct vfio_pci_hot_reset_info **ret_info)
+{
+    struct vfio_pci_hot_reset_info *info;
+    int ret, count;
+
+    *ret_info = NULL;
+
+    info = g_malloc0(sizeof(*info));
+    info->argsz = sizeof(*info);
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret && errno != ENOSPC) {
+        ret = -errno;
+        goto error;
+    }
+
+    count = info->count;
+
+    info = g_realloc(info, sizeof(*info) +
+                     (count * sizeof(struct vfio_pci_dependent_device)));
+    info->argsz = sizeof(*info) +
+                  (count * sizeof(struct vfio_pci_dependent_device));
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret) {
+        ret = -errno;
+        error_report("vfio: hot reset info failed: %m");
+        goto error;
+    }
+
+    *ret_info = info;
+    info = NULL;
+
+    return 0;
+error:
+    g_free(info);
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1918,7 +1963,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
-    struct vfio_pci_hot_reset_info *info;
+    struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
     struct vfio_pci_hot_reset *reset;
     int32_t *fds;
@@ -1930,12 +1975,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     vfio_pci_pre_reset(vdev);
     vdev->vbasedev.needs_reset = false;
 
-    info = g_malloc0(sizeof(*info));
-    info->argsz = sizeof(*info);
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret && errno != ENOSPC) {
-        ret = -errno;
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
         if (!vdev->has_pm_reset) {
             error_report("vfio: Cannot reset device %s, "
                          "no available reset mechanism.", vdev->vbasedev.name);
@@ -1943,18 +1984,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    count = info->count;
-    info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
-    info->argsz = sizeof(*info) + (count * sizeof(*devices));
     devices = &info->devices[0];
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret) {
-        ret = -errno;
-        error_report("vfio: hot reset info failed: %m");
-        goto out_single;
-    }
-
     trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
 
     /* Verify that we have all the groups required */
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [Qemu-devel] [PATCH v9 02/11] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 01/11] vfio: extract vfio_get_hot_reset_info as a single function Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 03/11] vfio: add aer support for vfio device Zhou Jie
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Squeeze out vfio_pci_do_hot_reset to do host bus reset when AER recovery.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 31 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c1c7d31..6a6160b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1738,6 +1738,48 @@ error:
     return ret;
 }
 
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+                                 struct vfio_pci_hot_reset_info *info)
+{
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset *reset;
+    int32_t *fds;
+    int ret, i, count;
+    struct vfio_pci_dependent_device *devices;
+
+    /* Determine how many group fds need to be passed */
+    count = 0;
+    devices = &info->devices[0];
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                count++;
+                break;
+            }
+        }
+    }
+
+    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+    fds = &reset->group_fds[0];
+
+    /* Fill in group fds */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                fds[reset->count++] = group->fd;
+                break;
+            }
+        }
+    }
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    g_free(reset);
+
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1965,9 +2007,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     VFIOGroup *group;
     struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
-    struct vfio_pci_hot_reset *reset;
-    int32_t *fds;
-    int ret, i, count;
+    int ret, i;
     bool multi = false;
 
     trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -2045,34 +2085,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    /* Determine how many group fds need to be passed */
-    count = 0;
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                count++;
-                break;
-            }
-        }
-    }
-
-    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
-    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
-    fds = &reset->group_fds[0];
-
-    /* Fill in group fds */
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                fds[reset->count++] = group->fd;
-                break;
-            }
-        }
-    }
-
-    /* Bus reset! */
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
-    g_free(reset);
+    ret = vfio_pci_do_hot_reset(vdev, info);
 
     trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                     ret ? "%m" : "Success");
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [Qemu-devel] [PATCH v9 03/11] vfio: add aer support for vfio device
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 01/11] vfio: extract vfio_get_hot_reset_info as a single function Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 02/11] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-08-31 19:44   ` Alex Williamson
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 04/11] vfio: refine function vfio_pci_host_match Zhou Jie
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h |  3 +++
 2 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6a6160b..11c895c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1854,6 +1854,66 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+                          int pos, uint16_t size)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    PCIDevice *dev_iter;
+    uint8_t type;
+    uint32_t errcap;
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+                            cap_ver, pos, size);
+        return 0;
+    }
+
+    dev_iter = pci_bridge_get_device(pdev->bus);
+    if (!dev_iter) {
+        goto error;
+    }
+
+    while (dev_iter) {
+        if (!pci_is_express(dev_iter)) {
+            goto error;
+        }
+
+        type = pcie_cap_get_type(dev_iter);
+        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+             type != PCI_EXP_TYPE_UPSTREAM &&
+             type != PCI_EXP_TYPE_DOWNSTREAM)) {
+            goto error;
+        }
+
+        if (!dev_iter->exp.aer_cap) {
+            goto error;
+        }
+
+        dev_iter = pci_bridge_get_device(dev_iter->bus);
+    }
+
+    errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
+    /*
+     * The ability to record multiple headers is depending on
+     * the state of the Multiple Header Recording Capable bit and
+     * enabled by the Multiple Header Recording Enable bit.
+     */
+    if ((errcap & PCI_ERR_CAP_MHRC) &&
+        (errcap & PCI_ERR_CAP_MHRE)) {
+        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    } else {
+        pdev->exp.aer_log.log_max = 0;
+    }
+
+    pcie_cap_deverr_init(pdev);
+    return pcie_aer_init(pdev, pos, size);
+
+error:
+    error_report("vfio: Unable to enable AER for device %s, parent bus "
+                 "does not support AER signaling", vdev->vbasedev.name);
+    return -1;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1861,6 +1921,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
     uint16_t cap_id, next, size;
     uint8_t cap_ver;
     uint8_t *config;
+    int ret = 0;
 
     /* Only add extended caps if we have them and the guest can see them */
     if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
@@ -1914,6 +1975,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
                                    PCI_EXT_CAP_NEXT_MASK);
 
         switch (cap_id) {
+        case PCI_EXT_CAP_ID_ERR:
+            ret = vfio_setup_aer(vdev, cap_ver, next, size);
+            break;
         case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
             trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
             break;
@@ -1921,6 +1985,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
             pcie_add_capability(pdev, cap_id, cap_ver, next, size);
         }
 
+        if (ret) {
+            goto out;
+        }
     }
 
     /* Cleanup chain head ID if necessary */
@@ -1928,8 +1995,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
         pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
     }
 
+out:
     g_free(config);
-    return 0;
+    return ret;
 }
 
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
@@ -2698,6 +2766,11 @@ static int vfio_initfn(PCIDevice *pdev)
         goto out_teardown;
     }
 
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        !pdev->exp.aer_cap) {
+        goto out_teardown;
+    }
+
     if (vdev->vga) {
         vfio_vga_quirk_setup(vdev);
     }
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 7d482d9..5483044 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
 #include "qemu/queue.h"
@@ -132,6 +133,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
 #define VFIO_FEATURE_ENABLE_IGD_OPREGION \
                                 (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 3
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
     int32_t bootindex;
     uint32_t igd_gms;
     uint8_t pm_cap;
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 03/11] vfio: add aer support for vfio device
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 03/11] vfio: add aer support for vfio device Zhou Jie
@ 2016-08-31 19:44   ` Alex Williamson
  2016-09-13  6:56     ` Dou Liyang
  0 siblings, 1 reply; 25+ messages in thread
From: Alex Williamson @ 2016-08-31 19:44 UTC (permalink / raw)
  To: Zhou Jie; +Cc: Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
On Tue, 19 Jul 2016 15:38:21 +0800
Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Calling pcie_aer_init to initilize aer related registers for
> vfio device, then reload physical related registers to expose
> device capability.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/vfio/pci.h |  3 +++
>  2 files changed, 77 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 6a6160b..11c895c 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1854,6 +1854,66 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
> +                          int pos, uint16_t size)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    PCIDevice *dev_iter;
> +    uint8_t type;
> +    uint32_t errcap;
> +
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
> +                            cap_ver, pos, size);
> +        return 0;
> +    }
> +
> +    dev_iter = pci_bridge_get_device(pdev->bus);
> +    if (!dev_iter) {
> +        goto error;
> +    }
> +
> +    while (dev_iter) {
> +        if (!pci_is_express(dev_iter)) {
> +            goto error;
> +        }
> +
> +        type = pcie_cap_get_type(dev_iter);
> +        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
> +             type != PCI_EXP_TYPE_UPSTREAM &&
> +             type != PCI_EXP_TYPE_DOWNSTREAM)) {
> +            goto error;
> +        }
> +
> +        if (!dev_iter->exp.aer_cap) {
> +            goto error;
> +        }
> +
> +        dev_iter = pci_bridge_get_device(dev_iter->bus);
> +    }
> +
> +    errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
> +    /*
> +     * The ability to record multiple headers is depending on
> +     * the state of the Multiple Header Recording Capable bit and
> +     * enabled by the Multiple Header Recording Enable bit.
> +     */
> +    if ((errcap & PCI_ERR_CAP_MHRC) &&
> +        (errcap & PCI_ERR_CAP_MHRE)) {
> +        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
> +    } else {
> +        pdev->exp.aer_log.log_max = 0;
> +    }
> +
> +    pcie_cap_deverr_init(pdev);
> +    return pcie_aer_init(pdev, pos, size);
pcie_aer_init() adds a v2 AER capability regardless of the version of
the AER capability on the device.  Is this expected?  v2 defines a lot
more bits in various registers than v1, so are we simply hoping that
devices have reserved bits as zero like they're supposed to?  It's a
bit strange that for an Intel 82576 NIC I get a v1 AER capability w/o
aer=on and a v2 with.  Thanks,
Alex
> +
> +error:
> +    error_report("vfio: Unable to enable AER for device %s, parent bus "
> +                 "does not support AER signaling", vdev->vbasedev.name);
> +    return -1;
> +}
> +
>  static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> @@ -1861,6 +1921,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>      uint16_t cap_id, next, size;
>      uint8_t cap_ver;
>      uint8_t *config;
> +    int ret = 0;
>  
>      /* Only add extended caps if we have them and the guest can see them */
>      if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) ||
> @@ -1914,6 +1975,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>                                     PCI_EXT_CAP_NEXT_MASK);
>  
>          switch (cap_id) {
> +        case PCI_EXT_CAP_ID_ERR:
> +            ret = vfio_setup_aer(vdev, cap_ver, next, size);
> +            break;
>          case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */
>              trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next);
>              break;
> @@ -1921,6 +1985,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>              pcie_add_capability(pdev, cap_id, cap_ver, next, size);
>          }
>  
> +        if (ret) {
> +            goto out;
> +        }
>      }
>  
>      /* Cleanup chain head ID if necessary */
> @@ -1928,8 +1995,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
>          pci_set_word(pdev->config + PCI_CONFIG_SPACE_SIZE, 0);
>      }
>  
> +out:
>      g_free(config);
> -    return 0;
> +    return ret;
>  }
>  
>  static int vfio_add_capabilities(VFIOPCIDevice *vdev)
> @@ -2698,6 +2766,11 @@ static int vfio_initfn(PCIDevice *pdev)
>          goto out_teardown;
>      }
>  
> +    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +        !pdev->exp.aer_cap) {
> +        goto out_teardown;
> +    }
> +
>      if (vdev->vga) {
>          vfio_vga_quirk_setup(vdev);
>      }
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 7d482d9..5483044 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"
>  #include "qemu/queue.h"
> @@ -132,6 +133,8 @@ typedef struct VFIOPCIDevice {
>  #define VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT 2
>  #define VFIO_FEATURE_ENABLE_IGD_OPREGION \
>                                  (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
> +#define VFIO_FEATURE_ENABLE_AER_BIT 3
> +#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
>      int32_t bootindex;
>      uint32_t igd_gms;
>      uint8_t pm_cap;
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 03/11] vfio: add aer support for vfio device
  2016-08-31 19:44   ` Alex Williamson
@ 2016-09-13  6:56     ` Dou Liyang
  2016-09-13 15:14       ` Alex Williamson
  0 siblings, 1 reply; 25+ messages in thread
From: Dou Liyang @ 2016-09-13  6:56 UTC (permalink / raw)
  To: Alex Williamson, Zhou Jie; +Cc: Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
Hi Alex,
I am Dou.
I am testing these patches.
At 09/01/2016 03:44 AM, Alex Williamson wrote:
[...]
>> +
>> +    pcie_cap_deverr_init(pdev);
>> +    return pcie_aer_init(pdev, pos, size);
>
> pcie_aer_init() adds a v2 AER capability regardless of the version of
> the AER capability on the device.  Is this expected?
I think it is not good.
  v2 defines a lot
> more bits in various registers than v1, so are we simply hoping that
> devices have reserved bits as zero like they're supposed to?
I read the Capability Version in PCIe Spec. it says:
Capability Version – This field is a PCI-SIG defined version number
that indicates the version of the Capability structure present.
OR
This field must be 2h if the End-End TLP Prefix Supported bit (see
Section 7.8.15) is Set and must be 1h or 2h otherwise.
In my option, I guess that the spec may mean that v1/v2 is used for the
End-End TLP Prefix Supported bit supported.
And I find that Kernel and QEmu don't do some special work for it. this
feature may not affect the registers in AER.
can you give me some AER bits that are defined in v2 ,but not in v1?
It's a
> bit strange that for an Intel 82576 NIC I get a v1 AER capability w/o
> aer=on and a v2 with.  Thanks,
>
Yes, I do the test, and get the same result for me.
I use the following device to test:
06:00.0 Ethernet controller: Intel Corporation 82576 Gigabit Network
Connection (rev 01)
Firstly, In host, I use the command "lspci -vvv -s 06:00.0":
[...]
Capabilities: [100 v1] Advanced Error Reporting
UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
  MalfTLP- ECRC- UnsupReq- ACSViol-
UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
MalfTLP- ECRC- UnsupReq+ ACSViol-
UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
MalfTLP+ ECRC- UnsupReq- ACSViol-
CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
[...]
Secondly, I pass through the device to the guest. In the guest:
[...]
Capabilities: [100 v2] Advanced Error Reporting
UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
  MalfTLP- ECRC- UnsupReq- ACSViol-
UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
MalfTLP- ECRC- UnsupReq+ ACSViol-
UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
MalfTLP+ ECRC- UnsupReq- ACSViol-
CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
[...]
They look the same, except the capabilities version.
Thirdly, I drop the patches and do the test. After I launch QEmu,
In guest:
[...]
Capabilities: [100 v1] Advanced Error Reporting
[...]
Fourthly, I modify the PCI_ERR_VER: #define PCI_ERR_VER  1.
In guest:
[...]
Capabilities: [100 v1] Advanced Error Reporting
[...]
They also have the same features.
At last, I think the v1/v2 is not influence on our AER function.
But, it is actually strange that we can get a v1 AER capability
w/o aer=on and a v2 with.
I suggest that we add a capability version parameter to extend the
pcie_aer_init(),or add a new pcie_aer_v1_init() for the devices with
v1 AER cap.
[...]
Thanks,
Dou.
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [Qemu-devel] [PATCH v9 03/11] vfio: add aer support for vfio device
  2016-09-13  6:56     ` Dou Liyang
@ 2016-09-13 15:14       ` Alex Williamson
  0 siblings, 0 replies; 25+ messages in thread
From: Alex Williamson @ 2016-09-13 15:14 UTC (permalink / raw)
  To: Dou Liyang; +Cc: Zhou Jie, Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
Hi Dou,
On Tue, 13 Sep 2016 14:56:15 +0800
Dou Liyang <douly.fnst@cn.fujitsu.com> wrote:
> Hi Alex,
> 
> I am Dou.
> I am testing these patches.
> 
> At 09/01/2016 03:44 AM, Alex Williamson wrote:
> 
> [...]
> 
> >> +
> >> +    pcie_cap_deverr_init(pdev);
> >> +    return pcie_aer_init(pdev, pos, size);  
> >
> > pcie_aer_init() adds a v2 AER capability regardless of the version of
> > the AER capability on the device.  Is this expected?  
> 
> I think it is not good.
> 
>   v2 defines a lot
> > more bits in various registers than v1, so are we simply hoping that
> > devices have reserved bits as zero like they're supposed to?  
> 
> I read the Capability Version in PCIe Spec. it says:
> 
> Capability Version – This field is a PCI-SIG defined version number
> that indicates the version of the Capability structure present.
> OR
> This field must be 2h if the End-End TLP Prefix Supported bit (see
> Section 7.8.15) is Set and must be 1h or 2h otherwise.
> 
> In my option, I guess that the spec may mean that v1/v2 is used for the
> End-End TLP Prefix Supported bit supported.
> 
> And I find that Kernel and QEmu don't do some special work for it. this
> feature may not affect the registers in AER.
> 
> can you give me some AER bits that are defined in v2 ,but not in v1?
There are quite a lot.  In the uncorrectable error status register bits
21-25 are only defined for v2, same for the mask and severity for this
register.  Bits 14 & 15 are new for v2 in the correctable error status
registers, status and mask.  Bits 9-11 are new in the control register.
> It's a
> > bit strange that for an Intel 82576 NIC I get a v1 AER capability w/o
> > aer=on and a v2 with.  Thanks,
> >  
> 
> Yes, I do the test, and get the same result for me.
> 
> I use the following device to test:
> 
> 06:00.0 Ethernet controller: Intel Corporation 82576 Gigabit Network
> Connection (rev 01)
> 
> Firstly, In host, I use the command "lspci -vvv -s 06:00.0":
> 
> [...]
> Capabilities: [100 v1] Advanced Error Reporting
> UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
>   MalfTLP- ECRC- UnsupReq- ACSViol-
> UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
> MalfTLP- ECRC- UnsupReq+ ACSViol-
> UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
> MalfTLP+ ECRC- UnsupReq- ACSViol-
> CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
> CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
> AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
> [...]
> 
> Secondly, I pass through the device to the guest. In the guest:
> 
> [...]
> Capabilities: [100 v2] Advanced Error Reporting
> UESta:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
>   MalfTLP- ECRC- UnsupReq- ACSViol-
> UEMsk:	DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF-
> MalfTLP- ECRC- UnsupReq+ ACSViol-
> UESvrt:	DLP+ SDES- TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+
> MalfTLP+ ECRC- UnsupReq- ACSViol-
> CESta:	RxErr- BadTLP- BadDLLP- Rollover- Timeout- NonFatalErr-
> CEMsk:	RxErr- BadTLP+ BadDLLP- Rollover- Timeout- NonFatalErr+
> AERCap:	First Error Pointer: 12, GenCap- CGenEn- ChkCap- ChkEn-
> [...]
> 
> They look the same, except the capabilities version.
> 
> Thirdly, I drop the patches and do the test. After I launch QEmu,
> In guest:
> 
> [...]
> Capabilities: [100 v1] Advanced Error Reporting
> [...]
> 
> Fourthly, I modify the PCI_ERR_VER: #define PCI_ERR_VER  1.
> In guest:
> [...]
> Capabilities: [100 v1] Advanced Error Reporting
> [...]
> 
> They also have the same features.
> 
> At last, I think the v1/v2 is not influence on our AER function.
> But, it is actually strange that we can get a v1 AER capability
> w/o aer=on and a v2 with.
> I suggest that we add a capability version parameter to extend the
> pcie_aer_init(),or add a new pcie_aer_v1_init() for the devices with
> v1 AER cap.
Agreed.  Thanks,
Alex
^ permalink raw reply	[flat|nested] 25+ messages in thread 
 
 
 
- * [Qemu-devel] [PATCH v9 04/11] vfio: refine function vfio_pci_host_match
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (2 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 03/11] vfio: add aer support for vfio device Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not Zhou Jie
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 11c895c..21fd801 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2060,14 +2060,27 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_intx_enable(vdev);
 }
 
+static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr)
+{
+    if (strlen(name) != 12 ||
+        sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain,
+               &addr->bus, &addr->slot, &addr->function) != 4) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
 {
-    char tmp[13];
+    PCIHostDeviceAddress tmp;
 
-    sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
-            addr->bus, addr->slot, addr->function);
+    if (vfio_pci_name_to_addr(name, &tmp)) {
+        return false;
+    }
 
-    return (strcmp(tmp, name) == 0);
+    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
+            tmp.slot == addr->slot && tmp.function == addr->function);
 }
 
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (3 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 04/11] vfio: refine function vfio_pci_host_match Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-08-31 19:56   ` Alex Williamson
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 06/11] pci: add a pci_function_is_valid callback to check function if valid Zhou Jie
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
When assigning a vfio device with AER enabled, we must check whether
the device supports a host bus reset (ie. hot reset) as this may be
used by the guest OS in order to recover the device from an AER
error.  QEMU must therefore have the ability to perform a physical
host bus reset using the existing vfio APIs in response to a virtual
bus reset in the VM.  A physical bus reset affects all of the devices
on the host bus, therefore we place a few simplifying configuration
restriction on the VM:
 - All physical devices affected by a bus reset must be assigned to
   the VM with AER enabled on each and be configured on the same
   virtual bus in the VM.
 - No devices unaffected by the bus reset, be they physical, emulated,
   or paravirtual may be configured on the same virtual bus as a
   device supporting AER signaling through vfio.
In other words users wishing to enable AER on a multifunction device
need to assign all functions of the device to the same virtual bus
and enable AER support for each device.  The easiest way to
accomplish this is to identity map the physical functions to virtual
functions with multifunction enabled on the virtual device.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 hw/vfio/pci.h |   1 +
 2 files changed, 256 insertions(+), 23 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 21fd801..242c1e4 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1693,6 +1693,42 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
     }
 }
 
+static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr)
+{
+    if (strlen(name) != 12 ||
+        sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain,
+               &addr->bus, &addr->slot, &addr->function) != 4) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
+{
+    PCIHostDeviceAddress tmp;
+
+    if (vfio_pci_name_to_addr(name, &tmp)) {
+        return false;
+    }
+
+    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
+            tmp.slot == addr->slot && tmp.function == addr->function);
+}
+
+static bool vfio_pci_host_match_slot(PCIHostDeviceAddress *addr,
+                                     const char *name)
+{
+    PCIHostDeviceAddress tmp;
+
+    if (vfio_pci_name_to_addr(name, &tmp)) {
+        return false;
+    }
+
+    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
+            tmp.slot == addr->slot);
+}
+
 /*
  * return negative with errno, return 0 on success.
  * if success, the point of ret_info fill with the affected device reset info.
@@ -1854,6 +1890,200 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_device_range_limit(PCIBus *bus)
+{
+    PCIDevice *br;
+
+    br = pci_bridge_get_device(bus);
+    if (!br ||
+        !pci_is_express(br) ||
+        !(br->exp.exp_cap) ||
+        pcie_cap_is_arifwd_enabled(br)) {
+        return 255;
+    }
+
+    return 8;
+}
+
+static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
+{
+    PCIBus *bus = vdev->pdev.bus;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    VFIOGroup *group;
+    int ret, i, devfn, range_limit;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        error_setg(errp, "vfio: Cannot enable AER for device %s,"
+                   " device does not support hot reset.",
+                   vdev->vbasedev.name);
+        return;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+        bool found = false;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                       "depends on group %d which is not owned.",
+                       vdev->vbasedev.name, devices[i].group_id);
+            goto out;
+        }
+
+        /* Ensure affected devices for reset on the same bus */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+                /*
+                 * AER errors may be broadcast to all functions of a multi-
+                 * function endpoint.  If any of those sibling functions are
+                 * also assigned, they need to have AER enabled or else an
+                 * error may continue to cause a vm_stop condition.  IOW,
+                 * AER setup of this function would be pointless.
+                 */
+                if (vfio_pci_host_match_slot(&host, vdev->vbasedev.name) &&
+                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
+                    error_setg(errp, "vfio: Cannot enable AER for device %s,"
+                               " on same slot the dependent device %s which"
+                               " does not enable AER.",
+                               vdev->vbasedev.name, tmp->vbasedev.name);
+                    goto out;
+                }
+
+                if (tmp->pdev.bus != bus) {
+                    error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                               "the dependent device %s is not on the same bus",
+                               vdev->vbasedev.name, tmp->vbasedev.name);
+                    goto out;
+                }
+                found = true;
+                break;
+            }
+        }
+
+        /* Ensure all affected devices assigned to VM */
+        if (!found) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                       "the dependent device %04x:%02x:%02x.%x "
+                       "is not assigned to VM.",
+                       vdev->vbasedev.name, host.domain, host.bus,
+                       host.slot, host.function);
+            goto out;
+        }
+    }
+
+    /*
+     * The above code verified that all devices affected by a bus reset
+     * exist on the same bus in the VM.  To further simplify, we also
+     * require that there are no additional devices beyond those existing on
+     * the VM bus.
+     */
+    range_limit = vfio_device_range_limit(bus);
+    for (devfn = 0; devfn < range_limit; devfn++) {
+        VFIOPCIDevice *tmp;
+        PCIDevice *dev;
+        bool found = false;
+
+        dev = pci_find_device(bus, pci_bus_num(bus),
+                  PCI_DEVFN(PCI_SLOT(vdev->pdev.devfn), devfn));
+
+        if (!dev) {
+            continue;
+        }
+
+        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, device"
+                             " %s: slot %d function%d cannot be configured"
+                             " on the same virtual bus",
+                             vdev->vbasedev.name, dev->name,
+                             PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+            goto out;
+        }
+
+        tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+        for (i = 0; i < info->count; i++) {
+            PCIHostDeviceAddress host;
+
+            host.domain = devices[i].segment;
+            host.bus = devices[i].bus;
+            host.slot = PCI_SLOT(devices[i].devfn);
+            host.function = PCI_FUNC(devices[i].devfn);
+
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+                found = true;
+                break;
+            }
+        }
+
+        if (!found) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, affected"
+                             " device %s does not be configured on the same"
+                             " virtual bus",
+                       vdev->vbasedev.name, tmp->vbasedev.name);
+            goto out;
+        }
+    }
+
+out:
+    g_free(info);
+    return;
+}
+
+static void vfio_aer_check_host_bus_reset(Error **errp)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+    VFIOPCIDevice *vdev;
+    Error *local_err = NULL;
+
+    /* Check All vfio-pci devices if have bus reset capability */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+                vfio_check_hot_bus_reset(vdev, &local_err);
+                if (local_err) {
+                    error_propagate(errp, local_err);
+                    return;
+                }
+            }
+        }
+    }
+
+    return;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -2060,29 +2290,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_intx_enable(vdev);
 }
 
-static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr)
-{
-    if (strlen(name) != 12 ||
-        sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain,
-               &addr->bus, &addr->slot, &addr->function) != 4) {
-        return -EINVAL;
-    }
-
-    return 0;
-}
-
-static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
-{
-    PCIHostDeviceAddress tmp;
-
-    if (vfio_pci_name_to_addr(name, &tmp)) {
-        return false;
-    }
-
-    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
-            tmp.slot == addr->slot && tmp.function == addr->function);
-}
-
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
@@ -2608,6 +2815,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
+{
+    Error *local_err = NULL;
+
+    vfio_aer_check_host_bus_reset(&local_err);
+    if (local_err) {
+        fprintf(stderr, "%s\n", error_get_pretty(local_err));
+        error_free(local_err);
+        exit(1);
+    }
+}
+
+static Notifier machine_notifier = {
+    .notify = vfio_pci_machine_done_notify,
+};
+
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -3000,6 +3223,15 @@ static const TypeInfo vfio_pci_dev_info = {
 static void register_vfio_pci_dev_type(void)
 {
     type_register_static(&vfio_pci_dev_info);
+
+    /*
+     * The AER configuration may depend on multiple devices, so we cannot
+     * validate consistency after each device is initialized.  We can only
+     * depend on function initialization order (function 0 last) for hotplug
+     * devices, therefore a machine-init-done notifier is used to validate
+     * the configuration after all cold-plug devices are processed.
+     */
+     qemu_add_machine_init_done_notifier(&machine_notifier);
 }
 
 type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 5483044..e2faffb 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not Zhou Jie
@ 2016-08-31 19:56   ` Alex Williamson
  2016-09-01  2:12     ` Alex Williamson
  2016-09-22  8:34     ` Dou Liyang
  0 siblings, 2 replies; 25+ messages in thread
From: Alex Williamson @ 2016-08-31 19:56 UTC (permalink / raw)
  To: Zhou Jie; +Cc: Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
On Tue, 19 Jul 2016 15:38:23 +0800
Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> When assigning a vfio device with AER enabled, we must check whether
> the device supports a host bus reset (ie. hot reset) as this may be
> used by the guest OS in order to recover the device from an AER
> error.  QEMU must therefore have the ability to perform a physical
> host bus reset using the existing vfio APIs in response to a virtual
> bus reset in the VM.  A physical bus reset affects all of the devices
> on the host bus, therefore we place a few simplifying configuration
> restriction on the VM:
> 
>  - All physical devices affected by a bus reset must be assigned to
>    the VM with AER enabled on each and be configured on the same
>    virtual bus in the VM.
> 
>  - No devices unaffected by the bus reset, be they physical, emulated,
>    or paravirtual may be configured on the same virtual bus as a
>    device supporting AER signaling through vfio.
> 
> In other words users wishing to enable AER on a multifunction device
> need to assign all functions of the device to the same virtual bus
> and enable AER support for each device.  The easiest way to
> accomplish this is to identity map the physical functions to virtual
> functions with multifunction enabled on the virtual device.
Why am I able to start the following VM with aer=on for the vfio-pci
devices?
# lspci -tv
-[0000:00]-+-00.0  Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
           +-01.0  Device 1234:1111
           +-1c.0-[01]--
           +-1d.0-[02]--+-01.0  Intel Corporation 82576 Gigabit Network Connection
           |            \-01.1  Intel Corporation 82576 Gigabit Network Connection
           ...
# lspci -vvv -s 1d.0
00:1d.0 PCI bridge: Red Hat, Inc. QEMU PCI-PCI bridge (prog-if 00 [Normal decode])
The devices are behind a PCIe-to-PCI bridge, so shouldn't specifying
aer=on for the vfio-pci devices cause a configuration error?
commandline:
/home/alwillia/local/bin/qemu-system-x86_64 -name guest=rhel7-q35,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-11-rhel7-q35/master-key.aes -machine pc-q35-2.7,accel=kvm,usb=off,vmport=off -cpu IvyBridge -m 8192 -realtime mlock=off -smp 6,sockets=1,cores=6,threads=1 -uuid b20b28b4-9304-4e11-9ffa-0367aeb44afb -no-user-config -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-11-rhel7-q35/monitor.sock,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=discard -no-hpet -no-shutdown -global ICH9-LPC.disable_s3=1 -global ICH9-LPC.disable_s4=1 -boot strict=on -device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1e -device pci-bridge,chassis_nr=2,id=pci.2,bus=pci.1,addr=0x1 -device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d -device ioh3420,port=0xe0,chassis=4,id=pci.4,bus=pcie.0,addr=0x1c -device ich9-usb-ehci1,id=usb,bus=pci.2!
 ,addr=0x3.0x7 -device ich9-usb-uhci1,masterbus=usb.0,firstport=0,bus=pci.2,multifunction=on,addr=0x3 -device ich9-usb-uhci2,masterbus=usb.0,firstport=2,bus=pci.2,addr=0x3.0x1 -device ich9-usb-uhci3,masterbus=usb.0,firstport=4,bus=pci.2,addr=0x3.0x2 -drive file=/dev/rhel/rhel7-q35,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.2,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 -netdev tap,fd=26,id=hostnet0,vhost=on,vhostfd=28 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:50:ec:0d,bus=pci.2,addr=0x1 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 127.0.0.1:0 -device VGA,id=video0,vgamem_mb=16,bus=pcie.0,addr=0x1 -device intel-hda,id=sound0,bus=pci.2,addr=0x2 -device hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -device vfio-pci,aer=on,host=07:00.0,id=hostdev0,bus=pci.3,multifunction=on,addr=0x1 -device vfio-pci,ae!
 r=on,host=07:00.1,id=hostdev1,bus=pci.3,addr=0x1.0x1 -msg timestamp=on
Thanks,
Alex
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  hw/vfio/pci.h |   1 +
>  2 files changed, 256 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 21fd801..242c1e4 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1693,6 +1693,42 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
>      }
>  }
>  
> +static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr)
> +{
> +    if (strlen(name) != 12 ||
> +        sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain,
> +               &addr->bus, &addr->slot, &addr->function) != 4) {
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
> +{
> +    PCIHostDeviceAddress tmp;
> +
> +    if (vfio_pci_name_to_addr(name, &tmp)) {
> +        return false;
> +    }
> +
> +    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
> +            tmp.slot == addr->slot && tmp.function == addr->function);
> +}
> +
> +static bool vfio_pci_host_match_slot(PCIHostDeviceAddress *addr,
> +                                     const char *name)
> +{
> +    PCIHostDeviceAddress tmp;
> +
> +    if (vfio_pci_name_to_addr(name, &tmp)) {
> +        return false;
> +    }
> +
> +    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
> +            tmp.slot == addr->slot);
> +}
> +
>  /*
>   * return negative with errno, return 0 on success.
>   * if success, the point of ret_info fill with the affected device reset info.
> @@ -1854,6 +1890,200 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static int vfio_device_range_limit(PCIBus *bus)
> +{
> +    PCIDevice *br;
> +
> +    br = pci_bridge_get_device(bus);
> +    if (!br ||
> +        !pci_is_express(br) ||
> +        !(br->exp.exp_cap) ||
> +        pcie_cap_is_arifwd_enabled(br)) {
> +        return 255;
> +    }
> +
> +    return 8;
> +}
> +
> +static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> +{
> +    PCIBus *bus = vdev->pdev.bus;
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    int ret, i, devfn, range_limit;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
> +                   " device does not support hot reset.",
> +                   vdev->vbasedev.name);
> +        return;
> +    }
> +
> +    /* List all affected devices by bus reset */
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +        VFIOPCIDevice *tmp;
> +        VFIODevice *vbasedev_iter;
> +        bool found = false;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        /* Skip the current device */
> +        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
> +            continue;
> +        }
> +
> +        /* Ensure we own the group of the affected device */
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                       "depends on group %d which is not owned.",
> +                       vdev->vbasedev.name, devices[i].group_id);
> +            goto out;
> +        }
> +
> +        /* Ensure affected devices for reset on the same bus */
> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
> +                /*
> +                 * AER errors may be broadcast to all functions of a multi-
> +                 * function endpoint.  If any of those sibling functions are
> +                 * also assigned, they need to have AER enabled or else an
> +                 * error may continue to cause a vm_stop condition.  IOW,
> +                 * AER setup of this function would be pointless.
> +                 */
> +                if (vfio_pci_host_match_slot(&host, vdev->vbasedev.name) &&
> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> +                    error_setg(errp, "vfio: Cannot enable AER for device %s,"
> +                               " on same slot the dependent device %s which"
> +                               " does not enable AER.",
> +                               vdev->vbasedev.name, tmp->vbasedev.name);
> +                    goto out;
> +                }
> +
> +                if (tmp->pdev.bus != bus) {
> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                               "the dependent device %s is not on the same bus",
> +                               vdev->vbasedev.name, tmp->vbasedev.name);
> +                    goto out;
> +                }
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        /* Ensure all affected devices assigned to VM */
> +        if (!found) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                       "the dependent device %04x:%02x:%02x.%x "
> +                       "is not assigned to VM.",
> +                       vdev->vbasedev.name, host.domain, host.bus,
> +                       host.slot, host.function);
> +            goto out;
> +        }
> +    }
> +
> +    /*
> +     * The above code verified that all devices affected by a bus reset
> +     * exist on the same bus in the VM.  To further simplify, we also
> +     * require that there are no additional devices beyond those existing on
> +     * the VM bus.
> +     */
> +    range_limit = vfio_device_range_limit(bus);
> +    for (devfn = 0; devfn < range_limit; devfn++) {
> +        VFIOPCIDevice *tmp;
> +        PCIDevice *dev;
> +        bool found = false;
> +
> +        dev = pci_find_device(bus, pci_bus_num(bus),
> +                  PCI_DEVFN(PCI_SLOT(vdev->pdev.devfn), devfn));
> +
> +        if (!dev) {
> +            continue;
> +        }
> +
> +        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, device"
> +                             " %s: slot %d function%d cannot be configured"
> +                             " on the same virtual bus",
> +                             vdev->vbasedev.name, dev->name,
> +                             PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
> +            goto out;
> +        }
> +
> +        tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> +        for (i = 0; i < info->count; i++) {
> +            PCIHostDeviceAddress host;
> +
> +            host.domain = devices[i].segment;
> +            host.bus = devices[i].bus;
> +            host.slot = PCI_SLOT(devices[i].devfn);
> +            host.function = PCI_FUNC(devices[i].devfn);
> +
> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        if (!found) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, affected"
> +                             " device %s does not be configured on the same"
> +                             " virtual bus",
> +                       vdev->vbasedev.name, tmp->vbasedev.name);
> +            goto out;
> +        }
> +    }
> +
> +out:
> +    g_free(info);
> +    return;
> +}
> +
> +static void vfio_aer_check_host_bus_reset(Error **errp)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +    Error *local_err = NULL;
> +
> +    /* Check All vfio-pci devices if have bus reset capability */
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +                vfio_check_hot_bus_reset(vdev, &local_err);
> +                if (local_err) {
> +                    error_propagate(errp, local_err);
> +                    return;
> +                }
> +            }
> +        }
> +    }
> +
> +    return;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -2060,29 +2290,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_intx_enable(vdev);
>  }
>  
> -static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr)
> -{
> -    if (strlen(name) != 12 ||
> -        sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain,
> -               &addr->bus, &addr->slot, &addr->function) != 4) {
> -        return -EINVAL;
> -    }
> -
> -    return 0;
> -}
> -
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
> -{
> -    PCIHostDeviceAddress tmp;
> -
> -    if (vfio_pci_name_to_addr(name, &tmp)) {
> -        return false;
> -    }
> -
> -    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
> -            tmp.slot == addr->slot && tmp.function == addr->function);
> -}
> -
>  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>  {
>      VFIOGroup *group;
> @@ -2608,6 +2815,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> +{
> +    Error *local_err = NULL;
> +
> +    vfio_aer_check_host_bus_reset(&local_err);
> +    if (local_err) {
> +        fprintf(stderr, "%s\n", error_get_pretty(local_err));
> +        error_free(local_err);
> +        exit(1);
> +    }
> +}
> +
> +static Notifier machine_notifier = {
> +    .notify = vfio_pci_machine_done_notify,
> +};
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> @@ -3000,6 +3223,15 @@ static const TypeInfo vfio_pci_dev_info = {
>  static void register_vfio_pci_dev_type(void)
>  {
>      type_register_static(&vfio_pci_dev_info);
> +
> +    /*
> +     * The AER configuration may depend on multiple devices, so we cannot
> +     * validate consistency after each device is initialized.  We can only
> +     * depend on function initialization order (function 0 last) for hotplug
> +     * devices, therefore a machine-init-done notifier is used to validate
> +     * the configuration after all cold-plug devices are processed.
> +     */
> +     qemu_add_machine_init_done_notifier(&machine_notifier);
>  }
>  
>  type_init(register_vfio_pci_dev_type)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 5483044..e2faffb 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bus.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not
  2016-08-31 19:56   ` Alex Williamson
@ 2016-09-01  2:12     ` Alex Williamson
  2016-09-22 14:04       ` Dou Liyang
  2016-09-22  8:34     ` Dou Liyang
  1 sibling, 1 reply; 25+ messages in thread
From: Alex Williamson @ 2016-09-01  2:12 UTC (permalink / raw)
  To: Zhou Jie; +Cc: Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
On Wed, 31 Aug 2016 13:56:20 -0600
Alex Williamson <alex.williamson@redhat.com> wrote:
> On Tue, 19 Jul 2016 15:38:23 +0800
> Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
> 
> > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > 
> > When assigning a vfio device with AER enabled, we must check whether
> > the device supports a host bus reset (ie. hot reset) as this may be
> > used by the guest OS in order to recover the device from an AER
> > error.  QEMU must therefore have the ability to perform a physical
> > host bus reset using the existing vfio APIs in response to a virtual
> > bus reset in the VM.  A physical bus reset affects all of the devices
> > on the host bus, therefore we place a few simplifying configuration
> > restriction on the VM:
> > 
> >  - All physical devices affected by a bus reset must be assigned to
> >    the VM with AER enabled on each and be configured on the same
> >    virtual bus in the VM.
> > 
> >  - No devices unaffected by the bus reset, be they physical, emulated,
> >    or paravirtual may be configured on the same virtual bus as a
> >    device supporting AER signaling through vfio.
> > 
> > In other words users wishing to enable AER on a multifunction device
> > need to assign all functions of the device to the same virtual bus
> > and enable AER support for each device.  The easiest way to
> > accomplish this is to identity map the physical functions to virtual
> > functions with multifunction enabled on the virtual device.  
> 
> Why am I able to start the following VM with aer=on for the vfio-pci
> devices?
> 
> # lspci -tv
> -[0000:00]-+-00.0  Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
>            +-01.0  Device 1234:1111
>            +-1c.0-[01]--
>            +-1d.0-[02]--+-01.0  Intel Corporation 82576 Gigabit Network Connection
>            |            \-01.1  Intel Corporation 82576 Gigabit Network Connection
>            ...
> 
> # lspci -vvv -s 1d.0
> 00:1d.0 PCI bridge: Red Hat, Inc. QEMU PCI-PCI bridge (prog-if 00 [Normal decode])
> 
> The devices are behind a PCIe-to-PCI bridge, so shouldn't specifying
> aer=on for the vfio-pci devices cause a configuration error?
> 
> commandline:
> 
> /home/alwillia/local/bin/qemu-system-x86_64 -name guest=rhel7-q35,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-11-rhel7-q35/master-key.aes -machine pc-q35-2.7,accel=kvm,usb=off,vmport=off -cpu IvyBridge -m 8192 -realtime mlock=off -smp 6,sockets=1,cores=6,threads=1 -uuid b20b28b4-9304-4e11-9ffa-0367aeb44afb -no-user-config -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/domain-11-rhel7-q35/monitor.sock,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=discard -no-hpet -no-shutdown -global ICH9-LPC.disable_s3=1 -global ICH9-LPC.disable_s4=1 -boot strict=on -device i82801b11-bridge,id=pci.1,bus=pcie.0,addr=0x1e -device pci-bridge,chassis_nr=2,id=pci.2,bus=pci.1,addr=0x1 -device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d -device ioh3420,port=0xe0,chassis=4,id=pci.4,bus=pcie.0,addr=0x1c -device ich9-usb-ehci1,id=usb,bus=pci!
 .2,addr=0x3.0x7 -device ich9-usb-uhci1,masterbus=usb.0,firstport=0,bus=pci.2,multifunction=on,addr=0x3 -device ich9-usb-uhci2,masterbus=usb.0,firstport=2,bus=pci.2,addr=0x3.0x1 -device ich9-usb-uhci3,masterbus=usb.0,firstport=4,bus=pci.2,addr=0x3.0x2 -drive file=/dev/rhel/rhel7-q35,format=raw,if=none,id=drive-virtio-disk0,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.2,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 -netdev tap,fd=26,id=hostnet0,vhost=on,vhostfd=28 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:50:ec:0d,bus=pci.2,addr=0x1 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 127.0.0.1:0 -device VGA,id=video0,vgamem_mb=16,bus=pcie.0,addr=0x1 -device intel-hda,id=sound0,bus=pci.2,addr=0x2 -device hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -device vfio-pci,aer=on,host=07:00.0,id=hostdev0,bus=pci.3,multifunction=on,addr=0x1 -device vfio-pci,!
 aer=on,host=07:00.1,id=hostdev1,bus=pci.3,addr=0x1.0x1 -msg timestamp=on
> 
I had to move to a different system where I could actually inject an
aer error and created a config similar to above but with the 82576
ports downstream of the ioh3420 root port.  When I inject a malformed
TLP uncorrectable error, my RHEL7.2 guest does this:
[   35.995645] pcieport 0000:00:1c.0: AER: Multiple Uncorrected (Fatal) error received: id=0200
[   35.998483] igb 0000:02:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Unaccessible, id=0200(Unregistered Agent ID)
[   36.001965] igb 0000:02:00.0 enp2s0f0: PCIe link lost, device now detached
[   36.015092] igb 0000:02:00.1 enp2s0f1: PCIe link lost, device now detached
[   39.133185] igb 0000:02:00.0: enabling device (0000 -> 0002)
[   40.071245] igb 0000:02:00.1: enabling device (0000 -> 0002)
[   41.014451] BUG: unable to handle kernel paging request at 0000000000003818
[   41.015969] IP: [<ffffffffa02b438d>] igb_configure_tx_ring+0x14d/0x280 [igb]
[   41.017507] PGD 367e2067 PUD 7ae56067 PMD 0 
[   41.018497] Oops: 0002 [#1] SMP 
[   41.019242] Modules linked in: ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter snd_hda_codec_generic snd_hda_intel snd_hda_codec ppdev snd_hda_core snd_hwdep snd_seq snd_seq_device iTCO_wdt iTCO_vendor_support bochs_drm snd_pcm syscopyarea sysfillrect sysimgblt ttm virtio_balloon snd_timer snd igb drm_kms_helper soundcore ptp pps_core i2c_algo_bit i2c_i801 dca drm shpchp lpc_ich mfd_core pcspkr i2c_core parport_pc parport ip_tables xfs libcrc32c virtio_blk virtio_console virtio_net ahci libahci crc32c_intel serio_raw libata virtio_pci virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod
[   41.040590] CPU: 0 PID: 29 Comm: kworker/0:1 Not tainted 3.10.0-327.el7.x86_64 #1
[   41.042180] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
[   41.044635] Workqueue: events aer_isr
[   41.045478] task: ffff880179435080 ti: ffff880179680000 task.ti: ffff880179680000
[   41.047097] RIP: 0010:[<ffffffffa02b438d>]  [<ffffffffa02b438d>] igb_configure_tx_ring+0x14d/0x280 [igb]
[   41.049151] RSP: 0018:ffff880179683bf8  EFLAGS: 00010246
[   41.050260] RAX: 0000000000003818 RBX: 0000000000000000 RCX: 0000000000003818
[   41.051747] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 00000000002896b3
[   41.053268] RBP: ffff880179683c20 R08: 0000000001010100 R09: 00000000ffffffe7
[   41.054730] R10: ffffea0001eb6100 R11: ffffffffa02afa31 R12: 0000000000000000
[   41.056201] R13: ffff880035dbc8c0 R14: ffff880175d03f80 R15: 000000017716e000
[   41.057673] FS:  0000000000000000(0000) GS:ffff88017fc00000(0000) knlGS:0000000000000000
[   41.059337] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   41.060548] CR2: 0000000000003818 CR3: 0000000178331000 CR4: 00000000000006f0
[   41.062028] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   41.063534] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[   41.065025] Stack:
[   41.065473]  ffff880035dbc8c0 ffff880035dbce70 0000000000000001 ffff880035dbc8c8
[   41.067119]  ffff880035dbce70 ffff880179683c80 ffffffffa02b8a77 fefdf27269fb3cd8
[   41.068781]  2009f9ee3386436f eb9e4e66756bbfdd 34002f8114a5d65f 9535990856231c4b
[   41.094179] Call Trace:
[   41.118688]  [<ffffffffa02b8a77>] igb_configure+0x267/0x450 [igb]
[   41.144286]  [<ffffffffa02b94f1>] igb_up+0x21/0x1a0 [igb]
[   41.170606]  [<ffffffffa02b96a7>] igb_io_resume+0x37/0x70 [igb]
[   41.195846]  [<ffffffff813381e0>] ? pci_cleanup_aer_uncorrect_error_status+0x90/0x90
[   41.221767]  [<ffffffff81338228>] report_resume+0x48/0x60
[   41.246455]  [<ffffffff8131e359>] pci_walk_bus+0x79/0xa0
[   41.270722]  [<ffffffff813381e0>] ? pci_cleanup_aer_uncorrect_error_status+0x90/0x90
[   41.296747]  [<ffffffff813382f0>] broadcast_error_message+0xb0/0x100
[   41.321552]  [<ffffffff81338509>] do_recovery+0x1c9/0x280
[   41.345507]  [<ffffffff81338f58>] aer_isr+0x348/0x430
[   41.368851]  [<ffffffff8109d5fb>] process_one_work+0x17b/0x470
[   41.392157]  [<ffffffff8109e3cb>] worker_thread+0x11b/0x400
[   41.416852]  [<ffffffff8109e2b0>] ? rescuer_thread+0x400/0x400
[   41.441577]  [<ffffffff810a5aef>] kthread+0xcf/0xe0
[   41.465029]  [<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
[   41.488341]  [<ffffffff81645858>] ret_from_fork+0x58/0x90
[   41.511247]  [<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
[   41.535442] Code: c1 49 89 4e 30 49 8b 85 b8 05 00 00 48 85 c0 0f 84 39 01 00 00 81 c2 10 38 00 00 48 63 d2 48 01 d0 31 d2 89 10 49 8b 46 30 31 d2 <89> 10 41 8b 95 3c 06 00 00 b8 14 01 10 02 83 fa 05 74 0b 83 fa 
[   41.587718] RIP  [<ffffffffa02b438d>] igb_configure_tx_ring+0x14d/0x280 [igb]
[   41.610872]  RSP <ffff880179683bf8>
[   41.632301] CR2: 0000000000003818
And then it reboots.  So what RAS improvement have we bought ourselves
here?  What endpoints have you tested with this?  Which ones recovered
reliably?  Thanks,
Alex
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not
  2016-09-01  2:12     ` Alex Williamson
@ 2016-09-22 14:04       ` Dou Liyang
  0 siblings, 0 replies; 25+ messages in thread
From: Dou Liyang @ 2016-09-22 14:04 UTC (permalink / raw)
  To: Alex Williamson, Zhou Jie; +Cc: Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
Hi Alex,
At 09/01/2016 10:12 AM, Alex Williamson wrote:
[...]
>
> I had to move to a different system where I could actually inject an
> aer error and created a config similar to above but with the 82576
> ports downstream of the ioh3420 root port.  When I inject a malformed
> TLP uncorrectable error, my RHEL7.2 guest does this:
>
> [   35.995645] pcieport 0000:00:1c.0: AER: Multiple Uncorrected (Fatal) error received: id=0200
> [   35.998483] igb 0000:02:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Unaccessible, id=0200(Unregistered Agent ID)
> [   36.001965] igb 0000:02:00.0 enp2s0f0: PCIe link lost, device now detached
> [   36.015092] igb 0000:02:00.1 enp2s0f1: PCIe link lost, device now detached
> [   39.133185] igb 0000:02:00.0: enabling device (0000 -> 0002)
> [   40.071245] igb 0000:02:00.1: enabling device (0000 -> 0002)
> [   41.014451] BUG: unable to handle kernel paging request at 0000000000003818
> [   41.015969] IP: [<ffffffffa02b438d>] igb_configure_tx_ring+0x14d/0x280 [igb]
> [   41.017507] PGD 367e2067 PUD 7ae56067 PMD 0
> [   41.018497] Oops: 0002 [#1] SMP
> [   41.019242] Modules linked in: ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter snd_hda_codec_generic snd_hda_intel snd_hda_codec ppdev snd_hda_core snd_hwdep snd_seq snd_seq_device iTCO_wdt iTCO_vendor_support bochs_drm snd_pcm syscopyarea sysfillrect sysimgblt ttm virtio_balloon snd_timer snd igb drm_kms_helper soundcore ptp pps_core i2c_algo_bit i2c_i801 dca drm shpchp lpc_ich mfd_core pcspkr i2c_core parport_pc parport ip_tables xfs libcrc32c virtio_blk virtio_console virtio_net ahci libahci crc32c_intel serio_raw libata virtio_pci virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod
> [   41.040590] CPU: 0 PID: 29 Comm: kworker/0:1 Not tainted 3.10.0-327.el7.x86_64 #1
> [   41.042180] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
> [   41.044635] Workqueue: events aer_isr
> [   41.045478] task: ffff880179435080 ti: ffff880179680000 task.ti: ffff880179680000
> [   41.047097] RIP: 0010:[<ffffffffa02b438d>]  [<ffffffffa02b438d>] igb_configure_tx_ring+0x14d/0x280 [igb]
> [   41.049151] RSP: 0018:ffff880179683bf8  EFLAGS: 00010246
> [   41.050260] RAX: 0000000000003818 RBX: 0000000000000000 RCX: 0000000000003818
> [   41.051747] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 00000000002896b3
> [   41.053268] RBP: ffff880179683c20 R08: 0000000001010100 R09: 00000000ffffffe7
> [   41.054730] R10: ffffea0001eb6100 R11: ffffffffa02afa31 R12: 0000000000000000
> [   41.056201] R13: ffff880035dbc8c0 R14: ffff880175d03f80 R15: 000000017716e000
> [   41.057673] FS:  0000000000000000(0000) GS:ffff88017fc00000(0000) knlGS:0000000000000000
> [   41.059337] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [   41.060548] CR2: 0000000000003818 CR3: 0000000178331000 CR4: 00000000000006f0
> [   41.062028] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   41.063534] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> [   41.065025] Stack:
> [   41.065473]  ffff880035dbc8c0 ffff880035dbce70 0000000000000001 ffff880035dbc8c8
> [   41.067119]  ffff880035dbce70 ffff880179683c80 ffffffffa02b8a77 fefdf27269fb3cd8
> [   41.068781]  2009f9ee3386436f eb9e4e66756bbfdd 34002f8114a5d65f 9535990856231c4b
> [   41.094179] Call Trace:
> [   41.118688]  [<ffffffffa02b8a77>] igb_configure+0x267/0x450 [igb]
> [   41.144286]  [<ffffffffa02b94f1>] igb_up+0x21/0x1a0 [igb]
> [   41.170606]  [<ffffffffa02b96a7>] igb_io_resume+0x37/0x70 [igb]
> [   41.195846]  [<ffffffff813381e0>] ? pci_cleanup_aer_uncorrect_error_status+0x90/0x90
> [   41.221767]  [<ffffffff81338228>] report_resume+0x48/0x60
> [   41.246455]  [<ffffffff8131e359>] pci_walk_bus+0x79/0xa0
> [   41.270722]  [<ffffffff813381e0>] ? pci_cleanup_aer_uncorrect_error_status+0x90/0x90
> [   41.296747]  [<ffffffff813382f0>] broadcast_error_message+0xb0/0x100
> [   41.321552]  [<ffffffff81338509>] do_recovery+0x1c9/0x280
> [   41.345507]  [<ffffffff81338f58>] aer_isr+0x348/0x430
> [   41.368851]  [<ffffffff8109d5fb>] process_one_work+0x17b/0x470
> [   41.392157]  [<ffffffff8109e3cb>] worker_thread+0x11b/0x400
> [   41.416852]  [<ffffffff8109e2b0>] ? rescuer_thread+0x400/0x400
> [   41.441577]  [<ffffffff810a5aef>] kthread+0xcf/0xe0
> [   41.465029]  [<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
> [   41.488341]  [<ffffffff81645858>] ret_from_fork+0x58/0x90
> [   41.511247]  [<ffffffff810a5a20>] ? kthread_create_on_node+0x140/0x140
> [   41.535442] Code: c1 49 89 4e 30 49 8b 85 b8 05 00 00 48 85 c0 0f 84 39 01 00 00 81 c2 10 38 00 00 48 63 d2 48 01 d0 31 d2 89 10 49 8b 46 30 31 d2 <89> 10 41 8b 95 3c 06 00 00 b8 14 01 10 02 83 fa 05 74 0b 83 fa
> [   41.587718] RIP  [<ffffffffa02b438d>] igb_configure_tx_ring+0x14d/0x280 [igb]
> [   41.610872]  RSP <ffff880179683bf8>
> [   41.632301] CR2: 0000000000003818
>
> And then it reboots.  So what RAS improvement have we bought ourselves
> here?  What endpoints have you tested with this?  Which ones recovered
> reliably?  Thanks,
I am working on it.
The endpoints I used to test are:
-device ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1 \
-device vfio-pci,host=06:00.1,bus=bridge1,addr=00.1,id=net1,aer=true \
-device 
vfio-pci,host=06:00.0,bus=bridge1,addr=00.0,id=net0,aer=true,multifunction=on 
\
When I tested them, sometimes, my guest even be kernel panic.
And I found that the bug was related with the states of the devices.
when the enp1s0f1 was not up, which just like below:
ifconfig -a
...
enp1s0f1: flags=4098<BROADCAST,MULTICAST>  mtu 1500
         ether 00:1b:21:67:3b:bd  txqueuelen 1000  (Ethernet)
         RX packets 0  bytes 0 (0.0 B)
         RX errors 0  dropped 0  overruns 0  frame 0
         TX packets 0  bytes 0 (0.0 B)
         TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
         device memory 0xfe420000-fe43ffff
...
Then, I injected fatal/nonfatal error, it could recovered reliably.
But, when I executed this command to make the network up:
ifconfig enp1s0f1 up
dmesg:
...
[   34.109886] IPv6: ADDRCONF(NETDEV_UP): enp1s0f1: link is not ready
[   36.232118] igb 0000:01:00.1 enp1s0f1: igb: enp1s0f1 NIC Link is Up 
1000 Mbps Full Duplex, Flow Control: RX/TX
[   36.232397] IPv6: ADDRCONF(NETDEV_CHANGE): enp1s0f1: link becomes ready
...
ifconfig:
...
enp1s0f1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
...
Then, I did the tests, I got the kernel panic:
        [  910.702002] ---[ end trace 3cd6a977a579a0bd ]---
        [  910.702002] Kernel panic - not syncing: Fatal exception
        [  910.702002] Kernel Offset: 0x0 from 0xffffffff81000000 
(relocation range: 0xf
        fffffff80000000-0xffffffff9fffffff) 
        [  910.702002] ---[ end Kernel panic - not syncing: Fatal exception
----------------------
The bug happened in Guest with "PCIe link lost, device now detached",
but both Host and QEMU can reset the devices well.
At First, I guessed it might be a igb bug[1] and did many jobs on it.
But, it don't work.
Currently, I guess if I should make the devices non-working before QEMU
reset the devices ?
OR
I guess if I need to investigate the bug in igb driver ?
Now, I have no idea about it, Could you give me some advice?
[1]:http://www.gossamer-threads.com/lists/linux/kernel/2274363
Thanks,
Dou
^ permalink raw reply	[flat|nested] 25+ messages in thread
 
- * Re: [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not
  2016-08-31 19:56   ` Alex Williamson
  2016-09-01  2:12     ` Alex Williamson
@ 2016-09-22  8:34     ` Dou Liyang
  2016-09-22 14:03       ` Alex Williamson
  1 sibling, 1 reply; 25+ messages in thread
From: Dou Liyang @ 2016-09-22  8:34 UTC (permalink / raw)
  To: Alex Williamson, Zhou Jie; +Cc: Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
Hi Alex,
At 09/01/2016 03:56 AM, Alex Williamson wrote:
> On Tue, 19 Jul 2016 15:38:23 +0800
> Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> When assigning a vfio device with AER enabled, we must check whether
>> the device supports a host bus reset (ie. hot reset) as this may be
>> used by the guest OS in order to recover the device from an AER
>> error.  QEMU must therefore have the ability to perform a physical
>> host bus reset using the existing vfio APIs in response to a virtual
>> bus reset in the VM.  A physical bus reset affects all of the devices
>> on the host bus, therefore we place a few simplifying configuration
>> restriction on the VM:
>>
>>  - All physical devices affected by a bus reset must be assigned to
>>    the VM with AER enabled on each and be configured on the same
>>    virtual bus in the VM.
>>
>>  - No devices unaffected by the bus reset, be they physical, emulated,
>>    or paravirtual may be configured on the same virtual bus as a
>>    device supporting AER signaling through vfio.
>>
>> In other words users wishing to enable AER on a multifunction device
>> need to assign all functions of the device to the same virtual bus
>> and enable AER support for each device.  The easiest way to
>> accomplish this is to identity map the physical functions to virtual
>> functions with multifunction enabled on the virtual device.
>
> Why am I able to start the following VM with aer=on for the vfio-pci
> devices?
In my opinion, because, when we check out this situation in vfio_init,
our code just do nothing except going to out_teardown. so the devices 
which are behind a PCIe-to-PCI bridge will be regarded as the PCI devices.
>
> # lspci -tv
> -[0000:00]-+-00.0  Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
>            +-01.0  Device 1234:1111
>            +-1c.0-[01]--
>            +-1d.0-[02]--+-01.0  Intel Corporation 82576 Gigabit Network Connection
>            |            \-01.1  Intel Corporation 82576 Gigabit Network Connection
>            ...
>
> # lspci -vvv -s 1d.0
> 00:1d.0 PCI bridge: Red Hat, Inc. QEMU PCI-PCI bridge (prog-if 00 [Normal decode])
>
> The devices are behind a PCIe-to-PCI bridge, so shouldn't specifying
> aer=on for the vfio-pci devices cause a configuration error?
Yes, I think it may makes users confuse.
I added a error_report for it.
Then, I test many passible combinations, such as:
pci-bridge
     |- vfio-pci
-device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d \
-device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.3,addr=1 \
error:
vfio: Unable to enable AER for device 0000:06:00.0, device does not 
support AER signaling
--------------------------------------------------------------------
pci-bridge
     |- vfio-pci
     |- vfio-pci
-device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d \
-device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.3,addr=1 \
-device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=pci.3,addr=1.1 \
error:
vfio: Unable to enable AER for device 0000:06:00.0, device does not 
support AER signaling
--------------------------------------------------------------------
ioh3420
   |- pci-bridge
       |- vfio-pci
       |- vfio-pci
-device ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1 \
-device pci-bridge,chassis_nr=3,id=pci.1,bus=bridge1,addr=0x1d \
-device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.1,addr=1 \
-device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=pci.1,addr=1.1 \
error:
vfio: Unable to enable AER for device 0000:06:00.0, device does not 
support AER signaling
--------------------------------------------------------------------
just for test:
pci-bridge
   |- ioh3420
       |- vfio-pci
       |- vfio-pci
-device pci-bridge,chassis_nr=3,id=pci.1,bus=pcie.0,addr=0x1d \
-device ioh3420,bus=pci.1,addr=1c.0,port=1,id=bridge1,chassis=1 \
-device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=bridge1,addr=1 \
-device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=bridge1,addr=1.1 \
error:
vfio: Unable to enable AER for device 0000:06:00.0, parent bus does not 
support AER signaling
--------------------------------------------------------------------
ioh3420
    |- vfio-pci
    |- vfio-pci
-device ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1 \
-device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=bridge1,addr=1 \
-device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=bridge1,addr=1.1 \
OK
--------------------------------------------------------------------
...
Thanks,
Dou
>
[...]
>
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not
  2016-09-22  8:34     ` Dou Liyang
@ 2016-09-22 14:03       ` Alex Williamson
  2016-09-22 14:27         ` Dou Liyang
  0 siblings, 1 reply; 25+ messages in thread
From: Alex Williamson @ 2016-09-22 14:03 UTC (permalink / raw)
  To: Dou Liyang; +Cc: Zhou Jie, Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
On Thu, 22 Sep 2016 16:34:32 +0800
Dou Liyang <douly.fnst@cn.fujitsu.com> wrote:
> Hi Alex,
> 
> At 09/01/2016 03:56 AM, Alex Williamson wrote:
> > On Tue, 19 Jul 2016 15:38:23 +0800
> > Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
> >  
> >> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>
> >> When assigning a vfio device with AER enabled, we must check whether
> >> the device supports a host bus reset (ie. hot reset) as this may be
> >> used by the guest OS in order to recover the device from an AER
> >> error.  QEMU must therefore have the ability to perform a physical
> >> host bus reset using the existing vfio APIs in response to a virtual
> >> bus reset in the VM.  A physical bus reset affects all of the devices
> >> on the host bus, therefore we place a few simplifying configuration
> >> restriction on the VM:
> >>
> >>  - All physical devices affected by a bus reset must be assigned to
> >>    the VM with AER enabled on each and be configured on the same
> >>    virtual bus in the VM.
> >>
> >>  - No devices unaffected by the bus reset, be they physical, emulated,
> >>    or paravirtual may be configured on the same virtual bus as a
> >>    device supporting AER signaling through vfio.
> >>
> >> In other words users wishing to enable AER on a multifunction device
> >> need to assign all functions of the device to the same virtual bus
> >> and enable AER support for each device.  The easiest way to
> >> accomplish this is to identity map the physical functions to virtual
> >> functions with multifunction enabled on the virtual device.  
> >
> > Why am I able to start the following VM with aer=on for the vfio-pci
> > devices?  
> 
> In my opinion, because, when we check out this situation in vfio_init,
> our code just do nothing except going to out_teardown. so the devices 
> which are behind a PCIe-to-PCI bridge will be regarded as the PCI devices.
> 
> >
> > # lspci -tv
> > -[0000:00]-+-00.0  Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
> >            +-01.0  Device 1234:1111
> >            +-1c.0-[01]--
> >            +-1d.0-[02]--+-01.0  Intel Corporation 82576 Gigabit Network Connection
> >            |            \-01.1  Intel Corporation 82576 Gigabit Network Connection
> >            ...
> >
> > # lspci -vvv -s 1d.0
> > 00:1d.0 PCI bridge: Red Hat, Inc. QEMU PCI-PCI bridge (prog-if 00 [Normal decode])
> >
> > The devices are behind a PCIe-to-PCI bridge, so shouldn't specifying
> > aer=on for the vfio-pci devices cause a configuration error?  
> 
> Yes, I think it may makes users confuse.
> 
> I added a error_report for it.
I think we need more than an error_report, our strategy to date has
been that if the user specifies aer=on and we cannot support AER on the
device then we fail the initialization of the device.  Eric's patches
to convert vfio to realize and overhaul the error handling might make
this easier to accomplish.  Thanks,
Alex
> 
> Then, I test many passible combinations, such as:
> 
> pci-bridge
>      |- vfio-pci
> 
> -device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d \
> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.3,addr=1 \
> 
> error:
> vfio: Unable to enable AER for device 0000:06:00.0, device does not 
> support AER signaling
> 
> --------------------------------------------------------------------
> 
> pci-bridge
>      |- vfio-pci
>      |- vfio-pci
> 
> -device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d \
> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.3,addr=1 \
> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=pci.3,addr=1.1 \
> 
> error:
> vfio: Unable to enable AER for device 0000:06:00.0, device does not 
> support AER signaling
> 
> --------------------------------------------------------------------
> ioh3420
>    |- pci-bridge
>        |- vfio-pci
>        |- vfio-pci
> 
> -device ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1 \
> -device pci-bridge,chassis_nr=3,id=pci.1,bus=bridge1,addr=0x1d \
> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.1,addr=1 \
> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=pci.1,addr=1.1 \
> 
> error:
> vfio: Unable to enable AER for device 0000:06:00.0, device does not 
> support AER signaling
> 
> --------------------------------------------------------------------
> just for test:
> pci-bridge
>    |- ioh3420
>        |- vfio-pci
>        |- vfio-pci
> -device pci-bridge,chassis_nr=3,id=pci.1,bus=pcie.0,addr=0x1d \
> -device ioh3420,bus=pci.1,addr=1c.0,port=1,id=bridge1,chassis=1 \
> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=bridge1,addr=1 \
> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=bridge1,addr=1.1 \
> 
> error:
> vfio: Unable to enable AER for device 0000:06:00.0, parent bus does not 
> support AER signaling
> 
> --------------------------------------------------------------------
> ioh3420
>     |- vfio-pci
>     |- vfio-pci
> 
> -device ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1 \
> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=bridge1,addr=1 \
> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=bridge1,addr=1.1 \
> 
> OK
> --------------------------------------------------------------------
> ...
> 
> Thanks,
> Dou
> 
> >  
> 
> [...]
> 
> 
> >  
> 
> 
> 
> 
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not
  2016-09-22 14:03       ` Alex Williamson
@ 2016-09-22 14:27         ` Dou Liyang
  0 siblings, 0 replies; 25+ messages in thread
From: Dou Liyang @ 2016-09-22 14:27 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Zhou Jie, Chen Fan, izumi.taku, fan.chen, qemu-devel, mst
Hi Alex,
At 09/22/2016 10:03 PM, Alex Williamson wrote:
> On Thu, 22 Sep 2016 16:34:32 +0800
> Dou Liyang <douly.fnst@cn.fujitsu.com> wrote:
>
>> Hi Alex,
>>
>> At 09/01/2016 03:56 AM, Alex Williamson wrote:
>>> On Tue, 19 Jul 2016 15:38:23 +0800
>>> Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
>>>
>>>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>>
>>>> When assigning a vfio device with AER enabled, we must check whether
>>>> the device supports a host bus reset (ie. hot reset) as this may be
>>>> used by the guest OS in order to recover the device from an AER
>>>> error.  QEMU must therefore have the ability to perform a physical
>>>> host bus reset using the existing vfio APIs in response to a virtual
>>>> bus reset in the VM.  A physical bus reset affects all of the devices
>>>> on the host bus, therefore we place a few simplifying configuration
>>>> restriction on the VM:
>>>>
>>>>  - All physical devices affected by a bus reset must be assigned to
>>>>    the VM with AER enabled on each and be configured on the same
>>>>    virtual bus in the VM.
>>>>
>>>>  - No devices unaffected by the bus reset, be they physical, emulated,
>>>>    or paravirtual may be configured on the same virtual bus as a
>>>>    device supporting AER signaling through vfio.
>>>>
>>>> In other words users wishing to enable AER on a multifunction device
>>>> need to assign all functions of the device to the same virtual bus
>>>> and enable AER support for each device.  The easiest way to
>>>> accomplish this is to identity map the physical functions to virtual
>>>> functions with multifunction enabled on the virtual device.
>>>
>>> Why am I able to start the following VM with aer=on for the vfio-pci
>>> devices?
>>
>> In my opinion, because, when we check out this situation in vfio_init,
>> our code just do nothing except going to out_teardown. so the devices
>> which are behind a PCIe-to-PCI bridge will be regarded as the PCI devices.
>>
>>>
>>> # lspci -tv
>>> -[0000:00]-+-00.0  Intel Corporation 82G33/G31/P35/P31 Express DRAM Controller
>>>            +-01.0  Device 1234:1111
>>>            +-1c.0-[01]--
>>>            +-1d.0-[02]--+-01.0  Intel Corporation 82576 Gigabit Network Connection
>>>            |            \-01.1  Intel Corporation 82576 Gigabit Network Connection
>>>            ...
>>>
>>> # lspci -vvv -s 1d.0
>>> 00:1d.0 PCI bridge: Red Hat, Inc. QEMU PCI-PCI bridge (prog-if 00 [Normal decode])
>>>
>>> The devices are behind a PCIe-to-PCI bridge, so shouldn't specifying
>>> aer=on for the vfio-pci devices cause a configuration error?
>>
>> Yes, I think it may makes users confuse.
>>
>> I added a error_report for it.
>
> I think we need more than an error_report, our strategy to date has
> been that if the user specifies aer=on and we cannot support AER on the
> device then we fail the initialization of the device.
Oh, I am sorry, I did not say clearly. I also made the init failure by
"return -EINVAL".
  Eric's patches
> to convert vfio to realize and overhaul the error handling might make
> this easier to accomplish.  Thanks,
Yes, I saw  Eric's patches too.
I will follow Eric's way.
Thanks very much.
Dou
>
> Alex
>
>>
>> Then, I test many passible combinations, such as:
>>
>> pci-bridge
>>      |- vfio-pci
>>
>> -device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d \
>> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.3,addr=1 \
>>
>> error:
>> vfio: Unable to enable AER for device 0000:06:00.0, device does not
>> support AER signaling
>>
>> --------------------------------------------------------------------
>>
>> pci-bridge
>>      |- vfio-pci
>>      |- vfio-pci
>>
>> -device pci-bridge,chassis_nr=3,id=pci.3,bus=pcie.0,addr=0x1d \
>> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.3,addr=1 \
>> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=pci.3,addr=1.1 \
>>
>> error:
>> vfio: Unable to enable AER for device 0000:06:00.0, device does not
>> support AER signaling
>>
>> --------------------------------------------------------------------
>> ioh3420
>>    |- pci-bridge
>>        |- vfio-pci
>>        |- vfio-pci
>>
>> -device ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1 \
>> -device pci-bridge,chassis_nr=3,id=pci.1,bus=bridge1,addr=0x1d \
>> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=pci.1,addr=1 \
>> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=pci.1,addr=1.1 \
>>
>> error:
>> vfio: Unable to enable AER for device 0000:06:00.0, device does not
>> support AER signaling
>>
>> --------------------------------------------------------------------
>> just for test:
>> pci-bridge
>>    |- ioh3420
>>        |- vfio-pci
>>        |- vfio-pci
>> -device pci-bridge,chassis_nr=3,id=pci.1,bus=pcie.0,addr=0x1d \
>> -device ioh3420,bus=pci.1,addr=1c.0,port=1,id=bridge1,chassis=1 \
>> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=bridge1,addr=1 \
>> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=bridge1,addr=1.1 \
>>
>> error:
>> vfio: Unable to enable AER for device 0000:06:00.0, parent bus does not
>> support AER signaling
>>
>> --------------------------------------------------------------------
>> ioh3420
>>     |- vfio-pci
>>     |- vfio-pci
>>
>> -device ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1 \
>> -device vfio-pci,aer=on,host=06:00.0,id=hostdev0,bus=bridge1,addr=1 \
>> -device vfio-pci,aer=on,host=06:00.1,id=hostdev1,bus=bridge1,addr=1.1 \
>>
>> OK
>> --------------------------------------------------------------------
>> ...
>>
>> Thanks,
>> Dou
>>
>>>
>>
>> [...]
>>
>>
>>>
>>
>>
>>
>>
>
>
>
^ permalink raw reply	[flat|nested] 25+ messages in thread 
 
 
 
 
- * [Qemu-devel] [PATCH v9 06/11] pci: add a pci_function_is_valid callback to check function if valid
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (4 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 05/11] vfio: add check host bus reset is support or not Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 07/11] vfio: add check aer functionality for hotplug device Zhou Jie
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
PCI hotplug requires that function 0 is added last to close the
slot.  Since vfio supporting AER, we require that the VM bus
contains the same set of devices as the host bus to support AER,
we can perform an AER validation test whenever a function 0 in
the VM is hot-added.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pci.c         | 32 ++++++++++++++++++++++++++++++++
 include/hw/pci/pci.h |  1 +
 2 files changed, 33 insertions(+)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 149994b..b292b42 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1940,6 +1940,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
     return bus->devices[devfn];
 }
 
+static void pci_function_is_valid(PCIBus *bus, PCIDevice *d, void *opaque)
+{
+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(d);
+    Error **errp = opaque;
+
+    if (*errp) {
+        return;
+    }
+
+    if (!pc->is_valid_func) {
+        return;
+    }
+
+    pc->is_valid_func(d, errp);
+}
+
 static void pci_qdev_realize(DeviceState *qdev, Error **errp)
 {
     PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -1982,6 +1998,22 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         pci_qdev_unrealize(DEVICE(pci_dev), NULL);
         return;
     }
+
+    /*
+     *  If the function number is 0, indicate the closure of the slot.
+     *  then we get the chance to check all functions on same device
+     *  if valid.
+     */
+    if (DEVICE(pci_dev)->hotplugged &&
+        pci_get_function_0(pci_dev) == pci_dev) {
+        pci_for_each_device(bus, pci_bus_num(bus),
+                            pci_function_is_valid, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+            return;
+        }
+    }
 }
 
 static void pci_default_realize(PCIDevice *dev, Error **errp)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 9ed1624..68b6848 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -198,6 +198,7 @@ typedef struct PCIDeviceClass {
 
     void (*realize)(PCIDevice *dev, Error **errp);
     int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
+    void (*is_valid_func)(PCIDevice *dev, Error **errp);
     PCIUnregisterFunc *exit;
     PCIConfigReadFunc *config_read;
     PCIConfigWriteFunc *config_write;
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [Qemu-devel] [PATCH v9 07/11] vfio: add check aer functionality for hotplug device
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (5 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 06/11] pci: add a pci_function_is_valid callback to check function if valid Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 08/11] vfio: vote the function 0 to do host bus reset when aer occurred Zhou Jie
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
when function 0 is hot-added, we can check the vfio device
whether support hot bus reset.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 242c1e4..8bcb26b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3149,6 +3149,19 @@ post_reset:
     vfio_pci_post_reset(vdev);
 }
 
+static void vfio_pci_is_valid(PCIDevice *dev, Error **errp)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+    Error *local_err = NULL;
+
+    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+        vfio_check_hot_bus_reset(vdev, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+        }
+    }
+}
+
 static void vfio_instance_init(Object *obj)
 {
     PCIDevice *pci_dev = PCI_DEVICE(obj);
@@ -3206,6 +3219,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     pdc->init = vfio_initfn;
     pdc->exit = vfio_exitfn;
+    pdc->is_valid_func = vfio_pci_is_valid;
     pdc->config_read = vfio_pci_read_config;
     pdc->config_write = vfio_pci_write_config;
     pdc->is_express = 1; /* We might be */
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [Qemu-devel] [PATCH v9 08/11] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (6 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 07/11] vfio: add check aer functionality for hotplug device Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-10-09 13:07   ` Cao jin
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 09/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Due to all devices assigned to VM on the same way as host if enable
aer, so we can easily do the hot reset by selecting the function #0
to do the hot reset.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 14 ++++++++++++++
 hw/vfio/pci.h |  1 +
 2 files changed, 15 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8bcb26b..0521652 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1924,6 +1924,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
     /* List all affected devices by bus reset */
     devices = &info->devices[0];
 
+    vdev->single_depend_dev = (info->count == 1);
+
     /* Verify that we have all the groups required */
     for (i = 0; i < info->count; i++) {
         PCIHostDeviceAddress host;
@@ -3120,6 +3122,18 @@ static void vfio_pci_reset(DeviceState *dev)
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
+    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+        PCIDevice *br = pci_bridge_get_device(pdev->bus);
+
+        if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
+             PCI_BRIDGE_CTL_BUS_RESET)) {
+            if (pci_get_function_0(pdev) == pdev) {
+                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+            }
+            return;
+        }
+    }
+
     vfio_pci_pre_reset(vdev);
 
     if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index e2faffb..ed14322 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -147,6 +147,7 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+    bool single_depend_dev;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 08/11] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 08/11] vfio: vote the function 0 to do host bus reset when aer occurred Zhou Jie
@ 2016-10-09 13:07   ` Cao jin
  0 siblings, 0 replies; 25+ messages in thread
From: Cao jin @ 2016-10-09 13:07 UTC (permalink / raw)
  To: fan.chen; +Cc: qemu-devel, mst
帆,昨天电话里面好像还有个点没清楚,这个点也在这个patch里,不知道我理解 
的对不对,有空的时候请帮斧正下:
(已经理解,对于uncorrectable error, driver里是会做reset link的,也就是 
设置pci bridge的secondary bus reset的那个bit)
On 07/19/2016 03:38 PM, Zhou Jie wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>
> Due to all devices assigned to VM on the same way as host if enable
> aer, so we can easily do the hot reset by selecting the function #0
> to do the hot reset.
>
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>   hw/vfio/pci.c | 14 ++++++++++++++
>   hw/vfio/pci.h |  1 +
>   2 files changed, 15 insertions(+)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 8bcb26b..0521652 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1924,6 +1924,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>       /* List all affected devices by bus reset */
>       devices = &info->devices[0];
>
> +    vdev->single_depend_dev = (info->count == 1);
> +
info->count == 1 表示hot reset时候不会影响别的设备(1是该设备自己),那就 
是说,被 passthrough 的 function 所在的物理 bus 下,有且只有这一个 
function (也就是非multi-function设备的意思吗?)
>       /* Verify that we have all the groups required */
>       for (i = 0; i < info->count; i++) {
>           PCIHostDeviceAddress host;
> @@ -3120,6 +3122,18 @@ static void vfio_pci_reset(DeviceState *dev)
>
>       trace_vfio_pci_reset(vdev->vbasedev.name);
>
> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +        PCIDevice *br = pci_bridge_get_device(pdev->bus);
> +
> +        if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
> +             PCI_BRIDGE_CTL_BUS_RESET)) {
> +            if (pci_get_function_0(pdev) == pdev) {
> +                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +            }
> +            return;
> +        }
> +    }
> +
有点不明白为什么需要这段,或者是想问:对于两个vfio passthrough的 
function,不同function发生aer时候,处理流程是不一样的,为啥呢?
-- 
Yours Sincerely,
Cao jin
^ permalink raw reply	[flat|nested] 25+ messages in thread
 
- * [Qemu-devel] [PATCH v9 09/11] vfio-pci: pass the aer error to guest
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (7 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 08/11] vfio: vote the function 0 to do host bus reset when aer occurred Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress Zhou Jie
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 11/11] vfio: add 'aer' property to expose aercap Zhou Jie
  10 siblings, 0 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, resulting in the qemu eventfd handler getting
invoked.
this patch is to pass the error to guest and let the guest driver
recover from the error.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0521652..0e42786 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2627,18 +2627,66 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    PCIDevice *dev = &vdev->pdev;
+    Error *local_err = NULL;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
     }
 
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        goto stop;
+    }
+
+    /*
+     * in case the real hardware configuration has been changed,
+     * here we should recheck the bus reset capability.
+     */
+    vfio_check_hot_bus_reset(vdev, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        goto stop;
+    }
+
+    /*
+     * we should read the error details from the real hardware
+     * configuration spaces, here we only need to do is signaling
+     * to guest an uncorrectable error has occurred.
+     */
+    if (dev->exp.aer_cap) {
+        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+        uint32_t uncor_status;
+        bool isfatal;
+
+        uncor_status = vfio_pci_read_config(dev,
+                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+        /*
+         * if the error is not emitted by this device, we can
+         * just ignore it.
+         */
+        if (!(uncor_status & ~0UL)) {
+            return;
+        }
+
+        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+        pcie_aer_msg(dev, &msg);
+        return;
+    }
+
+stop:
     /*
-     * TBD. Retrieve the error details and decide what action
-     * needs to be taken. One of the actions could be to pass
-     * the error to the guest and have the guest driver recover
-     * from the error. This requires that PCIe capabilities be
-     * exposed to the guest. For now, we just terminate the
-     * guest to contain the error.
+     * If the aer capability is not exposed to the guest. we just
+     * terminate the guest to contain the error.
      */
 
     error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (8 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 09/11] vfio-pci: pass the aer error to guest Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  2016-08-31 20:13   ` Alex Williamson
  2016-10-24  7:56   ` Cao jin
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 11/11] vfio: add 'aer' property to expose aercap Zhou Jie
  10 siblings, 2 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan, Zhou Jie
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
For supporting aer recovery, host and guest would run the same aer
recovery code, that would do the secondary bus reset if the error
is fatal, the aer recovery process:
  1. error_detected
  2. reset_link (if fatal)
  3. slot_reset/mmio_enabled
  4. resume
It indicates that host will do secondary bus reset to reset
the physical devices under bus in step 2, that would cause
devices in D3 status in a short time. But in qemu, we register
an error detected handler, that would be invoked as host broadcasts
the error-detected event in step 1, in order to avoid guest do
reset_link when host do reset_link simultaneously. it may cause
fatal error. we poll the vfio_device_info to assure host reset
completely.
In qemu, the aer recovery process:
  1. Detect support for aer error progress
     If host vfio driver does not support for aer error progress,
     directly fail to boot up VM as with aer enabled.
  2. Immediately notify the VM on error detected.
  3. Wait for host aer error progress
     Poll the vfio_device_info, If it is still in aer error progress after
     some timeout, we would abort the guest directed bus reset
     altogether and unplug of the device to prevent it from further
     interacting with the VM.
  4. Reset bus.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Zhou Jie <zhoujie2011@cn.fujitsu.com>
---
 hw/vfio/pci.c              | 51 +++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h              |  1 +
  |  4 ++++
 3 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0e42786..777245c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -35,6 +35,12 @@
 
 #define MSIX_CAP_LENGTH 12
 
+/*
+ * Timeout for waiting host aer error process, it is 3 seconds.
+ * For hardware bus reset 3 seconds will be enough.
+ */
+#define PCI_AER_PROCESS_TIMEOUT 3000000
+
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
 
@@ -1913,6 +1919,14 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
     VFIOGroup *group;
     int ret, i, devfn, range_limit;
 
+    if (!(vdev->vbasedev.flags & VFIO_DEVICE_FLAGS_AERPROCESS)) {
+        error_setg(errp, "vfio: Cannot enable AER for device %s,"
+                   " host vfio driver does not support for"
+                   " aer error progress",
+                   vdev->vbasedev.name);
+        return;
+    }
+
     ret = vfio_get_hot_reset_info(vdev, &info);
     if (ret) {
         error_setg(errp, "vfio: Cannot enable AER for device %s,"
@@ -2679,6 +2693,11 @@ static void vfio_err_notifier_handler(void *opaque)
         msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
                                  PCI_ERR_ROOT_CMD_NONFATAL_EN;
 
+        if (isfatal) {
+            PCIDevice *dev_0 = pci_get_function_0(dev);
+            VFIOPCIDevice *vdev_0 = DO_UPCAST(VFIOPCIDevice, pdev, dev_0);
+            vdev_0->pci_aer_error_signaled = true;
+        }
         pcie_aer_msg(dev, &msg);
         return;
     }
@@ -3163,6 +3182,19 @@ static void vfio_exitfn(PCIDevice *pdev)
     vfio_bars_exit(vdev);
 }
 
+static int vfio_aer_error_is_in_process(VFIOPCIDevice *vdev)
+{
+    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
+    int ret;
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info);
+    if (ret) {
+        error_report("vfio: error getting device info: %m");
+        return ret;
+    }
+    return dev_info.flags & VFIO_DEVICE_FLAGS_INAERPROCESS ? 1 : 0;
+}
+
 static void vfio_pci_reset(DeviceState *dev)
 {
     PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
@@ -3176,7 +3208,24 @@ static void vfio_pci_reset(DeviceState *dev)
         if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
              PCI_BRIDGE_CTL_BUS_RESET)) {
             if (pci_get_function_0(pdev) == pdev) {
-                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+                if (!vdev->pci_aer_error_signaled) {
+                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+                } else {
+                    int i;
+                    for (i = 0; i < 1000; i++) {
+                        if (!vfio_aer_error_is_in_process(vdev)) {
+                            break;
+                        }
+                        g_usleep(PCI_AER_PROCESS_TIMEOUT / 1000);
+                    }
+
+                    if (i == 1000) {
+                        qdev_unplug(&vdev->pdev.qdev, NULL);
+                    } else {
+                        vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+                    }
+                    vdev->pci_aer_error_signaled = false;
+                }
             }
             return;
         }
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index ed14322..c9e0202 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -148,6 +148,7 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_msi;
     bool no_kvm_msix;
     bool single_depend_dev;
+    bool pci_aer_error_signaled;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
 --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 759b850..9295fca 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -198,6 +198,10 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
+/* support aer error progress */
+#define VFIO_DEVICE_FLAGS_AERPROCESS  (1 << 4)
+/* status in aer error progress */
+#define VFIO_DEVICE_FLAGS_INAERPROCESS  (1 << 5)
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress Zhou Jie
@ 2016-08-31 20:13   ` Alex Williamson
  2016-08-31 20:34     ` Michael S. Tsirkin
  2016-10-24  7:56   ` Cao jin
  1 sibling, 1 reply; 25+ messages in thread
From: Alex Williamson @ 2016-08-31 20:13 UTC (permalink / raw)
  To: Zhou Jie; +Cc: fan.chen, mst, qemu-devel, Chen Fan, izumi.taku
On Tue, 19 Jul 2016 15:38:28 +0800
Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> For supporting aer recovery, host and guest would run the same aer
> recovery code, that would do the secondary bus reset if the error
> is fatal, the aer recovery process:
>   1. error_detected
>   2. reset_link (if fatal)
>   3. slot_reset/mmio_enabled
>   4. resume
> 
> It indicates that host will do secondary bus reset to reset
> the physical devices under bus in step 2, that would cause
> devices in D3 status in a short time. But in qemu, we register
> an error detected handler, that would be invoked as host broadcasts
> the error-detected event in step 1, in order to avoid guest do
> reset_link when host do reset_link simultaneously. it may cause
> fatal error. we poll the vfio_device_info to assure host reset
> completely.
> In qemu, the aer recovery process:
>   1. Detect support for aer error progress
>      If host vfio driver does not support for aer error progress,
>      directly fail to boot up VM as with aer enabled.
>   2. Immediately notify the VM on error detected.
>   3. Wait for host aer error progress
>      Poll the vfio_device_info, If it is still in aer error progress after
>      some timeout, we would abort the guest directed bus reset
>      altogether and unplug of the device to prevent it from further
>      interacting with the VM.
>   4. Reset bus.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Signed-off-by: Zhou Jie <zhoujie2011@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c              | 51 +++++++++++++++++++++++++++++++++++++++++++++-
>  hw/vfio/pci.h              |  1 +
>  linux-headers/linux/vfio.h |  4 ++++
>  3 files changed, 55 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 0e42786..777245c 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -35,6 +35,12 @@
>  
>  #define MSIX_CAP_LENGTH 12
>  
> +/*
> + * Timeout for waiting host aer error process, it is 3 seconds.
> + * For hardware bus reset 3 seconds will be enough.
> + */
> +#define PCI_AER_PROCESS_TIMEOUT 3000000
Why is 3 seconds "enough"?  What considerations went into determining
this that would need to be re-evaluated if we ever want to change it?
24 hours is enough, but why was 3 seconds chosen over 24 hours?  Why
would 2 seconds be a worse choice?  1?
> +
>  static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>  static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
>  
> @@ -1913,6 +1919,14 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>      VFIOGroup *group;
>      int ret, i, devfn, range_limit;
>  
> +    if (!(vdev->vbasedev.flags & VFIO_DEVICE_FLAGS_AERPROCESS)) {
> +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
> +                   " host vfio driver does not support for"
> +                   " aer error progress",
> +                   vdev->vbasedev.name);
> +        return;
> +    }
> +
>      ret = vfio_get_hot_reset_info(vdev, &info);
>      if (ret) {
>          error_setg(errp, "vfio: Cannot enable AER for device %s,"
> @@ -2679,6 +2693,11 @@ static void vfio_err_notifier_handler(void *opaque)
>          msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
>                                   PCI_ERR_ROOT_CMD_NONFATAL_EN;
>  
> +        if (isfatal) {
> +            PCIDevice *dev_0 = pci_get_function_0(dev);
> +            VFIOPCIDevice *vdev_0 = DO_UPCAST(VFIOPCIDevice, pdev, dev_0);
> +            vdev_0->pci_aer_error_signaled = true;
> +        }
>          pcie_aer_msg(dev, &msg);
>          return;
>      }
> @@ -3163,6 +3182,19 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_bars_exit(vdev);
>  }
>  
> +static int vfio_aer_error_is_in_process(VFIOPCIDevice *vdev)
> +{
> +    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
> +    int ret;
> +
> +    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info);
> +    if (ret) {
> +        error_report("vfio: error getting device info: %m");
> +        return ret;
> +    }
> +    return dev_info.flags & VFIO_DEVICE_FLAGS_INAERPROCESS ? 1 : 0;
> +}
> +
>  static void vfio_pci_reset(DeviceState *dev)
>  {
>      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> @@ -3176,7 +3208,24 @@ static void vfio_pci_reset(DeviceState *dev)
>          if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>               PCI_BRIDGE_CTL_BUS_RESET)) {
>              if (pci_get_function_0(pdev) == pdev) {
> -                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +                if (!vdev->pci_aer_error_signaled) {
> +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +                } else {
> +                    int i;
> +                    for (i = 0; i < 1000; i++) {
> +                        if (!vfio_aer_error_is_in_process(vdev)) {
> +                            break;
> +                        }
> +                        g_usleep(PCI_AER_PROCESS_TIMEOUT / 1000);
> +                    }
> +
> +                    if (i == 1000) {
> +                        qdev_unplug(&vdev->pdev.qdev, NULL);
This looks a bit precarious to me, but I guess in the end we're only
really sending an attention button press on the hotplug slot.  Have you
forced this code path in testing and does the right thing happen for
both Windows and Linux guests?
> +                    } else {
> +                        vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +                    }
> +                    vdev->pci_aer_error_signaled = false;
> +                }
>              }
>              return;
>          }
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index ed14322..c9e0202 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -148,6 +148,7 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_msi;
>      bool no_kvm_msix;
>      bool single_depend_dev;
> +    bool pci_aer_error_signaled;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 759b850..9295fca 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -198,6 +198,10 @@ struct vfio_device_info {
>  #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
>  #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
>  #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
> +/* support aer error progress */
> +#define VFIO_DEVICE_FLAGS_AERPROCESS  (1 << 4)
> +/* status in aer error progress */
> +#define VFIO_DEVICE_FLAGS_INAERPROCESS  (1 << 5)
>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  };
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress
  2016-08-31 20:13   ` Alex Williamson
@ 2016-08-31 20:34     ` Michael S. Tsirkin
  0 siblings, 0 replies; 25+ messages in thread
From: Michael S. Tsirkin @ 2016-08-31 20:34 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Zhou Jie, fan.chen, qemu-devel, Chen Fan, izumi.taku
On Wed, Aug 31, 2016 at 02:13:09PM -0600, Alex Williamson wrote:
> On Tue, 19 Jul 2016 15:38:28 +0800
> Zhou Jie <zhoujie2011@cn.fujitsu.com> wrote:
> 
> > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > 
> > For supporting aer recovery, host and guest would run the same aer
> > recovery code, that would do the secondary bus reset if the error
> > is fatal, the aer recovery process:
> >   1. error_detected
> >   2. reset_link (if fatal)
> >   3. slot_reset/mmio_enabled
> >   4. resume
> > 
> > It indicates that host will do secondary bus reset to reset
> > the physical devices under bus in step 2, that would cause
> > devices in D3 status in a short time. But in qemu, we register
> > an error detected handler, that would be invoked as host broadcasts
> > the error-detected event in step 1, in order to avoid guest do
> > reset_link when host do reset_link simultaneously. it may cause
> > fatal error. we poll the vfio_device_info to assure host reset
> > completely.
> > In qemu, the aer recovery process:
> >   1. Detect support for aer error progress
> >      If host vfio driver does not support for aer error progress,
> >      directly fail to boot up VM as with aer enabled.
> >   2. Immediately notify the VM on error detected.
> >   3. Wait for host aer error progress
> >      Poll the vfio_device_info, If it is still in aer error progress after
> >      some timeout, we would abort the guest directed bus reset
> >      altogether and unplug of the device to prevent it from further
> >      interacting with the VM.
> >   4. Reset bus.
> > 
> > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > Signed-off-by: Zhou Jie <zhoujie2011@cn.fujitsu.com>
> > ---
> >  hw/vfio/pci.c              | 51 +++++++++++++++++++++++++++++++++++++++++++++-
> >  hw/vfio/pci.h              |  1 +
> >  linux-headers/linux/vfio.h |  4 ++++
> >  3 files changed, 55 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 0e42786..777245c 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -35,6 +35,12 @@
> >  
> >  #define MSIX_CAP_LENGTH 12
> >  
> > +/*
> > + * Timeout for waiting host aer error process, it is 3 seconds.
> > + * For hardware bus reset 3 seconds will be enough.
> > + */
> > +#define PCI_AER_PROCESS_TIMEOUT 3000000
> 
> Why is 3 seconds "enough"?  What considerations went into determining
> this that would need to be re-evaluated if we ever want to change it?
> 24 hours is enough, but why was 3 seconds chosen over 24 hours?  Why
> would 2 seconds be a worse choice?  1?
And just to clarify, the answer belongs in a code comment
and possibly commit log, not just in an email response.
> > +
> >  static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
> >  static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
> >  
> > @@ -1913,6 +1919,14 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> >      VFIOGroup *group;
> >      int ret, i, devfn, range_limit;
> >  
> > +    if (!(vdev->vbasedev.flags & VFIO_DEVICE_FLAGS_AERPROCESS)) {
> > +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
> > +                   " host vfio driver does not support for"
> > +                   " aer error progress",
> > +                   vdev->vbasedev.name);
> > +        return;
> > +    }
> > +
> >      ret = vfio_get_hot_reset_info(vdev, &info);
> >      if (ret) {
> >          error_setg(errp, "vfio: Cannot enable AER for device %s,"
> > @@ -2679,6 +2693,11 @@ static void vfio_err_notifier_handler(void *opaque)
> >          msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
> >                                   PCI_ERR_ROOT_CMD_NONFATAL_EN;
> >  
> > +        if (isfatal) {
> > +            PCIDevice *dev_0 = pci_get_function_0(dev);
> > +            VFIOPCIDevice *vdev_0 = DO_UPCAST(VFIOPCIDevice, pdev, dev_0);
> > +            vdev_0->pci_aer_error_signaled = true;
> > +        }
> >          pcie_aer_msg(dev, &msg);
> >          return;
> >      }
> > @@ -3163,6 +3182,19 @@ static void vfio_exitfn(PCIDevice *pdev)
> >      vfio_bars_exit(vdev);
> >  }
> >  
> > +static int vfio_aer_error_is_in_process(VFIOPCIDevice *vdev)
> > +{
> > +    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
> > +    int ret;
> > +
> > +    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info);
> > +    if (ret) {
> > +        error_report("vfio: error getting device info: %m");
> > +        return ret;
> > +    }
> > +    return dev_info.flags & VFIO_DEVICE_FLAGS_INAERPROCESS ? 1 : 0;
> > +}
> > +
> >  static void vfio_pci_reset(DeviceState *dev)
> >  {
> >      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> > @@ -3176,7 +3208,24 @@ static void vfio_pci_reset(DeviceState *dev)
> >          if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
> >               PCI_BRIDGE_CTL_BUS_RESET)) {
> >              if (pci_get_function_0(pdev) == pdev) {
> > -                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> > +                if (!vdev->pci_aer_error_signaled) {
> > +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> > +                } else {
> > +                    int i;
> > +                    for (i = 0; i < 1000; i++) {
> > +                        if (!vfio_aer_error_is_in_process(vdev)) {
> > +                            break;
> > +                        }
> > +                        g_usleep(PCI_AER_PROCESS_TIMEOUT / 1000);
> > +                    }
> > +
> > +                    if (i == 1000) {
> > +                        qdev_unplug(&vdev->pdev.qdev, NULL);
> 
> This looks a bit precarious to me, but I guess in the end we're only
> really sending an attention button press on the hotplug slot.  Have you
> forced this code path in testing and does the right thing happen for
> both Windows and Linux guests?
If all else fails, require management to specify the timeout.
> > +                    } else {
> > +                        vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> > +                    }
> > +                    vdev->pci_aer_error_signaled = false;
> > +                }
> >              }
> >              return;
> >          }
> > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > index ed14322..c9e0202 100644
> > --- a/hw/vfio/pci.h
> > +++ b/hw/vfio/pci.h
> > @@ -148,6 +148,7 @@ typedef struct VFIOPCIDevice {
> >      bool no_kvm_msi;
> >      bool no_kvm_msix;
> >      bool single_depend_dev;
> > +    bool pci_aer_error_signaled;
> >  } VFIOPCIDevice;
> >  
> >  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > index 759b850..9295fca 100644
> > --- a/linux-headers/linux/vfio.h
> > +++ b/linux-headers/linux/vfio.h
> > @@ -198,6 +198,10 @@ struct vfio_device_info {
> >  #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
> >  #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
> >  #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
> > +/* support aer error progress */
> > +#define VFIO_DEVICE_FLAGS_AERPROCESS  (1 << 4)
> > +/* status in aer error progress */
> > +#define VFIO_DEVICE_FLAGS_INAERPROCESS  (1 << 5)
> >  	__u32	num_regions;	/* Max region index + 1 */
> >  	__u32	num_irqs;	/* Max IRQ index + 1 */
> >  };
^ permalink raw reply	[flat|nested] 25+ messages in thread
 
- * Re: [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress Zhou Jie
  2016-08-31 20:13   ` Alex Williamson
@ 2016-10-24  7:56   ` Cao jin
  1 sibling, 0 replies; 25+ messages in thread
From: Cao jin @ 2016-10-24  7:56 UTC (permalink / raw)
  To: Zhou Jie, alex.williamson; +Cc: fan.chen, mst, qemu-devel, izumi.taku
Hi
On 07/19/2016 03:38 PM, Zhou Jie wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>
> For supporting aer recovery, host and guest would run the same aer
> recovery code, that would do the secondary bus reset if the error
> is fatal, the aer recovery process:
>    1. error_detected
>    2. reset_link (if fatal)
>    3. slot_reset/mmio_enabled
>    4. resume
>
> It indicates that host will do secondary bus reset to reset
> the physical devices under bus in step 2, that would cause
> devices in D3 status in a short time. But in qemu, we register
> an error detected handler, that would be invoked as host broadcasts
> the error-detected event in step 1, in order to avoid guest do
> reset_link when host do reset_link simultaneously. it may cause
> fatal error. we poll the vfio_device_info to assure host reset
> completely.
> In qemu, the aer recovery process:
>    1. Detect support for aer error progress
>       If host vfio driver does not support for aer error progress,
>       directly fail to boot up VM as with aer enabled.
>    2. Immediately notify the VM on error detected.
>    3. Wait for host aer error progress
>       Poll the vfio_device_info, If it is still in aer error progress after
>       some timeout, we would abort the guest directed bus reset
>       altogether and unplug of the device to prevent it from further
>       interacting with the VM.
>    4. Reset bus.
>
I have question about step 4(Reset bus). When guest reset link, guest 
set 'secondary bus reset' bit, then devices under the bus would do reset 
themselves separately. For vfio-pci, the emulated device, I think the 
previous logic[*] is fine. But now process looks like following:
1. One affected device: we do(or wait & do) bus 
reset(vfio_pci_hot_reset) directly
2. N affected devices: function 0 will do the same as 1.
    other affected devices will do nothing, just return.
these logic seems weird to me, are these what we want?
I don't see why we don't use the previous 'reset priority' for each 
vfio-pci emulated devices.
[*]reset priority
      1. If has "device specific reset function", then do it
      2. If has FLR, then do it.
      3. If it can do bus reset(only 1 affected device), then do it
      4. If has pm_reset, then do it
This is what I think:
static void vfio_pci_reset(DeviceState *dev)
{
     PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
     int i;
     for (i = 0; i < 1000; i++) {
         if (!vfio_aer_error_is_in_process(vdev)) {
             break;
         }
         g_usleep(PCI_AER_PROCESS_TIMEOUT / 1000);
     }
     if (i == 1000) {
         qdev_unplug(&vdev->pdev.qdev, NULL);
     } else {
         trace_vfio_pci_reset(vdev->vbasedev.name);
         vfio_pci_pre_reset(vdev);
         if (vdev->resetfn && !vdev->resetfn(vdev)) {
             goto post_reset;
         }
         if (vdev->vbasedev.reset_works &&
             (vdev->has_flr || !vdev->has_pm_reset) &&
             !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {
             trace_vfio_pci_reset_flr(vdev->vbasedev.name);
             goto post_reset;
         }
         /* See if we can do our own bus reset */
         if (!vfio_pci_hot_reset_one(vdev)) {
             goto post_reset;
         }
         /* If nothing else works and the device supports PM reset, use 
it */
         if (vdev->vbasedev.reset_works && vdev->has_pm_reset &&
             !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {
             trace_vfio_pci_reset_pm(vdev->vbasedev.name);
             goto post_reset;
         }
     }
post_reset:
     vfio_pci_post_reset(vdev);
}
Cao jin
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Signed-off-by: Zhou Jie <zhoujie2011@cn.fujitsu.com>
> ---
>   hw/vfio/pci.c              | 51 +++++++++++++++++++++++++++++++++++++++++++++-
>   hw/vfio/pci.h              |  1 +
>   linux-headers/linux/vfio.h |  4 ++++
>   3 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 0e42786..777245c 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -35,6 +35,12 @@
>
>   #define MSIX_CAP_LENGTH 12
>
> +/*
> + * Timeout for waiting host aer error process, it is 3 seconds.
> + * For hardware bus reset 3 seconds will be enough.
> + */
> +#define PCI_AER_PROCESS_TIMEOUT 3000000
> +
>   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
>
> @@ -1913,6 +1919,14 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>       VFIOGroup *group;
>       int ret, i, devfn, range_limit;
>
> +    if (!(vdev->vbasedev.flags & VFIO_DEVICE_FLAGS_AERPROCESS)) {
> +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
> +                   " host vfio driver does not support for"
> +                   " aer error progress",
> +                   vdev->vbasedev.name);
> +        return;
> +    }
> +
>       ret = vfio_get_hot_reset_info(vdev, &info);
>       if (ret) {
>           error_setg(errp, "vfio: Cannot enable AER for device %s,"
> @@ -2679,6 +2693,11 @@ static void vfio_err_notifier_handler(void *opaque)
>           msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
>                                    PCI_ERR_ROOT_CMD_NONFATAL_EN;
>
> +        if (isfatal) {
> +            PCIDevice *dev_0 = pci_get_function_0(dev);
> +            VFIOPCIDevice *vdev_0 = DO_UPCAST(VFIOPCIDevice, pdev, dev_0);
> +            vdev_0->pci_aer_error_signaled = true;
> +        }
>           pcie_aer_msg(dev, &msg);
>           return;
>       }
> @@ -3163,6 +3182,19 @@ static void vfio_exitfn(PCIDevice *pdev)
>       vfio_bars_exit(vdev);
>   }
>
> +static int vfio_aer_error_is_in_process(VFIOPCIDevice *vdev)
> +{
> +    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
> +    int ret;
> +
> +    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_INFO, &dev_info);
> +    if (ret) {
> +        error_report("vfio: error getting device info: %m");
> +        return ret;
> +    }
> +    return dev_info.flags & VFIO_DEVICE_FLAGS_INAERPROCESS ? 1 : 0;
> +}
> +
>   static void vfio_pci_reset(DeviceState *dev)
>   {
>       PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> @@ -3176,7 +3208,24 @@ static void vfio_pci_reset(DeviceState *dev)
>           if ((pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>                PCI_BRIDGE_CTL_BUS_RESET)) {
>               if (pci_get_function_0(pdev) == pdev) {
> -                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +                if (!vdev->pci_aer_error_signaled) {
> +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +                } else {
> +                    int i;
> +                    for (i = 0; i < 1000; i++) {
> +                        if (!vfio_aer_error_is_in_process(vdev)) {
> +                            break;
> +                        }
> +                        g_usleep(PCI_AER_PROCESS_TIMEOUT / 1000);
> +                    }
> +
> +                    if (i == 1000) {
> +                        qdev_unplug(&vdev->pdev.qdev, NULL);
> +                    } else {
> +                        vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +                    }
> +                    vdev->pci_aer_error_signaled = false;
> +                }
>               }
>               return;
>           }
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index ed14322..c9e0202 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -148,6 +148,7 @@ typedef struct VFIOPCIDevice {
>       bool no_kvm_msi;
>       bool no_kvm_msix;
>       bool single_depend_dev;
> +    bool pci_aer_error_signaled;
>   } VFIOPCIDevice;
>
>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 759b850..9295fca 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -198,6 +198,10 @@ struct vfio_device_info {
>   #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
>   #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
>   #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
> +/* support aer error progress */
> +#define VFIO_DEVICE_FLAGS_AERPROCESS  (1 << 4)
> +/* status in aer error progress */
> +#define VFIO_DEVICE_FLAGS_INAERPROCESS  (1 << 5)
>   	__u32	num_regions;	/* Max region index + 1 */
>   	__u32	num_irqs;	/* Max IRQ index + 1 */
>   };
>
^ permalink raw reply	[flat|nested] 25+ messages in thread
 
- * [Qemu-devel] [PATCH v9 11/11] vfio: add 'aer' property to expose aercap
  2016-07-19  7:38 [Qemu-devel] [PATCH v9 00/11] vfio-pci: pass the aer error to guest Zhou Jie
                   ` (9 preceding siblings ...)
  2016-07-19  7:38 ` [Qemu-devel] [PATCH v9 10/11] vfio: Add waiting for host aer error progress Zhou Jie
@ 2016-07-19  7:38 ` Zhou Jie
  10 siblings, 0 replies; 25+ messages in thread
From: Zhou Jie @ 2016-07-19  7:38 UTC (permalink / raw)
  To: alex.williamson; +Cc: qemu-devel, izumi.taku, mst, fan.chen, Chen Fan
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
add 'aer' property to let user able to decide whether expose
the aer capability. by default we should disable aer feature,
because it needs configuration restrictions.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 777245c..fdc27e6 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3305,6 +3305,8 @@ static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                        sub_device_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
+    DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_AER_BIT, false),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-- 
1.8.3.1
^ permalink raw reply related	[flat|nested] 25+ messages in thread