* [Qemu-devel] [patch v3 1/9] vfio: extract vfio_get_hot_reset_info as a single function
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 2/9] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
` (7 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
the function is used to get affected devices by bus reset.
so here extract it, and can used for aer soon.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 66 +++++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 48 insertions(+), 18 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 20b505f..487f279 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1691,6 +1691,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
}
}
+/*
+ * return negative with errno, return 0 on success.
+ * if success, the point of ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+ struct vfio_pci_hot_reset_info **ret_info)
+{
+ struct vfio_pci_hot_reset_info *info;
+ int ret, count;
+
+ *ret_info = NULL;
+
+ info = g_malloc0(sizeof(*info));
+ info->argsz = sizeof(*info);
+
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+ if (ret && errno != ENOSPC) {
+ ret = -errno;
+ goto error;
+ }
+
+ count = info->count;
+
+ info = g_realloc(info, sizeof(*info) +
+ (count * sizeof(struct vfio_pci_dependent_device)));
+ info->argsz = sizeof(*info) +
+ (count * sizeof(struct vfio_pci_dependent_device));
+
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+ if (ret) {
+ ret = -errno;
+ error_report("vfio: hot reset info failed: %m");
+ goto error;
+ }
+
+ *ret_info = info;
+ info = NULL;
+
+ return 0;
+error:
+ g_free(info);
+ return ret;
+}
+
static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
{
PCIDevice *pdev = &vdev->pdev;
@@ -1830,7 +1875,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
{
VFIOGroup *group;
- struct vfio_pci_hot_reset_info *info;
+ struct vfio_pci_hot_reset_info *info = NULL;
struct vfio_pci_dependent_device *devices;
struct vfio_pci_hot_reset *reset;
int32_t *fds;
@@ -1842,12 +1887,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
vfio_pci_pre_reset(vdev);
vdev->vbasedev.needs_reset = false;
- info = g_malloc0(sizeof(*info));
- info->argsz = sizeof(*info);
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
- if (ret && errno != ENOSPC) {
- ret = -errno;
+ ret = vfio_get_hot_reset_info(vdev, &info);
+ if (ret) {
if (!vdev->has_pm_reset) {
error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
"no available reset mechanism.", vdev->host.domain,
@@ -1856,18 +1897,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
goto out_single;
}
- count = info->count;
- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
- info->argsz = sizeof(*info) + (count * sizeof(*devices));
devices = &info->devices[0];
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
- if (ret) {
- ret = -errno;
- error_report("vfio: hot reset info failed: %m");
- goto out_single;
- }
-
trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
/* Verify that we have all the groups required */
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 2/9] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 1/9] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 3/9] vfio: add pcie extended capability support Cao jin
` (6 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
squeeze out vfio_pci_do_hot_reset to do host bus reset when AER recovery.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++------------------------
1 file changed, 44 insertions(+), 31 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 487f279..0a0c5b3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1736,6 +1736,48 @@ error:
return ret;
}
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+ struct vfio_pci_hot_reset_info *info)
+{
+ VFIOGroup *group;
+ struct vfio_pci_hot_reset *reset;
+ int32_t *fds;
+ int ret, i, count;
+ struct vfio_pci_dependent_device *devices;
+
+ /* Determine how many group fds need to be passed */
+ count = 0;
+ devices = &info->devices[0];
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ count++;
+ break;
+ }
+ }
+ }
+
+ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+ reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+ fds = &reset->group_fds[0];
+
+ /* Fill in group fds */
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ fds[reset->count++] = group->fd;
+ break;
+ }
+ }
+ }
+
+ /* Bus reset! */
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+ g_free(reset);
+
+ return ret;
+}
+
static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
{
PCIDevice *pdev = &vdev->pdev;
@@ -1877,9 +1919,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
VFIOGroup *group;
struct vfio_pci_hot_reset_info *info = NULL;
struct vfio_pci_dependent_device *devices;
- struct vfio_pci_hot_reset *reset;
- int32_t *fds;
- int ret, i, count;
+ int ret, i;
bool multi = false;
trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -1958,34 +1998,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
goto out_single;
}
- /* Determine how many group fds need to be passed */
- count = 0;
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- count++;
- break;
- }
- }
- }
-
- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
- reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
- fds = &reset->group_fds[0];
-
- /* Fill in group fds */
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- fds[reset->count++] = group->fd;
- break;
- }
- }
- }
-
- /* Bus reset! */
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
- g_free(reset);
+ ret = vfio_pci_do_hot_reset(vdev, info);
trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
ret ? "%m" : "Success");
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 3/9] vfio: add pcie extended capability support
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 1/9] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 2/9] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 4/9] vfio: add aer support for vfio device Cao jin
` (5 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
For vfio pcie device, we could expose the extended capability on
PCIE bus. due to add a new pcie capability at the tail of the chain,
in order to avoid config space overwritten, we introduce a copy config
for parsing extended caps. and rebuild the pcie extended config space.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 71 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0a0c5b3..c662fea 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1518,6 +1518,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
return next - pos;
}
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+ uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE;
+
+ for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+ tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+ if (tmp > pos && tmp < next) {
+ next = tmp;
+ }
+ }
+
+ return next - pos;
+}
+
static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
{
pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -1853,16 +1868,71 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ uint32_t header;
+ uint16_t cap_id, next, size;
+ uint8_t cap_ver;
+ uint8_t *config;
+
+ /*
+ * pcie_add_capability always inserts the new capability at the tail
+ * of the chain. Therefore to end up with a chain that matches the
+ * physical device, we cache the config space to avoid overwriting
+ * the original config space when we parse the extended capabilities.
+ */
+ config = g_memdup(pdev->config, vdev->config_size);
+
+ for (next = PCI_CONFIG_SPACE_SIZE; next;
+ next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+ header = pci_get_long(config + next);
+ cap_id = PCI_EXT_CAP_ID(header);
+ cap_ver = PCI_EXT_CAP_VER(header);
+
+ /*
+ * If it becomes important to configure extended capabilities to their
+ * actual size, use this as the default when it's something we don't
+ * recognize. Since QEMU doesn't actually handle many of the config
+ * accesses, exact size doesn't seem worthwhile.
+ */
+ size = vfio_ext_cap_max_size(config, next);
+
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+
+ /* Use emulated next pointer to allow dropping extended caps */
+ pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
+ PCI_EXT_CAP_NEXT_MASK);
+ }
+
+ g_free(config);
+ return 0;
+}
+
static int vfio_add_capabilities(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
+ int ret;
if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
!pdev->config[PCI_CAPABILITY_LIST]) {
return 0; /* Nothing to add */
}
- return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+ ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+ if (ret) {
+ return ret;
+ }
+
+ /* on PCI bus, it doesn't make sense to expose extended capabilities. */
+ if (!pci_is_express(pdev) ||
+ !pci_bus_is_express(pdev->bus) ||
+ !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+ return 0;
+ }
+
+ return vfio_add_ext_cap(vdev);
}
static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 4/9] vfio: add aer support for vfio device
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
` (2 preceding siblings ...)
2016-03-15 1:35 ` [Qemu-devel] [patch v3 3/9] vfio: add pcie extended capability support Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 5/9] vfio: add check host bus reset is support or not Cao jin
` (4 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
hw/vfio/pci.h | 3 +++
2 files changed, 85 insertions(+), 3 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c662fea..a77cc04 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1868,6 +1868,66 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+ int pos, uint16_t size)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *dev_iter;
+ uint8_t type;
+ uint32_t errcap;
+
+ if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+ pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+ cap_ver, pos, size);
+ return 0;
+ }
+
+ dev_iter = pci_bridge_get_device(pdev->bus);
+ if (!dev_iter) {
+ goto error;
+ }
+
+ while (dev_iter) {
+ if (!pci_is_express(dev_iter)) {
+ goto error;
+ }
+
+ type = pcie_cap_get_type(dev_iter);
+ if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+ type != PCI_EXP_TYPE_UPSTREAM &&
+ type != PCI_EXP_TYPE_DOWNSTREAM)) {
+ goto error;
+ }
+
+ if (!dev_iter->exp.aer_cap) {
+ goto error;
+ }
+
+ dev_iter = pci_bridge_get_device(dev_iter->bus);
+ }
+
+ errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
+ /*
+ * The ability to record multiple headers is depending on
+ * the state of the Multiple Header Recording Capable bit and
+ * enabled by the Multiple Header Recording Enable bit.
+ */
+ if ((errcap & PCI_ERR_CAP_MHRC) &&
+ (errcap & PCI_ERR_CAP_MHRE)) {
+ pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+ } else {
+ pdev->exp.aer_log.log_max = 0;
+ }
+
+ pcie_cap_deverr_init(pdev);
+ return pcie_aer_init(pdev, pos, size);
+
+error:
+ error_report("vfio: Unable to enable AER for device %s, parent bus "
+ "does not support AER signaling", vdev->vbasedev.name);
+ return -1;
+}
+
static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
@@ -1875,6 +1935,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
uint16_t cap_id, next, size;
uint8_t cap_ver;
uint8_t *config;
+ int ret = 0;
/*
* pcie_add_capability always inserts the new capability at the tail
@@ -1898,16 +1959,29 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
*/
size = vfio_ext_cap_max_size(config, next);
- pcie_add_capability(pdev, cap_id, cap_ver, next, size);
- pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+ switch (cap_id) {
+ case PCI_EXT_CAP_ID_ERR:
+ ret = vfio_setup_aer(vdev, cap_ver, next, size);
+ break;
+ default:
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ break;
+ }
+
+ if (ret) {
+ goto out;
+ }
+
+ pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
/* Use emulated next pointer to allow dropping extended caps */
pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
PCI_EXT_CAP_NEXT_MASK);
}
+out:
g_free(config);
- return 0;
+ return ret;
}
static int vfio_add_capabilities(VFIOPCIDevice *vdev)
@@ -2662,6 +2736,11 @@ static int vfio_initfn(PCIDevice *pdev)
goto out_teardown;
}
+ if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+ !pdev->exp.aer_cap) {
+ goto out_teardown;
+ }
+
/* QEMU emulates all of MSI & MSIX */
if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 6256587..e0c53f2 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
#include "qemu-common.h"
#include "exec/memory.h"
#include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/event_notifier.h"
#include "qemu/queue.h"
@@ -128,6 +129,8 @@ typedef struct VFIOPCIDevice {
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
#define VFIO_FEATURE_ENABLE_REQ_BIT 1
#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
int32_t bootindex;
uint8_t pm_cap;
bool has_vga;
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 5/9] vfio: add check host bus reset is support or not
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
` (3 preceding siblings ...)
2016-03-15 1:35 ` [Qemu-devel] [patch v3 4/9] vfio: add aer support for vfio device Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 18:19 ` Alex Williamson
2016-03-15 1:35 ` [Qemu-devel] [patch v3 6/9] vfio: add check aer functionality for hotplug device Cao jin
` (3 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
when boot up a VM that assigning vfio devices with aer enabled, we
must check the vfio device whether support host bus reset. because
when one error occur. OS driver always recover the device by do a
bus reset, in order to recover the vfio device, qemu must able to do
a host bus reset to recover the device to default status. and for all
affected devices by the bus reset. we must check them whether all
are assigned to the VM and on the same virtual bus. meanwhile, for
simply done, the devices which don't affected by the host bus reset
are not allowed to assign to the same virtual bus.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
hw/vfio/pci.h | 1 +
2 files changed, 213 insertions(+), 7 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a77cc04..9670271 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1868,6 +1868,197 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
+static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
+ PCIHostDeviceAddress *host2)
+{
+ return (host1->domain == host2->domain && host1->bus == host2->bus &&
+ host1->slot == host2->slot);
+}
+
+static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
+ PCIHostDeviceAddress *host2)
+{
+ return (vfio_pci_host_slot_match(host1, host2) &&
+ host1->function == host2->function);
+}
+
+static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
+{
+ PCIBus *bus = vdev->pdev.bus;
+ struct vfio_pci_hot_reset_info *info = NULL;
+ struct vfio_pci_dependent_device *devices;
+ VFIOGroup *group;
+ int ret, i, devfn;
+
+ ret = vfio_get_hot_reset_info(vdev, &info);
+ if (ret) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s,"
+ " device does not support hot reset.",
+ vdev->vbasedev.name);
+ return;
+ }
+
+ /* List all affected devices by bus reset */
+ devices = &info->devices[0];
+
+ /* Verify that we have all the groups required */
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+ VFIOPCIDevice *tmp;
+ VFIODevice *vbasedev_iter;
+ bool found = false;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ /* Skip the current device */
+ if (vfio_pci_host_match(&host, &vdev->host)) {
+ continue;
+ }
+
+ /* Ensure we own the group of the affected device */
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == devices[i].group_id) {
+ break;
+ }
+ }
+
+ if (!group) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s, "
+ "depends on group %d which is not owned.",
+ vdev->vbasedev.name, devices[i].group_id);
+ goto out;
+ }
+
+ /* Ensure affected devices for reset on the same bus */
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+ continue;
+ }
+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+ if (vfio_pci_host_match(&host, &tmp->host)) {
+ /*
+ * AER errors may be broadcast to all functions of a multi-
+ * function endpoint. If any of those sibling functions are
+ * also assigned, they need to have AER enabled or else an
+ * error may continue to cause a vm_stop condition. IOW,
+ * AER setup of this function would be pointless.
+ */
+ if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
+ !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot"
+ " the dependent device %s which does not enable AER.",
+ vdev->vbasedev.name, tmp->vbasedev.name);
+ goto out;
+ }
+
+ if (tmp->pdev.bus != bus) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s, "
+ "the dependent device %s is not on the same bus",
+ vdev->vbasedev.name, tmp->vbasedev.name);
+ goto out;
+ }
+ found = true;
+ break;
+ }
+ }
+
+ /* Ensure all affected devices assigned to VM */
+ if (!found) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s, "
+ "the dependent device %04x:%02x:%02x.%x "
+ "is not assigned to VM.",
+ vdev->vbasedev.name, host.domain, host.bus,
+ host.slot, host.function);
+ goto out;
+ }
+ }
+
+ /*
+ * To make things simply, functions must are combined
+ * in the same way as on the host. no other irrelative
+ * functions on the virtual same bus was required.
+ */
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ VFIOPCIDevice *tmp;
+ PCIDevice *dev;
+ bool found = false;
+
+ dev = bus->devices[devfn];
+ if (!dev) {
+ continue;
+ }
+
+ if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s, "
+ "only support all dependent devices on the same bus, "
+ "%s is not one of the dependent devices.",
+ vdev->vbasedev.name, dev->name);
+ goto out;
+ }
+
+ tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ if (vfio_pci_host_match(&host, &tmp->host)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ error_setg(errp, "vfio: Cannot enable AER for device %s, "
+ "only support all dependent devices on the same bus, "
+ "%04x:%02x:%02x.%x is not one of the dependent devices.",
+ vdev->vbasedev.name, tmp->host.domain, tmp->host.bus,
+ tmp->host.slot, tmp->host.function);
+ goto out;
+ }
+ }
+
+out:
+ g_free(info);
+ return;
+}
+
+static void vfio_aer_check_host_bus_reset(Error **errp)
+{
+ VFIOGroup *group;
+ VFIODevice *vbasedev;
+ VFIOPCIDevice *vdev;
+ Error *local_err = NULL;
+
+ /* Check All vfio-pci devices if have bus reset capability */
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ QLIST_FOREACH(vbasedev, &group->device_list, next) {
+ if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+ continue;
+ }
+ vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+ if (!pci_get_function_0(&vdev->pdev)) {
+ continue;
+ }
+ vfio_check_hot_bus_reset(vdev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+ }
+ }
+
+ return;
+}
+
static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
int pos, uint16_t size)
{
@@ -2051,13 +2242,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
vfio_intx_enable(vdev);
}
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
- PCIHostDeviceAddress *host2)
-{
- return (host1->domain == host2->domain && host1->bus == host2->bus &&
- host1->slot == host2->slot && host1->function == host2->function);
-}
-
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
{
VFIOGroup *group;
@@ -2563,6 +2747,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
vdev->req_enabled = false;
}
+static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
+{
+ Error *local_err = NULL;
+
+ vfio_aer_check_host_bus_reset(&local_err);
+ if (local_err) {
+ fprintf(stderr, "%s\n", error_get_pretty(local_err));
+ error_free(local_err);
+ exit(1);
+ }
+}
+
+static Notifier machine_notifier = {
+ .notify = vfio_pci_machine_done_notify,
+};
+
static int vfio_initfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -2909,6 +3109,11 @@ static const TypeInfo vfio_pci_dev_info = {
static void register_vfio_pci_dev_type(void)
{
type_register_static(&vfio_pci_dev_info);
+ /*
+ * Register notifier when machine init is done, since we need
+ * check the configration manner after all vfio device are inited.
+ */
+ qemu_add_machine_init_done_notifier(&machine_notifier);
}
type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index e0c53f2..aff46c2 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
#include "qemu-common.h"
#include "exec/memory.h"
#include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_bridge.h"
#include "hw/vfio/vfio-common.h"
#include "qemu/event_notifier.h"
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [patch v3 5/9] vfio: add check host bus reset is support or not
2016-03-15 1:35 ` [Qemu-devel] [patch v3 5/9] vfio: add check host bus reset is support or not Cao jin
@ 2016-03-15 18:19 ` Alex Williamson
0 siblings, 0 replies; 14+ messages in thread
From: Alex Williamson @ 2016-03-15 18:19 UTC (permalink / raw)
To: Cao jin; +Cc: chen.fan.fnst, izumi.taku, qemu-devel, mst
On Tue, 15 Mar 2016 09:35:45 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>
> when boot up a VM that assigning vfio devices with aer enabled, we
> must check the vfio device whether support host bus reset. because
> when one error occur. OS driver always recover the device by do a
> bus reset, in order to recover the vfio device, qemu must able to do
> a host bus reset to recover the device to default status. and for all
> affected devices by the bus reset. we must check them whether all
> are assigned to the VM and on the same virtual bus. meanwhile, for
> simply done, the devices which don't affected by the host bus reset
> are not allowed to assign to the same virtual bus.
>
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
> hw/vfio/pci.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> hw/vfio/pci.h | 1 +
> 2 files changed, 213 insertions(+), 7 deletions(-)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a77cc04..9670271 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1868,6 +1868,197 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
> return 0;
> }
>
> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> + PCIHostDeviceAddress *host2)
> +{
> + return (host1->domain == host2->domain && host1->bus == host2->bus &&
> + host1->slot == host2->slot);
> +}
> +
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> + PCIHostDeviceAddress *host2)
> +{
> + return (vfio_pci_host_slot_match(host1, host2) &&
> + host1->function == host2->function);
> +}
This needs to be rebased to current qemu.git and account for:
7df9381 vfio: Add sysfsdev property for pci & platform
It's possible that vdev.host is not populated if the device is
specified via sysfsdev. vbasedev.name is now the common data element
and I think for the purposes of a bus reset it's ok to expect it to
take the form of "%04x:%02x:%02x.%d". The current vfio_pci_host_match
function builds a comparison string and does a strcmp. You may want to
change that to build a PCIHostDeviceAddress to make masking out the
function number easier for a slot comparison.
If we have devices that don't match the "%04x:%02x:%02x.%d" format, the
expectation is that they won't support a hot reset ioctl.
> +
> +static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> +{
> + PCIBus *bus = vdev->pdev.bus;
> + struct vfio_pci_hot_reset_info *info = NULL;
> + struct vfio_pci_dependent_device *devices;
> + VFIOGroup *group;
> + int ret, i, devfn;
> +
> + ret = vfio_get_hot_reset_info(vdev, &info);
> + if (ret) {
> + error_setg(errp, "vfio: Cannot enable AER for device %s,"
> + " device does not support hot reset.",
> + vdev->vbasedev.name);
> + return;
> + }
> +
> + /* List all affected devices by bus reset */
> + devices = &info->devices[0];
> +
> + /* Verify that we have all the groups required */
> + for (i = 0; i < info->count; i++) {
> + PCIHostDeviceAddress host;
> + VFIOPCIDevice *tmp;
> + VFIODevice *vbasedev_iter;
> + bool found = false;
> +
> + host.domain = devices[i].segment;
> + host.bus = devices[i].bus;
> + host.slot = PCI_SLOT(devices[i].devfn);
> + host.function = PCI_FUNC(devices[i].devfn);
> +
> + /* Skip the current device */
> + if (vfio_pci_host_match(&host, &vdev->host)) {
> + continue;
> + }
> +
> + /* Ensure we own the group of the affected device */
> + QLIST_FOREACH(group, &vfio_group_list, next) {
> + if (group->groupid == devices[i].group_id) {
> + break;
> + }
> + }
> +
> + if (!group) {
> + error_setg(errp, "vfio: Cannot enable AER for device %s, "
> + "depends on group %d which is not owned.",
> + vdev->vbasedev.name, devices[i].group_id);
> + goto out;
> + }
> +
> + /* Ensure affected devices for reset on the same bus */
> + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> + continue;
> + }
> + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> + if (vfio_pci_host_match(&host, &tmp->host)) {
> + /*
> + * AER errors may be broadcast to all functions of a multi-
> + * function endpoint. If any of those sibling functions are
> + * also assigned, they need to have AER enabled or else an
> + * error may continue to cause a vm_stop condition. IOW,
> + * AER setup of this function would be pointless.
> + */
> + if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
> + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> + error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot"
> + " the dependent device %s which does not enable AER.",
> + vdev->vbasedev.name, tmp->vbasedev.name);
> + goto out;
> + }
> +
> + if (tmp->pdev.bus != bus) {
> + error_setg(errp, "vfio: Cannot enable AER for device %s, "
> + "the dependent device %s is not on the same bus",
> + vdev->vbasedev.name, tmp->vbasedev.name);
> + goto out;
> + }
> + found = true;
> + break;
> + }
> + }
> +
> + /* Ensure all affected devices assigned to VM */
> + if (!found) {
> + error_setg(errp, "vfio: Cannot enable AER for device %s, "
> + "the dependent device %04x:%02x:%02x.%x "
> + "is not assigned to VM.",
> + vdev->vbasedev.name, host.domain, host.bus,
> + host.slot, host.function);
> + goto out;
> + }
> + }
> +
> + /*
> + * To make things simply, functions must are combined
> + * in the same way as on the host. no other irrelative
> + * functions on the virtual same bus was required.
> + */
> + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
> + VFIOPCIDevice *tmp;
> + PCIDevice *dev;
> + bool found = false;
> +
> + dev = bus->devices[devfn];
> + if (!dev) {
> + continue;
> + }
bus->devices seems like private data for PCIBus, MST do you want some
abstraction for this? Can't this already be done in a round about sort
of way with pci_for_each_device()?
> +
> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> + error_setg(errp, "vfio: Cannot enable AER for device %s, "
> + "only support all dependent devices on the same bus, "
> + "%s is not one of the dependent devices.",
> + vdev->vbasedev.name, dev->name);
> + goto out;
> + }
> +
> + tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> + for (i = 0; i < info->count; i++) {
> + PCIHostDeviceAddress host;
> +
> + host.domain = devices[i].segment;
> + host.bus = devices[i].bus;
> + host.slot = PCI_SLOT(devices[i].devfn);
> + host.function = PCI_FUNC(devices[i].devfn);
> +
> + if (vfio_pci_host_match(&host, &tmp->host)) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (!found) {
> + error_setg(errp, "vfio: Cannot enable AER for device %s, "
> + "only support all dependent devices on the same bus, "
> + "%04x:%02x:%02x.%x is not one of the dependent devices.",
> + vdev->vbasedev.name, tmp->host.domain, tmp->host.bus,
> + tmp->host.slot, tmp->host.function);
> + goto out;
> + }
> + }
> +
> +out:
> + g_free(info);
> + return;
> +}
> +
> +static void vfio_aer_check_host_bus_reset(Error **errp)
> +{
> + VFIOGroup *group;
> + VFIODevice *vbasedev;
> + VFIOPCIDevice *vdev;
> + Error *local_err = NULL;
> +
> + /* Check All vfio-pci devices if have bus reset capability */
> + QLIST_FOREACH(group, &vfio_group_list, next) {
> + QLIST_FOREACH(vbasedev, &group->device_list, next) {
> + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> + continue;
> + }
> + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> + if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> + if (!pci_get_function_0(&vdev->pdev)) {
> + continue;
> + }
> + vfio_check_hot_bus_reset(vdev, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> + }
> + }
> + }
> +
> + return;
> +}
> +
> static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
> int pos, uint16_t size)
> {
> @@ -2051,13 +2242,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> vfio_intx_enable(vdev);
> }
>
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> - PCIHostDeviceAddress *host2)
> -{
> - return (host1->domain == host2->domain && host1->bus == host2->bus &&
> - host1->slot == host2->slot && host1->function == host2->function);
> -}
> -
> static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
> {
> VFIOGroup *group;
> @@ -2563,6 +2747,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
> vdev->req_enabled = false;
> }
>
> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> +{
> + Error *local_err = NULL;
> +
> + vfio_aer_check_host_bus_reset(&local_err);
> + if (local_err) {
> + fprintf(stderr, "%s\n", error_get_pretty(local_err));
> + error_free(local_err);
> + exit(1);
> + }
> +}
> +
> +static Notifier machine_notifier = {
> + .notify = vfio_pci_machine_done_notify,
> +};
> +
> static int vfio_initfn(PCIDevice *pdev)
> {
> VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> @@ -2909,6 +3109,11 @@ static const TypeInfo vfio_pci_dev_info = {
> static void register_vfio_pci_dev_type(void)
> {
> type_register_static(&vfio_pci_dev_info);
> + /*
> + * Register notifier when machine init is done, since we need
> + * check the configration manner after all vfio device are inited.
> + */
> + qemu_add_machine_init_done_notifier(&machine_notifier);
> }
>
> type_init(register_vfio_pci_dev_type)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index e0c53f2..aff46c2 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
> #include "qemu-common.h"
> #include "exec/memory.h"
> #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bus.h"
> #include "hw/pci/pci_bridge.h"
> #include "hw/vfio/vfio-common.h"
> #include "qemu/event_notifier.h"
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 6/9] vfio: add check aer functionality for hotplug device
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
` (4 preceding siblings ...)
2016-03-15 1:35 ` [Qemu-devel] [patch v3 5/9] vfio: add check host bus reset is support or not Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 18:36 ` Alex Williamson
2016-03-15 1:35 ` [Qemu-devel] [patch v3 7/9] vfio: vote the function 0 to do host bus reset when aer occurred Cao jin
` (2 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
because we make the vfio functions are combined
in the same way as on the host for aer, so we can
do the aer check when the function 0 was hotplugged.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9670271..223c0ee 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2029,6 +2029,40 @@ out:
return;
}
+static void vfio_bus_check_aer_functions(PCIDevice *pdev, Error **errp)
+{
+ PCIBus *bus = pdev->bus;
+ VFIOPCIDevice *vdev;
+ PCIDevice *dev;
+ Error *local_err = NULL;
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ dev = bus->devices[devfn];
+ if (!dev) {
+ continue;
+ }
+
+ if (pci_get_function_0(dev) != pdev) {
+ return;
+ }
+
+ if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+ continue;
+ }
+ vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+ if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+ vfio_check_hot_bus_reset(vdev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+ }
+
+ return;
+}
+
static void vfio_aer_check_host_bus_reset(Error **errp)
{
VFIOGroup *group;
@@ -2962,6 +2996,22 @@ static int vfio_initfn(PCIDevice *pdev)
}
}
+ /*
+ * If this function is func 0, indicate the closure of the slot.
+ * we get the chance to check aer-enabled devices whether support
+ * hot bus reset.
+ */
+ if (DEVICE(pdev)->hotplugged &&
+ pdev == pci_get_function_0(pdev)) {
+ Error *local_err = NULL;
+
+ vfio_bus_check_aer_functions(pdev, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ goto out_teardown;
+ }
+ }
+
vfio_register_err_notifier(vdev);
vfio_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [patch v3 6/9] vfio: add check aer functionality for hotplug device
2016-03-15 1:35 ` [Qemu-devel] [patch v3 6/9] vfio: add check aer functionality for hotplug device Cao jin
@ 2016-03-15 18:36 ` Alex Williamson
0 siblings, 0 replies; 14+ messages in thread
From: Alex Williamson @ 2016-03-15 18:36 UTC (permalink / raw)
To: Cao jin; +Cc: chen.fan.fnst, izumi.taku, qemu-devel, mst
On Tue, 15 Mar 2016 09:35:46 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>
> because we make the vfio functions are combined
> in the same way as on the host for aer, so we can
> do the aer check when the function 0 was hotplugged.
>
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
> hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 50 insertions(+)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 9670271..223c0ee 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2029,6 +2029,40 @@ out:
> return;
> }
>
> +static void vfio_bus_check_aer_functions(PCIDevice *pdev, Error **errp)
> +{
> + PCIBus *bus = pdev->bus;
> + VFIOPCIDevice *vdev;
> + PCIDevice *dev;
> + Error *local_err = NULL;
> + int devfn;
> +
> + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
> + dev = bus->devices[devfn];
> + if (!dev) {
> + continue;
> + }
> +
> + if (pci_get_function_0(dev) != pdev) {
> + return;
> + }
It seems like there are much easier ways to walk all functions on a
slot, rather than all devices on the bus, and not access PCIBus private
data:
for (fn = 0; fn < 8; fn++) {
dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
PCI_DEVFN(PCI_SLOT(pdev->devfn), fn));
if (!dev) {
continue;
}
...
> +
> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> + continue;
> + }
> + vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> + if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> + vfio_check_hot_bus_reset(vdev, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + return;
> + }
> + }
> + }
> +
> + return;
> +}
> +
> static void vfio_aer_check_host_bus_reset(Error **errp)
> {
> VFIOGroup *group;
> @@ -2962,6 +2996,22 @@ static int vfio_initfn(PCIDevice *pdev)
> }
> }
>
> + /*
> + * If this function is func 0, indicate the closure of the slot.
> + * we get the chance to check aer-enabled devices whether support
> + * hot bus reset.
> + */
> + if (DEVICE(pdev)->hotplugged &&
> + pdev == pci_get_function_0(pdev)) {
> + Error *local_err = NULL;
> +
> + vfio_bus_check_aer_functions(pdev, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + goto out_teardown;
> + }
> + }
> +
> vfio_register_err_notifier(vdev);
> vfio_register_req_notifier(vdev);
> vfio_setup_resetfn_quirk(vdev);
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 7/9] vfio: vote the function 0 to do host bus reset when aer occurred
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
` (5 preceding siblings ...)
2016-03-15 1:35 ` [Qemu-devel] [patch v3 6/9] vfio: add check aer functionality for hotplug device Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 20:38 ` Alex Williamson
2016-03-15 1:35 ` [Qemu-devel] [patch v3 8/9] vfio-pci: pass the aer error to guest Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 9/9] vfio: add 'aer' property to expose aercap Cao jin
8 siblings, 1 reply; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/pci/pci.c | 2 ++
hw/vfio/pci.c | 14 ++++++++++++++
hw/vfio/pci.h | 1 +
include/hw/pci/pci_bus.h | 2 ++
4 files changed, 19 insertions(+)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e67664d..953745d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -276,6 +276,8 @@ static void pcibus_reset(BusState *qbus)
for (i = 0; i < bus->nirq; i++) {
assert(bus->irq_count[i] == 0);
}
+
+ bus->is_bus_rst = false;
}
static void pci_host_bus_register(DeviceState *host)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 223c0ee..b944d0b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1901,6 +1901,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
/* List all affected devices by bus reset */
devices = &info->devices[0];
+ vdev->single_depend_dev = (info->count == 1);
+
/* Verify that we have all the groups required */
for (i = 0; i < info->count; i++) {
PCIHostDeviceAddress host;
@@ -2593,6 +2595,10 @@ static void vfio_err_notifier_handler(void *opaque)
return;
}
+ if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+ vdev->pdev.bus->is_bus_rst = true;
+ }
+
/*
* TBD. Retrieve the error details and decide what action
* needs to be taken. One of the actions could be to pass
@@ -3060,6 +3066,14 @@ static void vfio_pci_reset(DeviceState *dev)
trace_vfio_pci_reset(vdev->vbasedev.name);
+ if (pdev->bus->is_bus_rst) {
+ /* simply voting the function 0 to do hot bus reset */
+ if (pci_get_function_0(pdev) == pdev) {
+ vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+ }
+ return;
+ }
+
vfio_pci_pre_reset(vdev);
if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index aff46c2..32bd31f 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
bool no_kvm_intx;
bool no_kvm_msi;
bool no_kvm_msix;
+ bool single_depend_dev;
} VFIOPCIDevice;
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..6bcd334 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -39,6 +39,8 @@ struct PCIBus {
Keep a count of the number of devices with raised IRQs. */
int nirq;
int *irq_count;
+
+ bool is_bus_rst;
};
typedef struct PCIBridgeWindows PCIBridgeWindows;
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [patch v3 7/9] vfio: vote the function 0 to do host bus reset when aer occurred
2016-03-15 1:35 ` [Qemu-devel] [patch v3 7/9] vfio: vote the function 0 to do host bus reset when aer occurred Cao jin
@ 2016-03-15 20:38 ` Alex Williamson
2016-03-18 1:15 ` Chen Fan
0 siblings, 1 reply; 14+ messages in thread
From: Alex Williamson @ 2016-03-15 20:38 UTC (permalink / raw)
To: Cao jin; +Cc: chen.fan.fnst, izumi.taku, qemu-devel, mst
On Tue, 15 Mar 2016 09:35:47 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
> hw/pci/pci.c | 2 ++
> hw/vfio/pci.c | 14 ++++++++++++++
> hw/vfio/pci.h | 1 +
> include/hw/pci/pci_bus.h | 2 ++
> 4 files changed, 19 insertions(+)
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index e67664d..953745d 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -276,6 +276,8 @@ static void pcibus_reset(BusState *qbus)
> for (i = 0; i < bus->nirq; i++) {
> assert(bus->irq_count[i] == 0);
> }
> +
> + bus->is_bus_rst = false;
> }
>
> static void pci_host_bus_register(DeviceState *host)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 223c0ee..b944d0b 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1901,6 +1901,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> /* List all affected devices by bus reset */
> devices = &info->devices[0];
>
> + vdev->single_depend_dev = (info->count == 1);
> +
> /* Verify that we have all the groups required */
> for (i = 0; i < info->count; i++) {
> PCIHostDeviceAddress host;
> @@ -2593,6 +2595,10 @@ static void vfio_err_notifier_handler(void *opaque)
> return;
> }
>
> + if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> + vdev->pdev.bus->is_bus_rst = true;
> + }
> +
So we're *assuming* that the next reset will be a bus reset because we
took an AER fault... what if that particular error got handled in
another way, maybe a device specific handler that doesn't do a bus
reset? The asymmetry of setting a value here and clearing it in PCI
code is pretty undesirable as well. Can we detect that the bus is in
reset on our own given the current set of configuration restrictions?
Seems pretty easy to test for an AER device with the parent bridge
having PCI_BRIDGE_CTL_BUS_RESET set and wait for function #0 to do a
hot reset. Thanks,
Alex
> /*
> * TBD. Retrieve the error details and decide what action
> * needs to be taken. One of the actions could be to pass
> @@ -3060,6 +3066,14 @@ static void vfio_pci_reset(DeviceState *dev)
>
> trace_vfio_pci_reset(vdev->vbasedev.name);
>
> + if (pdev->bus->is_bus_rst) {
> + /* simply voting the function 0 to do hot bus reset */
> + if (pci_get_function_0(pdev) == pdev) {
> + vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> + }
> + return;
> + }
> +
> vfio_pci_pre_reset(vdev);
>
> if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index aff46c2..32bd31f 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
> bool no_kvm_intx;
> bool no_kvm_msi;
> bool no_kvm_msix;
> + bool single_depend_dev;
> } VFIOPCIDevice;
>
> uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..6bcd334 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,6 +39,8 @@ struct PCIBus {
> Keep a count of the number of devices with raised IRQs. */
> int nirq;
> int *irq_count;
> +
> + bool is_bus_rst;
> };
>
> typedef struct PCIBridgeWindows PCIBridgeWindows;
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Qemu-devel] [patch v3 7/9] vfio: vote the function 0 to do host bus reset when aer occurred
2016-03-15 20:38 ` Alex Williamson
@ 2016-03-18 1:15 ` Chen Fan
0 siblings, 0 replies; 14+ messages in thread
From: Chen Fan @ 2016-03-18 1:15 UTC (permalink / raw)
To: Alex Williamson, Cao jin; +Cc: izumi.taku, qemu-devel, mst
On 03/16/2016 04:38 AM, Alex Williamson wrote:
> On Tue, 15 Mar 2016 09:35:47 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>> hw/pci/pci.c | 2 ++
>> hw/vfio/pci.c | 14 ++++++++++++++
>> hw/vfio/pci.h | 1 +
>> include/hw/pci/pci_bus.h | 2 ++
>> 4 files changed, 19 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index e67664d..953745d 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -276,6 +276,8 @@ static void pcibus_reset(BusState *qbus)
>> for (i = 0; i < bus->nirq; i++) {
>> assert(bus->irq_count[i] == 0);
>> }
>> +
>> + bus->is_bus_rst = false;
>> }
>>
>> static void pci_host_bus_register(DeviceState *host)
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 223c0ee..b944d0b 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1901,6 +1901,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>> /* List all affected devices by bus reset */
>> devices = &info->devices[0];
>>
>> + vdev->single_depend_dev = (info->count == 1);
>> +
>> /* Verify that we have all the groups required */
>> for (i = 0; i < info->count; i++) {
>> PCIHostDeviceAddress host;
>> @@ -2593,6 +2595,10 @@ static void vfio_err_notifier_handler(void *opaque)
>> return;
>> }
>>
>> + if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
>> + vdev->pdev.bus->is_bus_rst = true;
>> + }
>> +
> So we're *assuming* that the next reset will be a bus reset because we
> took an AER fault... what if that particular error got handled in
> another way, maybe a device specific handler that doesn't do a bus
> reset? The asymmetry of setting a value here and clearing it in PCI
> code is pretty undesirable as well. Can we detect that the bus is in
> reset on our own given the current set of configuration restrictions?
> Seems pretty easy to test for an AER device with the parent bridge
> having PCI_BRIDGE_CTL_BUS_RESET set and wait for function #0 to do a
> hot reset. Thanks,
Indeed. Thanks for your help.
Chen
>
> Alex
>
>> /*
>> * TBD. Retrieve the error details and decide what action
>> * needs to be taken. One of the actions could be to pass
>> @@ -3060,6 +3066,14 @@ static void vfio_pci_reset(DeviceState *dev)
>>
>> trace_vfio_pci_reset(vdev->vbasedev.name);
>>
>> + if (pdev->bus->is_bus_rst) {
>> + /* simply voting the function 0 to do hot bus reset */
>> + if (pci_get_function_0(pdev) == pdev) {
>> + vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> + }
>> + return;
>> + }
>> +
>> vfio_pci_pre_reset(vdev);
>>
>> if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index aff46c2..32bd31f 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>> bool no_kvm_intx;
>> bool no_kvm_msi;
>> bool no_kvm_msix;
>> + bool single_depend_dev;
>> } VFIOPCIDevice;
>>
>> uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 403fec6..6bcd334 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -39,6 +39,8 @@ struct PCIBus {
>> Keep a count of the number of devices with raised IRQs. */
>> int nirq;
>> int *irq_count;
>> +
>> + bool is_bus_rst;
>> };
>>
>> typedef struct PCIBridgeWindows PCIBridgeWindows;
>
>
> .
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 8/9] vfio-pci: pass the aer error to guest
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
` (6 preceding siblings ...)
2016-03-15 1:35 ` [Qemu-devel] [patch v3 7/9] vfio: vote the function 0 to do host bus reset when aer occurred Cao jin
@ 2016-03-15 1:35 ` Cao jin
2016-03-15 1:35 ` [Qemu-devel] [patch v3 9/9] vfio: add 'aer' property to expose aercap Cao jin
8 siblings, 0 replies; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, resulting in the qemu eventfd handler getting
invoked.
this patch is to pass the error to guest and let the guest driver
recover from the error.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 49 insertions(+), 7 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b944d0b..6a7bb47 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2590,22 +2590,64 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
static void vfio_err_notifier_handler(void *opaque)
{
VFIOPCIDevice *vdev = opaque;
+ PCIDevice *dev = &vdev->pdev;
+ Error *local_err = NULL;
+ PCIEAERMsg msg = {
+ .severity = 0,
+ .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+ };
if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
return;
}
- if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+ /*
+ * in case the real hardware configuration has been changed,
+ * here we should recheck the bus reset capability.
+ */
+ if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+ vfio_check_hot_bus_reset(vdev, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ goto stop;
+ }
vdev->pdev.bus->is_bus_rst = true;
+
+ /*
+ * we should read the error details from the real hardware
+ * configuration spaces, here we only need to do is signaling
+ * to guest an uncorrectable error has occurred.
+ */
+ if (dev->exp.aer_cap) {
+ uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+ uint32_t uncor_status;
+ bool isfatal;
+
+ uncor_status = vfio_pci_read_config(dev,
+ dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+ /*
+ * if the error is not emitted by this device, we can
+ * just ignore it.
+ */
+ if (!(uncor_status & ~0UL)) {
+ return;
+ }
+
+ isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+ msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+ PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+ pcie_aer_msg(dev, &msg);
+ return;
+ }
}
+stop:
/*
- * TBD. Retrieve the error details and decide what action
- * needs to be taken. One of the actions could be to pass
- * the error to the guest and have the guest driver recover
- * from the error. This requires that PCIe capabilities be
- * exposed to the guest. For now, we just terminate the
- * guest to contain the error.
+ * If the aer capability is not exposed to the guest. we just
+ * terminate the guest to contain the error.
*/
error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Qemu-devel] [patch v3 9/9] vfio: add 'aer' property to expose aercap
2016-03-15 1:35 [Qemu-devel] [patch v3 0/9] vfio-pci: pass the aer error to guest, part2 Cao jin
` (7 preceding siblings ...)
2016-03-15 1:35 ` [Qemu-devel] [patch v3 8/9] vfio-pci: pass the aer error to guest Cao jin
@ 2016-03-15 1:35 ` Cao jin
8 siblings, 0 replies; 14+ messages in thread
From: Cao jin @ 2016-03-15 1:35 UTC (permalink / raw)
To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
add 'aer' property to let user able to decide whether expose
the aer capability. by default we should disable aer feature,
because it needs configuration restrictions.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6a7bb47..b67c374 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3173,6 +3173,8 @@ static Property vfio_pci_dev_properties[] = {
sub_vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+ VFIO_FEATURE_ENABLE_AER_BIT, false),
/*
* TODO - support passed fds... is this necessary?
* DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
--
1.9.3
^ permalink raw reply related [flat|nested] 14+ messages in thread