* [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest
@ 2015-04-29 8:48 Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support Chen Fan
` (10 more replies)
0 siblings, 11 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
For now, for vfio pci passthough devices when qemu receives
an error from host aer report, there just terminate the guest,
but usually user want to know what error occurred but stop the
guest, so this patches add aer capability support for vfio device,
and pass the error to guest, and have guest driver to recover
from the error.
and turning on SERR# for error forwording in bridge control register
patch in seabios has been merged.
v5-v6:
1. add secondary bus callbacks to reset host bus.
v3-v4:
1. add 'x-aer' for user to off aer capability.
2. refactor vfio device to parse extended capabilities.
v2-v3:
1. refactor vfio device to parse extended capability.
2. add global property for piix4 to disable vfio aer cap.
v1-v2:
1. turn on SERR# for bridge control register in firmware.
2. initilize aer capability for vfio device.
3. fix some trivial bug.
Chen Fan (11):
vfio: add pcie extanded capability support
aer: impove pcie_aer_init to support vfio device
vfio: add aer support for vfio device
pcie_aer: expose pcie_aer_msg() interface
vfio-pci: pass the aer error to guest
vfio: add 'aer' property to expose aercap
pc: add HW_COMPAT_2_2 to disable aercap for vifo device
vfio: extract vfio_get_hot_reset_info as a single function
qdev: add bus reset_notifiers callbacks for host bus reset
vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
vfio: add bus reset notifier for host bus reset
hw/i386/pc_q35.c | 4 +
hw/pci-bridge/ioh3420.c | 2 +-
hw/pci-bridge/xio3130_downstream.c | 2 +-
hw/pci-bridge/xio3130_upstream.c | 2 +-
hw/pci/pci.c | 6 +
hw/pci/pci_bridge.c | 3 +
hw/pci/pcie_aer.c | 6 +-
hw/vfio/pci.c | 455 +++++++++++++++++++++++++++++++------
include/hw/compat.h | 8 +
include/hw/pci/pci.h | 2 +
include/hw/pci/pci_bus.h | 2 +
include/hw/pci/pcie_aer.h | 3 +-
12 files changed, 415 insertions(+), 80 deletions(-)
--
1.9.3
^ permalink raw reply [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 02/11] aer: impove pcie_aer_init to support vfio device Chen Fan
` (9 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
For vfio pcie device, we could expose the extended capability on
PCIE bus. in order to avoid config space broken, we introduce
a copy config for parsing extended caps. and rebuild the pcie
extended config space.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 72 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index cd15b20..8f6aedd 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2492,6 +2492,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
return next - pos;
}
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+ uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE - 1;
+
+ for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+ tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+ if (tmp > pos && tmp < next) {
+ next = tmp;
+ }
+ }
+
+ return next - pos;
+}
+
static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
{
pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -2715,16 +2730,72 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ uint32_t header;
+ uint16_t cap_id, next, size;
+ uint8_t cap_ver;
+
+ for (next = PCI_CONFIG_SPACE_SIZE; next;
+ next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+ header = pci_get_long(config + next);
+ cap_id = PCI_EXT_CAP_ID(header);
+ cap_ver = PCI_EXT_CAP_VER(header);
+
+ /*
+ * If it becomes important to configure extended capabilities to their
+ * actual size, use this as the default when it's something we don't
+ * recognize. Since QEMU doesn't actually handle many of the config
+ * accesses, exact size doesn't seem worthwhile.
+ */
+ size = vfio_ext_cap_max_size(config, next);
+
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ if (next == PCI_CONFIG_SPACE_SIZE) {
+ /* Begin the rebuild, we should set the next offset zero. */
+ pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+ }
+
+ /* Use emulated header pointer to allow dropping extended caps */
+ pci_set_long(vdev->emulated_config_bits + next, 0xffffffff);
+ }
+
+ return 0;
+}
+
static int vfio_add_capabilities(VFIOPCIDevice *vdev)
{
PCIDevice *pdev = &vdev->pdev;
+ int ret;
+ uint8_t *config;
if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
!pdev->config[PCI_CAPABILITY_LIST]) {
return 0; /* Nothing to add */
}
- return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+ ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+ if (ret) {
+ return ret;
+ }
+
+ /* on PCI bus, it doesn't make sense to expose extended capabilities. */
+ if (!pci_is_express(pdev) ||
+ !pci_bus_is_express(pdev->bus) ||
+ !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+ return 0;
+ }
+
+ /*
+ * In order to avoid config space broken, here using a copy config to
+ * parse extended capabilities.
+ */
+ config = g_memdup(pdev->config, vdev->config_size);
+ ret = vfio_add_ext_cap(vdev, config);
+
+ g_free(config);
+ return ret;
}
static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 02/11] aer: impove pcie_aer_init to support vfio device
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 03/11] vfio: add aer support for " Chen Fan
` (8 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
pcie_aer_init was used to emulate an aer capability for pcie device,
but for vfio device, the aer config space size is mutable and is not
always equal to PCI_ERR_SIZEOF(0x48). it depends on where the TLP Prefix
register required, so here we add a size argument.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/pci-bridge/ioh3420.c | 2 +-
hw/pci-bridge/xio3130_downstream.c | 2 +-
hw/pci-bridge/xio3130_upstream.c | 2 +-
hw/pci/pcie_aer.c | 4 ++--
include/hw/pci/pcie_aer.h | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index cce2fdd..4d9cd3f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
goto err_pcie_cap;
}
pcie_cap_root_init(d);
- rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
+ rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
if (rc < 0) {
goto err;
}
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index b3a6479..9737041 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
goto err_pcie_cap;
}
pcie_cap_arifwd_init(d);
- rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+ rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
if (rc < 0) {
goto err;
}
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index eada582..4d7f894 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
}
pcie_cap_flr_init(d);
pcie_cap_deverr_init(d);
- rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+ rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
if (rc < 0) {
goto err;
}
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index b48c09c..ccf577e 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
aer_log->log_num = 0;
}
-int pcie_aer_init(PCIDevice *dev, uint16_t offset)
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
{
PCIExpressDevice *exp;
pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
- offset, PCI_ERR_SIZEOF);
+ offset, size);
exp = &dev->exp;
exp->aer_cap = offset;
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 2fb8388..156acb0 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -87,7 +87,7 @@ struct PCIEAERErr {
extern const VMStateDescription vmstate_pcie_aer_log;
-int pcie_aer_init(PCIDevice *dev, uint16_t offset);
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
void pcie_aer_exit(PCIDevice *dev);
void pcie_aer_write_config(PCIDevice *dev,
uint32_t addr, uint32_t val, int len);
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 03/11] vfio: add aer support for vfio device
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 02/11] aer: impove pcie_aer_init to support vfio device Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 04/11] pcie_aer: expose pcie_aer_msg() interface Chen Fan
` (7 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 46 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8f6aedd..5069141 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2730,12 +2730,45 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
+static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ uint8_t *exp_cap = pdev->config + pdev->exp.exp_cap;
+ uint32_t severity, errcap;
+ int ret;
+
+ errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
+ /*
+ * The ability to record multiple headers is depending on
+ * the state of the Multiple Header Recording Capable bit and
+ * enabled by the Multiple Header Recording Enable bit.
+ */
+ if ((errcap & PCI_ERR_CAP_MHRC) &&
+ (errcap & PCI_ERR_CAP_MHRE)) {
+ pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+ } else {
+ pdev->exp.aer_log.log_max = 0;
+ }
+ ret = pcie_aer_init(pdev, pos, size);
+ if (ret) {
+ return ret;
+ }
+
+ /* load physical registers */
+ severity = vfio_pci_read_config(pdev,
+ pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
+ pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
+
+ return 0;
+}
+
static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
{
PCIDevice *pdev = &vdev->pdev;
uint32_t header;
uint16_t cap_id, next, size;
uint8_t cap_ver;
+ int ret = 0;
for (next = PCI_CONFIG_SPACE_SIZE; next;
next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
@@ -2751,7 +2784,19 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
*/
size = vfio_ext_cap_max_size(config, next);
- pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ switch (cap_id) {
+ case PCI_EXT_CAP_ID_ERR:
+ ret = vfio_setup_aer(vdev, next, size);
+ break;
+ default:
+ pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+ break;
+ }
+
+ if (ret) {
+ return ret;
+ }
+
if (next == PCI_CONFIG_SPACE_SIZE) {
/* Begin the rebuild, we should set the next offset zero. */
pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 04/11] pcie_aer: expose pcie_aer_msg() interface
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (2 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 03/11] vfio: add aer support for " Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 05/11] vfio-pci: pass the aer error to guest Chen Fan
` (6 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
For vfio device, we need to propagate the aer error to
Guest OS. we use the pcie_aer_msg() to send aer error
to guest.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/pci/pcie_aer.c | 2 +-
include/hw/pci/pcie_aer.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index ccf577e..58010c4 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
*
* Walk up the bus tree from the device, propagate the error message.
*/
-static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
{
uint8_t type;
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 156acb0..c2ee4e2 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
/* error injection */
int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
#endif /* QEMU_PCIE_AER_H */
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 05/11] vfio-pci: pass the aer error to guest
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (3 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 04/11] pcie_aer: expose pcie_aer_msg() interface Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 06/11] vfio: add 'aer' property to expose aercap Chen Fan
` (5 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, the results in the qemu eventfd handler getting
invoked.
this patch is to pass the error to guest and have the guest driver
recover from the error.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 34 ++++++++++++++++++++++++++++------
1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5069141..cdd8f24 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3248,18 +3248,40 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
static void vfio_err_notifier_handler(void *opaque)
{
VFIOPCIDevice *vdev = opaque;
+ PCIDevice *dev = &vdev->pdev;
+ PCIEAERMsg msg = {
+ .severity = 0,
+ .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+ };
if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
return;
}
+ /* we should read the error details from the real hardware
+ * configuration spaces, here we only need to do is signaling
+ * to guest an uncorrectable error has occurred.
+ */
+ if(dev->exp.aer_cap) {
+ uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+ uint32_t uncor_status;
+ bool isfatal;
+
+ uncor_status = vfio_pci_read_config(dev,
+ dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+ isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+ msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+ PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+ pcie_aer_msg(dev, &msg);
+ return;
+ }
+
/*
- * TBD. Retrieve the error details and decide what action
- * needs to be taken. One of the actions could be to pass
- * the error to the guest and have the guest driver recover
- * from the error. This requires that PCIe capabilities be
- * exposed to the guest. For now, we just terminate the
- * guest to contain the error.
+ * If the aer capability is not exposed to the guest. we just
+ * terminate the guest to contain the error.
*/
error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 06/11] vfio: add 'aer' property to expose aercap
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (4 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 05/11] vfio-pci: pass the aer error to guest Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 07/11] pc: add HW_COMPAT_2_2 to disable aercap for vifo device Chen Fan
` (4 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
add 'aer' property to let user able to decide whether expose
the aer capability.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index cdd8f24..c306efc 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -159,6 +159,8 @@ typedef struct VFIOPCIDevice {
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
#define VFIO_FEATURE_ENABLE_REQ_BIT 1
#define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
int32_t bootindex;
uint8_t pm_cap;
bool has_vga;
@@ -2737,6 +2739,10 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
uint32_t severity, errcap;
int ret;
+ if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+ return 0;
+ }
+
errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
/*
* The ability to record multiple headers is depending on
@@ -3699,6 +3705,8 @@ static Property vfio_pci_dev_properties[] = {
VFIO_FEATURE_ENABLE_VGA_BIT, false),
DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
VFIO_FEATURE_ENABLE_REQ_BIT, true),
+ DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+ VFIO_FEATURE_ENABLE_AER_BIT, true),
DEFINE_PROP_INT32("bootindex", VFIOPCIDevice, bootindex, -1),
DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true),
/*
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 07/11] pc: add HW_COMPAT_2_2 to disable aercap for vifo device
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (5 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 06/11] vfio: add 'aer' property to expose aercap Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 08/11] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
` (3 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
for piix4 chipset, we don't need to expose aer, so introduce
HW_COMPAT_2_2 to disable aercap for all lower than 2.3.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/i386/pc_q35.c | 4 ++++
include/hw/compat.h | 8 ++++++++
2 files changed, 12 insertions(+)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index dcc17c0..ffd27a5 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -428,6 +428,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
PC_Q35_2_2_MACHINE_OPTIONS,
.name = "pc-q35-2.2",
.init = pc_q35_init_2_2,
+ .compat_props = (GlobalProperty[]) {
+ HW_COMPAT_2_2,
+ { /* end of list */ }
+ },
};
#define PC_Q35_2_1_MACHINE_OPTIONS \
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 313682a..5f90f0f 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -1,7 +1,15 @@
#ifndef HW_COMPAT_H
#define HW_COMPAT_H
+#define HW_COMPAT_2_2 \
+ {\
+ .driver = "vfio-pci",\
+ .property = "x-aer",\
+ .value = "off",\
+ }
+
#define HW_COMPAT_2_1 \
+ HW_COMPAT_2_2, \
{\
.driver = "intel-hda",\
.property = "old_msi_addr",\
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 08/11] vfio: extract vfio_get_hot_reset_info as a single function
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (6 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 07/11] pc: add HW_COMPAT_2_2 to disable aercap for vifo device Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 09/11] qdev: add bus reset_notifiers callbacks for host bus reset Chen Fan
` (2 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
the function is used to get affected devices by bus reset.
so here extract it, and can used for aer soon.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 69 ++++++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 47 insertions(+), 22 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c306efc..b42c2a4 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2657,6 +2657,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
}
}
+/*
+ * return error with negative, return devices count with positive,
+ * return the ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+ struct vfio_pci_hot_reset_info **ret_info)
+{
+ struct vfio_pci_hot_reset_info *info;
+ int ret, count;
+
+ *ret_info = NULL;
+
+ info = g_malloc0(sizeof(*info));
+ info->argsz = sizeof(*info);
+
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+ if (ret && errno != ENOSPC) {
+ ret = -errno;
+ if (!vdev->has_pm_reset) {
+ error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
+ "no available reset mechanism.", vdev->host.domain,
+ vdev->host.bus, vdev->host.slot, vdev->host.function);
+ }
+ return ret;
+ }
+
+ count = info->count;
+
+ info = g_realloc(info, sizeof(*info) +
+ (count * sizeof(struct vfio_pci_dependent_device)));
+ info->argsz = sizeof(*info) +
+ (count * sizeof(struct vfio_pci_dependent_device));
+
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+ if (ret) {
+ ret = -errno;
+ error_report("vfio: hot reset info failed: %m");
+ return ret;
+ }
+
+ *ret_info = info;
+ return count;
+}
+
static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
{
PCIDevice *pdev = &vdev->pdev;
@@ -2913,32 +2958,12 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
vfio_pci_pre_reset(vdev);
vdev->vbasedev.needs_reset = false;
- info = g_malloc0(sizeof(*info));
- info->argsz = sizeof(*info);
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
- if (ret && errno != ENOSPC) {
- ret = -errno;
- if (!vdev->has_pm_reset) {
- error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
- "no available reset mechanism.", vdev->host.domain,
- vdev->host.bus, vdev->host.slot, vdev->host.function);
- }
+ ret = vfio_get_hot_reset_info(vdev, &info);
+ if (ret < 0) {
goto out_single;
}
- count = info->count;
- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
- info->argsz = sizeof(*info) + (count * sizeof(*devices));
devices = &info->devices[0];
-
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
- if (ret) {
- ret = -errno;
- error_report("vfio: hot reset info failed: %m");
- goto out_single;
- }
-
trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
/* Verify that we have all the groups required */
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 09/11] qdev: add bus reset_notifiers callbacks for host bus reset
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (7 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 08/11] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 10/11] vfio: squeeze out vfio_pci_do_hot_reset for support " Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host " Chen Fan
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
when recovery AER, we always need to reset the host bus to recovery
the devices under the bus, so add pci bus callbacks to reset host bus
when vfio support AER.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/pci/pci.c | 6 ++++++
hw/pci/pci_bridge.c | 3 +++
include/hw/pci/pci.h | 2 ++
include/hw/pci/pci_bus.h | 2 ++
4 files changed, 13 insertions(+)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b3d5100..427d4d6 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -74,11 +74,17 @@ static const VMStateDescription vmstate_pcibus = {
}
};
+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify)
+{
+ notifier_list_add(&bus->reset_notifiers, notify);
+}
+
static void pci_bus_realize(BusState *qbus, Error **errp)
{
PCIBus *bus = PCI_BUS(qbus);
vmstate_register(NULL, -1, &vmstate_pcibus, bus);
+ notifier_list_init(&bus->reset_notifiers);
}
static void pci_bus_unrealize(BusState *qbus, Error **errp)
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 40c97b1..24fee8b 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -267,6 +267,9 @@ void pci_bridge_write_config(PCIDevice *d,
newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
+ /* do host secondary bus reset for passthrough devices */
+ notifier_list_notify(&s->sec_bus.reset_notifiers, NULL);
+
/* Trigger hot reset on 0->1 transition. */
qbus_reset_all(&s->sec_bus.qbus);
}
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d4ffead..79ae725 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -7,6 +7,7 @@
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "qapi/error.h"
+#include "qemu/notify.h"
/* PCI includes legacy ISA access. */
#include "hw/isa/isa.h"
@@ -370,6 +371,7 @@ void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
void pci_device_set_intx_routing_notifier(PCIDevice *dev,
PCIINTxRoutingNotifier notifier);
void pci_device_reset(PCIDevice *dev);
+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify);
PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
const char *default_model,
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index fabaeee..3b551d7 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -29,6 +29,8 @@ struct PCIBus {
Keep a count of the number of devices with raised IRQs. */
int nirq;
int *irq_count;
+
+ NotifierList reset_notifiers;
};
typedef struct PCIBridgeWindows PCIBridgeWindows;
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 10/11] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (8 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 09/11] qdev: add bus reset_notifiers callbacks for host bus reset Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host " Chen Fan
10 siblings, 0 replies; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
squeeze out vfio_pci_do_hot_reset to do host bus reset when AER recovery.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++------------------------
1 file changed, 44 insertions(+), 31 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b42c2a4..060fb47 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2702,6 +2702,48 @@ static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
return count;
}
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+ struct vfio_pci_hot_reset_info *info)
+{
+ VFIOGroup *group;
+ struct vfio_pci_hot_reset *reset;
+ int32_t *fds;
+ int ret, i, count;
+ struct vfio_pci_dependent_device *devices;
+
+ /* Determine how many group fds need to be passed */
+ count = 0;
+ devices = &info->devices[0];
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ count++;
+ break;
+ }
+ }
+ }
+
+ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+ reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+ fds = &reset->group_fds[0];
+
+ /* Fill in group fds */
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ fds[reset->count++] = group->fd;
+ break;
+ }
+ }
+ }
+
+ /* Bus reset! */
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+ g_free(reset);
+
+ return ret;
+}
+
static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
{
PCIDevice *pdev = &vdev->pdev;
@@ -2948,9 +2990,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
VFIOGroup *group;
struct vfio_pci_hot_reset_info *info;
struct vfio_pci_dependent_device *devices;
- struct vfio_pci_hot_reset *reset;
- int32_t *fds;
- int ret, i, count;
+ int ret, i;
bool multi = false;
trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -3024,34 +3064,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
goto out_single;
}
- /* Determine how many group fds need to be passed */
- count = 0;
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- count++;
- break;
- }
- }
- }
-
- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
- reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
- fds = &reset->group_fds[0];
-
- /* Fill in group fds */
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- fds[reset->count++] = group->fd;
- break;
- }
- }
- }
-
- /* Bus reset! */
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
- g_free(reset);
+ ret = vfio_pci_do_hot_reset(vdev, info);
trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
ret ? "%m" : "Success");
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
` (9 preceding siblings ...)
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 10/11] vfio: squeeze out vfio_pci_do_hot_reset for support " Chen Fan
@ 2015-04-29 8:48 ` Chen Fan
2015-04-29 17:32 ` Alex Williamson
10 siblings, 1 reply; 15+ messages in thread
From: Chen Fan @ 2015-04-29 8:48 UTC (permalink / raw)
To: qemu-devel; +Cc: izumi.taku, alex.williamson
add host secondary bus reset for vfio when AER occurs, if reset failed,
we should stop vm.
Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
hw/vfio/pci.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 138 insertions(+), 13 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 060fb47..619daed 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
+
+ Notifier sec_bus_reset_notifier;
uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
@@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size)
return pos;
}
+static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
+ PCIHostDeviceAddress *host2)
+{
+ return (host1->domain == host2->domain && host1->bus == host2->bus &&
+ host1->slot == host2->slot && host1->function == host2->function);
+}
+
static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
{
uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
@@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
return 0;
}
+static int vfio_aer_validate_devices(DeviceState *dev,
+ void *opaque)
+{
+ VFIOPCIDevice *vdev;
+ int i;
+ bool found = false;
+ struct vfio_pci_hot_reset_info *info = opaque;
+ struct vfio_pci_dependent_device *devices = &info->devices[0];
+
+ if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+ return 0;
+ }
+
+ vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ if (vfio_pci_host_match(&host, &vdev->host)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ error_report("vfio: Cannot reset parent bus with AER supported,"
+ "depends on device %s which is not contained.",
+ vdev->vbasedev.name);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
+{
+ error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
+ "Please collect any data possible and then kill the guest",
+ __func__, vdev->host.domain, vdev->host.bus,
+ vdev->host.slot, vdev->host.function);
+
+ vm_stop(RUN_STATE_INTERNAL_ERROR);
+}
+
+static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
+{
+ VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier);
+ PCIDevice *pdev = &vdev->pdev;
+ int ret, i;
+ struct vfio_pci_hot_reset_info *info;
+ struct vfio_pci_dependent_device *devices;
+ VFIOGroup *group;
+
+ if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+ return;
+ }
+
+ /*
+ * Check the affected devices by virtual bus reset are contained in
+ * the set of groups.
+ */
+ ret = vfio_get_hot_reset_info(vdev, &info);
+ if (ret < 0) {
+ goto stop_vm;
+ }
+
+ devices = &info->devices[0];
+
+ /* Verify that we have all the groups required */
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ if (vfio_pci_host_match(&host, &vdev->host)) {
+ continue;
+ }
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == devices[i].group_id) {
+ break;
+ }
+ }
+
+ if (!group) {
+ if (!vdev->has_pm_reset) {
+ error_report("vfio: Cannot reset device %s with AER supported,"
+ "depends on group %d which is not owned.",
+ vdev->vbasedev.name, devices[i].group_id);
+ }
+ ret = -EPERM;
+ goto stop_vm;
+ }
+ }
+
+ /* Verify that we have all the affected devices under the bus */
+ ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
+ vfio_aer_validate_devices,
+ NULL, info);
+ if (ret < 0) {
+ goto stop_vm;
+ }
+
+
+ /* bus reset! */
+ ret = vfio_pci_do_hot_reset(vdev, info);
+ if (ret < 0) {
+ goto stop_vm;
+ }
+
+ g_free(info);
+ return;
+
+stop_vm:
+ g_free(info);
+ vfio_pci_vm_stop(vdev);
+}
+
static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
{
PCIDevice *pdev = &vdev->pdev;
@@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
+ vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
+ pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
+
return 0;
}
@@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
vfio_enable_intx(vdev);
}
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
- PCIHostDeviceAddress *host2)
-{
- return (host1->domain == host2->domain && host1->bus == host2->bus &&
- host1->slot == host2->slot && host1->function == host2->function);
-}
-
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
{
VFIOGroup *group;
@@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
* terminate the guest to contain the error.
*/
- error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
- "Please collect any data possible and then kill the guest",
- __func__, vdev->host.domain, vdev->host.bus,
- vdev->host.slot, vdev->host.function);
-
- vm_stop(RUN_STATE_INTERNAL_ERROR);
+ vfio_pci_vm_stop(vdev);
}
/*
--
1.9.3
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host " Chen Fan
@ 2015-04-29 17:32 ` Alex Williamson
2015-04-30 3:07 ` Chen Fan
0 siblings, 1 reply; 15+ messages in thread
From: Alex Williamson @ 2015-04-29 17:32 UTC (permalink / raw)
To: Chen Fan; +Cc: izumi.taku, qemu-devel
On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote:
> add host secondary bus reset for vfio when AER occurs, if reset failed,
> we should stop vm.
>
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
> hw/vfio/pci.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 138 insertions(+), 13 deletions(-)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 060fb47..619daed 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
> PCIHostDeviceAddress host;
> EventNotifier err_notifier;
> EventNotifier req_notifier;
> +
> + Notifier sec_bus_reset_notifier;
> uint32_t features;
> #define VFIO_FEATURE_ENABLE_VGA_BIT 0
> #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size)
> return pos;
> }
>
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> + PCIHostDeviceAddress *host2)
> +{
> + return (host1->domain == host2->domain && host1->bus == host2->bus &&
> + host1->slot == host2->slot && host1->function == host2->function);
> +}
> +
> static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
> {
> uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
> return 0;
> }
>
> +static int vfio_aer_validate_devices(DeviceState *dev,
> + void *opaque)
> +{
> + VFIOPCIDevice *vdev;
> + int i;
> + bool found = false;
> + struct vfio_pci_hot_reset_info *info = opaque;
> + struct vfio_pci_dependent_device *devices = &info->devices[0];
> +
> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> + return 0;
> + }
> +
> + vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
> + for (i = 0; i < info->count; i++) {
> + PCIHostDeviceAddress host;
> +
> + host.domain = devices[i].segment;
> + host.bus = devices[i].bus;
> + host.slot = PCI_SLOT(devices[i].devfn);
> + host.function = PCI_FUNC(devices[i].devfn);
> +
> + if (vfio_pci_host_match(&host, &vdev->host)) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (!found) {
> + error_report("vfio: Cannot reset parent bus with AER supported,"
> + "depends on device %s which is not contained.",
> + vdev->vbasedev.name);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
> +{
> + error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> + "Please collect any data possible and then kill the guest",
> + __func__, vdev->host.domain, vdev->host.bus,
> + vdev->host.slot, vdev->host.function);
> +
> + vm_stop(RUN_STATE_INTERNAL_ERROR);
> +}
> +
> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
> +{
> + VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier);
> + PCIDevice *pdev = &vdev->pdev;
> + int ret, i;
> + struct vfio_pci_hot_reset_info *info;
> + struct vfio_pci_dependent_device *devices;
> + VFIOGroup *group;
> +
> + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> + return;
> + }
> +
> + /*
> + * Check the affected devices by virtual bus reset are contained in
> + * the set of groups.
> + */
> + ret = vfio_get_hot_reset_info(vdev, &info);
> + if (ret < 0) {
> + goto stop_vm;
> + }
> +
> + devices = &info->devices[0];
> +
> + /* Verify that we have all the groups required */
> + for (i = 0; i < info->count; i++) {
> + PCIHostDeviceAddress host;
> +
> + host.domain = devices[i].segment;
> + host.bus = devices[i].bus;
> + host.slot = PCI_SLOT(devices[i].devfn);
> + host.function = PCI_FUNC(devices[i].devfn);
> +
> + if (vfio_pci_host_match(&host, &vdev->host)) {
> + continue;
> + }
> +
> + QLIST_FOREACH(group, &vfio_group_list, next) {
> + if (group->groupid == devices[i].group_id) {
> + break;
> + }
> + }
> +
> + if (!group) {
> + if (!vdev->has_pm_reset) {
I'm not sure how has_pm_reset has anything to do with what we're testing
here. Copy-paste error?
> + error_report("vfio: Cannot reset device %s with AER supported,"
> + "depends on group %d which is not owned.",
> + vdev->vbasedev.name, devices[i].group_id);
> + }
> + ret = -EPERM;
> + goto stop_vm;
> + }
> + }
The above verifies that all of the devices affected by the bus reset are
owned by the VM.
> +
> + /* Verify that we have all the affected devices under the bus */
> + ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
> + vfio_aer_validate_devices,
> + NULL, info);
And here we make sure that any vfio-pci devices affected by the bus
reset are contained in the list of affected devices. What we're still
missing is whether there are any affected devices that are exposed to
the VM that are not covered in this bus walk. For instance, if I assign
a multi-function device and place function 0 and 1 under separate, peer
root ports, I believe the above tests will confirm that the VM owns the
necessary groups and that the only vfio-pci device affected by the bus
reset is in the list of affected devices, but it will not verify that
the other function is not affected by the guest bus reset, but is
affected by the host bus reset.
> + if (ret < 0) {
> + goto stop_vm;
> + }
> +
> +
> + /* bus reset! */
> + ret = vfio_pci_do_hot_reset(vdev, info);
> + if (ret < 0) {
> + goto stop_vm;
> + }
> +
> + g_free(info);
> + return;
> +
> +stop_vm:
> + g_free(info);
> + vfio_pci_vm_stop(vdev);
> +}
> +
> static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
> {
> PCIDevice *pdev = &vdev->pdev;
> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
> pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
> pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
>
> + vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
> + pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
> +
Additionally, we're going to do a bus reset once for every vfio-pci
device subordinate to the bus being reset. That's not very efficient.
There are still a number of problems here. We're allowing users to
configure their VMs however they wish and enable AER, then only when
they do a bus reset (which we have no way to infer is related to an AER
event) do we check and potentially halt the VM if guest mapping of the
host topology prevents a bus reset. That seems sort of like making
backups of your data, but never testing that you can recover from the
backup until we need the data. I can't imagine that many users are
going to be able to get this correct and they won't know it's incorrect
until an AER event occurs.
Also, Do we need to worry about guest root complex devices? The guest
won't be able to perform a bus reset on the guest bus in that case. Is
AER even valid for RC devices since they don't have a parent downstream
port to signal the AER? This seems like another case where it's more
likely that a user will create an invalid configuration than it is they
will create a valid one, but we won't know until an AER occurs with the
code structure here.
Therefore I think that if the user specifies AER, we need to verify and
enforce at that point, ie. the initfn, that a host bus reset is
possible, that a guest reset is possible, and that a guest bus reset
fully contains the VM visible host devices affected by the reset.
I'd also like to see the patches ordered such that we provide all the
infrastructure to validate and enforce the configuration restrictions to
support AER before we enable it to the guest. The order here creates
bisect points where AER is exposed to the guest with unacceptable QEMU
handling. Thanks,
Alex
> return 0;
> }
>
> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> vfio_enable_intx(vdev);
> }
>
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> - PCIHostDeviceAddress *host2)
> -{
> - return (host1->domain == host2->domain && host1->bus == host2->bus &&
> - host1->slot == host2->slot && host1->function == host2->function);
> -}
> -
> static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
> {
> VFIOGroup *group;
> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
> * terminate the guest to contain the error.
> */
>
> - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> - "Please collect any data possible and then kill the guest",
> - __func__, vdev->host.domain, vdev->host.bus,
> - vdev->host.slot, vdev->host.function);
> -
> - vm_stop(RUN_STATE_INTERNAL_ERROR);
> + vfio_pci_vm_stop(vdev);
> }
>
> /*
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset
2015-04-29 17:32 ` Alex Williamson
@ 2015-04-30 3:07 ` Chen Fan
2015-04-30 3:21 ` Alex Williamson
0 siblings, 1 reply; 15+ messages in thread
From: Chen Fan @ 2015-04-30 3:07 UTC (permalink / raw)
To: Alex Williamson; +Cc: izumi.taku, qemu-devel
On 04/30/2015 01:32 AM, Alex Williamson wrote:
> On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote:
>> add host secondary bus reset for vfio when AER occurs, if reset failed,
>> we should stop vm.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>> hw/vfio/pci.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>> 1 file changed, 138 insertions(+), 13 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 060fb47..619daed 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
>> PCIHostDeviceAddress host;
>> EventNotifier err_notifier;
>> EventNotifier req_notifier;
>> +
>> + Notifier sec_bus_reset_notifier;
>> uint32_t features;
>> #define VFIO_FEATURE_ENABLE_VGA_BIT 0
>> #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
>> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size)
>> return pos;
>> }
>>
>> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> + PCIHostDeviceAddress *host2)
>> +{
>> + return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> + host1->slot == host2->slot && host1->function == host2->function);
>> +}
>> +
>> static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
>> {
>> uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
>> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>> return 0;
>> }
>>
>> +static int vfio_aer_validate_devices(DeviceState *dev,
>> + void *opaque)
>> +{
>> + VFIOPCIDevice *vdev;
>> + int i;
>> + bool found = false;
>> + struct vfio_pci_hot_reset_info *info = opaque;
>> + struct vfio_pci_dependent_device *devices = &info->devices[0];
>> +
>> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> + return 0;
>> + }
>> +
>> + vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
>> + for (i = 0; i < info->count; i++) {
>> + PCIHostDeviceAddress host;
>> +
>> + host.domain = devices[i].segment;
>> + host.bus = devices[i].bus;
>> + host.slot = PCI_SLOT(devices[i].devfn);
>> + host.function = PCI_FUNC(devices[i].devfn);
>> +
>> + if (vfio_pci_host_match(&host, &vdev->host)) {
>> + found = true;
>> + break;
>> + }
>> + }
>> +
>> + if (!found) {
>> + error_report("vfio: Cannot reset parent bus with AER supported,"
>> + "depends on device %s which is not contained.",
>> + vdev->vbasedev.name);
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
>> +{
>> + error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
>> + "Please collect any data possible and then kill the guest",
>> + __func__, vdev->host.domain, vdev->host.bus,
>> + vdev->host.slot, vdev->host.function);
>> +
>> + vm_stop(RUN_STATE_INTERNAL_ERROR);
>> +}
>> +
>> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
>> +{
>> + VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier);
>> + PCIDevice *pdev = &vdev->pdev;
>> + int ret, i;
>> + struct vfio_pci_hot_reset_info *info;
>> + struct vfio_pci_dependent_device *devices;
>> + VFIOGroup *group;
>> +
>> + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
>> + return;
>> + }
>> +
>> + /*
>> + * Check the affected devices by virtual bus reset are contained in
>> + * the set of groups.
>> + */
>> + ret = vfio_get_hot_reset_info(vdev, &info);
>> + if (ret < 0) {
>> + goto stop_vm;
>> + }
>> +
>> + devices = &info->devices[0];
>> +
>> + /* Verify that we have all the groups required */
>> + for (i = 0; i < info->count; i++) {
>> + PCIHostDeviceAddress host;
>> +
>> + host.domain = devices[i].segment;
>> + host.bus = devices[i].bus;
>> + host.slot = PCI_SLOT(devices[i].devfn);
>> + host.function = PCI_FUNC(devices[i].devfn);
>> +
>> + if (vfio_pci_host_match(&host, &vdev->host)) {
>> + continue;
>> + }
>> +
>> + QLIST_FOREACH(group, &vfio_group_list, next) {
>> + if (group->groupid == devices[i].group_id) {
>> + break;
>> + }
>> + }
>> +
>> + if (!group) {
>> + if (!vdev->has_pm_reset) {
> I'm not sure how has_pm_reset has anything to do with what we're testing
> here. Copy-paste error?
>
>> + error_report("vfio: Cannot reset device %s with AER supported,"
>> + "depends on group %d which is not owned.",
>> + vdev->vbasedev.name, devices[i].group_id);
>> + }
>> + ret = -EPERM;
>> + goto stop_vm;
>> + }
>> + }
>
> The above verifies that all of the devices affected by the bus reset are
> owned by the VM.
>
>> +
>> + /* Verify that we have all the affected devices under the bus */
>> + ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
>> + vfio_aer_validate_devices,
>> + NULL, info);
> And here we make sure that any vfio-pci devices affected by the bus
> reset are contained in the list of affected devices. What we're still
> missing is whether there are any affected devices that are exposed to
> the VM that are not covered in this bus walk. For instance, if I assign
> a multi-function device and place function 0 and 1 under separate, peer
> root ports, I believe the above tests will confirm that the VM owns the
> necessary groups and that the only vfio-pci device affected by the bus
> reset is in the list of affected devices, but it will not verify that
> the other function is not affected by the guest bus reset, but is
> affected by the host bus reset.
you are right.
>
>> + if (ret < 0) {
>> + goto stop_vm;
>> + }
>> +
>> +
>> + /* bus reset! */
>> + ret = vfio_pci_do_hot_reset(vdev, info);
>> + if (ret < 0) {
>> + goto stop_vm;
>> + }
>> +
>> + g_free(info);
>> + return;
>> +
>> +stop_vm:
>> + g_free(info);
>> + vfio_pci_vm_stop(vdev);
>> +}
>> +
>> static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>> {
>> PCIDevice *pdev = &vdev->pdev;
>> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>> pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
>> pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
>>
>> + vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
>> + pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
>> +
> Additionally, we're going to do a bus reset once for every vfio-pci
> device subordinate to the bus being reset. That's not very efficient.
>
> There are still a number of problems here. We're allowing users to
> configure their VMs however they wish and enable AER, then only when
> they do a bus reset (which we have no way to infer is related to an AER
> event) do we check and potentially halt the VM if guest mapping of the
> host topology prevents a bus reset. That seems sort of like making
> backups of your data, but never testing that you can recover from the
> backup until we need the data. I can't imagine that many users are
> going to be able to get this correct and they won't know it's incorrect
> until an AER event occurs.
>
> Also, Do we need to worry about guest root complex devices? The guest
> won't be able to perform a bus reset on the guest bus in that case. Is
> AER even valid for RC devices since they don't have a parent downstream
> port to signal the AER? This seems like another case where it's more
> likely that a user will create an invalid configuration than it is they
> will create a valid one, but we won't know until an AER occurs with the
> code structure here.
do you refer to the root complex device is the root port of root complex?
the root port can notify system itself, the PCI Express aer driver
reset the upstream link between root port and upstream port of
subordinate switch. so I think we don't need to care that.
>
> Therefore I think that if the user specifies AER, we need to verify and
> enforce at that point, ie. the initfn, that a host bus reset is
> possible, that a guest reset is possible, and that a guest bus reset
> fully contains the VM visible host devices affected by the reset.
>
> I'd also like to see the patches ordered such that we provide all the
> infrastructure to validate and enforce the configuration restrictions to
> support AER before we enable it to the guest. The order here creates
> bisect points where AER is exposed to the guest with unacceptable QEMU
> handling. Thanks,
I'm sorry for that, due to this is RFC version I missed that,
and I will arrange the patches after this series.
Thanks,
Chen
>
> Alex
>
>> return 0;
>> }
>>
>> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>> vfio_enable_intx(vdev);
>> }
>>
>> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> - PCIHostDeviceAddress *host2)
>> -{
>> - return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> - host1->slot == host2->slot && host1->function == host2->function);
>> -}
>> -
>> static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>> {
>> VFIOGroup *group;
>> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
>> * terminate the guest to contain the error.
>> */
>>
>> - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
>> - "Please collect any data possible and then kill the guest",
>> - __func__, vdev->host.domain, vdev->host.bus,
>> - vdev->host.slot, vdev->host.function);
>> -
>> - vm_stop(RUN_STATE_INTERNAL_ERROR);
>> + vfio_pci_vm_stop(vdev);
>> }
>>
>> /*
>
>
> .
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset
2015-04-30 3:07 ` Chen Fan
@ 2015-04-30 3:21 ` Alex Williamson
0 siblings, 0 replies; 15+ messages in thread
From: Alex Williamson @ 2015-04-30 3:21 UTC (permalink / raw)
To: Chen Fan; +Cc: izumi.taku, qemu-devel
On Thu, 2015-04-30 at 11:07 +0800, Chen Fan wrote:
> On 04/30/2015 01:32 AM, Alex Williamson wrote:
> > On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote:
> >> add host secondary bus reset for vfio when AER occurs, if reset failed,
> >> we should stop vm.
> >>
> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >> ---
> >> hw/vfio/pci.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
> >> 1 file changed, 138 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 060fb47..619daed 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
> >> PCIHostDeviceAddress host;
> >> EventNotifier err_notifier;
> >> EventNotifier req_notifier;
> >> +
> >> + Notifier sec_bus_reset_notifier;
> >> uint32_t features;
> >> #define VFIO_FEATURE_ENABLE_VGA_BIT 0
> >> #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
> >> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size)
> >> return pos;
> >> }
> >>
> >> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> >> + PCIHostDeviceAddress *host2)
> >> +{
> >> + return (host1->domain == host2->domain && host1->bus == host2->bus &&
> >> + host1->slot == host2->slot && host1->function == host2->function);
> >> +}
> >> +
> >> static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
> >> {
> >> uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
> >> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
> >> return 0;
> >> }
> >>
> >> +static int vfio_aer_validate_devices(DeviceState *dev,
> >> + void *opaque)
> >> +{
> >> + VFIOPCIDevice *vdev;
> >> + int i;
> >> + bool found = false;
> >> + struct vfio_pci_hot_reset_info *info = opaque;
> >> + struct vfio_pci_dependent_device *devices = &info->devices[0];
> >> +
> >> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> >> + return 0;
> >> + }
> >> +
> >> + vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
> >> + for (i = 0; i < info->count; i++) {
> >> + PCIHostDeviceAddress host;
> >> +
> >> + host.domain = devices[i].segment;
> >> + host.bus = devices[i].bus;
> >> + host.slot = PCI_SLOT(devices[i].devfn);
> >> + host.function = PCI_FUNC(devices[i].devfn);
> >> +
> >> + if (vfio_pci_host_match(&host, &vdev->host)) {
> >> + found = true;
> >> + break;
> >> + }
> >> + }
> >> +
> >> + if (!found) {
> >> + error_report("vfio: Cannot reset parent bus with AER supported,"
> >> + "depends on device %s which is not contained.",
> >> + vdev->vbasedev.name);
> >> + return -1;
> >> + }
> >> +
> >> + return 0;
> >> +}
> >> +
> >> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
> >> +{
> >> + error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> >> + "Please collect any data possible and then kill the guest",
> >> + __func__, vdev->host.domain, vdev->host.bus,
> >> + vdev->host.slot, vdev->host.function);
> >> +
> >> + vm_stop(RUN_STATE_INTERNAL_ERROR);
> >> +}
> >> +
> >> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
> >> +{
> >> + VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier);
> >> + PCIDevice *pdev = &vdev->pdev;
> >> + int ret, i;
> >> + struct vfio_pci_hot_reset_info *info;
> >> + struct vfio_pci_dependent_device *devices;
> >> + VFIOGroup *group;
> >> +
> >> + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> >> + return;
> >> + }
> >> +
> >> + /*
> >> + * Check the affected devices by virtual bus reset are contained in
> >> + * the set of groups.
> >> + */
> >> + ret = vfio_get_hot_reset_info(vdev, &info);
> >> + if (ret < 0) {
> >> + goto stop_vm;
> >> + }
> >> +
> >> + devices = &info->devices[0];
> >> +
> >> + /* Verify that we have all the groups required */
> >> + for (i = 0; i < info->count; i++) {
> >> + PCIHostDeviceAddress host;
> >> +
> >> + host.domain = devices[i].segment;
> >> + host.bus = devices[i].bus;
> >> + host.slot = PCI_SLOT(devices[i].devfn);
> >> + host.function = PCI_FUNC(devices[i].devfn);
> >> +
> >> + if (vfio_pci_host_match(&host, &vdev->host)) {
> >> + continue;
> >> + }
> >> +
> >> + QLIST_FOREACH(group, &vfio_group_list, next) {
> >> + if (group->groupid == devices[i].group_id) {
> >> + break;
> >> + }
> >> + }
> >> +
> >> + if (!group) {
> >> + if (!vdev->has_pm_reset) {
> > I'm not sure how has_pm_reset has anything to do with what we're testing
> > here. Copy-paste error?
> >
> >> + error_report("vfio: Cannot reset device %s with AER supported,"
> >> + "depends on group %d which is not owned.",
> >> + vdev->vbasedev.name, devices[i].group_id);
> >> + }
> >> + ret = -EPERM;
> >> + goto stop_vm;
> >> + }
> >> + }
> >
> > The above verifies that all of the devices affected by the bus reset are
> > owned by the VM.
> >
> >> +
> >> + /* Verify that we have all the affected devices under the bus */
> >> + ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
> >> + vfio_aer_validate_devices,
> >> + NULL, info);
> > And here we make sure that any vfio-pci devices affected by the bus
> > reset are contained in the list of affected devices. What we're still
> > missing is whether there are any affected devices that are exposed to
> > the VM that are not covered in this bus walk. For instance, if I assign
> > a multi-function device and place function 0 and 1 under separate, peer
> > root ports, I believe the above tests will confirm that the VM owns the
> > necessary groups and that the only vfio-pci device affected by the bus
> > reset is in the list of affected devices, but it will not verify that
> > the other function is not affected by the guest bus reset, but is
> > affected by the host bus reset.
> you are right.
>
> >
> >> + if (ret < 0) {
> >> + goto stop_vm;
> >> + }
> >> +
> >> +
> >> + /* bus reset! */
> >> + ret = vfio_pci_do_hot_reset(vdev, info);
> >> + if (ret < 0) {
> >> + goto stop_vm;
> >> + }
> >> +
> >> + g_free(info);
> >> + return;
> >> +
> >> +stop_vm:
> >> + g_free(info);
> >> + vfio_pci_vm_stop(vdev);
> >> +}
> >> +
> >> static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
> >> {
> >> PCIDevice *pdev = &vdev->pdev;
> >> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
> >> pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
> >> pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
> >>
> >> + vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
> >> + pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
> >> +
> > Additionally, we're going to do a bus reset once for every vfio-pci
> > device subordinate to the bus being reset. That's not very efficient.
> >
> > There are still a number of problems here. We're allowing users to
> > configure their VMs however they wish and enable AER, then only when
> > they do a bus reset (which we have no way to infer is related to an AER
> > event) do we check and potentially halt the VM if guest mapping of the
> > host topology prevents a bus reset. That seems sort of like making
> > backups of your data, but never testing that you can recover from the
> > backup until we need the data. I can't imagine that many users are
> > going to be able to get this correct and they won't know it's incorrect
> > until an AER event occurs.
> >
> > Also, Do we need to worry about guest root complex devices? The guest
> > won't be able to perform a bus reset on the guest bus in that case. Is
> > AER even valid for RC devices since they don't have a parent downstream
> > port to signal the AER? This seems like another case where it's more
> > likely that a user will create an invalid configuration than it is they
> > will create a valid one, but we won't know until an AER occurs with the
> > code structure here.
> do you refer to the root complex device is the root port of root complex?
> the root port can notify system itself, the PCI Express aer driver
> reset the upstream link between root port and upstream port of
> subordinate switch. so I think we don't need to care that.
No, I'm saying that we can place an assigned device directly on pcie.0,
the host bus, without using a root port. I don't know if we have any
way to signal an AER event for root complex devices, but the guest
certainly doesn't have a way to perform a bus reset on the host bus.
Thanks,
Alex
> > Therefore I think that if the user specifies AER, we need to verify and
> > enforce at that point, ie. the initfn, that a host bus reset is
> > possible, that a guest reset is possible, and that a guest bus reset
> > fully contains the VM visible host devices affected by the reset.
> >
> > I'd also like to see the patches ordered such that we provide all the
> > infrastructure to validate and enforce the configuration restrictions to
> > support AER before we enable it to the guest. The order here creates
> > bisect points where AER is exposed to the guest with unacceptable QEMU
> > handling. Thanks,
> I'm sorry for that, due to this is RFC version I missed that,
> and I will arrange the patches after this series.
>
> Thanks,
> Chen
>
>
> >
> > Alex
> >
> >> return 0;
> >> }
> >>
> >> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> >> vfio_enable_intx(vdev);
> >> }
> >>
> >> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> >> - PCIHostDeviceAddress *host2)
> >> -{
> >> - return (host1->domain == host2->domain && host1->bus == host2->bus &&
> >> - host1->slot == host2->slot && host1->function == host2->function);
> >> -}
> >> -
> >> static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
> >> {
> >> VFIOGroup *group;
> >> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
> >> * terminate the guest to contain the error.
> >> */
> >>
> >> - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> >> - "Please collect any data possible and then kill the guest",
> >> - __func__, vdev->host.domain, vdev->host.bus,
> >> - vdev->host.slot, vdev->host.function);
> >> -
> >> - vm_stop(RUN_STATE_INTERNAL_ERROR);
> >> + vfio_pci_vm_stop(vdev);
> >> }
> >>
> >> /*
> >
> >
> > .
> >
>
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2015-04-30 3:28 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-04-29 8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 02/11] aer: impove pcie_aer_init to support vfio device Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 03/11] vfio: add aer support for " Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 04/11] pcie_aer: expose pcie_aer_msg() interface Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 05/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 06/11] vfio: add 'aer' property to expose aercap Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 07/11] pc: add HW_COMPAT_2_2 to disable aercap for vifo device Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 08/11] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 09/11] qdev: add bus reset_notifiers callbacks for host bus reset Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 10/11] vfio: squeeze out vfio_pci_do_hot_reset for support " Chen Fan
2015-04-29 8:48 ` [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host " Chen Fan
2015-04-29 17:32 ` Alex Williamson
2015-04-30 3:07 ` Chen Fan
2015-04-30 3:21 ` Alex Williamson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).