qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 0/4] Error recovery for zPCI passthrough devices
@ 2025-08-13 17:41 Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 1/4] [NOTFORMERGE] linux-headers: Update for zpci vfio device Farhan Ali
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Farhan Ali @ 2025-08-13 17:41 UTC (permalink / raw)
  To: qemu-s390x; +Cc: qemu-devel, mjrosato, thuth, alex.williamson, clg, alifm

Hi,

This patch series introduces support for error recovery for passthrough
PCI devices on System Z (s390x). This is the user space component for the Linux
kernel patches [1]. When QEMU receives an eventfd notification for a PCI error
from the vfio-pci driver, it calls the vfio error handler. We can use an
architecture-specific error handler to override the default vfio error handler.

For the s390x-specific error handler, we retrieve the architecture-specific PCI
error information and inject it into the guest. Once the guest receives
the error information, the guest drivers will drive the error recovery.
Typically, recovery involves a device reset, which translates to a CLP
disable/enable cycle for the device.

I would appreciate some feedback on this patch series to understand if
such an approach is acceptable.

Thanks
Farhan

[1] https://lore.kernel.org/linux-s390/20250813170821.1115-1-alifm@linux.ibm.com/T/#m7c763e718501a2bbd77f0356f8845b77545d61e1

Farhan Ali (4):
  [NOTFORMERGE] linux-headers: Update for zpci vfio device
  vfio/pci: Add an architecture specific error handler
  s390x/pci: Add PCI error handling for vfio pci devices
  s390x/pci: Reset a device in error state

 hw/s390x/s390-pci-bus.c          | 12 +++++
 hw/s390x/s390-pci-vfio.c         | 88 ++++++++++++++++++++++++++++++++
 hw/vfio/pci.c                    |  5 ++
 hw/vfio/pci.h                    |  1 +
 include/hw/s390x/s390-pci-bus.h  |  1 +
 include/hw/s390x/s390-pci-vfio.h |  4 ++
 linux-headers/linux/vfio.h       |  2 +
 linux-headers/linux/vfio_zdev.h  |  5 ++
 8 files changed, 118 insertions(+)

-- 
2.43.0



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v1 1/4] [NOTFORMERGE] linux-headers: Update for zpci vfio device
  2025-08-13 17:41 [PATCH v1 0/4] Error recovery for zPCI passthrough devices Farhan Ali
@ 2025-08-13 17:41 ` Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 2/4] vfio/pci: Add an architecture specific error handler Farhan Ali
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Farhan Ali @ 2025-08-13 17:41 UTC (permalink / raw)
  To: qemu-s390x; +Cc: qemu-devel, mjrosato, thuth, alex.williamson, clg, alifm

Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
---
 linux-headers/linux/vfio.h      | 2 ++
 linux-headers/linux/vfio_zdev.h | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 79bf8c0cc5..a437169bce 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -369,6 +369,8 @@ struct vfio_region_info_cap_type {
  */
 #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)
 
+#define VFIO_REGION_SUBTYPE_IBM_ZPCI_ERROR_REGION (2)
+
 /* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
diff --git a/linux-headers/linux/vfio_zdev.h b/linux-headers/linux/vfio_zdev.h
index 77f2aff1f2..bcd06f334a 100644
--- a/linux-headers/linux/vfio_zdev.h
+++ b/linux-headers/linux/vfio_zdev.h
@@ -82,4 +82,9 @@ struct vfio_device_info_cap_zpci_pfip {
 	__u8 pfip[];
 };
 
+struct vfio_device_zpci_err_region {
+	__u16 pec;		/* forwarded by QEMU to s390_pci_generate_error_event() */
+	int pending_errors;	/* QEMU re-reads the region while this is non-zero */
+};
+
 #endif
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v1 2/4] vfio/pci: Add an architecture specific error handler
  2025-08-13 17:41 [PATCH v1 0/4] Error recovery for zPCI passthrough devices Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 1/4] [NOTFORMERGE] linux-headers: Update for zpci vfio device Farhan Ali
@ 2025-08-13 17:41 ` Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 3/4] s390x/pci: Add PCI error handling for vfio pci devices Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 4/4] s390x/pci: Reset a device in error state Farhan Ali
  3 siblings, 0 replies; 5+ messages in thread
From: Farhan Ali @ 2025-08-13 17:41 UTC (permalink / raw)
  To: qemu-s390x; +Cc: qemu-devel, mjrosato, thuth, alex.williamson, clg, alifm

Provide an architecture-specific error handling callback
that platforms can use to handle PCI errors for
passthrough devices.

Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
---
 hw/vfio/pci.c | 5 +++++
 hw/vfio/pci.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 07257d0fa0..3c71d19306 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3026,6 +3026,11 @@ static void vfio_err_notifier_handler(void *opaque)
         return;
     }
 
+    if (vdev->arch_err_handler) {
+        vdev->arch_err_handler(vdev);
+        return;
+    }
+
     /*
      * TBD. Retrieve the error details and decide what action
      * needs to be taken. One of the actions could be to pass
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 810a842f4a..45d4405e47 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -145,6 +145,7 @@ struct VFIOPCIDevice {
     EventNotifier err_notifier;
     EventNotifier req_notifier;
     int (*resetfn)(struct VFIOPCIDevice *);
+    void (*arch_err_handler)(struct VFIOPCIDevice *);
     uint32_t vendor_id;
     uint32_t device_id;
     uint32_t sub_vendor_id;
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v1 3/4] s390x/pci: Add PCI error handling for vfio pci devices
  2025-08-13 17:41 [PATCH v1 0/4] Error recovery for zPCI passthrough devices Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 1/4] [NOTFORMERGE] linux-headers: Update for zpci vfio device Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 2/4] vfio/pci: Add an architecture specific error handler Farhan Ali
@ 2025-08-13 17:41 ` Farhan Ali
  2025-08-13 17:41 ` [PATCH v1 4/4] s390x/pci: Reset a device in error state Farhan Ali
  3 siblings, 0 replies; 5+ messages in thread
From: Farhan Ali @ 2025-08-13 17:41 UTC (permalink / raw)
  To: qemu-s390x; +Cc: qemu-devel, mjrosato, thuth, alex.williamson, clg, alifm

Add an s390x specific callback for vfio error handling.
For s390x pci devices, we have platform specific error
information. We need to retrieve this error information
for passthrough devices. This is done via a memory region
which exposes that information.

Once this error information is retrieved we can then
inject an error into the guest, and let the guest drive
the recovery.

Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
---
 hw/s390x/s390-pci-bus.c          |  5 ++
 hw/s390x/s390-pci-vfio.c         | 82 ++++++++++++++++++++++++++++++++
 include/hw/s390x/s390-pci-bus.h  |  1 +
 include/hw/s390x/s390-pci-vfio.h |  2 +
 4 files changed, 90 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index f87d2748b6..af42eb9938 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -158,6 +158,8 @@ static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
 {
     HotplugHandler *hotplug_ctrl;
 
+    qemu_mutex_destroy(&pbdev->err_handler_lock);
+
     if (pbdev->pft == ZPCI_PFT_ISM) {
         notifier_remove(&pbdev->shutdown_notifier);
     }
@@ -1140,6 +1142,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         pbdev->iommu->pbdev = pbdev;
         pbdev->state = ZPCI_FS_DISABLED;
         set_pbdev_info(pbdev);
+        qemu_mutex_init(&pbdev->err_handler_lock);
 
         if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
             /*
@@ -1164,6 +1167,8 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
             pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
             /* Fill in CLP information passed via the vfio region */
             s390_pci_get_clp_info(pbdev);
+            /* Setup error handler for error recovery */
+            s390_pci_setup_err_handler(pbdev);
             if (!pbdev->interp) {
                 /* Do vfio passthrough but intercept for I/O */
                 pbdev->fh |= FH_SHM_VFIO;
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index aaf91319b4..ceee342f4a 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 
 #include <sys/ioctl.h>
 #include <linux/vfio.h>
@@ -103,6 +104,70 @@ void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
     }
 }
 
+/*
+ * Read the zPCI error region of a passthrough device into @err.
+ * Returns 0 on success and a non-zero error value on failure.
+ */
+static int s390_pci_read_error_region(VFIOPCIDevice *vfio_pci,
+                                      struct vfio_device_zpci_err_region *err)
+{
+    struct vfio_region_info *region = NULL;
+    g_autofree void *buf = NULL; /* must be initialised: freed on any return */
+    int ret;
+
+    ret = vfio_device_get_region_info_type(&vfio_pci->vbasedev,
+                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_IBM,
+                    VFIO_REGION_SUBTYPE_IBM_ZPCI_ERROR_REGION, &region);
+    if (ret) {
+        error_report("Failed to get the region info for passthrough device"
+                    " (rc=%d)", ret);
+        return ret;
+    }
+
+    /* The memcpy() below needs a whole record; this also rejects size 0. */
+    if (region->size < sizeof(*err)) {
+        error_report("vfio zpci error region is too small");
+        return -EINVAL;
+    }
+    buf = g_malloc0(region->size);
+    ret = pread(vfio_pci->vbasedev.fd, buf, region->size, region->offset);
+    if (ret != region->size) {
+        error_report("Failed to read vfio zpci error region");
+        return -EINVAL;
+    }
+    memcpy(err, buf, sizeof(*err));
+    return 0;
+}
+
+/*
+ * vfio error notifier callback for zPCI: mark the device as being in error
+ * and forward each error event read from the error region to the guest.
+ */
+static void s390_pci_err_handler(VFIOPCIDevice *vfio_pci)
+{
+    struct vfio_device_zpci_err_region err;
+    S390PCIBusDevice *pbdev = s390_pci_find_dev_by_target(s390_get_phb(),
+                                        DEVICE(&vfio_pci->pdev)->id);
+
+    if (!pbdev) {
+        error_report("No zPCI device found for vfio error notification");
+        return;
+    }
+
+    QEMU_LOCK_GUARD(&pbdev->err_handler_lock);
+    if (s390_pci_read_error_region(vfio_pci, &err)) {
+        return;
+    }
+
+    pbdev->state = ZPCI_FS_ERROR;
+    /* Forward the first event, then any the region still reports pending. */
+    do {
+        s390_pci_generate_error_event(err.pec, pbdev->fh, pbdev->fid, 0, 0);
+    } while (err.pending_errors &&
+             !s390_pci_read_error_region(vfio_pci, &err));
+    return;
+}
+
 static void s390_pci_read_base(S390PCIBusDevice *pbdev,
                                struct vfio_device_info *info)
 {
@@ -369,3 +434,20 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev)
     s390_pci_read_util(pbdev, info);
     s390_pci_read_pfip(pbdev, info);
 }
+
+/*
+ * Use the zPCI error handler iff the device exposes the zPCI error region.
+ */
+void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev)
+{
+    VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+    struct vfio_region_info *info = NULL;
+
+    if (vfio_device_get_region_info_type(&vfio_pci->vbasedev,
+                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_IBM,
+                    VFIO_REGION_SUBTYPE_IBM_ZPCI_ERROR_REGION, &info) == 0) {
+        vfio_pci->arch_err_handler = s390_pci_err_handler;
+        return;
+    }
+    info_report("Automated error recovery not available for passthrough device");
+}
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 04944d4fed..3795e0bbfc 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -364,6 +364,7 @@ struct S390PCIBusDevice {
     bool forwarding_assist;
     bool aif;
     bool rtr_avail;
+    QemuMutex err_handler_lock;
     QTAILQ_ENTRY(S390PCIBusDevice) link;
 };
 
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
index ae1b126ff7..66b274293c 100644
--- a/include/hw/s390x/s390-pci-vfio.h
+++ b/include/hw/s390x/s390-pci-vfio.h
@@ -22,6 +22,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
 void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh);
 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
+void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev);
 #else
 static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
 {
@@ -39,6 +40,7 @@ static inline bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
     return false;
 }
 static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
+static inline void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev) { }
 #endif
 
 #endif
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v1 4/4] s390x/pci: Reset a device in error state
  2025-08-13 17:41 [PATCH v1 0/4] Error recovery for zPCI passthrough devices Farhan Ali
                   ` (2 preceding siblings ...)
  2025-08-13 17:41 ` [PATCH v1 3/4] s390x/pci: Add PCI error handling for vfio pci devices Farhan Ali
@ 2025-08-13 17:41 ` Farhan Ali
  3 siblings, 0 replies; 5+ messages in thread
From: Farhan Ali @ 2025-08-13 17:41 UTC (permalink / raw)
  To: qemu-s390x; +Cc: qemu-devel, mjrosato, thuth, alex.williamson, clg, alifm

When the guest drives a reset of a passthrough device that is in
the error state, we can attempt a hot reset to recover the
device. A hot reset of the device will trigger a CLP
disable/enable cycle on the host to bring the device into
a recovered state.

Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
---
 hw/s390x/s390-pci-bus.c          | 7 +++++++
 hw/s390x/s390-pci-vfio.c         | 6 ++++++
 include/hw/s390x/s390-pci-vfio.h | 2 ++
 3 files changed, 15 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index af42eb9938..c9c2d775f0 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -1493,6 +1493,8 @@ static void s390_pci_device_reset(DeviceState *dev)
         return;
     case ZPCI_FS_STANDBY:
         break;
+    case ZPCI_FS_ERROR:
+        break;
     default:
         pbdev->fh &= ~FH_MASK_ENABLE;
         pbdev->state = ZPCI_FS_DISABLED;
@@ -1505,6 +1507,11 @@ static void s390_pci_device_reset(DeviceState *dev)
     } else if (pbdev->summary_ind) {
         pci_dereg_irqs(pbdev);
     }
+
+    if (pbdev->state == ZPCI_FS_ERROR) {
+        s390_pci_reset(pbdev);
+    }
+
     if (pbdev->iommu->enabled) {
         pci_dereg_ioat(pbdev->iommu);
     }
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index ceee342f4a..4ec5e2cd95 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -168,6 +168,20 @@ static void s390_pci_err_handler(VFIOPCIDevice *vfio_pci)
     return;
 }
 
+/*
+ * Issue VFIO_DEVICE_RESET on the vfio device behind @pbdev.  A failure is
+ * reported but not propagated, as the function has no return value.
+ * NOTE(review): assumes pbdev->pdev is embedded in a VFIOPCIDevice; confirm
+ * that callers only pass vfio-pci backed devices.
+ */
+void s390_pci_reset(S390PCIBusDevice *pbdev)
+{
+    VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+
+    if (ioctl(vfio_pci->vbasedev.fd, VFIO_DEVICE_RESET) < 0) {
+        error_report("Failed to reset vfio zpci device: %s",
+                     strerror(errno));
+    }
+}
+
 static void s390_pci_read_base(S390PCIBusDevice *pbdev,
                                struct vfio_device_info *info)
 {
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
index 66b274293c..c28dafeed8 100644
--- a/include/hw/s390x/s390-pci-vfio.h
+++ b/include/hw/s390x/s390-pci-vfio.h
@@ -23,6 +23,7 @@ void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
 bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh);
 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
 void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev);
+void s390_pci_reset(S390PCIBusDevice *pbdev);
 #else
 static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
 {
@@ -41,6 +42,7 @@ static inline bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh)
 }
 static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
 static inline void s390_pci_setup_err_handler(S390PCIBusDevice *pbdev) { }
+static inline void s390_pci_reset(S390PCIBusDevice *pbdev) { }
 #endif
 
 #endif
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-08-13 17:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-13 17:41 [PATCH v1 0/4] Error recovery for zPCI passthrough devices Farhan Ali
2025-08-13 17:41 ` [PATCH v1 1/4] [NOTFORMERGE] linux-headers: Update for zpci vfio device Farhan Ali
2025-08-13 17:41 ` [PATCH v1 2/4] vfio/pci: Add an architecture specific error handler Farhan Ali
2025-08-13 17:41 ` [PATCH v1 3/4] s390x/pci: Add PCI error handling for vfio pci devices Farhan Ali
2025-08-13 17:41 ` [PATCH v1 4/4] s390x/pci: Reset a device in error state Farhan Ali

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).