From: Farhan Ali <alifm@linux.ibm.com>
To: Alex Williamson <alex@shazbot.org>
Cc: linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, helgaas@kernel.org, lukas@wunner.de,
kbusch@kernel.org, clg@redhat.com, stable@vger.kernel.org,
schnelle@linux.ibm.com, mjrosato@linux.ibm.com
Subject: Re: [PATCH v11 6/9] s390/pci: Store PCI error information for passthrough devices
Date: Wed, 25 Mar 2026 11:06:35 -0700 [thread overview]
Message-ID: <14b68366-9942-4487-8388-1120d243f6f2@linux.ibm.com> (raw)
In-Reply-To: <20260325110158.6ec66502@shazbot.org>
On 3/25/2026 10:01 AM, Alex Williamson wrote:
> On Mon, 16 Mar 2026 12:15:41 -0700
> Farhan Ali <alifm@linux.ibm.com> wrote:
>
>> For a passthrough device we need co-operation from user space to recover
>> the device. This would require to bubble up any error information to user
>> space. Let's store this error information for passthrough devices, so it
>> can be retrieved later.
>>
>> Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
>> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
>> ---
>> arch/s390/include/asm/pci.h | 28 ++++++++++
>> arch/s390/pci/pci.c | 1 +
>> arch/s390/pci/pci_event.c | 94 +++++++++++++++++++-------------
>> drivers/vfio/pci/vfio_pci_zdev.c | 2 +
>> 4 files changed, 87 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
>> index ec8a772bf526..383f6483b656 100644
>> --- a/arch/s390/include/asm/pci.h
>> +++ b/arch/s390/include/asm/pci.h
>> @@ -118,6 +118,31 @@ struct zpci_bus {
>> enum pci_bus_speed max_bus_speed;
>> };
>>
>> +/* Content Code Description for PCI Function Error */
>> +struct zpci_ccdf_err {
>> + u32 reserved1;
>> + u32 fh; /* function handle */
>> + u32 fid; /* function id */
>> + u32 ett : 4; /* expected table type */
>> + u32 mvn : 12; /* MSI vector number */
>> + u32 dmaas : 8; /* DMA address space */
>> + u32 reserved2 : 6;
>> + u32 q : 1; /* event qualifier */
>> + u32 rw : 1; /* read/write */
>> + u64 faddr; /* failing address */
>> + u32 reserved3;
>> + u16 reserved4;
>> + u16 pec; /* PCI event code */
>> +} __packed;
>> +
>> +#define ZPCI_ERR_PENDING_MAX 4
>> +struct zpci_ccdf_pending {
>> + u8 count;
>> + u8 head;
>> + u8 tail;
>> + struct zpci_ccdf_err err[ZPCI_ERR_PENDING_MAX];
>> +};
>> +
>> /* Private data per function */
>> struct zpci_dev {
>> struct zpci_bus *zbus;
>> @@ -193,6 +218,8 @@ struct zpci_dev {
>> struct iommu_domain *s390_domain; /* attached IOMMU domain */
>> struct kvm_zdev *kzdev;
>> struct mutex kzdev_lock;
>> + struct zpci_ccdf_pending pending_errs;
>> + struct mutex pending_errs_lock;
>> spinlock_t dom_lock; /* protect s390_domain change */
>> };
>>
>> @@ -331,6 +358,7 @@ void zpci_debug_exit_device(struct zpci_dev *);
>> int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
>> int zpci_clear_error_state(struct zpci_dev *zdev);
>> int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
>> +void zpci_cleanup_pending_errors(struct zpci_dev *zdev);
>>
>> #ifdef CONFIG_NUMA
>>
>> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
>> index 87077e510266..bc253cc52056 100644
>> --- a/arch/s390/pci/pci.c
>> +++ b/arch/s390/pci/pci.c
>> @@ -915,6 +915,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
>> mutex_init(&zdev->state_lock);
>> mutex_init(&zdev->fmb_lock);
>> mutex_init(&zdev->kzdev_lock);
>> + mutex_init(&zdev->pending_errs_lock);
>>
>> return zdev;
>>
>> diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
>> index de504925f709..5b24f3a9fe23 100644
>> --- a/arch/s390/pci/pci_event.c
>> +++ b/arch/s390/pci/pci_event.c
>> @@ -17,23 +17,6 @@
>> #include "pci_bus.h"
>> #include "pci_report.h"
>>
>> -/* Content Code Description for PCI Function Error */
>> -struct zpci_ccdf_err {
>> - u32 reserved1;
>> - u32 fh; /* function handle */
>> - u32 fid; /* function id */
>> - u32 ett : 4; /* expected table type */
>> - u32 mvn : 12; /* MSI vector number */
>> - u32 dmaas : 8; /* DMA address space */
>> - u32 : 6;
>> - u32 q : 1; /* event qualifier */
>> - u32 rw : 1; /* read/write */
>> - u64 faddr; /* failing address */
>> - u32 reserved3;
>> - u16 reserved4;
>> - u16 pec; /* PCI event code */
>> -} __packed;
>> -
>> /* Content Code Description for PCI Function Availability */
>> struct zpci_ccdf_avail {
>> u32 reserved1;
>> @@ -75,6 +58,41 @@ static bool is_driver_supported(struct pci_driver *driver)
>> return true;
>> }
>>
>> +static void zpci_store_pci_error(struct pci_dev *pdev,
>> + struct zpci_ccdf_err *ccdf)
>> +{
>> + struct zpci_dev *zdev = to_zpci(pdev);
>> + int i;
>> +
>> + mutex_lock(&zdev->pending_errs_lock);
>> + if (zdev->pending_errs.count >= ZPCI_ERR_PENDING_MAX) {
>> + pr_err("%s: Maximum number (%d) of pending error events queued",
>> + pci_name(pdev), ZPCI_ERR_PENDING_MAX);
>> + mutex_unlock(&zdev->pending_errs_lock);
>> + return;
>> + }
>> +
>> + i = zdev->pending_errs.tail % ZPCI_ERR_PENDING_MAX;
>> + memcpy(&zdev->pending_errs.err[i], ccdf, sizeof(struct zpci_ccdf_err));
>> + zdev->pending_errs.tail++;
>> + zdev->pending_errs.count++;
>> + mutex_unlock(&zdev->pending_errs_lock);
>> +}
>> +
>> +void zpci_cleanup_pending_errors(struct zpci_dev *zdev)
>> +{
>> + struct pci_dev *pdev = NULL;
>> +
>> + guard(mutex)(&zdev->pending_errs_lock);
>> + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
>> + if (zdev->pending_errs.count)
>> + pr_info("%s: Unhandled PCI error events count=%d",
>> + pci_name(pdev), zdev->pending_errs.count);
>> + memset(&zdev->pending_errs, 0, sizeof(struct zpci_ccdf_pending));
>> + pci_dev_put(pdev);
>> +}
>> +EXPORT_SYMBOL_GPL(zpci_cleanup_pending_errors);
>> +
>> static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
>> struct pci_driver *driver)
>> {
>> @@ -169,7 +187,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
>> * and the platform determines which functions are affected for
>> * multi-function devices.
>> */
>> -static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>> +static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev,
>> + struct zpci_ccdf_err *ccdf)
>> {
>> pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
>> struct zpci_dev *zdev = to_zpci(pdev);
>> @@ -188,13 +207,6 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>> }
>> pdev->error_state = pci_channel_io_frozen;
>>
>> - if (needs_mediated_recovery(pdev)) {
>> - pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
>> - pci_name(pdev));
>> - status_str = "failed (pass-through)";
>> - goto out_unlock;
>> - }
>> -
>> driver = to_pci_driver(pdev->dev.driver);
>> if (!is_driver_supported(driver)) {
>> if (!driver) {
>> @@ -210,12 +222,23 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>> goto out_unlock;
>> }
>>
>> + if (needs_mediated_recovery(pdev))
>> + zpci_store_pci_error(pdev, ccdf);
>> +
>> ers_res = zpci_event_notify_error_detected(pdev, driver);
>> if (ers_result_indicates_abort(ers_res)) {
>> status_str = "failed (abort on detection)";
>> goto out_unlock;
>> }
>>
>> + if (needs_mediated_recovery(pdev)) {
>> + pr_info("%s: Leaving recovery of pass-through device to user-space\n",
>> + pci_name(pdev));
>> + ers_res = PCI_ERS_RESULT_RECOVERED;
>> + status_str = "in progress";
>> + goto out_unlock;
>> + }
>> +
>> if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
>> ers_res = zpci_event_do_error_state_clear(pdev, driver);
>> if (ers_result_indicates_abort(ers_res)) {
>> @@ -260,25 +283,20 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>> * @pdev: PCI function for which to report
>> * @es: PCI channel failure state to report
>> */
>> -static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
>> +static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es,
>> + struct zpci_ccdf_err *ccdf)
>> {
>> struct pci_driver *driver;
>>
>> pci_dev_lock(pdev);
>> pdev->error_state = es;
>> - /**
>> - * While vfio-pci's error_detected callback notifies user-space QEMU
>> - * reacts to this by freezing the guest. In an s390 environment PCI
>> - * errors are rarely fatal so this is overkill. Instead in the future
>> - * we will inject the error event and let the guest recover the device
>> - * itself.
>> - */
>> +
>> if (needs_mediated_recovery(pdev))
>> - goto out;
>> + zpci_store_pci_error(pdev, ccdf);
>> driver = to_pci_driver(pdev->dev.driver);
>> if (driver && driver->err_handler && driver->err_handler->error_detected)
>> driver->err_handler->error_detected(pdev, pdev->error_state);
>> -out:
>> +
>> pci_dev_unlock(pdev);
>> }
>>
>> @@ -324,12 +342,12 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
>> break;
>> case 0x0040: /* Service Action or Error Recovery Failed */
>> case 0x003b:
>> - zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
>> + zpci_event_io_failure(pdev, pci_channel_io_perm_failure, ccdf);
>> break;
>> default: /* PCI function left in the error state attempt to recover */
>> - ers_res = zpci_event_attempt_error_recovery(pdev);
>> + ers_res = zpci_event_attempt_error_recovery(pdev, ccdf);
>> if (ers_res != PCI_ERS_RESULT_RECOVERED)
>> - zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
>> + zpci_event_io_failure(pdev, pci_channel_io_perm_failure, ccdf);
>> break;
>> }
>> pci_dev_put(pdev);
>> diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
>> index a7bc23ce8483..2be37eab9279 100644
>> --- a/drivers/vfio/pci/vfio_pci_zdev.c
>> +++ b/drivers/vfio/pci/vfio_pci_zdev.c
>> @@ -168,6 +168,8 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
>>
>> zdev->mediated_recovery = false;
>>
>> + zpci_cleanup_pending_errors(zdev);
>> +
>> if (!vdev->vdev.kvm)
>> return;
>>
> It begins to look here like the mediated_recovery should be protected
> by pending_errs_lock and perhaps there should be
> zpci_{start,stop}_mediated_recovery() where we set and clear the flag
> under mutex, while also clearing pending errors in the latter case.
> The various needs_mediated_recovery tests could be pulled in to test
> under mutex as well. Thanks,
>
> Alex
Thanks, Alex, for taking a look at the patches. I agree that having the
mediated_recovery flag protected by pending_errs_lock is a better
approach. I will add the zpci_{start,stop}_mediated_recovery() interfaces
you suggested.
Thanks
Farhan
next prev parent reply other threads:[~2026-03-25 18:06 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-16 19:15 [PATCH v11 0/9] Error recovery for vfio-pci devices on s390x Farhan Ali
2026-03-16 19:15 ` [PATCH v11 1/9] PCI: Allow per function PCI slots Farhan Ali
2026-03-24 21:55 ` Bjorn Helgaas
2026-03-24 23:08 ` Farhan Ali
2026-03-24 23:20 ` Bjorn Helgaas
2026-03-16 19:15 ` [PATCH v11 2/9] s390/pci: Add architecture specific resource/bus address translation Farhan Ali
2026-03-24 23:06 ` Bjorn Helgaas
2026-03-24 23:47 ` Farhan Ali
2026-03-25 11:58 ` Ilpo Järvinen
2026-03-25 17:44 ` Farhan Ali
2026-03-16 19:15 ` [PATCH v11 3/9] PCI: Avoid saving config space state if inaccessible Farhan Ali
2026-03-24 21:40 ` Bjorn Helgaas
2026-03-24 22:38 ` Farhan Ali
2026-03-24 22:52 ` Bjorn Helgaas
2026-03-16 19:15 ` [PATCH v11 4/9] PCI: Add additional checks for flr reset Farhan Ali
2026-03-24 22:49 ` Bjorn Helgaas
2026-03-24 23:22 ` Farhan Ali
2026-03-25 16:25 ` Alex Williamson
2026-03-25 18:40 ` Farhan Ali
2026-03-16 19:15 ` [PATCH v11 5/9] s390/pci: Update the logic for detecting passthrough device Farhan Ali
2026-03-25 16:46 ` Alex Williamson
2026-03-16 19:15 ` [PATCH v11 6/9] s390/pci: Store PCI error information for passthrough devices Farhan Ali
2026-03-25 17:01 ` Alex Williamson
2026-03-25 18:06 ` Farhan Ali [this message]
2026-03-16 19:15 ` [PATCH v11 7/9] vfio-pci/zdev: Add a device feature for error information Farhan Ali
2026-03-25 17:18 ` Alex Williamson
2026-03-16 19:15 ` [PATCH v11 8/9] vfio: Add a reset_done callback for vfio-pci driver Farhan Ali
2026-03-25 17:30 ` Alex Williamson
2026-03-16 19:15 ` [PATCH v11 9/9] vfio: Remove the pcie check for VFIO_PCI_ERR_IRQ_INDEX Farhan Ali
2026-03-24 21:26 ` Bjorn Helgaas
2026-03-24 22:30 ` Farhan Ali
2026-03-25 17:50 ` Alex Williamson
2026-03-24 19:34 ` [PATCH v11 0/9] Error recovery for vfio-pci devices on s390x Farhan Ali
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=14b68366-9942-4487-8388-1120d243f6f2@linux.ibm.com \
--to=alifm@linux.ibm.com \
--cc=alex@shazbot.org \
--cc=clg@redhat.com \
--cc=helgaas@kernel.org \
--cc=kbusch@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=lukas@wunner.de \
--cc=mjrosato@linux.ibm.com \
--cc=schnelle@linux.ibm.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox