From: Alex Williamson <alex@shazbot.org>
To: Farhan Ali <alifm@linux.ibm.com>
Cc: linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, helgaas@kernel.org,
schnelle@linux.ibm.com, mjrosato@linux.ibm.com, alex@shazbot.org
Subject: Re: [PATCH v18 1/4] s390/pci: Store PCI error information for passthrough devices
Date: Wed, 3 Jun 2026 16:20:06 -0600 [thread overview]
Message-ID: <20260603162006.27b78de5@shazbot.org> (raw)
In-Reply-To: <20260603182415.2324-2-alifm@linux.ibm.com>
On Wed, 3 Jun 2026 11:24:12 -0700
Farhan Ali <alifm@linux.ibm.com> wrote:
> @@ -81,6 +52,47 @@ static bool is_driver_supported(struct pci_driver *driver)
> return true;
> }
>
> +static int zpci_store_pci_error(struct pci_dev *pdev,
> + struct zpci_ccdf_err *ccdf)
> +{
> + struct zpci_dev *zdev = to_zpci(pdev);
> + int i;
> +
> + guard(mutex)(&zdev->pending_errs_lock);
> + if (!zdev->pending_errs.mediated_recovery)
> + return -EINVAL;
> +
> + if (zdev->pending_errs.count >= ZPCI_ERR_PENDING_MAX) {
> + pr_err("%s: Maximum number (%d) of pending error events queued\n",
> + pci_name(pdev), ZPCI_ERR_PENDING_MAX);
Is this really an err condition or just a warn? Nothing is
fundamentally broken here, the queue is just full and we're losing
errors. Maybe this should be a warn?
Can this create a DoS if a device continues to generate errors and
nobody is consuming them? Userspace could ignore the error. This
should probably be _ratelimited.
pr_err + pci_name suggests this should be a pci_ or dev_ call and since
the pci variant doesn't exist, use dev_warn_ratelimited().
> + return -ENOMEM;
> + }
> +
> + i = zdev->pending_errs.tail % ZPCI_ERR_PENDING_MAX;
> + memcpy(&zdev->pending_errs.err[i], ccdf, sizeof(struct zpci_ccdf_err));
> + zdev->pending_errs.tail++;
> + zdev->pending_errs.count++;
> + return 0;
> +}
> +
> +void zpci_start_mediated_recovery(struct zpci_dev *zdev)
> +{
> + guard(mutex)(&zdev->pending_errs_lock);
> + zdev->pending_errs.mediated_recovery = true;
> +}
> +EXPORT_SYMBOL_GPL(zpci_start_mediated_recovery);
> +
> +void zpci_stop_mediated_recovery(struct zpci_dev *zdev)
> +{
> + guard(mutex)(&zdev->pending_errs_lock);
> + zdev->pending_errs.mediated_recovery = false;
> + if (zdev->pending_errs.count)
> + pr_info("Unhandled PCI error events count=%d for PCI function 0x%x\n",
> + zdev->pending_errs.count, zdev->fid);
It seems like there's always a race that an error could occur as the
user is closing the device. Is this really worth logging at anything
more than a dbg level, pci_dbg() in this case?
> + memset(&zdev->pending_errs, 0, sizeof(struct zpci_ccdf_pending));
> +}
> +EXPORT_SYMBOL_GPL(zpci_stop_mediated_recovery);
> +
> static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
> struct pci_driver *driver)
> {
> @@ -175,12 +187,15 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
> * and the platform determines which functions are affected for
> * multi-function devices.
> */
> -static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
> +static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev,
> + struct zpci_ccdf_err *ccdf)
> {
> pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
> struct zpci_dev *zdev = to_zpci(pdev);
> + bool mediated_recovery = false;
> char *status_str = "success";
> struct pci_driver *driver;
> + int rc;
>
> /*
> * Ensure that the PCI function is not removed concurrently, no driver
> @@ -194,13 +209,6 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
> }
> pdev->error_state = pci_channel_io_frozen;
>
> - if (is_passed_through(pdev)) {
> - pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
> - pci_name(pdev));
> - status_str = "failed (pass-through)";
> - goto out_unlock;
> - }
> -
> driver = to_pci_driver(pdev->dev.driver);
> if (!is_driver_supported(driver)) {
> if (!driver) {
> @@ -216,12 +224,24 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
> goto out_unlock;
> }
>
> + rc = zpci_store_pci_error(pdev, ccdf);
> + if (!rc || rc == -ENOMEM)
> + mediated_recovery = true;
This is a convoluted way to get the state of
zdev->pending_errs.mediated_recovery, which becomes stale as soon as
pending_errs_lock is released anyway.
> +
> ers_res = zpci_event_notify_error_detected(pdev, driver);
> if (ers_result_indicates_abort(ers_res)) {
> status_str = "failed (abort on detection)";
> goto out_unlock;
> }
>
> + if (mediated_recovery) {
> + pr_info("%s: Leaving recovery of pass-through device to user-space\n",
> + pci_name(pdev));
> + ers_res = PCI_ERS_RESULT_RECOVERED;
> + status_str = "in progress";
> + goto out_unlock;
> + }
Since zdev->pending_errs.mediated_recovery is only valid while holding
pending_errs_lock, this is really no better than the
is_passed_through() test.
> +
> if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
> ers_res = zpci_event_do_error_state_clear(pdev, driver);
> if (ers_result_indicates_abort(ers_res)) {
> @@ -266,25 +286,19 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
> * @pdev: PCI function for which to report
> * @es: PCI channel failure state to report
> */
> -static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
> +static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es,
> + struct zpci_ccdf_err *ccdf)
> {
> struct pci_driver *driver;
>
> pci_dev_lock(pdev);
> pdev->error_state = es;
> - /**
> - * While vfio-pci's error_detected callback notifies user-space QEMU
> - * reacts to this by freezing the guest. In an s390 environment PCI
> - * errors are rarely fatal so this is overkill. Instead in the future
> - * we will inject the error event and let the guest recover the device
> - * itself.
> - */
> - if (is_passed_through(pdev))
> - goto out;
> +
> + zpci_store_pci_error(pdev, ccdf);
> driver = to_pci_driver(pdev->dev.driver);
> if (driver && driver->err_handler && driver->err_handler->error_detected)
> driver->err_handler->error_detected(pdev, pdev->error_state);
How do you intend to stage this versus QEMU changes? This seems like a
big regression if we're suddenly triggering the eventfd that causes
QEMU to halt. Do you need userspace to opt in to mediated recovery
rather than automatically enabling it on open? Thanks,
Alex
next prev parent reply other threads:[~2026-06-03 22:20 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-03 18:24 [PATCH v18 0/4] [VFIO] Error recovery for vfio-pci devices on s390x Farhan Ali
2026-06-03 18:24 ` [PATCH v18 1/4] s390/pci: Store PCI error information for passthrough devices Farhan Ali
2026-06-03 22:20 ` Alex Williamson [this message]
2026-06-03 23:35 ` Farhan Ali
2026-06-04 18:27 ` Alex Williamson
2026-06-03 18:24 ` [PATCH v18 2/4] vfio-pci/zdev: Add a device feature for error information Farhan Ali
2026-06-03 18:49 ` sashiko-bot
2026-06-03 22:37 ` Alex Williamson
2026-06-03 23:40 ` Farhan Ali
2026-06-03 18:24 ` [PATCH v18 3/4] vfio/pci: Add a reset_done callback for vfio-pci driver Farhan Ali
2026-06-03 19:04 ` sashiko-bot
2026-06-03 22:46 ` Alex Williamson
2026-06-04 0:01 ` Farhan Ali
2026-06-04 8:28 ` Keith Busch
2026-06-04 17:17 ` Farhan Ali
2026-06-04 19:57 ` Alex Williamson
2026-06-08 19:26 ` Farhan Ali
2026-06-09 19:16 ` Alex Williamson
2026-06-09 20:13 ` Farhan Ali
2026-06-04 20:42 ` Keith Busch
2026-06-05 18:41 ` Farhan Ali
2026-06-09 21:38 ` Keith Busch
2026-06-03 18:24 ` [PATCH v18 4/4] vfio/pci: Remove the pcie check for VFIO_PCI_ERR_IRQ_INDEX Farhan Ali
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260603162006.27b78de5@shazbot.org \
--to=alex@shazbot.org \
--cc=alifm@linux.ibm.com \
--cc=helgaas@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=mjrosato@linux.ibm.com \
--cc=schnelle@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.