Linux s390 Architecture development
 help / color / mirror / Atom feed
From: Alex Williamson <alex@shazbot.org>
To: Farhan Ali <alifm@linux.ibm.com>
Cc: linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-pci@vger.kernel.org, helgaas@kernel.org,
	schnelle@linux.ibm.com, mjrosato@linux.ibm.com, alex@shazbot.org
Subject: Re: [PATCH v18 1/4] s390/pci: Store PCI error information for passthrough devices
Date: Wed, 3 Jun 2026 16:20:06 -0600	[thread overview]
Message-ID: <20260603162006.27b78de5@shazbot.org> (raw)
In-Reply-To: <20260603182415.2324-2-alifm@linux.ibm.com>

On Wed,  3 Jun 2026 11:24:12 -0700
Farhan Ali <alifm@linux.ibm.com> wrote:
> @@ -81,6 +52,47 @@ static bool is_driver_supported(struct pci_driver *driver)
>  	return true;
>  }
>  
> +static int zpci_store_pci_error(struct pci_dev *pdev,
> +				 struct zpci_ccdf_err *ccdf)
> +{
> +	struct zpci_dev *zdev = to_zpci(pdev);
> +	int i;
> +
> +	guard(mutex)(&zdev->pending_errs_lock);
> +	if (!zdev->pending_errs.mediated_recovery)
> +		return -EINVAL;
> +
> +	if (zdev->pending_errs.count >= ZPCI_ERR_PENDING_MAX) {
> +		pr_err("%s: Maximum number (%d) of pending error events queued\n",
> +		       pci_name(pdev), ZPCI_ERR_PENDING_MAX);

Is this really an err condition or just a warn?  Nothing is
fundamentally broken here; the queue is just full and we're losing
errors.  Maybe this should be a warn?

Can this create a DoS if a device continues to generate errors and
nobody is consuming them?  Userspace could ignore the error.  This
should probably be _ratelimited.

pr_err + pci_name suggests this should be a pci_ or dev_ call, and since
a ratelimited pci_warn variant doesn't exist, use dev_warn_ratelimited().

> +		return -ENOMEM;
> +	}
> +
> +	i = zdev->pending_errs.tail % ZPCI_ERR_PENDING_MAX;
> +	memcpy(&zdev->pending_errs.err[i], ccdf, sizeof(struct zpci_ccdf_err));
> +	zdev->pending_errs.tail++;
> +	zdev->pending_errs.count++;
> +	return 0;
> +}
> +
> +void zpci_start_mediated_recovery(struct zpci_dev *zdev)
> +{
> +	guard(mutex)(&zdev->pending_errs_lock);
> +	zdev->pending_errs.mediated_recovery = true;
> +}
> +EXPORT_SYMBOL_GPL(zpci_start_mediated_recovery);
> +
> +void zpci_stop_mediated_recovery(struct zpci_dev *zdev)
> +{
> +	guard(mutex)(&zdev->pending_errs_lock);
> +	zdev->pending_errs.mediated_recovery = false;
> +	if (zdev->pending_errs.count)
> +		pr_info("Unhandled PCI error events count=%d for PCI function 0x%x\n",
> +			zdev->pending_errs.count, zdev->fid);

It seems like there's always a race where an error could occur as the
user is closing the device.  Is this really worth logging at anything
more than a dbg level, pci_dbg() in this case?

> +	memset(&zdev->pending_errs, 0, sizeof(struct zpci_ccdf_pending));
> +}
> +EXPORT_SYMBOL_GPL(zpci_stop_mediated_recovery);
> +
>  static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
>  							 struct pci_driver *driver)
>  {
> @@ -175,12 +187,15 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
>   * and the platform determines which functions are affected for
>   * multi-function devices.
>   */
> -static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
> +static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev,
> +							  struct zpci_ccdf_err *ccdf)
>  {
>  	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
>  	struct zpci_dev *zdev = to_zpci(pdev);
> +	bool mediated_recovery = false;
>  	char *status_str = "success";
>  	struct pci_driver *driver;
> +	int rc;
>  
>  	/*
>  	 * Ensure that the PCI function is not removed concurrently, no driver
> @@ -194,13 +209,6 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>  	}
>  	pdev->error_state = pci_channel_io_frozen;
>  
> -	if (is_passed_through(pdev)) {
> -		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
> -			pci_name(pdev));
> -		status_str = "failed (pass-through)";
> -		goto out_unlock;
> -	}
> -
>  	driver = to_pci_driver(pdev->dev.driver);
>  	if (!is_driver_supported(driver)) {
>  		if (!driver) {
> @@ -216,12 +224,24 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>  		goto out_unlock;
>  	}
>  
> +	rc = zpci_store_pci_error(pdev, ccdf);
> +	if (!rc || rc == -ENOMEM)
> +		mediated_recovery = true;

This is a convoluted way to get the state of
zdev->pending_errs.mediated_recovery, which becomes invalid outside of
pending_errs_lock anyway.

> +
>  	ers_res = zpci_event_notify_error_detected(pdev, driver);
>  	if (ers_result_indicates_abort(ers_res)) {
>  		status_str = "failed (abort on detection)";
>  		goto out_unlock;
>  	}
>  
> +	if (mediated_recovery) {
> +		pr_info("%s: Leaving recovery of pass-through device to user-space\n",
> +			pci_name(pdev));
> +		ers_res = PCI_ERS_RESULT_RECOVERED;
> +		status_str = "in progress";
> +		goto out_unlock;
> +	}

Since zdev->pending_errs.mediated_recovery is only valid while holding
pending_errs_lock, this is really no better than the
is_passed_through() test.

> +
>  	if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
>  		ers_res = zpci_event_do_error_state_clear(pdev, driver);
>  		if (ers_result_indicates_abort(ers_res)) {
> @@ -266,25 +286,19 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>   * @pdev: PCI function for which to report
>   * @es: PCI channel failure state to report
>   */
> -static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
> +static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es,
> +				  struct zpci_ccdf_err *ccdf)
>  {
>  	struct pci_driver *driver;
>  
>  	pci_dev_lock(pdev);
>  	pdev->error_state = es;
> -	/**
> -	 * While vfio-pci's error_detected callback notifies user-space QEMU
> -	 * reacts to this by freezing the guest. In an s390 environment PCI
> -	 * errors are rarely fatal so this is overkill. Instead in the future
> -	 * we will inject the error event and let the guest recover the device
> -	 * itself.
> -	 */
> -	if (is_passed_through(pdev))
> -		goto out;
> +
> +	zpci_store_pci_error(pdev, ccdf);
>  	driver = to_pci_driver(pdev->dev.driver);
>  	if (driver && driver->err_handler && driver->err_handler->error_detected)
>  		driver->err_handler->error_detected(pdev, pdev->error_state);

How do you intend to stage this versus QEMU changes?  This seems like a
big regression if we're suddenly triggering the eventfd that causes
QEMU to halt.  Do you need userspace to opt-in to mediated recovery
rather than automatically enabling it on open?  Thanks,

Alex

  reply	other threads:[~2026-06-03 22:20 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-03 18:24 [PATCH v18 0/4] [VFIO] Error recovery for vfio-pci devices on s390x Farhan Ali
2026-06-03 18:24 ` [PATCH v18 1/4] s390/pci: Store PCI error information for passthrough devices Farhan Ali
2026-06-03 22:20   ` Alex Williamson [this message]
2026-06-03 23:35     ` Farhan Ali
2026-06-04 18:27       ` Alex Williamson
2026-06-03 18:24 ` [PATCH v18 2/4] vfio-pci/zdev: Add a device feature for error information Farhan Ali
2026-06-03 22:37   ` Alex Williamson
2026-06-03 23:40     ` Farhan Ali
2026-06-03 18:24 ` [PATCH v18 3/4] vfio/pci: Add a reset_done callback for vfio-pci driver Farhan Ali
2026-06-03 22:46   ` Alex Williamson
2026-06-04  0:01     ` Farhan Ali
2026-06-04  8:28   ` Keith Busch
2026-06-04 17:17     ` Farhan Ali
2026-06-04 19:57       ` Alex Williamson
2026-06-08 19:26         ` Farhan Ali
2026-06-09 19:16           ` Alex Williamson
2026-06-09 20:13             ` Farhan Ali
2026-06-04 20:42       ` Keith Busch
2026-06-05 18:41         ` Farhan Ali
2026-06-09 21:38           ` Keith Busch
2026-06-03 18:24 ` [PATCH v18 4/4] vfio/pci: Remove the pcie check for VFIO_PCI_ERR_IRQ_INDEX Farhan Ali

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260603162006.27b78de5@shazbot.org \
    --to=alex@shazbot.org \
    --cc=alifm@linux.ibm.com \
    --cc=helgaas@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mjrosato@linux.ibm.com \
    --cc=schnelle@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox