public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex@shazbot.org>
To: Farhan Ali <alifm@linux.ibm.com>
Cc: linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-pci@vger.kernel.org, helgaas@kernel.org, lukas@wunner.de,
	kbusch@kernel.org, clg@redhat.com, stable@vger.kernel.org,
	schnelle@linux.ibm.com, mjrosato@linux.ibm.com, alex@shazbot.org
Subject: Re: [PATCH v11 6/9] s390/pci: Store PCI error information for passthrough devices
Date: Wed, 25 Mar 2026 11:01:58 -0600	[thread overview]
Message-ID: <20260325110158.6ec66502@shazbot.org> (raw)
In-Reply-To: <20260316191544.2279-7-alifm@linux.ibm.com>

On Mon, 16 Mar 2026 12:15:41 -0700
Farhan Ali <alifm@linux.ibm.com> wrote:

> For a passthrough device we need co-operation from user space to recover
> the device. This would require to bubble up any error information to user
> space.  Let's store this error information for passthrough devices, so it
> can be retrieved later.
> 
> Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
> ---
>  arch/s390/include/asm/pci.h      | 28 ++++++++++
>  arch/s390/pci/pci.c              |  1 +
>  arch/s390/pci/pci_event.c        | 94 +++++++++++++++++++-------------
>  drivers/vfio/pci/vfio_pci_zdev.c |  2 +
>  4 files changed, 87 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
> index ec8a772bf526..383f6483b656 100644
> --- a/arch/s390/include/asm/pci.h
> +++ b/arch/s390/include/asm/pci.h
> @@ -118,6 +118,31 @@ struct zpci_bus {
>  	enum pci_bus_speed	max_bus_speed;
>  };
>  
> +/* Content Code Description for PCI Function Error */
> +struct zpci_ccdf_err {
> +	u32 reserved1;
> +	u32 fh;                         /* function handle */
> +	u32 fid;                        /* function id */
> +	u32 ett         :  4;           /* expected table type */
> +	u32 mvn         : 12;           /* MSI vector number */
> +	u32 dmaas       :  8;           /* DMA address space */
> +	u32 reserved2   :  6;
> +	u32 q           :  1;           /* event qualifier */
> +	u32 rw          :  1;           /* read/write */
> +	u64 faddr;                      /* failing address */
> +	u32 reserved3;
> +	u16 reserved4;
> +	u16 pec;                        /* PCI event code */
> +} __packed;
> +
> +#define ZPCI_ERR_PENDING_MAX 4
> +struct zpci_ccdf_pending {
> +	u8 count;
> +	u8 head;
> +	u8 tail;
> +	struct zpci_ccdf_err err[ZPCI_ERR_PENDING_MAX];
> +};
> +
>  /* Private data per function */
>  struct zpci_dev {
>  	struct zpci_bus *zbus;
> @@ -193,6 +218,8 @@ struct zpci_dev {
>  	struct iommu_domain *s390_domain; /* attached IOMMU domain */
>  	struct kvm_zdev *kzdev;
>  	struct mutex kzdev_lock;
> +	struct zpci_ccdf_pending pending_errs;
> +	struct mutex pending_errs_lock;
>  	spinlock_t dom_lock;		/* protect s390_domain change */
>  };
>  
> @@ -331,6 +358,7 @@ void zpci_debug_exit_device(struct zpci_dev *);
>  int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
>  int zpci_clear_error_state(struct zpci_dev *zdev);
>  int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
> +void zpci_cleanup_pending_errors(struct zpci_dev *zdev);
>  
>  #ifdef CONFIG_NUMA
>  
> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
> index 87077e510266..bc253cc52056 100644
> --- a/arch/s390/pci/pci.c
> +++ b/arch/s390/pci/pci.c
> @@ -915,6 +915,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
>  	mutex_init(&zdev->state_lock);
>  	mutex_init(&zdev->fmb_lock);
>  	mutex_init(&zdev->kzdev_lock);
> +	mutex_init(&zdev->pending_errs_lock);
>  
>  	return zdev;
>  
> diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
> index de504925f709..5b24f3a9fe23 100644
> --- a/arch/s390/pci/pci_event.c
> +++ b/arch/s390/pci/pci_event.c
> @@ -17,23 +17,6 @@
>  #include "pci_bus.h"
>  #include "pci_report.h"
>  
> -/* Content Code Description for PCI Function Error */
> -struct zpci_ccdf_err {
> -	u32 reserved1;
> -	u32 fh;				/* function handle */
> -	u32 fid;			/* function id */
> -	u32 ett		:  4;		/* expected table type */
> -	u32 mvn		: 12;		/* MSI vector number */
> -	u32 dmaas	:  8;		/* DMA address space */
> -	u32		:  6;
> -	u32 q		:  1;		/* event qualifier */
> -	u32 rw		:  1;		/* read/write */
> -	u64 faddr;			/* failing address */
> -	u32 reserved3;
> -	u16 reserved4;
> -	u16 pec;			/* PCI event code */
> -} __packed;
> -
>  /* Content Code Description for PCI Function Availability */
>  struct zpci_ccdf_avail {
>  	u32 reserved1;
> @@ -75,6 +58,41 @@ static bool is_driver_supported(struct pci_driver *driver)
>  	return true;
>  }
>  
> +static void zpci_store_pci_error(struct pci_dev *pdev,
> +				 struct zpci_ccdf_err *ccdf)
> +{
> +	struct zpci_dev *zdev = to_zpci(pdev);
> +	int i;
> +
> +	mutex_lock(&zdev->pending_errs_lock);
> +	if (zdev->pending_errs.count >= ZPCI_ERR_PENDING_MAX) {
> +		pr_err("%s: Maximum number (%d) of pending error events queued",
> +		       pci_name(pdev), ZPCI_ERR_PENDING_MAX);
> +		mutex_unlock(&zdev->pending_errs_lock);
> +		return;
> +	}
> +
> +	i = zdev->pending_errs.tail % ZPCI_ERR_PENDING_MAX;
> +	memcpy(&zdev->pending_errs.err[i], ccdf, sizeof(struct zpci_ccdf_err));
> +	zdev->pending_errs.tail++;
> +	zdev->pending_errs.count++;
> +	mutex_unlock(&zdev->pending_errs_lock);
> +}
> +
> +void zpci_cleanup_pending_errors(struct zpci_dev *zdev)
> +{
> +	struct pci_dev *pdev = NULL;
> +
> +	guard(mutex)(&zdev->pending_errs_lock);
> +	pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
> +	if (zdev->pending_errs.count)
> +		pr_info("%s: Unhandled PCI error events count=%d",
> +			pci_name(pdev), zdev->pending_errs.count);
> +	memset(&zdev->pending_errs, 0, sizeof(struct zpci_ccdf_pending));
> +	pci_dev_put(pdev);
> +}
> +EXPORT_SYMBOL_GPL(zpci_cleanup_pending_errors);
> +
>  static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
>  							 struct pci_driver *driver)
>  {
> @@ -169,7 +187,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
>   * and the platform determines which functions are affected for
>   * multi-function devices.
>   */
> -static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
> +static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev,
> +							  struct zpci_ccdf_err *ccdf)
>  {
>  	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
>  	struct zpci_dev *zdev = to_zpci(pdev);
> @@ -188,13 +207,6 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>  	}
>  	pdev->error_state = pci_channel_io_frozen;
>  
> -	if (needs_mediated_recovery(pdev)) {
> -		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
> -			pci_name(pdev));
> -		status_str = "failed (pass-through)";
> -		goto out_unlock;
> -	}
> -
>  	driver = to_pci_driver(pdev->dev.driver);
>  	if (!is_driver_supported(driver)) {
>  		if (!driver) {
> @@ -210,12 +222,23 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>  		goto out_unlock;
>  	}
>  
> +	if (needs_mediated_recovery(pdev))
> +		zpci_store_pci_error(pdev, ccdf);
> +
>  	ers_res = zpci_event_notify_error_detected(pdev, driver);
>  	if (ers_result_indicates_abort(ers_res)) {
>  		status_str = "failed (abort on detection)";
>  		goto out_unlock;
>  	}
>  
> +	if (needs_mediated_recovery(pdev)) {
> +		pr_info("%s: Leaving recovery of pass-through device to user-space\n",
> +			pci_name(pdev));
> +		ers_res = PCI_ERS_RESULT_RECOVERED;
> +		status_str = "in progress";
> +		goto out_unlock;
> +	}
> +
>  	if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
>  		ers_res = zpci_event_do_error_state_clear(pdev, driver);
>  		if (ers_result_indicates_abort(ers_res)) {
> @@ -260,25 +283,20 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
>   * @pdev: PCI function for which to report
>   * @es: PCI channel failure state to report
>   */
> -static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
> +static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es,
> +				  struct zpci_ccdf_err *ccdf)
>  {
>  	struct pci_driver *driver;
>  
>  	pci_dev_lock(pdev);
>  	pdev->error_state = es;
> -	/**
> -	 * While vfio-pci's error_detected callback notifies user-space QEMU
> -	 * reacts to this by freezing the guest. In an s390 environment PCI
> -	 * errors are rarely fatal so this is overkill. Instead in the future
> -	 * we will inject the error event and let the guest recover the device
> -	 * itself.
> -	 */
> +
>  	if (needs_mediated_recovery(pdev))
> -		goto out;
> +		zpci_store_pci_error(pdev, ccdf);
>  	driver = to_pci_driver(pdev->dev.driver);
>  	if (driver && driver->err_handler && driver->err_handler->error_detected)
>  		driver->err_handler->error_detected(pdev, pdev->error_state);
> -out:
> +
>  	pci_dev_unlock(pdev);
>  }
>  
> @@ -324,12 +342,12 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
>  		break;
>  	case 0x0040: /* Service Action or Error Recovery Failed */
>  	case 0x003b:
> -		zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
> +		zpci_event_io_failure(pdev, pci_channel_io_perm_failure, ccdf);
>  		break;
>  	default: /* PCI function left in the error state attempt to recover */
> -		ers_res = zpci_event_attempt_error_recovery(pdev);
> +		ers_res = zpci_event_attempt_error_recovery(pdev, ccdf);
>  		if (ers_res != PCI_ERS_RESULT_RECOVERED)
> -			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
> +			zpci_event_io_failure(pdev, pci_channel_io_perm_failure, ccdf);
>  		break;
>  	}
>  	pci_dev_put(pdev);
> diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
> index a7bc23ce8483..2be37eab9279 100644
> --- a/drivers/vfio/pci/vfio_pci_zdev.c
> +++ b/drivers/vfio/pci/vfio_pci_zdev.c
> @@ -168,6 +168,8 @@ void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
>  
>  	zdev->mediated_recovery = false;
>  
> +	zpci_cleanup_pending_errors(zdev);
> +
>  	if (!vdev->vdev.kvm)
>  		return;
>  

It begins to look here like the mediated_recovery should be protected
by pending_errs_lock and perhaps there should be
zpci_{start,stop}_mediated_recovery() where we set and clear the flag
under mutex, while also clearing pending errors in the latter case.
The various needs_mediated_recovery tests could be pulled in to test
under mutex as well.  Thanks,

Alex

  reply	other threads:[~2026-03-25 17:02 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-16 19:15 [PATCH v11 0/9] Error recovery for vfio-pci devices on s390x Farhan Ali
2026-03-16 19:15 ` [PATCH v11 1/9] PCI: Allow per function PCI slots Farhan Ali
2026-03-24 21:55   ` Bjorn Helgaas
2026-03-24 23:08     ` Farhan Ali
2026-03-24 23:20       ` Bjorn Helgaas
2026-03-16 19:15 ` [PATCH v11 2/9] s390/pci: Add architecture specific resource/bus address translation Farhan Ali
2026-03-24 23:06   ` Bjorn Helgaas
2026-03-24 23:47     ` Farhan Ali
2026-03-25 11:58     ` Ilpo Järvinen
2026-03-25 17:44       ` Farhan Ali
2026-03-16 19:15 ` [PATCH v11 3/9] PCI: Avoid saving config space state if inaccessible Farhan Ali
2026-03-24 21:40   ` Bjorn Helgaas
2026-03-24 22:38     ` Farhan Ali
2026-03-24 22:52       ` Bjorn Helgaas
2026-03-16 19:15 ` [PATCH v11 4/9] PCI: Add additional checks for flr reset Farhan Ali
2026-03-24 22:49   ` Bjorn Helgaas
2026-03-24 23:22     ` Farhan Ali
2026-03-25 16:25     ` Alex Williamson
2026-03-25 18:40       ` Farhan Ali
2026-03-16 19:15 ` [PATCH v11 5/9] s390/pci: Update the logic for detecting passthrough device Farhan Ali
2026-03-25 16:46   ` Alex Williamson
2026-03-16 19:15 ` [PATCH v11 6/9] s390/pci: Store PCI error information for passthrough devices Farhan Ali
2026-03-25 17:01   ` Alex Williamson [this message]
2026-03-25 18:06     ` Farhan Ali
2026-03-16 19:15 ` [PATCH v11 7/9] vfio-pci/zdev: Add a device feature for error information Farhan Ali
2026-03-25 17:18   ` Alex Williamson
2026-03-16 19:15 ` [PATCH v11 8/9] vfio: Add a reset_done callback for vfio-pci driver Farhan Ali
2026-03-25 17:30   ` Alex Williamson
2026-03-16 19:15 ` [PATCH v11 9/9] vfio: Remove the pcie check for VFIO_PCI_ERR_IRQ_INDEX Farhan Ali
2026-03-24 21:26   ` Bjorn Helgaas
2026-03-24 22:30     ` Farhan Ali
2026-03-25 17:50     ` Alex Williamson
2026-03-24 19:34 ` [PATCH v11 0/9] Error recovery for vfio-pci devices on s390x Farhan Ali

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260325110158.6ec66502@shazbot.org \
    --to=alex@shazbot.org \
    --cc=alifm@linux.ibm.com \
    --cc=clg@redhat.com \
    --cc=helgaas@kernel.org \
    --cc=kbusch@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=lukas@wunner.de \
    --cc=mjrosato@linux.ibm.com \
    --cc=schnelle@linux.ibm.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox