Linux PCI subsystem development
 help / color / mirror / Atom feed
From: "Ilpo Järvinen" <ilpo.jarvinen@linux.intel.com>
To: Guanghui Feng <guanghuifeng@linux.alibaba.com>
Cc: bhelgaas@google.com, linux-pci@vger.kernel.org,
	 alikernel-developer@linux.alibaba.com
Subject: Re: [PATCH v3] PCI: Fix PCIe SBR dev/link wait error
Date: Mon, 1 Dec 2025 11:24:51 +0200 (EET)	[thread overview]
Message-ID: <1086aed0-584b-4ab3-1b84-687e53ddddf9@linux.intel.com> (raw)
In-Reply-To: <20251130051735.3123755-1-guanghuifeng@linux.alibaba.com>

On Sun, 30 Nov 2025, Guanghui Feng wrote:

> When executing a PCIe secondary bus reset, all downstream switches and
> endpoints will generate reset events. Simultaneously, all PCIe links
> will undergo retraining, and each link will independently re-execute the
> LTSSM state machine training. Therefore, after executing the SBR, it is
> necessary to wait for all downstream links and devices to complete
> recovery. Otherwise, after the SBR returns, accessing devices with some
> links or endpoints not yet fully recovered may result in driver errors,
> or even trigger device offline issues.
> 
> Signed-off-by: Guanghui Feng <guanghuifeng@linux.alibaba.com>
> Reviewed-by: Guixin Liu <kanie@linux.alibaba.com>
> ---

The review comment I gave to v2 still stands to this v3 as well.

Please don't send new versions too rapidly but give people time to comment 
on the previous version. And please remember to explain what you changed 
in each version (below the -- line).

-- 
 i.

>  drivers/pci/pci.c | 141 +++++++++++++++++++++++++++++++++-------------
>  1 file changed, 101 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index b14dd064006c..d91c65145739 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -4788,6 +4788,63 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
>  	return max(min_delay, max_delay);
>  }
>  
> +static int pci_readiness_check(struct pci_dev *pdev, struct pci_dev *child,
> +		unsigned long start_t, char *reset_type)
> +{
> +	int elapsed = jiffies_to_msecs(jiffies - start_t);
> +
> +	if (pci_dev_is_disconnected(pdev) || pci_dev_is_disconnected(child))
> +		return 0;
> +
> +	if (pcie_get_speed_cap(pdev) <= PCIE_SPEED_5_0GT) {
> +		u16 status;
> +
> +		pci_dbg(pdev, "waiting %d ms for downstream link\n", elapsed);
> +
> +		if (!pci_dev_wait(child, reset_type, 0))
> +			return 0;
> +
> +		if (PCI_RESET_WAIT > elapsed)
> +			return PCI_RESET_WAIT - elapsed;
> +
> +		/*
> +		 * If the port supports active link reporting we now check
> +		 * whether the link is active and if not bail out early with
> +		 * the assumption that the device is not present anymore.
> +		 */
> +		if (!pdev->link_active_reporting)
> +			return -ENOTTY;
> +
> +		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &status);
> +		if (!(status & PCI_EXP_LNKSTA_DLLLA))
> +			return -ENOTTY;
> +
> +		if (!pci_dev_wait(child, reset_type, 0))
> +			return 0;
> +
> +		if (PCIE_RESET_READY_POLL_MS > elapsed)
> +			return PCIE_RESET_READY_POLL_MS - elapsed;
> +
> +		return -ENOTTY;
> +	}
> +
> +	pci_dbg(pdev, "waiting %d ms for downstream link, after activation\n",
> +		elapsed);
> +	if (!pcie_wait_for_link_delay(pdev, true, 0)) {
> +		/* Did not train, no need to wait any further */
> +		pci_info(pdev, "Data Link Layer Link Active not set in %d msec\n", elapsed);
> +		return -ENOTTY;
> +	}
> +
> +	if (!pci_dev_wait(child, reset_type, 0))
> +		return 0;
> +
> +	if (PCIE_RESET_READY_POLL_MS > elapsed)
> +		return PCIE_RESET_READY_POLL_MS - elapsed;
> +
> +	return -ENOTTY;
> +}
> +
>  /**
>   * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
>   * @dev: PCI bridge
> @@ -4802,12 +4859,14 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
>   * 4.3.2.
>   *
>   * Return 0 on success or -ENOTTY if the first device on the secondary bus
> - * failed to become accessible.
> + * failed to become accessible or a value greater than 0 indicates the
> + * left required waiting time..
>   */
> -int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
> +static int __pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, unsigned long start_t,
> +		char *reset_type)
>  {
> -	struct pci_dev *child __free(pci_dev_put) = NULL;
> -	int delay;
> +	struct pci_dev *child;
> +	int delay, ret, elapsed = jiffies_to_msecs(jiffies - start_t);
>  
>  	if (pci_dev_is_disconnected(dev))
>  		return 0;
> @@ -4835,8 +4894,6 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>  		return 0;
>  	}
>  
> -	child = pci_dev_get(list_first_entry(&dev->subordinate->devices,
> -					     struct pci_dev, bus_list));
>  	up_read(&pci_bus_sem);
>  
>  	/*
> @@ -4844,8 +4901,10 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>  	 * accessing the device after reset (that is 1000 ms + 100 ms).
>  	 */
>  	if (!pci_is_pcie(dev)) {
> -		pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
> -		msleep(1000 + delay);
> +		if (1000 + delay > elapsed)
> +			return 1000 + delay - elapsed;
> +
> +		pci_dbg(dev, "waiting %d ms for secondary bus\n", elapsed);
>  		return 0;
>  	}
>  
> @@ -4867,41 +4926,45 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>  	if (!pcie_downstream_port(dev))
>  		return 0;
>  
> -	if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
> -		u16 status;
> -
> -		pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
> -		msleep(delay);
> -
> -		if (!pci_dev_wait(child, reset_type, PCI_RESET_WAIT - delay))
> -			return 0;
> +	if (delay > elapsed)
> +		return delay - elapsed;
>  
> +	down_read(&pci_bus_sem);
> +	list_for_each_entry(child, &dev->subordinate->devices, bus_list) {
>  		/*
> -		 * If the port supports active link reporting we now check
> -		 * whether the link is active and if not bail out early with
> -		 * the assumption that the device is not present anymore.
> +		 * Check if all devices under the same bus have completed
> +		 * the reset process, including multifunction devices in
> +		 * the same bus.
>  		 */
> -		if (!dev->link_active_reporting)
> -			return -ENOTTY;
> +		ret = pci_readiness_check(dev, child, start_t, reset_type);
>  
> -		pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &status);
> -		if (!(status & PCI_EXP_LNKSTA_DLLLA))
> -			return -ENOTTY;
> +		if (ret == 0 && child->subordinate)
> +			ret = __pci_bridge_wait_for_secondary_bus(child, start_t, reset_type);
>  
> -		return pci_dev_wait(child, reset_type,
> -				    PCIE_RESET_READY_POLL_MS - PCI_RESET_WAIT);
> +		if(ret)
> +			break;
>  	}
> +	up_read(&pci_bus_sem);
>  
> -	pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
> -		delay);
> -	if (!pcie_wait_for_link_delay(dev, true, delay)) {
> -		/* Did not train, no need to wait any further */
> -		pci_info(dev, "Data Link Layer Link Active not set in %d msec\n", delay);
> -		return -ENOTTY;
> +	return ret;
> +}
> +
> +int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
> +{
> +	int res, gap = 1;
> +	unsigned long start_t = jiffies;
> +
> +	res = __pci_bridge_wait_for_secondary_bus(dev, start_t, reset_type);
> +
> +	while (res > 0) {
> +		gap = gap < res ? gap : res;
> +		msleep(gap);
> +		gap <<= 1;
> +
> +		res = __pci_bridge_wait_for_secondary_bus(dev, start_t, reset_type);
>  	}
>  
> -	return pci_dev_wait(child, reset_type,
> -			    PCIE_RESET_READY_POLL_MS - delay);
> +	return res;
>  }
>  
>  void pci_reset_secondary_bus(struct pci_dev *dev)
> @@ -5542,10 +5605,8 @@ static void pci_bus_restore_locked(struct pci_bus *bus)
>  
>  	list_for_each_entry(dev, &bus->devices, bus_list) {
>  		pci_dev_restore(dev);
> -		if (dev->subordinate) {
> -			pci_bridge_wait_for_secondary_bus(dev, "bus reset");
> +		if (dev->subordinate)
>  			pci_bus_restore_locked(dev->subordinate);
> -		}
>  	}
>  }
>  
> @@ -5575,14 +5636,14 @@ static void pci_slot_restore_locked(struct pci_slot *slot)
>  {
>  	struct pci_dev *dev;
>  
> +	pci_bridge_wait_for_secondary_bus(slot->bus->self, "slot reset");
> +
>  	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
>  		if (!dev->slot || dev->slot != slot)
>  			continue;
>  		pci_dev_restore(dev);
> -		if (dev->subordinate) {
> -			pci_bridge_wait_for_secondary_bus(dev, "slot reset");
> +		if (dev->subordinate)
>  			pci_bus_restore_locked(dev->subordinate);
> -		}
>  	}
>  }
>  
> 


  reply	other threads:[~2025-12-01  9:24 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-24 10:45 [PATCH] PCI: Fix PCIe SBR dev/link wait error Guanghui Feng
2025-11-24 23:58 ` Bjorn Helgaas
2025-11-25  6:20   ` guanghui.fgh
2025-12-01 10:03     ` Lukas Wunner
2025-12-01 12:56       ` guanghuifeng
2025-12-01 13:26         ` Lukas Wunner
2025-12-01 14:46           ` guanghuifeng
2025-12-01 16:18             ` Lukas Wunner
2025-11-26  8:20 ` Ilpo Järvinen
2025-11-26 12:08   ` guanghui.fgh
2025-11-26 12:37     ` Ilpo Järvinen
2025-11-26 14:22       ` guanghui.fgh
2025-11-26 14:47         ` Ilpo Järvinen
2025-11-29 16:36           ` [PATCH v2] " Guanghui Feng
2025-12-01  9:21             ` Ilpo Järvinen
2025-12-01 12:21               ` guanghuifeng
2025-12-01 13:08                 ` Ilpo Järvinen
2025-12-02  4:32                   ` [PATCH v4 0/1] " Guanghui Feng
2025-12-02  4:32                     ` [PATCH v4 v4 1/1] " Guanghui Feng
2025-12-02 16:49                       ` Bjorn Helgaas
2025-12-02 16:51                         ` Bjorn Helgaas
2025-12-03 14:41                       ` Ilpo Järvinen
2025-11-30  5:17           ` [PATCH v3] " Guanghui Feng
2025-12-01  9:24             ` Ilpo Järvinen [this message]
2025-12-01 12:31               ` guanghuifeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1086aed0-584b-4ab3-1b84-687e53ddddf9@linux.intel.com \
    --to=ilpo.jarvinen@linux.intel.com \
    --cc=alikernel-developer@linux.alibaba.com \
    --cc=bhelgaas@google.com \
    --cc=guanghuifeng@linux.alibaba.com \
    --cc=linux-pci@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox