From: Guanghui Feng <guanghuifeng@linux.alibaba.com>
To: bhelgaas@google.com
Cc: linux-pci@vger.kernel.org, kanie@linux.alibaba.com,
alikernel-developer@linux.alibaba.com
Subject: [PATCH] PCI: Fix PCIe SBR dev/link wait error
Date: Mon, 24 Nov 2025 18:45:02 +0800 [thread overview]
Message-ID: <20251124104502.777141-1-guanghuifeng@linux.alibaba.com> (raw)
When executing a PCIe secondary bus reset, all downstream switches and
endpoints will generate reset events. Simultaneously, all PCIe links
will undergo retraining, and each link will independently re-execute the
LTSSM state machine training. Therefore, after executing the SBR, it is
necessary to wait for all downstream links and devices to complete
recovery. Otherwise, after the SBR returns, accessing devices with some
links or endpoints not yet fully recovered may result in driver errors,
or even trigger device offline issues.
Signed-off-by: Guanghui Feng <guanghuifeng@linux.alibaba.com>
Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
---
drivers/pci/pci.c | 112 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 94 insertions(+), 18 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006c..9cf13fe69d94 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4788,9 +4788,74 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
return max(min_delay, max_delay);
}
+int __pci_bridge_wait_for_secondary_bus(struct pci_dev *, unsigned long, char *);
+
+static int pci_dev_wait_child(struct pci_dev *pdev, unsigned long start_t, int timeout,
+ char *reset_type)
+{
+ struct pci_dev *child, **dev = NULL;
+ int ct = 0, i = 0, ret = 0, left = 1;
+ unsigned long dev_start_t;
+
+ down_read(&pci_bus_sem);
+
+ list_for_each_entry(child, &pdev->subordinate->devices, bus_list)
+ ct++;
+
+ if (ct) {
+ dev = kzalloc(sizeof(struct pci_dev *) * ct, GFP_KERNEL);
+
+ if(!dev) {
+ pci_err(pdev, "dev mem alloc err\n");
+ up_read(&pci_bus_sem);
+ return -ENOMEM;
+ }
+
+ list_for_each_entry(child, &pdev->subordinate->devices, bus_list)
+ dev[i++] = pci_dev_get(child);
+ }
+
+ up_read(&pci_bus_sem);
+
+ for (i = 0; i < ct; ++i) {
+ left = 1;
+
+dev_wait:
+ dev_start_t = jiffies;
+ ret = pci_dev_wait(dev[i], reset_type, left);
+ timeout -= jiffies_to_msecs(jiffies - dev_start_t);
+
+ if (ret) {
+ if (pci_dev_is_disconnected(dev[i]))
+ continue;
+
+ if (timeout <= 0)
+ goto end;
+
+ left <<= 1;
+ left = timeout > left ? left : timeout;
+ goto dev_wait;
+ }
+ }
+
+ for (i = 0; i < ct; ++i) {
+ ret = __pci_bridge_wait_for_secondary_bus(dev[i], start_t, reset_type);
+ if (ret)
+ break;
+ }
+
+end:
+ for (i = 0; i < ct; ++i)
+ pci_dev_put(dev[i]);
+
+ kfree(dev);
+ return ret;
+}
+
/**
- * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
+ * __pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
* @dev: PCI bridge
+ * @start_t: wait start jiffies time
* @reset_type: reset type in human-readable form
*
* Handle necessary delays before access to the devices on the secondary
@@ -4804,10 +4869,9 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
* Return 0 on success or -ENOTTY if the first device on the secondary bus
* failed to become accessible.
*/
-int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
+int __pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, unsigned long start_t, char *reset_type)
{
- struct pci_dev *child __free(pci_dev_put) = NULL;
- int delay;
+ int delay, left;
if (pci_dev_is_disconnected(dev))
return 0;
@@ -4835,8 +4899,6 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
return 0;
}
- child = pci_dev_get(list_first_entry(&dev->subordinate->devices,
- struct pci_dev, bus_list));
up_read(&pci_bus_sem);
/*
@@ -4844,8 +4906,12 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
* accessing the device after reset (that is 1000 ms + 100 ms).
*/
if (!pci_is_pcie(dev)) {
- pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
- msleep(1000 + delay);
+ left = 1000 + delay - jiffies_to_msecs(jiffies - start_t);
+ pci_dbg(dev, "waiting %d ms for secondary bus\n", left > 0 ? left : 0);
+
+ if (left > 0)
+ msleep(left);
+
return 0;
}
@@ -4870,10 +4936,14 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
u16 status;
- pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
- msleep(delay);
+ left = delay - jiffies_to_msecs(jiffies - start_t);
+ pci_dbg(dev, "waiting %d ms for downstream link\n", left > 0 ? left : 0);
+
+ if (left > 0)
+ msleep(left);
- if (!pci_dev_wait(child, reset_type, PCI_RESET_WAIT - delay))
+ left = PCI_RESET_WAIT - jiffies_to_msecs(jiffies - start_t);
+ if(!pci_dev_wait_child(dev, start_t, left > 0 ? left : 0, reset_type))
return 0;
/*
@@ -4888,20 +4958,26 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
if (!(status & PCI_EXP_LNKSTA_DLLLA))
return -ENOTTY;
- return pci_dev_wait(child, reset_type,
- PCIE_RESET_READY_POLL_MS - PCI_RESET_WAIT);
+ left = PCIE_RESET_READY_POLL_MS - jiffies_to_msecs(jiffies - start_t);
+ return pci_dev_wait_child(dev, start_t, left > 0 ? left : 0, reset_type);
}
- pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
- delay);
- if (!pcie_wait_for_link_delay(dev, true, delay)) {
+ left = delay - jiffies_to_msecs(jiffies - start_t);
+ pci_dbg(dev, "waiting %d ms for downstream link, after activation\n", left > 0 ? left : 0);
+
+ if (!pcie_wait_for_link_delay(dev, true, left > 0 ? left : 0)) {
/* Did not train, no need to wait any further */
pci_info(dev, "Data Link Layer Link Active not set in %d msec\n", delay);
return -ENOTTY;
}
- return pci_dev_wait(child, reset_type,
- PCIE_RESET_READY_POLL_MS - delay);
+ left = PCIE_RESET_READY_POLL_MS - jiffies_to_msecs(jiffies - start_t);
+ return pci_dev_wait_child(dev, start_t, left > 0 ? left : 0, reset_type);
+}
+
+int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
+{
+ return __pci_bridge_wait_for_secondary_bus(dev, jiffies, reset_type);
}
void pci_reset_secondary_bus(struct pci_dev *dev)
--
2.32.0.3.gf3a3e56d6
next reply other threads:[~2025-11-24 10:45 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-24 10:45 Guanghui Feng [this message]
2025-11-24 23:58 ` [PATCH] PCI: Fix PCIe SBR dev/link wait error Bjorn Helgaas
2025-11-25 6:20 ` guanghui.fgh
2025-12-01 10:03 ` Lukas Wunner
2025-12-01 12:56 ` guanghuifeng
2025-12-01 13:26 ` Lukas Wunner
2025-12-01 14:46 ` guanghuifeng
2025-12-01 16:18 ` Lukas Wunner
2025-11-26 8:20 ` Ilpo Järvinen
2025-11-26 12:08 ` guanghui.fgh
2025-11-26 12:37 ` Ilpo Järvinen
2025-11-26 14:22 ` guanghui.fgh
2025-11-26 14:47 ` Ilpo Järvinen
2025-11-29 16:36 ` [PATCH v2] " Guanghui Feng
2025-12-01 9:21 ` Ilpo Järvinen
2025-12-01 12:21 ` guanghuifeng
2025-12-01 13:08 ` Ilpo Järvinen
2025-12-02 4:32 ` [PATCH v4 0/1] " Guanghui Feng
2025-12-02 4:32 ` [PATCH v4 v4 1/1] " Guanghui Feng
2025-12-02 16:49 ` Bjorn Helgaas
2025-12-02 16:51 ` Bjorn Helgaas
2025-12-03 14:41 ` Ilpo Järvinen
2025-11-30 5:17 ` [PATCH v3] " Guanghui Feng
2025-12-01 9:24 ` Ilpo Järvinen
2025-12-01 12:31 ` guanghuifeng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251124104502.777141-1-guanghuifeng@linux.alibaba.com \
--to=guanghuifeng@linux.alibaba.com \
--cc=alikernel-developer@linux.alibaba.com \
--cc=bhelgaas@google.com \
--cc=kanie@linux.alibaba.com \
--cc=linux-pci@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox