public inbox for linux-usb@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
@ 2026-01-21  5:27 Chia-Lin Kao (AceLan)
  2026-01-21  5:35 ` AceLan Kao
  2026-01-21  6:01 ` Mika Westerberg
  0 siblings, 2 replies; 26+ messages in thread
From: Chia-Lin Kao (AceLan) @ 2026-01-21  5:27 UTC (permalink / raw)
  To: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel

PCIe devices behind Thunderbolt tunnels may fail to enumerate when
spurious hotplug events prevent pciehp from detecting link-up.

Root cause:

Spurious unplug events occur immediately after tunnel activation:

  [  932.438] thunderbolt: acking hot unplug event on 702:2
  [  932.852] thunderbolt: PCIe Up path activation complete
  [  932.855] thunderbolt: hotplug event for upstream port 702:2
            (unplug: 0)
  [  932.855] thunderbolt: hotplug event for upstream port 702:2
            (unplug: 1)

These events disrupt pciehp timing, causing device enumeration to fail
~70% of the time on affected hardware. Manual PCI rescan succeeds,
proving devices are present and functional on the bus.

Solution:

Schedule delayed work (300ms) after tunnel activation to:
1. Check if pciehp successfully enumerated devices (device count increased)
2. If not, trigger pci_rescan_bus() to discover devices manually
3. Log results for observability

The delayed work approach is non-blocking and only rescans when actually
needed, avoiding overhead on systems where pciehp works correctly.

Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
---
Logs: https://people.canonical.com/~acelan/bugs/tbt_storage/
merged.out.bad: Plugged-in TBT storage, but eventually fails to enumerate
merged.out.good: Plugged-in TBT storage, and successfully enumerates
merged.out.patched: Plugged-in TBT storage, it should fail without this
                    patch, but it works now
---
 drivers/thunderbolt/tb.c | 95 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 293fc9f258a5c..1cfc9a265c453 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -11,6 +11,7 @@
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_data/x86/apple.h>
+#include <linux/pci.h>

 #include "tb.h"
 #include "tb_regs.h"
@@ -18,6 +19,7 @@

 #define TB_TIMEOUT		100	/* ms */
 #define TB_RELEASE_BW_TIMEOUT	10000	/* ms */
+#define TB_PCIEHP_ENUMERATION_DELAY 300	/* ms */

 /*
  * How many time bandwidth allocation request from graphics driver is
@@ -83,6 +85,16 @@ struct tb_hotplug_event {
 	int retry;
 };

+/* Delayed work to verify PCIe enumeration after tunnel activation */
+struct tb_pci_rescan_work {
+	struct delayed_work work;
+	struct tb *tb;
+	struct pci_bus *bus;
+	int devices_before;
+	u64 route;
+	u8 port;
+};
+
 static void tb_scan_port(struct tb_port *port);
 static void tb_handle_hotplug(struct work_struct *work);
 static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
@@ -90,6 +102,60 @@ static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
 static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port,
 					  int retry, unsigned long delay);

+static void tb_pci_rescan_work_fn(struct work_struct *work)
+{
+	struct tb_pci_rescan_work *rescan_work =
+		container_of(work, typeof(*rescan_work), work.work);
+	struct tb *tb = rescan_work->tb;
+	struct pci_bus *bus = rescan_work->bus;
+	int devices_after = 0;
+	struct pci_dev *dev;
+	struct tb_switch *sw;
+	struct tb_port *port;
+
+	mutex_lock(&tb->lock);
+
+	sw = tb_switch_find_by_route(tb, rescan_work->route);
+	if (!sw) {
+		tb_dbg(tb, "Switch at route %llx disappeared, skipping rescan\n",
+		       rescan_work->route);
+		goto out_unlock;
+	}
+
+	port = &sw->ports[rescan_work->port];
+
+	pci_lock_rescan_remove();
+	for_each_pci_dev(dev)
+		devices_after++;
+	pci_unlock_rescan_remove();
+
+	if (devices_after > rescan_work->devices_before) {
+		tb_port_dbg(port, "pciehp enumerated %d new device(s)\n",
+			    devices_after - rescan_work->devices_before);
+	} else {
+		tb_port_info(port, "pciehp failed to enumerate devices, triggering rescan\n");
+
+		pci_lock_rescan_remove();
+		pci_rescan_bus(bus);
+
+		devices_after = 0;
+		for_each_pci_dev(dev)
+			devices_after++;
+		pci_unlock_rescan_remove();
+
+		if (devices_after > rescan_work->devices_before)
+			tb_port_info(port, "rescan found %d new device(s)\n",
+				     devices_after - rescan_work->devices_before);
+		else
+			tb_port_warn(port, "no devices found even after rescan\n");
+	}
+
+	tb_switch_put(sw);
+out_unlock:
+	mutex_unlock(&tb->lock);
+	kfree(rescan_work);
+}
+
 static void tb_queue_hotplug(struct tb *tb, u64 route, u8 port, bool unplug)
 {
 	struct tb_hotplug_event *ev;
@@ -2400,6 +2466,35 @@ static int tb_tunnel_pci(struct tb *tb, struct tb_switch *sw)
 		tb_sw_warn(sw, "failed to connect xHCI\n");

 	list_add_tail(&tunnel->list, &tcm->tunnel_list);
+
+	/* Verify pciehp enumeration; trigger rescan if needed */
+	if (tb->nhi && tb->nhi->pdev && tb->nhi->pdev->bus) {
+		struct pci_bus *bus = tb->nhi->pdev->bus;
+		struct pci_bus *scan_bus = bus->parent ? bus->parent : bus;
+		struct tb_pci_rescan_work *rescan_work;
+		struct pci_dev *dev;
+		int devices_before = 0;
+
+		pci_lock_rescan_remove();
+		for_each_pci_dev(dev)
+			devices_before++;
+		pci_unlock_rescan_remove();
+
+		rescan_work = kmalloc_obj(rescan_work, GFP_KERNEL);
+		if (!rescan_work)
+			return 0;
+
+		rescan_work->tb = tb;
+		rescan_work->bus = scan_bus;
+		rescan_work->devices_before = devices_before;
+		rescan_work->route = tb_route(sw);
+		rescan_work->port = up->port;
+
+		INIT_DELAYED_WORK(&rescan_work->work, tb_pci_rescan_work_fn);
+		queue_delayed_work(tb->wq, &rescan_work->work,
+				   msecs_to_jiffies(TB_PCIEHP_ENUMERATION_DELAY));
+	}
+
 	return 0;
 }

--
2.51.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-21  5:27 [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan Chia-Lin Kao (AceLan)
@ 2026-01-21  5:35 ` AceLan Kao
  2026-01-21  6:01 ` Mika Westerberg
  1 sibling, 0 replies; 26+ messages in thread
From: AceLan Kao @ 2026-01-21  5:35 UTC (permalink / raw)
  To: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel

Chia-Lin Kao (AceLan) <acelan.kao@canonical.com> 於 2026年1月21日週三 下午1:27寫道:
>
> PCIe devices behind Thunderbolt tunnels may fail to enumerate when
> spurious hotplug events prevent pciehp from detecting link-up.
>
> Root cause:
>
> Spurious unplug events occur immediately after tunnel activation:
>
>   [  932.438] thunderbolt: acking hot unplug event on 702:2
>   [  932.852] thunderbolt: PCIe Up path activation complete
>   [  932.855] thunderbolt: hotplug event for upstream port 702:2
>             (unplug: 0)
>   [  932.855] thunderbolt: hotplug event for upstream port 702:2
>             (unplug: 1)
>
> These events disrupt pciehp timing, causing device enumeration to fail
> ~70% of the time on affected hardware. Manual PCI rescan succeeds,
> proving devices are present and functional on the bus.
>
> Solution:
>
> Schedule delayed work (300ms) after tunnel activation to:
> 1. Check if pciehp successfully enumerated devices (device count increased)
> 2. If not, trigger pci_rescan_bus() to discover devices manually
> 3. Log results for observability
>
> The delayed work approach is non-blocking and only rescans when actually
> needed, avoiding overhead on systems where pciehp works correctly.
>
> Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
> ---
> Logs: https://people.canonical.com/~acelan/bugs/tbt_storage/
> merged.out.bad: Plugged-in TBT storage, but eventually fails to enumerate
> merged.out.good: Plugged-in TBT storage, and successfully enumerates
> merged.out.patched: Plugged-in TBT storage, it should fail without this
>                     patch, but it works now
> ---
>  drivers/thunderbolt/tb.c | 95 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 95 insertions(+)
>
> diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
> index 293fc9f258a5c..1cfc9a265c453 100644
> --- a/drivers/thunderbolt/tb.c
> +++ b/drivers/thunderbolt/tb.c
> @@ -11,6 +11,7 @@
>  #include <linux/delay.h>
>  #include <linux/pm_runtime.h>
>  #include <linux/platform_data/x86/apple.h>
> +#include <linux/pci.h>
>
>  #include "tb.h"
>  #include "tb_regs.h"
> @@ -18,6 +19,7 @@
>
>  #define TB_TIMEOUT             100     /* ms */
>  #define TB_RELEASE_BW_TIMEOUT  10000   /* ms */
> +#define TB_PCIEHP_ENUMERATION_DELAY 300        /* ms */
>
>  /*
>   * How many time bandwidth allocation request from graphics driver is
> @@ -83,6 +85,16 @@ struct tb_hotplug_event {
>         int retry;
>  };
>
> +/* Delayed work to verify PCIe enumeration after tunnel activation */
> +struct tb_pci_rescan_work {
> +       struct delayed_work work;
> +       struct tb *tb;
> +       struct pci_bus *bus;
> +       int devices_before;
> +       u64 route;
> +       u8 port;
> +};
> +
>  static void tb_scan_port(struct tb_port *port);
>  static void tb_handle_hotplug(struct work_struct *work);
>  static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
> @@ -90,6 +102,60 @@ static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
>  static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port,
>                                           int retry, unsigned long delay);
>
> +static void tb_pci_rescan_work_fn(struct work_struct *work)
> +{
> +       struct tb_pci_rescan_work *rescan_work =
> +               container_of(work, typeof(*rescan_work), work.work);
> +       struct tb *tb = rescan_work->tb;
> +       struct pci_bus *bus = rescan_work->bus;
> +       int devices_after = 0;
> +       struct pci_dev *dev;
> +       struct tb_switch *sw;
> +       struct tb_port *port;
> +
> +       mutex_lock(&tb->lock);
> +
> +       sw = tb_switch_find_by_route(tb, rescan_work->route);
> +       if (!sw) {
> +               tb_dbg(tb, "Switch at route %llx disappeared, skipping rescan\n",
> +                      rescan_work->route);
> +               goto out_unlock;
> +       }
> +
> +       port = &sw->ports[rescan_work->port];
> +
> +       pci_lock_rescan_remove();
> +       for_each_pci_dev(dev)
> +               devices_after++;
> +       pci_unlock_rescan_remove();
> +
> +       if (devices_after > rescan_work->devices_before) {
> +               tb_port_dbg(port, "pciehp enumerated %d new device(s)\n",
> +                           devices_after - rescan_work->devices_before);
> +       } else {
> +               tb_port_info(port, "pciehp failed to enumerate devices, triggering rescan\n");
> +
> +               pci_lock_rescan_remove();
> +               pci_rescan_bus(bus);
> +
> +               devices_after = 0;
> +               for_each_pci_dev(dev)
> +                       devices_after++;
> +               pci_unlock_rescan_remove();
> +
> +               if (devices_after > rescan_work->devices_before)
> +                       tb_port_info(port, "rescan found %d new device(s)\n",
> +                                    devices_after - rescan_work->devices_before);
> +               else
> +                       tb_port_warn(port, "no devices found even after rescan\n");
> +       }
> +
> +       tb_switch_put(sw);
> +out_unlock:
> +       mutex_unlock(&tb->lock);
> +       kfree(rescan_work);
> +}
> +
>  static void tb_queue_hotplug(struct tb *tb, u64 route, u8 port, bool unplug)
>  {
>         struct tb_hotplug_event *ev;
> @@ -2400,6 +2466,35 @@ static int tb_tunnel_pci(struct tb *tb, struct tb_switch *sw)
>                 tb_sw_warn(sw, "failed to connect xHCI\n");
>
>         list_add_tail(&tunnel->list, &tcm->tunnel_list);
> +
> +       /* Verify pciehp enumeration; trigger rescan if needed */
> +       if (tb->nhi && tb->nhi->pdev && tb->nhi->pdev->bus) {
> +               struct pci_bus *bus = tb->nhi->pdev->bus;
> +               struct pci_bus *scan_bus = bus->parent ? bus->parent : bus;
> +               struct tb_pci_rescan_work *rescan_work;
> +               struct pci_dev *dev;
> +               int devices_before = 0;
> +
> +               pci_lock_rescan_remove();
> +               for_each_pci_dev(dev)
> +                       devices_before++;
> +               pci_unlock_rescan_remove();
> +
> +               rescan_work = kmalloc_obj(rescan_work, GFP_KERNEL);
Sorry, didn't re-check after checkpatch modified it.
kmalloc_obj() is undefined here.
I'll submit v2 later.

> +               if (!rescan_work)
> +                       return 0;
> +
> +               rescan_work->tb = tb;
> +               rescan_work->bus = scan_bus;
> +               rescan_work->devices_before = devices_before;
> +               rescan_work->route = tb_route(sw);
> +               rescan_work->port = up->port;
> +
> +               INIT_DELAYED_WORK(&rescan_work->work, tb_pci_rescan_work_fn);
> +               queue_delayed_work(tb->wq, &rescan_work->work,
> +                                  msecs_to_jiffies(TB_PCIEHP_ENUMERATION_DELAY));
> +       }
> +
>         return 0;
>  }
>
> --
> 2.51.0
>


-- 
Chia-Lin Kao(AceLan)
http://blog.acelan.idv.tw/
E-Mail: acelan.kaoATcanonical.com (s/AT/@/)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-21  5:27 [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan Chia-Lin Kao (AceLan)
  2026-01-21  5:35 ` AceLan Kao
@ 2026-01-21  6:01 ` Mika Westerberg
  2026-01-23  2:04   ` Chia-Lin Kao (AceLan)
  1 sibling, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-01-21  6:01 UTC (permalink / raw)
  To: Chia-Lin Kao (AceLan)
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel

Hi,

On Wed, Jan 21, 2026 at 01:27:44PM +0800, Chia-Lin Kao (AceLan) wrote:
> PCIe devices behind Thunderbolt tunnels may fail to enumerate when
> spurious hotplug events prevent pciehp from detecting link-up.
> 
> Root cause:
> 
> Spurious unplug events occur immediately after tunnel activation:
> 
>   [  932.438] thunderbolt: acking hot unplug event on 702:2
>   [  932.852] thunderbolt: PCIe Up path activation complete
>   [  932.855] thunderbolt: hotplug event for upstream port 702:2
>             (unplug: 0)
>   [  932.855] thunderbolt: hotplug event for upstream port 702:2
>             (unplug: 1)
> 
> These events disrupt pciehp timing, causing device enumeration to fail
> ~70% of the time on affected hardware. Manual PCI rescan succeeds,
> proving devices are present and functional on the bus.

Thanks for the report!

They are likely TB3 xHCI "plug" events or so but they should not affect
anything really.

It may be that there is something in the TB3 compatibility side that we are
not doing which needs to be investigated.

From your merged.out.bad:

CM does xHCI connect here:

[  152.905840] [182] thunderbolt 0000:c7:00.6: 702: xHCI connect request
[  152.906865] [182] thunderbolt 0000:c7:00.6: hotplug event for upstream port 702:2 (unplug: 0)
[  152.906869] [182] thunderbolt 0000:c7:00.6: 2:8: got plug event for connected port, ignoring
[  152.906872] [182] thunderbolt 0000:c7:00.6: hotplug event for upstream port 702:2 (unplug: 1)
[  152.906875] [182] thunderbolt 0000:c7:00.6: 2:8: got unplug event for disconnected port, ignoring

[  192.931373] [49] thunderbolt 0000:c7:00.6: acking hot unplug event on 2:7

Can you comment out call to tb_switch_xhci_connect() and see if that
changes anything?

> Solution:
> 
> Schedule delayed work (300ms) after tunnel activation to:
> 1. Check if pciehp successfully enumerated devices (device count increased)
> 2. If not, trigger pci_rescan_bus() to discover devices manually
> 3. Log results for observability
> 
> The delayed work approach is non-blocking and only rescans when actually
> needed, avoiding overhead on systems where pciehp works correctly.

There is no way we are going to call PCI functions from the tb.c.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-21  6:01 ` Mika Westerberg
@ 2026-01-23  2:04   ` Chia-Lin Kao (AceLan)
  2026-01-23 12:01     ` Mika Westerberg
  0 siblings, 1 reply; 26+ messages in thread
From: Chia-Lin Kao (AceLan) @ 2026-01-23  2:04 UTC (permalink / raw)
  To: Mika Westerberg
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel

On Wed, Jan 21, 2026 at 07:01:42AM +0100, Mika Westerberg wrote:
> Hi,
>
> On Wed, Jan 21, 2026 at 01:27:44PM +0800, Chia-Lin Kao (AceLan) wrote:
> > PCIe devices behind Thunderbolt tunnels may fail to enumerate when
> > spurious hotplug events prevent pciehp from detecting link-up.
> >
> > Root cause:
> >
> > Spurious unplug events occur immediately after tunnel activation:
> >
> >   [  932.438] thunderbolt: acking hot unplug event on 702:2
> >   [  932.852] thunderbolt: PCIe Up path activation complete
> >   [  932.855] thunderbolt: hotplug event for upstream port 702:2
> >             (unplug: 0)
> >   [  932.855] thunderbolt: hotplug event for upstream port 702:2
> >             (unplug: 1)
> >
> > These events disrupt pciehp timing, causing device enumeration to fail
> > ~70% of the time on affected hardware. Manual PCI rescan succeeds,
> > proving devices are present and functional on the bus.
>
> Thanks for the report!
>
> They are likely TB3 xHCI "plug" events or so but they should not affect
> anything really.
>
> It may be that there is something in the TB3 compatibility side that we are
> not doing which needs to be investigated.
>
> From your merged.out.bad:
>
> CM does xHCI connect here:
>
> [  152.905840] [182] thunderbolt 0000:c7:00.6: 702: xHCI connect request
> [  152.906865] [182] thunderbolt 0000:c7:00.6: hotplug event for upstream port 702:2 (unplug: 0)
> [  152.906869] [182] thunderbolt 0000:c7:00.6: 2:8: got plug event for connected port, ignoring
> [  152.906872] [182] thunderbolt 0000:c7:00.6: hotplug event for upstream port 702:2 (unplug: 1)
> [  152.906875] [182] thunderbolt 0000:c7:00.6: 2:8: got unplug event for disconnected port, ignoring
>
> [  192.931373] [49] thunderbolt 0000:c7:00.6: acking hot unplug event on 2:7
>
> Can you comment out call to tb_switch_xhci_connect() and see if that
> changes anything?
Here is what I modified, and the problem becomes a little bit complicated.

I did the following steps(1~5) and captured the tbtrace log at step 5.
https://people.canonical.com/~acelan/bugs/tbt_storage/merged.out.remove_tb_switch_xhci_connect.out
1. Plugged one tbt storage on the Dock and connected the dock to the
machine
2. Boot the machine up, and it recognizes the tbt storage
3. Plugged the second tbt storage on the dock, and it can also be
recognized (it always failed at this step)
4. Unplugged the first and second tbt storage from the dock, and then
re-plugged the first tbt storage on the dock, and it can be recognized
5. Re-plugged the second tbt storage on the dock, and it fails.

(continue doing the following tests)
a. When the issue happens, re-plugging the second tbt storage doesn't
work.
b. Plugged both tbt storages on the dock, and then re-plugged the dock to
the machine, both tbt storages can be recognized.
	b.1 In this case, it works when re-plugging the first or the
	    second tbt storage on the dock(there is always one tbt storage
	    still connected to the dock)
	b.2 Removed both tbt storages from the dock, and then
	    re-plugged them one by one, and the second tbt storage can't be
	    recognized.
c. Plugged one tbt storage on the dock, and then re-connected the dock to
	the machine, the tbt storage can be recognized.
	c.1 Plugged the second tbt storage on the dock, and the second tbt
	    storage can be recognized.
	c.2 Re-plugged the first or the second tbt storage on the
	    dock, both tbt storages can be recognized.
	c.3 Removed both tbt storages from the dock, and then
	    re-plugged them one by one, and the second tbt storage can't be
	    recognized.(same as b.2)

The issue can be reproduced by connecting the second tbt storage
to the dock.
1. Connect the dock to the machine with any tbt storage
2. Or remove all tbt storages from the dock if the dock is connected
3. And then plug the tbt storages back one by one; the second one won't be
   recognized.

A rescan finds the missing tbt storage, but it only works once. Another
rescan is needed after re-plugging the first or the second tbt storage.
   echo 1 | sudo tee /sys/bus/pci/rescan
   echo 1 | sudo tee /sys/bus/pci/rescan

BTW, when the second tbt storage can't be recognized, unplug the first tbt
storage from the dock and the second tbt storage can be recognized.
And then re-plug the first tbt storage on the dock, and it can't be
recognized. The failing device is just whichever one was plugged in second.

diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 4f5f1dfc0fbf..be7ff82a3846 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -2503,8 +2503,8 @@ static void tb_handle_hotplug(struct work_struct *work)
        } else if (port->remote) {
                tb_port_dbg(port, "got plug event for connected port, ignoring\n");
        } else if (!port->port && sw->authorized) {
-               tb_sw_dbg(sw, "xHCI connect request\n");
-               tb_switch_xhci_connect(sw);
+               tb_sw_dbg(sw, "DEBUG: Comment out xHCI connect request\n");
+               //tb_switch_xhci_connect(sw);
        } else {
                if (tb_port_is_null(port)) {
                        tb_port_dbg(port, "hotplug: scanning\n");

>
> > Solution:
> >
> > Schedule delayed work (300ms) after tunnel activation to:
> > 1. Check if pciehp successfully enumerated devices (device count increased)
> > 2. If not, trigger pci_rescan_bus() to discover devices manually
> > 3. Log results for observability
> >
> > The delayed work approach is non-blocking and only rescans when actually
> > needed, avoiding overhead on systems where pciehp works correctly.
>
> There is no way we are going to call PCI functions from the tb.c.

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-23  2:04   ` Chia-Lin Kao (AceLan)
@ 2026-01-23 12:01     ` Mika Westerberg
       [not found]       ` <aXbTfLUJ-lEfNzgX@acelan-Precision-5480>
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-01-23 12:01 UTC (permalink / raw)
  To: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel

Hi,

On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > Can you comment out call to tb_switch_xhci_connect() and see if that
> > changes anything?
>
> Here is what I modified, and the problem becomes a little bit complicated.

Okay I see it did not change anything (well this is kind of what I
expected). Thanks for trying.

I see in your log that the PCIe tunnel is established just fine. It's just
that there is no PCIe hotplug happening or it is happening but the PCIe
Downstream Port is not waking up.

I figured you have following USB4/TB topology, right?

  AMD Host <-> GR Hub <-> TB3 Hub
                  ^
                  |
                TB3 Hub

What if you run 'lspci' after the issue reproduces? Does that bring the
missing PCIe devices? I suspect that this is due to older TB3 devices that
they may need bit more time to get the PCIe link (going over the tunnel) up
and running.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
       [not found]       ` <aXbTfLUJ-lEfNzgX@acelan-Precision-5480>
@ 2026-01-26  5:42         ` Mika Westerberg
       [not found]           ` <aXcWNw9Qfo5L9WVi@acelan-Precision-5480>
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-01-26  5:42 UTC (permalink / raw)
  To: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel

On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
> Hi,
> On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
> > Hi,
> >
> > On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > Can you comment out call to tb_switch_xhci_connect() and see if that
> > > > changes anything?
> > >
> > > Here is what I modified, and the problem becomes a little bit complicated.
> >
> > Okay I see it did not change anything (well this is kind of what I
> > expected). Thanks for trying.
> >
> > I see in your log that the PCIe tunnel is established just fine. It's just
> > that there is no PCIe hotplug happening or it is happening but the PCIe
> > Downstream Port is not waking up.
> >
> > I figured you have following USB4/TB topology, right?
> >
> >   AMD Host <-> GR Hub <-> TB3 Hub
> >                   ^
> >                   |
> >                 TB3 Hub
> Should be more like this
>   AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
>                              \
>                               <-> OWC Envoy Express (1-702)
> or
>   AMD Host (1-0, domain1)
>       |
>       └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
>                       ├─ Port 5 ──→ OWC Envoy Express (1-502)
>                       └─ Port 7 ──→ OWC Envoy Express (1-702)

Okay so the same ;-)

> > What if you run 'lspci' after the issue reproduces? Does that bring the
> > missing PCIe devices? I suspect that this is due to older TB3 devices that
> > they may need bit more time to get the PCIe link (going over the tunnel) up
> > and running.
> lspci doesn't bring back the missing tbt storage.

Forgot to mention that let it (the whole topology) enter runtime suspend
before you run lspci.

> It's not a timing issue; you can't get the tbt storage to work
> with multiple re-plugs. And after a rescan, you can always get the tbt
> storage to work after a replug.
> 
> And reproduce the issue again by unplugging the 2 tbt storages, and
> then plugging them back in one by one. The second one will not be
> recognized. There is a hotplug event, but it just stops somewhere
> in the middle.

You mean the first one always works?

What if you connect them directly to the host, one by one?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
       [not found]           ` <aXcWNw9Qfo5L9WVi@acelan-Precision-5480>
@ 2026-01-26 11:56             ` Mika Westerberg
       [not found]               ` <aXg1eBudRAaCZpmR@acelan-Precision-5480>
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-01-26 11:56 UTC (permalink / raw)
  To: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel

On Mon, Jan 26, 2026 at 03:48:48PM +0800, Chia-Lin Kao (AceLan) wrote:
> On Mon, Jan 26, 2026 at 06:42:31AM +0100, Mika Westerberg wrote:
> > On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > Hi,
> > > On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
> > > > Hi,
> > > >
> > > > On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > Can you comment out call to tb_switch_xhci_connect() and see if that
> > > > > > changes anything?
> > > > >
> > > > > Here is what I modified, and the problem becomes a little bit complicated.
> > > >
> > > > Okay I see it did not change anything (well this is kind of what I
> > > > expected). Thanks for trying.
> > > >
> > > > I see in your log that the PCIe tunnel is established just fine. It's just
> > > > that there is no PCIe hotplug happening or it is happening but the PCIe
> > > > Downstream Port is not waking up.
> > > >
> > > > I figured you have following USB4/TB topology, right?
> > > >
> > > >   AMD Host <-> GR Hub <-> TB3 Hub
> > > >                   ^
> > > >                   |
> > > >                 TB3 Hub
> > > Should be more like this
> > >   AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
> > >                              \
> > >                               <-> OWC Envoy Express (1-702)
> > > or
> > >   AMD Host (1-0, domain1)
> > >       |
> > >       └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
> > >                       ├─ Port 5 ──→ OWC Envoy Express (1-502)
> > >                       └─ Port 7 ──→ OWC Envoy Express (1-702)
> >
> > Okay so the same ;-)
> >
> > > > What if you run 'lspci' after the issue reproduces? Does that bring the
> > > > missing PCIe devices? I suspect that this is due to older TB3 devices that
> > > > they may need bit more time to get the PCIe link (going over the tunnel) up
> > > > and running.
> > > lspci doesn't bring back the missing tbt storage.
> >
> > Forgot to mention that let it (the whole topology) enter runtime suspend
> > before you run lspci.
> 
> https://people.canonical.com/~acelan/bugs/tbt_storage/dmesg_lspci.log
> 
> The behavior is strange, the following 3 devices keep entering D3cold and then comes back
> to D0 quickly. So, I'm not sure if the lspci do the actions you want.

Yes. I should have mentioned so the lspci is there exactly to trigger
runtime resume of the topology. I was hoping the PCIe links get
re-established properly then.

Can you do so that you:

1. Plug in the dock.
2. Plug in the other storage to the dock.
3. Block runtime PM from the PCIe Downstream Port that should lead to the
   second storage device PCIe Upstream Port

 # echo on > /sys/bus/pci/devices/DEVICE/power/control

4. Connect the second storage device and enable PCIe tunnel.

Does that make it work each time?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
       [not found]               ` <aXg1eBudRAaCZpmR@acelan-Precision-5480>
@ 2026-01-27  8:45                 ` Mika Westerberg
  2026-01-27 10:17                   ` Mika Westerberg
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-01-27  8:45 UTC (permalink / raw)
  To: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel

On Tue, Jan 27, 2026 at 01:04:20PM +0800, Chia-Lin Kao (AceLan) wrote:
> On Mon, Jan 26, 2026 at 12:56:54PM +0100, Mika Westerberg wrote:
> > On Mon, Jan 26, 2026 at 03:48:48PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > On Mon, Jan 26, 2026 at 06:42:31AM +0100, Mika Westerberg wrote:
> > > > On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > Hi,
> > > > > On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
> > > > > > Hi,
> > > > > >
> > > > > > On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > > Can you comment out call to tb_switch_xhci_connect() and see if that
> > > > > > > > changes anything?
> > > > > > >
> > > > > > > Here is what I modified, and the problem becomes a little bit complicated.
> > > > > >
> > > > > > Okay I see it did not change anything (well this is kind of what I
> > > > > > expected). Thanks for trying.
> > > > > >
> > > > > > I see in your log that the PCIe tunnel is established just fine. It's just
> > > > > > that there is no PCIe hotplug happening or it is happening but the PCIe
> > > > > > Downstream Port is not waking up.
> > > > > >
> > > > > > I figured you have following USB4/TB topology, right?
> > > > > >
> > > > > >   AMD Host <-> GR Hub <-> TB3 Hub
> > > > > >                   ^
> > > > > >                   |
> > > > > >                 TB3 Hub
> > > > > Should be more like this
> > > > >   AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
> > > > >                              \
> > > > >                               <-> OWC Envoy Express (1-702)
> > > > > or
> > > > >   AMD Host (1-0, domain1)
> > > > >       |
> > > > >       └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
> > > > >                       ├─ Port 5 ──→ OWC Envoy Express (1-502)
> > > > >                       └─ Port 7 ──→ OWC Envoy Express (1-702)
> > > >
> > > > Okay so the same ;-)
> > > >
> > > > > > What if you run 'lspci' after the issue reproduces? Does that bring the
> > > > > > missing PCIe devices? I suspect that this is due to older TB3 devices that
> > > > > > they may need bit more time to get the PCIe link (going over the tunnel) up
> > > > > > and running.
> > > > > lspci doesn't bring back the missing tbt storage.
> > > >
> > > > Forgot to mention that let it (the whole topology) enter runtime suspend
> > > > before you run lspci.
> > >
> > > https://people.canonical.com/~acelan/bugs/tbt_storage/dmesg_lspci.log
> > >
> > > The behavior is strange, the following 3 devices keep entering D3cold and then comes back
> > > to D0 quickly. So, I'm not sure if the lspci do the actions you want.
> >
> > Yes. I should have mentioned so the lspci is there exactly to trigger
> > runtime resume of the topology. I was hoping the PCIe links get
> > re-established properly then.
> >
> > Can you do so that you:
> >
> > 1. Plug in the dock.
> > 2. Plug in the other storage to the dock.
> > 3. Block runtime PM from the PCIe Downstream Port that should lead to the
> >    second storage device PCIe Upstream Port
> >
> >  # echo on > /sys/bus/pci/devices/DEVICE/power/control
> >
> > 4. Connect the second storage device and enable PCIe tunnel.
> >
> > Does that make it work each time?
> Yes, follow the steps makes it work.
> 
>    echo on | sudo tee /sys/bus/pci/devices/*/*/power/control
> 
> Re-plug the dock, need to disable the runpm again.

But can you just block it from the PCIe Downstream Port that leads to the
"non-working" storage before you enable PCIe tunnel? Not for all the
devices.

(let me know if you want help locating the correct device).

Does it still work?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-27  8:45                 ` Mika Westerberg
@ 2026-01-27 10:17                   ` Mika Westerberg
  2026-01-29  5:45                     ` Chia-Lin Kao (AceLan)
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-01-27 10:17 UTC (permalink / raw)
  To: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine

On Tue, Jan 27, 2026 at 09:45:13AM +0100, Mika Westerberg wrote:
> On Tue, Jan 27, 2026 at 01:04:20PM +0800, Chia-Lin Kao (AceLan) wrote:
> > On Mon, Jan 26, 2026 at 12:56:54PM +0100, Mika Westerberg wrote:
> > > On Mon, Jan 26, 2026 at 03:48:48PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > On Mon, Jan 26, 2026 at 06:42:31AM +0100, Mika Westerberg wrote:
> > > > > On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > Hi,
> > > > > > On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
> > > > > > > Hi,
> > > > > > >
> > > > > > > On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > > > Can you comment out call to tb_switch_xhci_connect() and see if that
> > > > > > > > > changes anything?
> > > > > > > >
> > > > > > > > Here is what I modified, and the problem becomes a little bit complicated.
> > > > > > >
> > > > > > > Okay I see it did not change anything (well this is kind of what I
> > > > > > > expected). Thanks for trying.
> > > > > > >
> > > > > > > I see in your log that the PCIe tunnel is established just fine. It's just
> > > > > > > that there is no PCIe hotplug happening or it is happening but the PCIe
> > > > > > > Downstream Port is not waking up.
> > > > > > >
> > > > > > > I figured you have following USB4/TB topology, right?
> > > > > > >
> > > > > > >   AMD Host <-> GR Hub <-> TB3 Hub
> > > > > > >                   ^
> > > > > > >                   |
> > > > > > >                 TB3 Hub
> > > > > > Should be more like this
> > > > > >   AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
> > > > > >                              \
> > > > > >                               <-> OWC Envoy Express (1-702)
> > > > > > or
> > > > > >   AMD Host (1-0, domain1)
> > > > > >       |
> > > > > >       └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
> > > > > >                       ├─ Port 5 ──→ OWC Envoy Express (1-502)
> > > > > >                       └─ Port 7 ──→ OWC Envoy Express (1-702)
> > > > >
> > > > > Okay so the same ;-)
> > > > >
> > > > > > > What if you run 'lspci' after the issue reproduces? Does that bring the
> > > > > > > missing PCIe devices? I suspect that this is due to older TB3 devices that
> > > > > > > they may need bit more time to get the PCIe link (going over the tunnel) up
> > > > > > > and running.
> > > > > > lspci doesn't bring back the missing tbt storage.
> > > > >
> > > > > Forgot to mention that let it (the whole topology) enter runtime suspend
> > > > > before you run lspci.
> > > >
> > > > https://people.canonical.com/~acelan/bugs/tbt_storage/dmesg_lspci.log
> > > >
> > > > The behavior is strange, the following 3 devices keep entering D3cold and then comes back
> > > > to D0 quickly. So, I'm not sure if the lspci do the actions you want.
> > >
> > > Yes. I should have mentioned so the lspci is there exactly to trigger
> > > runtime resume of the topology. I was hoping the PCIe links get
> > > re-established properly then.
> > >
> > > Can you do so that you:
> > >
> > > 1. Plug in the dock.
> > > 2. Plug in the other storage to the dock.
> > > 3. Block runtime PM from the PCIe Downstream Port that should lead to the
> > >    second storage device PCIe Upstream Port
> > >
> > >  # echo on > /sys/bus/pci/devices/DEVICE/power/control
> > >
> > > 4. Connect the second storage device and enable PCIe tunnel.
> > >
> > > Does that make it work each time?
> > Yes, follow the steps makes it work.
> > 
> >    echo on | sudo tee /sys/bus/pci/devices/*/*/power/control
> > 
> > Re-plug the dock, need to disable the runpm again.
> 
> But can you just block it from the PCIe Downstream Port that leads to the
> "non-working" storage before you enable PCIe tunnel? Not for all the
> devices.
> 
> (let me know if you want help locating the correct device).
> 
> Does it still work?

(+Gil)

There is also one patch that fixes the driver to follow the CM guide more
closely; it is related to PCIe tunneling and may actually explain the
issue you see:

  https://lore.kernel.org/linux-usb/20260127094953.GF2275908@black.igk.intel.com/

Note it only does that for USB4 routers, so you may need to tune it so
that it skips that check, but there is a complication because IIRC the LTSSM
bits do not match the USB4 ones. One thing to try is to just check that the
USB4 PCIe adapter side is in the detect state.

A second thing to try is to disable PCIe ASPM L1. We can do that from the CM
side like we already do in tb_switch_pcie_l1_enable(). If possible you can
try disabling it from the BIOS (but don't use the pcie_aspm= command
line parameter).

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-27 10:17                   ` Mika Westerberg
@ 2026-01-29  5:45                     ` Chia-Lin Kao (AceLan)
  2026-01-29  6:50                       ` Mika Westerberg
  0 siblings, 1 reply; 26+ messages in thread
From: Chia-Lin Kao (AceLan) @ 2026-01-29  5:45 UTC (permalink / raw)
  To: Mika Westerberg
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel, Gil Fine

On Tue, Jan 27, 2026 at 11:17:01AM +0100, Mika Westerberg wrote:
> On Tue, Jan 27, 2026 at 09:45:13AM +0100, Mika Westerberg wrote:
> > On Tue, Jan 27, 2026 at 01:04:20PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > On Mon, Jan 26, 2026 at 12:56:54PM +0100, Mika Westerberg wrote:
> > > > On Mon, Jan 26, 2026 at 03:48:48PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > On Mon, Jan 26, 2026 at 06:42:31AM +0100, Mika Westerberg wrote:
> > > > > > On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > Hi,
> > > > > > > On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
> > > > > > > > Hi,
> > > > > > > >
> > > > > > > > On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > > > > Can you comment out call to tb_switch_xhci_connect() and see if that
> > > > > > > > > > changes anything?
> > > > > > > > >
> > > > > > > > > Here is what I modified, and the problem becomes a little bit complicated.
> > > > > > > >
> > > > > > > > Okay I see it did not change anything (well this is kind of what I
> > > > > > > > expected). Thanks for trying.
> > > > > > > >
> > > > > > > > I see in your log that the PCIe tunnel is established just fine. It's just
> > > > > > > > that there is no PCIe hotplug happening or it is happening but the PCIe
> > > > > > > > Downstream Port is not waking up.
> > > > > > > >
> > > > > > > > I figured you have following USB4/TB topology, right?
> > > > > > > >
> > > > > > > >   AMD Host <-> GR Hub <-> TB3 Hub
> > > > > > > >                   ^
> > > > > > > >                   |
> > > > > > > >                 TB3 Hub
> > > > > > > Should be more like this
> > > > > > >   AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
> > > > > > >                              \
> > > > > > >                               <-> OWC Envoy Express (1-702)
> > > > > > > or
> > > > > > >   AMD Host (1-0, domain1)
> > > > > > >       |
> > > > > > >       └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
> > > > > > >                       ├─ Port 5 ──→ OWC Envoy Express (1-502)
> > > > > > >                       └─ Port 7 ──→ OWC Envoy Express (1-702)
> > > > > >
> > > > > > Okay so the same ;-)
> > > > > >
> > > > > > > > What if you run 'lspci' after the issue reproduces? Does that bring the
> > > > > > > > missing PCIe devices? I suspect that this is due to older TB3 devices that
> > > > > > > > they may need bit more time to get the PCIe link (going over the tunnel) up
> > > > > > > > and running.
> > > > > > > lspci doesn't bring back the missing tbt storage.
> > > > > >
> > > > > > Forgot to mention that let it (the whole topology) enter runtime suspend
> > > > > > before you run lspci.
> > > > >
> > > > > https://people.canonical.com/~acelan/bugs/tbt_storage/dmesg_lspci.log
> > > > >
> > > > > The behavior is strange, the following 3 devices keep entering D3cold and then comes back
> > > > > to D0 quickly. So, I'm not sure if the lspci do the actions you want.
> > > >
> > > > Yes. I should have mentioned so the lspci is there exactly to trigger
> > > > runtime resume of the topology. I was hoping the PCIe links get
> > > > re-established properly then.
> > > >
> > > > Can you do so that you:
> > > >
> > > > 1. Plug in the dock.
> > > > 2. Plug in the other storage to the dock.
> > > > 3. Block runtime PM from the PCIe Downstream Port that should lead to the
> > > >    second storage device PCIe Upstream Port
> > > >
> > > >  # echo on > /sys/bus/pci/devices/DEVICE/power/control
> > > >
> > > > 4. Connect the second storage device and enable PCIe tunnel.
> > > >
> > > > Does that make it work each time?
> > > Yes, follow the steps makes it work.
> > >
> > >    echo on | sudo tee /sys/bus/pci/devices/*/*/power/control
> > >
> > > Re-plug the dock, need to disable the runpm again.
> >
> > But can you just block it from the PCIe Downstream Port that leads to the
> > "non-working" storage before you enable PCIe tunnel? Not for all the
> > devices.
> >
> > (let me know if you want help locating the correct device).
> >
> > Does it still work?
Here's the full PCI device chain graph:

    0000:00:01.2 - AMD PCI Root Port
        |
        └─ 0000:61:00.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
               |
               └─ 0000:62:02.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
                      |
                      └─ 0000:83:00.0 - Intel TBT3 Bridge (Upstream Port) [Alpine Ridge LP]
                             |
                             └─ 0000:84:01.0 - Intel TBT3 Bridge (Downstream Port) [Alpine Ridge LP]
                                    |
                                    └─ 0000:85:00.0 - Sandisk PC SN740 NVMe SSD (nvme2)

When the tbt storage is not recognized, we don't have 83:00.0 and its
downstream port 84:01.0.

$ ls /sys/bus/pci/devices
0000:00:00.0  0000:00:02.1  0000:00:08.1  0000:00:18.1  0000:00:18.7  0000:62:04.0  0000:c3:00.0  0000:c5:00.5  0000:c7:00.4
0000:00:00.2  0000:00:02.3  0000:00:08.2  0000:00:18.2  0000:61:00.0  0000:a2:00.0  0000:c4:00.0  0000:c5:00.7  0000:c7:00.5
0000:00:01.0  0000:00:02.4  0000:00:08.3  0000:00:18.3  0000:62:00.0  0000:a3:01.0  0000:c5:00.0  0000:c6:00.0  0000:c7:00.6
0000:00:01.1  0000:00:02.5  0000:00:14.0  0000:00:18.4  0000:62:01.0  0000:a4:00.0  0000:c5:00.1  0000:c6:00.1
0000:00:01.2  0000:00:03.0  0000:00:14.3  0000:00:18.5  0000:62:02.0  0000:c1:00.0  0000:c5:00.2  0000:c7:00.0
0000:00:02.0  0000:00:08.0  0000:00:18.0  0000:00:18.6  0000:62:03.0  0000:c2:00.0  0000:c5:00.4  0000:c7:00.3

After disabling runtime PM on 62:02.0, we have 83:00.0, its downstream port
84:01.0, and 85:00.0, and the tbt storage is recognized.

$ echo on | sudo tee /sys/bus/pci/devices/0000:62:02.0/power/control
on

$ ls /sys/bus/pci/devices
0000:00:00.0  0000:00:02.1  0000:00:08.1  0000:00:18.1  0000:00:18.7  0000:62:04.0  0000:a4:00.0  0000:c5:00.1  0000:c6:00.1
0000:00:00.2  0000:00:02.3  0000:00:08.2  0000:00:18.2  0000:61:00.0  0000:83:00.0  0000:c1:00.0  0000:c5:00.2  0000:c7:00.0
0000:00:01.0  0000:00:02.4  0000:00:08.3  0000:00:18.3  0000:62:00.0  0000:84:01.0  0000:c2:00.0  0000:c5:00.4  0000:c7:00.3
0000:00:01.1  0000:00:02.5  0000:00:14.0  0000:00:18.4  0000:62:01.0  0000:85:00.0  0000:c3:00.0  0000:c5:00.5  0000:c7:00.4
0000:00:01.2  0000:00:03.0  0000:00:14.3  0000:00:18.5  0000:62:02.0  0000:a2:00.0  0000:c4:00.0  0000:c5:00.7  0000:c7:00.5
0000:00:02.0  0000:00:08.0  0000:00:18.0  0000:00:18.6  0000:62:03.0  0000:a3:01.0  0000:c5:00.0  0000:c6:00.0  0000:c7:00.6

BTW, a rescan also works around the issue

$ echo 1 | sudo tee /sys/bus/pci/devices/0000:62:02.0/rescan
>
> (+Gil)
>
> There is also one patch that fixes the driver to follow more closely the CM
> guide and that's related to the PCIe tunneling and may actually explain the
> issue you see:
>
>   https://lore.kernel.org/linux-usb/20260127094953.GF2275908@black.igk.intel.com/
>
> Note it only does that for USB4 routers so you may need to tune that so
> that it skips that check but there is complication because IIRC LTTSM bits
> do not match the USB4 ones. One thing to try is to just check the USB4 PCIe
> adapter side that it is in detect.
I applied this series directly on top of 6.19-rc6, and the issue
persists.

Here is the modification I tried, but it doesn't work.

diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
index 51fd05b3e341..0672d3e3c24e 100644
--- a/drivers/thunderbolt/tunnel.c
+++ b/drivers/thunderbolt/tunnel.c
@@ -302,14 +302,38 @@ static int tb_pci_pre_activate(struct tb_tunnel *tunnel)
        struct tb_port *up = tunnel->dst_port;
        int ret;

-       if (!tb_switch_is_usb4(down->sw) || !tb_switch_is_usb4(up->sw))
-               return 0;
+       /*
+        * Try checking LTSSM state for both USB4 and TBT3 devices.
+        * Check at least the USB4 side if only one side is USB4.
+        */
+       tb_port_dbg(down, "PCIe downstream port: USB4=%d\n", tb_switch_is_usb4(down->sw));
+       tb_port_dbg(up, "PCIe upstream port: USB4=%d\n", tb_switch_is_usb4(up->sw));

-       ret = usb4_pci_port_check_ltssm_state(down, TB_PCIE_LTSSM_DETECT);
-       if (ret)
-               return ret;
+       if (tb_switch_is_usb4(down->sw)) {
+               tb_port_dbg(down, "Checking PCIe downstream LTSSM state\n");
+               ret = usb4_pci_port_check_ltssm_state(down, TB_PCIE_LTSSM_DETECT);
+               if (ret) {
+                       tb_port_warn(down, "PCIe adapter not in detect state: %d\n", ret);
+                       return ret;
+               }
+               tb_port_dbg(down, "PCIe downstream adapter in detect state\n");
+       } else {
+               tb_port_dbg(down, "Skipping LTSSM check (not USB4)\n");
+       }
+
+       if (tb_switch_is_usb4(up->sw)) {
+               tb_port_dbg(up, "Checking PCIe upstream LTSSM state\n");
+               ret = usb4_pci_port_check_ltssm_state(up, TB_PCIE_LTSSM_DETECT);
+               if (ret) {
+                       tb_port_warn(up, "PCIe adapter not in detect state: %d\n", ret);
+                       return ret;
+               }
+               tb_port_dbg(up, "PCIe upstream adapter in detect state\n");
+       } else {
+               tb_port_dbg(up, "Skipping LTSSM check (not USB4)\n");
+       }

-       return usb4_pci_port_check_ltssm_state(up, TB_PCIE_LTSSM_DETECT);
+       return 0;
 }

 static int tb_pci_set_ext_encapsulation(struct tb_tunnel *tunnel, bool enable)

$ sudo dmesg | egrep "PCIe|USB4"

Plug the first tbt storage,

[  460.465644] [1668] thunderbolt 0000:c7:00.6:  Port 4: 8086:15c0 (Revision: 1, TB Version: 1, Type: PCIe (0x100102))
[  460.891208] [3953] thunderbolt 0000:c7:00.6: 2:12: PCIe downstream port: USB4=1
[  460.891210] [3953] thunderbolt 0000:c7:00.6: 702:4: PCIe upstream port: USB4=0
[  460.891212] [3953] thunderbolt 0000:c7:00.6: 2:12: Checking PCIe downstream LTSSM state
[  460.891327] [3953] thunderbolt 0000:c7:00.6: 2:12: PCIe downstream adapter in detect state
[  460.891328] [3953] thunderbolt 0000:c7:00.6: 702:4: Skipping LTSSM check (not USB4)
[  460.891329] [3953] thunderbolt 0000:c7:00.6: activating PCIe Down path from 2:12 to 702:4
[  460.891849] [3953] thunderbolt 0000:c7:00.6: PCIe Down path activation complete
[  460.891850] [3953] thunderbolt 0000:c7:00.6: activating PCIe Up path from 702:4 to 2:12
[  460.892375] [3953] thunderbolt 0000:c7:00.6: PCIe Up path activation complete
[  461.018893] pci 0000:a2:00.0: [8086:15c0] type 01 class 0x060400 PCIe Switch Upstream Port
[  461.019746] pci 0000:a2:00.0: 2.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x1 link at 0000:00:01.2 (capable of 8.000 Gb/s with 2.5 GT/s PCIe x4 link)
[  461.020620] pci 0000:a3:01.0: [8086:15c0] type 01 class 0x060400 PCIe Switch Downstream Port
[  461.021846] pci 0000:a4:00.0: [144d:a809] type 00 class 0x010802 PCIe Endpoint
[  461.022555] pci 0000:a4:00.0: 2.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x1 link at 0000:00:01.2 (capable of 31.504 Gb/s with 8.0 GT/s PCIe x4 link)

And then the second tbt storage,

[  472.025559] [1668] thunderbolt 0000:c7:00.6:  Port 4: 8086:15c0 (Revision: 1, TB Version: 1, Type: PCIe (0x100102))
[  472.451726] [3953] thunderbolt 0000:c7:00.6: 2:11: PCIe downstream port: USB4=1
[  472.451728] [3953] thunderbolt 0000:c7:00.6: 502:4: PCIe upstream port: USB4=0
[  472.451729] [3953] thunderbolt 0000:c7:00.6: 2:11: Checking PCIe downstream LTSSM state
[  472.451851] [3953] thunderbolt 0000:c7:00.6: 2:11: PCIe downstream adapter in detect state
[  472.451852] [3953] thunderbolt 0000:c7:00.6: 502:4: Skipping LTSSM check (not USB4)
[  472.451853] [3953] thunderbolt 0000:c7:00.6: activating PCIe Down path from 2:11 to 502:4
[  472.452373] [3953] thunderbolt 0000:c7:00.6: PCIe Down path activation complete
[  472.452374] [3953] thunderbolt 0000:c7:00.6: activating PCIe Up path from 502:4 to 2:11
[  472.452893] [3953] thunderbolt 0000:c7:00.6: PCIe Up path activation complete

My issue should be happening after the PCIe tunnel is activated.

>
> Second thing to try is to disable PCIe ASPM L1. We can do that from CM side
> like we do already in tb_switch_pcie_l1_enable(). If possible you can try
> so that you disable it from the BIOS (but don't use the pcie_aspm=  command
> lne parameter).
I can't disable PCIe ASPM L1 from the driver, and there is no option in
the BIOS to switch it off.

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-29  5:45                     ` Chia-Lin Kao (AceLan)
@ 2026-01-29  6:50                       ` Mika Westerberg
  2026-02-03  9:04                         ` Jayi Li
  2026-02-12  4:16                         ` AceLan Kao
  0 siblings, 2 replies; 26+ messages in thread
From: Mika Westerberg @ 2026-01-29  6:50 UTC (permalink / raw)
  To: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine

On Thu, Jan 29, 2026 at 01:45:51PM +0800, Chia-Lin Kao (AceLan) wrote:
> On Tue, Jan 27, 2026 at 11:17:01AM +0100, Mika Westerberg wrote:
> > On Tue, Jan 27, 2026 at 09:45:13AM +0100, Mika Westerberg wrote:
> > > On Tue, Jan 27, 2026 at 01:04:20PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > On Mon, Jan 26, 2026 at 12:56:54PM +0100, Mika Westerberg wrote:
> > > > > On Mon, Jan 26, 2026 at 03:48:48PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > On Mon, Jan 26, 2026 at 06:42:31AM +0100, Mika Westerberg wrote:
> > > > > > > On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > > Hi,
> > > > > > > > On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
> > > > > > > > > Hi,
> > > > > > > > >
> > > > > > > > > On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > > > > > Can you comment out call to tb_switch_xhci_connect() and see if that
> > > > > > > > > > > changes anything?
> > > > > > > > > >
> > > > > > > > > > Here is what I modified, and the problem becomes a little bit complicated.
> > > > > > > > >
> > > > > > > > > Okay I see it did not change anything (well this is kind of what I
> > > > > > > > > expected). Thanks for trying.
> > > > > > > > >
> > > > > > > > > I see in your log that the PCIe tunnel is established just fine. It's just
> > > > > > > > > that there is no PCIe hotplug happening or it is happening but the PCIe
> > > > > > > > > Downstream Port is not waking up.
> > > > > > > > >
> > > > > > > > > I figured you have following USB4/TB topology, right?
> > > > > > > > >
> > > > > > > > >   AMD Host <-> GR Hub <-> TB3 Hub
> > > > > > > > >                   ^
> > > > > > > > >                   |
> > > > > > > > >                 TB3 Hub
> > > > > > > > Should be more like this
> > > > > > > >   AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
> > > > > > > >                              \
> > > > > > > >                               <-> OWC Envoy Express (1-702)
> > > > > > > > or
> > > > > > > >   AMD Host (1-0, domain1)
> > > > > > > >       |
> > > > > > > >       └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
> > > > > > > >                       ├─ Port 5 ──→ OWC Envoy Express (1-502)
> > > > > > > >                       └─ Port 7 ──→ OWC Envoy Express (1-702)
> > > > > > >
> > > > > > > Okay so the same ;-)
> > > > > > >
> > > > > > > > > What if you run 'lspci' after the issue reproduces? Does that bring the
> > > > > > > > > missing PCIe devices? I suspect that this is due to older TB3 devices that
> > > > > > > > > they may need bit more time to get the PCIe link (going over the tunnel) up
> > > > > > > > > and running.
> > > > > > > > lspci doesn't bring back the missing tbt storage.
> > > > > > >
> > > > > > > Forgot to mention that let it (the whole topology) enter runtime suspend
> > > > > > > before you run lspci.
> > > > > >
> > > > > > https://people.canonical.com/~acelan/bugs/tbt_storage/dmesg_lspci.log
> > > > > >
> > > > > > The behavior is strange, the following 3 devices keep entering D3cold and then comes back
> > > > > > to D0 quickly. So, I'm not sure if the lspci do the actions you want.
> > > > >
> > > > > Yes. I should have mentioned so the lspci is there exactly to trigger
> > > > > runtime resume of the topology. I was hoping the PCIe links get
> > > > > re-established properly then.
> > > > >
> > > > > Can you do so that you:
> > > > >
> > > > > 1. Plug in the dock.
> > > > > 2. Plug in the other storage to the dock.
> > > > > 3. Block runtime PM from the PCIe Downstream Port that should lead to the
> > > > >    second storage device PCIe Upstream Port
> > > > >
> > > > >  # echo on > /sys/bus/pci/devices/DEVICE/power/control
> > > > >
> > > > > 4. Connect the second storage device and enable PCIe tunnel.
> > > > >
> > > > > Does that make it work each time?
> > > > Yes, follow the steps makes it work.
> > > >
> > > >    echo on | sudo tee /sys/bus/pci/devices/*/*/power/control
> > > >
> > > > Re-plug the dock, need to disable the runpm again.
> > >
> > > But can you just block it from the PCIe Downstream Port that leads to the
> > > "non-working" storage before you enable PCIe tunnel? Not for all the
> > > devices.
> > >
> > > (let me know if you want help locating the correct device).
> > >
> > > Does it still work?
> Here's the full PCI device chain graph:
> 
>     0000:00:01.2 - AMD PCI Root Port
>         |
>         └─ 0000:61:00.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
>                |
>                └─ 0000:62:02.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
>                       |
>                       └─ 0000:83:00.0 - Intel TBT3 Bridge (Upstream Port) [Alpine Ridge LP]
>                              |
>                              └─ 0000:84:01.0 - Intel TBT3 Bridge (Downstream Port) [Alpine Ridge LP]
>                                     |
>                                     └─ 0000:85:00.0 - Sandisk PC SN740 NVMe SSD (nvme2)
> 
> When the tbt storage is not recognized, we don't have 83:00.0 and its
> downstream port 84:01.0.
> 
> $ ls /sys/bus/pci/devices
> 0000:00:00.0  0000:00:02.1  0000:00:08.1  0000:00:18.1  0000:00:18.7  0000:62:04.0  0000:c3:00.0  0000:c5:00.5  0000:c7:00.4
> 0000:00:00.2  0000:00:02.3  0000:00:08.2  0000:00:18.2  0000:61:00.0  0000:a2:00.0  0000:c4:00.0  0000:c5:00.7  0000:c7:00.5
> 0000:00:01.0  0000:00:02.4  0000:00:08.3  0000:00:18.3  0000:62:00.0  0000:a3:01.0  0000:c5:00.0  0000:c6:00.0  0000:c7:00.6
> 0000:00:01.1  0000:00:02.5  0000:00:14.0  0000:00:18.4  0000:62:01.0  0000:a4:00.0  0000:c5:00.1  0000:c6:00.1
> 0000:00:01.2  0000:00:03.0  0000:00:14.3  0000:00:18.5  0000:62:02.0  0000:c1:00.0  0000:c5:00.2  0000:c7:00.0
> 0000:00:02.0  0000:00:08.0  0000:00:18.0  0000:00:18.6  0000:62:03.0  0000:c2:00.0  0000:c5:00.4  0000:c7:00.3
> 
> Disable runpm on 62:02.0, then we have 83:00.0 and its downstream port
> 84:01.0 and 85:00.0, and then the tbt storage is recognized.

Okay, that means there is nothing wrong with the PCIe tunnel itself; it's
just that the PCIe side either does not get the PME or does not see that
the PCIe link becomes active (e.g. the PCIe Downstream Port runtime suspends
itself before the link status changes).

PME works so that there is a wake first — on Intel it's a GPE that wakes up
the root port — then the PCIe stack wakes up the devices, and then the PME
message is sent to the root complex.

If you do this on an Intel host, do you see the same?

> 
> $ echo on | sudo tee /sys/bus/pci/devices/0000:62:02.0/power/control
> on
> 
> $ ls /sys/bus/pci/devices
> 0000:00:00.0  0000:00:02.1  0000:00:08.1  0000:00:18.1  0000:00:18.7  0000:62:04.0  0000:a4:00.0  0000:c5:00.1  0000:c6:00.1
> 0000:00:00.2  0000:00:02.3  0000:00:08.2  0000:00:18.2  0000:61:00.0  0000:83:00.0  0000:c1:00.0  0000:c5:00.2  0000:c7:00.0
> 0000:00:01.0  0000:00:02.4  0000:00:08.3  0000:00:18.3  0000:62:00.0  0000:84:01.0  0000:c2:00.0  0000:c5:00.4  0000:c7:00.3
> 0000:00:01.1  0000:00:02.5  0000:00:14.0  0000:00:18.4  0000:62:01.0  0000:85:00.0  0000:c3:00.0  0000:c5:00.5  0000:c7:00.4
> 0000:00:01.2  0000:00:03.0  0000:00:14.3  0000:00:18.5  0000:62:02.0  0000:a2:00.0  0000:c4:00.0  0000:c5:00.7  0000:c7:00.5
> 0000:00:02.0  0000:00:08.0  0000:00:18.0  0000:00:18.6  0000:62:03.0  0000:a3:01.0  0000:c5:00.0  0000:c6:00.0  0000:c7:00.6
> 
> BTW, rescan also workaround the issue
> 
> $ echo 1 | sudo tee /sys/bus/pci/devices/0000:62:02.0/rescan
> >
> > (+Gil)
> >
> > There is also one patch that fixes the driver to follow more closely the CM
> > guide and that's related to the PCIe tunneling and may actually explain the
> > issue you see:
> >
> >   https://lore.kernel.org/linux-usb/20260127094953.GF2275908@black.igk.intel.com/
> >
> > Note it only does that for USB4 routers so you may need to tune that so
> > that it skips that check but there is complication because IIRC LTTSM bits
> > do not match the USB4 ones. One thing to try is to just check the USB4 PCIe
> > adapter side that it is in detect.
> I applied this series directly on top of 6.19-rc6, and the issue
> persists.
> 
> Here is the modification I tried, but it doesn't work.
> 
> diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
> index 51fd05b3e341..0672d3e3c24e 100644
> --- a/drivers/thunderbolt/tunnel.c
> +++ b/drivers/thunderbolt/tunnel.c
> @@ -302,14 +302,38 @@ static int tb_pci_pre_activate(struct tb_tunnel *tunnel)
>         struct tb_port *up = tunnel->dst_port;
>         int ret;
> 
> -       if (!tb_switch_is_usb4(down->sw) || !tb_switch_is_usb4(up->sw))
> -               return 0;
> +       /*
> +        * Try checking LTSSM state for both USB4 and TBT3 devices.
> +        * Check at least the USB4 side if only one side is USB4.
> +        */
> +       tb_port_dbg(down, "PCIe downstream port: USB4=%d\n", tb_switch_is_usb4(down->sw));
> +       tb_port_dbg(up, "PCIe upstream port: USB4=%d\n", tb_switch_is_usb4(up->sw));
> 
> -       ret = usb4_pci_port_check_ltssm_state(down, TB_PCIE_LTSSM_DETECT);
> -       if (ret)
> -               return ret;
> +       if (tb_switch_is_usb4(down->sw)) {
> +               tb_port_dbg(down, "Checking PCIe downstream LTSSM state\n");
> +               ret = usb4_pci_port_check_ltssm_state(down, TB_PCIE_LTSSM_DETECT);
> +               if (ret) {
> +                       tb_port_warn(down, "PCIe adapter not in detect state: %d\n", ret);
> +                       return ret;
> +               }
> +               tb_port_dbg(down, "PCIe downstream adapter in detect state\n");
> +       } else {
> +               tb_port_dbg(down, "Skipping LTSSM check (not USB4)\n");
> +       }
> +
> +       if (tb_switch_is_usb4(up->sw)) {
> +               tb_port_dbg(up, "Checking PCIe upstream LTSSM state\n");
> +               ret = usb4_pci_port_check_ltssm_state(up, TB_PCIE_LTSSM_DETECT);
> +               if (ret) {
> +                       tb_port_warn(up, "PCIe adapter not in detect state: %d\n", ret);
> +                       return ret;
> +               }
> +               tb_port_dbg(up, "PCIe upstream adapter in detect state\n");
> +       } else {
> +               tb_port_dbg(up, "Skipping LTSSM check (not USB4)\n");
> +       }
> 
> -       return usb4_pci_port_check_ltssm_state(up, TB_PCIE_LTSSM_DETECT);
> +       return 0;
>  }
> 
>  static int tb_pci_set_ext_encapsulation(struct tb_tunnel *tunnel, bool enable)
> 
> $ sudo dmesg | egrep "PCIe|USB4"
> 
> Plug the first tbt storage,
> 
> [  460.465644] [1668] thunderbolt 0000:c7:00.6:  Port 4: 8086:15c0 (Revision: 1, TB Version: 1, Type: PCIe (0x100102))
> [  460.891208] [3953] thunderbolt 0000:c7:00.6: 2:12: PCIe downstream port: USB4=1
> [  460.891210] [3953] thunderbolt 0000:c7:00.6: 702:4: PCIe upstream port: USB4=0
> [  460.891212] [3953] thunderbolt 0000:c7:00.6: 2:12: Checking PCIe downstream LTSSM state
> [  460.891327] [3953] thunderbolt 0000:c7:00.6: 2:12: PCIe downstream adapter in detect state
> [  460.891328] [3953] thunderbolt 0000:c7:00.6: 702:4: Skipping LTSSM check (not USB4)
> [  460.891329] [3953] thunderbolt 0000:c7:00.6: activating PCIe Down path from 2:12 to 702:4
> [  460.891849] [3953] thunderbolt 0000:c7:00.6: PCIe Down path activation complete
> [  460.891850] [3953] thunderbolt 0000:c7:00.6: activating PCIe Up path from 702:4 to 2:12
> [  460.892375] [3953] thunderbolt 0000:c7:00.6: PCIe Up path activation complete
> [  461.018893] pci 0000:a2:00.0: [8086:15c0] type 01 class 0x060400 PCIe Switch Upstream Port
> [  461.019746] pci 0000:a2:00.0: 2.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x1 link at 0000:00:01.2 (capable of 8.000 Gb/s with 2.5 GT/s PCIe x4 link)
> [  461.020620] pci 0000:a3:01.0: [8086:15c0] type 01 class 0x060400 PCIe Switch Downstream Port
> [  461.021846] pci 0000:a4:00.0: [144d:a809] type 00 class 0x010802 PCIe Endpoint
> [  461.022555] pci 0000:a4:00.0: 2.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x1 link at 0000:00:01.2 (capable of 31.504 Gb/s with 8.0 GT/s PCIe x4 link)
> 
> And then the second tbt storage,
> 
> [  472.025559] [1668] thunderbolt 0000:c7:00.6:  Port 4: 8086:15c0 (Revision: 1, TB Version: 1, Type: PCIe (0x100102))
> [  472.451726] [3953] thunderbolt 0000:c7:00.6: 2:11: PCIe downstream port: USB4=1
> [  472.451728] [3953] thunderbolt 0000:c7:00.6: 502:4: PCIe upstream port: USB4=0
> [  472.451729] [3953] thunderbolt 0000:c7:00.6: 2:11: Checking PCIe downstream LTSSM state
> [  472.451851] [3953] thunderbolt 0000:c7:00.6: 2:11: PCIe downstream adapter in detect state
> [  472.451852] [3953] thunderbolt 0000:c7:00.6: 502:4: Skipping LTSSM check (not USB4)
> [  472.451853] [3953] thunderbolt 0000:c7:00.6: activating PCIe Down path from 2:11 to 502:4
> [  472.452373] [3953] thunderbolt 0000:c7:00.6: PCIe Down path activation complete
> [  472.452374] [3953] thunderbolt 0000:c7:00.6: activating PCIe Up path from 502:4 to 2:11
> [  472.452893] [3953] thunderbolt 0000:c7:00.6: PCIe Up path activation complete
> 
> My issue should be happening after the PCIe tunnel is activated.

Right at that point the PCIe Downstream Port probably is already back
runtime suspended.

Here you could try this:

# echo 250 > /sys/bus/pci/devices/0000:62:02.0/power/autosuspend_delay

It should keep the port awake a little longer; hopefully it can then catch
the link becoming active.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-29  6:50                       ` Mika Westerberg
@ 2026-02-03  9:04                         ` Jayi Li
  2026-02-03  9:39                           ` Mika Westerberg
  2026-02-12  4:16                         ` AceLan Kao
  1 sibling, 1 reply; 26+ messages in thread
From: Jayi Li @ 2026-02-03  9:04 UTC (permalink / raw)
  To: Mika Westerberg, Chia-Lin Kao (AceLan), Andreas Noever,
	Mika Westerberg, Yehezkel Bernat, linux-usb, linux-kernel,
	Gil Fine

Hi,

在 2026/1/29 14:50, Mika Westerberg 写道:
> On Thu, Jan 29, 2026 at 01:45:51PM +0800, Chia-Lin Kao (AceLan) wrote:
>> On Tue, Jan 27, 2026 at 11:17:01AM +0100, Mika Westerberg wrote:
>>> On Tue, Jan 27, 2026 at 09:45:13AM +0100, Mika Westerberg wrote:
>>>> On Tue, Jan 27, 2026 at 01:04:20PM +0800, Chia-Lin Kao (AceLan) wrote:
>>>>> On Mon, Jan 26, 2026 at 12:56:54PM +0100, Mika Westerberg wrote:
>>>>>> On Mon, Jan 26, 2026 at 03:48:48PM +0800, Chia-Lin Kao (AceLan) wrote:
>>>>>>> On Mon, Jan 26, 2026 at 06:42:31AM +0100, Mika Westerberg wrote:
>>>>>>>> On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
>>>>>>>>> Hi,
>>>>>>>>> On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
>>>>>>>>>> Hi,
>>>>>>>>>>
>>>>>>>>>> On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
>>>>>>>>>>>> Can you comment out call to tb_switch_xhci_connect() and see if that
>>>>>>>>>>>> changes anything?
>>>>>>>>>>> Here is what I modified, and the problem becomes a little bit complicated.
>>>>>>>>>> Okay I see it did not change anything (well this is kind of what I
>>>>>>>>>> expected). Thanks for trying.
>>>>>>>>>>
>>>>>>>>>> I see in your log that the PCIe tunnel is established just fine. It's just
>>>>>>>>>> that there is no PCIe hotplug happening or it is happening but the PCIe
>>>>>>>>>> Downstream Port is not waking up.
>>>>>>>>>>
>>>>>>>>>> I figured you have following USB4/TB topology, right?
>>>>>>>>>>
>>>>>>>>>>    AMD Host <-> GR Hub <-> TB3 Hub
>>>>>>>>>>                    ^
>>>>>>>>>>                    |
>>>>>>>>>>                  TB3 Hub
>>>>>>>>> Should be more like this
>>>>>>>>>    AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
>>>>>>>>>                               \
>>>>>>>>>                                <-> OWC Envoy Express (1-702)
>>>>>>>>> or
>>>>>>>>>    AMD Host (1-0, domain1)
>>>>>>>>>        |
>>>>>>>>>        └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
>>>>>>>>>                        ├─ Port 5 ──→ OWC Envoy Express (1-502)
>>>>>>>>>                        └─ Port 7 ──→ OWC Envoy Express (1-702)
>>>>>>>> Okay so the same ;-)
>>>>>>>>
>>>>>>>>>> What if you run 'lspci' after the issue reproduces? Does that bring the
>>>>>>>>>> missing PCIe devices? I suspect that this is due to older TB3 devices that
>>>>>>>>>> they may need bit more time to get the PCIe link (going over the tunnel) up
>>>>>>>>>> and running.
>>>>>>>>> lspci doesn't bring back the missing tbt storage.
>>>>>>>> Forgot to mention that let it (the whole topology) enter runtime suspend
>>>>>>>> before you run lspci.
>>>>>>> https://people.canonical.com/~acelan/bugs/tbt_storage/dmesg_lspci.log
>>>>>>>
>>>>>>> The behavior is strange, the following 3 devices keep entering D3cold and then comes back
>>>>>>> to D0 quickly. So, I'm not sure if the lspci do the actions you want.
>>>>>> Yes. I should have mentioned so the lspci is there exactly to trigger
>>>>>> runtime resume of the topology. I was hoping the PCIe links get
>>>>>> re-established properly then.
>>>>>>
>>>>>> Can you do so that you:
>>>>>>
>>>>>> 1. Plug in the dock.
>>>>>> 2. Plug in the other storage to the dock.
>>>>>> 3. Block runtime PM from the PCIe Downstream Port that should lead to the
>>>>>>     second storage device PCIe Upstream Port
>>>>>>
>>>>>>   # echo on > /sys/bus/pci/devices/DEVICE/power/control
>>>>>>
>>>>>> 4. Connect the second storage device and enable PCIe tunnel.
>>>>>>
>>>>>> Does that make it work each time?
>>>>> Yes, follow the steps makes it work.
>>>>>
>>>>>     echo on | sudo tee /sys/bus/pci/devices/*/*/power/control
>>>>>
>>>>> Re-plug the dock, need to disable the runpm again.
>>>> But can you just block it from the PCIe Downstream Port that leads to the
>>>> "non-working" storage before you enable PCIe tunnel? Not for all the
>>>> devices.
>>>>
>>>> (let me know if you want help locating the correct device).
>>>>
>>>> Does it still work?
>> Here's the full PCI device chain graph:
>>
>>      0000:00:01.2 - AMD PCI Root Port
>>          |
>>          └─ 0000:61:00.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
>>                 |
>>                 └─ 0000:62:02.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
>>                        |
>>                        └─ 0000:83:00.0 - Intel TBT3 Bridge (Upstream Port) [Alpine Ridge LP]
>>                               |
>>                               └─ 0000:84:01.0 - Intel TBT3 Bridge (Downstream Port) [Alpine Ridge LP]
>>                                      |
>>                                      └─ 0000:85:00.0 - Sandisk PC SN740 NVMe SSD (nvme2)
>>
>> When the tbt storage is not recognized, we don't have 83:00.0 and its
>> downstream port 84:01.0.
>>
>> $ ls /sys/bus/pci/devices
>> 0000:00:00.0  0000:00:02.1  0000:00:08.1  0000:00:18.1  0000:00:18.7  0000:62:04.0  0000:c3:00.0  0000:c5:00.5  0000:c7:00.4
>> 0000:00:00.2  0000:00:02.3  0000:00:08.2  0000:00:18.2  0000:61:00.0  0000:a2:00.0  0000:c4:00.0  0000:c5:00.7  0000:c7:00.5
>> 0000:00:01.0  0000:00:02.4  0000:00:08.3  0000:00:18.3  0000:62:00.0  0000:a3:01.0  0000:c5:00.0  0000:c6:00.0  0000:c7:00.6
>> 0000:00:01.1  0000:00:02.5  0000:00:14.0  0000:00:18.4  0000:62:01.0  0000:a4:00.0  0000:c5:00.1  0000:c6:00.1
>> 0000:00:01.2  0000:00:03.0  0000:00:14.3  0000:00:18.5  0000:62:02.0  0000:c1:00.0  0000:c5:00.2  0000:c7:00.0
>> 0000:00:02.0  0000:00:08.0  0000:00:18.0  0000:00:18.6  0000:62:03.0  0000:c2:00.0  0000:c5:00.4  0000:c7:00.3
>>
>> Disable runpm on 62:02.0, then we have 83:00.0 and its downstream port
>> 84:01.0 and 85:00.0, and then the tbt storage is recognized.
> Okay that means there is nothing wrong with the PCIe tunnel itself it's
> just that the PCIe side either does not get the PME or does not see that
> the PCIe link becomes active (e.g the PCIe Downstream Port runtime suspends
> itself before the link status changes).
>
> PME work so that there is wake first on Intel it's GPE that wakes up the
> root port and then PCIe stack wakes up devices and then the PME message is
> sent to the root complex.
>
> If you do this on Intel host do you see the same?

I also encountered a similar issue where the PCIe hotplug IRQ is not 
received
after path setup completion. This was observed specifically during 
Thunderbolt 3
device hotplug testing.

To investigate, I applied a debug patch (attached below) to dump 
ADP_PCIE_CS_0.
I observed that when the issue occurs, the PCIe upstream port's LTSSM is 
not in the DETECT state,
yet the PE (Port Enable) bit remains set to 1.

My workaround is to check the LTSSM state before the path setup.
If this specific anomaly is detected, I explicitly set PE to 0 to reset 
the link state.
With this change, the link returns to the correct state. After the path 
setup completes,
the PCIe hotplug IRQ is received correctly.

I'm not sure if this is relevant to this issue, but sharing just in case.

Here is the debug patch I used to observe the ADP_PCIE_CS_0 state:

diff --git a/drivers/thunderbolt/path.c b/drivers/thunderbolt/path.c
index d5d1f520571b..d8808cb614a4 100644
--- a/drivers/thunderbolt/path.c
+++ b/drivers/thunderbolt/path.c
@@ -491,6 +491,25 @@ void tb_path_deactivate(struct tb_path *path)
         path->activated = false;
  }

+void print_adp_pcie_cs_0(struct tb_port *port)
+{
+       u32 val;
+       int ret;
+
+       if (!port || !port->cap_adap ||
+           (!tb_port_is_pcie_down(port) && !tb_port_is_pcie_up(port)))
+               return;
+
+       ret = tb_port_read(port, &val, TB_CFG_PORT,
+                          port->cap_adap + ADP_PCIE_CS_0, 1);
+
+       if (ret)
+               tb_port_warn(port, "failed to read ADP_PCIE_CS_0: %d\n", ret);
+       else
+               tb_port_info(port, "ADP_PCIE_CS_0 = 0x%08x\n", val);
+}
+EXPORT_SYMBOL_GPL(print_adp_pcie_cs_0);
+
  /**
   * tb_path_activate() - activate a path
   * @path: Path to activate
@@ -582,6 +601,17 @@ int tb_path_activate(struct tb_path *path)
         }
         path->activated = true;
         tb_dbg(path->tb, "path activation complete\n");
+
+       if (path) {
+               pr_info("tb_path_activated: Path %s activated, length: %d\n",
+                       path->name, path->path_length);
+
+               for (i = 0; i < path->path_length; i++) {
+                       print_adp_pcie_cs_0(path->hops[i].in_port);
+                       print_adp_pcie_cs_0(path->hops[i].out_port);
+               }
+       }
+
         return 0;
  err:
         tb_WARN(path->tb, "path activation failed\n");
diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
index b1458b741b7d..22c70f18f0ff 100644
--- a/drivers/thunderbolt/tunnel.c
+++ b/drivers/thunderbolt/tunnel.c
@@ -208,6 +208,9 @@ static int tb_pci_activate(struct tb_tunnel *tunnel, bool activate)
                         return res;
         }

+       print_adp_pcie_cs_0(tunnel->src_port);
+       print_adp_pcie_cs_0(tunnel->dst_port);
+
         return activate ? 0 : tb_pci_set_ext_encapsulation(tunnel, activate);
  }

@@ -2191,6 +2194,9 @@ int tb_tunnel_restart(struct tb_tunnel *tunnel)
                 }
         }

+       print_adp_pcie_cs_0(tunnel->src_port);
+       print_adp_pcie_cs_0(tunnel->dst_port);
+
         if (tunnel->init) {
                 res = tunnel->init(tunnel);
                 if (res)


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-03  9:04                         ` Jayi Li
@ 2026-02-03  9:39                           ` Mika Westerberg
  2026-02-03 10:00                             ` Jayi Li
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-02-03  9:39 UTC (permalink / raw)
  To: Jayi Li
  Cc: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine

Hi,

On Tue, Feb 03, 2026 at 05:04:53PM +0800, Jayi Li wrote:
> > If you do this on Intel host do you see the same?
> 
> I also encountered a similar issue where the PCIe hotplug IRQ is not
> received
> after path setup completion. This was observed specifically during
> Thunderbolt 3
> device hotplug testing.
> 
> To investigate, I applied a debug patch (attached below) to dump
> ADP_PCIE_CS_0.
> I observed that when the issue occurs, the PCIe upstream port's LTSSM is not
> in the DETECT state,
> yet the PE (Port Enable) bit remains set to 1.

The PCIe Upstream Port is TB3 device?

For TB3 there is no LTSSM state in that register so the value you read can
be anything.

We can do it for USB4, like with this patch:

https://lore.kernel.org/linux-usb/20260127094953.GF2275908@black.igk.intel.com/

It should be modified so that it just checks for the USB4 side.

> My workaround is to check the LTSSM state before the path setup.
> If this specific anomaly is detected, I explicitly set PE to 0 to reset the
> link state.
> With this change, the link returns to the correct state. After the path
> setup completes,
> the PCIe hotplug IRQ is received correctly.
> 
> I'm not sure if this is relevant to this issue, but sharing just in case.

Thanks for sharing!

It could be. What device this is? 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-03  9:39                           ` Mika Westerberg
@ 2026-02-03 10:00                             ` Jayi Li
  2026-02-03 10:07                               ` Mika Westerberg
  0 siblings, 1 reply; 26+ messages in thread
From: Jayi Li @ 2026-02-03 10:00 UTC (permalink / raw)
  To: Mika Westerberg
  Cc: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine

Hi,

在 2026/2/3 17:39, Mika Westerberg 写道:
> Hi,
>
> On Tue, Feb 03, 2026 at 05:04:53PM +0800, Jayi Li wrote:
>>> If you do this on Intel host do you see the same?
>> I also encountered a similar issue where the PCIe hotplug IRQ is not
>> received
>> after path setup completion. This was observed specifically during
>> Thunderbolt 3
>> device hotplug testing.
>>
>> To investigate, I applied a debug patch (attached below) to dump
>> ADP_PCIE_CS_0.
>> I observed that when the issue occurs, the PCIe upstream port's LTSSM is not
>> in the DETECT state,
>> yet the PE (Port Enable) bit remains set to 1.
> The PCIe Upstream Port is TB3 device?
Yes.
>
> For TB3 there is no LTSSM state in that register so the value you read can
> be anything.
Apologies for the confusion. I wasn't aware that ADP_PCIE_CS_0 does not 
reflect the LTSSM state on Thunderbolt 3.
> We can do it for USB4, like with this patch:
>
> https://lore.kernel.org/linux-usb/20260127094953.GF2275908@black.igk.intel.com/
>
> It should be modified so that it just checks for the USB4 side.
>
>> My workaround is to check the LTSSM state before the path setup.
>> If this specific anomaly is detected, I explicitly set PE to 0 to reset the
>> link state.
>> With this change, the link returns to the correct state. After the path
>> setup completes,
>> the PCIe hotplug IRQ is received correctly.
>>
>> I'm not sure if this is relevant to this issue, but sharing just in case.
> Thanks for sharing!
>
> It could be. What device this is?
The device is Targus DOCK221.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-03 10:00                             ` Jayi Li
@ 2026-02-03 10:07                               ` Mika Westerberg
  2026-02-04  2:37                                 ` Jayi Li
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-02-03 10:07 UTC (permalink / raw)
  To: Jayi Li
  Cc: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine

On Tue, Feb 03, 2026 at 06:00:06PM +0800, Jayi Li wrote:
> > > I'm not sure if this is relevant to this issue, but sharing just in case.
> > Thanks for sharing!
> > 
> > It could be. What device this is?
> The device is Targus DOCK221.

Is the host Intel or AMD (well can you share bit more details about the
topology)? Then if you block runtime PM of the PCIe Downstream Port that
leads to the TB3 device like:

 # echo on > /sys/bus/pci/devices/DEVICE/power/control

Does it work?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-03 10:07                               ` Mika Westerberg
@ 2026-02-04  2:37                                 ` Jayi Li
  2026-02-04 12:37                                   ` Mika Westerberg
  0 siblings, 1 reply; 26+ messages in thread
From: Jayi Li @ 2026-02-04  2:37 UTC (permalink / raw)
  To: Mika Westerberg
  Cc: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine


在 2026/2/3 18:07, Mika Westerberg 写道:
> On Tue, Feb 03, 2026 at 06:00:06PM +0800, Jayi Li wrote:
>>>> I'm not sure if this is relevant to this issue, but sharing just in case.
>>> Thanks for sharing!
>>>
>>> It could be. What device this is?
>> The device is Targus DOCK221.
> Is the host Intel or AMD (well can you share bit more details about the
> topology)? Then if you block runtime PM of the PCIe Downstream Port that
> leads to the TB3 device like:
>
>   # echo on > /sys/bus/pci/devices/DEVICE/power/control
>
> Does it work?

The host is ASMedia.

  ASMedia Host (0-0, domain0)
          |
         └─ Port 3 ──→ Thunderbolt 3 Dock (0-3)

I tried disabling runtime PM by writing 'on' to power/control for the 
downstream port, but it did not work.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-04  2:37                                 ` Jayi Li
@ 2026-02-04 12:37                                   ` Mika Westerberg
  2026-02-06  1:17                                     ` Jayi Li
  2026-02-09  1:30                                     ` Jayi Li
  0 siblings, 2 replies; 26+ messages in thread
From: Mika Westerberg @ 2026-02-04 12:37 UTC (permalink / raw)
  To: Jayi Li
  Cc: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine

On Wed, Feb 04, 2026 at 10:37:58AM +0800, Jayi Li wrote:
> 
> 在 2026/2/3 18:07, Mika Westerberg 写道:
> > On Tue, Feb 03, 2026 at 06:00:06PM +0800, Jayi Li wrote:
> > > > > I'm not sure if this is relevant to this issue, but sharing just in case.
> > > > Thanks for sharing!
> > > > 
> > > > It could be. What device this is?
> > > The device is Targus DOCK221.
> > Is the host Intel or AMD (well can you share bit more details about the
> > topology)? Then if you block runtime PM of the PCIe Downstream Port that
> > leads to the TB3 device like:
> > 
> >   # echo on > /sys/bus/pci/devices/DEVICE/power/control
> > 
> > Does it work?
> 
> The host is ASMedia.

Ah okay. I don't have any experience with ASMedia host. What system/laptop
comes with that?

>  ASMedia Host (0-0, domain0)
>          |
>         └─ Port 3 ──→ Thunderbolt 3 Dock (0-3)
> 
> I tried disabling runtime PM by writing 'on' to power/control for the
> downstream port, but it did not work.

I think that's integrated into the SoC, right? So it's a PCIe root port
used for tunneling. What does 'sudo lspci -vv' say?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-04 12:37                                   ` Mika Westerberg
@ 2026-02-06  1:17                                     ` Jayi Li
  2026-02-09  1:30                                     ` Jayi Li
  1 sibling, 0 replies; 26+ messages in thread
From: Jayi Li @ 2026-02-06  1:17 UTC (permalink / raw)
  To: Mika Westerberg
  Cc: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine


在 2026/2/4 20:37, Mika Westerberg 写道:
> On Wed, Feb 04, 2026 at 10:37:58AM +0800, Jayi Li wrote:
>> 在 2026/2/3 18:07, Mika Westerberg 写道:
>>> On Tue, Feb 03, 2026 at 06:00:06PM +0800, Jayi Li wrote:
>>>>>> I'm not sure if this is relevant to this issue, but sharing just in case.
>>>>> Thanks for sharing!
>>>>>
>>>>> It could be. What device this is?
>>>> The device is Targus DOCK221.
>>> Is the host Intel or AMD (well can you share bit more details about the
>>> topology)? Then if you block runtime PM of the PCIe Downstream Port that
>>> leads to the TB3 device like:
>>>
>>>    # echo on > /sys/bus/pci/devices/DEVICE/power/control
>>>
>>> Does it work?
>> The host is ASMedia.
> Ah okay. I don't have any experience with ASMedia host. What system/laptop
> comes with that?
>
>>   ASMedia Host (0-0, domain0)
>>           |
>>          └─ Port 3 ──→ Thunderbolt 3 Dock (0-3)
>>
>> I tried disabling runtime PM by writing 'on' to power/control for the
>> downstream port, but it did not work.
> I think that's integrated into the SoC, right? So it's a PCIe root port
> used for tunneling. What does 'sudo lspci -vv' say?
Thanks for your help. The issue I encountered occurred during a testing 
phase. I will try to verify it later on an Intel/AMD host system and 
provide updates if available. Sorry that I can't offer more details at 
the moment.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-04 12:37                                   ` Mika Westerberg
  2026-02-06  1:17                                     ` Jayi Li
@ 2026-02-09  1:30                                     ` Jayi Li
  1 sibling, 0 replies; 26+ messages in thread
From: Jayi Li @ 2026-02-09  1:30 UTC (permalink / raw)
  To: Mika Westerberg
  Cc: Chia-Lin Kao (AceLan), Andreas Noever, Mika Westerberg,
	Yehezkel Bernat, linux-usb, linux-kernel, Gil Fine


在 2026/2/4 20:37, Mika Westerberg 写道:
> On Wed, Feb 04, 2026 at 10:37:58AM +0800, Jayi Li wrote:
>> 在 2026/2/3 18:07, Mika Westerberg 写道:
>>> On Tue, Feb 03, 2026 at 06:00:06PM +0800, Jayi Li wrote:
>>>>>> I'm not sure if this is relevant to this issue, but sharing just in case.
>>>>> Thanks for sharing!
>>>>>
>>>>> It could be. What device this is?
>>>> The device is Targus DOCK221.
>>> Is the host Intel or AMD (well can you share bit more details about the
>>> topology)? Then if you block runtime PM of the PCIe Downstream Port that
>>> leads to the TB3 device like:
>>>
>>>    # echo on > /sys/bus/pci/devices/DEVICE/power/control
>>>
>>> Does it work?
>> The host is ASMedia.
> Ah okay. I don't have any experience with ASMedia host. What system/laptop
> comes with that?
>
>>   ASMedia Host (0-0, domain0)
>>           |
>>          └─ Port 3 ──→ Thunderbolt 3 Dock (0-3)
>>
>> I tried disabling runtime PM by writing 'on' to power/control for the
>> downstream port, but it did not work.
> I think that's integrated into the SoC, right? So it's a PCIe root port
> used for tunneling. What does 'sudo lspci -vv' say?
Thanks for your help. The issue I encountered occurred during a testing 
phase. I will try to verify it later on an Intel/AMD host system and 
provide updates if available. Sorry that I can't offer more details at 
the moment.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-01-29  6:50                       ` Mika Westerberg
  2026-02-03  9:04                         ` Jayi Li
@ 2026-02-12  4:16                         ` AceLan Kao
  2026-02-12  7:07                           ` Mika Westerberg
  1 sibling, 1 reply; 26+ messages in thread
From: AceLan Kao @ 2026-02-12  4:16 UTC (permalink / raw)
  To: Mika Westerberg
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel, Gil Fine

Mika Westerberg <mika.westerberg@linux.intel.com> 於 2026年1月29日週四 下午2:50寫道:
>
> On Thu, Jan 29, 2026 at 01:45:51PM +0800, Chia-Lin Kao (AceLan) wrote:
> > On Tue, Jan 27, 2026 at 11:17:01AM +0100, Mika Westerberg wrote:
> > > On Tue, Jan 27, 2026 at 09:45:13AM +0100, Mika Westerberg wrote:
> > > > On Tue, Jan 27, 2026 at 01:04:20PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > On Mon, Jan 26, 2026 at 12:56:54PM +0100, Mika Westerberg wrote:
> > > > > > On Mon, Jan 26, 2026 at 03:48:48PM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > On Mon, Jan 26, 2026 at 06:42:31AM +0100, Mika Westerberg wrote:
> > > > > > > > On Mon, Jan 26, 2026 at 11:30:47AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > > > Hi,
> > > > > > > > > On Fri, Jan 23, 2026 at 01:01:12PM +0100, Mika Westerberg wrote:
> > > > > > > > > > Hi,
> > > > > > > > > >
> > > > > > > > > > On Fri, Jan 23, 2026 at 10:04:11AM +0800, Chia-Lin Kao (AceLan) wrote:
> > > > > > > > > > > > Can you comment out call to tb_switch_xhci_connect() and see if that
> > > > > > > > > > > > changes anything?
> > > > > > > > > > >
> > > > > > > > > > > Here is what I modified, and the problem becomes a little bit complicated.
> > > > > > > > > >
> > > > > > > > > > Okay I see it did not change anything (well this is kind of what I
> > > > > > > > > > expected). Thanks for trying.
> > > > > > > > > >
> > > > > > > > > > I see in your log that the PCIe tunnel is established just fine. It's just
> > > > > > > > > > that there is no PCIe hotplug happening or it is happening but the PCIe
> > > > > > > > > > Downstream Port is not waking up.
> > > > > > > > > >
> > > > > > > > > > I figured you have following USB4/TB topology, right?
> > > > > > > > > >
> > > > > > > > > >   AMD Host <-> GR Hub <-> TB3 Hub
> > > > > > > > > >                   ^
> > > > > > > > > >                   |
> > > > > > > > > >                 TB3 Hub
> > > > > > > > > Should be more like this
> > > > > > > > >   AMD Host <-> Dell TB4 Dock <-> OWC Envoy Express (1-502)
> > > > > > > > >                              \
> > > > > > > > >                               <-> OWC Envoy Express (1-702)
> > > > > > > > > or
> > > > > > > > >   AMD Host (1-0, domain1)
> > > > > > > > >       |
> > > > > > > > >       └─ Port 2 ──→ Dell Thunderbolt 4 Dock (1-2)
> > > > > > > > >                       ├─ Port 5 ──→ OWC Envoy Express (1-502)
> > > > > > > > >                       └─ Port 7 ──→ OWC Envoy Express (1-702)
> > > > > > > >
> > > > > > > > Okay so the same ;-)
> > > > > > > >
> > > > > > > > > > What if you run 'lspci' after the issue reproduces? Does that bring the
> > > > > > > > > > missing PCIe devices? I suspect that this is due to older TB3 devices that
> > > > > > > > > > they may need bit more time to get the PCIe link (going over the tunnel) up
> > > > > > > > > > and running.
> > > > > > > > > lspci doesn't bring back the missing tbt storage.
> > > > > > > >
> > > > > > > > Forgot to mention that let it (the whole topology) enter runtime suspend
> > > > > > > > before you run lspci.
> > > > > > >
> > > > > > > https://people.canonical.com/~acelan/bugs/tbt_storage/dmesg_lspci.log
> > > > > > >
> > > > > > > The behavior is strange, the following 3 devices keep entering D3cold and then come back
> > > > > > > to D0 quickly. So, I'm not sure if the lspci does the actions you want.
> > > > > >
> > > > > > Yes. I should have mentioned so the lspci is there exactly to trigger
> > > > > > runtime resume of the topology. I was hoping the PCIe links get
> > > > > > re-established properly then.
> > > > > >
> > > > > > Can you do so that you:
> > > > > >
> > > > > > 1. Plug in the dock.
> > > > > > 2. Plug in the other storage to the dock.
> > > > > > 3. Block runtime PM from the PCIe Downstream Port that should lead to the
> > > > > >    second storage device PCIe Upstream Port
> > > > > >
> > > > > >  # echo on > /sys/bus/pci/devices/DEVICE/power/control
> > > > > >
> > > > > > 4. Connect the second storage device and enable PCIe tunnel.
> > > > > >
> > > > > > Does that make it work each time?
> > > > > Yes, follow the steps makes it work.
> > > > >
> > > > >    echo on | sudo tee /sys/bus/pci/devices/*/*/power/control
> > > > >
> > > > > Re-plug the dock, need to disable the runpm again.
> > > >
> > > > But can you just block it from the PCIe Downstream Port that leads to the
> > > > "non-working" storage before you enable PCIe tunnel? Not for all the
> > > > devices.
> > > >
> > > > (let me know if you want help locating the correct device).
> > > >
> > > > Does it still work?
> > Here's the full PCI device chain graph:
> >
> >     0000:00:01.2 - AMD PCI Root Port
> >         |
> >         └─ 0000:61:00.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
> >                |
> >                └─ 0000:62:02.0 - Intel Thunderbolt 4 Bridge [Goshen Ridge 2020]
> >                       |
> >                       └─ 0000:83:00.0 - Intel TBT3 Bridge (Upstream Port) [Alpine Ridge LP]
> >                              |
> >                              └─ 0000:84:01.0 - Intel TBT3 Bridge (Downstream Port) [Alpine Ridge LP]
> >                                     |
> >                                     └─ 0000:85:00.0 - Sandisk PC SN740 NVMe SSD (nvme2)
> >
> > When the tbt storage is not recognized, we don't have 83:00.0 and its
> > downstream port 84:01.0.
> >
> > $ ls /sys/bus/pci/devices
> > 0000:00:00.0  0000:00:02.1  0000:00:08.1  0000:00:18.1  0000:00:18.7  0000:62:04.0  0000:c3:00.0  0000:c5:00.5  0000:c7:00.4
> > 0000:00:00.2  0000:00:02.3  0000:00:08.2  0000:00:18.2  0000:61:00.0  0000:a2:00.0  0000:c4:00.0  0000:c5:00.7  0000:c7:00.5
> > 0000:00:01.0  0000:00:02.4  0000:00:08.3  0000:00:18.3  0000:62:00.0  0000:a3:01.0  0000:c5:00.0  0000:c6:00.0  0000:c7:00.6
> > 0000:00:01.1  0000:00:02.5  0000:00:14.0  0000:00:18.4  0000:62:01.0  0000:a4:00.0  0000:c5:00.1  0000:c6:00.1
> > 0000:00:01.2  0000:00:03.0  0000:00:14.3  0000:00:18.5  0000:62:02.0  0000:c1:00.0  0000:c5:00.2  0000:c7:00.0
> > 0000:00:02.0  0000:00:08.0  0000:00:18.0  0000:00:18.6  0000:62:03.0  0000:c2:00.0  0000:c5:00.4  0000:c7:00.3
> >
> > Disable runpm on 62:02.0, then we have 83:00.0 and its downstream port
> > 84:01.0 and 85:00.0, and then the tbt storage is recognized.
Got troubles with mutt, my reply got rejected :(

Using gmail and copy/paste the content below again.
>
> Okay that means there is nothing wrong with the PCIe tunnel itself it's
> just that the PCIe side either does not get the PME or does not see that
> the PCIe link becomes active (e.g the PCIe Downstream Port runtime suspends
> itself before the link status changes).
>
> PME works so that there is a wake first: on Intel it's a GPE that wakes up the
> root port, then the PCIe stack wakes up devices, and then the PME message is
> sent to the root complex.
>
> If you do this on Intel host do you see the same?
Intel host exhibits another symptom, I think the root cause is different.

Plug in the dock, and then plug in the tbt storage to the dock one by
one, both storage can be detected.

Plug both tbt storage to the dock, and then plug in the dock to the
machine, only one tbt storage appears. In rare chance, both tbt
storages show up, but most of the time, only one tbt storage is detected.
In this case, none of disable runpm, rescan, or lspci work. So, it's
most likely another issue.
>
> >
> > $ echo on | sudo tee /sys/bus/pci/devices/0000:62:02.0/power/control
> > on
> >
> > $ ls /sys/bus/pci/devices
> > 0000:00:00.0  0000:00:02.1  0000:00:08.1  0000:00:18.1  0000:00:18.7  0000:62:04.0  0000:a4:00.0  0000:c5:00.1  0000:c6:00.1
> > 0000:00:00.2  0000:00:02.3  0000:00:08.2  0000:00:18.2  0000:61:00.0  0000:83:00.0  0000:c1:00.0  0000:c5:00.2  0000:c7:00.0
> > 0000:00:01.0  0000:00:02.4  0000:00:08.3  0000:00:18.3  0000:62:00.0  0000:84:01.0  0000:c2:00.0  0000:c5:00.4  0000:c7:00.3
> > 0000:00:01.1  0000:00:02.5  0000:00:14.0  0000:00:18.4  0000:62:01.0  0000:85:00.0  0000:c3:00.0  0000:c5:00.5  0000:c7:00.4
> > 0000:00:01.2  0000:00:03.0  0000:00:14.3  0000:00:18.5  0000:62:02.0  0000:a2:00.0  0000:c4:00.0  0000:c5:00.7  0000:c7:00.5
> > 0000:00:02.0  0000:00:08.0  0000:00:18.0  0000:00:18.6  0000:62:03.0  0000:a3:01.0  0000:c5:00.0  0000:c6:00.0  0000:c7:00.6
> >
> > BTW, rescan also workaround the issue
> >
> > $ echo 1 | sudo tee /sys/bus/pci/devices/0000:62:02.0/rescan
> > >
> > > (+Gil)
> > >
> > > There is also one patch that fixes the driver to follow more closely the CM
> > > guide and that's related to the PCIe tunneling and may actually explain the
> > > issue you see:
> > >
> > >   https://lore.kernel.org/linux-usb/20260127094953.GF2275908@black.igk.intel.com/
> > >
> > > Note it only does that for USB4 routers so you may need to tune that so
> > > that it skips that check but there is complication because IIRC LTTSM bits
> > > do not match the USB4 ones. One thing to try is to just check the USB4 PCIe
> > > adapter side that it is in detect.
> > I applied this series directly on top of 6.19-rc6, and the issue
> > persists.
> >
> > Here is the modification I tried, but it doesn't work.
> >
> > diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c
> > index 51fd05b3e341..0672d3e3c24e 100644
> > --- a/drivers/thunderbolt/tunnel.c
> > +++ b/drivers/thunderbolt/tunnel.c
> > @@ -302,14 +302,38 @@ static int tb_pci_pre_activate(struct tb_tunnel *tunnel)
> >         struct tb_port *up = tunnel->dst_port;
> >         int ret;
> >
> > -       if (!tb_switch_is_usb4(down->sw) || !tb_switch_is_usb4(up->sw))
> > -               return 0;
> > +       /*
> > +        * Try checking LTSSM state for both USB4 and TBT3 devices.
> > +        * Check at least the USB4 side if only one side is USB4.
> > +        */
> > +       tb_port_dbg(down, "PCIe downstream port: USB4=%d\n", tb_switch_is_usb4(down->sw));
> > +       tb_port_dbg(up, "PCIe upstream port: USB4=%d\n", tb_switch_is_usb4(up->sw));
> >
> > -       ret = usb4_pci_port_check_ltssm_state(down, TB_PCIE_LTSSM_DETECT);
> > -       if (ret)
> > -               return ret;
> > +       if (tb_switch_is_usb4(down->sw)) {
> > +               tb_port_dbg(down, "Checking PCIe downstream LTSSM state\n");
> > +               ret = usb4_pci_port_check_ltssm_state(down, TB_PCIE_LTSSM_DETECT);
> > +               if (ret) {
> > +                       tb_port_warn(down, "PCIe adapter not in detect state: %d\n", ret);
> > +                       return ret;
> > +               }
> > +               tb_port_dbg(down, "PCIe downstream adapter in detect state\n");
> > +       } else {
> > +               tb_port_dbg(down, "Skipping LTSSM check (not USB4)\n");
> > +       }
> > +
> > +       if (tb_switch_is_usb4(up->sw)) {
> > +               tb_port_dbg(up, "Checking PCIe upstream LTSSM state\n");
> > +               ret = usb4_pci_port_check_ltssm_state(up, TB_PCIE_LTSSM_DETECT);
> > +               if (ret) {
> > +                       tb_port_warn(up, "PCIe adapter not in detect state: %d\n", ret);
> > +                       return ret;
> > +               }
> > +               tb_port_dbg(up, "PCIe upstream adapter in detect state\n");
> > +       } else {
> > +               tb_port_dbg(up, "Skipping LTSSM check (not USB4)\n");
> > +       }
> >
> > -       return usb4_pci_port_check_ltssm_state(up, TB_PCIE_LTSSM_DETECT);
> > +       return 0;
> >  }
> >
> >  static int tb_pci_set_ext_encapsulation(struct tb_tunnel *tunnel, bool enable)
> >
> > $ sudo dmesg | egrep "PCIe|USB4"
> >
> > Plug the first tbt storage,
> >
> > [  460.465644] [1668] thunderbolt 0000:c7:00.6:  Port 4: 8086:15c0 (Revision: 1, TB Version: 1, Type: PCIe (0x100102))
> > [  460.891208] [3953] thunderbolt 0000:c7:00.6: 2:12: PCIe downstream port: USB4=1
> > [  460.891210] [3953] thunderbolt 0000:c7:00.6: 702:4: PCIe upstream port: USB4=0
> > [  460.891212] [3953] thunderbolt 0000:c7:00.6: 2:12: Checking PCIe downstream LTSSM state
> > [  460.891327] [3953] thunderbolt 0000:c7:00.6: 2:12: PCIe downstream adapter in detect state
> > [  460.891328] [3953] thunderbolt 0000:c7:00.6: 702:4: Skipping LTSSM check (not USB4)
> > [  460.891329] [3953] thunderbolt 0000:c7:00.6: activating PCIe Down path from 2:12 to 702:4
> > [  460.891849] [3953] thunderbolt 0000:c7:00.6: PCIe Down path activation complete
> > [  460.891850] [3953] thunderbolt 0000:c7:00.6: activating PCIe Up path from 702:4 to 2:12
> > [  460.892375] [3953] thunderbolt 0000:c7:00.6: PCIe Up path activation complete
> > [  461.018893] pci 0000:a2:00.0: [8086:15c0] type 01 class 0x060400 PCIe Switch Upstream Port
> > [  461.019746] pci 0000:a2:00.0: 2.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x1 link at 0000:00:01.2 (capable of 8.000 Gb/s with 2.5 GT/s PCIe x4 link)
> > [  461.020620] pci 0000:a3:01.0: [8086:15c0] type 01 class 0x060400 PCIe Switch Downstream Port
> > [  461.021846] pci 0000:a4:00.0: [144d:a809] type 00 class 0x010802 PCIe Endpoint
> > [  461.022555] pci 0000:a4:00.0: 2.000 Gb/s available PCIe bandwidth, limited by 2.5 GT/s PCIe x1 link at 0000:00:01.2 (capable of 31.504 Gb/s with 8.0 GT/s PCIe x4 link)
> >
> > And then the second tbt storage,
> >
> > [  472.025559] [1668] thunderbolt 0000:c7:00.6:  Port 4: 8086:15c0 (Revision: 1, TB Version: 1, Type: PCIe (0x100102))
> > [  472.451726] [3953] thunderbolt 0000:c7:00.6: 2:11: PCIe downstream port: USB4=1
> > [  472.451728] [3953] thunderbolt 0000:c7:00.6: 502:4: PCIe upstream port: USB4=0
> > [  472.451729] [3953] thunderbolt 0000:c7:00.6: 2:11: Checking PCIe downstream LTSSM state
> > [  472.451851] [3953] thunderbolt 0000:c7:00.6: 2:11: PCIe downstream adapter in detect state
> > [  472.451852] [3953] thunderbolt 0000:c7:00.6: 502:4: Skipping LTSSM check (not USB4)
> > [  472.451853] [3953] thunderbolt 0000:c7:00.6: activating PCIe Down path from 2:11 to 502:4
> > [  472.452373] [3953] thunderbolt 0000:c7:00.6: PCIe Down path activation complete
> > [  472.452374] [3953] thunderbolt 0000:c7:00.6: activating PCIe Up path from 502:4 to 2:11
> > [  472.452893] [3953] thunderbolt 0000:c7:00.6: PCIe Up path activation complete
> >
> > My issue should be happening after the PCIe tunnel is activated.
>
> Right at that point the PCIe Downstream Port probably is already back
> runtime suspended.
>
> Here you could try this:
>
> # echo 250 > /sys/bus/pci/devices/0000:62:02.0/power/autosuspend_delay
No luck, I enlarged the number to 1000, but still can't recognize the
second tbt storage.

I tried to wake up the PCIe ports in the beginning of tb_tunnel_pci() and
it works.

+       pdev = pci_get_domain_bus_and_slot(0, 0x62, PCI_DEVFN(0x02, 0));
+       if (pdev) {
+               if (pdev->dev.power.runtime_status == RPM_SUSPENDED)
+                       pm_runtime_get_sync(&pdev->dev);
+               pci_dev_put(pdev);
+       }

But I can't find a generic way to get the bus and slot number, and
would you consider this a feasible approach?

>
> It should keep the port awake a little longer; hopefully it can then catch
> the link becoming active.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-12  4:16                         ` AceLan Kao
@ 2026-02-12  7:07                           ` Mika Westerberg
  2026-02-12  7:34                             ` Mika Westerberg
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-02-12  7:07 UTC (permalink / raw)
  To: AceLan Kao
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel, Gil Fine

On Thu, Feb 12, 2026 at 12:16:03PM +0800, AceLan Kao wrote:
> > > Disable runpm on 62:02.0, then we have 83:00.0 and its downstream port
> > > 84:01.0 and 85:00.0, and then the tbt storage is recognized.
> Got troubles with mutt, my reply got rejected :(
> 
> Using gmail and copy/paste the content below again.

Okay.

> > Okay that means there is nothing wrong with the PCIe tunnel itself it's
> > just that the PCIe side either does not get the PME or does not see that
> > the PCIe link becomes active (e.g the PCIe Downstream Port runtime suspends
> > itself before the link status changes).
> >
> > PME work so that there is wake first on Intel it's GPE that wakes up the
> > root port and then PCIe stack wakes up devices and then the PME message is
> > sent to the root complex.
> >
> > If you do this on Intel host do you see the same?
> Intel host exhibits another symptom, I think the root cause is different.
> 
> Plug in the dock, and then plug in the tbt storage to the dock one by
> one, both storage can be detected.
> 
> Plug both tbt storage to the dock, and then plug in the dock to the
> machine, only one tbt storage appears. In rare chance, both tbt
> storages show up, but most of the time, only one tbt storage is detected.
> In this case, none of disable runpm, rescan, or lspci work. So, it's
> most likely another issue.

By "detected" you mean the TB device is not detected on TB bus? Or it is
detected on TB bus but creating PCIe tunnel does not make the content
visible on PCIe bus?

You can check this from dmesg, the driver logs if it sees the plug event.
Or run tblist (from tbtools) and see if the device is listed.

I suspect former and in that case it is likely a PD/retimer related issue
rather than software. I see these once in a while especially with new
hardware where the PD firmware is not yet stabilized. If there is TB/USB4
link then all is working from TB/USB4 perspective.

> > Right at that point the PCIe Downstream Port probably is already back
> > runtime suspended.
> >
> > Here you could try this:
> >
> > # echo 250 > /sys/bus/pci/devices/0000:62:02.0/power/autosuspend_delay
> No luck, I enlarged the number to 1000, but still can't recognize the
> second tbt storage.

What about -1?

That's effectively the same as blocking runtime PM completely so should work.

> I tried to wake up the PCIe ports in the beginning of tb_tunnel_pci() and
> it works.
> 
> +       pdev = pci_get_domain_bus_and_slot(0, 0x62, PCI_DEVFN(0x02, 0));
> +       if (pdev) {
> +               if (pdev->dev.power.runtime_status == RPM_SUSPENDED)
> +                       pm_runtime_get_sync(&pdev->dev);
> +               pci_dev_put(pdev);
> +       }
> 
> But I can't find a generic way to get the bus and slot number, and
> would you consider this a feasible approach?

No I don't want any (more) PCI related hacks in the driver.

This is not a TB issue, it's a PCIe issue. I suspect it has something to do
with handling PME/GPE on AMD side. Essentially when runtime PM is blocked
the PCIe hotplug driver notices the tunnel just fine. When it is runtime
suspended (e.g D3) it should send PME to the root complex that the brings
the topology up so that the hotplug driver can detect the presence but this
does not seem to happen.

If you enable dynamic debugging of pciehp, do you see anything happening
when you create the second PCIe tunnel? I suspect not.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-12  7:07                           ` Mika Westerberg
@ 2026-02-12  7:34                             ` Mika Westerberg
  2026-02-13  0:34                               ` Mario Limonciello
  0 siblings, 1 reply; 26+ messages in thread
From: Mika Westerberg @ 2026-02-12  7:34 UTC (permalink / raw)
  To: AceLan Kao
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel, Gil Fine, Mario Limonciello, Sanath.S

[+Cc AMD folks]

On Thu, Feb 12, 2026 at 08:07:54AM +0100, Mika Westerberg wrote:
> On Thu, Feb 12, 2026 at 12:16:03PM +0800, AceLan Kao wrote:
> > > > Disable runpm on 62:02.0, then we have 83:00.0 and its downstream port
> > > > 84:01.0 and 85:00.0, and then the tbt storage is recognized.
> > Got troubles with mutt, my reply got rejected :(
> > 
> > Using gmail and copy/paste the content below again.
> 
> Okay.
> 
> > > Okay that means there is nothing wrong with the PCIe tunnel itself it's
> > > just that the PCIe side either does not get the PME or does not see that
> > > the PCIe link becomes active (e.g the PCIe Downstream Port runtime suspends
> > > itself before the link status changes).
> > >
> > > PME work so that there is wake first on Intel it's GPE that wakes up the
> > > root port and then PCIe stack wakes up devices and then the PME message is
> > > sent to the root complex.
> > >
> > > If you do this on Intel host do you see the same?
> > Intel host exhibits another symptom, I think the root cause is different.
> > 
> > Plug in the dock, and then plug in the tbt storage to the dock one by
> > one, both storage can be detected.
> > 
> > Plug both tbt storage to the dock, and then plug in the dock to the
> > machine, only one tbt storage appears. In rare chance, both tbt
> > storages show up, but most of the time, only one tbt storage is detected.
> > In this case, none of disable runpm, rescan, or lspci work. So, it's
> > most likely another issue.
> 
> By "detected" you mean the TB device is not detected on TB bus? Or it is
> detected on TB bus but creating PCIe tunnel does not make the content
> visible on PCIe bus?
> 
> You can check this from dmesg, the driver logs if it sees the plug event.
> Or run tblist (from tbtools) and see if the device is listed.
> 
> I suspect former and in that case it is likely a PD/retimer related issue
> rather than software. I see these once in a while especially with new
> hardware where the PD firmware is not yet stabilized. If there is TB/USB4
> link then all is working from TB/USB4 perspective.
> 
> > > Right at that point the PCIe Downstream Port probably is already back
> > > runtime suspended.
> > >
> > > Here you could try this:
> > >
> > > # echo 250 > /sys/bus/pci/devices/0000:62:02.0/power/autosuspend_delay
> > No luck, I enlarged the number to 1000, but still can't recognize the
> > second tbt storage.
> 
> What about -1?
> 
> That's effectively same as blocking runtime PM completely so should work.
> 
> > I tried to wake up the PCIe ports in the beginning of tb_tunnel_pci() and
> > it works.
> > 
> > +       pdev = pci_get_domain_bus_and_slot(0, 0x62, PCI_DEVFN(0x02, 0));
> > +       if (pdev) {
> > +               if (pdev->dev.power.runtime_status == RPM_SUSPENDED)
> > +                       pm_runtime_get_sync(&pdev->dev);
> > +               pci_dev_put(pdev);
> > +       }
> > 
> > But I can't find a generic way to get the bus and slot number, and
> > would you consider this a feasible approach?
> 
> No I don't want any (more) PCI related hacks in the driver.
> 
> This is not a TB issue, it's a PCIe issue. I suspect it has something to do
> with handling PME/GPE on AMD side. Essentially when runtime PM is blocked
> the PCIe hotplug driver notices the tunnel just fine. When it is runtime
> suspended (e.g D3) it should send PME to the root complex that then brings
> the topology up so that the hotplug driver can detect the presence but this
> does not seem to happen.
> 
> If you enable dynamic debugging of pciehp, do you see anything happening
> when you create the second PCIe tunnel? I suspect not.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-12  7:34                             ` Mika Westerberg
@ 2026-02-13  0:34                               ` Mario Limonciello
  2026-02-18 19:21                                 ` Mario Limonciello
  0 siblings, 1 reply; 26+ messages in thread
From: Mario Limonciello @ 2026-02-13  0:34 UTC (permalink / raw)
  To: Mika Westerberg, AceLan Kao
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel, Gil Fine, Sanath.S, Kovacs, Alexander

++

On 2/12/2026 1:34 AM, Mika Westerberg wrote:
> [+Cc AMD folks]
> 
> On Thu, Feb 12, 2026 at 08:07:54AM +0100, Mika Westerberg wrote:
>> On Thu, Feb 12, 2026 at 12:16:03PM +0800, AceLan Kao wrote:
>>>>> Disable runpm on 62:02.0, then we have 83:00.0 and its downstream port
>>>>> 84:01.0 and 85:00.0, and then the tbt storage is recognized.
>>> Got troubles with mutt, my reply got rejected :(
>>>
>>> Using gmail and copy/paste the content below again.
>>
>> Okay.
>>
>>>> Okay that means there is nothing wrong with the PCIe tunnel itself it's
>>>> just that the PCIe side either does not get the PME or does not see that
>>>> the PCIe link becomes active (e.g the PCIe Downstream Port runtime suspends
>>>> itself before the link status changes).
>>>>
>>>> PME work so that there is wake first on Intel it's GPE that wakes up the
>>>> root port and then PCIe stack wakes up devices and then the PME message is
>>>> sent to the root complex.
>>>>
>>>> If you do this on Intel host do you see the same?
>>> Intel host exhibits another symptom, I think the root cause is different.
>>>
>>> Plug in the dock, and then plug in the tbt storage to the dock one by
>>> one, both storage can be detected.
>>>
>>> Plug both tbt storage to the dock, and then plug in the dock to the
>>> machine, only one tbt storage appears. In rare chance, both tbt
>>> storages show up, but most of the time, only one tbt storage is detected.
>>> In this case, none of disable runpm, rescan, or lspci work. So, it's
>>> most likely another issue.
>>
>> By "detected" you mean the TB device is not detected on TB bus? Or it is
>> detected on TB bus but creating PCIe tunnel does not make the content
>> visible on PCIe bus?
>>
>> You can check this from dmesg, the driver logs if it sees the plug event.
>> Or run tblist (from tbtools) and see if the device is listed.
>>
>> I suspect former and in that case it is likely a PD/retimer related issue
>> rather than software. I see these once in a while especially with new
>> hardware where the PD firmare is not yet stabilized. If there is TB/USB4
>> link then all is working from TB/USB4 perspective.
>>
>>>> Right at that point the PCIe Downstream Port probably is already back
>>>> runtime suspended.
>>>>
>>>> Here you could try this:
>>>>
>>>> # echo 250 > /sys/bus/pci/devices/0000:62:02.0/power/autosuspend_delay
>>> No luck, I enlarged the number to 1000, but still can't recognize the
>>> second tbt storage.
>>
>> What about -1?
>>
>> That's effectively same as blocking runtime PM completely so should work.
>>
>>> I tried to wake up the PCIe ports in the beginning of tb_tunnel_pci() and
>>> it works.
>>>
>>> +       pdev = pci_get_domain_bus_and_slot(0, 0x62, PCI_DEVFN(0x02, 0));
>>> +       if (pdev) {
>>> +               if (pdev->dev.power.runtime_status == RPM_SUSPENDED)
>>> +                       pm_runtime_get_sync(&pdev->dev);
>>> +               pci_dev_put(pdev);
>>> +       }
>>>
>>> But I can't find a generic way to get the bus and slot number, and
>>> would you consider this a feasible approach?
>>
>> No I don't want any (more) PCI related hacks in the driver.
>>
>> This is not a TB issue, it's a PCIe issue. I suspect it has something to do
>> with handling PME/GPE on AMD side. Essentially when runtime PM is blocked
>> the PCIe hotplug driver notices the tunnel just fine. When it is runtime
> > suspended (e.g D3) it should send PME to the root complex that then brings
>> the topology up so that the hotplug driver can detect the presence but this
>> does not seem to happen.
>>
>> If you enable dynamic debugging of pciehp, do you see anything happening
>> when you create the second PCIe tunnel? I suspect not.

 From what I see above I agree this does seem like a PME delivery issue 
of some sort.  Any chance you can put this on a PCIe analyzer and 
confirm whether the PME was ever sent?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-13  0:34                               ` Mario Limonciello
@ 2026-02-18 19:21                                 ` Mario Limonciello
  2026-02-18 21:35                                   ` Mario Limonciello
  0 siblings, 1 reply; 26+ messages in thread
From: Mario Limonciello @ 2026-02-18 19:21 UTC (permalink / raw)
  To: Mika Westerberg, AceLan Kao
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel, Gil Fine, Sanath.S, Kovacs, Alexander

On 2/12/26 6:34 PM, Mario Limonciello wrote:
> ++
> 
> On 2/12/2026 1:34 AM, Mika Westerberg wrote:
>> [+Cc AMD folks]
>>
>> On Thu, Feb 12, 2026 at 08:07:54AM +0100, Mika Westerberg wrote:
>>> On Thu, Feb 12, 2026 at 12:16:03PM +0800, AceLan Kao wrote:
>>>>>> Disable runpm on 62:02.0, then we have 83:00.0 and its downstream 
>>>>>> port
>>>>>> 84:01.0 and 85:00.0, and then the tbt storage is recognized.
>>>> Got troubles with mutt, my reply got rejected :(
>>>>
>>>> Using gmail and copy/paste the content below again.
>>>
>>> Okay.
>>>
>>>>> Okay that means there is nothing wrong with the PCIe tunnel itself 
>>>>> it's
>>>>> just that the PCIe side either does not get the PME or does not see 
>>>>> that
>>>>> the PCIe link becomes active (e.g the PCIe Downstream Port runtime 
>>>>> suspends
>>>>> itself before the link status changes).
>>>>>
>>>>> PME work so that there is wake first on Intel it's GPE that wakes 
>>>>> up the
>>>>> root port and then PCIe stack wakes up devices and then the PME 
>>>>> message is
>>>>> sent to the root complex.
>>>>>
>>>>> If you do this on Intel host do you see the same?
>>>> Intel host exhibits another symptom, I think the root cause is 
>>>> different.
>>>>
>>>> Plug in the dock, and then plug in the tbt storage to the dock one by
>>>> one, both storage can be detected.
>>>>
>>>> Plug both tbt storage to the dock, and then plug in the dock to the
>>>> machine, only one tbt storage appears. In rare chance, both tbt
>>>> storages show up, but most of the time, only one tbt storage is 
>>>> detected.
>>>> In this case, none of disable runpm, rescan, or lspci work. So, it's
>>>> most likely another issue.
>>>
>>> By "detected" you mean the TB device is not detected on TB bus? Or it is
>>> detected on TB bus but creating PCIe tunnel does not make the content
>>> visible on PCIe bus?
>>>
>>> You can check this from dmesg, the driver logs if it sees the plug 
>>> event.
>>> Or run tblist (from tbtools) and see if the device is listed.
>>>
>>> I suspect former and in that case it is likely a PD/retimer related 
>>> issue
>>> rather than software. I see these once in a while especially with new
>>> hardware where the PD firmware is not yet stabilized. If there is TB/USB4
>>> link then all is working from TB/USB4 perspective.
>>>
>>>>> Right at that point the PCIe Downstream Port probably is already back
>>>>> runtime suspended.
>>>>>
>>>>> Here you could try this:
>>>>>
>>>>> # echo 250 > /sys/bus/pci/devices/0000:62:02.0/power/autosuspend_delay
>>>> No luck, I enlarged the number to 1000, but still can't recognize the
>>>> second tbt storage.
>>>
>>> What about -1?
>>>
>>> That's effectively same as blocking runtime PM completely so should 
>>> work.
>>>
>>>> I tried to wake up the PCIe ports in the beginning of 
>>>> tb_tunnel_pci() and
>>>> it works.
>>>>
>>>> +       pdev = pci_get_domain_bus_and_slot(0, 0x62, PCI_DEVFN(0x02, 
>>>> 0));
>>>> +       if (pdev) {
>>>> +               if (pdev->dev.power.runtime_status == RPM_SUSPENDED)
>>>> +                       pm_runtime_get_sync(&pdev->dev);
>>>> +               pci_dev_put(pdev);
>>>> +       }
>>>>
>>>> But I can't find a generic way to get the bus and slot number, and
>>>> would you consider this a feasible approach?
>>>
>>> No I don't want any (more) PCI related hacks in the driver.
>>>
>>> This is not a TB issue, it's a PCIe issue. I suspect it has something 
>>> to do
>>> with handling PME/GPE on AMD side. Essentially when runtime PM is 
>>> blocked
>>> the PCIe hotplug driver notices the tunnel just fine. When it is runtime
>>> suspended (e.g D3) it should send PME to the root complex that then 
>>> brings
>>> the topology up so that the hotplug driver can detect the presence 
>>> but this
>>> does not seem to happen.
>>>
>>> If you enable dynamic debugging of pciehp, do you see anything happening
>>> when you create the second PCIe tunnel? I suspect not.
> 
>  From what I see above I agree this does seem like a PME delivery issue 
> of some sort.  Any chance you can put this on a PCIe analyzer and 
> confirm whether the PME was ever sent?

We could reproduce the behavior on our side using a USB4 hub and two 
NVME disks.  Keeping the root port for tunneling at D0 (turning off 
runtime PM) avoided it.

In cross referencing an Intel system (Intel Core Ultra 200V) we saw that 
it couldn't reproduce.  But looking at differences we noticed the root 
port for tunneling on this system stayed at D0 the entire time.  Is that 
expected?



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-18 19:21                                 ` Mario Limonciello
@ 2026-02-18 21:35                                   ` Mario Limonciello
  2026-02-23  5:55                                     ` Mika Westerberg
  0 siblings, 1 reply; 26+ messages in thread
From: Mario Limonciello @ 2026-02-18 21:35 UTC (permalink / raw)
  To: Mika Westerberg, AceLan Kao
  Cc: Andreas Noever, Mika Westerberg, Yehezkel Bernat, linux-usb,
	linux-kernel, Gil Fine, Sanath.S, Kovacs, Alexander

On 2/18/26 1:21 PM, Mario Limonciello wrote:
> On 2/12/26 6:34 PM, Mario Limonciello wrote:
>> ++
>>
>> On 2/12/2026 1:34 AM, Mika Westerberg wrote:
>>> [+Cc AMD folks]
>>>
>>> On Thu, Feb 12, 2026 at 08:07:54AM +0100, Mika Westerberg wrote:
>>>> On Thu, Feb 12, 2026 at 12:16:03PM +0800, AceLan Kao wrote:
>>>>>>> Disable runpm on 62:02.0, then we have 83:00.0 and its downstream 
>>>>>>> port
>>>>>>> 84:01.0 and 85:00.0, and then the tbt storage is recognized.
>>>>> Got troubles with mutt, my reply got rejected :(
>>>>>
>>>>> Using gmail and copy/paste the content below again.
>>>>
>>>> Okay.
>>>>
>>>>>> Okay that means there is nothing wrong with the PCIe tunnel itself 
>>>>>> it's
>>>>>> just that the PCIe side either does not get the PME or does not 
>>>>>> see that
>>>>>> the PCIe link becomes active (e.g the PCIe Downstream Port runtime 
>>>>>> suspends
>>>>>> itself before the link status changes).
>>>>>>
>>>>>> PME works so that there is a wake first — on Intel it's a GPE that 
>>>>>> wakes up the
>>>>>> root port — and then the PCIe stack wakes up devices and then the PME 
>>>>>> message is
>>>>>> sent to the root complex.
>>>>>>
>>>>>> If you do this on Intel host do you see the same?
>>>>> Intel host exhibits another symptom, I think the root cause is 
>>>>> different.
>>>>>
>>>>> Plug in the dock, and then plug in the tbt storage to the dock one by
>>>>> one, both storage can be detected.
>>>>>
>>>>> Plug both tbt storage to the dock, and then plug in the dock to the
>>>>> machine, only one tbt storage appears. In rare cases, both tbt
>>>>> storages show up, but most of the time, only one tbt storage is 
>>>>> detected.
>>>>> In this case, none of disable runpm, rescan, or lspci work. So, it's
>>>>> most likely another issue.
>>>>
>>>> By "detected" you mean the TB device is not detected on TB bus? Or 
>>>> it is
>>>> detected on TB bus but creating PCIe tunnel does not make the content
>>>> visible on PCIe bus?
>>>>
>>>> You can check this from dmesg, the driver logs if it sees the plug 
>>>> event.
>>>> Or run tblist (from tbtools) and see if the device is listed.
>>>>
>>>> I suspect former and in that case it is likely a PD/retimer related 
>>>> issue
>>>> rather than software. I see these once in a while especially with new
>>>> hardware where the PD firmware is not yet stabilized. If there is TB/ 
>>>> USB4
>>>> link then all is working from TB/USB4 perspective.
>>>>
>>>>>> Right at that point the PCIe Downstream Port probably is already back
>>>>>> runtime suspended.
>>>>>>
>>>>>> Here you could try this:
>>>>>>
>>>>>> # echo 250 > /sys/bus/pci/devices/0000:62:02.0/power/ 
>>>>>> autosuspend_delay
>>>>> No luck, I enlarged the number to 1000, but still can't recognize the
>>>>> second tbt storage.
>>>>
>>>> What about -1?
>>>>
>>>> That's effectively same as blocking runtime PM completely so should 
>>>> work.
>>>>
>>>>> I tried to wake up the PCIe ports in the beginning of 
>>>>> tb_tunnel_pci() and
>>>>> it works.
>>>>>
>>>>> +       pdev = pci_get_domain_bus_and_slot(0, 0x62, PCI_DEVFN(0x02, 
>>>>> 0));
>>>>> +       if (pdev) {
>>>>> +               if (pdev->dev.power.runtime_status == RPM_SUSPENDED)
>>>>> +                       pm_runtime_get_sync(&pdev->dev);
>>>>> +               pci_dev_put(pdev);
>>>>> +       }
>>>>>
>>>>> But I can't find a generic way to get the bus and slot number, and
>>>>> would you consider this a feasible approach?
>>>>
>>>> No I don't want any (more) PCI related hacks in the driver.
>>>>
>>>> This is not a TB issue, it's a PCIe issue. I suspect it has 
>>>> something to do
>>>> with handling PME/GPE on AMD side. Essentially when runtime PM is 
>>>> blocked
>>>> the PCIe hotplug driver notices the tunnel just fine. When it is 
>>>> runtime
>>>> suspended (e.g. D3) it should send a PME to the root complex that then 
>>>> brings
>>>> the topology up so that the hotplug driver can detect the presence 
>>>> but this
>>>> does not seem to happen.
>>>>
>>>> If you enable dynamic debugging of pciehp, do you see anything 
>>>> happening
>>>> when you create the second PCIe tunnel? I suspect not.
>>
>>  From what I see above I agree this does seem like a PME delivery 
>> issue of some sort.  Any chance you can put this on a PCIe analyzer 
>> and confirm whether the PME was ever sent?
> 
> We could reproduce the behavior on our side using a USB4 hub and two 
> NVME disks.  Keeping the root port for tunneling at D0 (turning off 
> runtime PM) avoided it.
> 
> In cross referencing an Intel system (Intel Core Ultra 200V) we saw that 
> it couldn't reproduce.  But looking at differences we noticed the root 
> port for tunneling on this system stayed at D0 the entire time.  Is that 
> expected?
> 
> 

Actually in further re-testing that was an incorrect observation.  Our 
internal team will keep digging into what's going on with the PME in the 
AMD case.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
  2026-02-18 21:35                                   ` Mario Limonciello
@ 2026-02-23  5:55                                     ` Mika Westerberg
  0 siblings, 0 replies; 26+ messages in thread
From: Mika Westerberg @ 2026-02-23  5:55 UTC (permalink / raw)
  To: Mario Limonciello
  Cc: AceLan Kao, Andreas Noever, Mika Westerberg, Yehezkel Bernat,
	linux-usb, linux-kernel, Gil Fine, Sanath.S, Kovacs, Alexander

On Wed, Feb 18, 2026 at 03:35:36PM -0600, Mario Limonciello wrote:
> On 2/18/26 1:21 PM, Mario Limonciello wrote:
> > On 2/12/26 6:34 PM, Mario Limonciello wrote:
> > > ++
> > > 
> > > On 2/12/2026 1:34 AM, Mika Westerberg wrote:
> > > > [+Cc AMD folks]
> > > > 
> > > > On Thu, Feb 12, 2026 at 08:07:54AM +0100, Mika Westerberg wrote:
> > > > > On Thu, Feb 12, 2026 at 12:16:03PM +0800, AceLan Kao wrote:
> > > > > > > > Disable runpm on 62:02.0, then we have 83:00.0
> > > > > > > > and its downstream port
> > > > > > > > 84:01.0 and 85:00.0, and then the tbt storage is recognized.
> > > > > > Got troubles with mutt, my reply got rejected :(
> > > > > > 
> > > > > > Using gmail and copy/paste the content below again.
> > > > > 
> > > > > Okay.
> > > > > 
> > > > > > > Okay that means there is nothing wrong with the PCIe
> > > > > > > tunnel itself it's
> > > > > > > just that the PCIe side either does not get the PME
> > > > > > > or does not see that
> > > > > > > the PCIe link becomes active (e.g the PCIe
> > > > > > > Downstream Port runtime suspends
> > > > > > > itself before the link status changes).
> > > > > > > 
> > > > > > > PME works so that there is a wake first — on Intel it's a
> > > > > > > GPE that wakes up the
> > > > > > > root port — and then the PCIe stack wakes up devices and
> > > > > > > then the PME message is
> > > > > > > sent to the root complex.
> > > > > > > 
> > > > > > > If you do this on Intel host do you see the same?
> > > > > > Intel host exhibits another symptom, I think the root
> > > > > > cause is different.
> > > > > > 
> > > > > > Plug in the dock, and then plug in the tbt storage to the dock one by
> > > > > > one, both storage can be detected.
> > > > > > 
> > > > > > Plug both tbt storage to the dock, and then plug in the dock to the
> > > > > > machine, only one tbt storage appears. In rare cases, both tbt
> > > > > > storages show up, but most of the time, only one tbt
> > > > > > storage is detected.
> > > > > > In this case, none of disable runpm, rescan, or lspci work. So, it's
> > > > > > most likely another issue.
> > > > > 
> > > > > By "detected" you mean the TB device is not detected on TB
> > > > > bus? Or it is
> > > > > detected on TB bus but creating PCIe tunnel does not make the content
> > > > > visible on PCIe bus?
> > > > > 
> > > > > You can check this from dmesg, the driver logs if it sees
> > > > > the plug event.
> > > > > Or run tblist (from tbtools) and see if the device is listed.
> > > > > 
> > > > > I suspect former and in that case it is likely a PD/retimer
> > > > > related issue
> > > > > rather than software. I see these once in a while especially with new
> > > > > hardware where the PD firmware is not yet stabilized. If
> > > > > there is TB/ USB4
> > > > > link then all is working from TB/USB4 perspective.
> > > > > 
> > > > > > > Right at that point the PCIe Downstream Port probably is already back
> > > > > > > runtime suspended.
> > > > > > > 
> > > > > > > Here you could try this:
> > > > > > > 
> > > > > > > # echo 250 >
> > > > > > > /sys/bus/pci/devices/0000:62:02.0/power/
> > > > > > > autosuspend_delay
> > > > > > No luck, I enlarged the number to 1000, but still can't recognize the
> > > > > > second tbt storage.
> > > > > 
> > > > > What about -1?
> > > > > 
> > > > > That's effectively same as blocking runtime PM completely so
> > > > > should work.
> > > > > 
> > > > > > I tried to wake up the PCIe ports in the beginning of
> > > > > > tb_tunnel_pci() and
> > > > > > it works.
> > > > > > 
> > > > > > +       pdev = pci_get_domain_bus_and_slot(0, 0x62,
> > > > > > PCI_DEVFN(0x02, 0));
> > > > > > +       if (pdev) {
> > > > > > +               if (pdev->dev.power.runtime_status == RPM_SUSPENDED)
> > > > > > +                       pm_runtime_get_sync(&pdev->dev);
> > > > > > +               pci_dev_put(pdev);
> > > > > > +       }
> > > > > > 
> > > > > > But I can't find a generic way to get the bus and slot number, and
> > > > > > would you consider this a feasible approach?
> > > > > 
> > > > > No I don't want any (more) PCI related hacks in the driver.
> > > > > 
> > > > > This is not a TB issue, it's a PCIe issue. I suspect it has
> > > > > something to do
> > > > > with handling PME/GPE on AMD side. Essentially when runtime
> > > > > PM is blocked
> > > > > the PCIe hotplug driver notices the tunnel just fine. When
> > > > > it is runtime
> > > > > suspended (e.g. D3) it should send a PME to the root complex
> > > > > that then brings
> > > > > the topology up so that the hotplug driver can detect the
> > > > > presence but this
> > > > > does not seem to happen.
> > > > > 
> > > > > If you enable dynamic debugging of pciehp, do you see
> > > > > anything happening
> > > > > when you create the second PCIe tunnel? I suspect not.
> > > 
> > >  From what I see above I agree this does seem like a PME delivery
> > > issue of some sort.  Any chance you can put this on a PCIe analyzer
> > > and confirm whether the PME was ever sent?
> > 
> > We could reproduce the behavior on our side using a USB4 hub and two
> > NVME disks.  Keeping the root port for tunneling at D0 (turning off
> > runtime PM) avoided it.
> > 
> > In cross referencing an Intel system (Intel Core Ultra 200V) we saw that
> > it couldn't reproduce.  But looking at differences we noticed the root
> > port for tunneling on this system stayed at D0 the entire time.  Is that
> > expected?
> > 
> > 
> 
> Actually in further re-testing that was an incorrect observation.  Our
> internal team will keep digging into what's going on with the PME in the AMD
> case.

Okay thanks for the update!

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2026-02-23  5:55 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-21  5:27 [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan Chia-Lin Kao (AceLan)
2026-01-21  5:35 ` AceLan Kao
2026-01-21  6:01 ` Mika Westerberg
2026-01-23  2:04   ` Chia-Lin Kao (AceLan)
2026-01-23 12:01     ` Mika Westerberg
     [not found]       ` <aXbTfLUJ-lEfNzgX@acelan-Precision-5480>
2026-01-26  5:42         ` Mika Westerberg
     [not found]           ` <aXcWNw9Qfo5L9WVi@acelan-Precision-5480>
2026-01-26 11:56             ` Mika Westerberg
     [not found]               ` <aXg1eBudRAaCZpmR@acelan-Precision-5480>
2026-01-27  8:45                 ` Mika Westerberg
2026-01-27 10:17                   ` Mika Westerberg
2026-01-29  5:45                     ` Chia-Lin Kao (AceLan)
2026-01-29  6:50                       ` Mika Westerberg
2026-02-03  9:04                         ` Jayi Li
2026-02-03  9:39                           ` Mika Westerberg
2026-02-03 10:00                             ` Jayi Li
2026-02-03 10:07                               ` Mika Westerberg
2026-02-04  2:37                                 ` Jayi Li
2026-02-04 12:37                                   ` Mika Westerberg
2026-02-06  1:17                                     ` Jayi Li
2026-02-09  1:30                                     ` Jayi Li
2026-02-12  4:16                         ` AceLan Kao
2026-02-12  7:07                           ` Mika Westerberg
2026-02-12  7:34                             ` Mika Westerberg
2026-02-13  0:34                               ` Mario Limonciello
2026-02-18 19:21                                 ` Mario Limonciello
2026-02-18 21:35                                   ` Mario Limonciello
2026-02-23  5:55                                     ` Mika Westerberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox