Linux USB
 help / color / mirror / Atom feed
* [RFT PATCH 0/3] xhci: avoid futile stop endpoint command and host teardown
@ 2026-06-29 12:30 Mathias Nyman
  2026-06-29 12:30 ` [RFT PATCH 1/3] xhci: include all root port children in recovery prevention on link error Mathias Nyman
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Mathias Nyman @ 2026-06-29 12:30 UTC (permalink / raw)
  To: linux-usb; +Cc: raoxu, michal.pecio, Mathias Nyman

This series is an attempt to avoid stop endpoint command failure
leading to xhci teardown with messages:

xHCI host not responding to stop endpoint command
xHCI host controller not responding, assume dead

Series is based on a discussion with Michal and Xu Rao:

https://lore.kernel.org/linux-usb/D9BA02889D046D23+20260617100957.2888108-1-raoxu@uniontech.com/

A Renesas xHC host with a device behind a disconnected hub failed to
complete a stop endoint command immediately after a endpoint soft
reset and restart. Michal had some plausible theories why stop endpoint
command issued immediately after endpoint restart fails after disconnect.

Many of these cases can be avoided as xhci driver knows the rootport
link state and can avoid futile transfer retry and recovery on link
error or disconnect

I don't have a easy way to reproduce this myself, all testing is
welcome

Thanks
Mathias

Mathias Nyman (3):
  xhci: include all root port children in recovery prevention on link
    error
  xhci: prevent endpoint recovery after roothub disconnect
  xhci: avoid xHC endpoint changes after disconnect or link error.

 drivers/usb/host/xhci-ring.c | 50 +++++++++++++++++++++++++-----------
 drivers/usb/host/xhci.c      |  9 ++++---
 drivers/usb/host/xhci.h      | 11 +++-----
 3 files changed, 43 insertions(+), 27 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFT PATCH 1/3] xhci: include all root port children in recovery prevention on link error
  2026-06-29 12:30 [RFT PATCH 0/3] xhci: avoid futile stop endpoint command and host teardown Mathias Nyman
@ 2026-06-29 12:30 ` Mathias Nyman
  2026-06-29 12:30 ` [RFT PATCH 2/3] xhci: prevent endpoint recovery after roothub disconnect Mathias Nyman
  2026-06-29 12:30 ` [RFT PATCH 3/3] xhci: avoid xHC endpoint changes after disconnect or link error Mathias Nyman
  2 siblings, 0 replies; 4+ messages in thread
From: Mathias Nyman @ 2026-06-29 12:30 UTC (permalink / raw)
  To: linux-usb; +Cc: raoxu, michal.pecio, Mathias Nyman

Driver already prevents useless transfer retry and endpoint recovery
for devices directly connected to a root port with link errors.

These devices are either disconnecting or will be reset. Link is gone.

Move the flag indicating link error from the xhci device structure to
the root port strucure, allowing all child devices behind hubs to easily
check for root port link errors, avoiding useless transfer retries and
endpoint recovery.

This extends the previous endpoint recovery prevention in
commit b8c3b718087b ("usb: xhci: Don't try to recover an endpoint if port is in error state.")
Only root port link errors can be detected early by xhci driver,
not link errors between external hubs and their children.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
 drivers/usb/host/xhci-ring.c | 27 +++++++++++++++------------
 drivers/usb/host/xhci.c      |  4 +---
 drivers/usb/host/xhci.h      |  9 +--------
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index e47e644b296e..020e924c1ced 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -993,7 +993,7 @@ static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 	 * Avoid resetting endpoint if link is inactive. Can cause host hang.
 	 * Device will be reset soon to recover the link so don't do anything
 	 */
-	if (ep->vdev->flags & VDEV_PORT_ERROR)
+	if (ep->vdev->rhub_port->link_inactive)
 		return -ENODEV;
 
 	/* add td to cancelled list and let reset ep handler take care of it */
@@ -1992,13 +1992,15 @@ static void xhci_cavium_reset_phy_quirk(struct xhci_hcd *xhci)
 static void handle_port_status(struct xhci_hcd *xhci, union xhci_trb *event)
 {
 	struct xhci_virt_device *vdev = NULL;
-	struct usb_hcd *hcd;
-	u32 port_id;
-	u32 portsc, cmd_reg;
-	unsigned int hcd_portnum;
 	struct xhci_bus_state *bus_state;
-	bool bogus_port_status = false;
 	struct xhci_port *port;
+	struct usb_hcd *hcd;
+	bool bogus_port_status = false;
+	unsigned int hcd_portnum;
+	u32 cmd_reg;
+	u32 port_id;
+	u32 portsc;
+	u32 pls;
 
 	/* Port status change events always have a successful completion code */
 	if (GET_COMP_CODE(le32_to_cpu(event->generic.field[2])) != COMP_SUCCESS)
@@ -2035,6 +2037,7 @@ static void handle_port_status(struct xhci_hcd *xhci, union xhci_trb *event)
 	bus_state = &port->rhub->bus_state;
 	hcd_portnum = port->hcd_portnum;
 	portsc = xhci_portsc_readl(port);
+	pls = portsc & PORT_PLS_MASK;
 
 	xhci_dbg(xhci, "Port change event, %d-%d, id %d, portsc: 0x%x\n",
 		 hcd->self.busnum, hcd_portnum + 1, port_id, portsc);
@@ -2046,12 +2049,12 @@ static void handle_port_status(struct xhci_hcd *xhci, union xhci_trb *event)
 		usb_hcd_resume_root_hub(hcd);
 	}
 
-	if (vdev && (portsc & PORT_PLS_MASK) == XDEV_INACTIVE) {
-		if (!(portsc & PORT_RESET))
-			vdev->flags |= VDEV_PORT_ERROR;
-	} else if (vdev && portsc & PORT_RC) {
-		vdev->flags &= ~VDEV_PORT_ERROR;
-	}
+	/*
+	 * Tag broken links to avoid retries while hub driver sorts it out.
+	 * Link status is not relible while port is in reset.
+	 */
+	if (!(portsc & PORT_RESET))
+		port->link_inactive = (pls == XDEV_INACTIVE);
 
 	if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) {
 		xhci_dbg(xhci, "port resume event for port %d\n", port_id);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index a54f5b57f205..cc92b316d877 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1667,7 +1667,7 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 		goto free_priv;
 	}
 
-	if (xhci->devs[slot_id]->flags & VDEV_PORT_ERROR) {
+	if (xhci->devs[slot_id]->rhub_port->link_inactive) {
 		xhci_dbg(xhci, "Can't queue urb, port error, link inactive\n");
 		ret = -ENODEV;
 		goto free_priv;
@@ -4029,7 +4029,6 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd,
 				xhci_get_slot_state(xhci, virt_dev->out_ctx));
 		xhci_dbg(xhci, "Not freeing device rings.\n");
 		/* Don't treat this as an error.  May change my mind later. */
-		virt_dev->flags = 0;
 		ret = 0;
 		goto command_cleanup;
 	case COMP_SUCCESS:
@@ -4081,7 +4080,6 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd,
 	}
 	/* If necessary, update the number of active TTs on this root port */
 	xhci_update_tt_active_eps(xhci, virt_dev, old_active_eps);
-	virt_dev->flags = 0;
 	ret = 0;
 
 command_cleanup:
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index aeecd301f207..717a7fd60a76 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -750,14 +750,6 @@ struct xhci_virt_device {
 	struct xhci_port		*rhub_port;
 	struct xhci_interval_bw_table	*bw_table;
 	struct xhci_tt_bw_info		*tt_info;
-	/*
-	 * flags for state tracking based on events and issued commands.
-	 * Software can not rely on states from output contexts because of
-	 * latency between events and xHC updating output context values.
-	 * See xhci 1.1 section 4.8.3 for more details
-	 */
-	unsigned long			flags;
-#define VDEV_PORT_ERROR			BIT(0) /* Port error, link inactive */
 
 	/* The current max exit latency for the enabled USB3 link states. */
 	u16				current_mel;
@@ -1478,6 +1470,7 @@ struct xhci_port {
 	int			hcd_portnum;
 	struct xhci_hub		*rhub;
 	struct xhci_port_cap	*port_cap;
+	unsigned int		link_inactive:1;
 	unsigned int		lpm_incapable:1;
 	unsigned long		resume_timestamp;
 	bool			rexit_active;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFT PATCH 2/3] xhci: prevent endpoint recovery after roothub disconnect
  2026-06-29 12:30 [RFT PATCH 0/3] xhci: avoid futile stop endpoint command and host teardown Mathias Nyman
  2026-06-29 12:30 ` [RFT PATCH 1/3] xhci: include all root port children in recovery prevention on link error Mathias Nyman
@ 2026-06-29 12:30 ` Mathias Nyman
  2026-06-29 12:30 ` [RFT PATCH 3/3] xhci: avoid xHC endpoint changes after disconnect or link error Mathias Nyman
  2 siblings, 0 replies; 4+ messages in thread
From: Mathias Nyman @ 2026-06-29 12:30 UTC (permalink / raw)
  To: linux-usb; +Cc: raoxu, michal.pecio, Mathias Nyman

Prevent transfer retry and endpoint recovery if the device or its parent
disconnected from the roothub. Just like link error case.

There is a suspicion some xHC controllers may stop processing endpoint
related commands after the last USB device disconnects from the host.

Disconnect often causes transaction errors, xhci driver tries to (soft)
reset and restart the endpoint to recover it.
Hub driver again will cancel all pending URBs once disconnect is detected,
stopping the endpoint right after (soft) reset restarted it.
xHC controller sometimes fail to complete the stop endpoint command,
leading to driver timing out, and tearing down xhci

Prevent extra endpoint (soft) reset after xhci driver is aware of the
parent roothub port disconnect.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
 drivers/usb/host/xhci-ring.c | 12 ++++++++----
 drivers/usb/host/xhci.h      |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 020e924c1ced..5e9efd3aa629 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -986,14 +986,16 @@ static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 				struct xhci_td *td,
 				enum xhci_ep_reset_type reset_type)
 {
+	struct xhci_port *rhub_port = ep->vdev->rhub_port;
 	unsigned int slot_id = ep->vdev->slot_id;
 	int err;
 
 	/*
-	 * Avoid resetting endpoint if link is inactive. Can cause host hang.
-	 * Device will be reset soon to recover the link so don't do anything
+	 * Avoid resetting endpoint if link is inactive or device disonnected.
+	 * Can cause host hang.
+	 * Device will be reset to recover an inactive link, so don't do anything
 	 */
-	if (ep->vdev->rhub_port->link_inactive)
+	if (rhub_port->link_inactive || !rhub_port->connected)
 		return -ENODEV;
 
 	/* add td to cancelled list and let reset ep handler take care of it */
@@ -2053,8 +2055,10 @@ static void handle_port_status(struct xhci_hcd *xhci, union xhci_trb *event)
 	 * Tag broken links to avoid retries while hub driver sorts it out.
 	 * Link status is not relible while port is in reset.
 	 */
-	if (!(portsc & PORT_RESET))
+	if (!(portsc & PORT_RESET)) {
 		port->link_inactive = (pls == XDEV_INACTIVE);
+		port->connected = !!(portsc & PORT_CONNECT);
+	}
 
 	if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) {
 		xhci_dbg(xhci, "port resume event for port %d\n", port_id);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 717a7fd60a76..fd5533bdb261 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1471,6 +1471,7 @@ struct xhci_port {
 	struct xhci_hub		*rhub;
 	struct xhci_port_cap	*port_cap;
 	unsigned int		link_inactive:1;
+	unsigned int		connected:1;
 	unsigned int		lpm_incapable:1;
 	unsigned long		resume_timestamp;
 	bool			rexit_active;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFT PATCH 3/3] xhci: avoid xHC endpoint changes after disconnect or link error.
  2026-06-29 12:30 [RFT PATCH 0/3] xhci: avoid futile stop endpoint command and host teardown Mathias Nyman
  2026-06-29 12:30 ` [RFT PATCH 1/3] xhci: include all root port children in recovery prevention on link error Mathias Nyman
  2026-06-29 12:30 ` [RFT PATCH 2/3] xhci: prevent endpoint recovery after roothub disconnect Mathias Nyman
@ 2026-06-29 12:30 ` Mathias Nyman
  2 siblings, 0 replies; 4+ messages in thread
From: Mathias Nyman @ 2026-06-29 12:30 UTC (permalink / raw)
  To: linux-usb; +Cc: raoxu, michal.pecio, Mathias Nyman

Avoid all extra endpoint state changes after the roothub link
is lost due to disconnect or link error, and endpoint is known
to be in a non-running state.

Rapid endpoint state changes involving endpoint reset, restart, and
stopping the endpoint have caused xHC failures to complete stop
endpoint command. xhci driver sees this as a fatal flaw and tears
down xhci.

These endpoint state changes are normally part of recovery from
transaction errors or URB cancel.
In this case recovery is not needed.

Add an endpoint state called EP_DROP_PENDING.
Set ep->ep_state |= EP_DROP_PENDING when an endpoint is found in a
halted or stopped non-running state, and the roothub link is
lost. Prevent endpoint from restarting.

URB cancel doesn't need to stop the endpoint if EP_DROP_PENDONG is set.
URBs can be given back directly.
Endpoint is, and will remain stopped until it's dropped.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
 drivers/usb/host/xhci-ring.c | 19 ++++++++++++++++---
 drivers/usb/host/xhci.c      |  5 ++++-
 drivers/usb/host/xhci.h      |  1 +
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 5e9efd3aa629..83947a476f14 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -561,8 +561,8 @@ void xhci_ring_ep_doorbell(struct xhci_hcd *xhci,
 	 * pointer command pending because the device can choose to start any
 	 * stream once the endpoint is on the HW schedule.
 	 */
-	if ((ep_state & EP_STOP_CMD_PENDING) || (ep_state & SET_DEQ_PENDING) ||
-	    (ep_state & EP_HALTED) || (ep_state & EP_CLEARING_TT))
+	if (ep_state & (EP_STOP_CMD_PENDING | SET_DEQ_PENDING | EP_HALTED |
+			EP_CLEARING_TT | EP_DROP_PENDING))
 		return;
 
 	trace_xhci_ring_ep_doorbell(slot_id, DB_VALUE(ep_index, stream_id));
@@ -995,8 +995,10 @@ static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 	 * Can cause host hang.
 	 * Device will be reset to recover an inactive link, so don't do anything
 	 */
-	if (rhub_port->link_inactive || !rhub_port->connected)
+	if (rhub_port->link_inactive || !rhub_port->connected) {
+		ep->ep_state |= EP_DROP_PENDING;
 		return -ENODEV;
+	}
 
 	/* add td to cancelled list and let reset ep handler take care of it */
 	if (reset_type == EP_HARD_RESET) {
@@ -1066,6 +1068,13 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep)
 				  td->urb, td->urb->stream_id);
 			continue;
 		}
+
+		/* device disconnected or link error, ep will be dropped */
+		if (ep->ep_state & EP_DROP_PENDING) {
+			td->cancel_status = TD_CLEARED;
+			continue;
+		}
+
 		/*
 		 * If a ring stopped on the TD we need to cancel then we have to
 		 * move the xHC endpoint ring dequeue pointer past this TD.
@@ -1296,6 +1305,10 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
 		}
 	}
 
+	/* link is inactive or disconnected, ep is not running and shouldn't be restarted */
+	if (ep->vdev->rhub_port->link_inactive || !ep->vdev->rhub_port->connected)
+		ep->ep_state |= EP_DROP_PENDING;
+
 	/* will queue a set TR deq if stopped on a cancelled, uncleared TD */
 	xhci_invalidate_cancelled_tds(ep);
 	ep->ep_state &= ~EP_STOP_CMD_PENDING;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index cc92b316d877..66e302cdef2f 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1855,7 +1855,7 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 	}
 
 	/* In this case no commands are pending but the endpoint is stopped */
-	if (ep->ep_state & EP_CLEARING_TT) {
+	if (ep->ep_state & (EP_CLEARING_TT | EP_DROP_PENDING)) {
 		/* and cancelled TDs can be given back right away */
 		xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n",
 				urb->dev->slot_id, ep_index, ep->ep_state);
@@ -4083,6 +4083,9 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd,
 	ret = 0;
 
 command_cleanup:
+	for (i = 0; i < EP_CTX_PER_DEV; i++)
+		virt_dev->eps[i].ep_state &= ~EP_DROP_PENDING;
+
 	xhci_free_command(xhci, reset_device_cmd);
 	return ret;
 }
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index fd5533bdb261..a3293e90857b 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -674,6 +674,7 @@ struct xhci_virt_ep {
 #define EP_SOFT_CLEAR_TOGGLE	BIT(7)
 /* usb_hub_clear_tt_buffer is in progress */
 #define EP_CLEARING_TT		BIT(8)
+#define EP_DROP_PENDING		BIT(9) /* port disconnect or link error, don't restart */
 	/* ----  Related to URB cancellation ---- */
 	struct list_head	cancelled_td_list;
 	struct xhci_hcd		*xhci;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-29 12:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-29 12:30 [RFT PATCH 0/3] xhci: avoid futile stop endpoint command and host teardown Mathias Nyman
2026-06-29 12:30 ` [RFT PATCH 1/3] xhci: include all root port children in recovery prevention on link error Mathias Nyman
2026-06-29 12:30 ` [RFT PATCH 2/3] xhci: prevent endpoint recovery after roothub disconnect Mathias Nyman
2026-06-29 12:30 ` [RFT PATCH 3/3] xhci: avoid xHC endpoint changes after disconnect or link error Mathias Nyman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox