Linux PCI subsystem development
 help / color / mirror / Atom feed
From: Keith Busch <kbusch@meta.com>
To: <linux-pci@vger.kernel.org>, <lukas@wunner.de>, <helgaas@kernel.org>
Cc: Keith Busch <kbusch@kernel.org>
Subject: [PATCH RESEND 1/1] PCI: pciehp: fix concurrent sub-tree removal deadlock
Date: Thu, 16 Oct 2025 12:30:49 -0700	[thread overview]
Message-ID: <20251016193049.881212-2-kbusch@meta.com> (raw)
In-Reply-To: <20251016193049.881212-1-kbusch@meta.com>

From: Keith Busch <kbusch@kernel.org>

PCIe hotplug events modify the topology in their IRQ thread once it can
acquire the global pci_rescan_remove_lock.

If a different removal event happens to acquire that lock first, and
that removal event is for the parent device of the bridge processing the
other hotplug event, then we are deadlocked: the parent removal will
wait indefinitely on the child's IRQ thread because the parent is
holding the global lock the child thread needs to make forward progress.

Introduce a new locking function that aborts if the device is being
removed. The following are stack traces of the deadlock:

Task A:

  pciehp_unconfigure_device+0x41/0x120
  pciehp_disable_slot+0x3c/0xc0
  pciehp_handle_presence_or_link_change+0x28f/0x3e0
  pciehp_ist+0xc3/0x210
  irq_thread_fn+0x19/0x40

Task B:

  __synchronize_irq+0x5b/0x90
  free_irq+0x192/0x2e0
  pcie_shutdown_notification+0x3b/0x40
  pciehp_remove+0x23/0x50
  pcie_port_remove_service+0x2c/0x40
  device_release_driver_internal+0x11f/0x180
  bus_remove_device+0xc5/0x110
  device_del+0x126/0x340
  device_unregister+0x13/0x50
  remove_iter+0x17/0x20
  device_for_each_child+0x4a/0x70
  pcie_portdrv_remove+0x23/0x40
  pci_device_remove+0x24/0x60
  device_release_driver_internal+0x11f/0x180
  pci_stop_bus_device+0x57/0x80
  pci_stop_bus_device+0x2c/0x80
  pci_stop_and_remove_bus_device+0xe/0x20
  pciehp_unconfigure_device+0x76/0x120
  pciehp_disable_slot+0x3c/0xc0
  pciehp_handle_presence_or_link_change+0x28f/0x3e0
  pciehp_ist+0xc3/0x210
  irq_thread_fn+0x19/0x40

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/pci/hotplug/pciehp_pci.c | 12 +++++++++---
 drivers/pci/pci.h                |  1 +
 drivers/pci/probe.c              | 25 +++++++++++++++++++++++++
 include/linux/pci.h              |  2 ++
 4 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index 65e50bee1a8c0..8ad4f8a931d0b 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -34,9 +34,12 @@ int pciehp_configure_device(struct controller *ctrl)
 	struct pci_dev *dev;
 	struct pci_dev *bridge = ctrl->pcie->port;
 	struct pci_bus *parent = bridge->subordinate;
-	int num, ret = 0;
+	int num, ret;
 
-	pci_lock_rescan_remove();
+	ret = pci_trylock_rescan_remove(bridge);
+	if (!ret)
+		return -ENODEV;
+	ret = 0;
 
 	dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
 	if (dev) {
@@ -97,6 +100,7 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
 	struct pci_dev *dev, *temp;
 	struct pci_bus *parent = ctrl->pcie->port->subordinate;
 	u16 command;
+	int ret;
 
 	ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
 		 __func__, pci_domain_nr(parent), parent->number);
@@ -104,7 +108,9 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
 	if (!presence)
 		pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
 
-	pci_lock_rescan_remove();
+	ret = pci_trylock_rescan_remove(parent->self);
+	if (!ret)
+		return;
 
 	/*
 	 * Stopping an SR-IOV PF device removes all the associated VFs,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4492b809094b5..0b2f1f5f99ac7 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -650,6 +650,7 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
 {
 	pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
 	pci_doe_disconnected(dev);
+	pci_notify_disconnected();
 
 	return 0;
 }
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index c83e75a0ec126..5086e23eb84d9 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -3515,12 +3515,37 @@ void pci_lock_rescan_remove(void)
 }
 EXPORT_SYMBOL_GPL(pci_lock_rescan_remove);
 
+static DECLARE_WAIT_QUEUE_HEAD(pci_lock_wq);
+
+/*
+ * pci_trylock_rescan_remove() - keep trying to take the lock until successful
+ *				 or notified the device is disconnected
+ *
+ * Returns 1 if the lock was successfully taken, 0 otherwise.
+ */
+bool pci_trylock_rescan_remove(struct pci_dev *dev)
+{
+	int ret;
+
+	wait_event(pci_lock_wq,
+		   (ret = mutex_trylock(&pci_rescan_remove_lock)) == 1 ||
+		   pci_dev_is_disconnected(dev));
+
+	return ret;
+}
+
 void pci_unlock_rescan_remove(void)
 {
 	mutex_unlock(&pci_rescan_remove_lock);
+	wake_up_all(&pci_lock_wq);
 }
 EXPORT_SYMBOL_GPL(pci_unlock_rescan_remove);
 
+void pci_notify_disconnected(void)
+{
+	wake_up_all(&pci_lock_wq);
+}
+
 static int __init pci_sort_bf_cmp(const struct device *d_a,
 				  const struct device *d_b)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d1fdf81fbe1e4..059d99c31204f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1478,7 +1478,9 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev);
 /* Functions for PCI Hotplug drivers to use */
 unsigned int pci_rescan_bus(struct pci_bus *bus);
 void pci_lock_rescan_remove(void);
+bool pci_trylock_rescan_remove(struct pci_dev *dev);
 void pci_unlock_rescan_remove(void);
+void pci_notify_disconnected(void);
 
 /* Vital Product Data routines */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
-- 
2.47.3


  reply	other threads:[~2025-10-16 19:31 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-16 19:30 [PATCH RESEND 0/1] pcie hotplug deadlock fix Keith Busch
2025-10-16 19:30 ` Keith Busch [this message]
2025-10-17 10:06   ` [PATCH RESEND 1/1] PCI: pciehp: fix concurrent sub-tree removal deadlock kernel test robot
2025-10-17 10:06   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251016193049.881212-2-kbusch@meta.com \
    --to=kbusch@meta.com \
    --cc=helgaas@kernel.org \
    --cc=kbusch@kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=lukas@wunner.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox