From: Keith Busch <kbusch@meta.com>
To: <linux-pci@vger.kernel.org>, <lukas@wunner.de>, <bhelgaas@google.com>
Cc: Keith Busch <kbusch@kernel.org>
Subject: [PATCH 1/2] PCI: pciehp: fix concurrent sub-tree removal deadlock
Date: Wed, 12 Jun 2024 11:10:23 -0700 [thread overview]
Message-ID: <20240612181024.3577119-2-kbusch@meta.com> (raw)
In-Reply-To: <20240612181024.3577119-1-kbusch@meta.com>
From: Keith Busch <kbusch@kernel.org>
A PCIe hotplug event modifies the topology from its IRQ thread once that
thread has acquired the global pci_rescan_remove_lock.
If a different removal event happens to acquire that lock first, and
that removal event is for the parent device of the bridge processing the
other hotplug event, then we are deadlocked: the parent removal will
wait indefinitely on the child's IRQ thread because the parent is
holding the global lock the child thread needs to make forward progress.
Introduce a new locking function that aborts if the device is being
removed. The following are stack traces of the deadlock:
Task A (child's IRQ thread, waiting for pci_rescan_remove_lock):
pciehp_unconfigure_device+0x41/0x120
pciehp_disable_slot+0x3c/0xc0
pciehp_handle_presence_or_link_change+0x28f/0x3e0
pciehp_ist+0xc3/0x210
irq_thread_fn+0x19/0x40
Task B (parent removal, holding pci_rescan_remove_lock, waiting for the child's IRQ thread):
__synchronize_irq+0x5b/0x90
free_irq+0x192/0x2e0
pcie_shutdown_notification+0x3b/0x40
pciehp_remove+0x23/0x50
pcie_port_remove_service+0x2c/0x40
device_release_driver_internal+0x11f/0x180
bus_remove_device+0xc5/0x110
device_del+0x126/0x340
device_unregister+0x13/0x50
remove_iter+0x17/0x20
device_for_each_child+0x4a/0x70
pcie_portdrv_remove+0x23/0x40
pci_device_remove+0x24/0x60
device_release_driver_internal+0x11f/0x180
pci_stop_bus_device+0x57/0x80
pci_stop_bus_device+0x2c/0x80
pci_stop_and_remove_bus_device+0xe/0x20
pciehp_unconfigure_device+0x76/0x120
pciehp_disable_slot+0x3c/0xc0
pciehp_handle_presence_or_link_change+0x28f/0x3e0
pciehp_ist+0xc3/0x210
irq_thread_fn+0x19/0x40
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
drivers/pci/hotplug/pciehp_pci.c | 12 +++++++++---
drivers/pci/pci.h | 1 +
drivers/pci/probe.c | 24 ++++++++++++++++++++++++
include/linux/pci.h | 2 ++
4 files changed, 36 insertions(+), 3 deletions(-)
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index ad12515a4a121..ca6237b0732c8 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -34,9 +34,12 @@ int pciehp_configure_device(struct controller *ctrl)
struct pci_dev *dev;
struct pci_dev *bridge = ctrl->pcie->port;
struct pci_bus *parent = bridge->subordinate;
- int num, ret = 0;
+ int num, ret;
- pci_lock_rescan_remove();
+ ret = pci_trylock_rescan_remove(bridge);
+ if (!ret)
+ return -ENODEV;
+ ret = 0;
dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
if (dev) {
@@ -93,6 +96,7 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
struct pci_dev *dev, *temp;
struct pci_bus *parent = ctrl->pcie->port->subordinate;
u16 command;
+ int ret;
ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
__func__, pci_domain_nr(parent), parent->number);
@@ -100,7 +104,9 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
if (!presence)
pci_walk_bus(parent, pci_dev_set_disconnected, NULL);
- pci_lock_rescan_remove();
+ ret = pci_trylock_rescan_remove(parent->self);
+ if (!ret)
+ return;
/*
* Stopping an SR-IOV PF device removes all the associated VFs,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fd44565c47562..f525490a02122 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -370,6 +370,7 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
{
pci_dev_set_io_state(dev, pci_channel_io_perm_failure);
pci_doe_disconnected(dev);
+ pci_notify_disconnected();
return 0;
}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 5fbabb4e3425f..d2e19a1d1a45b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -3302,6 +3302,7 @@ EXPORT_SYMBOL_GPL(pci_rescan_bus);
* routines should always be executed under this mutex.
*/
static DEFINE_MUTEX(pci_rescan_remove_lock);
+static DECLARE_WAIT_QUEUE_HEAD(pci_lock_wq);
void pci_lock_rescan_remove(void)
{
@@ -3309,12 +3310,35 @@ void pci_lock_rescan_remove(void)
}
EXPORT_SYMBOL_GPL(pci_lock_rescan_remove);
+/*
+ * pci_trylock_rescan_remove() - keep trying to take the lock until successful
+ * or notified the device is disconnected
+ *
+ * Returns true if the lock was successfully taken, false otherwise.
+ */
+bool pci_trylock_rescan_remove(struct pci_dev *dev)
+{
+ int ret;
+
+ wait_event(pci_lock_wq,
+ (ret = mutex_trylock(&pci_rescan_remove_lock)) == 1 ||
+ pci_dev_is_disconnected(dev));
+
+ return ret;
+}
+
void pci_unlock_rescan_remove(void)
{
mutex_unlock(&pci_rescan_remove_lock);
+ wake_up_all(&pci_lock_wq);
}
EXPORT_SYMBOL_GPL(pci_unlock_rescan_remove);
+void pci_notify_disconnected(void)
+{
+ wake_up_all(&pci_lock_wq);
+}
+
static int __init pci_sort_bf_cmp(const struct device *d_a,
const struct device *d_b)
{
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cafc5ab1cbcb4..b05aaf9aac6c8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1442,7 +1442,9 @@ void set_pcie_hotplug_bridge(struct pci_dev *pdev);
unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge);
unsigned int pci_rescan_bus(struct pci_bus *bus);
void pci_lock_rescan_remove(void);
+bool pci_trylock_rescan_remove(struct pci_dev *dev);
void pci_unlock_rescan_remove(void);
+void pci_notify_disconnected(void);
/* Vital Product Data routines */
ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
--
2.43.0
next prev parent reply other threads:[~2024-06-12 18:10 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-12 18:10 [PATCH 0/2] pcie hotplug and error fixes Keith Busch
2024-06-12 18:10 ` Keith Busch [this message]
2024-06-12 18:10 ` [PATCH 2/2] PCI: err: ensure stable topology during handling Keith Busch
2024-06-12 18:11 ` [PATCH 0/2] pcie hotplug and error fixes Keith Busch
-- strict thread matches above, loose matches on Subject: below --
2024-06-10 22:03 Keith Busch
2024-06-10 22:03 ` [PATCH 1/2] PCI: pciehp: fix concurrent sub-tree removal deadlock Keith Busch
2024-06-11 6:19 ` kernel test robot
2024-06-11 6:19 ` kernel test robot
2024-06-11 14:33 ` Keith Busch
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240612181024.3577119-2-kbusch@meta.com \
--to=kbusch@meta.com \
--cc=bhelgaas@google.com \
--cc=kbusch@kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=lukas@wunner.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox