public inbox for linux-pci@vger.kernel.org
 help / color / mirror / Atom feed
From: Keith Busch <kbusch@meta.com>
To: <linux-pci@vger.kernel.org>, <helgaas@kernel.org>
Cc: <alex@shazbot.org>, <lukas@wunner.de>, <dan.j.williams@intel.com>,
	<guojinhui.liam@bytedance.com>, <ilpo.jarvinen@linux.intel.com>,
	Keith Busch <kbusch@kernel.org>
Subject: [PATCHv3 3/4] pci: remove slot specific lock/unlock and save/restore
Date: Thu, 5 Feb 2026 13:25:32 -0800	[thread overview]
Message-ID: <20260205212533.1512153-4-kbusch@meta.com> (raw)
In-Reply-To: <20260205212533.1512153-1-kbusch@meta.com>

From: Keith Busch <kbusch@kernel.org>

The Linux pci driver resolves a "slot" to the "D" in the B:D.f (see
PCI_SLOT()). A pcie "slot reset" is a secondary bus reset, which affects
every function on every "D", not just the ones with a matching "slot".
The slot lock/unlock and save/restore functions, however, are only
handling a subset of the functions, breaking the rest.

ARI devices with more than 8 functions fail because their state is not
properly handled, nor is the attached driver notified of the reset. In
the best case, the device will appear unresponsive to the driver,
resulting in unexpected errors. A worse possibility may panic the kernel
if in flight transactions trigger hardware reported errors like this
real observation:

  vfio-pci 0000:01:00.0: resetting
  vfio-pci 0000:01:00.0: reset done
  {1}[Hardware Error]:  Error 1, type: fatal
  {1}[Hardware Error]:   section_type: PCIe error
  {1}[Hardware Error]:   port_type: 0, PCIe end point
  {1}[Hardware Error]:   version: 0.2
  {1}[Hardware Error]:   command: 0x0140, status: 0x0010
  {1}[Hardware Error]:   device_id: 0000:01:01.0
  {1}[Hardware Error]:   slot: 0
  {1}[Hardware Error]:   secondary_bus: 0x00
  {1}[Hardware Error]:   vendor_id: 0x1d9b, device_id: 0x0207
  {1}[Hardware Error]:   class_code: 020000
  {1}[Hardware Error]:   bridge: secondary_status: 0x0000, control: 0x0000
  {1}[Hardware Error]:   aer_cor_status: 0x00008000, aer_cor_mask: 0x00002000
  {1}[Hardware Error]:   aer_uncor_status: 0x00010000, aer_uncor_mask: 0x00100000
  {1}[Hardware Error]:   aer_uncor_severity: 0x006f6030
  {1}[Hardware Error]:   TLP Header: 0a412800 00192080 60000004 00000004
  GHES: Fatal hardware error but panic disabled
  Kernel panic - not syncing: GHES: Fatal hardware error

Fix this by properly locking and notifying the entire affected bus
topology, not just specific matching slots. For architectures that
support "slot" specific resets, this patch potentially introduces an
insignificant amount of overhead, but is otherwise harmless.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/pci/pci.c | 147 ++++------------------------------------------
 1 file changed, 11 insertions(+), 136 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e00af20ea7376..df9ed73dad416 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5287,96 +5287,6 @@ static int pci_bus_trylock(struct pci_bus *bus)
 	return 0;
 }
 
-/* Do any devices on or below this slot prevent a bus reset? */
-static bool pci_slot_resettable(struct pci_slot *slot)
-{
-	struct pci_dev *dev, *bridge = slot->bus->self;
-
-	if (bridge && (bridge->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
-		return false;
-
-	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
-		if (!dev->slot || dev->slot != slot)
-			continue;
-		if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
-		    (dev->subordinate && !pci_bus_resettable(dev->subordinate)))
-			return false;
-	}
-
-	return true;
-}
-
-/* Lock devices from the top of the tree down */
-static void pci_slot_lock(struct pci_slot *slot)
-{
-	struct pci_dev *dev, *bridge = slot->bus->self;
-
-	if (bridge)
-		pci_dev_lock(bridge);
-
-	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
-		if (!dev->slot || dev->slot != slot)
-			continue;
-		if (dev->subordinate)
-			pci_bus_lock(dev->subordinate);
-		else
-			pci_dev_lock(dev);
-	}
-}
-
-/* Unlock devices from the bottom of the tree up */
-static void pci_slot_unlock(struct pci_slot *slot)
-{
-	struct pci_dev *dev, *bridge = slot->bus->self;
-
-	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
-		if (!dev->slot || dev->slot != slot)
-			continue;
-		if (dev->subordinate)
-			pci_bus_unlock(dev->subordinate);
-		else
-			pci_dev_unlock(dev);
-	}
-
-	if (bridge)
-		pci_dev_unlock(bridge);
-}
-
-/* Return 1 on successful lock, 0 on contention */
-static int pci_slot_trylock(struct pci_slot *slot)
-{
-	struct pci_dev *dev, *bridge = slot->bus->self;
-
-	if (bridge && !pci_dev_trylock(bridge))
-		return 0;
-
-	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
-		if (!dev->slot || dev->slot != slot)
-			continue;
-		if (dev->subordinate) {
-			if (!pci_bus_trylock(dev->subordinate))
-				goto unlock;
-		} else if (!pci_dev_trylock(dev))
-			goto unlock;
-	}
-	return 1;
-
-unlock:
-	list_for_each_entry_continue_reverse(dev,
-					     &slot->bus->devices, bus_list) {
-		if (!dev->slot || dev->slot != slot)
-			continue;
-		if (dev->subordinate)
-			pci_bus_unlock(dev->subordinate);
-		else
-			pci_dev_unlock(dev);
-	}
-
-	if (bridge)
-		pci_dev_unlock(bridge);
-	return 0;
-}
-
 /*
  * Save and disable devices from the top of the tree down while holding
  * the @dev mutex lock for the entire tree.
@@ -5410,59 +5320,23 @@ static void pci_bus_restore_locked(struct pci_bus *bus)
 	}
 }
 
-/*
- * Save and disable devices from the top of the tree down while holding
- * the @dev mutex lock for the entire tree.
- */
-static void pci_slot_save_and_disable_locked(struct pci_slot *slot)
-{
-	struct pci_dev *dev;
-
-	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
-		if (!dev->slot || dev->slot != slot)
-			continue;
-		pci_dev_save_and_disable(dev);
-		if (dev->subordinate)
-			pci_bus_save_and_disable_locked(dev->subordinate);
-	}
-}
-
-/*
- * Restore devices from top of the tree down while holding @dev mutex lock
- * for the entire tree.  Parent bridges need to be restored before we can
- * get to subordinate devices.
- */
-static void pci_slot_restore_locked(struct pci_slot *slot)
-{
-	struct pci_dev *dev;
-
-	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
-		if (!dev->slot || dev->slot != slot)
-			continue;
-		pci_dev_restore(dev);
-		if (dev->subordinate) {
-			pci_bridge_wait_for_secondary_bus(dev, "slot reset");
-			pci_bus_restore_locked(dev->subordinate);
-		}
-	}
-}
-
 static int pci_slot_reset(struct pci_slot *slot, bool probe)
 {
+	struct pci_bus *bus = slot ? slot->bus : NULL;
 	int rc;
 
-	if (!slot || !pci_slot_resettable(slot))
+	if (!slot || !bus || !pci_bus_resettable(bus))
 		return -ENOTTY;
 
 	if (!probe)
-		pci_slot_lock(slot);
+		pci_bus_lock(bus);
 
 	might_sleep();
 
 	rc = pci_reset_hotplug_slot(slot->hotplug, probe);
 
 	if (!probe)
-		pci_slot_unlock(slot);
+		pci_bus_unlock(bus);
 
 	return rc;
 }
@@ -5489,25 +5363,26 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
  * wrap the bus reset to avoid spurious slot related events such as hotplug.
  * Generally a slot reset should be attempted before a bus reset.  All of the
  * function of the slot and any subordinate buses behind the slot are reset
- * through this function.  PCI config space of all devices in the slot and
- * behind the slot is saved before and restored after reset.
+ * through this function.  PCI config space of all devices below the slot bus
+ * are saved before and restored after reset.
  *
  * Same as above except return -EAGAIN if the slot cannot be locked
  */
 static int pci_try_reset_slot(struct pci_slot *slot)
 {
+	struct pci_bus *bus = slot->bus;
 	int rc;
 
 	rc = pci_slot_reset(slot, PCI_RESET_PROBE);
 	if (rc)
 		return rc;
 
-	if (pci_slot_trylock(slot)) {
-		pci_slot_save_and_disable_locked(slot);
+	if (pci_bus_trylock(bus)) {
+		pci_bus_save_and_disable_locked(bus);
 		might_sleep();
 		rc = pci_reset_hotplug_slot(slot->hotplug, PCI_RESET_DO_RESET);
-		pci_slot_restore_locked(slot);
-		pci_slot_unlock(slot);
+		pci_bus_restore_locked(bus);
+		pci_bus_unlock(bus);
 	} else
 		rc = -EAGAIN;
 
-- 
2.47.3


  parent reply	other threads:[~2026-02-05 21:26 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-05 21:25 [PATCHv3 0/4] pci slot reset handling fixes Keith Busch
2026-02-05 21:25 ` [PATCHv3 1/4] pci: rename __pci_bus_reset and __pci_slot_reset Keith Busch
2026-02-06 17:22   ` Keith Busch
2026-02-10 20:44   ` dan.j.williams
2026-02-05 21:25 ` [PATCHv3 2/4] pci: allow all bus devices to use the same slot Keith Busch
2026-02-10 20:00   ` dan.j.williams
2026-02-10 20:28     ` Keith Busch
2026-02-10 20:51       ` dan.j.williams
2026-02-05 21:25 ` Keith Busch [this message]
2026-02-10 22:03   ` [PATCHv3 3/4] pci: remove slot specific lock/unlock and save/restore dan.j.williams
2026-02-10 23:25     ` Keith Busch
2026-02-10 23:48       ` dan.j.williams
2026-02-10 23:46   ` Alex Williamson
2026-02-11  0:12     ` Keith Busch
2026-02-11 15:22       ` Alex Williamson
2026-02-11 15:54         ` Keith Busch
2026-02-05 21:25 ` [PATCHv3 4/4] pci: make reset_subordinate hotplug safe Keith Busch
2026-02-10 22:14   ` dan.j.williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260205212533.1512153-4-kbusch@meta.com \
    --to=kbusch@meta.com \
    --cc=alex@shazbot.org \
    --cc=dan.j.williams@intel.com \
    --cc=guojinhui.liam@bytedance.com \
    --cc=helgaas@kernel.org \
    --cc=ilpo.jarvinen@linux.intel.com \
    --cc=kbusch@kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=lukas@wunner.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox