From: Bert Karwatzki <spasswolf@web.de>
To: linux-kernel@vger.kernel.org
Cc: "Bert Karwatzki" <spasswolf@web.de>,
linux-next@vger.kernel.org, linux-stable@vger.kernel.org,
regressions@lists.linux.dev, linux-pci@vger.kernel.org,
linux-acpi@vger.kernel.org,
"Mario Limonciello" <superm1@kernel.org>,
"Christian König" <christian.koenig@amd.com>,
"Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Subject: [REGRESSION 03/04] Crash during resume of pcie bridge
Date: Mon, 6 Oct 2025 14:09:42 +0200 [thread overview]
Message-ID: <20251006120944.7880-4-spasswolf@web.de> (raw)
In-Reply-To: <20251006120944.7880-1-spasswolf@web.de>
In order to get a working crash I removed some of the monitoring again:
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index bc365c0dbe2f..a984ccd4a2a0 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -514,60 +514,46 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
break;
case ACPI_NOTIFY_DEVICE_CHECK:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
break;
case ACPI_NOTIFY_DEVICE_WAKE:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_WAKE\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
return;
case ACPI_NOTIFY_EJECT_REQUEST:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
break;
case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK_LIGHT\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
/* TBD: Exactly what does 'light' mean? */
return;
case ACPI_NOTIFY_FREQUENCY_MISMATCH:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_FREQUENCY_MISMATCH\n", __func__, __LINE__);
acpi_handle_err(handle, "Device cannot be configured due "
"to a frequency mismatch\n");
return;
case ACPI_NOTIFY_BUS_MODE_MISMATCH:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_MODE_MISMATCH\n", __func__, __LINE__);
acpi_handle_err(handle, "Device cannot be configured due "
"to a bus mode mismatch\n");
return;
case ACPI_NOTIFY_POWER_FAULT:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_POWER_FAULT\n", __func__, __LINE__);
acpi_handle_err(handle, "Device has suffered a power fault\n");
return;
default:
- printk(KERN_INFO "%s %d: acpi unknown event type\n", __func__, __LINE__);
acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
return;
}
adev = acpi_get_acpi_dev(handle);
- if (adev)
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
- else
- printk(KERN_INFO "%s %d: adev = NULL\n", __func__, __LINE__);
-
if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
return;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 9a7dc432b50d..4e0583274b8f 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -539,7 +539,6 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
if (!adev)
return;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
mutex_lock(&acpi_pm_notifier_lock);
if (adev->wakeup.flags.notifier_present) {
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 0f6a16856119..5ff343096ece 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1167,7 +1167,6 @@ void acpi_os_wait_events_complete(void)
* Make sure the GPE handler or the fixed event handler is not used
* on another CPU after removal.
*/
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
if (acpi_sci_irq_valid())
synchronize_hardirq(acpi_sci_irq);
flush_workqueue(kacpid_wq);
@@ -1185,7 +1184,6 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
{
struct acpi_hp_work *hpw = container_of(work, struct acpi_hp_work, work);
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
acpi_os_wait_events_complete();
acpi_device_hotplug(hpw->adev, hpw->src);
kfree(hpw);
@@ -1194,7 +1192,6 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
acpi_status acpi_hotplug_schedule(struct acpi_device *adev, u32 src)
{
struct acpi_hp_work *hpw;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_handle_debug(adev->handle,
"Scheduling hotplug event %u for deferred handling\n",
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index d53be7e0388d..065abe56f440 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -251,7 +251,6 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p)
{
struct acpi_scan_handler *handler = adev->handler;
uintptr_t flags = (uintptr_t)p;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p);
@@ -315,7 +314,6 @@ static void acpi_scan_check_subtree(struct acpi_device *adev)
{
uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_scan_check_and_detach(adev, (void *)flags);
}
@@ -371,7 +369,6 @@ static int acpi_scan_rescan_bus(struct acpi_device *adev)
{
struct acpi_scan_handler *handler = adev->handler;
int ret;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
if (handler && handler->hotplug.scan_dependent)
ret = handler->hotplug.scan_dependent(adev);
@@ -388,7 +385,6 @@ static int acpi_scan_device_check(struct acpi_device *adev)
{
struct acpi_device *parent;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_scan_check_subtree(adev);
if (!acpi_device_is_present(adev))
@@ -416,24 +412,19 @@ static int acpi_scan_device_check(struct acpi_device *adev)
static int acpi_scan_bus_check(struct acpi_device *adev)
{
acpi_scan_check_subtree(adev);
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_rescan_bus(adev);
}
static int acpi_generic_hotplug_event(struct acpi_device *adev, u32 type)
{
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_bus_check(adev);
case ACPI_NOTIFY_DEVICE_CHECK:
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_device_check(adev);
case ACPI_NOTIFY_EJECT_REQUEST:
case ACPI_OST_EC_OSPM_EJECT:
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
if (adev->handler && !adev->handler->hotplug.enabled) {
dev_info(&adev->dev, "Eject disabled\n");
return -EPERM;
@@ -450,7 +441,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
int error = -ENODEV;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
lock_device_hotplug();
mutex_lock(&acpi_scan_lock);
@@ -476,10 +466,9 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
* There may be additional notify handlers for device objects
* without the .event() callback, so ignore them here.
*/
- if (notify) {
- dev_info(&adev->dev, "%s %d: calling notify = %px\n", __func__, __LINE__, (void *) notify);
+ if (notify)
error = notify(adev, src);
- } else
+ else
goto out;
}
switch (error) {
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 895898c3cd56..27cce7f1b1d3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -142,6 +142,8 @@ EXPORT_SYMBOL_GPL(pm_runtime_suspended_time);
*/
static void pm_runtime_deactivate_timer(struct device *dev)
{
+ if (!strcmp(dev_name(dev), "0000:00:01.1"))
+ dev_info(dev, "%s %d\n", __func__, __LINE__);
if (dev->power.timer_expires > 0) {
hrtimer_try_to_cancel(&dev->power.suspend_timer);
dev->power.timer_expires = 0;
@@ -787,8 +789,6 @@ static int rpm_resume(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval = 0;
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
trace_rpm_resume(dev, rpmflags);
repeat:
@@ -815,7 +815,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
* future.
*/
if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
+ dev_info(dev, "%s %d dev = %px\n", __func__, __LINE__, dev);
dev->power.request = RPM_REQ_NONE;
if (!dev->power.timer_autosuspends)
pm_runtime_deactivate_timer(dev);
@@ -1231,22 +1231,16 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
{
unsigned long flags;
int retval;
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
dev->power.runtime_status != RPM_ACTIVE);
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
if (rpmflags & RPM_GET_PUT)
atomic_inc(&dev->power.usage_count);
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
spin_lock_irqsave(&dev->power.lock, flags);
if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
+ dev_info(dev, "%s %d dev = %px\n", __func__, __LINE__, dev);
retval = rpm_resume(dev, rpmflags);
spin_unlock_irqrestore(&dev->power.lock, flags);
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index e56ab308da20..e21255b97251 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -484,7 +484,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
struct pci_dev *dev;
struct pci_bus *bus = slot->bus;
struct acpiphp_func *func;
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
if (bridge && bus->self && hotplug_is_native(bus->self)) {
/*
@@ -495,14 +494,11 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
* as a Thunderbolt host controller.
*/
for_each_pci_bridge(dev, bus) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (PCI_SLOT(dev->devfn) == slot->device) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
acpiphp_native_scan_bridge(dev);
}
}
} else {
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
LIST_HEAD(add_list);
int max, pass;
@@ -510,15 +506,12 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
max = acpiphp_max_busnr(bus);
for (pass = 0; pass < 2; pass++) {
for_each_pci_bridge(dev, bus) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (PCI_SLOT(dev->devfn) != slot->device) {
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
continue;
}
max = pci_scan_bridge(bus, dev, max, pass);
if (pass && dev->subordinate) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
check_hotplug_bridge(slot, dev);
pcibios_resource_survey_bus(dev->subordinate);
__pci_bus_size_bridges(dev->subordinate,
@@ -535,7 +528,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
list_for_each_entry(dev, &bus->devices, bus_list) {
/* Assume that newly added devices are powered on already. */
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (!pci_dev_is_added(dev))
dev->current_state = PCI_D0;
}
@@ -554,7 +546,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
}
pci_dev_put(dev);
}
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
}
/**
@@ -823,7 +814,6 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
/* bus re-enumerate */
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "Bus check in %s()\n", __func__);
if (bridge)
acpiphp_check_bridge(bridge);
@@ -834,7 +824,6 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
case ACPI_NOTIFY_DEVICE_CHECK:
/* device check */
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "Device check in %s()\n", __func__);
if (bridge) {
acpiphp_check_bridge(bridge);
@@ -850,23 +839,19 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
case ACPI_NOTIFY_EJECT_REQUEST:
/* request device eject */
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
acpi_handle_debug(handle, "Eject request in %s()\n", __func__);
acpiphp_disable_and_eject_slot(slot);
break;
}
pci_unlock_rescan_remove();
- printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
if (bridge)
put_bridge(bridge);
- printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
}
static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type)
{
struct acpiphp_context *context;
- dev_info(&adev->dev, "%s %d: %s = %px\n", __func__, __LINE__, __func__, (void *) acpiphp_hotplug_notify);
context = acpiphp_grab_context(adev);
if (!context)
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 0888b0d5ec73..d88d6b6ccf5b 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -508,8 +508,6 @@ static inline int pm_runtime_get(struct device *dev)
*/
static inline int pm_runtime_get_sync(struct device *dev)
{
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s\n", __func__);
return __pm_runtime_resume(dev, RPM_GET_PUT);
}
This is the message from 6.17.0-rc6-next-20250917-gpudebug-00028-gf99cf81b1da7 crashing,
captured via netconsole:
2025-10-06T04:52:35.932429+02:00 [T248]evmisc-0132 ev_queue_notify_reques: Dispatching Notify on [GPP0] (Device) Value 0x00 (Bus Check) Node 0000000069c9623b
2025-10-06T04:52:35.932429+02:00 [T177395]pcieport 0000:00:01.1: acpiphp_check_bridge 708#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [T177395]pcieport 0000:00:01.1: __pm_runtime_resume 1243 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 818 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: pm_runtime_deactivate_timer 146#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 930#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 959#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 818 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: pm_runtime_deactivate_timer 146#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 965#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
So the crash seems to happen in this part of rpm_resume():
[...]
skip_parent:
if (!strcmp(dev_name(dev), "0000:00:01.1"))
dev_info(dev, "%s %d\n", __func__, __LINE__); // this is the last reported line
if (dev->power.no_callbacks)
goto no_callback; /* Assume success. */
__update_runtime_status(dev, RPM_RESUMING);
callback = RPM_GET_CALLBACK(dev, runtime_resume);
dev_pm_disable_wake_irq_check(dev, false);
retval = rpm_callback(callback, dev);
if (retval) {
if (!strcmp(dev_name(dev), "0000:00:01.1"))
dev_info(dev, "%s %d\n", __func__, __LINE__);
__update_runtime_status(dev, RPM_SUSPENDED);
pm_runtime_cancel_pending(dev);
dev_pm_enable_wake_irq_check(dev, false);
} else {
no_callback:
if (!strcmp(dev_name(dev), "0000:00:01.1"))
dev_info(dev, "%s %d\n", __func__, __LINE__);
[...]
Bert Karwatzki
next prev parent reply other threads:[~2025-10-06 12:10 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-06 12:09 [REGRESSION 00/04] Crash during resume of pcie bridge Bert Karwatzki
2025-10-06 12:09 ` [REGRESSION 01/04] " Bert Karwatzki
2025-10-06 12:09 ` [REGRESSION 02/04] " Bert Karwatzki
2025-10-06 12:09 ` Bert Karwatzki [this message]
2025-10-06 12:09 ` [REGRESSION 04/04] " Bert Karwatzki
2025-10-06 12:39 ` [REGRESSION 00/04] " Christian König
2025-10-06 16:22 ` Bert Karwatzki
2025-10-07 6:50 ` Bert Karwatzki
2025-10-07 21:33 ` Mario Limonciello
2025-10-13 16:29 ` Bert Karwatzki
2025-10-13 18:51 ` Mario Limonciello
2025-10-14 10:50 ` Christian König
[not found] ` <1853e2af7f70cf726df278137b6d2d89d9d9dc82.camel@web.de>
2025-10-31 13:38 ` Bert Karwatzki
2025-10-31 13:47 ` Bert Karwatzki
2025-10-31 18:35 ` Bert Karwatzki
2025-11-05 11:44 ` Bert Karwatzki
2025-11-05 21:31 ` Mario Limonciello (AMD) (kernel.org)
2025-11-07 13:09 ` Bert Karwatzki
2025-11-07 17:09 ` Bert Karwatzki
2025-11-10 13:33 ` Christian König
2025-11-16 21:08 ` Crash during resume of pcie bridge due to infinite loop in ACPICA Bert Karwatzki
2025-11-17 16:40 ` Rafael J. Wysocki
2025-11-24 22:34 ` Bert Karwatzki
2025-11-25 19:46 ` Rafael J. Wysocki
2025-11-27 0:08 ` Bert Karwatzki
2025-11-27 13:02 ` Rafael J. Wysocki
2025-11-28 20:47 ` Bert Karwatzki
2025-12-02 18:59 ` Rafael J. Wysocki
2025-12-02 19:53 ` Bert Karwatzki
2025-12-02 20:01 ` Rafael J. Wysocki
2025-12-05 10:05 ` Crash during resume of pcie bridge due to incorrect error handling Bert Karwatzki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251006120944.7880-4-spasswolf@web.de \
--to=spasswolf@web.de \
--cc=christian.koenig@amd.com \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-next@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-stable@vger.kernel.org \
--cc=rafael.j.wysocki@intel.com \
--cc=regressions@lists.linux.dev \
--cc=superm1@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).