From: Bert Karwatzki <spasswolf@web.de>
To: linux-kernel@vger.kernel.org
Cc: "Bert Karwatzki" <spasswolf@web.de>,
linux-next@vger.kernel.org, linux-stable@vger.kernel.org,
regressions@lists.linux.dev, linux-pci@vger.kernel.org,
linux-acpi@vger.kernel.org,
"Mario Limonciello" <superm1@kernel.org>,
"Christian König" <christian.koenig@amd.com>,
"Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Subject: [REGRESSION 03/04] Crash during resume of pcie bridge
Date: Mon, 6 Oct 2025 14:09:42 +0200 [thread overview]
Message-ID: <20251006120944.7880-4-spasswolf@web.de> (raw)
In-Reply-To: <20251006120944.7880-1-spasswolf@web.de>
In order to get a working crash I removed some of the monitoring again:
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index bc365c0dbe2f..a984ccd4a2a0 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -514,60 +514,46 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
break;
case ACPI_NOTIFY_DEVICE_CHECK:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
break;
case ACPI_NOTIFY_DEVICE_WAKE:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_WAKE\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
return;
case ACPI_NOTIFY_EJECT_REQUEST:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
break;
case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK_LIGHT\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
/* TBD: Exactly what does 'light' mean? */
return;
case ACPI_NOTIFY_FREQUENCY_MISMATCH:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_FREQUENCY_MISMATCH\n", __func__, __LINE__);
acpi_handle_err(handle, "Device cannot be configured due "
"to a frequency mismatch\n");
return;
case ACPI_NOTIFY_BUS_MODE_MISMATCH:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_MODE_MISMATCH\n", __func__, __LINE__);
acpi_handle_err(handle, "Device cannot be configured due "
"to a bus mode mismatch\n");
return;
case ACPI_NOTIFY_POWER_FAULT:
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_POWER_FAULT\n", __func__, __LINE__);
acpi_handle_err(handle, "Device has suffered a power fault\n");
return;
default:
- printk(KERN_INFO "%s %d: acpi unknown event type\n", __func__, __LINE__);
acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
return;
}
adev = acpi_get_acpi_dev(handle);
- if (adev)
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
- else
- printk(KERN_INFO "%s %d: adev = NULL\n", __func__, __LINE__);
-
if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
return;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 9a7dc432b50d..4e0583274b8f 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -539,7 +539,6 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
if (!adev)
return;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
mutex_lock(&acpi_pm_notifier_lock);
if (adev->wakeup.flags.notifier_present) {
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 0f6a16856119..5ff343096ece 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1167,7 +1167,6 @@ void acpi_os_wait_events_complete(void)
* Make sure the GPE handler or the fixed event handler is not used
* on another CPU after removal.
*/
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
if (acpi_sci_irq_valid())
synchronize_hardirq(acpi_sci_irq);
flush_workqueue(kacpid_wq);
@@ -1185,7 +1184,6 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
{
struct acpi_hp_work *hpw = container_of(work, struct acpi_hp_work, work);
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
acpi_os_wait_events_complete();
acpi_device_hotplug(hpw->adev, hpw->src);
kfree(hpw);
@@ -1194,7 +1192,6 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
acpi_status acpi_hotplug_schedule(struct acpi_device *adev, u32 src)
{
struct acpi_hp_work *hpw;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_handle_debug(adev->handle,
"Scheduling hotplug event %u for deferred handling\n",
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index d53be7e0388d..065abe56f440 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -251,7 +251,6 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p)
{
struct acpi_scan_handler *handler = adev->handler;
uintptr_t flags = (uintptr_t)p;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p);
@@ -315,7 +314,6 @@ static void acpi_scan_check_subtree(struct acpi_device *adev)
{
uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_scan_check_and_detach(adev, (void *)flags);
}
@@ -371,7 +369,6 @@ static int acpi_scan_rescan_bus(struct acpi_device *adev)
{
struct acpi_scan_handler *handler = adev->handler;
int ret;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
if (handler && handler->hotplug.scan_dependent)
ret = handler->hotplug.scan_dependent(adev);
@@ -388,7 +385,6 @@ static int acpi_scan_device_check(struct acpi_device *adev)
{
struct acpi_device *parent;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_scan_check_subtree(adev);
if (!acpi_device_is_present(adev))
@@ -416,24 +412,19 @@ static int acpi_scan_device_check(struct acpi_device *adev)
static int acpi_scan_bus_check(struct acpi_device *adev)
{
acpi_scan_check_subtree(adev);
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_rescan_bus(adev);
}
static int acpi_generic_hotplug_event(struct acpi_device *adev, u32 type)
{
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_bus_check(adev);
case ACPI_NOTIFY_DEVICE_CHECK:
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_device_check(adev);
case ACPI_NOTIFY_EJECT_REQUEST:
case ACPI_OST_EC_OSPM_EJECT:
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
if (adev->handler && !adev->handler->hotplug.enabled) {
dev_info(&adev->dev, "Eject disabled\n");
return -EPERM;
@@ -450,7 +441,6 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
int error = -ENODEV;
- dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
lock_device_hotplug();
mutex_lock(&acpi_scan_lock);
@@ -476,10 +466,9 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
* There may be additional notify handlers for device objects
* without the .event() callback, so ignore them here.
*/
- if (notify) {
- dev_info(&adev->dev, "%s %d: calling notify = %px\n", __func__, __LINE__, (void *) notify);
+ if (notify)
error = notify(adev, src);
- } else
+ else
goto out;
}
switch (error) {
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 895898c3cd56..27cce7f1b1d3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -142,6 +142,8 @@ EXPORT_SYMBOL_GPL(pm_runtime_suspended_time);
*/
static void pm_runtime_deactivate_timer(struct device *dev)
{
+ if (!strcmp(dev_name(dev), "0000:00:01.1"))
+ dev_info(dev, "%s %d\n", __func__, __LINE__);
if (dev->power.timer_expires > 0) {
hrtimer_try_to_cancel(&dev->power.suspend_timer);
dev->power.timer_expires = 0;
@@ -787,8 +789,6 @@ static int rpm_resume(struct device *dev, int rpmflags)
struct device *parent = NULL;
int retval = 0;
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
trace_rpm_resume(dev, rpmflags);
repeat:
@@ -815,7 +815,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
* future.
*/
if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
+ dev_info(dev, "%s %d dev = %px\n", __func__, __LINE__, dev);
dev->power.request = RPM_REQ_NONE;
if (!dev->power.timer_autosuspends)
pm_runtime_deactivate_timer(dev);
@@ -1231,22 +1231,16 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
{
unsigned long flags;
int retval;
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
dev->power.runtime_status != RPM_ACTIVE);
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
if (rpmflags & RPM_GET_PUT)
atomic_inc(&dev->power.usage_count);
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
spin_lock_irqsave(&dev->power.lock, flags);
if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s %d\n", __func__, __LINE__);
+ dev_info(dev, "%s %d dev = %px\n", __func__, __LINE__, dev);
retval = rpm_resume(dev, rpmflags);
spin_unlock_irqrestore(&dev->power.lock, flags);
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index e56ab308da20..e21255b97251 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -484,7 +484,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
struct pci_dev *dev;
struct pci_bus *bus = slot->bus;
struct acpiphp_func *func;
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
if (bridge && bus->self && hotplug_is_native(bus->self)) {
/*
@@ -495,14 +494,11 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
* as a Thunderbolt host controller.
*/
for_each_pci_bridge(dev, bus) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (PCI_SLOT(dev->devfn) == slot->device) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
acpiphp_native_scan_bridge(dev);
}
}
} else {
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
LIST_HEAD(add_list);
int max, pass;
@@ -510,15 +506,12 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
max = acpiphp_max_busnr(bus);
for (pass = 0; pass < 2; pass++) {
for_each_pci_bridge(dev, bus) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (PCI_SLOT(dev->devfn) != slot->device) {
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
continue;
}
max = pci_scan_bridge(bus, dev, max, pass);
if (pass && dev->subordinate) {
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
check_hotplug_bridge(slot, dev);
pcibios_resource_survey_bus(dev->subordinate);
__pci_bus_size_bridges(dev->subordinate,
@@ -535,7 +528,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
list_for_each_entry(dev, &bus->devices, bus_list) {
/* Assume that newly added devices are powered on already. */
- dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (!pci_dev_is_added(dev))
dev->current_state = PCI_D0;
}
@@ -554,7 +546,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
}
pci_dev_put(dev);
}
- printk(KERN_INFO "%s %d\n", __func__, __LINE__);
}
/**
@@ -823,7 +814,6 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
/* bus re-enumerate */
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "Bus check in %s()\n", __func__);
if (bridge)
acpiphp_check_bridge(bridge);
@@ -834,7 +824,6 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
case ACPI_NOTIFY_DEVICE_CHECK:
/* device check */
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "Device check in %s()\n", __func__);
if (bridge) {
acpiphp_check_bridge(bridge);
@@ -850,23 +839,19 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
case ACPI_NOTIFY_EJECT_REQUEST:
/* request device eject */
- printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
acpi_handle_debug(handle, "Eject request in %s()\n", __func__);
acpiphp_disable_and_eject_slot(slot);
break;
}
pci_unlock_rescan_remove();
- printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
if (bridge)
put_bridge(bridge);
- printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
}
static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type)
{
struct acpiphp_context *context;
- dev_info(&adev->dev, "%s %d: %s = %px\n", __func__, __LINE__, __func__, (void *) acpiphp_hotplug_notify);
context = acpiphp_grab_context(adev);
if (!context)
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 0888b0d5ec73..d88d6b6ccf5b 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -508,8 +508,6 @@ static inline int pm_runtime_get(struct device *dev)
*/
static inline int pm_runtime_get_sync(struct device *dev)
{
- if (!strcmp(dev_name(dev), "0000:00:01.1"))
- dev_info(dev, "%s\n", __func__);
return __pm_runtime_resume(dev, RPM_GET_PUT);
}
This is the message from 6.17.0-rc6-next-20250917-gpudebug-00028-gf99cf81b1da7 crashing,
captured via netconsole:
2025-10-06T04:52:35.932429+02:00 [T248]evmisc-0132 ev_queue_notify_reques: Dispatching Notify on [GPP0] (Device) Value 0x00 (Bus Check) Node 0000000069c9623b
2025-10-06T04:52:35.932429+02:00 [T177395]pcieport 0000:00:01.1: acpiphp_check_bridge 708#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [T177395]pcieport 0000:00:01.1: __pm_runtime_resume 1243 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 818 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: pm_runtime_deactivate_timer 146#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 930#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 959#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 818 dev = ffff97c001c930c8#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: pm_runtime_deactivate_timer 146#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
2025-10-06T04:52:35.932429+02:00 [177395]pcieport 0000:00:01.1: rpm_resume 965#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
So the crash seems to happen in this part of rpm_resume():
[...]
skip_parent:
if (!strcmp(dev_name(dev), "0000:00:01.1"))
dev_info(dev, "%s %d\n", __func__, __LINE__); // this is the last reported line
if (dev->power.no_callbacks)
goto no_callback; /* Assume success. */
__update_runtime_status(dev, RPM_RESUMING);
callback = RPM_GET_CALLBACK(dev, runtime_resume);
dev_pm_disable_wake_irq_check(dev, false);
retval = rpm_callback(callback, dev);
if (retval) {
if (!strcmp(dev_name(dev), "0000:00:01.1"))
dev_info(dev, "%s %d\n", __func__, __LINE__);
__update_runtime_status(dev, RPM_SUSPENDED);
pm_runtime_cancel_pending(dev);
dev_pm_enable_wake_irq_check(dev, false);
} else {
no_callback:
if (!strcmp(dev_name(dev), "0000:00:01.1"))
dev_info(dev, "%s %d\n", __func__, __LINE__);
[...]
Bert Karwatzki
next prev parent reply other threads:[~2025-10-06 12:10 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-06 12:09 [REGRESSION 00/04] Crash during resume of pcie bridge Bert Karwatzki
2025-10-06 12:09 ` [REGRESSION 01/04] " Bert Karwatzki
2025-10-06 12:09 ` [REGRESSION 02/04] " Bert Karwatzki
2025-10-06 12:09 ` Bert Karwatzki [this message]
2025-10-06 12:09 ` [REGRESSION 04/04] " Bert Karwatzki
2025-10-06 12:39 ` [REGRESSION 00/04] " Christian König
2025-10-06 16:22 ` Bert Karwatzki
2025-10-07 6:50 ` Bert Karwatzki
2025-10-07 21:33 ` Mario Limonciello
2025-10-13 16:29 ` Bert Karwatzki
2025-10-13 18:51 ` Mario Limonciello
2025-10-14 10:50 ` Christian König
2025-10-27 9:57 ` Bert Karwatzki
2025-10-31 13:38 ` Bert Karwatzki
2025-10-31 13:47 ` Bert Karwatzki
2025-10-31 18:35 ` Bert Karwatzki
2025-11-05 11:44 ` Bert Karwatzki
2025-11-05 21:31 ` Mario Limonciello (AMD) (kernel.org)
2025-11-07 13:09 ` Bert Karwatzki
2025-11-07 17:09 ` Bert Karwatzki
2025-11-10 13:33 ` Christian König
2025-11-16 21:08 ` Crash during resume of pcie bridge due to infinite loop in ACPICA Bert Karwatzki
2025-11-17 16:40 ` Rafael J. Wysocki
2025-11-24 22:34 ` Bert Karwatzki
2025-11-25 19:46 ` Rafael J. Wysocki
2025-11-27 0:08 ` Bert Karwatzki
2025-11-27 13:02 ` Rafael J. Wysocki
2025-11-28 20:47 ` Bert Karwatzki
2025-12-02 18:59 ` Rafael J. Wysocki
2025-12-02 19:53 ` Bert Karwatzki
2025-12-02 20:01 ` Rafael J. Wysocki
2025-12-05 10:05 ` Crash during resume of pcie bridge due to incorrect error handling Bert Karwatzki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251006120944.7880-4-spasswolf@web.de \
--to=spasswolf@web.de \
--cc=christian.koenig@amd.com \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-next@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-stable@vger.kernel.org \
--cc=rafael.j.wysocki@intel.com \
--cc=regressions@lists.linux.dev \
--cc=superm1@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.