From: Bert Karwatzki <spasswolf@web.de>
To: linux-kernel@vger.kernel.org
Cc: "Bert Karwatzki" <spasswolf@web.de>,
linux-next@vger.kernel.org, linux-stable@vger.kernel.org,
regressions@lists.linux.dev, linux-pci@vger.kernel.org,
linux-acpi@vger.kernel.org,
"Mario Limonciello" <superm1@kernel.org>,
"Christian König" <christian.koenig@amd.com>,
"Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Subject: [REGRESSION 01/04] Crash during resume of pcie bridge
Date: Mon, 6 Oct 2025 14:09:40 +0200 [thread overview]
Message-ID: <20251006120944.7880-2-spasswolf@web.de> (raw)
In-Reply-To: <20251006120944.7880-1-spasswolf@web.de>
To further debug the issue I inserted calls to dev_info() and printk() into
the amdgpu suspend/resume code, and the acpi and pcie hotplug resume code.
This is the patch used in kernel version
6.17.0-rc6-next-20250917-gpudebug-00021-gab98d880e3c8 (see list in previous mail)
(on top of next-20250917)
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index a984ccd4a2a0..bc365c0dbe2f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -514,46 +514,60 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
break;
case ACPI_NOTIFY_DEVICE_CHECK:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
break;
case ACPI_NOTIFY_DEVICE_WAKE:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_WAKE\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
return;
case ACPI_NOTIFY_EJECT_REQUEST:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
break;
case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK_LIGHT\n", __func__, __LINE__);
acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
/* TBD: Exactly what does 'light' mean? */
return;
case ACPI_NOTIFY_FREQUENCY_MISMATCH:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_FREQUENCY_MISMATCH\n", __func__, __LINE__);
acpi_handle_err(handle, "Device cannot be configured due "
"to a frequency mismatch\n");
return;
case ACPI_NOTIFY_BUS_MODE_MISMATCH:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_MODE_MISMATCH\n", __func__, __LINE__);
acpi_handle_err(handle, "Device cannot be configured due "
"to a bus mode mismatch\n");
return;
case ACPI_NOTIFY_POWER_FAULT:
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_POWER_FAULT\n", __func__, __LINE__);
acpi_handle_err(handle, "Device has suffered a power fault\n");
return;
default:
+ printk(KERN_INFO "%s %d: acpi unknown event type\n", __func__, __LINE__);
acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
return;
}
adev = acpi_get_acpi_dev(handle);
+ if (adev)
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
+ else
+ printk(KERN_INFO "%s %d: adev = NULL\n", __func__, __LINE__);
+
if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
return;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 4e0583274b8f..9a7dc432b50d 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -539,6 +539,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used)
if (!adev)
return;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
mutex_lock(&acpi_pm_notifier_lock);
if (adev->wakeup.flags.notifier_present) {
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 5ff343096ece..0f6a16856119 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1167,6 +1167,7 @@ void acpi_os_wait_events_complete(void)
* Make sure the GPE handler or the fixed event handler is not used
* on another CPU after removal.
*/
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
if (acpi_sci_irq_valid())
synchronize_hardirq(acpi_sci_irq);
flush_workqueue(kacpid_wq);
@@ -1184,6 +1185,7 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
{
struct acpi_hp_work *hpw = container_of(work, struct acpi_hp_work, work);
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
acpi_os_wait_events_complete();
acpi_device_hotplug(hpw->adev, hpw->src);
kfree(hpw);
@@ -1192,6 +1194,7 @@ static void acpi_hotplug_work_fn(struct work_struct *work)
acpi_status acpi_hotplug_schedule(struct acpi_device *adev, u32 src)
{
struct acpi_hp_work *hpw;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_handle_debug(adev->handle,
"Scheduling hotplug event %u for deferred handling\n",
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 065abe56f440..d53be7e0388d 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -251,6 +251,7 @@ static int acpi_scan_check_and_detach(struct acpi_device *adev, void *p)
{
struct acpi_scan_handler *handler = adev->handler;
uintptr_t flags = (uintptr_t)p;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_dev_for_each_child_reverse(adev, acpi_scan_check_and_detach, p);
@@ -314,6 +315,7 @@ static void acpi_scan_check_subtree(struct acpi_device *adev)
{
uintptr_t flags = ACPI_SCAN_CHECK_FLAG_STATUS;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_scan_check_and_detach(adev, (void *)flags);
}
@@ -369,6 +371,7 @@ static int acpi_scan_rescan_bus(struct acpi_device *adev)
{
struct acpi_scan_handler *handler = adev->handler;
int ret;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
if (handler && handler->hotplug.scan_dependent)
ret = handler->hotplug.scan_dependent(adev);
@@ -385,6 +388,7 @@ static int acpi_scan_device_check(struct acpi_device *adev)
{
struct acpi_device *parent;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_scan_check_subtree(adev);
if (!acpi_device_is_present(adev))
@@ -412,19 +416,24 @@ static int acpi_scan_device_check(struct acpi_device *adev)
static int acpi_scan_bus_check(struct acpi_device *adev)
{
acpi_scan_check_subtree(adev);
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_rescan_bus(adev);
}
static int acpi_generic_hotplug_event(struct acpi_device *adev, u32 type)
{
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_bus_check(adev);
case ACPI_NOTIFY_DEVICE_CHECK:
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
return acpi_scan_device_check(adev);
case ACPI_NOTIFY_EJECT_REQUEST:
case ACPI_OST_EC_OSPM_EJECT:
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
if (adev->handler && !adev->handler->hotplug.enabled) {
dev_info(&adev->dev, "Eject disabled\n");
return -EPERM;
@@ -441,6 +450,7 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
int error = -ENODEV;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
lock_device_hotplug();
mutex_lock(&acpi_scan_lock);
@@ -466,9 +476,10 @@ void acpi_device_hotplug(struct acpi_device *adev, u32 src)
* There may be additional notify handlers for device objects
* without the .event() callback, so ignore them here.
*/
- if (notify)
+ if (notify) {
+ dev_info(&adev->dev, "%s %d: calling notify = %px\n", __func__, __LINE__, (void *) notify);
error = notify(adev, src);
- else
+ } else
goto out;
}
switch (error) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 6c62e27b9800..4f00e15e7759 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -168,6 +168,7 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
atif_arg_elements[1].integer.value = 0;
}
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
&buffer);
obj = (union acpi_object *)buffer.pointer;
@@ -559,6 +560,7 @@ static union acpi_object *amdgpu_atcs_call(struct amdgpu_atcs *atcs,
atcs_arg_elements[1].integer.value = 0;
}
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
status = acpi_evaluate_object(atcs->handle, NULL, &atcs_arg, &buffer);
/* Fail only if calling the method fails and ATIF is supported */
@@ -608,6 +610,7 @@ static int amdgpu_atcs_verify_interface(struct amdgpu_atcs *atcs)
size_t size;
int err = 0;
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_VERIFY_INTERFACE, NULL);
if (!info)
return -EIO;
@@ -682,6 +685,7 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev)
if (!atcs->functions.pcie_dev_rdy)
return -EINVAL;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION, NULL);
if (!info)
return -EIO;
@@ -733,6 +737,7 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
params.pointer = &atcs_input;
while (retry--) {
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST, &params);
if (!info)
return -EIO;
@@ -798,6 +803,7 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
params.length = sizeof(struct atcs_pwr_shift_input);
params.pointer = &atcs_input;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
info = amdgpu_atcs_call(atcs, ATCS_FUNCTION_POWER_SHIFT_CONTROL, &params);
if (!info) {
DRM_ERROR("ATCS PSC update failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 3893e6fc2f03..ed3063f09007 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -123,6 +123,7 @@ static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function,
atpx_arg_elements[1].integer.value = 0;
}
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
status = acpi_evaluate_object(handle, NULL, &atpx_arg, &buffer);
/* Fail only if calling the method fails and ATPX is supported */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 00e96419fcda..542d039cfd42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -272,6 +272,7 @@ static int amdgpu_atrm_call(acpi_handle atrm_handle, uint8_t *bios,
atrm_arg_elements[1].type = ACPI_TYPE_INTEGER;
atrm_arg_elements[1].integer.value = len;
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer);
if (ACPI_FAILURE(status)) {
DRM_ERROR("failed to evaluate ATRM got %s\n", acpi_format_exception(status));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0fdfde3dcb9f..bab504d1d24d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5194,6 +5194,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@@ -5208,6 +5209,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
return r;
}
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3))
dev_warn(adev->dev, "smart shift update failed\n");
@@ -5286,6 +5288,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
@@ -5379,6 +5382,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
amdgpu_vram_mgr_clear_reset_blocks(adev);
adev->in_suspend = false;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0))
dev_warn(adev->dev, "smart shift update failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ece251cbe8c3..165bd79fce82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2795,6 +2795,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
struct drm_device *drm_dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret, i;
+ dev_info(dev, "%s %d\n", __func__, __LINE__);
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
@@ -2874,6 +2875,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
struct drm_device *drm_dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret;
+ dev_info(dev, "%s %d\n", __func__, __LINE__);
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8841d7213de4..576ff827d80c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7475,6 +7475,7 @@ static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
struct amdgpu_device *adev = ip_block->adev;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
@@ -7529,6 +7530,7 @@ static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
cancel_delayed_work_sync(&adev->gfx.idle_work);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index d7499be8c4bf..fd4062e97e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -983,6 +983,7 @@ static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
int r;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
@@ -1029,6 +1030,7 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
gmc_v10_0_gart_disable(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index d1a011c40ba2..a181c9965282 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -174,6 +174,7 @@ static int jpeg_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
(adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0);
@@ -212,6 +213,7 @@ static int jpeg_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
+ dev_info(ip_block->adev->dev, "%s %d\n", __func__, __LINE__);
r = jpeg_v3_0_hw_fini(ip_block);
if (r)
return r;
@@ -232,6 +234,7 @@ static int jpeg_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
+ dev_info(ip_block->adev->dev, "%s %d\n", __func__, __LINE__);
r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 4cd325149b63..f33f5e2e6e53 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -320,6 +320,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
u32 ih_chicken;
int ret;
int i;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
/* disable irqs */
ret = navi10_ih_toggle_interrupts(adev, false);
@@ -385,6 +386,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
*/
static void navi10_ih_irq_disable(struct amdgpu_device *adev)
{
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
force_update_wptr_for_self_int(adev, 0, 8, false);
navi10_ih_toggle_interrupts(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 3bd44c24f692..78f60da4f498 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -697,6 +697,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
{
int i, r;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
for (i = 0; i < adev->sdma.num_instances; i++) {
r = sdma_v5_2_gfx_resume_instance(adev, i, false);
if (r)
@@ -819,6 +820,7 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
int r = 0;
struct amdgpu_ip_block *ip_block;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
if (amdgpu_sriov_vf(adev)) {
sdma_v5_2_ctx_switch_enable(adev, false);
sdma_v5_2_enable(adev, false);
@@ -1404,6 +1406,7 @@ static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
if (amdgpu_sriov_vf(adev))
return 0;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
sdma_v5_2_ctx_switch_enable(adev, false);
sdma_v5_2_enable(adev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index d9cf8f0feeb3..b31062f212b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -367,6 +367,7 @@ static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
if (amdgpu_sriov_vf(adev)) {
r = vcn_v3_0_start_sriov(adev);
@@ -441,6 +442,7 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
int i;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
@@ -474,6 +476,7 @@ static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;
int r, i;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
r = vcn_v3_0_hw_fini(ip_block);
if (r)
return r;
@@ -498,6 +501,7 @@ static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_device *adev = ip_block->adev;
int r, i;
+ dev_info(adev->dev, "%s %d\n", __func__, __LINE__);
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
r = amdgpu_vcn_resume(ip_block->adev, i);
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 5b1f271c6034..e56ab308da20 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -484,6 +484,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
struct pci_dev *dev;
struct pci_bus *bus = slot->bus;
struct acpiphp_func *func;
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
if (bridge && bus->self && hotplug_is_native(bus->self)) {
/*
@@ -494,10 +495,14 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
* as a Thunderbolt host controller.
*/
for_each_pci_bridge(dev, bus) {
- if (PCI_SLOT(dev->devfn) == slot->device)
+ dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
+ if (PCI_SLOT(dev->devfn) == slot->device) {
+ dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
acpiphp_native_scan_bridge(dev);
+ }
}
} else {
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
LIST_HEAD(add_list);
int max, pass;
@@ -505,11 +510,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
max = acpiphp_max_busnr(bus);
for (pass = 0; pass < 2; pass++) {
for_each_pci_bridge(dev, bus) {
- if (PCI_SLOT(dev->devfn) != slot->device)
+ dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
+ if (PCI_SLOT(dev->devfn) != slot->device) {
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
continue;
+ }
max = pci_scan_bridge(bus, dev, max, pass);
if (pass && dev->subordinate) {
+ dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
check_hotplug_bridge(slot, dev);
pcibios_resource_survey_bus(dev->subordinate);
__pci_bus_size_bridges(dev->subordinate,
@@ -526,6 +535,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
list_for_each_entry(dev, &bus->devices, bus_list) {
/* Assume that newly added devices are powered on already. */
+ dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (!pci_dev_is_added(dev))
dev->current_state = PCI_D0;
}
@@ -544,6 +554,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
}
pci_dev_put(dev);
}
+ printk(KERN_INFO "%s %d\n", __func__, __LINE__);
}
/**
@@ -702,31 +713,43 @@ static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
if (bridge->is_going_away)
return;
- if (bridge->pci_dev)
+ if (bridge->pci_dev) {
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
pm_runtime_get_sync(&bridge->pci_dev->dev);
+ }
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
list_for_each_entry(slot, &bridge->slots, node) {
struct pci_bus *bus = slot->bus;
struct pci_dev *dev, *tmp;
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
if (slot_no_hotplug(slot)) {
- ; /* do nothing */
+ /* do nothing */
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
} else if (device_status_valid(get_slot_status(slot))) {
/* remove stale devices if any */
list_for_each_entry_safe_reverse(dev, tmp,
- &bus->devices, bus_list)
+ &bus->devices, bus_list) {
+ dev_info(&dev->dev, "%s %d\n", __func__, __LINE__);
if (PCI_SLOT(dev->devfn) == slot->device)
trim_stale_devices(dev);
+ }
/* configure all functions */
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
enable_slot(slot, true);
} else {
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
disable_slot(slot);
}
}
- if (bridge->pci_dev)
+ if (bridge->pci_dev) {
pm_runtime_put(&bridge->pci_dev->dev);
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
+ }
+ dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__);
}
/*
@@ -760,6 +783,7 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus)
void acpiphp_check_host_bridge(struct acpi_device *adev)
{
struct acpiphp_bridge *bridge = NULL;
+ dev_info(&adev->dev, "%s %d\n", __func__, __LINE__);
acpi_lock_hp_context();
if (adev->hp) {
@@ -799,6 +823,7 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
/* bus re-enumerate */
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_BUS_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "Bus check in %s()\n", __func__);
if (bridge)
acpiphp_check_bridge(bridge);
@@ -809,6 +834,7 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
case ACPI_NOTIFY_DEVICE_CHECK:
/* device check */
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_DEVICE_CHECK\n", __func__, __LINE__);
acpi_handle_debug(handle, "Device check in %s()\n", __func__);
if (bridge) {
acpiphp_check_bridge(bridge);
@@ -824,19 +850,23 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
case ACPI_NOTIFY_EJECT_REQUEST:
/* request device eject */
+ printk(KERN_INFO "%s %d: ACPI_NOTIFY_EJECT_REQUEST\n", __func__, __LINE__);
acpi_handle_debug(handle, "Eject request in %s()\n", __func__);
acpiphp_disable_and_eject_slot(slot);
break;
}
pci_unlock_rescan_remove();
+ printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
if (bridge)
put_bridge(bridge);
+ printk(KERN_INFO "%s %d:\n", __func__, __LINE__);
}
static int acpiphp_hotplug_notify(struct acpi_device *adev, u32 type)
{
struct acpiphp_context *context;
+ dev_info(&adev->dev, "%s %d: %s = %px\n", __func__, __LINE__, __func__, (void *) acpiphp_hotplug_notify);
context = acpiphp_grab_context(adev);
if (!context)
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index f59baa912970..8f90f91c0a07 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -266,6 +266,7 @@ static void pciehp_disable_interrupt(struct pcie_device *dev)
* Disable hotplug interrupt so that it does not trigger
* immediately when the downstream link goes down.
*/
+ dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
if (pme_is_native(dev))
pcie_disable_interrupt(get_service_data(dev));
}
@@ -273,6 +274,7 @@ static void pciehp_disable_interrupt(struct pcie_device *dev)
#ifdef CONFIG_PM_SLEEP
static int pciehp_suspend(struct pcie_device *dev)
{
+ dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
/*
* If the port is already runtime suspended we can keep it that
* way.
@@ -287,6 +289,7 @@ static int pciehp_suspend(struct pcie_device *dev)
static int pciehp_resume_noirq(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
+ dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
/* pci_restore_state() just wrote to the Slot Control register */
ctrl->cmd_started = jiffies;
@@ -317,6 +320,7 @@ static int pciehp_resume_noirq(struct pcie_device *dev)
static int pciehp_resume(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
+ dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
if (pme_is_native(dev))
pcie_enable_interrupt(ctrl);
@@ -328,6 +332,7 @@ static int pciehp_resume(struct pcie_device *dev)
static int pciehp_runtime_suspend(struct pcie_device *dev)
{
+ dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
pciehp_disable_interrupt(dev);
return 0;
}
@@ -335,6 +340,7 @@ static int pciehp_runtime_suspend(struct pcie_device *dev)
static int pciehp_runtime_resume(struct pcie_device *dev)
{
struct controller *ctrl = get_service_data(dev);
+ dev_info(&dev->device, "%s %d\n", __func__, __LINE__);
/* pci_restore_state() just wrote to the Slot Control register */
ctrl->cmd_started = jiffies;
This gives the following output when crashing (only the last few lines, which do not
appear in /var/log/kern.log, but are captured with netconsole)
The processes involved here are the following:
T254: [irq/40-ACPI:Event] (this is a threaded interrupt handler for ACPI events)
The other two processes are [kworker/mm_percpu_wq] workqueues.
2025-09-30T02:25:57.704378+02:00 [T254]evmisc-0132 ev_queue_notify_reques: Dispatching Notify on [GPP0] (Device) Value 0x00 (Bus Check) Node 0000000017caa1c9
2025-09-30T02:25:57.704378+02:00 [T61442]acpi_bus_notify 517: ACPI_NOTIFY_BUS_CHECK
2025-09-30T02:25:57.704378+02:00 [T61442]acpi device:00: acpi_bus_notify 567#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T61442]acpi device:00: acpi_hotplug_schedule 1197#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]acpi_hotplug_work_fn 1188
2025-09-30T02:25:57.704378+02:00 [T77816]acpi_os_wait_events_complete 1170
2025-09-30T02:25:57.704378+02:00 [T77816]acpi device:00: acpi_device_hotplug 453#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]acpi device:00: acpi_device_hotplug 480: calling notify = ffffffffb8a24fc0#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]acpi device:00: acpiphp_hotplug_notify 869: acpiphp_hotplug_notify = ffffffffb8a24fc0#012 SUBSYSTEM=acpi#012 DEVICE=+acpi:device:00
2025-09-30T02:25:57.704378+02:00 [T77816]hotplug_event 826: ACPI_NOTIFY_BUS_CHECK
2025-09-30T02:25:57.704378+02:00 [T77816]pcieport 0000:00:01.1: acpiphp_check_bridge 717#012 SUBSYSTEM=pci#012 DEVICE=+pci:0000:00:01.1
So the problem appears to be happening inside of acpiphp_check_bridge():
static void acpiphp_check_bridge(struct acpiphp_bridge *bridge)
{
struct acpiphp_slot *slot;
/* Bail out if the bridge is going away. */
if (bridge->is_going_away)
return;
if (bridge->pci_dev) {
dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__); // This is the last reported line.
pm_runtime_get_sync(&bridge->pci_dev->dev);
}
dev_info(&bridge->pci_dev->dev, "%s %d\n", __func__, __LINE__); // This line is not reported during a crash.
Bert Karwatzki
next prev parent reply other threads:[~2025-10-06 12:09 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-06 12:09 [REGRESSION 00/04] Crash during resume of pcie bridge Bert Karwatzki
2025-10-06 12:09 ` Bert Karwatzki [this message]
2025-10-06 12:09 ` [REGRESSION 02/04] " Bert Karwatzki
2025-10-06 12:09 ` [REGRESSION 03/04] " Bert Karwatzki
2025-10-06 12:09 ` [REGRESSION 04/04] " Bert Karwatzki
2025-10-06 12:39 ` [REGRESSION 00/04] " Christian König
2025-10-06 16:22 ` Bert Karwatzki
2025-10-07 6:50 ` Bert Karwatzki
2025-10-07 21:33 ` Mario Limonciello
2025-10-13 16:29 ` Bert Karwatzki
2025-10-13 18:51 ` Mario Limonciello
2025-10-14 10:50 ` Christian König
2025-10-27 9:57 ` Bert Karwatzki
2025-10-31 13:38 ` Bert Karwatzki
2025-10-31 13:47 ` Bert Karwatzki
2025-10-31 18:35 ` Bert Karwatzki
2025-11-05 11:44 ` Bert Karwatzki
2025-11-05 21:31 ` Mario Limonciello (AMD) (kernel.org)
2025-11-07 13:09 ` Bert Karwatzki
2025-11-07 17:09 ` Bert Karwatzki
2025-11-10 13:33 ` Christian König
2025-11-16 21:08 ` Crash during resume of pcie bridge due to infinite loop in ACPICA Bert Karwatzki
2025-11-17 16:40 ` Rafael J. Wysocki
2025-11-24 22:34 ` Bert Karwatzki
2025-11-25 19:46 ` Rafael J. Wysocki
2025-11-27 0:08 ` Bert Karwatzki
2025-11-27 13:02 ` Rafael J. Wysocki
2025-11-28 20:47 ` Bert Karwatzki
2025-12-02 18:59 ` Rafael J. Wysocki
2025-12-02 19:53 ` Bert Karwatzki
2025-12-02 20:01 ` Rafael J. Wysocki
2025-12-05 10:05 ` Crash during resume of pcie bridge due to incorrect error handling Bert Karwatzki
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251006120944.7880-2-spasswolf@web.de \
--to=spasswolf@web.de \
--cc=christian.koenig@amd.com \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-next@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-stable@vger.kernel.org \
--cc=rafael.j.wysocki@intel.com \
--cc=regressions@lists.linux.dev \
--cc=superm1@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.