All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: correct single device PCIe reset flow for DPC status
@ 2026-04-08  5:41 Ce Sun
  2026-04-10  1:25 ` Sun, Ce(Overlord)
  2026-04-10  3:55 ` Lazar, Lijo
  0 siblings, 2 replies; 3+ messages in thread
From: Ce Sun @ 2026-04-08  5:41 UTC (permalink / raw)
  To: amd-gfx; +Cc: Hawking.Zhang, Ce Sun

For triggering the dpc event with a single device, we still need
to set the in_link_reset flag and the dpc status.

Signed-off-by: Ce Sun <cesun102@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index deb41c095b59..0bdb54ab9a53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6291,6 +6291,9 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
 			amdgpu_reset_set_dpc_status(adev, true);
 
 			mutex_lock(&hive->hive_lock);
+		} else {
+			if (amdgpu_device_bus_status_check(adev))
+				amdgpu_reset_set_dpc_status(adev, true);
 		}
 		memset(&reset_context, 0, sizeof(reset_context));
 		INIT_LIST_HEAD(&device_list);
@@ -6411,6 +6414,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
 		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
 			tmp_adev->pcie_reset_ctx.in_link_reset = true;
 	} else {
+		adev->pcie_reset_ctx.in_link_reset = true;
 		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
 	}
 
@@ -6467,8 +6471,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
 			tmp_adev->pcie_reset_ctx.in_link_reset = false;
 			list_add_tail(&tmp_adev->reset_list, &device_list);
 		}
-	} else
+	} else {
+		adev->pcie_reset_ctx.in_link_reset = false;
 		list_add_tail(&adev->reset_list, &device_list);
+	}
 
 	amdgpu_device_sched_resume(&device_list, NULL, NULL);
 	amdgpu_device_gpu_resume(adev, &device_list, false);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdgpu: correct single device PCIe reset flow for DPC status
  2026-04-08  5:41 [PATCH] drm/amdgpu: correct single device PCIe reset flow for DPC status Ce Sun
@ 2026-04-10  1:25 ` Sun, Ce(Overlord)
  2026-04-10  3:55 ` Lazar, Lijo
  1 sibling, 0 replies; 3+ messages in thread
From: Sun, Ce(Overlord) @ 2026-04-10  1:25 UTC (permalink / raw)
  To: amd-gfx@lists.freedesktop.org; +Cc: Zhang, Hawking

[-- Attachment #1: Type: text/plain, Size: 2424 bytes --]

[AMD Official Use Only - AMD Internal Distribution Only]

ping
________________________________
From: Sun, Ce(Overlord) <Ce.Sun@amd.com>
Sent: Wednesday, April 8, 2026 1:41 PM
To: amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Sun, Ce(Overlord) <Ce.Sun@amd.com>
Subject: [PATCH] drm/amdgpu: correct single device PCIe reset flow for DPC status

For triggering the dpc event with a single device, we still need
to set the in_link_reset flag and the dpc status.

Signed-off-by: Ce Sun <cesun102@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index deb41c095b59..0bdb54ab9a53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -6291,6 +6291,9 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
                         amdgpu_reset_set_dpc_status(adev, true);

                         mutex_lock(&hive->hive_lock);
+               } else {
+                       if (amdgpu_device_bus_status_check(adev))
+                               amdgpu_reset_set_dpc_status(adev, true);
                 }
                 memset(&reset_context, 0, sizeof(reset_context));
                 INIT_LIST_HEAD(&device_list);
@@ -6411,6 +6414,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
                         tmp_adev->pcie_reset_ctx.in_link_reset = true;
         } else {
+               adev->pcie_reset_ctx.in_link_reset = true;
                 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
         }

@@ -6467,8 +6471,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
                         tmp_adev->pcie_reset_ctx.in_link_reset = false;
                         list_add_tail(&tmp_adev->reset_list, &device_list);
                 }
-       } else
+       } else {
+               adev->pcie_reset_ctx.in_link_reset = false;
                 list_add_tail(&adev->reset_list, &device_list);
+       }

         amdgpu_device_sched_resume(&device_list, NULL, NULL);
         amdgpu_device_gpu_resume(adev, &device_list, false);
--
2.34.1


[-- Attachment #2: Type: text/html, Size: 5518 bytes --]

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdgpu: correct single device PCIe reset flow for DPC status
  2026-04-08  5:41 [PATCH] drm/amdgpu: correct single device PCIe reset flow for DPC status Ce Sun
  2026-04-10  1:25 ` Sun, Ce(Overlord)
@ 2026-04-10  3:55 ` Lazar, Lijo
  1 sibling, 0 replies; 3+ messages in thread
From: Lazar, Lijo @ 2026-04-10  3:55 UTC (permalink / raw)
  To: Ce Sun, amd-gfx; +Cc: Hawking.Zhang



On 08-Apr-26 11:11 AM, Ce Sun wrote:
> For triggering the dpc event with a single device, we still need
> to set the in_link_reset flag and the dpc status.
> 
> Signed-off-by: Ce Sun <cesun102@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index deb41c095b59..0bdb54ab9a53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -6291,6 +6291,9 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
>   			amdgpu_reset_set_dpc_status(adev, true);
>   
>   			mutex_lock(&hive->hive_lock);
> +		} else {
> +			if (amdgpu_device_bus_status_check(adev))
> +				amdgpu_reset_set_dpc_status(adev, true);
>   		}
>   		memset(&reset_context, 0, sizeof(reset_context));
>   		INIT_LIST_HEAD(&device_list);
> @@ -6411,6 +6414,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
>   		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
>   			tmp_adev->pcie_reset_ctx.in_link_reset = true;
>   	} else {
> +		adev->pcie_reset_ctx.in_link_reset = true;

For the device that enters DPC, this status is already set during the
detection stage, within amdgpu_device_recovery_prepare() -

https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c#L6245

You may keep the same approach for the single-device case as well.

Thanks,
Lijo

>   		set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
>   	}
>   
> @@ -6467,8 +6471,10 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
>   			tmp_adev->pcie_reset_ctx.in_link_reset = false;
>   			list_add_tail(&tmp_adev->reset_list, &device_list);
>   		}
> -	} else
> +	} else {
> +		adev->pcie_reset_ctx.in_link_reset = false;
>   		list_add_tail(&adev->reset_list, &device_list);
> +	}
>   
>   	amdgpu_device_sched_resume(&device_list, NULL, NULL);
>   	amdgpu_device_gpu_resume(adev, &device_list, false);


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-04-10  3:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-08  5:41 [PATCH] drm/amdgpu: correct single device PCIe reset flow for DPC status Ce Sun
2026-04-10  1:25 ` Sun, Ce(Overlord)
2026-04-10  3:55 ` Lazar, Lijo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.