AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset
@ 2024-08-02 18:39 Victor Skvortsov
  2024-08-03 15:12 ` Lazar, Lijo
  0 siblings, 1 reply; 4+ messages in thread
From: Victor Skvortsov @ 2024-08-02 18:39 UTC (permalink / raw)
  To: amd-gfx; +Cc: Victor Skvortsov, Lazar Lijo

Stop waiting for the KIQ to return back when there is a reset pending.
It's quite likely that the KIQ will never respond.

Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
Suggested-by: Lazar Lijo <Lijo.Lazar@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c02659025656..8962be257942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -785,7 +785,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
 		goto failed_kiq;
 
 	might_sleep();
-	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY&&
+		!amdgpu_reset_pending(adev->reset_domain)) {
 
 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 4ae581f3fcb5..f33a4e0ffba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -136,6 +136,11 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
 	return queue_work(domain->wq, work);
 }
 
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) {
+	lockdep_assert_held(&domain->sem);
+	return rwsem_is_contended(&domain->sem);
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset
  2024-08-02 18:39 [PATCH 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset Victor Skvortsov
@ 2024-08-03 15:12 ` Lazar, Lijo
  2024-08-06 12:42   ` [PATCH v2 " Victor Skvortsov
  0 siblings, 1 reply; 4+ messages in thread
From: Lazar, Lijo @ 2024-08-03 15:12 UTC (permalink / raw)
  To: Victor Skvortsov, amd-gfx; +Cc: Christian König, Vignesh Chander



On 8/3/2024 12:09 AM, Victor Skvortsov wrote:
> Stop waiting for the KIQ to return back when there is a reset pending.
> It's quite likely that the KIQ will never respond.
> 
> Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>

Copying Christian/Vignesh

The patch is originally from Christian. Please keep the author as
Christian and you may add Tested-By.

Thanks,
Lijo

> Suggested-by: Lazar Lijo <Lijo.Lazar@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 5 +++++
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index c02659025656..8962be257942 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -785,7 +785,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
>  		goto failed_kiq;
>  
>  	might_sleep();
> -	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
> +	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY&&
> +		!amdgpu_reset_pending(adev->reset_domain)) {
>  
>  		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
>  		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> index 4ae581f3fcb5..f33a4e0ffba1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
> @@ -136,6 +136,11 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
>  	return queue_work(domain->wq, work);
>  }
>  
> +static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) {
> +	lockdep_assert_held(&domain->sem);
> +	return rwsem_is_contended(&domain->sem);
> +}
> +
>  void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
>  
>  void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset
  2024-08-03 15:12 ` Lazar, Lijo
@ 2024-08-06 12:42   ` Victor Skvortsov
  2024-08-14  1:38     ` Zhang, Hawking
  0 siblings, 1 reply; 4+ messages in thread
From: Victor Skvortsov @ 2024-08-06 12:42 UTC (permalink / raw)
  To: Vignesh.Chander, Lijo.Lazar, Yunxiang.Li, Christian.Koenig,
	amd-gfx
  Cc: Victor Skvortsov

Stop waiting for the KIQ to return back when there is a reset pending.
It's quite likely that the KIQ will never respond.

Signed-off-by: Koenig Christian <Christian.Koenig@amd.com>
Suggested-by: Lazar Lijo <Lijo.Lazar@amd.com>
Tested-by: Victor Skvortsov <victor.skvortsov@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c02659025656..8962be257942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -785,7 +785,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
 		goto failed_kiq;
 
 	might_sleep();
-	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY&&
+		!amdgpu_reset_pending(adev->reset_domain)) {
 
 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 4ae581f3fcb5..f33a4e0ffba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -136,6 +136,11 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
 	return queue_work(domain->wq, work);
 }
 
+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) {
+	lockdep_assert_held(&domain->sem);
+	return rwsem_is_contended(&domain->sem);
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
 
 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH v2 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset
  2024-08-06 12:42   ` [PATCH v2 " Victor Skvortsov
@ 2024-08-14  1:38     ` Zhang, Hawking
  0 siblings, 0 replies; 4+ messages in thread
From: Zhang, Hawking @ 2024-08-14  1:38 UTC (permalink / raw)
  To: Skvortsov, Victor, Chander, Vignesh, Lazar, Lijo,
	Li, Yunxiang (Teddy), Koenig, Christian,
	amd-gfx@lists.freedesktop.org
  Cc: Skvortsov, Victor

[AMD Official Use Only - AMD Internal Distribution Only]

Series is
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>

Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Victor Skvortsov
Sent: Tuesday, August 6, 2024 20:43
To: Chander, Vignesh <Vignesh.Chander@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>; Li, Yunxiang (Teddy) <Yunxiang.Li@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Skvortsov, Victor <Victor.Skvortsov@amd.com>
Subject: [PATCH v2 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset

Stop waiting for the KIQ to return back when there is a reset pending.
It's quite likely that the KIQ will never respond.

Signed-off-by: Koenig Christian <Christian.Koenig@amd.com>
Suggested-by: Lazar Lijo <Lijo.Lazar@amd.com>
Tested-by: Victor Skvortsov <victor.skvortsov@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c02659025656..8962be257942 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -785,7 +785,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
                goto failed_kiq;

        might_sleep();
-       while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+       while (r < 1 && cnt++ < MAX_KIQ_REG_TRY&&
+               !amdgpu_reset_pending(adev->reset_domain)) {

                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 4ae581f3fcb5..f33a4e0ffba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -136,6 +136,11 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
        return queue_work(domain->wq, work);
 }

+static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) {
+       lockdep_assert_held(&domain->sem);
+       return rwsem_is_contended(&domain->sem);
+}
+
 void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);

 void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
--
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-08-14  1:38 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-02 18:39 [PATCH 2/2] drm/amdgpu: abort KIQ waits when there is a pending reset Victor Skvortsov
2024-08-03 15:12 ` Lazar, Lijo
2024-08-06 12:42   ` [PATCH v2 " Victor Skvortsov
2024-08-14  1:38     ` Zhang, Hawking

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox