AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/amdkfd: Ignored various return code
@ 2025-11-13 18:31 Andrew Martin
  2025-11-14 18:53 ` Philip Yang
  2025-11-14 21:58 ` Felix Kuehling
  0 siblings, 2 replies; 4+ messages in thread
From: Andrew Martin @ 2025-11-13 18:31 UTC (permalink / raw)
  To: amd-gfx; +Cc: Andrew Martin

The return code of a non void function should not be ignored. In cases
where we do not care, the code needs to suppress it.

Signed-off-by: Andrew Martin <andrew.martin@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 ++++++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  3 ++-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 644f79f3c9af..e4438fca6283 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -313,8 +313,10 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
 void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
 {
 	if (amdgpu_device_should_recover_gpu(adev))
-		amdgpu_reset_domain_schedule(adev->reset_domain,
-					     &adev->kfd.reset_work);
+		WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+							&adev->kfd.reset_work),
+							"Failed to queue work! at %s",
+							 __func__);
 }
 
 int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
@@ -715,9 +717,10 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
 		if (gfx_block != NULL)
 			gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
 	}
-	amdgpu_dpm_switch_power_profile(adev,
-					PP_SMC_POWER_PROFILE_COMPUTE,
-					!idle);
+	WARN_ONCE(!amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle),
+		  "(%s) failed to disable video power profile mode",
+		  __func__);
+
 }
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 22925df6a791..025609a4abcf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2780,7 +2780,8 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
 		 * saved in MES.
 		 */
 		if (pdd->dev->kfd->shared_resources.enable_mes)
-			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
+			(void)kfd_dbg_set_mes_debug_mode(pdd,
+							 !kfd_dbg_has_cwsr_workaround(pdd->dev));
 	}
 
 	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] drm/amdkfd: Ignored various return code
  2025-11-13 18:31 [PATCH v3] drm/amdkfd: Ignored various return code Andrew Martin
@ 2025-11-14 18:53 ` Philip Yang
  2025-11-14 19:15   ` Martin, Andrew
  2025-11-14 21:58 ` Felix Kuehling
  1 sibling, 1 reply; 4+ messages in thread
From: Philip Yang @ 2025-11-14 18:53 UTC (permalink / raw)
  To: Andrew Martin, amd-gfx


On 2025-11-13 13:31, Andrew Martin wrote:
> The return code of a non void function should not be ignored. In cases
> where we do not care, the code needs to suppress it.
>
> Signed-off-by: Andrew Martin <andrew.martin@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 ++++++++-----
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  3 ++-
>   2 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 644f79f3c9af..e4438fca6283 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -313,8 +313,10 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
>   void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
>   {
>   	if (amdgpu_device_should_recover_gpu(adev))
> -		amdgpu_reset_domain_schedule(adev->reset_domain,
> -					     &adev->kfd.reset_work);
> +		WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
> +							&adev->kfd.reset_work),
> +							"Failed to queue work! at %s",
> +							 __func__);
amdgpu_reset_domain_schedule only return false if reset_work already 
exist in the workqueue, it is fine to ignore the return value, don't 
want WARN_ONCE for this.
>   }
>   
>   int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
> @@ -715,9 +717,10 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
>   		if (gfx_block != NULL)
>   			gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
>   	}
> -	amdgpu_dpm_switch_power_profile(adev,
> -					PP_SMC_POWER_PROFILE_COMPUTE,
> -					!idle);
> +	WARN_ONCE(!amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle),
> +		  "(%s) failed to disable video power profile mode",
> +		  __func__);

amdgpu_dpm_switch_power_profile return false for SRIOV, 
pp_funcs->switch_power_profile may return false for many cases, like if 
(!hwmgr || !hwmgr->pm_en), I think we don't need WARN for those cases.

> +
>   }
>   
>   bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 22925df6a791..025609a4abcf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2780,7 +2780,8 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
>   		 * saved in MES.
>   		 */
>   		if (pdd->dev->kfd->shared_resources.enable_mes)
> -			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
> +			(void)kfd_dbg_set_mes_debug_mode(pdd,
> +							 !kfd_dbg_has_cwsr_workaround(pdd->dev));

This looks good. It is easier to review if one patch only fix one 
ignored return code issue.

Regards,

Philip

>   	}
>   
>   	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v3] drm/amdkfd: Ignored various return code
  2025-11-14 18:53 ` Philip Yang
@ 2025-11-14 19:15   ` Martin, Andrew
  0 siblings, 0 replies; 4+ messages in thread
From: Martin, Andrew @ 2025-11-14 19:15 UTC (permalink / raw)
  To: Yang, Philip, amd-gfx@lists.freedesktop.org

[AMD Official Use Only - AMD Internal Distribution Only]

Greetings @Yang, Philip

> This looks good. It is easier to review if one patch only fix one ignored return code issue.

Thanks for the useful comments. The next patch(es) will remove the WARN_ONCE and the user "(void)" to suppress the unneeded return code.    I will also  break up the next update into 3 patches (1 for each  ignore ).

> -----Original Message-----
> From: Yang, Philip <Philip.Yang@amd.com>
> Sent: Friday, November 14, 2025 1:53 PM
> To: Martin, Andrew <Andrew.Martin@amd.com>; amd-
> gfx@lists.freedesktop.org
> Subject: Re: [PATCH v3] drm/amdkfd: Ignored various return code
>
>
> On 2025-11-13 13:31, Andrew Martin wrote:
> > The return code of a non void function should not be ignored. In cases
> > where we do not care, the code needs to suppress it.
> >
> > Signed-off-by: Andrew Martin <andrew.martin@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 ++++++++-----
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  3 ++-
> >   2 files changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > index 644f79f3c9af..e4438fca6283 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> > @@ -313,8 +313,10 @@ int amdgpu_amdkfd_post_reset(struct
> amdgpu_device *adev)
> >   void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
> >   {
> >     if (amdgpu_device_should_recover_gpu(adev))
> > -           amdgpu_reset_domain_schedule(adev->reset_domain,
> > -                                        &adev->kfd.reset_work);
> > +           WARN_ONCE(!amdgpu_reset_domain_schedule(adev-
> >reset_domain,
> > +                                                   &adev-
> >kfd.reset_work),
> > +                                                   "Failed to queue work!
> at %s",
> > +                                                    __func__);
> amdgpu_reset_domain_schedule only return false if reset_work already exist in
> the workqueue, it is fine to ignore the return value, don't want WARN_ONCE
> for this.
> >   }
> >
> >   int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t
> > size, @@ -715,9 +717,10 @@ void amdgpu_amdkfd_set_compute_idle(struct
> amdgpu_device *adev, bool idle)
> >             if (gfx_block != NULL)
> >                     gfx_block->version->funcs-
> >set_powergating_state((void *)gfx_block, state);
> >     }
> > -   amdgpu_dpm_switch_power_profile(adev,
> > -                                   PP_SMC_POWER_PROFILE_COMPUTE,
> > -                                   !idle);
> > +   WARN_ONCE(!amdgpu_dpm_switch_power_profile(adev,
> PP_SMC_POWER_PROFILE_COMPUTE, !idle),
> > +             "(%s) failed to disable video power profile mode",
> > +             __func__);
>
> amdgpu_dpm_switch_power_profile return false for SRIOV, pp_funcs-
> >switch_power_profile may return false for many cases, like if (!hwmgr ||
> !hwmgr->pm_en), I think we don't need WARN for those cases.
>
> > +
> >   }
> >
> >   bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 22925df6a791..025609a4abcf 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -2780,7 +2780,8 @@ static int runtime_enable(struct kfd_process *p,
> uint64_t r_debug,
> >              * saved in MES.
> >              */
> >             if (pdd->dev->kfd->shared_resources.enable_mes)
> > -                   kfd_dbg_set_mes_debug_mode(pdd,
> !kfd_dbg_has_cwsr_workaround(pdd->dev));
> > +                   (void)kfd_dbg_set_mes_debug_mode(pdd,
> > +
> !kfd_dbg_has_cwsr_workaround(pdd->dev));
>
> This looks good. It is easier to review if one patch only fix one ignored return
> code issue.
>
> Regards,
>
> Philip
>
> >     }
> >
> >     p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] drm/amdkfd: Ignored various return code
  2025-11-13 18:31 [PATCH v3] drm/amdkfd: Ignored various return code Andrew Martin
  2025-11-14 18:53 ` Philip Yang
@ 2025-11-14 21:58 ` Felix Kuehling
  1 sibling, 0 replies; 4+ messages in thread
From: Felix Kuehling @ 2025-11-14 21:58 UTC (permalink / raw)
  To: Andrew Martin, amd-gfx


On 2025-11-13 13:31, Andrew Martin wrote:
> The return code of a non void function should not be ignored. In cases
> where we do not care, the code needs to suppress it.
>
> Signed-off-by: Andrew Martin <andrew.martin@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 13 ++++++++-----
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c   |  3 ++-
>   2 files changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 644f79f3c9af..e4438fca6283 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -313,8 +313,10 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
>   void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
>   {
>   	if (amdgpu_device_should_recover_gpu(adev))
> -		amdgpu_reset_domain_schedule(adev->reset_domain,
> -					     &adev->kfd.reset_work);
> +		WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
> +							&adev->kfd.reset_work),
> +							"Failed to queue work! at %s",
> +							 __func__);

The return value is not an error. It only indicates that reset work was 
already queued.


>   }
>   
>   int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
> @@ -715,9 +717,10 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
>   		if (gfx_block != NULL)
>   			gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
>   	}
> -	amdgpu_dpm_switch_power_profile(adev,
> -					PP_SMC_POWER_PROFILE_COMPUTE,
> -					!idle);
> +	WARN_ONCE(!amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle),
> +		  "(%s) failed to disable video power profile mode",
> +		  __func__);

I think 0 means success here. But I agree with Philip that we can 
probably ignore errors here. I believe some chips just don't support 
power profiles and return an error here.


> +
>   }
>   
>   bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 22925df6a791..025609a4abcf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2780,7 +2780,8 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
>   		 * saved in MES.
>   		 */
>   		if (pdd->dev->kfd->shared_resources.enable_mes)
> -			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
> +			(void)kfd_dbg_set_mes_debug_mode(pdd,
> +							 !kfd_dbg_has_cwsr_workaround(pdd->dev));

In this case we could actually handle the error and return an error up 
to the caller.

Regards,
   Felix


>   	}
>   
>   	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-11-14 21:58 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-13 18:31 [PATCH v3] drm/amdkfd: Ignored various return code Andrew Martin
2025-11-14 18:53 ` Philip Yang
2025-11-14 19:15   ` Martin, Andrew
2025-11-14 21:58 ` Felix Kuehling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox