public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup()
       [not found] <20250325114306.3740022-1-maciej.falkowski@linux.intel.com>
@ 2025-03-25 11:43 ` Maciej Falkowski
  2025-03-25 20:50   ` Lizhi Hou
  2025-03-25 11:43 ` [PATCH 2/2] accel/ivpu: Fix PM related deadlocks in MS IOCTLs Maciej Falkowski
  1 sibling, 1 reply; 8+ messages in thread
From: Maciej Falkowski @ 2025-03-25 11:43 UTC (permalink / raw)
  To: dri-devel
  Cc: oded.gabbay, quic_jhugo, jacek.lawrynowicz, lizhi.hou, stable,
	Maciej Falkowski

From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>

Fix deadlock in ivpu_ms_cleanup() by preventing runtime resume after
file_priv->ms_lock is acquired.

During a failure in runtime resume, a cold boot is executed, which
calls ivpu_ms_cleanup_all(). This function calls ivpu_ms_cleanup()
that acquires file_priv->ms_lock and causes the deadlock.

Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
Cc: <stable@vger.kernel.org> # v6.11+
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
---
 drivers/accel/ivpu/ivpu_ms.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index ffe7b10f8a76..eb485cf15ad6 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -4,6 +4,7 @@
  */
 
 #include <drm/drm_file.h>
+#include <linux/pm_runtime.h>
 
 #include "ivpu_drv.h"
 #include "ivpu_gem.h"
@@ -281,6 +282,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *
 void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
 {
 	struct ivpu_ms_instance *ms, *tmp;
+	struct ivpu_device *vdev = file_priv->vdev;
+
+	pm_runtime_get_sync(vdev->drm.dev);
 
 	mutex_lock(&file_priv->ms_lock);
 
@@ -293,6 +297,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
 		free_instance(file_priv, ms);
 
 	mutex_unlock(&file_priv->ms_lock);
+
+	pm_runtime_put_autosuspend(vdev->drm.dev);
 }
 
 void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] accel/ivpu: Fix PM related deadlocks in MS IOCTLs
       [not found] <20250325114306.3740022-1-maciej.falkowski@linux.intel.com>
  2025-03-25 11:43 ` [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup() Maciej Falkowski
@ 2025-03-25 11:43 ` Maciej Falkowski
  2025-03-28 15:31   ` Lizhi Hou
  1 sibling, 1 reply; 8+ messages in thread
From: Maciej Falkowski @ 2025-03-25 11:43 UTC (permalink / raw)
  To: dri-devel
  Cc: oded.gabbay, quic_jhugo, jacek.lawrynowicz, lizhi.hou, stable,
	Maciej Falkowski

From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>

Prevent runtime resume/suspend while MS IOCTLs are in progress.
Failed suspend will call ivpu_ms_cleanup() that would try to acquire
file_priv->ms_lock, which is already held by the IOCTLs.

Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
Cc: <stable@vger.kernel.org> # v6.11+
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
---
 drivers/accel/ivpu/ivpu_debugfs.c |  4 ++--
 drivers/accel/ivpu/ivpu_ms.c      | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 0825851656a2..f0dad0c9ce33 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -332,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si
 		return -EINVAL;
 
 	ret = ivpu_rpm_get(vdev);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	ivpu_pm_trigger_recovery(vdev, "debugfs");
@@ -383,7 +383,7 @@ static int dct_active_set(void *data, u64 active_percent)
 		return -EINVAL;
 
 	ret = ivpu_rpm_get(vdev);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	if (active_percent)
diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
index eb485cf15ad6..2a043baf10ca 100644
--- a/drivers/accel/ivpu/ivpu_ms.c
+++ b/drivers/accel/ivpu/ivpu_ms.c
@@ -45,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
 	    args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS)
 		return -EINVAL;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->ms_lock);
 
 	if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
@@ -97,6 +101,8 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
 	kfree(ms);
 unlock:
 	mutex_unlock(&file_priv->ms_lock);
+
+	ivpu_rpm_put(vdev);
 	return ret;
 }
 
@@ -161,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
 	if (!args->metric_group_mask)
 		return -EINVAL;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->ms_lock);
 
 	ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -188,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
 unlock:
 	mutex_unlock(&file_priv->ms_lock);
 
+	ivpu_rpm_put(vdev);
 	return ret;
 }
 
@@ -205,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
 	struct drm_ivpu_metric_streamer_stop *args = data;
+	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_ms_instance *ms;
+	int ret;
 
 	if (!args->metric_group_mask)
 		return -EINVAL;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->ms_lock);
 
 	ms = get_instance_by_mask(file_priv, args->metric_group_mask);
@@ -218,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 	mutex_unlock(&file_priv->ms_lock);
 
+	ivpu_rpm_put(vdev);
 	return ms ? 0 : -EINVAL;
 }
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup()
  2025-03-25 11:43 ` [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup() Maciej Falkowski
@ 2025-03-25 20:50   ` Lizhi Hou
  2025-03-26  8:06     ` Jacek Lawrynowicz
  0 siblings, 1 reply; 8+ messages in thread
From: Lizhi Hou @ 2025-03-25 20:50 UTC (permalink / raw)
  To: Maciej Falkowski, dri-devel
  Cc: oded.gabbay, quic_jhugo, jacek.lawrynowicz, stable


On 3/25/25 04:43, Maciej Falkowski wrote:
> From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>
> Fix deadlock in ivpu_ms_cleanup() by preventing runtime resume after
> file_priv->ms_lock is acquired.
>
> During a failure in runtime resume, a cold boot is executed, which
> calls ivpu_ms_cleanup_all(). This function calls ivpu_ms_cleanup()
> that acquires file_priv->ms_lock and causes the deadlock.
>
> Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
> Cc: <stable@vger.kernel.org> # v6.11+
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
> ---
>   drivers/accel/ivpu/ivpu_ms.c | 6 ++++++
>   1 file changed, 6 insertions(+)
>
> diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
> index ffe7b10f8a76..eb485cf15ad6 100644
> --- a/drivers/accel/ivpu/ivpu_ms.c
> +++ b/drivers/accel/ivpu/ivpu_ms.c
> @@ -4,6 +4,7 @@
>    */
>   
>   #include <drm/drm_file.h>
> +#include <linux/pm_runtime.h>
>   
>   #include "ivpu_drv.h"
>   #include "ivpu_gem.h"
> @@ -281,6 +282,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *
>   void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>   {
>   	struct ivpu_ms_instance *ms, *tmp;
> +	struct ivpu_device *vdev = file_priv->vdev;
> +
> +	pm_runtime_get_sync(vdev->drm.dev);

Could get_sync() be failed here? Maybe it is better to add warning for 
failure?


Lizhi

>   
>   	mutex_lock(&file_priv->ms_lock);
>   
> @@ -293,6 +297,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>   		free_instance(file_priv, ms);
>   
>   	mutex_unlock(&file_priv->ms_lock);
> +
> +	pm_runtime_put_autosuspend(vdev->drm.dev);
>   }
>   
>   void ivpu_ms_cleanup_all(struct ivpu_device *vdev)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup()
  2025-03-25 20:50   ` Lizhi Hou
@ 2025-03-26  8:06     ` Jacek Lawrynowicz
  2025-03-27 17:38       ` Lizhi Hou
  0 siblings, 1 reply; 8+ messages in thread
From: Jacek Lawrynowicz @ 2025-03-26  8:06 UTC (permalink / raw)
  To: Lizhi Hou, Maciej Falkowski, dri-devel; +Cc: oded.gabbay, quic_jhugo, stable

Hi,

On 3/25/2025 9:50 PM, Lizhi Hou wrote:
> 
> On 3/25/25 04:43, Maciej Falkowski wrote:
>> From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>
>> Fix deadlock in ivpu_ms_cleanup() by preventing runtime resume after
>> file_priv->ms_lock is acquired.
>>
>> During a failure in runtime resume, a cold boot is executed, which
>> calls ivpu_ms_cleanup_all(). This function calls ivpu_ms_cleanup()
>> that acquires file_priv->ms_lock and causes the deadlock.
>>
>> Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
>> Cc: <stable@vger.kernel.org> # v6.11+
>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
>> ---
>>   drivers/accel/ivpu/ivpu_ms.c | 6 ++++++
>>   1 file changed, 6 insertions(+)
>>
>> diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
>> index ffe7b10f8a76..eb485cf15ad6 100644
>> --- a/drivers/accel/ivpu/ivpu_ms.c
>> +++ b/drivers/accel/ivpu/ivpu_ms.c
>> @@ -4,6 +4,7 @@
>>    */
>>     #include <drm/drm_file.h>
>> +#include <linux/pm_runtime.h>
>>     #include "ivpu_drv.h"
>>   #include "ivpu_gem.h"
>> @@ -281,6 +282,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *
>>   void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>   {
>>       struct ivpu_ms_instance *ms, *tmp;
>> +    struct ivpu_device *vdev = file_priv->vdev;
>> +
>> +    pm_runtime_get_sync(vdev->drm.dev);
> 
> Could get_sync() be failed here? Maybe it is better to add warning for failure?

Yes, this could fail but we already have detailed warnings in runtime resume callback (ivpu_pm_runtime_resume_cb()).
> 
>>         mutex_lock(&file_priv->ms_lock);
>>   @@ -293,6 +297,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>           free_instance(file_priv, ms);
>>         mutex_unlock(&file_priv->ms_lock);
>> +
>> +    pm_runtime_put_autosuspend(vdev->drm.dev);
>>   }
>>     void ivpu_ms_cleanup_all(struct ivpu_device *vdev)

Regards,
Jacek


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup()
  2025-03-26  8:06     ` Jacek Lawrynowicz
@ 2025-03-27 17:38       ` Lizhi Hou
  2025-03-28  8:23         ` Jacek Lawrynowicz
  0 siblings, 1 reply; 8+ messages in thread
From: Lizhi Hou @ 2025-03-27 17:38 UTC (permalink / raw)
  To: Jacek Lawrynowicz, Maciej Falkowski, dri-devel
  Cc: oded.gabbay, quic_jhugo, stable


On 3/26/25 01:06, Jacek Lawrynowicz wrote:
> Hi,
>
> On 3/25/2025 9:50 PM, Lizhi Hou wrote:
>> On 3/25/25 04:43, Maciej Falkowski wrote:
>>> From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>>
>>> Fix deadlock in ivpu_ms_cleanup() by preventing runtime resume after
>>> file_priv->ms_lock is acquired.
>>>
>>> During a failure in runtime resume, a cold boot is executed, which
>>> calls ivpu_ms_cleanup_all(). This function calls ivpu_ms_cleanup()
>>> that acquires file_priv->ms_lock and causes the deadlock.
>>>
>>> Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
>>> Cc: <stable@vger.kernel.org> # v6.11+
>>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
>>> ---
>>>    drivers/accel/ivpu/ivpu_ms.c | 6 ++++++
>>>    1 file changed, 6 insertions(+)
>>>
>>> diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
>>> index ffe7b10f8a76..eb485cf15ad6 100644
>>> --- a/drivers/accel/ivpu/ivpu_ms.c
>>> +++ b/drivers/accel/ivpu/ivpu_ms.c
>>> @@ -4,6 +4,7 @@
>>>     */
>>>      #include <drm/drm_file.h>
>>> +#include <linux/pm_runtime.h>
>>>      #include "ivpu_drv.h"
>>>    #include "ivpu_gem.h"
>>> @@ -281,6 +282,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *
>>>    void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>>    {
>>>        struct ivpu_ms_instance *ms, *tmp;
>>> +    struct ivpu_device *vdev = file_priv->vdev;
>>> +
>>> +    pm_runtime_get_sync(vdev->drm.dev);
>> Could get_sync() be failed here? Maybe it is better to add warning for failure?
> Yes, this could fail but we already have detailed warnings in runtime resume callback (ivpu_pm_runtime_resume_cb()).

Will the deadlock still happens if this function fails?


Lizhi

>>>          mutex_lock(&file_priv->ms_lock);
>>>    @@ -293,6 +297,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>>            free_instance(file_priv, ms);
>>>          mutex_unlock(&file_priv->ms_lock);
>>> +
>>> +    pm_runtime_put_autosuspend(vdev->drm.dev);
>>>    }
>>>      void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
> Regards,
> Jacek
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup()
  2025-03-27 17:38       ` Lizhi Hou
@ 2025-03-28  8:23         ` Jacek Lawrynowicz
  2025-03-28 15:29           ` Lizhi Hou
  0 siblings, 1 reply; 8+ messages in thread
From: Jacek Lawrynowicz @ 2025-03-28  8:23 UTC (permalink / raw)
  To: Lizhi Hou, Maciej Falkowski, dri-devel; +Cc: oded.gabbay, quic_jhugo, stable



On 3/27/2025 6:38 PM, Lizhi Hou wrote:
> 
> On 3/26/25 01:06, Jacek Lawrynowicz wrote:
>> Hi,
>>
>> On 3/25/2025 9:50 PM, Lizhi Hou wrote:
>>> On 3/25/25 04:43, Maciej Falkowski wrote:
>>>> From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>>>
>>>> Fix deadlock in ivpu_ms_cleanup() by preventing runtime resume after
>>>> file_priv->ms_lock is acquired.
>>>>
>>>> During a failure in runtime resume, a cold boot is executed, which
>>>> calls ivpu_ms_cleanup_all(). This function calls ivpu_ms_cleanup()
>>>> that acquires file_priv->ms_lock and causes the deadlock.
>>>>
>>>> Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
>>>> Cc: <stable@vger.kernel.org> # v6.11+
>>>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>>> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
>>>> ---
>>>>    drivers/accel/ivpu/ivpu_ms.c | 6 ++++++
>>>>    1 file changed, 6 insertions(+)
>>>>
>>>> diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
>>>> index ffe7b10f8a76..eb485cf15ad6 100644
>>>> --- a/drivers/accel/ivpu/ivpu_ms.c
>>>> +++ b/drivers/accel/ivpu/ivpu_ms.c
>>>> @@ -4,6 +4,7 @@
>>>>     */
>>>>      #include <drm/drm_file.h>
>>>> +#include <linux/pm_runtime.h>
>>>>      #include "ivpu_drv.h"
>>>>    #include "ivpu_gem.h"
>>>> @@ -281,6 +282,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *
>>>>    void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>>>    {
>>>>        struct ivpu_ms_instance *ms, *tmp;
>>>> +    struct ivpu_device *vdev = file_priv->vdev;
>>>> +
>>>> +    pm_runtime_get_sync(vdev->drm.dev);
>>> Could get_sync() be failed here? Maybe it is better to add warning for failure?
>> Yes, this could fail but we already have detailed warnings in runtime resume callback (ivpu_pm_runtime_resume_cb()).
> 
> Will the deadlock still happens if this function fails?

No. The deadlock was caused by runtime resume in free_instance().
pm_runtime_get_sync() will always bump PM usage counter, so there will be no resume regardless if it fails or not.

>>>>          mutex_lock(&file_priv->ms_lock);
>>>>    @@ -293,6 +297,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>>>            free_instance(file_priv, ms);
>>>>          mutex_unlock(&file_priv->ms_lock);
>>>> +
>>>> +    pm_runtime_put_autosuspend(vdev->drm.dev);
>>>>    }
>>>>      void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
>> Regards,
>> Jacek
>>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup()
  2025-03-28  8:23         ` Jacek Lawrynowicz
@ 2025-03-28 15:29           ` Lizhi Hou
  0 siblings, 0 replies; 8+ messages in thread
From: Lizhi Hou @ 2025-03-28 15:29 UTC (permalink / raw)
  To: Jacek Lawrynowicz, Maciej Falkowski, dri-devel
  Cc: oded.gabbay, quic_jhugo, stable


On 3/28/25 01:23, Jacek Lawrynowicz wrote:
>
> On 3/27/2025 6:38 PM, Lizhi Hou wrote:
>> On 3/26/25 01:06, Jacek Lawrynowicz wrote:
>>> Hi,
>>>
>>> On 3/25/2025 9:50 PM, Lizhi Hou wrote:
>>>> On 3/25/25 04:43, Maciej Falkowski wrote:
>>>>> From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>>>>
>>>>> Fix deadlock in ivpu_ms_cleanup() by preventing runtime resume after
>>>>> file_priv->ms_lock is acquired.
>>>>>
>>>>> During a failure in runtime resume, a cold boot is executed, which
>>>>> calls ivpu_ms_cleanup_all(). This function calls ivpu_ms_cleanup()
>>>>> that acquires file_priv->ms_lock and causes the deadlock.
>>>>>
>>>>> Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
>>>>> Cc: <stable@vger.kernel.org> # v6.11+
>>>>> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>>>>> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
>>>>> ---
>>>>>     drivers/accel/ivpu/ivpu_ms.c | 6 ++++++
>>>>>     1 file changed, 6 insertions(+)
>>>>>
>>>>> diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
>>>>> index ffe7b10f8a76..eb485cf15ad6 100644
>>>>> --- a/drivers/accel/ivpu/ivpu_ms.c
>>>>> +++ b/drivers/accel/ivpu/ivpu_ms.c
>>>>> @@ -4,6 +4,7 @@
>>>>>      */
>>>>>       #include <drm/drm_file.h>
>>>>> +#include <linux/pm_runtime.h>
>>>>>       #include "ivpu_drv.h"
>>>>>     #include "ivpu_gem.h"
>>>>> @@ -281,6 +282,9 @@ int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *
>>>>>     void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>>>>     {
>>>>>         struct ivpu_ms_instance *ms, *tmp;
>>>>> +    struct ivpu_device *vdev = file_priv->vdev;
>>>>> +
>>>>> +    pm_runtime_get_sync(vdev->drm.dev);
>>>> Could get_sync() be failed here? Maybe it is better to add warning for failure?
>>> Yes, this could fail but we already have detailed warnings in runtime resume callback (ivpu_pm_runtime_resume_cb()).
>> Will the deadlock still happens if this function fails?
> No. The deadlock was caused by runtime resume in free_instance().
> pm_runtime_get_sync() will always bump PM usage counter, so there will be no resume regardless if it fails or not.
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>>>>>           mutex_lock(&file_priv->ms_lock);
>>>>>     @@ -293,6 +297,8 @@ void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv)
>>>>>             free_instance(file_priv, ms);
>>>>>           mutex_unlock(&file_priv->ms_lock);
>>>>> +
>>>>> +    pm_runtime_put_autosuspend(vdev->drm.dev);
>>>>>     }
>>>>>       void ivpu_ms_cleanup_all(struct ivpu_device *vdev)
>>> Regards,
>>> Jacek
>>>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] accel/ivpu: Fix PM related deadlocks in MS IOCTLs
  2025-03-25 11:43 ` [PATCH 2/2] accel/ivpu: Fix PM related deadlocks in MS IOCTLs Maciej Falkowski
@ 2025-03-28 15:31   ` Lizhi Hou
  0 siblings, 0 replies; 8+ messages in thread
From: Lizhi Hou @ 2025-03-28 15:31 UTC (permalink / raw)
  To: Maciej Falkowski, dri-devel
  Cc: oded.gabbay, quic_jhugo, jacek.lawrynowicz, stable


On 3/25/25 04:43, Maciej Falkowski wrote:
> From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
>
> Prevent runtime resume/suspend while MS IOCTLs are in progress.
> Failed suspend will call ivpu_ms_cleanup() that would try to acquire
> file_priv->ms_lock, which is already held by the IOCTLs.
>
> Fixes: cdfad4db7756 ("accel/ivpu: Add NPU profiling support")
> Cc: <stable@vger.kernel.org> # v6.11+
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
> Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
> ---
>   drivers/accel/ivpu/ivpu_debugfs.c |  4 ++--
>   drivers/accel/ivpu/ivpu_ms.c      | 18 ++++++++++++++++++
>   2 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
> index 0825851656a2..f0dad0c9ce33 100644
> --- a/drivers/accel/ivpu/ivpu_debugfs.c
> +++ b/drivers/accel/ivpu/ivpu_debugfs.c
> @@ -332,7 +332,7 @@ ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t si
>   		return -EINVAL;
>   
>   	ret = ivpu_rpm_get(vdev);
> -	if (ret)
> +	if (ret < 0)
>   		return ret;
>   
>   	ivpu_pm_trigger_recovery(vdev, "debugfs");
> @@ -383,7 +383,7 @@ static int dct_active_set(void *data, u64 active_percent)
>   		return -EINVAL;
>   
>   	ret = ivpu_rpm_get(vdev);
> -	if (ret)
> +	if (ret < 0)
>   		return ret;
>   
>   	if (active_percent)
> diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c
> index eb485cf15ad6..2a043baf10ca 100644
> --- a/drivers/accel/ivpu/ivpu_ms.c
> +++ b/drivers/accel/ivpu/ivpu_ms.c
> @@ -45,6 +45,10 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
>   	    args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS)
>   		return -EINVAL;
>   
> +	ret = ivpu_rpm_get(vdev);
> +	if (ret < 0)
> +		return ret;
> +
>   	mutex_lock(&file_priv->ms_lock);
>   
>   	if (get_instance_by_mask(file_priv, args->metric_group_mask)) {
> @@ -97,6 +101,8 @@ int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
>   	kfree(ms);
>   unlock:
>   	mutex_unlock(&file_priv->ms_lock);
> +
> +	ivpu_rpm_put(vdev);
>   	return ret;
>   }
>   
> @@ -161,6 +167,10 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
>   	if (!args->metric_group_mask)
>   		return -EINVAL;
>   
> +	ret = ivpu_rpm_get(vdev);
> +	if (ret < 0)
> +		return ret;
> +
>   	mutex_lock(&file_priv->ms_lock);
>   
>   	ms = get_instance_by_mask(file_priv, args->metric_group_mask);
> @@ -188,6 +198,7 @@ int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *
>   unlock:
>   	mutex_unlock(&file_priv->ms_lock);
>   
> +	ivpu_rpm_put(vdev);
>   	return ret;
>   }
>   
> @@ -205,11 +216,17 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
>   {
>   	struct ivpu_file_priv *file_priv = file->driver_priv;
>   	struct drm_ivpu_metric_streamer_stop *args = data;
> +	struct ivpu_device *vdev = file_priv->vdev;
>   	struct ivpu_ms_instance *ms;
> +	int ret;
>   
>   	if (!args->metric_group_mask)
>   		return -EINVAL;
>   
> +	ret = ivpu_rpm_get(vdev);
> +	if (ret < 0)
> +		return ret;
> +
>   	mutex_lock(&file_priv->ms_lock);
>   
>   	ms = get_instance_by_mask(file_priv, args->metric_group_mask);
> @@ -218,6 +235,7 @@ int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file
>   
>   	mutex_unlock(&file_priv->ms_lock);
>   
> +	ivpu_rpm_put(vdev);
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
>   	return ms ? 0 : -EINVAL;
>   }
>   

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-03-28 15:31 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20250325114306.3740022-1-maciej.falkowski@linux.intel.com>
2025-03-25 11:43 ` [PATCH 1/2] accel/ivpu: Fix deadlock in ivpu_ms_cleanup() Maciej Falkowski
2025-03-25 20:50   ` Lizhi Hou
2025-03-26  8:06     ` Jacek Lawrynowicz
2025-03-27 17:38       ` Lizhi Hou
2025-03-28  8:23         ` Jacek Lawrynowicz
2025-03-28 15:29           ` Lizhi Hou
2025-03-25 11:43 ` [PATCH 2/2] accel/ivpu: Fix PM related deadlocks in MS IOCTLs Maciej Falkowski
2025-03-28 15:31   ` Lizhi Hou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox