Linux kernel -stable discussions
 help / color / mirror / Atom feed
* [PATCH 6.12.y] drm/amd/display: fix dmub access race condition
@ 2025-10-17 18:01 Timothy Pearson
  2025-10-20  9:08 ` Greg KH
  0 siblings, 1 reply; 3+ messages in thread
From: Timothy Pearson @ 2025-10-17 18:01 UTC (permalink / raw)
  To: stable

From: Aurabindo Pillai <aurabindo.pillai@amd.com>

Justification:
This fixes DisplayPort lockups on Polaris GPUs during DPMS transitions,
which have been a major headache on our POWER9 platforms.  Backport to
Debian stable kernel version.

[ Upstream commit c210b757b400959577a5a17b783b5959b82baed8 ]

Accessing DC from amdgpu_dm is usually preceded by acquisition of
dc_lock mutex. Most of the DC API that DM calls are under a DC lock.
However, there are a few that are not. Some DC API called from interrupt
context end up sending DMUB commands via a DC API, while other threads were
using DMUB. This was apparent from a race between calls for setting idle
optimization enable/disable and the DC API to set vmin/vmax.

Offload the call to dc_stream_adjust_vmin_vmax() to a thread instead
of directly calling them from the interrupt handler such that it waits
for dc_lock.

[Timothy Pearson]

Modified header file patch to apply to 6.12

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Roman Li <roman.li@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 55 +++++++++++++++++--
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 14 +++++
 2 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b02ff92bae0b..fd6d66832ccf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -533,6 +533,50 @@ static void dm_pflip_high_irq(void *interrupt_params)
 		      amdgpu_crtc->crtc_id, amdgpu_crtc, vrr_active, (int)!e);
 }
 
+static void dm_handle_vmin_vmax_update(struct work_struct *offload_work)
+{
+	struct vupdate_offload_work *work = container_of(offload_work, struct vupdate_offload_work, work);
+	struct amdgpu_device *adev = work->adev;
+	struct dc_stream_state *stream = work->stream;
+	struct dc_crtc_timing_adjust *adjust = work->adjust;
+
+	mutex_lock(&adev->dm.dc_lock); /* apply the queued adjust only while holding dc_lock */
+	dc_stream_adjust_vmin_vmax(adev->dm.dc, stream, adjust);
+	mutex_unlock(&adev->dm.dc_lock);
+	/* Drop the stream reference taken by schedule_dc_vmin_vmax() and free its allocations. */
+	dc_stream_release(stream);
+	kfree(work->adjust);
+	kfree(work);
+}
+
+static void schedule_dc_vmin_vmax(struct amdgpu_device *adev,
+	struct dc_stream_state *stream,
+	struct dc_crtc_timing_adjust *adjust)
+{
+	struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT);
+	if (!offload_work) {
+		drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n");
+		return;
+	}
+	/* Callers are IRQ handlers, so neither allocation here may sleep (GFP_NOWAIT). */
+	struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_NOWAIT);
+	if (!adjust_copy) {
+		drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n");
+		kfree(offload_work);
+		return;
+	}
+	/* The work handler releases this stream reference and frees adjust_copy. */
+	dc_stream_retain(stream);
+	memcpy(adjust_copy, adjust, sizeof(*adjust_copy));
+
+	INIT_WORK(&offload_work->work, dm_handle_vmin_vmax_update);
+	offload_work->adev = adev;
+	offload_work->stream = stream;
+	offload_work->adjust = adjust_copy;
+
+	queue_work(system_wq, &offload_work->work);
+}
+
 static void dm_vupdate_high_irq(void *interrupt_params)
 {
 	struct common_irq_params *irq_params = interrupt_params;
@@ -582,10 +626,9 @@ static void dm_vupdate_high_irq(void *interrupt_params)
 				    acrtc->dm_irq_params.stream,
 				    &acrtc->dm_irq_params.vrr_params);
 
-				dc_stream_adjust_vmin_vmax(
-				    adev->dm.dc,
-				    acrtc->dm_irq_params.stream,
-				    &acrtc->dm_irq_params.vrr_params.adjust);
+				schedule_dc_vmin_vmax(adev,
+					acrtc->dm_irq_params.stream,
+					&acrtc->dm_irq_params.vrr_params.adjust);
 				spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
 			}
 		}
@@ -675,8 +718,8 @@ static void dm_crtc_high_irq(void *interrupt_params)
 					     acrtc->dm_irq_params.stream,
 					     &acrtc->dm_irq_params.vrr_params);
 
-		dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream,
-					   &acrtc->dm_irq_params.vrr_params.adjust);
+		schedule_dc_vmin_vmax(adev, acrtc->dm_irq_params.stream,
+				&acrtc->dm_irq_params.vrr_params.adjust);
 	}
 
 	/*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 9603352ee094..aa99e226a381 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -1012,4 +1012,18 @@ void dm_free_gpu_mem(struct amdgpu_device *adev,
 
 bool amdgpu_dm_is_headless(struct amdgpu_device *adev);
 
+/**
+ * struct vupdate_offload_work - Work data for a deferred vmin/vmax update
+ * @work: Kernel work data for the work event
+ * @adev: amdgpu_device back pointer
+ * @stream: DC stream associated with the crtc
+ * @adjust: DC CRTC timing adjust to be applied to the crtc
+ */
+struct vupdate_offload_work {
+	struct work_struct work;
+	struct amdgpu_device *adev;
+	struct dc_stream_state *stream;
+	struct dc_crtc_timing_adjust *adjust;
+};
+
 #endif /* __AMDGPU_DM_H__ */
-- 
2.47.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 6.12.y] drm/amd/display: fix dmub access race condition
  2025-10-17 18:01 [PATCH 6.12.y] drm/amd/display: fix dmub access race condition Timothy Pearson
@ 2025-10-20  9:08 ` Greg KH
  2025-10-20 13:49   ` Timothy Pearson
  0 siblings, 1 reply; 3+ messages in thread
From: Greg KH @ 2025-10-20  9:08 UTC (permalink / raw)
  To: Timothy Pearson; +Cc: stable

On Fri, Oct 17, 2025 at 01:01:12PM -0500, Timothy Pearson wrote:
> +/**
> + * struct dm_vupdate_work - Work data for periodic action in idle
> + * @work: Kernel work data for the work event
> + * @adev: amdgpu_device back pointer
> + * @stream: DC stream associated with the crtc
> + * @adjust: DC CRTC timing adjust to be applied to the crtc
> + */
> +struct vupdate_offload_work {
> +       struct work_struct work;
> +       struct amdgpu_device *adev;
> +       struct dc_stream_state *stream;
> +       struct dc_crtc_timing_adjust *adjust;
> +};

What happened to the proper formatting of this structure?  You lost all
tabs :(

Please fix up and resend.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 6.12.y] drm/amd/display: fix dmub access race condition
  2025-10-20  9:08 ` Greg KH
@ 2025-10-20 13:49   ` Timothy Pearson
  0 siblings, 0 replies; 3+ messages in thread
From: Timothy Pearson @ 2025-10-20 13:49 UTC (permalink / raw)
  To: Greg Kroah-Hartman; +Cc: stable



----- Original Message -----
> From: "Greg Kroah-Hartman" <gregkh@linuxfoundation.org>
> To: "Timothy Pearson" <tpearson@raptorengineering.com>
> Cc: "stable" <stable@vger.kernel.org>
> Sent: Monday, October 20, 2025 4:08:40 AM
> Subject: Re: [PATCH 6.12.y] drm/amd/display: fix dmub access race condition

> On Fri, Oct 17, 2025 at 01:01:12PM -0500, Timothy Pearson wrote:
>> +/**
>> + * struct dm_vupdate_work - Work data for periodic action in idle
>> + * @work: Kernel work data for the work event
>> + * @adev: amdgpu_device back pointer
>> + * @stream: DC stream associated with the crtc
>> + * @adjust: DC CRTC timing adjust to be applied to the crtc
>> + */
>> +struct vupdate_offload_work {
>> +       struct work_struct work;
>> +       struct amdgpu_device *adev;
>> +       struct dc_stream_state *stream;
>> +       struct dc_crtc_timing_adjust *adjust;
>> +};
> 
> What happened to the proper formatting of this structure?  You lost all
> tabs :(

I'm not sure.  Please accept my apologies.

> Please fix up and resend.

Investigating further after some additional crash reports over the weekend, it looks like there is a whole batch of patches needed to stabilize amdgpu on 6.12.  For now, I'm going to withdraw this until I get a better handle on whether we can reasonably patch 6.12 or just need to run 6.16+ with these GPUs.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-10-20 13:49 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-17 18:01 [PATCH 6.12.y] drm/amd/display: fix dmub access race condition Timothy Pearson
2025-10-20  9:08 ` Greg KH
2025-10-20 13:49   ` Timothy Pearson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox