* [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout
@ 2026-05-05 18:20 Hamza Mahfooz
2026-05-05 18:20 ` [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support Hamza Mahfooz
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Hamza Mahfooz @ 2026-05-05 18:20 UTC (permalink / raw)
To: dri-devel
Cc: Hamza Mahfooz, Harry Wentland, Leo Li, Rodrigo Siqueira,
Alex Deucher, Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
Mario Limonciello, Alex Hung, Wayne Lin, Timur Kristóf,
Aurabindo Pillai, Mario Limonciello (AMD), Ivan Lipski,
Chenyu Chen, Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
We should try to recover from page flip timeouts. Forcing
a full modeset should be generic across all atomic KMS drivers,
so try that first.
Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
---
drivers/gpu/drm/drm_atomic_helper.c | 49 +++++++++++++++++++++++++++--
1 file changed, 46 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index a768398a1884..7ee9d52f63c5 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1926,6 +1926,43 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
}
EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
+static int force_full_modeset(struct drm_crtc *crtc)
+{
+ struct drm_modeset_acquire_ctx ctx;
+ struct drm_crtc_state *crtc_state;
+ struct drm_atomic_state *state;
+ int ret;
+ int err;
+
+ if (drm_atomic_crtc_needs_modeset(crtc->state))
+ return -EBUSY;
+
+ DRM_MODESET_LOCK_ALL_BEGIN(crtc->dev, ctx, 0, err);
+ state = drm_atomic_state_alloc(crtc->dev);
+ if (!state)
+ return -ENOMEM;
+
+ state->acquire_ctx = &ctx;
+
+ crtc_state = drm_atomic_get_crtc_state(state, crtc);
+ if (IS_ERR(crtc_state)) {
+ ret = PTR_ERR(crtc_state);
+ goto out;
+ }
+
+ crtc_state->mode_changed = true;
+
+ drm_info(crtc->dev,
+ "[CRTC:%d:%s] Attempting force full modeset...\n",
+ crtc->base.id, crtc->name);
+
+ ret = drm_atomic_commit(state);
+out:
+ drm_atomic_state_put(state);
+ DRM_MODESET_LOCK_ALL_END(crtc->dev, ctx, err);
+ return ret;
+}
+
/**
* drm_atomic_helper_wait_for_flip_done - wait for all page flips to be done
* @dev: DRM device
@@ -1949,17 +1986,23 @@ void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
for (i = 0; i < dev->mode_config.num_crtc; i++) {
struct drm_crtc_commit *commit = state->crtcs[i].commit;
- int ret;
crtc = state->crtcs[i].ptr;
if (!crtc || !commit)
continue;
- ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
- if (ret == 0)
+ if (!wait_for_completion_timeout(&commit->flip_done, 10 * HZ)) {
+ int ret;
drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n",
crtc->base.id, crtc->name);
+
+ ret = force_full_modeset(crtc);
+ if (ret)
+ drm_err(dev,
+ "[CRTC:%d:%s] force full modeset failed! ret=%d\n",
+ crtc->base.id, crtc->name, ret);
+ }
}
if (state->fake_commit)
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support
2026-05-05 18:20 [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout Hamza Mahfooz
@ 2026-05-05 18:20 ` Hamza Mahfooz
2026-05-05 18:31 ` Mario Limonciello
2026-05-06 17:36 ` Hamza Mahfooz
2026-05-05 18:31 ` [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout Mario Limonciello
2026-05-06 18:47 ` Ville Syrjälä
2 siblings, 2 replies; 8+ messages in thread
From: Hamza Mahfooz @ 2026-05-05 18:20 UTC (permalink / raw)
To: dri-devel
Cc: Hamza Mahfooz, Leo Li, Harry Wentland, Rodrigo Siqueira,
Alex Deucher, Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
Mario Limonciello, Alex Hung, Wayne Lin, Aurabindo Pillai,
Timur Kristóf, Mario Limonciello (AMD), Ivan Lipski,
Chenyu Chen, Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
DMU already has robust hung state tracking, but timeout recovery
was never hooked up, so do so now.
Reviewed-by: Leo Li <sunpeng.li@amd.com>
Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 ++++++++-----
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 +
.../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 12 ++++++++++--
3 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e96a12ff2d31..763da9a9032d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1246,7 +1246,7 @@ static void amdgpu_dm_audio_eld_notify(struct amdgpu_device *adev, int pin)
}
}
-static int dm_dmub_hw_init(struct amdgpu_device *adev)
+int amdgpu_dm_dmub_hw_init(struct amdgpu_device *adev)
{
const struct dmcub_firmware_header_v1_0 *hdr;
struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
@@ -1315,7 +1315,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
/* if adev->firmware.load_type == AMDGPU_FW_LOAD_PSP,
* amdgpu_ucode_init_single_fw will load dmub firmware
* fw_inst_const part to cw0; otherwise, the firmware back door load
- * will be done by dm_dmub_hw_init
+ * will be done by amdgpu_dm_dmub_hw_init().
*/
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
memcpy(fb_info->fb[DMUB_WINDOW_0_INST_CONST].cpu_addr, fw_inst_const,
@@ -1457,7 +1457,7 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev)
drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status);
} else {
/* Perform the full hardware initialization. */
- r = dm_dmub_hw_init(adev);
+ r = amdgpu_dm_dmub_hw_init(adev);
if (r)
drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
}
@@ -2041,6 +2041,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
goto error;
}
+ adev->dm.dc->debug.enable_dmu_recovery =
+ amdgpu_device_should_recover_gpu(adev);
+
if (amdgpu_dc_debug_mask & DC_DISABLE_PIPE_SPLIT) {
adev->dm.dc->debug.force_single_disp_pipe_split = false;
adev->dm.dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
@@ -2090,7 +2093,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n");
- r = dm_dmub_hw_init(adev);
+ r = amdgpu_dm_dmub_hw_init(adev);
if (r) {
drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
goto error;
@@ -3604,7 +3607,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
*/
link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);
- r = dm_dmub_hw_init(adev);
+ r = amdgpu_dm_dmub_hw_init(adev);
if (r) {
drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 74a8fe1a1999..dc808ee83c2a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -1086,6 +1086,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
#define MAX_COLOR_LEGACY_LUT_ENTRIES 256
void amdgpu_dm_init_color_mod(void);
+int amdgpu_dm_dmub_hw_init(struct amdgpu_device *adev);
int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 3b8ae7798a93..8f10117483e2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -33,6 +33,7 @@
#include <drm/drm_atomic.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
#include <drm/drm_edid.h>
#include <drm/drm_fixed.h>
@@ -1165,8 +1166,15 @@ void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
void dm_helpers_dmu_timeout(struct dc_context *ctx)
{
- // TODO:
- //amdgpu_device_gpu_recover(dc_context->driver-context, NULL);
+ struct amdgpu_device *adev = ctx->driver_context;
+
+ lockdep_assert_held(&adev->dm.dc_lock);
+
+ drm_info(adev_to_drm(adev), "attempting firmware reset\n");
+ if (amdgpu_dm_dmub_hw_init(adev))
+ drm_dev_wedged_event(adev_to_drm(adev),
+ DRM_WEDGE_RECOVERY_REBIND |
+ DRM_WEDGE_RECOVERY_BUS_RESET, NULL);
}
void dm_helpers_smu_timeout(struct dc_context *ctx, unsigned int msg_id, unsigned int param, unsigned int timeout_us)
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout
2026-05-05 18:20 [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout Hamza Mahfooz
2026-05-05 18:20 ` [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support Hamza Mahfooz
@ 2026-05-05 18:31 ` Mario Limonciello
2026-05-06 18:47 ` Ville Syrjälä
2 siblings, 0 replies; 8+ messages in thread
From: Mario Limonciello @ 2026-05-05 18:31 UTC (permalink / raw)
To: Hamza Mahfooz, dri-devel
Cc: Harry Wentland, Leo Li, Rodrigo Siqueira, Alex Deucher,
Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, Alex Hung,
Wayne Lin, Timur Kristóf, Aurabindo Pillai,
Mario Limonciello (AMD), Ivan Lipski, Chenyu Chen,
Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
On 5/5/26 13:20, Hamza Mahfooz wrote:
> We should try to recover from page flip timeouts. Forcing
> a full modeset should be generic across all atomic KMS drivers,
> so try that first.
>
> Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/gpu/drm/drm_atomic_helper.c | 49 +++++++++++++++++++++++++++--
> 1 file changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> index a768398a1884..7ee9d52f63c5 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -1926,6 +1926,43 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
> }
> EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
>
> +static int force_full_modeset(struct drm_crtc *crtc)
> +{
> + struct drm_modeset_acquire_ctx ctx;
> + struct drm_crtc_state *crtc_state;
> + struct drm_atomic_state *state;
> + int ret;
> + int err;
> +
> + if (drm_atomic_crtc_needs_modeset(crtc->state))
> + return -EBUSY;
> +
> + DRM_MODESET_LOCK_ALL_BEGIN(crtc->dev, ctx, 0, err);
> + state = drm_atomic_state_alloc(crtc->dev);
> + if (!state)
> + return -ENOMEM;
> +
> + state->acquire_ctx = &ctx;
> +
> + crtc_state = drm_atomic_get_crtc_state(state, crtc);
> + if (IS_ERR(crtc_state)) {
> + ret = PTR_ERR(crtc_state);
> + goto out;
> + }
> +
> + crtc_state->mode_changed = true;
> +
> + drm_info(crtc->dev,
> + "[CRTC:%d:%s] Attempting force full modeset...\n",
> + crtc->base.id, crtc->name);
> +
> + ret = drm_atomic_commit(state);
> +out:
> + drm_atomic_state_put(state);
> + DRM_MODESET_LOCK_ALL_END(crtc->dev, ctx, err);
> + return ret;
> +}
> +
> /**
> * drm_atomic_helper_wait_for_flip_done - wait for all page flips to be done
> * @dev: DRM device
> @@ -1949,17 +1986,23 @@ void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
>
> for (i = 0; i < dev->mode_config.num_crtc; i++) {
> struct drm_crtc_commit *commit = state->crtcs[i].commit;
> - int ret;
>
> crtc = state->crtcs[i].ptr;
>
> if (!crtc || !commit)
> continue;
>
> - ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
> - if (ret == 0)
> + if (!wait_for_completion_timeout(&commit->flip_done, 10 * HZ)) {
> + int ret;
> drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n",
> crtc->base.id, crtc->name);
> +
> + ret = force_full_modeset(crtc);
> + if (ret)
> + drm_err(dev,
> + "[CRTC:%d:%s] force full modeset failed! ret=%d\n",
> + crtc->base.id, crtc->name, ret);
> + }
> }
>
> if (state->fake_commit)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support
2026-05-05 18:20 ` [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support Hamza Mahfooz
@ 2026-05-05 18:31 ` Mario Limonciello
2026-05-06 17:36 ` Hamza Mahfooz
1 sibling, 0 replies; 8+ messages in thread
From: Mario Limonciello @ 2026-05-05 18:31 UTC (permalink / raw)
To: Hamza Mahfooz, dri-devel
Cc: Leo Li, Harry Wentland, Rodrigo Siqueira, Alex Deucher,
Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann, Alex Hung,
Wayne Lin, Aurabindo Pillai, Timur Kristóf,
Mario Limonciello (AMD), Ivan Lipski, Chenyu Chen,
Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
On 5/5/26 13:20, Hamza Mahfooz wrote:
> DMU already has robust hung state tracking, but timeout recovery
> was never hooked up, so do so now.
>
> Reviewed-by: Leo Li <sunpeng.li@amd.com>
> Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
> drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 13 ++++++++-----
> drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 +
> .../drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 12 ++++++++++--
> 3 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index e96a12ff2d31..763da9a9032d 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -1246,7 +1246,7 @@ static void amdgpu_dm_audio_eld_notify(struct amdgpu_device *adev, int pin)
> }
> }
>
> -static int dm_dmub_hw_init(struct amdgpu_device *adev)
> +int amdgpu_dm_dmub_hw_init(struct amdgpu_device *adev)
> {
> const struct dmcub_firmware_header_v1_0 *hdr;
> struct dmub_srv *dmub_srv = adev->dm.dmub_srv;
> @@ -1315,7 +1315,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
> /* if adev->firmware.load_type == AMDGPU_FW_LOAD_PSP,
> * amdgpu_ucode_init_single_fw will load dmub firmware
> * fw_inst_const part to cw0; otherwise, the firmware back door load
> - * will be done by dm_dmub_hw_init
> + * will be done by amdgpu_dm_dmub_hw_init().
> */
> if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> memcpy(fb_info->fb[DMUB_WINDOW_0_INST_CONST].cpu_addr, fw_inst_const,
> @@ -1457,7 +1457,7 @@ static void dm_dmub_hw_resume(struct amdgpu_device *adev)
> drm_warn(adev_to_drm(adev), "Wait for DMUB auto-load failed: %d\n", status);
> } else {
> /* Perform the full hardware initialization. */
> - r = dm_dmub_hw_init(adev);
> + r = amdgpu_dm_dmub_hw_init(adev);
> if (r)
> drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
> }
> @@ -2041,6 +2041,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
> goto error;
> }
>
> + adev->dm.dc->debug.enable_dmu_recovery =
> + amdgpu_device_should_recover_gpu(adev);
> +
> if (amdgpu_dc_debug_mask & DC_DISABLE_PIPE_SPLIT) {
> adev->dm.dc->debug.force_single_disp_pipe_split = false;
> adev->dm.dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;
> @@ -2090,7 +2093,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
> if (adev->dm.dc->caps.dp_hdmi21_pcon_support)
> drm_info(adev_to_drm(adev), "DP-HDMI FRL PCON supported\n");
>
> - r = dm_dmub_hw_init(adev);
> + r = amdgpu_dm_dmub_hw_init(adev);
> if (r) {
> drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
> goto error;
> @@ -3604,7 +3607,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block)
> */
> link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);
>
> - r = dm_dmub_hw_init(adev);
> + r = amdgpu_dm_dmub_hw_init(adev);
> if (r) {
> drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r);
> return r;
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index 74a8fe1a1999..dc808ee83c2a 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -1086,6 +1086,7 @@ int amdgpu_dm_verify_lut3d_size(struct amdgpu_device *adev,
> #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
>
> void amdgpu_dm_init_color_mod(void);
> +int amdgpu_dm_dmub_hw_init(struct amdgpu_device *adev);
> int amdgpu_dm_create_color_properties(struct amdgpu_device *adev);
> int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state);
> int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc);
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> index 3b8ae7798a93..8f10117483e2 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> @@ -33,6 +33,7 @@
> #include <drm/drm_atomic.h>
> #include <drm/drm_probe_helper.h>
> #include <drm/amdgpu_drm.h>
> +#include <drm/drm_drv.h>
> #include <drm/drm_edid.h>
> #include <drm/drm_fixed.h>
>
> @@ -1165,8 +1166,15 @@ void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
>
> void dm_helpers_dmu_timeout(struct dc_context *ctx)
> {
> - // TODO:
> - //amdgpu_device_gpu_recover(dc_context->driver-context, NULL);
> + struct amdgpu_device *adev = ctx->driver_context;
> +
> + lockdep_assert_held(&adev->dm.dc_lock);
> +
> + drm_info(adev_to_drm(adev), "attempting firmware reset\n");
> + if (amdgpu_dm_dmub_hw_init(adev))
> + drm_dev_wedged_event(adev_to_drm(adev),
> + DRM_WEDGE_RECOVERY_REBIND |
> + DRM_WEDGE_RECOVERY_BUS_RESET, NULL);
> }
>
> void dm_helpers_smu_timeout(struct dc_context *ctx, unsigned int msg_id, unsigned int param, unsigned int timeout_us)
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support
2026-05-05 18:20 ` [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support Hamza Mahfooz
2026-05-05 18:31 ` Mario Limonciello
@ 2026-05-06 17:36 ` Hamza Mahfooz
2026-05-06 19:31 ` Leo Li
1 sibling, 1 reply; 8+ messages in thread
From: Hamza Mahfooz @ 2026-05-06 17:36 UTC (permalink / raw)
To: dri-devel
Cc: Leo Li, Harry Wentland, Rodrigo Siqueira, Alex Deucher,
Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
Mario Limonciello, Alex Hung, Wayne Lin, Aurabindo Pillai,
Timur Kristóf, Mario Limonciello (AMD), Ivan Lipski,
Chenyu Chen, Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
On Tue, May 05, 2026 at 02:20:58PM -0400, Hamza Mahfooz wrote:
> DMU already has robust hung state tracking, but timeout recovery
> was never hooked up, so do so now.
>
> Reviewed-by: Leo Li <sunpeng.li@amd.com>
> Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
Does anyone mind if I merge this through drm-misc?
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout
2026-05-05 18:20 [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout Hamza Mahfooz
2026-05-05 18:20 ` [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support Hamza Mahfooz
2026-05-05 18:31 ` [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout Mario Limonciello
@ 2026-05-06 18:47 ` Ville Syrjälä
2026-05-07 0:22 ` Hamza Mahfooz
2 siblings, 1 reply; 8+ messages in thread
From: Ville Syrjälä @ 2026-05-06 18:47 UTC (permalink / raw)
To: Hamza Mahfooz
Cc: dri-devel, Harry Wentland, Leo Li, Rodrigo Siqueira, Alex Deucher,
Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
Mario Limonciello, Alex Hung, Wayne Lin, Timur Kristóf,
Aurabindo Pillai, Mario Limonciello (AMD), Ivan Lipski,
Chenyu Chen, Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
On Tue, May 05, 2026 at 02:20:57PM -0400, Hamza Mahfooz wrote:
> We should try to recover from page flip timeouts. Forcing
> a full modeset should be generic across all atomic KMS drivers,
> so try that first.
>
> Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
> ---
> drivers/gpu/drm/drm_atomic_helper.c | 49 +++++++++++++++++++++++++++--
> 1 file changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> index a768398a1884..7ee9d52f63c5 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -1926,6 +1926,43 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
> }
> EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
>
> +static int force_full_modeset(struct drm_crtc *crtc)
> +{
> + struct drm_modeset_acquire_ctx ctx;
> + struct drm_crtc_state *crtc_state;
> + struct drm_atomic_state *state;
> + int ret;
> + int err;
> +
> + if (drm_atomic_crtc_needs_modeset(crtc->state))
> + return -EBUSY;
> +
> + DRM_MODESET_LOCK_ALL_BEGIN(crtc->dev, ctx, 0, err);
> + state = drm_atomic_state_alloc(crtc->dev);
> + if (!state)
> + return -ENOMEM;
> +
> + state->acquire_ctx = &ctx;
> +
> + crtc_state = drm_atomic_get_crtc_state(state, crtc);
> + if (IS_ERR(crtc_state)) {
> + ret = PTR_ERR(crtc_state);
> + goto out;
> + }
> +
> + crtc_state->mode_changed = true;
> +
> + drm_info(crtc->dev,
> + "[CRTC:%d:%s] Attempting force full modeset...\n",
> + crtc->base.id, crtc->name);
> +
> + ret = drm_atomic_commit(state);
> +out:
> + drm_atomic_state_put(state);
> + DRM_MODESET_LOCK_ALL_END(crtc->dev, ctx, err);
> + return ret;
> +}
> +
> /**
> * drm_atomic_helper_wait_for_flip_done - wait for all page flips to be done
> * @dev: DRM device
> @@ -1949,17 +1986,23 @@ void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
>
> for (i = 0; i < dev->mode_config.num_crtc; i++) {
> struct drm_crtc_commit *commit = state->crtcs[i].commit;
> - int ret;
>
> crtc = state->crtcs[i].ptr;
>
> if (!crtc || !commit)
> continue;
>
> - ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
> - if (ret == 0)
> + if (!wait_for_completion_timeout(&commit->flip_done, 10 * HZ)) {
> + int ret;
> drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n",
> crtc->base.id, crtc->name);
> +
> + ret = force_full_modeset(crtc);
This looks like some kind of ugly hack to paper over a driver bug.
I really don't want this for i915/xe because all it'll end up doing
is make it harder to debug any real issues.
> + if (ret)
> + drm_err(dev,
> + "[CRTC:%d:%s] force full modeset failed! ret=%d\n",
> + crtc->base.id, crtc->name, ret);
> + }
> }
>
> if (state->fake_commit)
> --
> 2.54.0
--
Ville Syrjälä
Intel
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support
2026-05-06 17:36 ` Hamza Mahfooz
@ 2026-05-06 19:31 ` Leo Li
0 siblings, 0 replies; 8+ messages in thread
From: Leo Li @ 2026-05-06 19:31 UTC (permalink / raw)
To: Hamza Mahfooz, dri-devel
Cc: Harry Wentland, Rodrigo Siqueira, Alex Deucher,
Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
Mario Limonciello, Alex Hung, Wayne Lin, Aurabindo Pillai,
Timur Kristóf, Mario Limonciello (AMD), Ivan Lipski,
Chenyu Chen, Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
On 2026-05-06 13:36, Hamza Mahfooz wrote:
> On Tue, May 05, 2026 at 02:20:58PM -0400, Hamza Mahfooz wrote:
>> DMU already has robust hung state tracking, but timeout recovery
>> was never hooked up, so do so now.
>>
>> Reviewed-by: Leo Li <sunpeng.li@amd.com>
>> Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
>
> Does anyone mind if I merge this through drm-misc?
I'd prefer to have this go through asdn and the regular promotion testing.
One thing that some internal folks raised is that the DMUB timeout `MAX_WAIT_US` might be too short. On some panels, backlight control is known to take 250ms+. Triggering a DMUB reset rather than falling through may have unintended consequences.
I'll spin something up for that soon-ish.
- Leo
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout
2026-05-06 18:47 ` Ville Syrjälä
@ 2026-05-07 0:22 ` Hamza Mahfooz
0 siblings, 0 replies; 8+ messages in thread
From: Hamza Mahfooz @ 2026-05-07 0:22 UTC (permalink / raw)
To: Ville Syrjälä
Cc: dri-devel, Harry Wentland, Leo Li, Rodrigo Siqueira, Alex Deucher,
Christian König, David Airlie, Simona Vetter,
Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
Mario Limonciello, Alex Hung, Wayne Lin, Timur Kristóf,
Aurabindo Pillai, Mario Limonciello (AMD), Ivan Lipski,
Chenyu Chen, Matthew Schwartz, Tom Chung, Roman Li, Takashi Iwai,
Colin Ian King, Charlene Liu, Kees Cook, amd-gfx, linux-kernel
On Wed, May 06, 2026 at 09:47:05PM +0300, Ville Syrjälä wrote:
> On Tue, May 05, 2026 at 02:20:57PM -0400, Hamza Mahfooz wrote:
> > We should try to recover from page flip timeouts. Forcing
> > a full modeset should be generic across all atomic KMS drivers,
> > so try that first.
> >
> > Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
> > ---
> > drivers/gpu/drm/drm_atomic_helper.c | 49 +++++++++++++++++++++++++++--
> > 1 file changed, 46 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> > index a768398a1884..7ee9d52f63c5 100644
> > --- a/drivers/gpu/drm/drm_atomic_helper.c
> > +++ b/drivers/gpu/drm/drm_atomic_helper.c
> > @@ -1926,6 +1926,43 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
> > }
> > EXPORT_SYMBOL(drm_atomic_helper_wait_for_vblanks);
> >
> > +static int force_full_modeset(struct drm_crtc *crtc)
> > +{
> > + struct drm_modeset_acquire_ctx ctx;
> > + struct drm_crtc_state *crtc_state;
> > + struct drm_atomic_state *state;
> > + int ret;
> > + int err;
> > +
> > + if (drm_atomic_crtc_needs_modeset(crtc->state))
> > + return -EBUSY;
> > +
> > + DRM_MODESET_LOCK_ALL_BEGIN(crtc->dev, ctx, 0, err);
> > + state = drm_atomic_state_alloc(crtc->dev);
> > + if (!state)
> > + return -ENOMEM;
> > +
> > + state->acquire_ctx = &ctx;
> > +
> > + crtc_state = drm_atomic_get_crtc_state(state, crtc);
> > + if (IS_ERR(crtc_state)) {
> > + ret = PTR_ERR(crtc_state);
> > + goto out;
> > + }
> > +
> > + crtc_state->mode_changed = true;
> > +
> > + drm_info(crtc->dev,
> > + "[CRTC:%d:%s] Attempting force full modeset...\n",
> > + crtc->base.id, crtc->name);
> > +
> > + ret = drm_atomic_commit(state);
> > +out:
> > + drm_atomic_state_put(state);
> > + DRM_MODESET_LOCK_ALL_END(crtc->dev, ctx, err);
> > + return ret;
> > +}
> > +
> > /**
> > * drm_atomic_helper_wait_for_flip_done - wait for all page flips to be done
> > * @dev: DRM device
> > @@ -1949,17 +1986,23 @@ void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
> >
> > for (i = 0; i < dev->mode_config.num_crtc; i++) {
> > struct drm_crtc_commit *commit = state->crtcs[i].commit;
> > - int ret;
> >
> > crtc = state->crtcs[i].ptr;
> >
> > if (!crtc || !commit)
> > continue;
> >
> > - ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
> > - if (ret == 0)
> > + if (!wait_for_completion_timeout(&commit->flip_done, 10 * HZ)) {
> > + int ret;
> > drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n",
> > crtc->base.id, crtc->name);
> > +
> > + ret = force_full_modeset(crtc);
>
> This looks like some kind of ugly hack to paper over a driver bug.
> I really don't want this for i915/xe because all it'll end up doing
> is make it harder to debug any real issues.
In that case, would you be okay with having
drm_atomic_helper_wait_for_flip_done() return an error code, or did you
have something else in mind?
>
> > + if (ret)
> > + drm_err(dev,
> > + "[CRTC:%d:%s] force full modeset failed! ret=%d\n",
> > + crtc->base.id, crtc->name, ret);
> > + }
> > }
> >
> > if (state->fake_commit)
> > --
> > 2.54.0
>
> --
> Ville Syrjälä
> Intel
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2026-05-07 0:23 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-05-05 18:20 [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout Hamza Mahfooz
2026-05-05 18:20 ` [PATCH v6 2/2] drm/amd/display: add DMU timeout recovery support Hamza Mahfooz
2026-05-05 18:31 ` Mario Limonciello
2026-05-06 17:36 ` Hamza Mahfooz
2026-05-06 19:31 ` Leo Li
2026-05-05 18:31 ` [PATCH v6 1/2] drm/atomic: attempt full modeset on page flip timeout Mario Limonciello
2026-05-06 18:47 ` Ville Syrjälä
2026-05-07 0:22 ` Hamza Mahfooz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox