dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/vblank: Fix hard lockup in drm_handle_vblank()
@ 2025-05-10  9:47 Zeng Heng
  2025-05-13  9:19 ` Ville Syrjälä
  0 siblings, 1 reply; 2+ messages in thread
From: Zeng Heng @ 2025-05-10  9:47 UTC (permalink / raw)
  To: tzimmermann, airlied, maarten.lankhorst, mario.kleiner, airlied,
	mripard, simona
  Cc: bobo.shaobowang, dri-devel, linux-kernel

When we performed fuzz testing on DRM using syzkaller, we encountered
the following hard lockup issue:

Kernel panic - not syncing: Hard LOCKUP
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 6.6.0+ #21
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
Call Trace:
 <IRQ>
 hrtimer_cancel+0x52/0x70 kernel/time/hrtimer.c:1449
 __disable_vblank drivers/gpu/drm/drm_vblank.c:434 [inline]
 drm_vblank_disable_and_save+0x27f/0x3c0 drivers/gpu/drm/drm_vblank.c:478
 vblank_disable_fn+0x15d/0x1b0 drivers/gpu/drm/drm_vblank.c:495
 call_timer_fn+0x39/0x280 kernel/time/timer.c:1700
 expire_timers+0x22d/0x3c0 kernel/time/timer.c:1751
 __run_timers kernel/time/timer.c:2022 [inline]
 run_timer_softirq+0x315/0x8a0 kernel/time/timer.c:2035
 handle_softirqs+0x195/0x580 kernel/softirq.c:553
 __do_softirq kernel/softirq.c:587 [inline]
 </IRQ>

This is a deadlock issue as follows:

    CPU3				CPU 7

vblank_disable_fn()
  drm_vblank_disable_and_save()
  spin_lock(vblank_time_lock)
				hrtimer_interrupt()
				  vkms_vblank_simulate()
				    drm_handle_vblank()
				      // wait for CPU3 to release vblank_time_lock
				      spin_lock(vblank_time_lock)
    vkms_disable_vblank()
      // wait for vblank_hrtimer on CPU7 to finish
      hrtimer_cancel(vblank_hrtimer)

The call of hrtimer_cancel() has hold vblank_time_lock which would prevent
completion of the hrtimer's callback function.

Therefore, in drm_handle_vblank(), we move the check for the
vblank->enabled variable to the time when vblank_time_lock() is acquired.
If the CRTC event has already been canceled, the drm_handle_vblank() will
be terminated and will no longer attempt to acquire vblank_time_lock.

In the same time, in drm_vblank_disable_and_save(), we set vblank->enabled
to disable before calling hrtimer_cancel() to avoid endless waiting in
hrtimer_cancel_wait_running().

Fixes: 27641c3f003e ("drm/vblank: Add support for precise vblank timestamping.")
Signed-off-by: Zeng Heng <zengheng4@huawei.com>
---
 drivers/gpu/drm/drm_vblank.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index 78958ddf8485..56b80e5ede2a 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -471,6 +471,8 @@ void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe)
 	if (!vblank->enabled)
 		goto out;
 
+	vblank->enabled = false;
+
 	/*
 	 * Update the count and timestamp to maintain the
 	 * appearance that the counter has been ticking all along until
@@ -479,7 +481,6 @@ void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe)
 	 */
 	drm_update_vblank_count(dev, pipe, false);
 	__disable_vblank(dev, pipe);
-	vblank->enabled = false;
 
 out:
 	spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
@@ -1932,14 +1933,13 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe)
 	 * vblank enable/disable, as this would cause inconsistent
 	 * or corrupted timestamps and vblank counts.
 	 */
-	spin_lock(&dev->vblank_time_lock);
-
-	/* Vblank irq handling disabled. Nothing to do. */
-	if (!vblank->enabled) {
-		spin_unlock(&dev->vblank_time_lock);
-		spin_unlock_irqrestore(&dev->event_lock, irqflags);
-		return false;
-	}
+	do {
+		/* Vblank irq handling disabled. Nothing to do. */
+		if (!vblank->enabled) {
+			spin_unlock_irqrestore(&dev->event_lock, irqflags);
+			return false;
+		}
+	} while (!spin_trylock(&dev->vblank_time_lock));
 
 	drm_update_vblank_count(dev, pipe, true);
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] drm/vblank: Fix hard lockup in drm_handle_vblank()
  2025-05-10  9:47 [PATCH] drm/vblank: Fix hard lockup in drm_handle_vblank() Zeng Heng
@ 2025-05-13  9:19 ` Ville Syrjälä
  0 siblings, 0 replies; 2+ messages in thread
From: Ville Syrjälä @ 2025-05-13  9:19 UTC (permalink / raw)
  To: Zeng Heng
  Cc: tzimmermann, airlied, maarten.lankhorst, mario.kleiner, airlied,
	mripard, simona, bobo.shaobowang, dri-devel, linux-kernel

On Sat, May 10, 2025 at 05:47:57PM +0800, Zeng Heng wrote:
> When we performed fuzz testing on DRM using syzkaller, we encountered
> the following hard lockup issue:
> 
> Kernel panic - not syncing: Hard LOCKUP
> CPU: 3 PID: 0 Comm: swapper/3 Not tainted 6.6.0+ #21
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> Call Trace:
>  <IRQ>
>  hrtimer_cancel+0x52/0x70 kernel/time/hrtimer.c:1449
>  __disable_vblank drivers/gpu/drm/drm_vblank.c:434 [inline]
>  drm_vblank_disable_and_save+0x27f/0x3c0 drivers/gpu/drm/drm_vblank.c:478
>  vblank_disable_fn+0x15d/0x1b0 drivers/gpu/drm/drm_vblank.c:495
>  call_timer_fn+0x39/0x280 kernel/time/timer.c:1700
>  expire_timers+0x22d/0x3c0 kernel/time/timer.c:1751
>  __run_timers kernel/time/timer.c:2022 [inline]
>  run_timer_softirq+0x315/0x8a0 kernel/time/timer.c:2035
>  handle_softirqs+0x195/0x580 kernel/softirq.c:553
>  __do_softirq kernel/softirq.c:587 [inline]
>  </IRQ>
> 
> This is a deadlock issue as follows:
> 
>     CPU3				CPU 7
> 
> vblank_disable_fn()
>   drm_vblank_disable_and_save()
>   spin_lock(vblank_time_lock)
> 				hrtimer_interrupt()
> 				  vkms_vblank_simulate()
> 				    drm_handle_vblank()
> 				      // wait for CPU3 to release vblank_time_lock
> 				      spin_lock(vblank_time_lock)
>     vkms_disable_vblank()
>       // wait for vblank_hrtimer on CPU7 to finish
>       hrtimer_cancel(vblank_hrtimer)

Looks like a vkms bug to me, so should IMO be fixed there instead
of hacking around it in the common vblank code.

Maybe vkms can just have the timer not rearm itself when it notices
that vblank interrupts are disabled?

> 
> The call of hrtimer_cancel() has hold vblank_time_lock which would prevent
> completion of the hrtimer's callback function.
> 
> Therefore, in drm_handle_vblank(), we move the check for the
> vblank->enabled variable to the time when vblank_time_lock() is acquired.
> If the CRTC event has already been canceled, the drm_handle_vblank() will
> be terminated and will no longer attempt to acquire vblank_time_lock.
> 
> In the same time, in drm_vblank_disable_and_save(), we set vblank->enabled
> to disable before calling hrtimer_cancel() to avoid endless waiting in
> hrtimer_cancel_wait_running().
> 
> Fixes: 27641c3f003e ("drm/vblank: Add support for precise vblank timestamping.")
> Signed-off-by: Zeng Heng <zengheng4@huawei.com>
> ---
>  drivers/gpu/drm/drm_vblank.c | 18 +++++++++---------
>  1 file changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
> index 78958ddf8485..56b80e5ede2a 100644
> --- a/drivers/gpu/drm/drm_vblank.c
> +++ b/drivers/gpu/drm/drm_vblank.c
> @@ -471,6 +471,8 @@ void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe)
>  	if (!vblank->enabled)
>  		goto out;
>  
> +	vblank->enabled = false;
> +
>  	/*
>  	 * Update the count and timestamp to maintain the
>  	 * appearance that the counter has been ticking all along until
> @@ -479,7 +481,6 @@ void drm_vblank_disable_and_save(struct drm_device *dev, unsigned int pipe)
>  	 */
>  	drm_update_vblank_count(dev, pipe, false);
>  	__disable_vblank(dev, pipe);
> -	vblank->enabled = false;
>  
>  out:
>  	spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
> @@ -1932,14 +1933,13 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe)
>  	 * vblank enable/disable, as this would cause inconsistent
>  	 * or corrupted timestamps and vblank counts.
>  	 */
> -	spin_lock(&dev->vblank_time_lock);
> -
> -	/* Vblank irq handling disabled. Nothing to do. */
> -	if (!vblank->enabled) {
> -		spin_unlock(&dev->vblank_time_lock);
> -		spin_unlock_irqrestore(&dev->event_lock, irqflags);
> -		return false;
> -	}
> +	do {
> +		/* Vblank irq handling disabled. Nothing to do. */
> +		if (!vblank->enabled) {
> +			spin_unlock_irqrestore(&dev->event_lock, irqflags);
> +			return false;
> +		}
> +	} while (!spin_trylock(&dev->vblank_time_lock));
>  
>  	drm_update_vblank_count(dev, pipe, true);
>  
> -- 
> 2.25.1

-- 
Ville Syrjälä
Intel

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-05-13  9:19 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-10  9:47 [PATCH] drm/vblank: Fix hard lockup in drm_handle_vblank() Zeng Heng
2025-05-13  9:19 ` Ville Syrjälä

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).