* [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
@ 2012-09-28 11:29 Chris Wilson
2012-09-28 12:05 ` Ville Syrjälä
0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-09-28 11:29 UTC (permalink / raw)
To: intel-gfx
If we accumulate unpin tasks because we are pageflipping faster than the
system can schedule its workers, we can effectively create a
pin-leak. The solution taken here is to limit the number of unpin tasks
we have per-crtc and to flush those outstanding tasks if we accumulate
too many. This should prevent any jitter in the normal case, and also
prevent the hang if we should run too fast.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++++-----
drivers/gpu/drm/i915/intel_drv.h | 4 +++-
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 04407fd..14f1b51 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6310,14 +6310,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
{
struct intel_unpin_work *work =
container_of(__work, struct intel_unpin_work, work);
+ struct drm_device *dev = work->crtc->dev;
- mutex_lock(&work->dev->struct_mutex);
+ mutex_lock(&dev->struct_mutex);
intel_unpin_fb_obj(work->old_fb_obj);
drm_gem_object_unreference(&work->pending_flip_obj->base);
drm_gem_object_unreference(&work->old_fb_obj->base);
- intel_update_fbc(work->dev);
- mutex_unlock(&work->dev->struct_mutex);
+ intel_update_fbc(dev);
+ mutex_unlock(&dev->struct_mutex);
+
+ BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
+ atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
+
kfree(work);
}
@@ -6389,7 +6394,7 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
if (atomic_read(&obj->pending_flip) == 0)
wake_up(&dev_priv->pending_flip_queue);
- schedule_work(&work->work);
+ queue_work(dev_priv->wq, &work->work);
trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
}
@@ -6690,7 +6695,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
return -ENOMEM;
work->event = event;
- work->dev = crtc->dev;
+ work->crtc = crtc;
intel_fb = to_intel_framebuffer(crtc->fb);
work->old_fb_obj = intel_fb->obj;
INIT_WORK(&work->work, intel_unpin_work_fn);
@@ -6715,6 +6720,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
intel_fb = to_intel_framebuffer(fb);
obj = intel_fb->obj;
+ if (atomic_read(&intel_crtc->unpin_work_count) == 2)
+ flush_workqueue(dev_priv->wq);
+
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto cleanup;
@@ -6733,6 +6741,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
* the flip occurs and the object is no longer visible.
*/
atomic_add(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
+ atomic_inc(&intel_crtc->unpin_work_count);
ret = dev_priv->display.queue_flip(dev, crtc, fb, obj);
if (ret)
@@ -6747,6 +6756,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
return 0;
cleanup_pending:
+ atomic_dec(&intel_crtc->unpin_work_count);
atomic_sub(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
drm_gem_object_unreference(&work->old_fb_obj->base);
drm_gem_object_unreference(&obj->base);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 5515c45..acc1d08 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -203,6 +203,8 @@ struct intel_crtc {
} vblank_work;
int fdi_lanes;
+ atomic_t unpin_work_count;
+
/* Display surface base address adjustement for pageflips. Note that on
* gen4+ this only adjusts up to a tile, offsets within a tile are
* handled in the hw itself (with the TILEOFF register). */
@@ -387,7 +389,7 @@ intel_get_crtc_for_plane(struct drm_device *dev, int plane)
struct intel_unpin_work {
struct work_struct work;
- struct drm_device *dev;
+ struct drm_crtc *crtc;
struct drm_i915_gem_object *old_fb_obj;
struct drm_i915_gem_object *pending_flip_obj;
struct drm_pending_vblank_event *event;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-09-28 11:29 [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping Chris Wilson
@ 2012-09-28 12:05 ` Ville Syrjälä
2012-09-28 12:07 ` Chris Wilson
0 siblings, 1 reply; 17+ messages in thread
From: Ville Syrjälä @ 2012-09-28 12:05 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Fri, Sep 28, 2012 at 12:29:56PM +0100, Chris Wilson wrote:
> If we accumulate unpin tasks because we are pageflipping faster than the
> system can schedule its workers, we can effectively create a
> pin-leak. The solution taken here is to limit the number of unpin tasks
> we have per-crtc and to flush those outstanding tasks if we accumulate
> too many. This should prevent any jitter in the normal case, and also
> prevent the hang if we should run too fast.
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++++-----
> drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> 2 files changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 04407fd..14f1b51 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -6310,14 +6310,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> {
> struct intel_unpin_work *work =
> container_of(__work, struct intel_unpin_work, work);
> + struct drm_device *dev = work->crtc->dev;
>
> - mutex_lock(&work->dev->struct_mutex);
> + mutex_lock(&dev->struct_mutex);
> intel_unpin_fb_obj(work->old_fb_obj);
> drm_gem_object_unreference(&work->pending_flip_obj->base);
> drm_gem_object_unreference(&work->old_fb_obj->base);
>
> - intel_update_fbc(work->dev);
> - mutex_unlock(&work->dev->struct_mutex);
> + intel_update_fbc(dev);
> + mutex_unlock(&dev->struct_mutex);
> +
> + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
AFAICS you always have struct_mutex locked in the relevant functions,
so no need for an atomic variable.
--
Ville Syrjälä
Intel OTC
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-09-28 12:05 ` Ville Syrjälä
@ 2012-09-28 12:07 ` Chris Wilson
2012-09-28 12:20 ` Ville Syrjälä
0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-09-28 12:07 UTC (permalink / raw)
Cc: intel-gfx
[-- Attachment #1: Type: text/plain, Size: 2282 bytes --]
On Fri, 28 Sep 2012 15:05:01 +0300, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> On Fri, Sep 28, 2012 at 12:29:56PM +0100, Chris Wilson wrote:
> > If we accumulate unpin tasks because we are pageflipping faster than the
> > system can schedule its workers, we can effectively create a
> > pin-leak. The solution taken here is to limit the number of unpin tasks
> > we have per-crtc and to flush those outstanding tasks if we accumulate
> > too many. This should prevent any jitter in the normal case, and also
> > prevent the hang if we should run too fast.
> >
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++++-----
> > drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> > 2 files changed, 18 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 04407fd..14f1b51 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -6310,14 +6310,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> > {
> > struct intel_unpin_work *work =
> > container_of(__work, struct intel_unpin_work, work);
> > + struct drm_device *dev = work->crtc->dev;
> >
> > - mutex_lock(&work->dev->struct_mutex);
> > + mutex_lock(&dev->struct_mutex);
> > intel_unpin_fb_obj(work->old_fb_obj);
> > drm_gem_object_unreference(&work->pending_flip_obj->base);
> > drm_gem_object_unreference(&work->old_fb_obj->base);
> >
> > - intel_update_fbc(work->dev);
> > - mutex_unlock(&work->dev->struct_mutex);
> > + intel_update_fbc(dev);
> > + mutex_unlock(&dev->struct_mutex);
> > +
> > + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> > + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
>
> AFAICS you always have struct_mutex locked in the relevant functions,
> so no need for an atomic variable.
It's not held in every case. Since we need to do the flush without holding
the lock, we have the choice of making this variable atomic, or taking
and dropping the lock. Obviously I chose the former.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-09-28 12:07 ` Chris Wilson
@ 2012-09-28 12:20 ` Ville Syrjälä
0 siblings, 0 replies; 17+ messages in thread
From: Ville Syrjälä @ 2012-09-28 12:20 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Fri, Sep 28, 2012 at 01:07:59PM +0100, Chris Wilson wrote:
> On Fri, 28 Sep 2012 15:05:01 +0300, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> > On Fri, Sep 28, 2012 at 12:29:56PM +0100, Chris Wilson wrote:
> > > If we accumulate unpin tasks because we are pageflipping faster than the
> > > system can schedule its workers, we can effectively create a
> > > pin-leak. The solution taken here is to limit the number of unpin tasks
> > > we have per-crtc and to flush those outstanding tasks if we accumulate
> > > too many. This should prevent any jitter in the normal case, and also
> > > prevent the hang if we should run too fast.
> > >
> > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > > drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++++-----
> > > drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> > > 2 files changed, 18 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > > index 04407fd..14f1b51 100644
> > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > @@ -6310,14 +6310,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> > > {
> > > struct intel_unpin_work *work =
> > > container_of(__work, struct intel_unpin_work, work);
> > > + struct drm_device *dev = work->crtc->dev;
> > >
> > > - mutex_lock(&work->dev->struct_mutex);
> > > + mutex_lock(&dev->struct_mutex);
> > > intel_unpin_fb_obj(work->old_fb_obj);
> > > drm_gem_object_unreference(&work->pending_flip_obj->base);
> > > drm_gem_object_unreference(&work->old_fb_obj->base);
> > >
> > > - intel_update_fbc(work->dev);
> > > - mutex_unlock(&work->dev->struct_mutex);
> > > + intel_update_fbc(dev);
> > > + mutex_unlock(&dev->struct_mutex);
> > > +
> > > + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> > > + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
> >
> > AFAICS you always have struct_mutex locked in the relevant functions,
> > so no need for an atomic variable.
>
> It's not in every case, since we need to do the flush without holding
> the lock, we have the choice of making this variable atomic, or taking
> and dropping the lock. Obviously I choose the former.
Ah right. I missed the conditional flush.
--
Ville Syrjälä
Intel OTC
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
@ 2012-11-01 9:26 Chris Wilson
2012-11-01 15:07 ` Jesse Barnes
2012-11-20 16:15 ` Daniel Vetter
0 siblings, 2 replies; 17+ messages in thread
From: Chris Wilson @ 2012-11-01 9:26 UTC (permalink / raw)
To: intel-gfx
If we accumulate unpin tasks because we are pageflipping faster than the
system can schedule its workers, we can effectively create a
pin-leak. The solution taken here is to limit the number of unpin tasks
we have per-crtc and to flush those outstanding tasks if we accumulate
too many. This should prevent any jitter in the normal case, and also
prevent the hang if we should run too fast.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++------
drivers/gpu/drm/i915/intel_drv.h | 4 +++-
2 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 69b1739..800b195 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -6908,14 +6908,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
{
struct intel_unpin_work *work =
container_of(__work, struct intel_unpin_work, work);
+ struct drm_device *dev = work->crtc->dev;
- mutex_lock(&work->dev->struct_mutex);
+ mutex_lock(&dev->struct_mutex);
intel_unpin_fb_obj(work->old_fb_obj);
drm_gem_object_unreference(&work->pending_flip_obj->base);
drm_gem_object_unreference(&work->old_fb_obj->base);
- intel_update_fbc(work->dev);
- mutex_unlock(&work->dev->struct_mutex);
+ intel_update_fbc(dev);
+ mutex_unlock(&dev->struct_mutex);
+
+ BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
+ atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
+
kfree(work);
}
@@ -6963,9 +6968,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
atomic_clear_mask(1 << intel_crtc->plane,
&obj->pending_flip.counter);
-
wake_up(&dev_priv->pending_flip_queue);
- schedule_work(&work->work);
+
+ queue_work(dev_priv->wq, &work->work);
trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
}
@@ -7266,7 +7271,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
return -ENOMEM;
work->event = event;
- work->dev = crtc->dev;
+ work->crtc = crtc;
intel_fb = to_intel_framebuffer(crtc->fb);
work->old_fb_obj = intel_fb->obj;
INIT_WORK(&work->work, intel_unpin_work_fn);
@@ -7291,6 +7296,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
intel_fb = to_intel_framebuffer(fb);
obj = intel_fb->obj;
+ if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
+ flush_workqueue(dev_priv->wq);
+
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto cleanup;
@@ -7309,6 +7317,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
* the flip occurs and the object is no longer visible.
*/
atomic_add(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
+ atomic_inc(&intel_crtc->unpin_work_count);
ret = dev_priv->display.queue_flip(dev, crtc, fb, obj);
if (ret)
@@ -7323,6 +7332,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
return 0;
cleanup_pending:
+ atomic_dec(&intel_crtc->unpin_work_count);
atomic_sub(1 << intel_crtc->plane, &work->old_fb_obj->pending_flip);
drm_gem_object_unreference(&work->old_fb_obj->base);
drm_gem_object_unreference(&obj->base);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 164696f..1345c44 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -216,6 +216,8 @@ struct intel_crtc {
} vblank_work;
int fdi_lanes;
+ atomic_t unpin_work_count;
+
/* Display surface base address adjustement for pageflips. Note that on
* gen4+ this only adjusts up to a tile, offsets within a tile are
* handled in the hw itself (with the TILEOFF register). */
@@ -403,7 +405,7 @@ intel_get_crtc_for_plane(struct drm_device *dev, int plane)
struct intel_unpin_work {
struct work_struct work;
- struct drm_device *dev;
+ struct drm_crtc *crtc;
struct drm_i915_gem_object *old_fb_obj;
struct drm_i915_gem_object *pending_flip_obj;
struct drm_pending_vblank_event *event;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 9:26 Chris Wilson
@ 2012-11-01 15:07 ` Jesse Barnes
2012-11-01 15:18 ` Chris Wilson
2012-11-20 16:15 ` Daniel Vetter
1 sibling, 1 reply; 17+ messages in thread
From: Jesse Barnes @ 2012-11-01 15:07 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Thu, 1 Nov 2012 09:26:26 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> If we accumulate unpin tasks because we are pageflipping faster than the
> system can schedule its workers, we can effectively create a
> pin-leak. The solution taken here is to limit the number of unpin tasks
> we have per-crtc and to flush those outstanding tasks if we accumulate
> too many. This should prevent any jitter in the normal case, and also
> prevent the hang if we should run too fast.
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++------
> drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> 2 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 69b1739..800b195 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -6908,14 +6908,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> {
> struct intel_unpin_work *work =
> container_of(__work, struct intel_unpin_work, work);
> + struct drm_device *dev = work->crtc->dev;
>
> - mutex_lock(&work->dev->struct_mutex);
> + mutex_lock(&dev->struct_mutex);
> intel_unpin_fb_obj(work->old_fb_obj);
> drm_gem_object_unreference(&work->pending_flip_obj->base);
> drm_gem_object_unreference(&work->old_fb_obj->base);
>
> - intel_update_fbc(work->dev);
> - mutex_unlock(&work->dev->struct_mutex);
> + intel_update_fbc(dev);
> + mutex_unlock(&dev->struct_mutex);
> +
> + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
> +
> kfree(work);
> }
>
> @@ -6963,9 +6968,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
>
> atomic_clear_mask(1 << intel_crtc->plane,
> &obj->pending_flip.counter);
> -
> wake_up(&dev_priv->pending_flip_queue);
> - schedule_work(&work->work);
> +
> + queue_work(dev_priv->wq, &work->work);
>
> trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
> }
> @@ -7266,7 +7271,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> return -ENOMEM;
>
> work->event = event;
> - work->dev = crtc->dev;
> + work->crtc = crtc;
> intel_fb = to_intel_framebuffer(crtc->fb);
> work->old_fb_obj = intel_fb->obj;
> INIT_WORK(&work->work, intel_unpin_work_fn);
> @@ -7291,6 +7296,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> intel_fb = to_intel_framebuffer(fb);
> obj = intel_fb->obj;
>
> + if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
> + flush_workqueue(dev_priv->wq);
> +
Have you by chance tested this with the async flip patch? I wonder
whether, in that case, 2 is too small, and something like 100 might be
better (though really async flips are for cases where we can't keep up
with refresh, so a small number shouldn't hurt too much there either).
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 15:07 ` Jesse Barnes
@ 2012-11-01 15:18 ` Chris Wilson
2012-11-01 15:29 ` Daniel Vetter
0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-11-01 15:18 UTC (permalink / raw)
To: Jesse Barnes; +Cc: intel-gfx
On Thu, 1 Nov 2012 08:07:59 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Thu, 1 Nov 2012 09:26:26 +0000
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> > If we accumulate unpin tasks because we are pageflipping faster than the
> > system can schedule its workers, we can effectively create a
> > pin-leak. The solution taken here is to limit the number of unpin tasks
> > we have per-crtc and to flush those outstanding tasks if we accumulate
> > too many. This should prevent any jitter in the normal case, and also
> > prevent the hang if we should run too fast.
> >
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> > Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++------
> > drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> > 2 files changed, 19 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 69b1739..800b195 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -6908,14 +6908,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> > {
> > struct intel_unpin_work *work =
> > container_of(__work, struct intel_unpin_work, work);
> > + struct drm_device *dev = work->crtc->dev;
> >
> > - mutex_lock(&work->dev->struct_mutex);
> > + mutex_lock(&dev->struct_mutex);
> > intel_unpin_fb_obj(work->old_fb_obj);
> > drm_gem_object_unreference(&work->pending_flip_obj->base);
> > drm_gem_object_unreference(&work->old_fb_obj->base);
> >
> > - intel_update_fbc(work->dev);
> > - mutex_unlock(&work->dev->struct_mutex);
> > + intel_update_fbc(dev);
> > + mutex_unlock(&dev->struct_mutex);
> > +
> > + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> > + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
> > +
> > kfree(work);
> > }
> >
> > @@ -6963,9 +6968,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
> >
> > atomic_clear_mask(1 << intel_crtc->plane,
> > &obj->pending_flip.counter);
> > -
> > wake_up(&dev_priv->pending_flip_queue);
> > - schedule_work(&work->work);
> > +
> > + queue_work(dev_priv->wq, &work->work);
> >
> > trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
> > }
> > @@ -7266,7 +7271,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > return -ENOMEM;
> >
> > work->event = event;
> > - work->dev = crtc->dev;
> > + work->crtc = crtc;
> > intel_fb = to_intel_framebuffer(crtc->fb);
> > work->old_fb_obj = intel_fb->obj;
> > INIT_WORK(&work->work, intel_unpin_work_fn);
> > @@ -7291,6 +7296,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > intel_fb = to_intel_framebuffer(fb);
> > obj = intel_fb->obj;
> >
> > + if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
> > + flush_workqueue(dev_priv->wq);
> > +
>
> Have you by chance tested this with the async flip patch? I wonder if
> in that case whether 2 is too small, and something like 100 might be
> better (though really async flips are for cases where we can't keep up
> with refresh, so a small number shouldn't hurt too much there either).
The limit of 2 is due to the limited resolution of pincount. Hence my
earlier fear for your async flip patch.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 15:18 ` Chris Wilson
@ 2012-11-01 15:29 ` Daniel Vetter
2012-11-01 15:34 ` Jesse Barnes
0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2012-11-01 15:29 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Thu, Nov 01, 2012 at 03:18:46PM +0000, Chris Wilson wrote:
> On Thu, 1 Nov 2012 08:07:59 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > On Thu, 1 Nov 2012 09:26:26 +0000
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > > If we accumulate unpin tasks because we are pageflipping faster than the
> > > system can schedule its workers, we can effectively create a
> > > pin-leak. The solution taken here is to limit the number of unpin tasks
> > > we have per-crtc and to flush those outstanding tasks if we accumulate
> > > too many. This should prevent any jitter in the normal case, and also
> > > prevent the hang if we should run too fast.
> > >
> > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> > > Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > > drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++------
> > > drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> > > 2 files changed, 19 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > > index 69b1739..800b195 100644
> > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > @@ -6908,14 +6908,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> > > {
> > > struct intel_unpin_work *work =
> > > container_of(__work, struct intel_unpin_work, work);
> > > + struct drm_device *dev = work->crtc->dev;
> > >
> > > - mutex_lock(&work->dev->struct_mutex);
> > > + mutex_lock(&dev->struct_mutex);
> > > intel_unpin_fb_obj(work->old_fb_obj);
> > > drm_gem_object_unreference(&work->pending_flip_obj->base);
> > > drm_gem_object_unreference(&work->old_fb_obj->base);
> > >
> > > - intel_update_fbc(work->dev);
> > > - mutex_unlock(&work->dev->struct_mutex);
> > > + intel_update_fbc(dev);
> > > + mutex_unlock(&dev->struct_mutex);
> > > +
> > > + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> > > + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
> > > +
> > > kfree(work);
> > > }
> > >
> > > @@ -6963,9 +6968,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
> > >
> > > atomic_clear_mask(1 << intel_crtc->plane,
> > > &obj->pending_flip.counter);
> > > -
> > > wake_up(&dev_priv->pending_flip_queue);
> > > - schedule_work(&work->work);
> > > +
> > > + queue_work(dev_priv->wq, &work->work);
> > >
> > > trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
> > > }
> > > @@ -7266,7 +7271,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > return -ENOMEM;
> > >
> > > work->event = event;
> > > - work->dev = crtc->dev;
> > > + work->crtc = crtc;
> > > intel_fb = to_intel_framebuffer(crtc->fb);
> > > work->old_fb_obj = intel_fb->obj;
> > > INIT_WORK(&work->work, intel_unpin_work_fn);
> > > @@ -7291,6 +7296,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > intel_fb = to_intel_framebuffer(fb);
> > > obj = intel_fb->obj;
> > >
> > > + if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
> > > + flush_workqueue(dev_priv->wq);
> > > +
> >
> > Have you by chance tested this with the async flip patch? I wonder if
> > in that case whether 2 is too small, and something like 100 might be
> > better (though really async flips are for cases where we can't keep up
> > with refresh, so a small number shouldn't hurt too much there either).
>
> The limit on 2 is due to the limited resolution of pincount. Hence my
> earlier fear for your async flip patch.
I think for async flips we simply need to have a real flip queue in our
code, instead of abusing the implicit list in the workqueue code ...
One other thing is that with async flips we don't have a natural limit on
the number of pinned framebuffers any more, which means we can easily
exhaust all mappable GTT space. Hence we need to integrate that new,
explicit flip queue into our eviction code, too.
For now I'm rather happy with the flush_wq ducttape presented here ;-)
Cheers, Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 15:29 ` Daniel Vetter
@ 2012-11-01 15:34 ` Jesse Barnes
2012-11-01 15:52 ` Chris Wilson
0 siblings, 1 reply; 17+ messages in thread
From: Jesse Barnes @ 2012-11-01 15:34 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On Thu, 1 Nov 2012 16:29:35 +0100
Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Nov 01, 2012 at 03:18:46PM +0000, Chris Wilson wrote:
> > On Thu, 1 Nov 2012 08:07:59 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > > On Thu, 1 Nov 2012 09:26:26 +0000
> > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > >
> > > > If we accumulate unpin tasks because we are pageflipping faster than the
> > > > system can schedule its workers, we can effectively create a
> > > > pin-leak. The solution taken here is to limit the number of unpin tasks
> > > > we have per-crtc and to flush those outstanding tasks if we accumulate
> > > > too many. This should prevent any jitter in the normal case, and also
> > > > prevent the hang if we should run too fast.
> > > >
> > > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> > > > Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > ---
> > > > drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++------
> > > > drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> > > > 2 files changed, 19 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > > > index 69b1739..800b195 100644
> > > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > > @@ -6908,14 +6908,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> > > > {
> > > > struct intel_unpin_work *work =
> > > > container_of(__work, struct intel_unpin_work, work);
> > > > + struct drm_device *dev = work->crtc->dev;
> > > >
> > > > - mutex_lock(&work->dev->struct_mutex);
> > > > + mutex_lock(&dev->struct_mutex);
> > > > intel_unpin_fb_obj(work->old_fb_obj);
> > > > drm_gem_object_unreference(&work->pending_flip_obj->base);
> > > > drm_gem_object_unreference(&work->old_fb_obj->base);
> > > >
> > > > - intel_update_fbc(work->dev);
> > > > - mutex_unlock(&work->dev->struct_mutex);
> > > > + intel_update_fbc(dev);
> > > > + mutex_unlock(&dev->struct_mutex);
> > > > +
> > > > + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> > > > + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
> > > > +
> > > > kfree(work);
> > > > }
> > > >
> > > > @@ -6963,9 +6968,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
> > > >
> > > > atomic_clear_mask(1 << intel_crtc->plane,
> > > > &obj->pending_flip.counter);
> > > > -
> > > > wake_up(&dev_priv->pending_flip_queue);
> > > > - schedule_work(&work->work);
> > > > +
> > > > + queue_work(dev_priv->wq, &work->work);
> > > >
> > > > trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
> > > > }
> > > > @@ -7266,7 +7271,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > > return -ENOMEM;
> > > >
> > > > work->event = event;
> > > > - work->dev = crtc->dev;
> > > > + work->crtc = crtc;
> > > > intel_fb = to_intel_framebuffer(crtc->fb);
> > > > work->old_fb_obj = intel_fb->obj;
> > > > INIT_WORK(&work->work, intel_unpin_work_fn);
> > > > @@ -7291,6 +7296,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > > intel_fb = to_intel_framebuffer(fb);
> > > > obj = intel_fb->obj;
> > > >
> > > > + if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
> > > > + flush_workqueue(dev_priv->wq);
> > > > +
> > >
> > > Have you by chance tested this with the async flip patch? I wonder if
> > > in that case whether 2 is too small, and something like 100 might be
> > > better (though really async flips are for cases where we can't keep up
> > > with refresh, so a small number shouldn't hurt too much there either).
> >
> > The limit on 2 is due to the limited resolution of pincount. Hence my
> > earlier fear for your async flip patch.
>
> I think for asyn flips we simply need to have a real flip queue in our
> code, instead of abusing the implicit list in the workqueue code ...
>
> One other thing is that with async flips we don't have a natural limit on
> the number of pinned framebuffers any more, which means we can easily
> exhaust all mappable GTT space. Hence we need to integrate that new,
> explicit flip queue into our eviction code, too.
>
> For now I'm rather happy with the flush_wq ducttape presented here ;-)
Yeah I don't have a problem with it as long as we don't block when
queuing flips in real life. :)
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 15:34 ` Jesse Barnes
@ 2012-11-01 15:52 ` Chris Wilson
2012-11-01 16:04 ` Jesse Barnes
0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-11-01 15:52 UTC (permalink / raw)
To: Jesse Barnes, Daniel Vetter; +Cc: intel-gfx
On Thu, 1 Nov 2012 08:34:47 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Thu, 1 Nov 2012 16:29:35 +0100
> Daniel Vetter <daniel@ffwll.ch> wrote:
>
> > On Thu, Nov 01, 2012 at 03:18:46PM +0000, Chris Wilson wrote:
> > > On Thu, 1 Nov 2012 08:07:59 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > > > On Thu, 1 Nov 2012 09:26:26 +0000
> > > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > >
> > > > > If we accumulate unpin tasks because we are pageflipping faster than the
> > > > > system can schedule its workers, we can effectively create a
> > > > > pin-leak. The solution taken here is to limit the number of unpin tasks
> > > > > we have per-crtc and to flush those outstanding tasks if we accumulate
> > > > > too many. This should prevent any jitter in the normal case, and also
> > > > > prevent the hang if we should run too fast.
> > > > >
> > > > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> > > > > Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
> > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > ---
> > > > > drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++------
> > > > > drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> > > > > 2 files changed, 19 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > > > > index 69b1739..800b195 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > > > @@ -6908,14 +6908,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> > > > > {
> > > > > struct intel_unpin_work *work =
> > > > > container_of(__work, struct intel_unpin_work, work);
> > > > > + struct drm_device *dev = work->crtc->dev;
> > > > >
> > > > > - mutex_lock(&work->dev->struct_mutex);
> > > > > + mutex_lock(&dev->struct_mutex);
> > > > > intel_unpin_fb_obj(work->old_fb_obj);
> > > > > drm_gem_object_unreference(&work->pending_flip_obj->base);
> > > > > drm_gem_object_unreference(&work->old_fb_obj->base);
> > > > >
> > > > > - intel_update_fbc(work->dev);
> > > > > - mutex_unlock(&work->dev->struct_mutex);
> > > > > + intel_update_fbc(dev);
> > > > > + mutex_unlock(&dev->struct_mutex);
> > > > > +
> > > > > + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> > > > > + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
> > > > > +
> > > > > kfree(work);
> > > > > }
> > > > >
> > > > > @@ -6963,9 +6968,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
> > > > >
> > > > > atomic_clear_mask(1 << intel_crtc->plane,
> > > > > &obj->pending_flip.counter);
> > > > > -
> > > > > wake_up(&dev_priv->pending_flip_queue);
> > > > > - schedule_work(&work->work);
> > > > > +
> > > > > + queue_work(dev_priv->wq, &work->work);
> > > > >
> > > > > trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
> > > > > }
> > > > > @@ -7266,7 +7271,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > > > return -ENOMEM;
> > > > >
> > > > > work->event = event;
> > > > > - work->dev = crtc->dev;
> > > > > + work->crtc = crtc;
> > > > > intel_fb = to_intel_framebuffer(crtc->fb);
> > > > > work->old_fb_obj = intel_fb->obj;
> > > > > INIT_WORK(&work->work, intel_unpin_work_fn);
> > > > > @@ -7291,6 +7296,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > > > intel_fb = to_intel_framebuffer(fb);
> > > > > obj = intel_fb->obj;
> > > > >
> > > > > + if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
> > > > > + flush_workqueue(dev_priv->wq);
> > > > > +
> > > >
> > > > Have you by chance tested this with the async flip patch? I wonder if
> > > > in that case whether 2 is too small, and something like 100 might be
> > > > better (though really async flips are for cases where we can't keep up
> > > > with refresh, so a small number shouldn't hurt too much there either).
> > >
> > > The limit on 2 is due to the limited resolution of pincount. Hence my
> > > earlier fear for your async flip patch.
> >
> > I think for asyn flips we simply need to have a real flip queue in our
> > code, instead of abusing the implicit list in the workqueue code ...
> >
> > One other thing is that with async flips we don't have a natural limit on
> > the number of pinned framebuffers any more, which means we can easily
> > exhaust all mappable GTT space. Hence we need to integrate that new,
> > explicit flip queue into our eviction code, too.
> >
> > For now I'm rather happy with the flush_wq ducttape presented here ;-)
>
> Yeah I don't have a problem with it as long as we don't block when
> queuing flips in real life. :)
Actually I've justified the blocking here to myself, and prefer it to
simply running the crtc->unpin_work. If userspace is swamping the system
so badly that we can't run the kthreads quickly enough, it deserves a stall.
Note that the unpin leak is still about the 3rd most common bug in fedora,
so this stall will be forced on many machines.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 15:52 ` Chris Wilson
@ 2012-11-01 16:04 ` Jesse Barnes
2012-11-01 16:20 ` Chris Wilson
0 siblings, 1 reply; 17+ messages in thread
From: Jesse Barnes @ 2012-11-01 16:04 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Thu, 01 Nov 2012 15:52:23 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Thu, 1 Nov 2012 08:34:47 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > On Thu, 1 Nov 2012 16:29:35 +0100
> > Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > > On Thu, Nov 01, 2012 at 03:18:46PM +0000, Chris Wilson wrote:
> > > > On Thu, 1 Nov 2012 08:07:59 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > > > > On Thu, 1 Nov 2012 09:26:26 +0000
> > > > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > >
> > > > > > If we accumulate unpin tasks because we are pageflipping faster than the
> > > > > > system can schedule its workers, we can effectively create a
> > > > > > pin-leak. The solution taken here is to limit the number of unpin tasks
> > > > > > we have per-crtc and to flush those outstanding tasks if we accumulate
> > > > > > too many. This should prevent any jitter in the normal case, and also
> > > > > > prevent the hang if we should run too fast.
> > > > > >
> > > > > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> > > > > > Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
> > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > > ---
> > > > > > drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++------
> > > > > > drivers/gpu/drm/i915/intel_drv.h | 4 +++-
> > > > > > 2 files changed, 19 insertions(+), 7 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > > > > > index 69b1739..800b195 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > > > > @@ -6908,14 +6908,19 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> > > > > > {
> > > > > > struct intel_unpin_work *work =
> > > > > > container_of(__work, struct intel_unpin_work, work);
> > > > > > + struct drm_device *dev = work->crtc->dev;
> > > > > >
> > > > > > - mutex_lock(&work->dev->struct_mutex);
> > > > > > + mutex_lock(&dev->struct_mutex);
> > > > > > intel_unpin_fb_obj(work->old_fb_obj);
> > > > > > drm_gem_object_unreference(&work->pending_flip_obj->base);
> > > > > > drm_gem_object_unreference(&work->old_fb_obj->base);
> > > > > >
> > > > > > - intel_update_fbc(work->dev);
> > > > > > - mutex_unlock(&work->dev->struct_mutex);
> > > > > > + intel_update_fbc(dev);
> > > > > > + mutex_unlock(&dev->struct_mutex);
> > > > > > +
> > > > > > + BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
> > > > > > + atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
> > > > > > +
> > > > > > kfree(work);
> > > > > > }
> > > > > >
> > > > > > @@ -6963,9 +6968,9 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
> > > > > >
> > > > > > atomic_clear_mask(1 << intel_crtc->plane,
> > > > > > &obj->pending_flip.counter);
> > > > > > -
> > > > > > wake_up(&dev_priv->pending_flip_queue);
> > > > > > - schedule_work(&work->work);
> > > > > > +
> > > > > > + queue_work(dev_priv->wq, &work->work);
> > > > > >
> > > > > > trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
> > > > > > }
> > > > > > @@ -7266,7 +7271,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > > > > return -ENOMEM;
> > > > > >
> > > > > > work->event = event;
> > > > > > - work->dev = crtc->dev;
> > > > > > + work->crtc = crtc;
> > > > > > intel_fb = to_intel_framebuffer(crtc->fb);
> > > > > > work->old_fb_obj = intel_fb->obj;
> > > > > > INIT_WORK(&work->work, intel_unpin_work_fn);
> > > > > > @@ -7291,6 +7296,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> > > > > > intel_fb = to_intel_framebuffer(fb);
> > > > > > obj = intel_fb->obj;
> > > > > >
> > > > > > + if (atomic_read(&intel_crtc->unpin_work_count) >= 2)
> > > > > > + flush_workqueue(dev_priv->wq);
> > > > > > +
> > > > >
> > > > > Have you by chance tested this with the async flip patch? I wonder if
> > > > > in that case whether 2 is too small, and something like 100 might be
> > > > > better (though really async flips are for cases where we can't keep up
> > > > > with refresh, so a small number shouldn't hurt too much there either).
> > > >
> > > > The limit on 2 is due to the limited resolution of pincount. Hence my
> > > > earlier fear for your async flip patch.
> > >
> > > I think for asyn flips we simply need to have a real flip queue in our
> > > code, instead of abusing the implicit list in the workqueue code ...
> > >
> > > One other thing is that with async flips we don't have a natural limit on
> > > the number of pinned framebuffers any more, which means we can easily
> > > exhaust all mappable GTT space. Hence we need to integrate that new,
> > > explicit flip queue into our eviction code, too.
> > >
> > > For now I'm rather happy with the flush_wq ducttape presented here ;-)
> >
> > Yeah I don't have a problem with it as long as we don't block when
> > queuing flips in real life. :)
>
> Actually I've justified the blocking here to myself, and prefer it to
> simply running the crtc->unpin_work. If userspace is swamping the system
> so badly that we can run the kthreads quick enough, it deserves a stall.
> Note that the unpin leak is still about the 3rd most common bug in fedora,
> so this stall will be forced on many machines.
Hm funky, why does Fedora hit it so much? Does some of the GNOME shell
stuff run unthrottled or something?
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 16:04 ` Jesse Barnes
@ 2012-11-01 16:20 ` Chris Wilson
2012-11-01 16:52 ` Tvrtko Ursulin
0 siblings, 1 reply; 17+ messages in thread
From: Chris Wilson @ 2012-11-01 16:20 UTC (permalink / raw)
To: Jesse Barnes; +Cc: intel-gfx, Tvrtko Ursulin
On Thu, 1 Nov 2012 09:04:02 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Thu, 01 Nov 2012 15:52:23 +0000
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> > Actually I've justified the blocking here to myself, and prefer it to
> > simply running the crtc->unpin_work. If userspace is swamping the system
> > so badly that we can run the kthreads quick enough, it deserves a stall.
> > Note that the unpin leak is still about the 3rd most common bug in fedora,
> > so this stall will be forced on many machines.
>
> Hm funky, why does Fedora hit it so much? Does some of the GNOME shell
> stuff run unthrottled or something?
I don't think so. I trust that in Tvrtko's use case, he is not so much
hogging the GPU as keeping the system as a whole relatively busy. So I
suspect it is more to do with CPU starvation of the kthreads than
anything else.
Tvrtko, do you have any feeling for why your machine was easily
susceptible to this leak? Are the stalls noticeable and do they affect
your performance targets?
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 16:20 ` Chris Wilson
@ 2012-11-01 16:52 ` Tvrtko Ursulin
2012-11-01 16:58 ` Jesse Barnes
2012-11-02 21:31 ` Eric Anholt
0 siblings, 2 replies; 17+ messages in thread
From: Tvrtko Ursulin @ 2012-11-01 16:52 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Thursday 01 November 2012 16:20:03 Chris Wilson wrote:
> On Thu, 1 Nov 2012 09:04:02 -0700, Jesse Barnes <jbarnes@virtuousgeek.org>
wrote:
> > On Thu, 01 Nov 2012 15:52:23 +0000
> >
> > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > Actually I've justified the blocking here to myself, and prefer it to
> > > simply running the crtc->unpin_work. If userspace is swamping the system
> > > so badly that we can run the kthreads quick enough, it deserves a stall.
> > > Note that the unpin leak is still about the 3rd most common bug in
> > > fedora,
> > > so this stall will be forced on many machines.
> >
> > Hm funky, why does Fedora hit it so much? Does some of the GNOME shell
> > stuff run unthrottled or something?
>
> I don't think so. I trust that in Tvrtko's use case, he is not so much as
> hogging the GPU as keeping the system as a whole relatively busy. So I
> suspect it is more to do with CPU starvation of the kthreads than
> anything else.
>
> Tvrtko, do you have any feeling for why your machine was easily
> suspectible to this leak? Are the stalls noticeable and do they affect
> your performance targets?
We didn't bother looking for any stalls, but for a long time we were
occasionally hitting this pin_count BUG in i915_gem_object_pin. So it didn't in
fact affect our performance targets so much as it completely wrecked our system.
If this patch causes an occasional stall instead, given that this bug triggers
every 3-4 hours of uptime, we are fine with that. If a frame or so is missed
every couple hours on low end hardware we don't care that much.
More on the actual workload...
Only recently we got lucky and found a platform and workload where it happens
reliably. And this patch reliably fixes that.
In this workload CPU is being loaded 50-60% decoding a movie and rendering it
to a full screen window. Our proprietary compositor page flips at 60Hz only,
not faster. Together with another small semi-transparent window being rendered
on top of the full screen movie. Movie played is a 25fps one, which means the
full screen window is damaged 25 out of 60 frames (give or take) which is when
we render to our back buffer and page flip at the vsync rate (60Hz).
According to intel_gpu_top tool, GPU load is roughly at 40%, apart from the
"Framebuffer Compression" metric which is maxed out, if that one is at all
valid.
This particular scenario triggers the bug only on two of our Atom based
platforms, both with a NM10/Pineview G/i915 chipset.
Tvrtko
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 16:52 ` Tvrtko Ursulin
@ 2012-11-01 16:58 ` Jesse Barnes
2012-11-05 11:36 ` Simon Farnsworth
2012-11-02 21:31 ` Eric Anholt
1 sibling, 1 reply; 17+ messages in thread
From: Jesse Barnes @ 2012-11-01 16:58 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
On Thu, 01 Nov 2012 16:52:05 +0000
Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk> wrote:
> On Thursday 01 November 2012 16:20:03 Chris Wilson wrote:
> > On Thu, 1 Nov 2012 09:04:02 -0700, Jesse Barnes <jbarnes@virtuousgeek.org>
> wrote:
> > > On Thu, 01 Nov 2012 15:52:23 +0000
> > >
> > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > Actually I've justified the blocking here to myself, and prefer it to
> > > > simply running the crtc->unpin_work. If userspace is swamping the system
> > > > so badly that we can run the kthreads quick enough, it deserves a stall.
> > > > Note that the unpin leak is still about the 3rd most common bug in
> > > > fedora,
> > > > so this stall will be forced on many machines.
> > >
> > > Hm funky, why does Fedora hit it so much? Does some of the GNOME shell
> > > stuff run unthrottled or something?
> >
> > I don't think so. I trust that in Tvrtko's use case, he is not so much as
> > hogging the GPU as keeping the system as a whole relatively busy. So I
> > suspect it is more to do with CPU starvation of the kthreads than
> > anything else.
> >
> > Tvrtko, do you have any feeling for why your machine was easily
> > suspectible to this leak? Are the stalls noticeable and do they affect
> > your performance targets?
>
> We didn't bother looking for any stalls, but for a long time we were
> occasionally hitting this pin_count BUG i915_gem_object_pin. So it didn't in
> fact affect our performance targets as much it completely wrecked our system.
>
> If this patch causes an occasional stall instead, given that this bug triggers
> every 3-4 hours of uptime, we are fine with that. If a frame or so is missed
> every couple hours on low end hardware we don't care that much.
>
> More on the actual workload...
>
> Only recently we got lucky and found a platform and workload where it happens
> reliably. And this patch reliably fixes that.
>
> In this workload CPU is being loaded 50-60% decoding a movie and rendering it
> to a full screen window. Our proprietary compositor page flips at 60Hz only,
> not faster. Together with another small semi-transparent window being rendered
> on top of the full screen movie. Movie played is a 25fps one, which means the
> full screen window is damaged 25 out of 60 frames (give or take) which is when
> we render to our back buffer and page flip at the vsync rate (60Hz).
>
> According to intel_gpu_top tool, GPU load is roughly at 40%, apart from the
> "Framebuffer Compression" metric which is maxed out, if that is one is at all
> valid.
>
> This particular scenario triggers the bug only on two of our Atom based
> platform both with a NM10/Pineview G/i915 chipset.
Ah ok on Atom you're probably CPU constrained a bit, but still at
50-60% utilization the kthreads should be running at least sometimes...
But it sounds like a case of the kthreads not running instead of
queueing too fast anyway (not that the latter is really possible
without some hacking to the flip code).
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 16:58 ` Jesse Barnes
@ 2012-11-05 11:36 ` Simon Farnsworth
0 siblings, 0 replies; 17+ messages in thread
From: Simon Farnsworth @ 2012-11-05 11:36 UTC (permalink / raw)
To: intel-gfx; +Cc: Tvrtko Ursulin
[-- Attachment #1.1: Type: text/plain, Size: 3546 bytes --]
On Thursday 1 November 2012 09:58:51 Jesse Barnes wrote:
> On Thu, 01 Nov 2012 16:52:05 +0000
> Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk> wrote:
>
> > On Thursday 01 November 2012 16:20:03 Chris Wilson wrote:
> > > On Thu, 1 Nov 2012 09:04:02 -0700, Jesse Barnes <jbarnes@virtuousgeek.org>
> > wrote:
> > > > On Thu, 01 Nov 2012 15:52:23 +0000
> > > >
> > > > Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > > Actually I've justified the blocking here to myself, and prefer it to
> > > > > simply running the crtc->unpin_work. If userspace is swamping the system
> > > > > so badly that we can run the kthreads quick enough, it deserves a stall.
> > > > > Note that the unpin leak is still about the 3rd most common bug in
> > > > > fedora,
> > > > > so this stall will be forced on many machines.
> > > >
> > > > Hm funky, why does Fedora hit it so much? Does some of the GNOME shell
> > > > stuff run unthrottled or something?
> > >
> > > I don't think so. I trust that in Tvrtko's use case, he is not so much as
> > > hogging the GPU as keeping the system as a whole relatively busy. So I
> > > suspect it is more to do with CPU starvation of the kthreads than
> > > anything else.
> > >
> > > Tvrtko, do you have any feeling for why your machine was easily
> > > suspectible to this leak? Are the stalls noticeable and do they affect
> > > your performance targets?
> >
> > We didn't bother looking for any stalls, but for a long time we were
> > occasionally hitting this pin_count BUG i915_gem_object_pin. So it didn't in
> > fact affect our performance targets as much it completely wrecked our system.
> >
> > If this patch causes an occasional stall instead, given that this bug triggers
> > every 3-4 hours of uptime, we are fine with that. If a frame or so is missed
> > every couple hours on low end hardware we don't care that much.
> >
> > More on the actual workload...
> >
> > Only recently we got lucky and found a platform and workload where it happens
> > reliably. And this patch reliably fixes that.
> >
> > In this workload CPU is being loaded 50-60% decoding a movie and rendering it
> > to a full screen window. Our proprietary compositor page flips at 60Hz only,
> > not faster. Together with another small semi-transparent window being rendered
> > on top of the full screen movie. Movie played is a 25fps one, which means the
> > full screen window is damaged 25 out of 60 frames (give or take) which is when
> > we render to our back buffer and page flip at the vsync rate (60Hz).
> >
> > According to intel_gpu_top tool, GPU load is roughly at 40%, apart from the
> > "Framebuffer Compression" metric which is maxed out, if that is one is at all
> > valid.
> >
> > This particular scenario triggers the bug only on two of our Atom based
> > platform both with a NM10/Pineview G/i915 chipset.
>
> Ah ok on Atom you're probably CPU constrained a bit, but still at
> 50-60% utilization the kthreads should be running at least sometimes...
>
> But it sounds like a case of the kthreads not running instead of
> queueing too fast anyway (not that the latter is really possible
> without some hacking to the flip code).
>
It may help you here to know that we run both our compositor and the X server
at real-time priorities - both are SCHED_RR static priority 1 (the lowest
realtime priority). IIRC, the kthreads run at SCHED_OTHER priority, so we are
quite capable of starving them during a burst of activity.
--
Simon Farnsworth
Software Engineer
ONELAN Ltd
http://www.onelan.com
[-- Attachment #1.2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 490 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 16:52 ` Tvrtko Ursulin
2012-11-01 16:58 ` Jesse Barnes
@ 2012-11-02 21:31 ` Eric Anholt
1 sibling, 0 replies; 17+ messages in thread
From: Eric Anholt @ 2012-11-02 21:31 UTC (permalink / raw)
To: Tvrtko Ursulin, Chris Wilson; +Cc: intel-gfx
[-- Attachment #1.1: Type: text/plain, Size: 490 bytes --]
Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk> writes:
> According to intel_gpu_top tool, GPU load is roughly at 40%, apart from the
> "Framebuffer Compression" metric which is maxed out, if that is one is at all
> valid.
Often a bit is not actually hooked up to anything, in which case it will
be interpreted as 100% busy. We should probably turn those off on those
specific chips, but it's not a very well maintained tool (because it's
not a super useful tool, unfortunately).
[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping
2012-11-01 9:26 Chris Wilson
2012-11-01 15:07 ` Jesse Barnes
@ 2012-11-20 16:15 ` Daniel Vetter
1 sibling, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2012-11-20 16:15 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Thu, Nov 01, 2012 at 09:26:26AM +0000, Chris Wilson wrote:
> If we accumulate unpin tasks because we are pageflipping faster than the
> system can schedule its workers, we can effectively create a
> pin-leak. The solution taken here is to limit the number of unpin tasks
> we have per-crtc and to flush those outstanding tasks if we accumulate
> too many. This should prevent any jitter in the normal case, and also
> prevent the hang if we should run too fast.
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=46991
> Reported-and-tested-by: Tvrtko Ursulin <tvrtko.ursulin@onelan.co.uk>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Queued for -next with a note added to the commit message about the
workqueue related deadlock. Thanks for the patch.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2012-11-20 16:14 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-09-28 11:29 [PATCH] drm/i915: Flush outstanding unpin tasks before pageflipping Chris Wilson
2012-09-28 12:05 ` Ville Syrjälä
2012-09-28 12:07 ` Chris Wilson
2012-09-28 12:20 ` Ville Syrjälä
-- strict thread matches above, loose matches on Subject: below --
2012-11-01 9:26 Chris Wilson
2012-11-01 15:07 ` Jesse Barnes
2012-11-01 15:18 ` Chris Wilson
2012-11-01 15:29 ` Daniel Vetter
2012-11-01 15:34 ` Jesse Barnes
2012-11-01 15:52 ` Chris Wilson
2012-11-01 16:04 ` Jesse Barnes
2012-11-01 16:20 ` Chris Wilson
2012-11-01 16:52 ` Tvrtko Ursulin
2012-11-01 16:58 ` Jesse Barnes
2012-11-05 11:36 ` Simon Farnsworth
2012-11-02 21:31 ` Eric Anholt
2012-11-20 16:15 ` Daniel Vetter
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox