* [PATCH] drm/i915: disable flushing_list/gpu_write_list
@ 2012-06-13 18:45 Daniel Vetter
2012-06-13 21:05 ` Chris Wilson
0 siblings, 1 reply; 3+ messages in thread
From: Daniel Vetter @ 2012-06-13 18:45 UTC (permalink / raw)
To: Intel Graphics Development; +Cc: Daniel Vetter
This is just the minimal patch to disable all this code so that we can
do decent amounts of QA before we rip it all out.
The complicating thing is that we need to flush the gpu caches after
the batchbuffer is emitted. Which is past the point of no return where
execbuffer can't fail any more (otherwise we risk submitting the same
batch multiple times).
Hence we need to add a flag to track whether any caches associated
with that ring are dirty. And emit the flush in add_request if that's
the case.
Note that this has quite a few behaviour changes:
- Caches get flushed/invalidated unconditionally.
- Invalidation now happens after potential inter-ring sync.
I've bantered around a bit with Chris on irc whether this fixes
anything, and it might or might not. The only thing clear is that with
these changes it's much easier to reason about correctness.
Also rip out a lone get_next_request_seqno in the execbuffer
retire_commands function. I've dug around and I couldn't figure out
why that is still there, with the outstanding lazy request stuff it
shouldn't be necessary.
v2: Chris Wilson complained that I also invalidate the read caches
when flushing after a batchbuffer. Now optimized.
v3: Added some comments to explain the new flushing behaviour.
Cc: Eric Anholt <eric@anholt.net>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_gem.c | 25 +++++++++++---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 52 ++++++---------------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
3 files changed, 33 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2884b08..f756cb4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1568,6 +1568,21 @@ i915_add_request(struct intel_ring_buffer *ring,
int was_empty;
int ret;
+ /*
+ * Emit any outstanding flushes - execbuf can fail to emit the flush
+ * after having emitted the batchbuffer command. Hence we need to fix
+ * things up similar to emitting the lazy request. The difference here
+ * is that the flush _must_ happen before the next request, no matter
+ * what.
+ */
+ if (ring->gpu_caches_dirty) {
+ ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS);
+ if (ret)
+ return ret;
+
+ ring->gpu_caches_dirty = false;
+ }
+
BUG_ON(request == NULL);
seqno = i915_gem_next_request_seqno(ring);
@@ -1613,6 +1628,9 @@ i915_add_request(struct intel_ring_buffer *ring,
queue_delayed_work(dev_priv->wq,
&dev_priv->mm.retire_work, HZ);
}
+
+ WARN_ON(!list_empty(&ring->gpu_write_list));
+
return 0;
}
@@ -1827,14 +1845,11 @@ i915_gem_retire_work_handler(struct work_struct *work)
*/
idle = true;
for_each_ring(ring, dev_priv, i) {
- if (!list_empty(&ring->gpu_write_list)) {
+ if (ring->gpu_caches_dirty) {
struct drm_i915_gem_request *request;
- int ret;
- ret = i915_gem_flush_ring(ring,
- 0, I915_GEM_GPU_DOMAINS);
request = kzalloc(sizeof(*request), GFP_KERNEL);
- if (ret || request == NULL ||
+ if (request == NULL ||
i915_add_request(ring, NULL, request))
kfree(request);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 974a9f1..455c71c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -810,33 +810,16 @@ err:
return ret;
}
-static int
+static void
i915_gem_execbuffer_flush(struct drm_device *dev,
uint32_t invalidate_domains,
- uint32_t flush_domains,
- uint32_t flush_rings)
+ uint32_t flush_domains)
{
- drm_i915_private_t *dev_priv = dev->dev_private;
- int i, ret;
-
if (flush_domains & I915_GEM_DOMAIN_CPU)
intel_gtt_chipset_flush();
if (flush_domains & I915_GEM_DOMAIN_GTT)
wmb();
-
- if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
- for (i = 0; i < I915_NUM_RINGS; i++)
- if (flush_rings & (1 << i)) {
- ret = i915_gem_flush_ring(&dev_priv->ring[i],
- invalidate_domains,
- flush_domains);
- if (ret)
- return ret;
- }
- }
-
- return 0;
}
static int
@@ -885,12 +868,9 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
if (cd.invalidate_domains | cd.flush_domains) {
- ret = i915_gem_execbuffer_flush(ring->dev,
- cd.invalidate_domains,
- cd.flush_domains,
- cd.flush_rings);
- if (ret)
- return ret;
+ i915_gem_execbuffer_flush(ring->dev,
+ cd.invalidate_domains,
+ cd.flush_domains);
}
if (cd.flips) {
@@ -905,6 +885,11 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
return ret;
}
+ /* Unconditionally invalidate gpu caches. */
+ ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, 0);
+ if (ret)
+ return ret;
+
return 0;
}
@@ -983,26 +968,13 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
struct intel_ring_buffer *ring)
{
struct drm_i915_gem_request *request;
- u32 invalidate;
- /*
- * Ensure that the commands in the batch buffer are
- * finished before the interrupt fires.
- *
- * The sampler always gets flushed on i965 (sigh).
- */
- invalidate = I915_GEM_DOMAIN_COMMAND;
- if (INTEL_INFO(dev)->gen >= 4)
- invalidate |= I915_GEM_DOMAIN_SAMPLER;
- if (ring->flush(ring, invalidate, 0)) {
- i915_gem_next_request_seqno(ring);
- return;
- }
+ /* Unconditionally force add_request to emit a full flush. */
+ ring->gpu_caches_dirty = true;
/* Add a breadcrumb for the completion of the batch buffer */
request = kzalloc(sizeof(*request), GFP_KERNEL);
if (request == NULL || i915_add_request(ring, file, request)) {
- i915_gem_next_request_seqno(ring);
kfree(request);
}
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 55d3da2..5e90b78 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -113,6 +113,7 @@ struct intel_ring_buffer {
* Do we have some not yet emitted requests outstanding?
*/
u32 outstanding_lazy_request;
+ bool gpu_caches_dirty;
wait_queue_head_t irq_queue;
--
1.7.7.6
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] drm/i915: disable flushing_list/gpu_write_list
2012-06-13 18:45 [PATCH] drm/i915: disable flushing_list/gpu_write_list Daniel Vetter
@ 2012-06-13 21:05 ` Chris Wilson
2012-06-20 11:55 ` Daniel Vetter
0 siblings, 1 reply; 3+ messages in thread
From: Chris Wilson @ 2012-06-13 21:05 UTC (permalink / raw)
To: Intel Graphics Development; +Cc: Daniel Vetter
On Wed, 13 Jun 2012 20:45:19 +0200, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> This is just the minimal patch to disable all this code so that we can
> do decent amounts of QA before we rip it all out.
>
> The complicating thing is that we need to flush the gpu caches after
> the batchbuffer is emitted. Which is past the point of no return where
> execbuffer can't fail any more (otherwise we risk submitting the same
> batch multiple times).
>
> Hence we need to add a flag to track whether any caches associated
> with that ring are dirty. And emit the flush in add_request if that's
> the case.
>
> Note that this has a quite a few behaviour changes:
> - Caches get flushed/invalidated unconditionally.
> - Invalidation now happens after potential inter-ring sync.
>
> I've bantered around a bit with Chris on irc whether this fixes
> anything, and it might or might not. The only thing clear is that with
> these changes it's much easier to reason about correctness.
>
> Also rip out a lone get_next_request_seqno in the execbuffer
> retire_commands function. I've dug around and I couldn't figure out
> why that is still there, with the outstanding lazy request stuff it
> shouldn't be necessary.
>
> v2: Chris Wilson complained that I also invalidate the read caches
> when flushing after a batchbuffer. Now optimized.
>
> v3: Added some comments to explain the new flushing behaviour.
>
> Cc: Eric Anholt <eric@anholt.net>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This seems to work fine for 2D workloads, so
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
I'll follow up in a few days with a tested-by if nothing untoward
happens.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] drm/i915: disable flushing_list/gpu_write_list
2012-06-13 21:05 ` Chris Wilson
@ 2012-06-20 11:55 ` Daniel Vetter
0 siblings, 0 replies; 3+ messages in thread
From: Daniel Vetter @ 2012-06-20 11:55 UTC (permalink / raw)
To: Chris Wilson; +Cc: Daniel Vetter, Intel Graphics Development
On Wed, Jun 13, 2012 at 10:05:39PM +0100, Chris Wilson wrote:
> On Wed, 13 Jun 2012 20:45:19 +0200, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> > This is just the minimal patch to disable all this code so that we can
> > do decent amounts of QA before we rip it all out.
> >
> > The complicating thing is that we need to flush the gpu caches after
> > the batchbuffer is emitted. Which is past the point of no return where
> > execbuffer can't fail any more (otherwise we risk submitting the same
> > batch multiple times).
> >
> > Hence we need to add a flag to track whether any caches associated
> > with that ring are dirty. And emit the flush in add_request if that's
> > the case.
> >
> > Note that this has a quite a few behaviour changes:
> > - Caches get flushed/invalidated unconditionally.
> > - Invalidation now happens after potential inter-ring sync.
> >
> > I've bantered around a bit with Chris on irc whether this fixes
> > anything, and it might or might not. The only thing clear is that with
> > these changes it's much easier to reason about correctness.
> >
> > Also rip out a lone get_next_request_seqno in the execbuffer
> > retire_commands function. I've dug around and I couldn't figure out
> > why that is still there, with the outstanding lazy request stuff it
> > shouldn't be necessary.
> >
> > v2: Chris Wilson complained that I also invalidate the read caches
> > when flushing after a batchbuffer. Now optimized.
> >
> > v3: Added some comments to explain the new flushing behaviour.
> >
> > Cc: Eric Anholt <eric@anholt.net>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> This seems to work fine for 2D workloads, so
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Ok, after testing this again on my snb with the context stuff applied I've
queued this up for -next. Let's see how well it fares ;-)
Thanks for your review.
-Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-06-20 11:54 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-06-13 18:45 [PATCH] drm/i915: disable flushing_list/gpu_write_list Daniel Vetter
2012-06-13 21:05 ` Chris Wilson
2012-06-20 11:55 ` Daniel Vetter
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox