[PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
@ 2012-12-16 22:46 Chris Wilson
  2012-12-17 13:33 ` Chris Wilson
  0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2012-12-16 22:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

From: Daniel Vetter <daniel.vetter@ffwll.ch>

Now that Chris Wilson demonstrated that the key for stability on early
gen 2 is to simple _never_ exchange the physical backing storage of
batch buffers I've tried a stab at a kernel solution. Doesn't look too
nefarious imho, now that I don't try to be too clever for my own good
any more.

v2: After discussing the various techniques, we've decided to always blit
batches on the suspect devices, but allow userspace to opt out of the
kernel workaround assume full responsibility for providing coherent
batches. The principal reason is that avoiding the blit does improve
performance in a few key microbenchmarks and also in cairo-trace
replays.

Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 ++
 drivers/gpu/drm/i915/i915_drv.h            |    4 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    2 +
 drivers/gpu/drm/i915/i915_irq.c            |    3 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c    |   78 ++++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h    |    1 +
 include/uapi/drm/i915_drm.h                |   10 ++++
 7 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8f63cd5..99daa89 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
 		break;
+	case I915_PARAM_HAS_PINNED_BATCHES:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 557843d..50c9bee 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1100,6 +1100,7 @@ struct drm_i915_gem_object {
 	 */
 	atomic_t pending_flip;
 };
+#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
 
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
@@ -1196,6 +1197,9 @@ struct drm_i915_file_private {
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
 
+/* Early gen2 have a totally busted CS tlb and require pinned batches. */
+#define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
+
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
  */
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 43fb6d4..a8e7f2b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -820,6 +820,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 		flags |= I915_DISPATCH_SECURE;
 	}
+	if (args->flags & I915_EXEC_IS_PINNED)
+		flags |= I915_DISPATCH_PINNED;
 
 	switch (args->flags & I915_EXEC_RING_MASK) {
 	case I915_EXEC_DEFAULT:
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a4dc97f..6ebdf1e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1087,6 +1087,9 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	if (!ring->get_seqno)
 		return NULL;
 
+	if (HAS_BROKEN_CS_TLB(dev_priv->dev))
+		return i915_error_object_create(dev_priv, ring->private);
+
 	seqno = ring->get_seqno(ring, false);
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
 		if (obj->ring != ring)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2346b92..d0fe4cb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -547,9 +547,14 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
+	struct drm_device *dev = ring->dev;
+
 	if (!ring->private)
 		return;
 
+	if (HAS_BROKEN_CS_TLB(dev))
+		drm_gem_object_unreference(to_gem_object(ring->private));
+
 	cleanup_pipe_control(ring);
 }
 
@@ -969,6 +974,8 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT (256*1024)
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 				u32 offset, u32 len,
@@ -976,15 +983,47 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 {
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags & I915_DISPATCH_PINNED) {
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER);
-	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-	intel_ring_emit(ring, offset + len - 8);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, offset + len - 8);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	} else {
+		struct drm_i915_gem_object *obj = ring->private;
+		u32 cs_offset = obj->gtt_offset;
+
+		if (len > I830_BATCH_LIMIT)
+			return -ENOSPC;
+
+		ret = intel_ring_begin(ring, 9+3);
+		if (ret)
+			return ret;
+		/* Blit the batch (which has now all relocs applied) to the stable batch
+		 * scratch bo area (so that the CS never stumbles over its tlb
+		 * invalidation bug) ... */
+		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
+				XY_SRC_COPY_BLT_WRITE_ALPHA |
+				XY_SRC_COPY_BLT_WRITE_RGB);
+		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+		intel_ring_emit(ring, cs_offset);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, 4096);
+		intel_ring_emit(ring, offset);
+		intel_ring_emit(ring, MI_FLUSH);
+
+		/* ... and execute it. */
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, cs_offset + len - 8);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
@@ -1148,7 +1187,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	 * of the buffer.
 	 */
 	ring->effective_size = ring->size;
-	if (IS_I830(ring->dev) || IS_845G(ring->dev))
+	if (HAS_BROKEN_CS_TLB(ring->dev))
 		ring->effective_size -= 128;
 
 	return 0;
@@ -1596,6 +1635,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
+	/* Workaround batchbuffer to combat CS tlb bug. */
+	if (HAS_BROKEN_CS_TLB(dev)) {
+		struct drm_i915_gem_object *obj;
+		int ret;
+
+		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate batch bo\n");
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_object_pin(obj, 0, true, false);
+		if (ret != 0) {
+			drm_gem_object_unreference(&obj->base);
+			DRM_ERROR("Failed to ping batch bo\n");
+			return ret;
+		}
+
+		ring->private = obj;
+	}
+
 	return intel_init_ring_buffer(dev, ring);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 526182e..6af87cd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -94,6 +94,7 @@ struct  intel_ring_buffer {
 					       u32 offset, u32 length,
 					       unsigned flags);
 #define I915_DISPATCH_SECURE 0x1
+#define I915_DISPATCH_PINNED 0x2
 	void		(*cleanup)(struct intel_ring_buffer *ring);
 	int		(*sync_to)(struct intel_ring_buffer *ring,
 				   struct intel_ring_buffer *to,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b746a3c..c4d2e9c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_PRIME_VMAP_FLUSH	 21
 #define I915_PARAM_RSVD_FOR_FUTURE_USE	 22
 #define I915_PARAM_HAS_SECURE_BATCHES	 23
+#define I915_PARAM_HAS_PINNED_BATCHES	 24
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -677,6 +678,15 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_SECURE		(1<<9)
 
+/** Inform the kernel that the batch is and will always be pinned. This
+ * negates the requirement for a workaround to be performed to avoid
+ * an incoherent CS (such as can be found on 830/845). If this flag is
+ * not passed, the kernel will endeavour to make sure the batch is
+ * coherent with the CS before execution. If this flag is passed,
+ * userspace assumes the responsibility for ensuring the same.
+ */
+#define I915_EXEC_IS_PINNED		(1<<10)
+
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
 	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-16 22:46 [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845 Chris Wilson
@ 2012-12-17 13:33 ` Chris Wilson
  2012-12-17 13:43   ` Daniel Vetter
  2012-12-17 15:18   ` Chris Wilson
  0 siblings, 2 replies; 10+ messages in thread
From: Chris Wilson @ 2012-12-17 13:33 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

On Sun, 16 Dec 2012 22:46:00 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> @@ -1087,6 +1087,9 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
>  	if (!ring->get_seqno)
>  		return NULL;
>  
> +	if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> +		return i915_error_object_create(dev_priv, ring->private);

Hmm, this is complicated by userspace opting out of the CS w/a, and
imposes quite a burden upon our simple seq interface.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-17 13:33 ` Chris Wilson
@ 2012-12-17 13:43   ` Daniel Vetter
  2012-12-17 14:46     ` Ville Syrjälä
  2012-12-17 15:18   ` Chris Wilson
  1 sibling, 1 reply; 10+ messages in thread
From: Daniel Vetter @ 2012-12-17 13:43 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Mon, Dec 17, 2012 at 01:33:01PM +0000, Chris Wilson wrote:
> On Sun, 16 Dec 2012 22:46:00 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > @@ -1087,6 +1087,9 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> >  	if (!ring->get_seqno)
> >  		return NULL;
> >  
> > +	if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> > +		return i915_error_object_create(dev_priv, ring->private);
> 
> Hmm, this is complicated by userspace opting out of the CS w/a, and
> imposes quite a burden upon our simple seq interface.

Right, I've written this without userspace being able to opt out in mind
... for the seq -ENOMEM, I guess it's just time to bite the bullet. Loads
of the error_states for the ilk fallout couldn't be dumped (but could be
captured) by bug reporters already :(
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-17 13:43   ` Daniel Vetter
@ 2012-12-17 14:46     ` Ville Syrjälä
  2012-12-17 15:12       ` Daniel Vetter
  0 siblings, 1 reply; 10+ messages in thread
From: Ville Syrjälä @ 2012-12-17 14:46 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx

On Mon, Dec 17, 2012 at 02:43:03PM +0100, Daniel Vetter wrote:
> On Mon, Dec 17, 2012 at 01:33:01PM +0000, Chris Wilson wrote:
> > On Sun, 16 Dec 2012 22:46:00 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > @@ -1087,6 +1087,9 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> > >  	if (!ring->get_seqno)
> > >  		return NULL;
> > >  
> > > +	if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> > > +		return i915_error_object_create(dev_priv, ring->private);
> > 
> > Hmm, this is complicated by userspace opting out of the CS w/a, and
> > imposes quite a burden upon our simple seq interface.
> 
> Right, I've written this without userspace being able to opt out in mind
> ... for the seq -ENOMEM, I guess it's just time to bite the bullet. Loads
> of the error_states for the ilk fallout couldn't be dumped (but could be
> captured) by bug reporters already :(

Assuming you're talking about debugfs error_state returning -ENOMEM due
to seq_file's massive kmalloc(), I had a couple of ideas for fixing it
in seq_file itself.

1) just use vmalloc()

2) use multiple pages instead of one big allocation

 seq_printf() {
   try to print the line
   if not enough space {
     mark the end of valid data in current page
     allocate a new page
     print again
   }
 }

And adjust seq_read()/seq_lseek accordingly.

Of course then you can't print anything > PAGE_SIZE,
but that seems unlikely anyway, and if really needed
it could try to allocate something larger than a page
when needed.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-17 14:46     ` Ville Syrjälä
@ 2012-12-17 15:12       ` Daniel Vetter
  2012-12-17 15:39         ` Ville Syrjälä
  0 siblings, 1 reply; 10+ messages in thread
From: Daniel Vetter @ 2012-12-17 15:12 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: Daniel Vetter, intel-gfx

On Mon, Dec 17, 2012 at 04:46:17PM +0200, Ville Syrjälä wrote:
> On Mon, Dec 17, 2012 at 02:43:03PM +0100, Daniel Vetter wrote:
> > On Mon, Dec 17, 2012 at 01:33:01PM +0000, Chris Wilson wrote:
> > > On Sun, 16 Dec 2012 22:46:00 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > @@ -1087,6 +1087,9 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> > > >  	if (!ring->get_seqno)
> > > >  		return NULL;
> > > >  
> > > > +	if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> > > > +		return i915_error_object_create(dev_priv, ring->private);
> > > 
> > > Hmm, this is complicated by userspace opting out of the CS w/a, and
> > > imposes quite a burden upon our simple seq interface.
> > 
> > Right, I've written this without userspace being able to opt out in mind
> > ... for the seq -ENOMEM, I guess it's just time to bite the bullet. Loads
> > of the error_states for the ilk fallout couldn't be dumped (but could be
> > captured) by bug reporters already :(
> 
> Assuming you're talking about debugfs error_state returning -ENOMEM due
> to seq_file's massive kmalloc(), I had a couple of ideas for fixing it
> in seq_file itself.
> 
> 1) just use vmalloc()
> 
> 2) use multiple pages instead of one big allocation
> 
>  seq_printf() {
>    try to print the line
>    if not enough space {
>      mark the end of valid data in current page
>      allocate a new page
>      print again
>    }
>  }
> 
> And adjust seq_read()/seq_lseek accordingly.
> 
> Of course then you can't print anything > PAGE_SIZE,
> but that seems unlikely anyway, and if really needed
> it could try to allocate something larger than a page
> when needed.

Afaik that's pretty much what the real seq_file interface does, we just
need to port the error state dump code over from the simple_seq_file to
the real one. But since seq_file is designed for more regular stuff, this
will be a slight pain, so I've tried to avoid this ... It sounds like I'm
running low on excuses ;-)
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-17 15:12       ` Daniel Vetter
@ 2012-12-17 15:39         ` Ville Syrjälä
  0 siblings, 0 replies; 10+ messages in thread
From: Ville Syrjälä @ 2012-12-17 15:39 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Daniel Vetter, intel-gfx

On Mon, Dec 17, 2012 at 04:12:18PM +0100, Daniel Vetter wrote:
> On Mon, Dec 17, 2012 at 04:46:17PM +0200, Ville Syrjälä wrote:
> > On Mon, Dec 17, 2012 at 02:43:03PM +0100, Daniel Vetter wrote:
> > > On Mon, Dec 17, 2012 at 01:33:01PM +0000, Chris Wilson wrote:
> > > > On Sun, 16 Dec 2012 22:46:00 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > > @@ -1087,6 +1087,9 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> > > > >  	if (!ring->get_seqno)
> > > > >  		return NULL;
> > > > >  
> > > > > +	if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> > > > > +		return i915_error_object_create(dev_priv, ring->private);
> > > > 
> > > > Hmm, this is complicated by userspace opting out of the CS w/a, and
> > > > imposes quite a burden upon our simple seq interface.
> > > 
> > > Right, I've written this without userspace being able to opt out in mind
> > > ... for the seq -ENOMEM, I guess it's just time to bite the bullet. Loads
> > > of the error_states for the ilk fallout couldn't be dumped (but could be
> > > captured) by bug reporters already :(
> > 
> > Assuming you're talking about debugfs error_state returning -ENOMEM due
> > to seq_file's massive kmalloc(), I had a couple of ideas for fixing it
> > in seq_file itself.
> > 
> > 1) just use vmalloc()
> > 
> > 2) use multiple pages instead of one big allocation
> > 
> >  seq_printf() {
> >    try to print the line
> >    if not enough space {
> >      mark the end of valid data in current page
> >      allocate a new page
> >      print again
> >    }
> >  }
> > 
> > And adjust seq_read()/seq_lseek accordingly.
> > 
> > Of course then you can't print anything > PAGE_SIZE,
> > but that seems unlikely anyway, and if really needed
> > it could try to allocate something larger than a page
> > when needed.
> 
> Afaik that's pretty much what the real seq_file interface does,

AFAICS it just starts w/ a buffer size of one page, and it just
retries in a loop calling ->show() and doubling the buffer size
every time seq_printf() indicated an overflow. So it wants to
print the whole thing into a single contiguous buffer.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-17 13:33 ` Chris Wilson
  2012-12-17 13:43   ` Daniel Vetter
@ 2012-12-17 15:18   ` Chris Wilson
  2012-12-17 15:23     ` Chris Wilson
  1 sibling, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2012-12-17 15:18 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

On Mon, 17 Dec 2012 13:33:01 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Sun, 16 Dec 2012 22:46:00 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > @@ -1087,6 +1087,9 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> >  	if (!ring->get_seqno)
> >  		return NULL;
> >  
> > +	if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> > +		return i915_error_object_create(dev_priv, ring->private);
> 
> Hmm, this is complicated by userspace opting out of the CS w/a, and
> imposes quite a burden upon our simple seq interface.

One possible solution is:

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6ebdf1e..0e3bd04 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1087,8 +1087,14 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev
        if (!ring->get_seqno)
                return NULL;
 
-       if (HAS_BROKEN_CS_TLB(dev_priv->dev))
-               return i915_error_object_create(dev_priv, ring->private);
+       if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
+               u32 acthd = I915_READ(ACTHD);
+
+               obj = ring->private;
+               if (acthd >= obj->gtt_offset &&
+                   acthd <= obj->gtt_offset + obj->base.size)
+                       return i915_error_object_create(dev_priv, obj);
+       }
 
        seqno = ring->get_seqno(ring, false);
        list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-17 15:18   ` Chris Wilson
@ 2012-12-17 15:23     ` Chris Wilson
  2012-12-17 15:30       ` Daniel Vetter
  0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2012-12-17 15:23 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

On Mon, 17 Dec 2012 15:18:41 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 6ebdf1e..0e3bd04 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1087,8 +1087,14 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev
>         if (!ring->get_seqno)
>                 return NULL;
>  
> -       if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> -               return i915_error_object_create(dev_priv, ring->private);
> +       if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
> +               u32 acthd = I915_READ(ACTHD);

All the implicit knowledge here is making me worried:

if (WARN_ON(ring->id != RCS)) return NULL;

> +
> +               obj = ring->private;
> +               if (acthd >= obj->gtt_offset &&
> +                   acthd <= obj->gtt_offset + obj->base.size)
> +                       return i915_error_object_create(dev_priv, obj);
> +       }

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
  2012-12-17 15:23     ` Chris Wilson
@ 2012-12-17 15:30       ` Daniel Vetter
  0 siblings, 0 replies; 10+ messages in thread
From: Daniel Vetter @ 2012-12-17 15:30 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx

On Mon, Dec 17, 2012 at 03:23:22PM +0000, Chris Wilson wrote:
> On Mon, 17 Dec 2012 15:18:41 +0000, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 6ebdf1e..0e3bd04 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1087,8 +1087,14 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev
> >         if (!ring->get_seqno)
> >                 return NULL;
> >  
> > -       if (HAS_BROKEN_CS_TLB(dev_priv->dev))
> > -               return i915_error_object_create(dev_priv, ring->private);
> > +       if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
> > +               u32 acthd = I915_READ(ACTHD);
> 
> All the implicit knowledge here is making me worried:
> 
> if (WARN_ON(ring->id != RCS)) return NULL;

Applied both fixups and merged to -fixes, with the little s/<=/</ bikeshed
applied - the potential off-by-one confuses my parser. Let's see how it
holds up.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845
@ 2012-12-16 17:08 Daniel Vetter
  0 siblings, 0 replies; 10+ messages in thread
From: Daniel Vetter @ 2012-12-16 17:08 UTC (permalink / raw)
  To: Intel Graphics Development; +Cc: Daniel Vetter

Now that Chris Wilson demonstrated that the key for stability on ealry
gen 2 is to simple _never_ exchange the physical backing storage of
batch buffers I've tried a stab at a kernel solution. Doesn't look too
nefarious imho, now that I don't try to be too clever for my own good
any more.

Open issue is how to coordinate the workaround between the kernel and
SNA, since that just pins a set of batches for the same effect:
a) Add a flag to the kernel, disable the w/a in SNA when it's set.
b) Check in the kernel if the batch bo is pinned, if so userspace has
the w/a already and we don't need to copy things again - a bit ugly to
wire up with the current approach, since the function implement the
batch w/a is far away from where we can detect whether the batch is
pinned by userspace already.
c) Don't care, blitting batches isn't too expensive.

Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h         |  3 ++
 drivers/gpu/drm/i915/i915_irq.c         |  6 ++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 58 ++++++++++++++++++++++++++++++---
 3 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 10ebd51..3d2a78b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1233,6 +1233,9 @@ struct drm_i915_file_private {
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
 
+/* Early gen2 have a totally busted CS tlb and require pinned batches. */
+#define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
+
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
  */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1d40f77..7d4ea0b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1119,6 +1119,12 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	if (!ring->get_seqno)
 		return NULL;
 
+	if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
+		obj = ring->private;
+
+		return i915_error_object_create(dev_priv, obj);
+	}
+
 	seqno = ring->get_seqno(ring, false);
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
 		if (obj->ring != ring)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fdc6334..fb7c7b5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -547,9 +547,17 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
+	struct drm_device *dev = ring->dev;
+
 	if (!ring->private)
 		return;
 
+	if (HAS_BROKEN_CS_TLB(dev)) {
+		struct drm_i915_gem_object *obj = ring->private;
+
+		drm_gem_object_unreference(&obj->base);
+	}
+
 	cleanup_pipe_control(ring);
 }
 
@@ -985,20 +993,41 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT (256*1024)
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 				u32 offset, u32 len,
 				unsigned flags)
 {
+	struct drm_i915_gem_object *obj = ring->private;
+	u32 cs_offset = obj->gtt_offset;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	if (len > I830_BATCH_LIMIT)
+		return -ENOSPC;
+
+	ret = intel_ring_begin(ring, 8+4);
 	if (ret)
 		return ret;
+	/* Blit the batch (which has now all relocs applied) to the stable batch
+	 * scratch bo area (so that the CS never stumbles over its tlb
+	 * invalidation bug) ... */
+	intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
+			      XY_SRC_COPY_BLT_WRITE_ALPHA |
+			      XY_SRC_COPY_BLT_WRITE_RGB);
+	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+	intel_ring_emit(ring, cs_offset);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 4096);
+	intel_ring_emit(ring, offset);
 
+	/* ... and execute it. */
 	intel_ring_emit(ring, MI_BATCH_BUFFER);
-	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-	intel_ring_emit(ring, offset + len - 8);
+	intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+	intel_ring_emit(ring, cs_offset + len - 8);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
@@ -1168,7 +1197,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	 * of the buffer.
 	 */
 	ring->effective_size = ring->size;
-	if (IS_I830(ring->dev) || IS_845G(ring->dev))
+	if (HAS_BROKEN_CS_TLB(ring->dev))
 		ring->effective_size -= 128;
 
 	return 0;
@@ -1645,6 +1674,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
+	/* Workaround batchbuffer to combat CS tlb bug. */
+	if (HAS_BROKEN_CS_TLB(dev)) {
+		struct drm_i915_gem_object *obj;
+		int ret;
+
+		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate batch bo\n");
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_object_pin(obj, 4096, true, false);
+		if (ret != 0) {
+			drm_gem_object_unreference(&obj->base);
+			DRM_ERROR("Failed to ping batch bo\n");
+			return ret;
+		}
+
+		ring->private = obj;
+	}
+
 	return intel_init_ring_buffer(dev, ring);
 }
 
-- 
1.7.11.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2012-12-17 15:39 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-12-16 22:46 [PATCH] drm/i915: Implement workaround for broken CS tlb on i830/845 Chris Wilson
2012-12-17 13:33 ` Chris Wilson
2012-12-17 13:43   ` Daniel Vetter
2012-12-17 14:46     ` Ville Syrjälä
2012-12-17 15:12       ` Daniel Vetter
2012-12-17 15:39         ` Ville Syrjälä
2012-12-17 15:18   ` Chris Wilson
2012-12-17 15:23     ` Chris Wilson
2012-12-17 15:30       ` Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2012-12-16 17:08 Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.