All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define.
@ 2011-10-04  6:02 Kenneth Graunke
  2011-10-04  6:02 ` [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse Kenneth Graunke
                   ` (3 more replies)
  0 siblings, 4 replies; 22+ messages in thread
From: Kenneth Graunke @ 2011-10-04  6:02 UTC (permalink / raw)
  To: intel-gfx

Not all PIPE_CONTROLs have a length of 2, so remove it from the #define
and make each invocation specify the desired length.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/i915_reg.h         |    2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |    4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 138eae1..d691781 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -242,7 +242,7 @@
 #define   ASYNC_FLIP                (1<<22)
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
-#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
+#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
 #define   PIPE_CONTROL_QW_WRITE	(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
 #define   PIPE_CONTROL_WC_FLUSH	(1<<12)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0e99589..67ce601 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -467,7 +467,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 2 | PIPE_CONTROL_QW_WRITE |
 			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
 	intel_ring_emit(ring, seqno);
@@ -483,7 +483,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 128;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 2 | PIPE_CONTROL_QW_WRITE |
 			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-- 
1.7.6.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse.
  2011-10-04  6:02 [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Kenneth Graunke
@ 2011-10-04  6:02 ` Kenneth Graunke
  2011-10-04  8:30   ` Daniel Vetter
  2011-10-05 22:29   ` Ben Widawsky
  2011-10-04  6:02 ` [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+ Kenneth Graunke
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 22+ messages in thread
From: Kenneth Graunke @ 2011-10-04  6:02 UTC (permalink / raw)
  To: intel-gfx

"STALL_AT_SCOREBOARD" is much clearer than "STALL_EN" now that there are
several different kinds of stalls.  Also, "INSTRUCTION_CACHE_FLUSH" is a
lot easier to understand at a glance than the terse "IS_FLUSH."

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/i915_reg.h         |   16 ++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.c |    6 ++++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d691781..bfe8488 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -243,15 +243,15 @@
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
-#define   PIPE_CONTROL_QW_WRITE	(1<<14)
-#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
-#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
-#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
-#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
-#define   PIPE_CONTROL_ISP_DIS	(1<<9)
-#define   PIPE_CONTROL_NOTIFY	(1<<8)
+#define   PIPE_CONTROL_QW_WRITE			(1<<14)
+#define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
+#define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
+#define   PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH	(1<<11) /* MBZ on Ironlake */
+#define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE	(1<<9)
+#define   PIPE_CONTROL_NOTIFY			(1<<8)
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD	(1<<1)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-#define   PIPE_CONTROL_STALL_EN	(1<<1) /* in addr word, Ironlake+ only */
 
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 67ce601..2b572fd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -468,7 +468,8 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 		return ret;
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 2 | PIPE_CONTROL_QW_WRITE |
-			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH);
+			PIPE_CONTROL_WRITE_FLUSH |
+			PIPE_CONTROL_TEXTURE_CACHE_FLUSH);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
 	intel_ring_emit(ring, seqno);
 	intel_ring_emit(ring, 0);
@@ -484,7 +485,8 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 	scratch_addr += 128;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 2 | PIPE_CONTROL_QW_WRITE |
-			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH |
+			PIPE_CONTROL_WRITE_FLUSH |
+			PIPE_CONTROL_TEXTURE_CACHE_FLUSH |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
 	intel_ring_emit(ring, seqno);
-- 
1.7.6.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-04  6:02 [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Kenneth Graunke
  2011-10-04  6:02 ` [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse Kenneth Graunke
@ 2011-10-04  6:02 ` Kenneth Graunke
  2011-10-05 22:57   ` Ben Widawsky
  2011-10-05 23:39   ` Ben Widawsky
  2011-10-05 20:35 ` [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Ben Widawsky
  2011-10-11 11:09 ` Daniel Vetter
  3 siblings, 2 replies; 22+ messages in thread
From: Kenneth Graunke @ 2011-10-04  6:02 UTC (permalink / raw)
  To: intel-gfx

From: Jesse Barnes <jbarnes@virtuousgeek.org>

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
---
 drivers/gpu/drm/i915/i915_reg.h         |    5 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |  136 ++++++++++++++++++++++++++++---
 2 files changed, 129 insertions(+), 12 deletions(-)

v2:
 - Add State & Constant Cache bits as suggested by Daniel.
 - Specify length directly rather than hiding it in a GEN6 #define.
 - Use more verbose bit field names, for clarity.
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bfe8488..81713ae 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -243,6 +243,7 @@
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
+#define   PIPE_CONTROL_CS_STALL			(1<<20)
 #define   PIPE_CONTROL_QW_WRITE			(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
 #define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
@@ -250,7 +251,11 @@
 #define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE	(1<<9)
 #define   PIPE_CONTROL_NOTIFY			(1<<8)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE	(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1<<2)
 #define   PIPE_CONTROL_STALL_AT_SCOREBOARD	(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2b572fd..f841d5c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,16 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
+struct pipe_control {
+	struct drm_i915_gem_object *obj;
+	volatile u32 *cpu_page;
+	u32 gtt_offset;
+};
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -123,6 +133,118 @@ render_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+static int
+intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+{
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0); /* low dword */
+	intel_ring_emit(ring, 0); /* high dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
+	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen6_render_ring_flush(struct intel_ring_buffer *ring,
+                         u32 invalidate_domains, u32 flush_domains)
+{
+	u32 flags = 0;
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	intel_emit_post_sync_nonzero_flush(ring);
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	flags |= PIPE_CONTROL_WRITE_FLUSH;
+	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_TEXTURE_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
+	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, 0); /* lower dword */
+	intel_ring_emit(ring, 0); /* uppwer dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -206,16 +328,6 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 	return 0;
 }
 
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
-	struct drm_i915_gem_object *obj;
-	volatile u32 *cpu_page;
-	u32 gtt_offset;
-};
-
 static int
 init_pipe_control(struct intel_ring_buffer *ring)
 {
@@ -296,8 +408,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-	} else if (IS_GEN5(dev)) {
+	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
 		if (ret)
 			return ret;
@@ -1360,6 +1471,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
+		ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {
-- 
1.7.6.1

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse.
  2011-10-04  6:02 ` [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse Kenneth Graunke
@ 2011-10-04  8:30   ` Daniel Vetter
  2011-10-05  5:13     ` Kenneth Graunke
  2011-10-05 22:29   ` Ben Widawsky
  1 sibling, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2011-10-04  8:30 UTC (permalink / raw)
  To: Kenneth Graunke; +Cc: intel-gfx

On Mon, Oct 03, 2011 at 11:02:39PM -0700, Kenneth Graunke wrote:
> "STALL_AT_SCOREBOARD" is much clearer than "STALL_EN" now that there are
> several different kinds of stalls.  Also, "INSTRUCTION_CACHE_FLUSH" is a
> lot easier to understand at a glance than the terse "IS_FLUSH."
> 
> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h         |   16 ++++++++--------
>  drivers/gpu/drm/i915/intel_ringbuffer.c |    6 ++++--
>  2 files changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index d691781..bfe8488 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -243,15 +243,15 @@
>  #define   DISPLAY_PLANE_A           (0<<20)
>  #define   DISPLAY_PLANE_B           (1<<20)
>  #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
> -#define   PIPE_CONTROL_QW_WRITE	(1<<14)
> -#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
> -#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
> -#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
> -#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
> -#define   PIPE_CONTROL_ISP_DIS	(1<<9)
> -#define   PIPE_CONTROL_NOTIFY	(1<<8)
> +#define   PIPE_CONTROL_QW_WRITE			(1<<14)
> +#define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
> +#define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
> +#define   PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH	(1<<11) /* MBZ on Ironlake */
> +#define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */

Minor bikeshed: You retain the _FLUSH for the read-only
instruction/texture caches, but use _INVALIDATE for the new bits for
read-only caches. I think we want _INVALIDATE for all of them.

Otherwise, these three patches are:
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> +#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE	(1<<9)
> +#define   PIPE_CONTROL_NOTIFY			(1<<8)
> +#define   PIPE_CONTROL_STALL_AT_SCOREBOARD	(1<<1)
>  #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
> -#define   PIPE_CONTROL_STALL_EN	(1<<1) /* in addr word, Ironlake+ only */
>  
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 67ce601..2b572fd 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -468,7 +468,8 @@ pc_render_add_request(struct intel_ring_buffer *ring,
>  		return ret;
>  
>  	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 2 | PIPE_CONTROL_QW_WRITE |
> -			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH);
> +			PIPE_CONTROL_WRITE_FLUSH |
> +			PIPE_CONTROL_TEXTURE_CACHE_FLUSH);
>  	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
>  	intel_ring_emit(ring, seqno);
>  	intel_ring_emit(ring, 0);
> @@ -484,7 +485,8 @@ pc_render_add_request(struct intel_ring_buffer *ring,
>  	scratch_addr += 128;
>  	PIPE_CONTROL_FLUSH(ring, scratch_addr);
>  	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 2 | PIPE_CONTROL_QW_WRITE |
> -			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH |
> +			PIPE_CONTROL_WRITE_FLUSH |
> +			PIPE_CONTROL_TEXTURE_CACHE_FLUSH |
>  			PIPE_CONTROL_NOTIFY);
>  	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
>  	intel_ring_emit(ring, seqno);
> -- 
> 1.7.6.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse.
  2011-10-04  8:30   ` Daniel Vetter
@ 2011-10-05  5:13     ` Kenneth Graunke
  2011-10-05 10:05       ` Daniel Vetter
  0 siblings, 1 reply; 22+ messages in thread
From: Kenneth Graunke @ 2011-10-05  5:13 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On 10/04/2011 01:30 AM, Daniel Vetter wrote:
> On Mon, Oct 03, 2011 at 11:02:39PM -0700, Kenneth Graunke wrote:
>> "STALL_AT_SCOREBOARD" is much clearer than "STALL_EN" now that there are
>> several different kinds of stalls.  Also, "INSTRUCTION_CACHE_FLUSH" is a
>> lot easier to understand at a glance than the terse "IS_FLUSH."
>>
>> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
>> ---
>>  drivers/gpu/drm/i915/i915_reg.h         |   16 ++++++++--------
>>  drivers/gpu/drm/i915/intel_ringbuffer.c |    6 ++++--
>>  2 files changed, 12 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index d691781..bfe8488 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -243,15 +243,15 @@
>>  #define   DISPLAY_PLANE_A           (0<<20)
>>  #define   DISPLAY_PLANE_B           (1<<20)
>>  #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
>> -#define   PIPE_CONTROL_QW_WRITE	(1<<14)
>> -#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
>> -#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
>> -#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
>> -#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
>> -#define   PIPE_CONTROL_ISP_DIS	(1<<9)
>> -#define   PIPE_CONTROL_NOTIFY	(1<<8)
>> +#define   PIPE_CONTROL_QW_WRITE			(1<<14)
>> +#define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
>> +#define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
>> +#define   PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH	(1<<11) /* MBZ on Ironlake */
>> +#define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */
> 
> Minor bikeshed: You retain the _FLUSH for the read-only
> instruction/texture caches, but use _INVALIDATE for the new bits for
> read-only caches. I think we want _INVALIDATE for all of them.
> 
> Otherwise, these three patches are:
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Yeah.  The docs for pre-SNB use "Flush" in all the field names, while
the docs for SNB+ switch to "Invalidate".  So I just used it for the new
ones.  If we'd rather go with "invalidate" for the old bits too, I can
do that.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse.
  2011-10-05  5:13     ` Kenneth Graunke
@ 2011-10-05 10:05       ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2011-10-05 10:05 UTC (permalink / raw)
  To: Kenneth Graunke; +Cc: intel-gfx

On Tue, Oct 04, 2011 at 10:13:47PM -0700, Kenneth Graunke wrote:
> On 10/04/2011 01:30 AM, Daniel Vetter wrote:
> > On Mon, Oct 03, 2011 at 11:02:39PM -0700, Kenneth Graunke wrote:
> >> "STALL_AT_SCOREBOARD" is much clearer than "STALL_EN" now that there are
> >> several different kinds of stalls.  Also, "INSTRUCTION_CACHE_FLUSH" is a
> >> lot easier to understand at a glance than the terse "IS_FLUSH."
> >>
> >> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
> >> ---
> >>  drivers/gpu/drm/i915/i915_reg.h         |   16 ++++++++--------
> >>  drivers/gpu/drm/i915/intel_ringbuffer.c |    6 ++++--
> >>  2 files changed, 12 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> >> index d691781..bfe8488 100644
> >> --- a/drivers/gpu/drm/i915/i915_reg.h
> >> +++ b/drivers/gpu/drm/i915/i915_reg.h
> >> @@ -243,15 +243,15 @@
> >>  #define   DISPLAY_PLANE_A           (0<<20)
> >>  #define   DISPLAY_PLANE_B           (1<<20)
> >>  #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
> >> -#define   PIPE_CONTROL_QW_WRITE	(1<<14)
> >> -#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
> >> -#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
> >> -#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
> >> -#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
> >> -#define   PIPE_CONTROL_ISP_DIS	(1<<9)
> >> -#define   PIPE_CONTROL_NOTIFY	(1<<8)
> >> +#define   PIPE_CONTROL_QW_WRITE			(1<<14)
> >> +#define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
> >> +#define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
> >> +#define   PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH	(1<<11) /* MBZ on Ironlake */
> >> +#define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */
> > 
> > Minor bikeshed: You retain the _FLUSH for the read-only
> > instruction/texture caches, but use _INVALIDATE for the new bits for
> > read-only caches. I think we want _INVALIDATE for all of them.
> > 
> > Otherwise, these three patches are:
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Yeah.  The docs for pre-SNB use "Flush" in all the field names, while
> the docs for SNB+ switch to "Invalidate".  So I just used it for the new
> ones.  If we'd rather go with "invalidate" for the old bits too, I can
> do that.

Invalidate agrees more with our own cache tracking code and how the legacy
MI_FLUSH is defined. So I vote for that because I like my bikesheds to be
painted consistenly ;-)
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define.
  2011-10-04  6:02 [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Kenneth Graunke
  2011-10-04  6:02 ` [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse Kenneth Graunke
  2011-10-04  6:02 ` [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+ Kenneth Graunke
@ 2011-10-05 20:35 ` Ben Widawsky
  2011-10-11 11:09 ` Daniel Vetter
  3 siblings, 0 replies; 22+ messages in thread
From: Ben Widawsky @ 2011-10-05 20:35 UTC (permalink / raw)
  To: Kenneth Graunke; +Cc: intel-gfx

On Mon, Oct 03, 2011 at 11:02:38PM -0700, Kenneth Graunke wrote:
> Not all PIPE_CONTROLs have a length of 2, so remove it from the #define
> and make each invocation specify the desired length.
> 
> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h         |    2 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c |    4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 138eae1..d691781 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -242,7 +242,7 @@
>  #define   ASYNC_FLIP                (1<<22)
>  #define   DISPLAY_PLANE_A           (0<<20)
>  #define   DISPLAY_PLANE_B           (1<<20)
> -#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
> +#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
>  #define   PIPE_CONTROL_QW_WRITE	(1<<14)
>  #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
>  #define   PIPE_CONTROL_WC_FLUSH	(1<<12)

Can you do this like the other multilength commands we use?
Domething like:
#define GFX_OP_PIPE_CONTROL(n) ((0x3<<29)|(0x3<<27)|(0x2<<24) | n)

Ben

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse.
  2011-10-04  6:02 ` [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse Kenneth Graunke
  2011-10-04  8:30   ` Daniel Vetter
@ 2011-10-05 22:29   ` Ben Widawsky
  1 sibling, 0 replies; 22+ messages in thread
From: Ben Widawsky @ 2011-10-05 22:29 UTC (permalink / raw)
  To: Kenneth Graunke; +Cc: intel-gfx

On Mon, Oct 03, 2011 at 11:02:39PM -0700, Kenneth Graunke wrote:
> "STALL_AT_SCOREBOARD" is much clearer than "STALL_EN" now that there are
> several different kinds of stalls.  Also, "INSTRUCTION_CACHE_FLUSH" is a
> lot easier to understand at a glance than the terse "IS_FLUSH."
> 
> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h         |   16 ++++++++--------
>  drivers/gpu/drm/i915/intel_ringbuffer.c |    6 ++++--
>  2 files changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index d691781..bfe8488 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -243,15 +243,15 @@
>  #define   DISPLAY_PLANE_A           (0<<20)
>  #define   DISPLAY_PLANE_B           (1<<20)
>  #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
> -#define   PIPE_CONTROL_QW_WRITE	(1<<14)
> -#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
> -#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
> -#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
> -#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
> -#define   PIPE_CONTROL_ISP_DIS	(1<<9)
> -#define   PIPE_CONTROL_NOTIFY	(1<<8)
> +#define   PIPE_CONTROL_QW_WRITE			(1<<14)
> +#define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
> +#define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
> +#define   PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH	(1<<11) /* MBZ on Ironlake */
> +#define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */
> +#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE	(1<<9)
> +#define   PIPE_CONTROL_NOTIFY			(1<<8)
> +#define   PIPE_CONTROL_STALL_AT_SCOREBOARD	(1<<1)
>  #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
> -#define   PIPE_CONTROL_STALL_EN	(1<<1) /* in addr word, Ironlake+ only */

What does "write flush" mean? At least for Gen6+:
#define   PIPE_CONTROL_RENDER_CACHE_FLUSH	(1<<12)

If this is invalid before Ironlake, let's change the comment, otherwise
ignore.
#define   PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH	(1<<11) /* Gen6+ */

Ben

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-04  6:02 ` [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+ Kenneth Graunke
@ 2011-10-05 22:57   ` Ben Widawsky
  2011-10-05 23:36     ` Chris Wilson
  2011-10-06  0:59     ` Eric Anholt
  2011-10-05 23:39   ` Ben Widawsky
  1 sibling, 2 replies; 22+ messages in thread
From: Ben Widawsky @ 2011-10-05 22:57 UTC (permalink / raw)
  To: Kenneth Graunke; +Cc: intel-gfx

On Mon,  3 Oct 2011 23:02:40 -0700
Kenneth Graunke <kenneth@whitecape.org> wrote:

> From: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h         |    5 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  136 ++++++++++++++++++++++++++++---
>  2 files changed, 129 insertions(+), 12 deletions(-)
> 
> v2:
>  - Add State & Constant Cache bits as suggested by Daniel.
>  - Specify length directly rather than hiding it in a GEN6 #define.
>  - Use more verbose bit field names, for clarity.
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bfe8488..81713ae 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -243,6 +243,7 @@
>  #define   DISPLAY_PLANE_A           (0<<20)
>  #define   DISPLAY_PLANE_B           (1<<20)
>  #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
> +#define   PIPE_CONTROL_CS_STALL			(1<<20)
>  #define   PIPE_CONTROL_QW_WRITE			(1<<14)
>  #define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
>  #define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
> @@ -250,7 +251,11 @@
>  #define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */
>  #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE	(1<<9)
>  #define   PIPE_CONTROL_NOTIFY			(1<<8)
> +#define   PIPE_CONTROL_VF_CACHE_INVALIDATE	(1<<4)
> +#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1<<3)
> +#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1<<2)
>  #define   PIPE_CONTROL_STALL_AT_SCOREBOARD	(1<<1)
> +#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1<<0)
>  #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
>  
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 2b572fd..f841d5c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -34,6 +34,16 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  
> +/*
> + * 965+ support PIPE_CONTROL commands, which provide finer grained control
> + * over cache flushing.
> + */
> +struct pipe_control {
> +	struct drm_i915_gem_object *obj;
> +	volatile u32 *cpu_page;
> +	u32 gtt_offset;
> +};
> +
>  static inline int ring_space(struct intel_ring_buffer *ring)
>  {
>  	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
> @@ -123,6 +133,118 @@ render_ring_flush(struct intel_ring_buffer *ring,
>  	return 0;
>  }
>  
> +/**
> + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
> + * implementing two workarounds on gen6.  From section 1.4.7.1
> + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
> + *
> + * [DevSNB-C+{W/A}] Before any depth stall flush (including those
> + * produced by non-pipelined state commands), software needs to first
> + * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
> + * 0.
> + *
> + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
> + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
> + *
> + * And the workaround for these two requires this workaround first:
> + *
> + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
> + * BEFORE the pipe-control with a post-sync op and no write-cache
> + * flushes.
> + *
> + * And this last workaround is tricky because of the requirements on
> + * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
> + * volume 2 part 1:
> + *
> + *     "1 of the following must also be set:
> + *      - Render Target Cache Flush Enable ([12] of DW1)
> + *      - Depth Cache Flush Enable ([0] of DW1)
> + *      - Stall at Pixel Scoreboard ([1] of DW1)
> + *      - Depth Stall ([13] of DW1)
> + *      - Post-Sync Operation ([13] of DW1)
> + *      - Notify Enable ([8] of DW1)"
> + *
> + * The cache flushes require the workaround flush that triggered this
> + * one, so we can't use it.  Depth stall would trigger the same.
> + * Post-sync nonzero is what triggered this second workaround, so we
> + * can't use that one either.  Notify enable is IRQs, which aren't
> + * really our business.  That leaves only stall at scoreboard.
> + */
> +static int
> +intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
> +{
> +	struct pipe_control *pc = ring->private;
> +	u32 scratch_addr = pc->gtt_offset + 128;
> +	int ret;
> +
> +
> +	ret = intel_ring_begin(ring, 6);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
> +	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
> +			PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
> +	intel_ring_emit(ring, 0); /* low dword */
> +	intel_ring_emit(ring, 0); /* high dword */
> +	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_advance(ring);
> +
> +	ret = intel_ring_begin(ring, 6);
> +	if (ret)
> +		return ret;
> +
> +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
> +	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
> +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
> +	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, 0);
> +	intel_ring_emit(ring, MI_NOOP);
> +	intel_ring_advance(ring);
> +
> +	return 0;
> +}
> +
> +static int
> +gen6_render_ring_flush(struct intel_ring_buffer *ring,
> +                         u32 invalidate_domains, u32 flush_domains)
> +{
> +	u32 flags = 0;
> +	struct pipe_control *pc = ring->private;
> +	u32 scratch_addr = pc->gtt_offset + 128;
> +	int ret;
> +
> +	/* Force SNB workarounds for PIPE_CONTROL flushes */
> +	intel_emit_post_sync_nonzero_flush(ring);
> +
> +	/* Just flush everything.  Experiments have shown that reducing the
> +	 * number of bits based on the write domains has little performance
> +	 * impact.
> +	 */
> +	flags |= PIPE_CONTROL_WRITE_FLUSH;
> +	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH;
> +	flags |= PIPE_CONTROL_TEXTURE_CACHE_FLUSH;
> +	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
> +	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> +	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

I think we also want a TLB invalidate here, bit 18.  This requires another
workaround before issuing this flush: We need 2 Store Data Commands (such as
MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending PIPE_CONTROL w/ stall
(20) and TLB inv bit (18) set

Ben

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-05 22:57   ` Ben Widawsky
@ 2011-10-05 23:36     ` Chris Wilson
  2011-10-05 23:54       ` Daniel Vetter
  2011-10-06  0:59     ` Eric Anholt
  1 sibling, 1 reply; 22+ messages in thread
From: Chris Wilson @ 2011-10-05 23:36 UTC (permalink / raw)
  To: Ben Widawsky, Kenneth Graunke; +Cc: intel-gfx

On Wed, 5 Oct 2011 15:57:13 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> I think we also want a TLB invalidate here, bit 18.  This requires another
> workaround before issuing this flush: We need 2 Store Data Commands (such as
> MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending PIPE_CONTROL w/ stall
> (20) and TLB inv bit (18) set

Isn't that workaround itself rather hand-wavy? As in it gives the
hardware sufficient time to complete outstanding writes, but not
necessarily. Or am I thinking of yet another workaround...
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-04  6:02 ` [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+ Kenneth Graunke
  2011-10-05 22:57   ` Ben Widawsky
@ 2011-10-05 23:39   ` Ben Widawsky
  1 sibling, 0 replies; 22+ messages in thread
From: Ben Widawsky @ 2011-10-05 23:39 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

From: Jesse Barnes <jbarnes@virtuousgeek.org>

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_reg.h         |    6 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |  148 ++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 3 files changed, 143 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bfe8488..69905b2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -243,6 +243,8 @@
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24))
+#define   PIPE_CONTROL_CS_STALL			(1<<20)
+#define   PIPE_CONTROL_TLB_INVALIDATE		(1<<18)
 #define   PIPE_CONTROL_QW_WRITE			(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL		(1<<13)
 #define   PIPE_CONTROL_WRITE_FLUSH		(1<<12)
@@ -250,7 +252,11 @@
 #define   PIPE_CONTROL_TEXTURE_CACHE_FLUSH	(1<<10) /* GM45+ only */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE	(1<<9)
 #define   PIPE_CONTROL_NOTIFY			(1<<8)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE	(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE	(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1<<2)
 #define   PIPE_CONTROL_STALL_AT_SCOREBOARD	(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2b572fd..b9464ee 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,16 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
+struct pipe_control {
+	struct drm_i915_gem_object *obj;
+	volatile u32 *cpu_page;
+	u32 gtt_offset;
+};
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -123,6 +133,130 @@ render_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+static int
+intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+{
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0); /* low dword */
+	intel_ring_emit(ring, 0); /* high dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
+	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen6_render_ring_flush(struct intel_ring_buffer *ring,
+                         u32 invalidate_domains, u32 flush_domains)
+{
+	u32 flags = 0;
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	intel_emit_post_sync_nonzero_flush(ring);
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 * When performing a PIPE_CONTROL with TLB invalidate, the driver must 2
+	 * Store Data Commands (such as MI_STORE_DATA_IMM or
+	 * MI_STORE_DATA_INDEX) before the PIPE_CONTROL.
+	 */
+	flags |= PIPE_CONTROL_WRITE_FLUSH;
+	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_TEXTURE_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_TLB_INVALIDATE;
+
+	ret = intel_ring_begin(ring, 14);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
+	intel_ring_emit(ring, I915_DEV_NULL_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
+	intel_ring_emit(ring, I915_DEV_NULL_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | 3);
+	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, 0); /* lower dword */
+	intel_ring_emit(ring, 0); /* uppwer dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -206,16 +340,6 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 	return 0;
 }
 
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
-	struct drm_i915_gem_object *obj;
-	volatile u32 *cpu_page;
-	u32 gtt_offset;
-};
-
 static int
 init_pipe_control(struct intel_ring_buffer *ring)
 {
@@ -296,8 +420,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-	} else if (IS_GEN5(dev)) {
+	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
 		if (ret)
 			return ret;
@@ -1360,6 +1483,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
+		ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 68281c9..5ab5f00 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -164,6 +164,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
 #define READ_BREADCRUMB(dev_priv) READ_HWSP(dev_priv, I915_BREADCRUMB_INDEX)
 #define I915_GEM_HWS_INDEX		0x20
 #define I915_BREADCRUMB_INDEX		0x21
+#define I915_DEV_NULL_INDEX		0x22
 
 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
 
-- 
1.7.7

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-05 23:36     ` Chris Wilson
@ 2011-10-05 23:54       ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2011-10-05 23:54 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Ben Widawsky, intel-gfx

On Thu, Oct 06, 2011 at 12:36:03AM +0100, Chris Wilson wrote:
> On Wed, 5 Oct 2011 15:57:13 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > I think we also want a TLB invalidate here, bit 18.  This requires another
> > workaround before issuing this flush: We need 2 Store Data Commands (such as
> > MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending PIPE_CONTROL w/ stall
> > (20) and TLB inv bit (18) set
> 
> Isn't that workaround itself rather hand-wavy? As in it gives the
> hardware sufficient time to complete outstanding writes, but not
> necessarily. Or am I thinking of yet another workaround...

MI_STORE seems to be the hw teams favourite for adding these delays to
paper over hw issues. I've seen it all over bspec ...
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-05 22:57   ` Ben Widawsky
  2011-10-05 23:36     ` Chris Wilson
@ 2011-10-06  0:59     ` Eric Anholt
  2011-10-06  5:15       ` Ben Widawsky
  1 sibling, 1 reply; 22+ messages in thread
From: Eric Anholt @ 2011-10-06  0:59 UTC (permalink / raw)
  To: Ben Widawsky, Kenneth Graunke; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 911 bytes --]

On Wed, 5 Oct 2011 15:57:13 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> I think we also want a TLB invalidate here, bit 18.  This requires another
> workaround before issuing this flush: We need 2 Store Data Commands (such as
> MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending PIPE_CONTROL w/ stall
> (20) and TLB inv bit (18) set

From the docs for GFX_MODE:

    "This field controls the invalidation if the TLB cache inside the
     hardware. When enabled this bit limits the invalidation of the TLB
     only to batch buffer boundaries or to pipe_control commands which
     have the TLB invalidation bit set. If disabled, the TLB caches are
     flushed for every full flush of the pipeline"

We're already getting TLB invalidate at batchbuffer boundaries
(actually, even more: every pipeline stall, since that bit is 0 on my
hardware).  What would we need this new flush for?

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-06  0:59     ` Eric Anholt
@ 2011-10-06  5:15       ` Ben Widawsky
  2011-10-06 18:00         ` Eric Anholt
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2011-10-06  5:15 UTC (permalink / raw)
  To: Eric Anholt; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1401 bytes --]

On Wed, Oct 05, 2011 at 05:59:31PM -0700, Eric Anholt wrote:
> On Wed, 5 Oct 2011 15:57:13 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > I think we also want a TLB invalidate here, bit 18.  This requires another
> > workaround before issuing this flush: We need 2 Store Data Commands (such as
> > MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending PIPE_CONTROL w/ stall
> > (20) and TLB inv bit (18) set
> 
> From the docs for GFX_MODE:
> 
>     "This field controls the invalidation if the TLB cache inside the
>      hardware. When enabled this bit limits the invalidation of the TLB
>      only to batch buffer boundaries or to pipe_control commands which
>      have the TLB invalidation bit set. If disabled, the TLB caches are
>      flushed for every full flush of the pipeline"
> 
> We're already getting TLB invalidate at batchbuffer boundaries
> (actually, even more: every pipeline stall, since that bit is 0 on my
> hardware).  What would we need this new flush for?

Does this only mean after each batchbuffer (MI_BATCH_BUFFER_END) or also
before actually jumping to the location at MI_BATCH_BUFFER_START? If so,
what happens when we map or unmap, in between batches? Won't the TLBs
need to be flushed in between?

Also, wouldn't it be nice for the kernel to flush TLBs without having to
submit a batch (no specific use case in mind for this)? 

Ben

[-- Attachment #1.2: Type: application/pgp-signature, Size: 490 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-06  5:15       ` Ben Widawsky
@ 2011-10-06 18:00         ` Eric Anholt
  2011-10-06 19:01           ` Ben Widawsky
  0 siblings, 1 reply; 22+ messages in thread
From: Eric Anholt @ 2011-10-06 18:00 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 1849 bytes --]

On Thu, 6 Oct 2011 05:15:23 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
Non-text part: multipart/signed
> On Wed, Oct 05, 2011 at 05:59:31PM -0700, Eric Anholt wrote:
> > On Wed, 5 Oct 2011 15:57:13 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > > I think we also want a TLB invalidate here, bit 18.  This requires another
> > > workaround before issuing this flush: We need 2 Store Data Commands (such as
> > > MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending PIPE_CONTROL w/ stall
> > > (20) and TLB inv bit (18) set
> > 
> > From the docs for GFX_MODE:
> > 
> >     "This field controls the invalidation if the TLB cache inside the
> >      hardware. When enabled this bit limits the invalidation of the TLB
> >      only to batch buffer boundaries or to pipe_control commands which
> >      have the TLB invalidation bit set. If disabled, the TLB caches are
> >      flushed for every full flush of the pipeline"
> > 
> > We're already getting TLB invalidate at batchbuffer boundaries
> > (actually, even more: every pipeline stall, since that bit is 0 on my
> > hardware).  What would we need this new flush for?
> 
> Does this only mean after each batchbuffer (MI_BATCH_BUFFER_END) or also
> before actually jumping to the location at MI_BATCH_BUFFER_START? If so,
> what happens when we map or unmap, in between batches? Won't the TLBs
> need to be flushed in between?

Are you talking about for GTT mapped access?  TLBs for those are flushed
at PTE update.

> Also, wouldn't it be nice for the kernel to flush TLBs without having to
> submit a batch (no specific use case in mind for this)? 

Why would it be nice?  We know when we need them flushed for our needs,
there's that table that tells when the hardware flushes them, and the
two appear to add up to "don't do anything else" to me.


[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-06 18:00         ` Eric Anholt
@ 2011-10-06 19:01           ` Ben Widawsky
  0 siblings, 0 replies; 22+ messages in thread
From: Ben Widawsky @ 2011-10-06 19:01 UTC (permalink / raw)
  To: Eric Anholt; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 2244 bytes --]

On Thu, Oct 06, 2011 at 11:00:18AM -0700, Eric Anholt wrote:
> On Thu, 6 Oct 2011 05:15:23 +0000, Ben Widawsky <ben@bwidawsk.net> wrote:
> Non-text part: multipart/signed
> > On Wed, Oct 05, 2011 at 05:59:31PM -0700, Eric Anholt wrote:
> > > On Wed, 5 Oct 2011 15:57:13 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > > > I think we also want a TLB invalidate here, bit 18.  This requires another
> > > > workaround before issuing this flush: We need 2 Store Data Commands (such as
> > > > MI_STORE_DATA_IMM or MI_STORE_DATA_INDEX) before sending PIPE_CONTROL w/ stall
> > > > (20) and TLB inv bit (18) set
> > > 
> > > From the docs for GFX_MODE:
> > > 
> > >     "This field controls the invalidation if the TLB cache inside the
> > >      hardware. When enabled this bit limits the invalidation of the TLB
> > >      only to batch buffer boundaries or to pipe_control commands which
> > >      have the TLB invalidation bit set. If disabled, the TLB caches are
> > >      flushed for every full flush of the pipeline"
> > > 
> > > We're already getting TLB invalidate at batchbuffer boundaries
> > > (actually, even more: every pipeline stall, since that bit is 0 on my
> > > hardware).  What would we need this new flush for?
> > 
> > Does this only mean after each batchbuffer (MI_BATCH_BUFFER_END) or also
> > before actually jumping to the location at MI_BATCH_BUFFER_START? If so,
> > what happens when we map or unmap, in between batches? Won't the TLBs
> > need to be flushed in between?
> 
> Are you talking about for GTT mapped access?  TLBs for those are flushed
> at PTE update.
> 
> > Also, wouldn't it be nice for the kernel to flush TLBs without having to
> > submit a batch (no specific use case in mind for this)? 
> 
> Why would it be nice?  We know when we need them flushed for our needs,
> there's that table that tells when the hardware flushes them, and the
> two appear to add up to "don't do anything else" to me.
> 

Ok, we drop the patch. I *do* think it doesn't hurt, and it will make
things just work if we ever change the GFX_MODE bit, but we can always
re add this later as well.

I don't understand the pipeline flushing well enough to push it any
further.

Ben

[-- Attachment #1.2: Type: application/pgp-signature, Size: 490 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define.
  2011-10-04  6:02 [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Kenneth Graunke
                   ` (2 preceding siblings ...)
  2011-10-05 20:35 ` [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Ben Widawsky
@ 2011-10-11 11:09 ` Daniel Vetter
  2011-10-11 17:20   ` Jesse Barnes
  3 siblings, 1 reply; 22+ messages in thread
From: Daniel Vetter @ 2011-10-11 11:09 UTC (permalink / raw)
  To: Kenneth Graunke; +Cc: intel-gfx

Switching to PIPE_CONTROL with the snb workaround seems to fix the hangs I
see when running cairo-perf-traces.

Tested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define.
  2011-10-11 11:09 ` Daniel Vetter
@ 2011-10-11 17:20   ` Jesse Barnes
  2011-10-11 18:39     ` Ben Widawsky
  0 siblings, 1 reply; 22+ messages in thread
From: Jesse Barnes @ 2011-10-11 17:20 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 435 bytes --]

On Tue, 11 Oct 2011 13:09:17 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> Switching to PIPE_CONTROL with the snb workaround seems to fix the hangs I
> see when running cairo-perf-traces.
> 
> Tested-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Ah wow, so it actually fixes a bug.  Let's get this upstream soon then;
Ben do you want to re-post or should I?

Thanks,
-- 
Jesse Barnes, Intel Open Source Technology Center

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define.
  2011-10-11 17:20   ` Jesse Barnes
@ 2011-10-11 18:39     ` Ben Widawsky
  2011-10-11 18:53       ` Jesse Barnes
  0 siblings, 1 reply; 22+ messages in thread
From: Ben Widawsky @ 2011-10-11 18:39 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Tue, 11 Oct 2011 10:20:04 -0700
Jesse Barnes <jbarnes@virtuousgeek.org> wrote:

> On Tue, 11 Oct 2011 13:09:17 +0200
> Daniel Vetter <daniel@ffwll.ch> wrote:
> 
> > Switching to PIPE_CONTROL with the snb workaround seems to fix the hangs I
> > see when running cairo-perf-traces.
> > 
> > Tested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Ah wow, so it actually fixes a bug.  Let's get this upstream soon then;
> Ben do you want to re-post or should I?
> 
> Thanks,
If nobody else thinks it's a good idea, just slap on my r-b and call it
done.

I don't care enough to repost or ask you to do so.

Ben

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define.
  2011-10-11 18:39     ` Ben Widawsky
@ 2011-10-11 18:53       ` Jesse Barnes
  2011-10-11 22:17         ` Keith Packard
  0 siblings, 1 reply; 22+ messages in thread
From: Jesse Barnes @ 2011-10-11 18:53 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx, Keith


[-- Attachment #1.1: Type: text/plain, Size: 870 bytes --]

On Tue, 11 Oct 2011 11:39:36 -0700
Ben Widawsky <ben@bwidawsk.net> wrote:

> On Tue, 11 Oct 2011 10:20:04 -0700
> Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> 
> > On Tue, 11 Oct 2011 13:09:17 +0200
> > Daniel Vetter <daniel@ffwll.ch> wrote:
> > 
> > > Switching to PIPE_CONTROL with the snb workaround seems to fix the hangs I
> > > see when running cairo-perf-traces.
> > > 
> > > Tested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > 
> > Ah wow, so it actually fixes a bug.  Let's get this upstream soon then;
> > Ben do you want to re-post or should I?
> > 
> > Thanks,
> If nobody else thinks it's a good idea, just slap on my r-b and call it
> done.
> 
> I don't care enough to repost or ask you to do so.

Ok as long as it still applies, great.

Keith can you pick this up?

-- 
Jesse Barnes, Intel Open Source Technology Center

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+.
  2011-10-11 21:41 [PATCH 1/3] drm/i915: " Daniel Vetter
@ 2011-10-11 21:41 ` Daniel Vetter
  0 siblings, 0 replies; 22+ messages in thread
From: Daniel Vetter @ 2011-10-11 21:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

From: Jesse Barnes <jbarnes@virtuousgeek.org>

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
[danvet: this seems to fix cairo-perf-trace hangs on my snb]
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_reg.h         |    5 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |  136 ++++++++++++++++++++++++++++---
 2 files changed, 129 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 7f496a1..eca64e1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -243,6 +243,7 @@
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
 #define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
 #define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
@@ -250,7 +251,11 @@
 #define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
 #define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
 #define   PIPE_CONTROL_NOTIFY				(1<<8)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
 #define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2f3c3e1..6f71cff 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,16 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
+struct pipe_control {
+	struct drm_i915_gem_object *obj;
+	volatile u32 *cpu_page;
+	u32 gtt_offset;
+};
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -123,6 +133,118 @@ render_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+static int
+intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+{
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0); /* low dword */
+	intel_ring_emit(ring, 0); /* high dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen6_render_ring_flush(struct intel_ring_buffer *ring,
+                         u32 invalidate_domains, u32 flush_domains)
+{
+	u32 flags = 0;
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	intel_emit_post_sync_nonzero_flush(ring);
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	flags |= PIPE_CONTROL_WRITE_FLUSH;
+	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, 0); /* lower dword */
+	intel_ring_emit(ring, 0); /* uppwer dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -206,16 +328,6 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 	return 0;
 }
 
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
-	struct drm_i915_gem_object *obj;
-	volatile u32 *cpu_page;
-	u32 gtt_offset;
-};
-
 static int
 init_pipe_control(struct intel_ring_buffer *ring)
 {
@@ -296,8 +408,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-	} else if (IS_GEN5(dev)) {
+	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
 		if (ret)
 			return ret;
@@ -1369,6 +1480,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
+		ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {
-- 
1.7.6.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define.
  2011-10-11 18:53       ` Jesse Barnes
@ 2011-10-11 22:17         ` Keith Packard
  0 siblings, 0 replies; 22+ messages in thread
From: Keith Packard @ 2011-10-11 22:17 UTC (permalink / raw)
  To: Jesse Barnes, Ben Widawsky; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 176 bytes --]

On Tue, 11 Oct 2011 11:53:56 -0700, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:

> Keith can you pick this up?

Yup, I'll make it work.

-- 
keith.packard@intel.com

[-- Attachment #1.2: Type: application/pgp-signature, Size: 827 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2011-10-11 22:42 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-10-04  6:02 [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Kenneth Graunke
2011-10-04  6:02 ` [PATCH 2/3] i915: Rename PIPE_CONTROL bit defines to be less terse Kenneth Graunke
2011-10-04  8:30   ` Daniel Vetter
2011-10-05  5:13     ` Kenneth Graunke
2011-10-05 10:05       ` Daniel Vetter
2011-10-05 22:29   ` Ben Widawsky
2011-10-04  6:02 ` [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+ Kenneth Graunke
2011-10-05 22:57   ` Ben Widawsky
2011-10-05 23:36     ` Chris Wilson
2011-10-05 23:54       ` Daniel Vetter
2011-10-06  0:59     ` Eric Anholt
2011-10-06  5:15       ` Ben Widawsky
2011-10-06 18:00         ` Eric Anholt
2011-10-06 19:01           ` Ben Widawsky
2011-10-05 23:39   ` Ben Widawsky
2011-10-05 20:35 ` [PATCH 1/3] i915: Remove implied length of 2 from GFX_OP_PIPE_CONTROL #define Ben Widawsky
2011-10-11 11:09 ` Daniel Vetter
2011-10-11 17:20   ` Jesse Barnes
2011-10-11 18:39     ` Ben Widawsky
2011-10-11 18:53       ` Jesse Barnes
2011-10-11 22:17         ` Keith Packard
  -- strict thread matches above, loose matches on Subject: below --
2011-10-11 21:41 [PATCH 1/3] drm/i915: " Daniel Vetter
2011-10-11 21:41 ` [PATCH 3/3] drm/i915: Use PIPE_CONTROL for flushing on gen6+ Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.