public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* igt tests/rendercopy: Add option to test resource streamer functionality
@ 2014-05-06 19:48 Abdiel Janulgue
  2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Abdiel Janulgue @ 2014-05-06 19:48 UTC (permalink / raw)
  To: intel-gfx

This patchseries tests a resource streamer feature called hw-generated binding tables,
which can be enabled from userspace. It is essentially a newer format that uses the hw to
generate and submit binding tables to the GPU. It uses the basic rendercopy test to toggle
the feature on and off and to verify if the resulting data is still correct.

Abdiel Janulgue (2):
      rendercopy/bdw: Enable hw-generated binding tables
      tests/gem_render_copy: Add option to test resource streamer

 lib/gen8_render.h       |   13 +++++++
 lib/intel_batchbuffer.c |   12 ++++++
 lib/intel_batchbuffer.h |    3 ++
 lib/intel_reg.h         |    3 ++
 lib/rendercopy_gen8.c   |   97 ++++++++++++++++++++++++++++++++++++++++++++---
 tests/gem_render_copy.c |    8 +++-
 6 files changed, 130 insertions(+), 6 deletions(-)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-06 19:48 igt tests/rendercopy: Add option to test resource streamer functionality Abdiel Janulgue
@ 2014-05-06 19:48 ` Abdiel Janulgue
  2014-05-06 21:38   ` Ville Syrjälä
  2014-05-07 11:49   ` Ville Syrjälä
  2014-05-06 19:48 ` [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer Abdiel Janulgue
  2014-05-06 20:19 ` igt tests/rendercopy: Add option to test resource streamer functionality Daniel Vetter
  2 siblings, 2 replies; 16+ messages in thread
From: Abdiel Janulgue @ 2014-05-06 19:48 UTC (permalink / raw)
  To: intel-gfx

Use on-chip hw-binding table generator to generate binding
tables when the test emits SURFACE_STATES packets. The hw generates
these binding table packets internally so we don't have to
allocate space on the batchbuffer.

Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 lib/gen8_render.h       |   13 +++++++
 lib/intel_batchbuffer.c |   12 ++++++
 lib/intel_batchbuffer.h |    3 ++
 lib/intel_reg.h         |    3 ++
 lib/rendercopy_gen8.c   |   97 ++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/lib/gen8_render.h b/lib/gen8_render.h
index fffc100..4d5f7ba 100644
--- a/lib/gen8_render.h
+++ b/lib/gen8_render.h
@@ -83,6 +83,19 @@
 /* Random shifts */
 #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
 
+/* GEN8+ resource streamer */
+#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC       (0x7919 << 16) /* GEN8 */
+# define BINDING_TABLE_POOL_ENABLE              0x0800
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS          (0x7843 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS          (0x7844 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS          (0x7845 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS          (0x7846 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS          (0x7847 << 16) /* GEN8 */
+
+/* Toggling RS needs PIPE_CONTROL SC invalidate */
+#define _3DSTATE_PIPE_CONTROL		((0x3 << 29) | (3 << 27) | (2 << 24))
+#define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
+
 /* Shamelessly ripped from mesa */
 struct gen8_surface_state
 {
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index e868922..26c9b89 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
 	memset(batch->buffer, 0, sizeof(batch->buffer));
 
 	batch->ptr = batch->buffer;
+
+	if (batch->hw_bt_pool_bo != NULL) {
+		drm_intel_bo_unreference(batch->hw_bt_pool_bo);
+		batch->hw_bt_pool_bo = NULL;
+	}
+
+	batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt",
+						  131072, 4096);
 }
 
 /**
@@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
 	drm_intel_bo_unreference(batch->bo);
 	batch->bo = NULL;
+
+	drm_intel_bo_unreference(batch->hw_bt_pool_bo);
+	batch->hw_bt_pool_bo = NULL;
+
 	free(batch);
 }
 
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 49dbcf0..2a516bc 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -18,6 +18,9 @@ struct intel_batchbuffer {
 	uint8_t buffer[BATCH_SZ];
 	uint8_t *ptr;
 	uint8_t *state;
+
+	drm_intel_bo *hw_bt_pool_bo;
+	int use_resource_streamer;
 };
 
 struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr,
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 4b3a102..1cb9a29 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_BATCH_NON_SECURE		(1)
 #define MI_BATCH_NON_SECURE_I965	(1 << 8)
 
+/* Resource Streamer */
+#define MI_RS_CONTROL           (0x6 << 23)
+
 #define MAX_DISPLAY_PIPES	2
 
 typedef enum {
diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
index 6f5a698..b0dd0f3 100644
--- a/lib/rendercopy_gen8.c
+++ b/lib/rendercopy_gen8.c
@@ -169,12 +169,14 @@ static void
 gen6_render_flush(struct intel_batchbuffer *batch,
 		  drm_intel_context *context, uint32_t batch_end)
 {
-	int ret;
+	int ret = 0;
+	uint32_t flags = 0;
 
 	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
 	if (ret == 0)
 		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
-						    batch_end, 0);
+						    batch_end, flags | I915_EXEC_RENDER);
 	assert(ret == 0);
 }
 
@@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
 	return offset;
 }
 
+static void
+gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
+{
+	if (!enable) {
+		OUT_BATCH(MI_RS_CONTROL | 0x0);
+
+		OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
+		/* binding table pool base address */
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		/* Upper bound */
+		OUT_BATCH(0);
+
+		OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+		OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+
+		return;
+        }
+	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
+
+	/* binding table pool base address */
+	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  BINDING_TABLE_POOL_ENABLE);
+	OUT_BATCH(0);
+
+	/* Upper bound */
+	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  batch->hw_bt_pool_bo->size);
+
+	OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+	OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static uint32_t
+gen8_rs_bind_surfaces(struct intel_batchbuffer *batch,
+		      struct igt_buf *src,
+		      struct igt_buf *dst,
+		      uint32_t *surf0, uint32_t *surf1)
+{
+	*surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
+	*surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 0);
+
+	return 0;
+}
+
+static void
+gen8_rs_edit_surfaces(struct intel_batchbuffer *batch,
+		      uint32_t surf0, uint32_t surf1)
+{
+	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2));
+	OUT_BATCH(0x3);
+	{
+		OUT_BATCH(0 << 16 | surf0 >> 6);
+		OUT_BATCH(1 << 16 | surf1 >> 6);
+	}
+}
+
 static uint32_t
 gen8_bind_surfaces(struct intel_batchbuffer *batch,
 		   struct igt_buf *src,
 		   struct igt_buf *dst)
 {
-	uint32_t *binding_table, offset;
+	uint32_t offset = 0;
+	uint32_t *binding_table;
 
 	binding_table = batch_alloc(batch, 8, 32);
 	offset = batch_offset(batch, binding_table);
 	annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE,
 			     offset, 8);
-
 	binding_table[0] =
 		gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
 	binding_table[1] =
@@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer *batch) {
 
 static void
 gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
+	if (batch->use_resource_streamer)
+		OUT_BATCH(MI_RS_CONTROL | 0x0);
+
 	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2));
 
 	/* general */
@@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
 	OUT_BATCH(0xfffff000 | 1);
 	/* intruction buffer size */
 	OUT_BATCH(1 << 12 | 1);
+
+	if (batch->use_resource_streamer)
+		OUT_BATCH(MI_RS_CONTROL | 0x1);
 }
 
 static void
@@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 	uint32_t scissor_state;
 	uint32_t vertex_buffer;
 	uint32_t batch_end;
+	uint32_t surf0 = 0, surf1 = 1;
 
 	intel_batchbuffer_flush_with_context(batch, context);
 
@@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 
 	annotation_init(&aub_annotations);
 
-	ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
+	if (batch->use_resource_streamer)
+		ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst,
+							 &surf0, &surf1);
+	else
+		ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
 	ps_sampler_state  = gen8_create_sampler(batch);
 	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
 	vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
@@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_state_base_address(batch);
 
+	if (batch->use_resource_streamer)
+               gen8_hw_binding_table(batch, true);
+
 	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
 	OUT_BATCH(viewport.cc_state);
 	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
@@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 
 	gen8_emit_sf(batch);
 
+	if (batch->use_resource_streamer)
+		gen8_rs_edit_surfaces(batch, surf0, surf1);
+
 	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
 	OUT_BATCH(ps_binding_table);
 
@@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
 	gen8_emit_vf_topology(batch);
 	gen8_emit_primitive(batch, vertex_buffer);
 
+	if (batch->use_resource_streamer)
+		gen8_hw_binding_table(batch, false);
+
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 
 	batch_end = batch_align(batch, 8);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer
  2014-05-06 19:48 igt tests/rendercopy: Add option to test resource streamer functionality Abdiel Janulgue
  2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
@ 2014-05-06 19:48 ` Abdiel Janulgue
  2014-05-06 20:19 ` igt tests/rendercopy: Add option to test resource streamer functionality Daniel Vetter
  2 siblings, 0 replies; 16+ messages in thread
From: Abdiel Janulgue @ 2014-05-06 19:48 UTC (permalink / raw)
  To: intel-gfx

Add option in basic test for the render_copy to test and toggle
hw-generated binding tables feature.

Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 tests/gem_render_copy.c |    8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/gem_render_copy.c b/tests/gem_render_copy.c
index 331b7ce..c9dfba0 100644
--- a/tests/gem_render_copy.c
+++ b/tests/gem_render_copy.c
@@ -131,14 +131,19 @@ int main(int argc, char **argv)
 	int opt;
 	int opt_dump_png = false;
 	int opt_dump_aub = igt_aub_dump_enabled();
+	int opt_use_rs = false;
 
 	igt_simple_init();
 
-	while ((opt = getopt(argc, argv, "d")) != -1) {
+	while ((opt = getopt(argc, argv, "d:r")) != -1) {
 		switch (opt) {
 		case 'd':
 			opt_dump_png = true;
 			break;
+		case 'r':
+			printf("Use resource streamer\n");
+			opt_use_rs = true;
+			break;
 		default:
 			break;
 		}
@@ -156,6 +161,7 @@ int main(int argc, char **argv)
 			      "no render-copy function\n");
 
 		batch = intel_batchbuffer_alloc(data.bufmgr, data.devid);
+		batch->use_resource_streamer = opt_use_rs;
 		igt_assert(batch);
 	}
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: igt tests/rendercopy: Add option to test resource streamer functionality
  2014-05-06 19:48 igt tests/rendercopy: Add option to test resource streamer functionality Abdiel Janulgue
  2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
  2014-05-06 19:48 ` [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer Abdiel Janulgue
@ 2014-05-06 20:19 ` Daniel Vetter
  2 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2014-05-06 20:19 UTC (permalink / raw)
  To: Abdiel Janulgue; +Cc: intel-gfx

On Tue, May 06, 2014 at 10:48:01PM +0300, Abdiel Janulgue wrote:
> This patchseries tests a resource streamer feature called hw-generated binding tables,
> which can be enabled from userspace. It is essentially a newer format that uses the hw to
> generate and submit binding tables to the GPU. It uses the basic rendercopy test to toggle
> the feature on and off and to verify if the resulting data is still correct.
> 
> Abdiel Janulgue (2):
>       rendercopy/bdw: Enable hw-generated binding tables
>       tests/gem_render_copy: Add option to test resource streamer

Since you add a new execbuf flag you also need to update the
gem_exec_params test. I think we should have tests for both !bdw and !rcs
to make sure the checking is complete.
-Daniel

> 
>  lib/gen8_render.h       |   13 +++++++
>  lib/intel_batchbuffer.c |   12 ++++++
>  lib/intel_batchbuffer.h |    3 ++
>  lib/intel_reg.h         |    3 ++
>  lib/rendercopy_gen8.c   |   97 ++++++++++++++++++++++++++++++++++++++++++++---
>  tests/gem_render_copy.c |    8 +++-
>  6 files changed, 130 insertions(+), 6 deletions(-)
> 
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
@ 2014-05-06 21:38   ` Ville Syrjälä
  2014-05-06 22:09     ` Abdiel Janulgue
  2014-05-07 11:49   ` Ville Syrjälä
  1 sibling, 1 reply; 16+ messages in thread
From: Ville Syrjälä @ 2014-05-06 21:38 UTC (permalink / raw)
  To: Abdiel Janulgue; +Cc: intel-gfx

On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote:
> Use on-chip hw-binding table generator to generate binding
> tables when the test emits SURFACE_STATES packets. The hw generates
> these binding table packets internally so we don't have to
> allocate space on the batchbuffer.
> 
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> ---
>  lib/gen8_render.h       |   13 +++++++
>  lib/intel_batchbuffer.c |   12 ++++++
>  lib/intel_batchbuffer.h |    3 ++
>  lib/intel_reg.h         |    3 ++
>  lib/rendercopy_gen8.c   |   97 ++++++++++++++++++++++++++++++++++++++++++++---
>  5 files changed, 123 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/gen8_render.h b/lib/gen8_render.h
> index fffc100..4d5f7ba 100644
> --- a/lib/gen8_render.h
> +++ b/lib/gen8_render.h
> @@ -83,6 +83,19 @@
>  /* Random shifts */
>  #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
>  
> +/* GEN8+ resource streamer */
> +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC       (0x7919 << 16) /* GEN8 */
> +# define BINDING_TABLE_POOL_ENABLE              0x0800
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS          (0x7843 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS          (0x7844 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS          (0x7845 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS          (0x7846 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS          (0x7847 << 16) /* GEN8 */
> +
> +/* Toggling RS needs PIPE_CONTROL SC invalidate */
> +#define _3DSTATE_PIPE_CONTROL		((0x3 << 29) | (3 << 27) | (2 << 24))
> +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
> +
>  /* Shamelessly ripped from mesa */
>  struct gen8_surface_state
>  {
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index e868922..26c9b89 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
>  	memset(batch->buffer, 0, sizeof(batch->buffer));
>  
>  	batch->ptr = batch->buffer;
> +
> +	if (batch->hw_bt_pool_bo != NULL) {
> +		drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> +		batch->hw_bt_pool_bo = NULL;
> +	}
> +
> +	batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt",
> +						  131072, 4096);
>  }
>  
>  /**
> @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
>  {
>  	drm_intel_bo_unreference(batch->bo);
>  	batch->bo = NULL;
> +
> +	drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> +	batch->hw_bt_pool_bo = NULL;
> +
>  	free(batch);
>  }
>  
> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> index 49dbcf0..2a516bc 100644
> --- a/lib/intel_batchbuffer.h
> +++ b/lib/intel_batchbuffer.h
> @@ -18,6 +18,9 @@ struct intel_batchbuffer {
>  	uint8_t buffer[BATCH_SZ];
>  	uint8_t *ptr;
>  	uint8_t *state;
> +
> +	drm_intel_bo *hw_bt_pool_bo;
> +	int use_resource_streamer;
>  };
>  
>  struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr,
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index 4b3a102..1cb9a29 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define MI_BATCH_NON_SECURE		(1)
>  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
>  
> +/* Resource Streamer */
> +#define MI_RS_CONTROL           (0x6 << 23)
> +
>  #define MAX_DISPLAY_PIPES	2
>  
>  typedef enum {
> diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
> index 6f5a698..b0dd0f3 100644
> --- a/lib/rendercopy_gen8.c
> +++ b/lib/rendercopy_gen8.c
> @@ -169,12 +169,14 @@ static void
>  gen6_render_flush(struct intel_batchbuffer *batch,
>  		  drm_intel_context *context, uint32_t batch_end)
>  {
> -	int ret;
> +	int ret = 0;
> +	uint32_t flags = 0;
>  
>  	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
> +	flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
>  	if (ret == 0)
>  		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
> -						    batch_end, 0);
> +						    batch_end, flags | I915_EXEC_RENDER);
>  	assert(ret == 0);
>  }
>  
> @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
>  	return offset;
>  }
>  
> +static void
> +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> +{
> +	if (!enable) {
> +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> +
> +		OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> +		/* binding table pool base address */
> +		OUT_BATCH(0);
> +		OUT_BATCH(0);
> +		/* Upper bound */
> +		OUT_BATCH(0);
> +

WaCsStallBeforeStateCacheInvalidate says you need to emit another
PIPE_CONTROL here with CS stall bit set. And thanks to the other
restrictions that PIPE_CONTORL needs one of the other magic bits also
set. Stall at scoreboard perhaps? I think that's what we set in the
kernel usually.

> +		OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> +		OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> +		OUT_BATCH(0);
> +		OUT_BATCH(0);
> +		OUT_BATCH(0);
> +		OUT_BATCH(0);
> +
> +		return;
> +        }
> +	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> +
> +	/* binding table pool base address */
> +	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> +                  BINDING_TABLE_POOL_ENABLE);
> +	OUT_BATCH(0);

I suppose it doesn't matter much which domain we use here. But the
hardware writes to the BT pool so maybe you should set write domain
as well?

> +
> +	/* Upper bound */
> +	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> +                  batch->hw_bt_pool_bo->size);

AFAICS on bdw this should be just the bt pool size and not a reloc.

> +

Again need another CS stall PIPE_CONTROL.

> +	OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> +	OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +	OUT_BATCH(0);
> +}
> +
> +static uint32_t
> +gen8_rs_bind_surfaces(struct intel_batchbuffer *batch,
> +		      struct igt_buf *src,
> +		      struct igt_buf *dst,
> +		      uint32_t *surf0, uint32_t *surf1)
> +{
> +	*surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
> +	*surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 0);
> +
> +	return 0;
> +}
> +
> +static void
> +gen8_rs_edit_surfaces(struct intel_batchbuffer *batch,
> +		      uint32_t surf0, uint32_t surf1)
> +{
> +	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2));
> +	OUT_BATCH(0x3);

So I take it there's no need to clear the entries first?

> +	{
> +		OUT_BATCH(0 << 16 | surf0 >> 6);
> +		OUT_BATCH(1 << 16 | surf1 >> 6);
> +	}
> +}
> +
>  static uint32_t
>  gen8_bind_surfaces(struct intel_batchbuffer *batch,
>  		   struct igt_buf *src,
>  		   struct igt_buf *dst)
>  {
> -	uint32_t *binding_table, offset;
> +	uint32_t offset = 0;
> +	uint32_t *binding_table;
>  
>  	binding_table = batch_alloc(batch, 8, 32);
>  	offset = batch_offset(batch, binding_table);
>  	annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE,
>  			     offset, 8);
> -
>  	binding_table[0] =
>  		gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
>  	binding_table[1] =
> @@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer *batch) {
>  
>  static void
>  gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
> +	if (batch->use_resource_streamer)
> +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> +
>  	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2));
>  
>  	/* general */
> @@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
>  	OUT_BATCH(0xfffff000 | 1);
>  	/* intruction buffer size */
>  	OUT_BATCH(1 << 12 | 1);
> +
> +	if (batch->use_resource_streamer)
> +		OUT_BATCH(MI_RS_CONTROL | 0x1);
>  }
>  
>  static void
> @@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>  	uint32_t scissor_state;
>  	uint32_t vertex_buffer;
>  	uint32_t batch_end;
> +	uint32_t surf0 = 0, surf1 = 1;
>  
>  	intel_batchbuffer_flush_with_context(batch, context);
>  
> @@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>  
>  	annotation_init(&aub_annotations);
>  
> -	ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
> +	if (batch->use_resource_streamer)
> +		ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst,
> +							 &surf0, &surf1);
> +	else
> +		ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
>  	ps_sampler_state  = gen8_create_sampler(batch);
>  	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
>  	vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
> @@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>  
>  	gen8_emit_state_base_address(batch);
>  
> +	if (batch->use_resource_streamer)
> +               gen8_hw_binding_table(batch, true);
> +
>  	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
>  	OUT_BATCH(viewport.cc_state);
>  	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
> @@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>  
>  	gen8_emit_sf(batch);
>  
> +	if (batch->use_resource_streamer)
> +		gen8_rs_edit_surfaces(batch, surf0, surf1);
> +
>  	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
>  	OUT_BATCH(ps_binding_table);
>  
> @@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>  	gen8_emit_vf_topology(batch);
>  	gen8_emit_primitive(batch, vertex_buffer);
>  
> +	if (batch->use_resource_streamer)
> +		gen8_hw_binding_table(batch, false);
> +
>  	OUT_BATCH(MI_BATCH_BUFFER_END);
>  
>  	batch_end = batch_align(batch, 8);
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-06 21:38   ` Ville Syrjälä
@ 2014-05-06 22:09     ` Abdiel Janulgue
  0 siblings, 0 replies; 16+ messages in thread
From: Abdiel Janulgue @ 2014-05-06 22:09 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Wednesday, May 07, 2014 12:38:04 AM Ville Syrjälä wrote:
> On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote:
> > Use on-chip hw-binding table generator to generate binding
> > tables when the test emits SURFACE_STATES packets. The hw generates
> > these binding table packets internally so we don't have to
> > allocate space on the batchbuffer.
> > 
> > Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> > ---
> > 
> >  lib/gen8_render.h       |   13 +++++++
> >  lib/intel_batchbuffer.c |   12 ++++++
> >  lib/intel_batchbuffer.h |    3 ++
> >  lib/intel_reg.h         |    3 ++
> >  lib/rendercopy_gen8.c   |   97
> >  ++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 123
> >  insertions(+), 5 deletions(-)
> > 
> > diff --git a/lib/gen8_render.h b/lib/gen8_render.h
> > index fffc100..4d5f7ba 100644
> > --- a/lib/gen8_render.h
> > +++ b/lib/gen8_render.h
> > @@ -83,6 +83,19 @@
> > 
> >  /* Random shifts */
> >  #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
> > 
> > +/* GEN8+ resource streamer */
> > +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC       (0x7919 << 16) /*
> > GEN8 */ +# define BINDING_TABLE_POOL_ENABLE              0x0800
> > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS          (0x7843 << 16) /*
> > GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS          (0x7844 <<
> > 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS         
> > (0x7845 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS    
> >      (0x7846 << 16) /* GEN8 */ +#define
> > GEN8_3DSTATE_BINDING_TABLE_EDIT_PS          (0x7847 << 16) /* GEN8 */ +
> > +/* Toggling RS needs PIPE_CONTROL SC invalidate */
> > +#define _3DSTATE_PIPE_CONTROL		((0x3 << 29) | (3 << 27) | (2 << 24))
> > +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
> > +
> > 
> >  /* Shamelessly ripped from mesa */
> >  struct gen8_surface_state
> >  {
> > 
> > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> > index e868922..26c9b89 100644
> > --- a/lib/intel_batchbuffer.c
> > +++ b/lib/intel_batchbuffer.c
> > @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer
> > *batch)
> > 
> >  	memset(batch->buffer, 0, sizeof(batch->buffer));
> >  	
> >  	batch->ptr = batch->buffer;
> > 
> > +
> > +	if (batch->hw_bt_pool_bo != NULL) {
> > +		drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> > +		batch->hw_bt_pool_bo = NULL;
> > +	}
> > +
> > +	batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt",
> > +						  131072, 4096);
> > 
> >  }
> >  
> >  /**
> > 
> > @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer
> > *batch)> 
> >  {
> >  
> >  	drm_intel_bo_unreference(batch->bo);
> >  	batch->bo = NULL;
> > 
> > +
> > +	drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> > +	batch->hw_bt_pool_bo = NULL;
> > +
> > 
> >  	free(batch);
> >  
> >  }
> > 
> > diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> > index 49dbcf0..2a516bc 100644
> > --- a/lib/intel_batchbuffer.h
> > +++ b/lib/intel_batchbuffer.h
> > @@ -18,6 +18,9 @@ struct intel_batchbuffer {
> > 
> >  	uint8_t buffer[BATCH_SZ];
> >  	uint8_t *ptr;
> >  	uint8_t *state;
> > 
> > +
> > +	drm_intel_bo *hw_bt_pool_bo;
> > +	int use_resource_streamer;
> > 
> >  };
> >  
> >  struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr
> >  *bufmgr,
> > 
> > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > index 4b3a102..1cb9a29 100644
> > --- a/lib/intel_reg.h
> > +++ b/lib/intel_reg.h
> > @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> > SOFTWARE.> 
> >  #define MI_BATCH_NON_SECURE		(1)
> >  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> > 
> > +/* Resource Streamer */
> > +#define MI_RS_CONTROL           (0x6 << 23)
> > +
> > 
> >  #define MAX_DISPLAY_PIPES	2
> >  
> >  typedef enum {
> > 
> > diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
> > index 6f5a698..b0dd0f3 100644
> > --- a/lib/rendercopy_gen8.c
> > +++ b/lib/rendercopy_gen8.c
> > @@ -169,12 +169,14 @@ static void
> > 
> >  gen6_render_flush(struct intel_batchbuffer *batch,
> >  
> >  		  drm_intel_context *context, uint32_t batch_end)
> >  
> >  {
> > 
> > -	int ret;
> > +	int ret = 0;
> > +	uint32_t flags = 0;
> > 
> >  	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
> > 
> > +	flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
> > 
> >  	if (ret == 0)
> >  	
> >  		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
> > 
> > -						    batch_end, 0);
> > +						    batch_end, flags | I915_EXEC_RENDER);
> > 
> >  	assert(ret == 0);
> >  
> >  }
> > 
> > @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch,
> > struct igt_buf *buf,> 
> >  	return offset;
> >  
> >  }
> > 
> > +static void
> > +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> > +{
> > +	if (!enable) {
> > +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> > +
> > +		OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> > +		/* binding table pool base address */
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +		/* Upper bound */
> > +		OUT_BATCH(0);
> > +
> 
> WaCsStallBeforeStateCacheInvalidate says you need to emit another
> PIPE_CONTROL here with CS stall bit set. And thanks to the other
> restrictions that PIPE_CONTORL needs one of the other magic bits also
> set. Stall at scoreboard perhaps? I think that's what we set in the
> kernel usually.

It used to have the CS stall bit. But it seems to work without it. In any
case, you might be right. It's a good idea to put in back in.
> 
> > +		OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> > +		OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +
> > +		return;
> > +        }
> > +	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> > +
> > +	/* binding table pool base address */
> > +	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> > +                  BINDING_TABLE_POOL_ENABLE);
> > +	OUT_BATCH(0);
> 
> I suppose it doesn't matter much which domain we use here. But the
> hardware writes to the BT pool so maybe you should set write domain
> as well?
> 
> > +
> > +	/* Upper bound */
> > +	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> > +                  batch->hw_bt_pool_bo->size);
> 
> AFAICS on bdw this should be just the bt pool size and not a reloc.
> 
> > +
> 
> Again need another CS stall PIPE_CONTROL.
> 
> > +	OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> > +	OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> > +	OUT_BATCH(0);
> > +	OUT_BATCH(0);
> > +	OUT_BATCH(0);
> > +	OUT_BATCH(0);
> > +}
> > +
> > +static uint32_t
> > +gen8_rs_bind_surfaces(struct intel_batchbuffer *batch,
> > +		      struct igt_buf *src,
> > +		      struct igt_buf *dst,
> > +		      uint32_t *surf0, uint32_t *surf1)
> > +{
> > +	*surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
> > 1);
> > +	*surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
> > 0);
> > +
> > +	return 0;
> > +}
> > +
> > +static void
> > +gen8_rs_edit_surfaces(struct intel_batchbuffer *batch,
> > +		      uint32_t surf0, uint32_t surf1)
> > +{
> > +	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2));
> > +	OUT_BATCH(0x3);
> 
> So I take it there's no need to clear the entries first?

I don't think it's necessary on this case. The clear is only
neded if the binding table pool is re-used from the previous
batch - which might contain stale entries. At least it causes
hungs in Mesa. In this test, we are allocating the pool same
time as the batch is allocated.


> 
> > +	{
> > +		OUT_BATCH(0 << 16 | surf0 >> 6);
> > +		OUT_BATCH(1 << 16 | surf1 >> 6);
> > +	}
> > +}
> > +
> > 
> >  static uint32_t
> >  gen8_bind_surfaces(struct intel_batchbuffer *batch,
> >  
> >  		   struct igt_buf *src,
> >  		   struct igt_buf *dst)
> >  
> >  {
> > 
> > -	uint32_t *binding_table, offset;
> > +	uint32_t offset = 0;
> > +	uint32_t *binding_table;
> > 
> >  	binding_table = batch_alloc(batch, 8, 32);
> >  	offset = batch_offset(batch, binding_table);
> >  	annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE,
> >  	
> >  			     offset, 8);
> > 
> > -
> > 
> >  	binding_table[0] =
> >  	
> >  		gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
> >  	
> >  	binding_table[1] =
> > 
> > @@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer
> > *batch) {> 
> >  static void
> >  gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
> > 
> > +	if (batch->use_resource_streamer)
> > +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> > +
> > 
> >  	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2));
> >  	
> >  	/* general */
> > 
> > @@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer
> > *batch) {> 
> >  	OUT_BATCH(0xfffff000 | 1);
> >  	/* intruction buffer size */
> >  	OUT_BATCH(1 << 12 | 1);
> > 
> > +
> > +	if (batch->use_resource_streamer)
> > +		OUT_BATCH(MI_RS_CONTROL | 0x1);
> > 
> >  }
> >  
> >  static void
> > 
> > @@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	uint32_t scissor_state;
> >  	uint32_t vertex_buffer;
> >  	uint32_t batch_end;
> > 
> > +	uint32_t surf0 = 0, surf1 = 1;
> > 
> >  	intel_batchbuffer_flush_with_context(batch, context);
> > 
> > @@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	annotation_init(&aub_annotations);
> > 
> > -	ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
> > +	if (batch->use_resource_streamer)
> > +		ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst,
> > +							 &surf0, &surf1);
> > +	else
> > +		ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
> > 
> >  	ps_sampler_state  = gen8_create_sampler(batch);
> >  	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
> >  	vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
> > 
> > @@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	gen8_emit_state_base_address(batch);
> > 
> > +	if (batch->use_resource_streamer)
> > +               gen8_hw_binding_table(batch, true);
> > +
> > 
> >  	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
> >  	OUT_BATCH(viewport.cc_state);
> >  	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
> > 
> > @@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	gen8_emit_sf(batch);
> > 
> > +	if (batch->use_resource_streamer)
> > +		gen8_rs_edit_surfaces(batch, surf0, surf1);
> > +
> > 
> >  	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
> >  	OUT_BATCH(ps_binding_table);
> > 
> > @@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	gen8_emit_vf_topology(batch);
> >  	gen8_emit_primitive(batch, vertex_buffer);
> > 
> > +	if (batch->use_resource_streamer)
> > +		gen8_hw_binding_table(batch, false);
> > +
> > 
> >  	OUT_BATCH(MI_BATCH_BUFFER_END);
> >  	
> >  	batch_end = batch_align(batch, 8);


Thanks for the review. I'll update v2 soon to address the comments

-abdiel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
  2014-05-06 21:38   ` Ville Syrjälä
@ 2014-05-07 11:49   ` Ville Syrjälä
  2014-05-07 11:55     ` Chris Wilson
  2014-05-07 20:59     ` Abdiel Janulgue
  1 sibling, 2 replies; 16+ messages in thread
From: Ville Syrjälä @ 2014-05-07 11:49 UTC (permalink / raw)
  To: Abdiel Janulgue; +Cc: intel-gfx

I quickly cobbled together a hsw version of this and gave it a whirl on
one machine. Seems to work just fine here, and no lockups when switching
between hw and sw binding tables. Did you get the lockups on hsw even
with rendercopy?

Here's my hsw version:

>From 17eeb8021815e2c18d6ba9b2185a37904296c2d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 7 May 2014 12:33:01 +0300
Subject: [PATCH] rendercopy: use resource streamer on hsw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 lib/gen7_render.h     |  16 +++++++-
 lib/rendercopy_gen7.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/lib/gen7_render.h b/lib/gen7_render.h
index 1661d4c..58a88ef 100644
--- a/lib/gen7_render.h
+++ b/lib/gen7_render.h
@@ -155,8 +155,11 @@
 #define GEN7_PIPE_CONTROL_IS_FLUSH      (1 << 11)
 #define GEN7_PIPE_CONTROL_TC_FLUSH      (1 << 10)
 #define GEN7_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
-#define GEN7_PIPE_CONTROL_GLOBAL_GTT    (1 << 2)
-#define GEN7_PIPE_CONTROL_LOCAL_PGTT    (0 << 2)
+#define GEN7_PIPE_CONTROL_FLUSH		(1 << 7)
+#define GEN7_PIPE_CONTROL_DC_FLUSH      (1 << 5)
+#define GEN7_PIPE_CONTROL_VF_INVALIDATE (1 << 4)
+#define GEN7_PIPE_CONTROL_CC_INVALIDATE (1 << 2)
+#define GEN7_PIPE_CONTROL_SC_INVALIDATE (1 << 2)
 #define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD   (1 << 1)
 #define GEN7_PIPE_CONTROL_DEPTH_CACHE_FLUSH	(1 << 0)
 
@@ -1361,4 +1364,13 @@ typedef enum {
 	EXTEND_COUNT
 } sampler_extend_t;
 
+/* HSW+ resource streamer */
+#define HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC	GEN7_3D(3, 1, 0x19)
+# define BINDING_TABLE_POOL_ENABLE		(1 << 11)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_VS	GEN7_3D(3, 0, 0x43)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_GS	GEN7_3D(3, 0, 0x44)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_HS	GEN7_3D(3, 0, 0x45)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_DS	GEN7_3D(3, 0, 0x46)
+#define HSW_3DSTATE_BINDING_TABLE_EDIT_PS	GEN7_3D(3, 0, 0x47)
+
 #endif
diff --git a/lib/rendercopy_gen7.c b/lib/rendercopy_gen7.c
index 5131d8f..4efccb9 100644
--- a/lib/rendercopy_gen7.c
+++ b/lib/rendercopy_gen7.c
@@ -21,6 +21,9 @@
 #include "gen7_render.h"
 #include "intel_reg.h"
 
+#ifndef I915_EXEC_RESOURCE_STREAMER
+#define I915_EXEC_RESOURCE_STREAMER (1<<13)
+#endif
 
 static const uint32_t ps_kernel[][4] = {
 	{ 0x0080005a, 0x2e2077bd, 0x000000c0, 0x008d0040 },
@@ -73,11 +76,14 @@ gen7_render_flush(struct intel_batchbuffer *batch,
 		  drm_intel_context *context, uint32_t batch_end)
 {
 	int ret;
+	uint32_t flags = I915_EXEC_RENDER;
 
 	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (batch->use_resource_streamer)
+		flags |= I915_EXEC_RESOURCE_STREAMER;
 	if (ret == 0)
 		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
-						    batch_end, 0);
+						    batch_end, flags);
 	assert(ret == 0);
 }
 
@@ -219,6 +225,75 @@ static void gen7_emit_vertex_buffer(struct intel_batchbuffer *batch,
 	OUT_BATCH(0);
 }
 
+static void
+gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
+{
+	if (!enable) {
+		OUT_BATCH(MI_RS_CONTROL | 0x0);
+
+		OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
+		/* binding table pool base address */
+		OUT_BATCH(3 << 5);
+		/* Upper bound */
+		OUT_BATCH(0);
+
+		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+		OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+
+		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+		OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+
+		return;
+        }
+	OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
+
+	/* binding table pool base address */
+	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  BINDING_TABLE_POOL_ENABLE | (3 << 5));
+
+	/* Upper bound */
+	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+                  batch->hw_bt_pool_bo->size);
+
+	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
+	OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static uint32_t
+gen7_rs_bind_surfaces(struct intel_batchbuffer *batch,
+		      struct igt_buf *src,
+		      struct igt_buf *dst,
+		      uint32_t *surf0, uint32_t *surf1)
+{
+	*surf0 = gen7_bind_buf(batch, dst, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
+	*surf1 = gen7_bind_buf(batch, src, GEN7_SURFACEFORMAT_B8G8R8A8_UNORM, 0);
+
+	return 0;
+}
+
+static void
+gen7_rs_edit_surfaces(struct intel_batchbuffer *batch,
+		      uint32_t surf0, uint32_t surf1)
+{
+	OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2));
+	OUT_BATCH(0x3);
+	{
+		OUT_BATCH(0 << 16 | surf0 >> 5);
+		OUT_BATCH(1 << 16 | surf1 >> 5);
+	}
+}
+
 static uint32_t
 gen7_bind_surfaces(struct intel_batchbuffer *batch,
 		   struct igt_buf *src,
@@ -241,8 +316,19 @@ gen7_emit_binding_table(struct intel_batchbuffer *batch,
 			struct igt_buf *src,
 			struct igt_buf *dst)
 {
+	uint32_t surf0 = 0, surf1 = 1;
+	uint32_t binding_table;
+
+	if (batch->use_resource_streamer) {
+		binding_table = gen7_rs_bind_surfaces(batch, src, dst,
+							 &surf0, &surf1);
+		gen7_rs_edit_surfaces(batch, surf0, surf1);
+	} else {
+		binding_table = gen7_bind_surfaces(batch, src, dst);
+	}
+
 	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-	OUT_BATCH(gen7_bind_surfaces(batch, src, dst));
+	OUT_BATCH(binding_table);
 }
 
 static void
@@ -273,6 +359,9 @@ gen7_create_blend_state(struct intel_batchbuffer *batch)
 static void
 gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 {
+	if (batch->use_resource_streamer)
+		OUT_BATCH(MI_RS_CONTROL | 0x0);
+
 	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
 	OUT_BATCH(0);
 	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
@@ -284,6 +373,9 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
 	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
 	OUT_BATCH(0);
 	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
+
+	if (batch->use_resource_streamer)
+		OUT_BATCH(MI_RS_CONTROL | 0x1);
 }
 
 static uint32_t
@@ -545,6 +637,10 @@ void gen7_render_copyfunc(struct intel_batchbuffer *batch,
 	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
 	gen7_emit_state_base_address(batch);
+
+	if (batch->use_resource_streamer)
+		gen7_hw_binding_table(batch, true);
+
 	gen7_emit_multisample(batch);
 	gen7_emit_urb(batch);
 	gen7_emit_vs(batch);
@@ -576,6 +672,9 @@ void gen7_render_copyfunc(struct intel_batchbuffer *batch,
         OUT_BATCH(0);   /* start instance location */
         OUT_BATCH(0);   /* index buffer offset, ignored */
 
+	if (batch->use_resource_streamer)
+		gen7_hw_binding_table(batch, false);
+
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 
 	batch_end = batch->ptr - batch->buffer;
-- 
1.8.3.2

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-07 11:49   ` Ville Syrjälä
@ 2014-05-07 11:55     ` Chris Wilson
  2014-05-07 20:59     ` Abdiel Janulgue
  1 sibling, 0 replies; 16+ messages in thread
From: Chris Wilson @ 2014-05-07 11:55 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Wed, May 07, 2014 at 02:49:31PM +0300, Ville Syrjälä wrote:
> I quickly cobbled together a hsw version of this and gave it a whirl on
> one machine. Seems to work just fine here, and no lockups when switching
> between hw and sw binding tables. Did you get the lockups on hsw even
> with rendercopy?

This intrigues me that we should do a context continuation rendercpy
(with the state setup split across multiple batches) and lots of context
switching.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-07 11:49   ` Ville Syrjälä
  2014-05-07 11:55     ` Chris Wilson
@ 2014-05-07 20:59     ` Abdiel Janulgue
  2014-05-08  9:54       ` Ville Syrjälä
  1 sibling, 1 reply; 16+ messages in thread
From: Abdiel Janulgue @ 2014-05-07 20:59 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Wednesday, May 07, 2014 02:49:31 PM Ville Syrjälä wrote:
> I quickly cobbled together a hsw version of this and gave it a whirl on
> one machine. Seems to work just fine here, and no lockups when switching
> between hw and sw binding tables. Did you get the lockups on hsw even
> with rendercopy?
> 
> Here's my hsw version:
> 
> 
> +static void
> +gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> +{
> +	if (!enable) {
> +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> +
> +		OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
> +		/* binding table pool base address */

This bit I missed and the source of my troubles for the past few months.

> +		OUT_BATCH(3 << 5);

Yep, I confirm toggling on HSW does work quite well now. I'll now update the 
patches to include HSW path on the kernel. I also take back my previous 
statement that RS is broken on HSW! :)

Thanks a lot Ville!


> +		/* Upper bound */
> +		OUT_BATCH(0);
> +
> +		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
> +		OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
> GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); +		OUT_BATCH(0);
> +		OUT_BATCH(0);
> +
> +		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
> +		OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE);
> +		OUT_BATCH(0);
> +		OUT_BATCH(0);
> +
> +		return;
> +        }

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-07 20:59     ` Abdiel Janulgue
@ 2014-05-08  9:54       ` Ville Syrjälä
  2014-05-08 10:18         ` Ville Syrjälä
  0 siblings, 1 reply; 16+ messages in thread
From: Ville Syrjälä @ 2014-05-08  9:54 UTC (permalink / raw)
  To: Abdiel Janulgue; +Cc: intel-gfx

On Wed, May 07, 2014 at 11:59:23PM +0300, Abdiel Janulgue wrote:
> On Wednesday, May 07, 2014 02:49:31 PM Ville Syrjälä wrote:
> > I quickly cobbled together a hsw version of this and gave it a whirl on
> > one machine. Seems to work just fine here, and no lockups when switching
> > between hw and sw binding tables. Did you get the lockups on hsw even
> > with rendercopy?
> > 
> > Here's my hsw version:
> > 
> > 
> > +static void
> > +gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> > +{
> > +	if (!enable) {
> > +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> > +
> > +		OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
> > +		/* binding table pool base address */
> 
> This bit I missed and the source of my troubles for the past few months.
> 
> > +		OUT_BATCH(3 << 5);
> 
> Yep, I confirm toggling on HSW does work quite well now. I'll now update the 
> patches to include HSW path on the kernel. I also take back my previous 
> statement that RS is broken on HSW! :)

Excellent.

I was wondering a bit if we need to make the kernel turn off the hw
binding tables between batches, but since we now have per fd default
contexts and 3DSTATE_BINDING_TABLE_POOL_ALLOC should be saved in the
context, maybe we don't actually need to do that. Although it seems
like that would cause problems when we switch to the global default
context since we use the restore_inhibit flag there. So maybe we need
to special case the default context here and force the hw binding
tables off when switching to it.

Would be nice to have some igts for this to test how things work when
you combine rs vs. no-rs in different ways using single context,
multiple contexts, and multiple fds.

> 
> Thanks a lot Ville!

I'm glad I could help.

> 
> 
> > +		/* Upper bound */
> > +		OUT_BATCH(0);
> > +
> > +		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
> > +		OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
> > GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +
> > +		OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
> > +		OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE);
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +
> > +		return;
> > +        }

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-08  9:54       ` Ville Syrjälä
@ 2014-05-08 10:18         ` Ville Syrjälä
  2014-05-12 16:40           ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Ville Syrjälä @ 2014-05-08 10:18 UTC (permalink / raw)
  To: Abdiel Janulgue; +Cc: intel-gfx

On Thu, May 08, 2014 at 12:54:47PM +0300, Ville Syrjälä wrote:
> On Wed, May 07, 2014 at 11:59:23PM +0300, Abdiel Janulgue wrote:
> > On Wednesday, May 07, 2014 02:49:31 PM Ville Syrjälä wrote:
> > > I quickly cobbled together a hsw version of this and gave it a whirl on
> > > one machine. Seems to work just fine here, and no lockups when switching
> > > between hw and sw binding tables. Did you get the lockups on hsw even
> > > with rendercopy?
> > > 
> > > Here's my hsw version:
> > > 
> > > 
> > > +static void
> > > +gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> > > +{
> > > +	if (!enable) {
> > > +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> > > +
> > > +		OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
> > > +		/* binding table pool base address */
> > 
> > This bit I missed and the source of my troubles for the past few months.
> > 
> > > +		OUT_BATCH(3 << 5);
> > 
> > Yep, I confirm toggling on HSW does work quite well now. I'll now update the 
> > patches to include HSW path on the kernel. I also take back my previous 
> > statement that RS is broken on HSW! :)
> 
> Excellent.
> 
> I was wondering a bit if we need to make the kernel turn off the hw
> binding tables between batches, but since we now have per fd default
> contexts and 3DSTATE_BINDING_TABLE_POOL_ALLOC should be saved in the
> context, maybe we don't actually need to do that. Although it seems
> like that would cause problems when we switch to the global default
> context since we use the restore_inhibit flag there. So maybe we need
> to special case the default context here and force the hw binding
> tables off when switching to it.

Ah actually we still inhibit restore even with the per-file default
contexts. So I guess it's going to be a problemn whenever we switch
to any default context and the hw binding tables were left enabled.

So I guess there are two options: either drop the restore_inhibit flag
for default contexts (I kind of like this idea but maybe other people
hate it), or have the kernel turn off the hw binding tables when
switching to any default context. The latter would also imply that if
user space wants to use the resource streamer with the default context,
it has to turn the binding tables on at the start of every batch since
it can't know whether the kernel turned them off.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-08 10:18         ` Ville Syrjälä
@ 2014-05-12 16:40           ` Daniel Vetter
  2014-05-16 11:36             ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2014-05-12 16:40 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Thu, May 08, 2014 at 01:18:40PM +0300, Ville Syrjälä wrote:
> On Thu, May 08, 2014 at 12:54:47PM +0300, Ville Syrjälä wrote:
> > On Wed, May 07, 2014 at 11:59:23PM +0300, Abdiel Janulgue wrote:
> > > On Wednesday, May 07, 2014 02:49:31 PM Ville Syrjälä wrote:
> > > > I quickly cobbled together a hsw version of this and gave it a whirl on
> > > > one machine. Seems to work just fine here, and no lockups when switching
> > > > between hw and sw binding tables. Did you get the lockups on hsw even
> > > > with rendercopy?
> > > > 
> > > > Here's my hsw version:
> > > > 
> > > > 
> > > > +static void
> > > > +gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> > > > +{
> > > > +	if (!enable) {
> > > > +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> > > > +
> > > > +		OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
> > > > +		/* binding table pool base address */
> > > 
> > > This bit I missed and the source of my troubles for the past few months.
> > > 
> > > > +		OUT_BATCH(3 << 5);
> > > 
> > > Yep, I confirm toggling on HSW does work quite well now. I'll now update the 
> > > patches to include HSW path on the kernel. I also take back my previous 
> > > statement that RS is broken on HSW! :)
> > 
> > Excellent.
> > 
> > I was wondering a bit if we need to make the kernel turn off the hw
> > binding tables between batches, but since we now have per fd default
> > contexts and 3DSTATE_BINDING_TABLE_POOL_ALLOC should be saved in the
> > context, maybe we don't actually need to do that. Although it seems
> > like that would cause problems when we switch to the global default
> > context since we use the restore_inhibit flag there. So maybe we need
> > to special case the default context here and force the hw binding
> > tables off when switching to it.
> 
> Ah actually we still inhibit restore even with the per-file default
> contexts. So I guess it's going to be a problemn whenever we switch
> to any default context and the hw binding tables were left enabled.
> 
> So I guess there are two options: either drop the restore_inhibit flag
> for default contexts (I kind of like this idea but maybe other people
> hate it), or have the kernel turn off the hw binding tables when
> switching to any default context. The latter would also imply that if
> user space wants to use the resource streamer with the default context,
> it has to turn the binding tables on at the start of every batch since
> it can't know whether the kernel turned them off.

I think as soon as we have the golden context stuff from Mika we could
drop our usage of restore_inhibit. We only need that to avoid the hw
getting angry if the context state is illegal afaik.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-12 16:40           ` Daniel Vetter
@ 2014-05-16 11:36             ` Chris Wilson
  2014-05-16 14:27               ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2014-05-16 11:36 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, May 12, 2014 at 06:40:43PM +0200, Daniel Vetter wrote:
> I think as soon as we have the golden context stuff from Mika we could
> drop our usage of restore_inhibit. We only need that to avoid the hw
> getting angry if the context state is illegal afaik.

Apart from the contexts being over 100x larger than the state required
to switch between clients, and that the current code regressed to always
update the context between every batch.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-16 11:36             ` Chris Wilson
@ 2014-05-16 14:27               ` Daniel Vetter
  2014-05-16 14:45                 ` Chris Wilson
  0 siblings, 1 reply; 16+ messages in thread
From: Daniel Vetter @ 2014-05-16 14:27 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Ville Syrjälä, intel-gfx

On Fri, May 16, 2014 at 12:36:09PM +0100, Chris Wilson wrote:
> On Mon, May 12, 2014 at 06:40:43PM +0200, Daniel Vetter wrote:
> > I think as soon as we have the golden context stuff from Mika we could
> > drop our usage of restore_inhibit. We only need that to avoid the hw
> > getting angry if the context state is illegal afaik.
> 
> Apart from the contexts being over 100x larger than the state required
> to switch between clients, and that the current code regressed to always
> update the context between every batch.

We still have the from == to early return in do_switch. That doesn't do
what it should any more?
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-16 14:27               ` Daniel Vetter
@ 2014-05-16 14:45                 ` Chris Wilson
  2014-05-16 15:07                   ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Chris Wilson @ 2014-05-16 14:45 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, May 16, 2014 at 04:27:34PM +0200, Daniel Vetter wrote:
> On Fri, May 16, 2014 at 12:36:09PM +0100, Chris Wilson wrote:
> > On Mon, May 12, 2014 at 06:40:43PM +0200, Daniel Vetter wrote:
> > > I think as soon as we have the golden context stuff from Mika we could
> > > drop our usage of restore_inhibit. We only need that to avoid the hw
> > > getting angry if the context state is illegal afaik.
> > 
> > Apart from the contexts being over 100x larger than the state required
> > to switch between clients, and that the current code regressed to always
> > update the context between every batch.
> 
> We still have the from == to early return in do_switch. That doesn't do
> what it should any more?

No.

commit 0009e46cd54324c4af20b0b52b89973b1b914167
Author: Ben Widawsky <ben@bwidawsk.net>
Date:   Fri Dec 6 14:11:02 2013 -0800

    drm/i915: Track which ring a context ran on
    
    Previously we dropped the association of a context to a ring. It is
    however very important to know which ring a context ran on (we could
    have reused the other member, but I was nitpicky).
    
    This is very important when we switch address spaces, which unlike
    context objects, do change per ring.
    
    As an example, if we have:
    
            RCS   BCS
    ctx            A
    ctx      A
    ctx      B
    ctx            B
    
    Without tracking the last ring B ran on, we wouldn't know to switch the
    address space on BCS in the last row.
    
    As a result, we no longer need to track which ring a context "belongs"
    to, as it never really made much sense anyway.
    
    Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
    Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

was just baloney.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
  2014-05-16 14:45                 ` Chris Wilson
@ 2014-05-16 15:07                   ` Daniel Vetter
  0 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2014-05-16 15:07 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Ville Syrjälä, intel-gfx

On Fri, May 16, 2014 at 03:45:24PM +0100, Chris Wilson wrote:
> On Fri, May 16, 2014 at 04:27:34PM +0200, Daniel Vetter wrote:
> > On Fri, May 16, 2014 at 12:36:09PM +0100, Chris Wilson wrote:
> > > On Mon, May 12, 2014 at 06:40:43PM +0200, Daniel Vetter wrote:
> > > > I think as soon as we have the golden context stuff from Mika we could
> > > > drop our usage of restore_inhibit. We only need that to avoid the hw
> > > > getting angry if the context state is illegal afaik.
> > > 
> > > Apart from the contexts being over 100x larger than the state required
> > > to switch between clients, and that the current code regressed to always
> > > update the context between every batch.
> > 
> > We still have the from == to early return in do_switch. That doesn't do
> > what it should any more?
> 
> No.
> 
> commit 0009e46cd54324c4af20b0b52b89973b1b914167
> Author: Ben Widawsky <ben@bwidawsk.net>
> Date:   Fri Dec 6 14:11:02 2013 -0800
> 
>     drm/i915: Track which ring a context ran on
>     
>     Previously we dropped the association of a context to a ring. It is
>     however very important to know which ring a context ran on (we could
>     have reused the other member, but I was nitpicky).
>     
>     This is very important when we switch address spaces, which unlike
>     context objects, do change per ring.
>     
>     As an example, if we have:
>     
>             RCS   BCS
>     ctx            A
>     ctx      A
>     ctx      B
>     ctx            B
>     
>     Without tracking the last ring B ran on, we wouldn't know to switch the
>     address space on BCS in the last row.
>     
>     As a result, we no longer need to track which ring a context "belongs"
>     to, as it never really made much sense anyway.
>     
>     Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
>     Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> was just baloney.

Oh, totally forgotten about that fun. I'll add it as a new subtask to the
big full ppgtt jira :(

Thanks digging this out again.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2014-05-16 15:07 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-05-06 19:48 igt tests/rendercopy: Add option to test resource streamer functionality Abdiel Janulgue
2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
2014-05-06 21:38   ` Ville Syrjälä
2014-05-06 22:09     ` Abdiel Janulgue
2014-05-07 11:49   ` Ville Syrjälä
2014-05-07 11:55     ` Chris Wilson
2014-05-07 20:59     ` Abdiel Janulgue
2014-05-08  9:54       ` Ville Syrjälä
2014-05-08 10:18         ` Ville Syrjälä
2014-05-12 16:40           ` Daniel Vetter
2014-05-16 11:36             ` Chris Wilson
2014-05-16 14:27               ` Daniel Vetter
2014-05-16 14:45                 ` Chris Wilson
2014-05-16 15:07                   ` Daniel Vetter
2014-05-06 19:48 ` [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer Abdiel Janulgue
2014-05-06 20:19 ` igt tests/rendercopy: Add option to test resource streamer functionality Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox