All of lore.kernel.org
 help / color / mirror / Atom feed
From: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
To: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
Date: Wed, 07 May 2014 01:09:01 +0300	[thread overview]
Message-ID: <7457677.5dFOX2uRul@abj-desktop> (raw)
In-Reply-To: <20140506213804.GD18465@intel.com>

On Wednesday, May 07, 2014 12:38:04 AM Ville Syrjälä wrote:
> On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote:
> > Use on-chip hw-binding table generator to generate binding
> > tables when the test emits SURFACE_STATES packets. The hw generates
> > these binding table packets internally so we don't have to
> > allocate space on the batchbuffer.
> > 
> > Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> > ---
> > 
> >  lib/gen8_render.h       |   13 +++++++
> >  lib/intel_batchbuffer.c |   12 ++++++
> >  lib/intel_batchbuffer.h |    3 ++
> >  lib/intel_reg.h         |    3 ++
> >  lib/rendercopy_gen8.c   |   97
> >  ++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 123
> >  insertions(+), 5 deletions(-)
> > 
> > diff --git a/lib/gen8_render.h b/lib/gen8_render.h
> > index fffc100..4d5f7ba 100644
> > --- a/lib/gen8_render.h
> > +++ b/lib/gen8_render.h
> > @@ -83,6 +83,19 @@
> > 
> >  /* Random shifts */
> >  #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
> > 
> > +/* GEN8+ resource streamer */
> > +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC       (0x7919 << 16) /*
> > GEN8 */ +# define BINDING_TABLE_POOL_ENABLE              0x0800
> > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS          (0x7843 << 16) /*
> > GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS          (0x7844 <<
> > 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS         
> > (0x7845 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS    
> >      (0x7846 << 16) /* GEN8 */ +#define
> > GEN8_3DSTATE_BINDING_TABLE_EDIT_PS          (0x7847 << 16) /* GEN8 */ +
> > +/* Toggling RS needs PIPE_CONTROL SC invalidate */
> > +#define _3DSTATE_PIPE_CONTROL		((0x3 << 29) | (3 << 27) | (2 << 24))
> > +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE	(1 << 2)
> > +
> > 
> >  /* Shamelessly ripped from mesa */
> >  struct gen8_surface_state
> >  {
> > 
> > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> > index e868922..26c9b89 100644
> > --- a/lib/intel_batchbuffer.c
> > +++ b/lib/intel_batchbuffer.c
> > @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer
> > *batch)
> > 
> >  	memset(batch->buffer, 0, sizeof(batch->buffer));
> >  	
> >  	batch->ptr = batch->buffer;
> > 
> > +
> > +	if (batch->hw_bt_pool_bo != NULL) {
> > +		drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> > +		batch->hw_bt_pool_bo = NULL;
> > +	}
> > +
> > +	batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt",
> > +						  131072, 4096);
> > 
> >  }
> >  
> >  /**
> > 
> > @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer
> > *batch)> 
> >  {
> >  
> >  	drm_intel_bo_unreference(batch->bo);
> >  	batch->bo = NULL;
> > 
> > +
> > +	drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> > +	batch->hw_bt_pool_bo = NULL;
> > +
> > 
> >  	free(batch);
> >  
> >  }
> > 
> > diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> > index 49dbcf0..2a516bc 100644
> > --- a/lib/intel_batchbuffer.h
> > +++ b/lib/intel_batchbuffer.h
> > @@ -18,6 +18,9 @@ struct intel_batchbuffer {
> > 
> >  	uint8_t buffer[BATCH_SZ];
> >  	uint8_t *ptr;
> >  	uint8_t *state;
> > 
> > +
> > +	drm_intel_bo *hw_bt_pool_bo;
> > +	int use_resource_streamer;
> > 
> >  };
> >  
> >  struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr
> >  *bufmgr,
> > 
> > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > index 4b3a102..1cb9a29 100644
> > --- a/lib/intel_reg.h
> > +++ b/lib/intel_reg.h
> > @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> > SOFTWARE.> 
> >  #define MI_BATCH_NON_SECURE		(1)
> >  #define MI_BATCH_NON_SECURE_I965	(1 << 8)
> > 
> > +/* Resource Streamer */
> > +#define MI_RS_CONTROL           (0x6 << 23)
> > +
> > 
> >  #define MAX_DISPLAY_PIPES	2
> >  
> >  typedef enum {
> > 
> > diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
> > index 6f5a698..b0dd0f3 100644
> > --- a/lib/rendercopy_gen8.c
> > +++ b/lib/rendercopy_gen8.c
> > @@ -169,12 +169,14 @@ static void
> > 
> >  gen6_render_flush(struct intel_batchbuffer *batch,
> >  
> >  		  drm_intel_context *context, uint32_t batch_end)
> >  
> >  {
> > 
> > -	int ret;
> > +	int ret = 0;
> > +	uint32_t flags = 0;
> > 
> >  	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
> > 
> > +	flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
> > 
> >  	if (ret == 0)
> >  	
> >  		ret = drm_intel_gem_bo_context_exec(batch->bo, context,
> > 
> > -						    batch_end, 0);
> > +						    batch_end, flags | I915_EXEC_RENDER);
> > 
> >  	assert(ret == 0);
> >  
> >  }
> > 
> > @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch,
> > struct igt_buf *buf,> 
> >  	return offset;
> >  
> >  }
> > 
> > +static void
> > +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> > +{
> > +	if (!enable) {
> > +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> > +
> > +		OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> > +		/* binding table pool base address */
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +		/* Upper bound */
> > +		OUT_BATCH(0);
> > +
> 
> WaCsStallBeforeStateCacheInvalidate says you need to emit another
> PIPE_CONTROL here with CS stall bit set. And thanks to the other
> restrictions that PIPE_CONTORL needs one of the other magic bits also
> set. Stall at scoreboard perhaps? I think that's what we set in the
> kernel usually.

It used to have the CS stall bit. But it seems to work without it. In any
case, you might be right. It's a good idea to put in back in.
> 
> > +		OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> > +		OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +		OUT_BATCH(0);
> > +
> > +		return;
> > +        }
> > +	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> > +
> > +	/* binding table pool base address */
> > +	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> > +                  BINDING_TABLE_POOL_ENABLE);
> > +	OUT_BATCH(0);
> 
> I suppose it doesn't matter much which domain we use here. But the
> hardware writes to the BT pool so maybe you should set write domain
> as well?
> 
> > +
> > +	/* Upper bound */
> > +	OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> > +                  batch->hw_bt_pool_bo->size);
> 
> AFAICS on bdw this should be just the bt pool size and not a reloc.
> 
> > +
> 
> Again need another CS stall PIPE_CONTROL.
> 
> > +	OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> > +	OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> > +	OUT_BATCH(0);
> > +	OUT_BATCH(0);
> > +	OUT_BATCH(0);
> > +	OUT_BATCH(0);
> > +}
> > +
> > +static uint32_t
> > +gen8_rs_bind_surfaces(struct intel_batchbuffer *batch,
> > +		      struct igt_buf *src,
> > +		      struct igt_buf *dst,
> > +		      uint32_t *surf0, uint32_t *surf1)
> > +{
> > +	*surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
> > 1);
> > +	*surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
> > 0);
> > +
> > +	return 0;
> > +}
> > +
> > +static void
> > +gen8_rs_edit_surfaces(struct intel_batchbuffer *batch,
> > +		      uint32_t surf0, uint32_t surf1)
> > +{
> > +	OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2));
> > +	OUT_BATCH(0x3);
> 
> So I take it there's no need to clear the entries first?

I don't think it's necessary on this case. The clear is only
neded if the binding table pool is re-used from the previous
batch - which might contain stale entries. At least it causes
hungs in Mesa. In this test, we are allocating the pool same
time as the batch is allocated.


> 
> > +	{
> > +		OUT_BATCH(0 << 16 | surf0 >> 6);
> > +		OUT_BATCH(1 << 16 | surf1 >> 6);
> > +	}
> > +}
> > +
> > 
> >  static uint32_t
> >  gen8_bind_surfaces(struct intel_batchbuffer *batch,
> >  
> >  		   struct igt_buf *src,
> >  		   struct igt_buf *dst)
> >  
> >  {
> > 
> > -	uint32_t *binding_table, offset;
> > +	uint32_t offset = 0;
> > +	uint32_t *binding_table;
> > 
> >  	binding_table = batch_alloc(batch, 8, 32);
> >  	offset = batch_offset(batch, binding_table);
> >  	annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE,
> >  	
> >  			     offset, 8);
> > 
> > -
> > 
> >  	binding_table[0] =
> >  	
> >  		gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
> >  	
> >  	binding_table[1] =
> > 
> > @@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer
> > *batch) {> 
> >  static void
> >  gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
> > 
> > +	if (batch->use_resource_streamer)
> > +		OUT_BATCH(MI_RS_CONTROL | 0x0);
> > +
> > 
> >  	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2));
> >  	
> >  	/* general */
> > 
> > @@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer
> > *batch) {> 
> >  	OUT_BATCH(0xfffff000 | 1);
> >  	/* intruction buffer size */
> >  	OUT_BATCH(1 << 12 | 1);
> > 
> > +
> > +	if (batch->use_resource_streamer)
> > +		OUT_BATCH(MI_RS_CONTROL | 0x1);
> > 
> >  }
> >  
> >  static void
> > 
> > @@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	uint32_t scissor_state;
> >  	uint32_t vertex_buffer;
> >  	uint32_t batch_end;
> > 
> > +	uint32_t surf0 = 0, surf1 = 1;
> > 
> >  	intel_batchbuffer_flush_with_context(batch, context);
> > 
> > @@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	annotation_init(&aub_annotations);
> > 
> > -	ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
> > +	if (batch->use_resource_streamer)
> > +		ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst,
> > +							 &surf0, &surf1);
> > +	else
> > +		ps_binding_table  = gen8_bind_surfaces(batch, src, dst);
> > 
> >  	ps_sampler_state  = gen8_create_sampler(batch);
> >  	ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
> >  	vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
> > 
> > @@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	gen8_emit_state_base_address(batch);
> > 
> > +	if (batch->use_resource_streamer)
> > +               gen8_hw_binding_table(batch, true);
> > +
> > 
> >  	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
> >  	OUT_BATCH(viewport.cc_state);
> >  	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
> > 
> > @@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	gen8_emit_sf(batch);
> > 
> > +	if (batch->use_resource_streamer)
> > +		gen8_rs_edit_surfaces(batch, surf0, surf1);
> > +
> > 
> >  	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
> >  	OUT_BATCH(ps_binding_table);
> > 
> > @@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer
> > *batch,> 
> >  	gen8_emit_vf_topology(batch);
> >  	gen8_emit_primitive(batch, vertex_buffer);
> > 
> > +	if (batch->use_resource_streamer)
> > +		gen8_hw_binding_table(batch, false);
> > +
> > 
> >  	OUT_BATCH(MI_BATCH_BUFFER_END);
> >  	
> >  	batch_end = batch_align(batch, 8);


Thanks for the review. I'll update v2 soon to address the comments

-abdiel

  reply	other threads:[~2014-05-06 22:03 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-06 19:48 igt tests/rendercopy: Add option to test resource streamer functionality Abdiel Janulgue
2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
2014-05-06 21:38   ` Ville Syrjälä
2014-05-06 22:09     ` Abdiel Janulgue [this message]
2014-05-07 11:49   ` Ville Syrjälä
2014-05-07 11:55     ` Chris Wilson
2014-05-07 20:59     ` Abdiel Janulgue
2014-05-08  9:54       ` Ville Syrjälä
2014-05-08 10:18         ` Ville Syrjälä
2014-05-12 16:40           ` Daniel Vetter
2014-05-16 11:36             ` Chris Wilson
2014-05-16 14:27               ` Daniel Vetter
2014-05-16 14:45                 ` Chris Wilson
2014-05-16 15:07                   ` Daniel Vetter
2014-05-06 19:48 ` [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer Abdiel Janulgue
2014-05-06 20:19 ` igt tests/rendercopy: Add option to test resource streamer functionality Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7457677.5dFOX2uRul@abj-desktop \
    --to=abdiel.janulgue@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=ville.syrjala@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.