From: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
To: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
Date: Wed, 7 May 2014 00:38:04 +0300 [thread overview]
Message-ID: <20140506213804.GD18465@intel.com> (raw)
In-Reply-To: <1399405683-27409-2-git-send-email-abdiel.janulgue@linux.intel.com>
On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote:
> Use on-chip hw-binding table generator to generate binding
> tables when the test emits SURFACE_STATES packets. The hw generates
> these binding table packets internally so we don't have to
> allocate space on the batchbuffer.
>
> Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
> ---
> lib/gen8_render.h | 13 +++++++
> lib/intel_batchbuffer.c | 12 ++++++
> lib/intel_batchbuffer.h | 3 ++
> lib/intel_reg.h | 3 ++
> lib/rendercopy_gen8.c | 97 ++++++++++++++++++++++++++++++++++++++++++++---
> 5 files changed, 123 insertions(+), 5 deletions(-)
>
> diff --git a/lib/gen8_render.h b/lib/gen8_render.h
> index fffc100..4d5f7ba 100644
> --- a/lib/gen8_render.h
> +++ b/lib/gen8_render.h
> @@ -83,6 +83,19 @@
> /* Random shifts */
> #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
>
> +/* GEN8+ resource streamer */
> +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC (0x7919 << 16) /* GEN8 */
> +# define BINDING_TABLE_POOL_ENABLE 0x0800
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS (0x7843 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS (0x7844 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS (0x7845 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS (0x7846 << 16) /* GEN8 */
> +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS (0x7847 << 16) /* GEN8 */
> +
> +/* Toggling RS needs PIPE_CONTROL SC invalidate */
> +#define _3DSTATE_PIPE_CONTROL ((0x3 << 29) | (3 << 27) | (2 << 24))
> +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
> +
> /* Shamelessly ripped from mesa */
> struct gen8_surface_state
> {
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index e868922..26c9b89 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
> memset(batch->buffer, 0, sizeof(batch->buffer));
>
> batch->ptr = batch->buffer;
> +
> + if (batch->hw_bt_pool_bo != NULL) {
> + drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> + batch->hw_bt_pool_bo = NULL;
> + }
> +
> + batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt",
> + 131072, 4096);
> }
>
> /**
> @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
> {
> drm_intel_bo_unreference(batch->bo);
> batch->bo = NULL;
> +
> + drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> + batch->hw_bt_pool_bo = NULL;
> +
> free(batch);
> }
>
> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> index 49dbcf0..2a516bc 100644
> --- a/lib/intel_batchbuffer.h
> +++ b/lib/intel_batchbuffer.h
> @@ -18,6 +18,9 @@ struct intel_batchbuffer {
> uint8_t buffer[BATCH_SZ];
> uint8_t *ptr;
> uint8_t *state;
> +
> + drm_intel_bo *hw_bt_pool_bo;
> + int use_resource_streamer;
> };
>
> struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr,
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index 4b3a102..1cb9a29 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> #define MI_BATCH_NON_SECURE (1)
> #define MI_BATCH_NON_SECURE_I965 (1 << 8)
>
> +/* Resource Streamer */
> +#define MI_RS_CONTROL (0x6 << 23)
> +
> #define MAX_DISPLAY_PIPES 2
>
> typedef enum {
> diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
> index 6f5a698..b0dd0f3 100644
> --- a/lib/rendercopy_gen8.c
> +++ b/lib/rendercopy_gen8.c
> @@ -169,12 +169,14 @@ static void
> gen6_render_flush(struct intel_batchbuffer *batch,
> drm_intel_context *context, uint32_t batch_end)
> {
> - int ret;
> + int ret = 0;
> + uint32_t flags = 0;
>
> ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
> + flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
> if (ret == 0)
> ret = drm_intel_gem_bo_context_exec(batch->bo, context,
> - batch_end, 0);
> + batch_end, flags | I915_EXEC_RENDER);
> assert(ret == 0);
> }
>
> @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf,
> return offset;
> }
>
> +static void
> +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> +{
> + if (!enable) {
> + OUT_BATCH(MI_RS_CONTROL | 0x0);
> +
> + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> + /* binding table pool base address */
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> + /* Upper bound */
> + OUT_BATCH(0);
> +
WaCsStallBeforeStateCacheInvalidate says you need to emit another
PIPE_CONTROL here with CS stall bit set. And thanks to the other
restrictions that PIPE_CONTORL needs one of the other magic bits also
set. Stall at scoreboard perhaps? I think that's what we set in the
kernel usually.
> + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> +
> + return;
> + }
> + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
> +
> + /* binding table pool base address */
> + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> + BINDING_TABLE_POOL_ENABLE);
> + OUT_BATCH(0);
I suppose it doesn't matter much which domain we use here. But the
hardware writes to the BT pool so maybe you should set write domain
as well?
> +
> + /* Upper bound */
> + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
> + batch->hw_bt_pool_bo->size);
AFAICS on bdw this should be just the bt pool size and not a reloc.
> +
Again need another CS stall PIPE_CONTROL.
> + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> +}
> +
> +static uint32_t
> +gen8_rs_bind_surfaces(struct intel_batchbuffer *batch,
> + struct igt_buf *src,
> + struct igt_buf *dst,
> + uint32_t *surf0, uint32_t *surf1)
> +{
> + *surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
> + *surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 0);
> +
> + return 0;
> +}
> +
> +static void
> +gen8_rs_edit_surfaces(struct intel_batchbuffer *batch,
> + uint32_t surf0, uint32_t surf1)
> +{
> + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2));
> + OUT_BATCH(0x3);
So I take it there's no need to clear the entries first?
> + {
> + OUT_BATCH(0 << 16 | surf0 >> 6);
> + OUT_BATCH(1 << 16 | surf1 >> 6);
> + }
> +}
> +
> static uint32_t
> gen8_bind_surfaces(struct intel_batchbuffer *batch,
> struct igt_buf *src,
> struct igt_buf *dst)
> {
> - uint32_t *binding_table, offset;
> + uint32_t offset = 0;
> + uint32_t *binding_table;
>
> binding_table = batch_alloc(batch, 8, 32);
> offset = batch_offset(batch, binding_table);
> annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE,
> offset, 8);
> -
> binding_table[0] =
> gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1);
> binding_table[1] =
> @@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer *batch) {
>
> static void
> gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
> + if (batch->use_resource_streamer)
> + OUT_BATCH(MI_RS_CONTROL | 0x0);
> +
> OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2));
>
> /* general */
> @@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch) {
> OUT_BATCH(0xfffff000 | 1);
> /* intruction buffer size */
> OUT_BATCH(1 << 12 | 1);
> +
> + if (batch->use_resource_streamer)
> + OUT_BATCH(MI_RS_CONTROL | 0x1);
> }
>
> static void
> @@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
> uint32_t scissor_state;
> uint32_t vertex_buffer;
> uint32_t batch_end;
> + uint32_t surf0 = 0, surf1 = 1;
>
> intel_batchbuffer_flush_with_context(batch, context);
>
> @@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>
> annotation_init(&aub_annotations);
>
> - ps_binding_table = gen8_bind_surfaces(batch, src, dst);
> + if (batch->use_resource_streamer)
> + ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst,
> + &surf0, &surf1);
> + else
> + ps_binding_table = gen8_bind_surfaces(batch, src, dst);
> ps_sampler_state = gen8_create_sampler(batch);
> ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel));
> vertex_buffer = gen7_fill_vertex_buffer_data(batch, src,
> @@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>
> gen8_emit_state_base_address(batch);
>
> + if (batch->use_resource_streamer)
> + gen8_hw_binding_table(batch, true);
> +
> OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC);
> OUT_BATCH(viewport.cc_state);
> OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
> @@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
>
> gen8_emit_sf(batch);
>
> + if (batch->use_resource_streamer)
> + gen8_rs_edit_surfaces(batch, surf0, surf1);
> +
> OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS);
> OUT_BATCH(ps_binding_table);
>
> @@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch,
> gen8_emit_vf_topology(batch);
> gen8_emit_primitive(batch, vertex_buffer);
>
> + if (batch->use_resource_streamer)
> + gen8_hw_binding_table(batch, false);
> +
> OUT_BATCH(MI_BATCH_BUFFER_END);
>
> batch_end = batch_align(batch, 8);
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel OTC
next prev parent reply other threads:[~2014-05-06 21:38 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-06 19:48 igt tests/rendercopy: Add option to test resource streamer functionality Abdiel Janulgue
2014-05-06 19:48 ` [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables Abdiel Janulgue
2014-05-06 21:38 ` Ville Syrjälä [this message]
2014-05-06 22:09 ` Abdiel Janulgue
2014-05-07 11:49 ` Ville Syrjälä
2014-05-07 11:55 ` Chris Wilson
2014-05-07 20:59 ` Abdiel Janulgue
2014-05-08 9:54 ` Ville Syrjälä
2014-05-08 10:18 ` Ville Syrjälä
2014-05-12 16:40 ` Daniel Vetter
2014-05-16 11:36 ` Chris Wilson
2014-05-16 14:27 ` Daniel Vetter
2014-05-16 14:45 ` Chris Wilson
2014-05-16 15:07 ` Daniel Vetter
2014-05-06 19:48 ` [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer Abdiel Janulgue
2014-05-06 20:19 ` igt tests/rendercopy: Add option to test resource streamer functionality Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140506213804.GD18465@intel.com \
--to=ville.syrjala@linux.intel.com \
--cc=abdiel.janulgue@linux.intel.com \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.