All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 02/33] drm/i915: Measure the required reserved size for request emission
Date: Fri, 25 Jan 2019 10:34:37 +0200	[thread overview]
Message-ID: <87sgxhkuea.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190125023005.1007-2-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Instead of tediously and fragilely counting up the number of dwords
> required to emit the breadcrumb to seal a request, fake a request and
> measure it automatically once during engine setup.
>
> The downside is that this requires a fair amount of mocking to create a
> proper breadcrumb. Still, should be less error prone in future as the
> breadcrumb size fluctuates!

We would be quick to notice a miscount, but this method saves brainpower
and time, especially review time.

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c       | 49 ++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c             | 12 +++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c      | 24 +++++++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  2 +-
>  drivers/gpu/drm/i915/selftests/mock_engine.c |  4 +-
>  5 files changed, 77 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 2f3c71f6d313..883ba208d1c2 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -604,6 +604,47 @@ static void __intel_context_unpin(struct i915_gem_context *ctx,
>  	intel_context_unpin(to_intel_context(ctx, engine));
>  }
>  
> +struct measure_breadcrumb {
> +	struct i915_request rq;
> +	struct i915_timeline timeline;
> +	struct intel_ring ring;
> +	u32 cs[1024];
> +};
> +
> +static int measure_breadcrumb_sz(struct intel_engine_cs *engine)
> +{
> +	struct measure_breadcrumb *frame;
> +	unsigned int dw;
> +
> +	GEM_BUG_ON(!engine->i915->gt.scratch);
> +
> +	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
> +	if (!frame)
> +		return -ENOMEM;
> +
> +	i915_timeline_init(engine->i915, &frame->timeline, engine->name);

You could init with a NULL name. This is so short-lived
that we don't expect to ever debug it. If it ever leaks into
the wild, the blowup would be instant. As it stands, the name
is the same as the real deal.

> +
> +	frame->ring.timeline = &frame->timeline;
> +	frame->ring.vaddr = frame->cs;
> +	frame->ring.size = sizeof(frame->cs);
> +	frame->ring.effective_size = frame->ring.size;
> +	frame->ring.space = frame->ring.size - 8;

Why 2 dwords short? Just curious as it doesn't seem
to matter in this use case.

> +	INIT_LIST_HEAD(&frame->ring.request_list);
> +
> +	frame->rq.i915 = engine->i915;
> +	frame->rq.engine = engine;
> +	frame->rq.ring = &frame->ring;
> +	frame->rq.timeline = &frame->timeline;
> +
> +	dw = engine->emit_breadcrumb(&frame->rq, frame->cs) - frame->cs;
> +	GEM_BUG_ON(dw != engine->emit_breadcrumb_sz);

Peace of mind provided =)

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +
> +	i915_timeline_fini(&frame->timeline);
> +	kfree(frame);
> +
> +	return dw;
> +}
> +
>  /**
>   * intel_engines_init_common - initialize cengine state which might require hw access
>   * @engine: Engine to initialize.
> @@ -657,8 +698,16 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>  	if (ret)
>  		goto err_breadcrumbs;
>  
> +	ret = measure_breadcrumb_sz(engine);
> +	if (ret < 0)
> +		goto err_status_page;
> +
> +	engine->emit_breadcrumb_sz = ret;
> +
>  	return 0;
>  
> +err_status_page:
> +	cleanup_status_page(engine);
>  err_breadcrumbs:
>  	intel_engine_fini_breadcrumbs(engine);
>  err_unpin_preempt:
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 9155cc675924..d2299425cf2f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2051,15 +2051,17 @@ static int gen8_emit_flush_render(struct i915_request *request,
>   * used as a workaround for not being allowed to do lite
>   * restore with HEAD==TAIL (WaIdleLiteRestore).
>   */
> -static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
> +static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
>  {
>  	/* Ensure there's always at least one preemption point per-request. */
>  	*cs++ = MI_ARB_CHECK;
>  	*cs++ = MI_NOOP;
>  	request->wa_tail = intel_ring_offset(request, cs);
> +
> +	return cs;
>  }
>  
> -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
> +static u32 *gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
>  {
>  	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
>  	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
> @@ -2071,11 +2073,11 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
>  	request->tail = intel_ring_offset(request, cs);
>  	assert_ring_tail_valid(request->ring, request->tail);
>  
> -	gen8_emit_wa_tail(request, cs);
> +	return gen8_emit_wa_tail(request, cs);
>  }
>  static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
>  
> -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> +static u32 *gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  {
>  	/* We're using qword write, seqno should be aligned to 8 bytes. */
>  	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
> @@ -2095,7 +2097,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  	request->tail = intel_ring_offset(request, cs);
>  	assert_ring_tail_valid(request->ring, request->tail);
>  
> -	gen8_emit_wa_tail(request, cs);
> +	return gen8_emit_wa_tail(request, cs);
>  }
>  static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index e39e483d8d16..107c4934e2fa 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -299,7 +299,7 @@ gen6_render_ring_flush(struct i915_request *rq, u32 mode)
>  	return 0;
>  }
>  
> -static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
>  	/* First we do the gen6_emit_post_sync_nonzero_flush w/a */
>  	*cs++ = GFX_OP_PIPE_CONTROL(4);
> @@ -327,6 +327,8 @@ static void gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  
>  	rq->tail = intel_ring_offset(rq, cs);
>  	assert_ring_tail_valid(rq->ring, rq->tail);
> +
> +	return cs;
>  }
>  static const int gen6_rcs_emit_breadcrumb_sz = 14;
>  
> @@ -409,7 +411,7 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
>  	return 0;
>  }
>  
> -static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
>  	*cs++ = GFX_OP_PIPE_CONTROL(4);
>  	*cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> @@ -427,10 +429,12 @@ static void gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  
>  	rq->tail = intel_ring_offset(rq, cs);
>  	assert_ring_tail_valid(rq->ring, rq->tail);
> +
> +	return cs;
>  }
>  static const int gen7_rcs_emit_breadcrumb_sz = 6;
>  
> -static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
>  	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW;
>  	*cs++ = intel_hws_seqno_address(rq->engine) | MI_FLUSH_DW_USE_GTT;
> @@ -439,11 +443,13 @@ static void gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  
>  	rq->tail = intel_ring_offset(rq, cs);
>  	assert_ring_tail_valid(rq->ring, rq->tail);
> +
> +	return cs;
>  }
>  static const int gen6_xcs_emit_breadcrumb_sz = 4;
>  
>  #define GEN7_XCS_WA 32
> -static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
>  	int i;
>  
> @@ -466,6 +472,8 @@ static void gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  
>  	rq->tail = intel_ring_offset(rq, cs);
>  	assert_ring_tail_valid(rq->ring, rq->tail);
> +
> +	return cs;
>  }
>  static const int gen7_xcs_emit_breadcrumb_sz = 8 + GEN7_XCS_WA * 3;
>  #undef GEN7_XCS_WA
> @@ -861,7 +869,7 @@ static void i9xx_submit_request(struct i915_request *request)
>  			intel_ring_set_tail(request->ring, request->tail));
>  }
>  
> -static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
>  	*cs++ = MI_FLUSH;
>  
> @@ -874,11 +882,13 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  
>  	rq->tail = intel_ring_offset(rq, cs);
>  	assert_ring_tail_valid(rq->ring, rq->tail);
> +
> +	return cs;
>  }
>  static const int i9xx_emit_breadcrumb_sz = 6;
>  
>  #define GEN5_WA_STORES 8 /* must be at least 1! */
> -static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> +static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
>  	int i;
>  
> @@ -895,6 +905,8 @@ static void gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>  
>  	rq->tail = intel_ring_offset(rq, cs);
>  	assert_ring_tail_valid(rq->ring, rq->tail);
> +
> +	return cs;
>  }
>  static const int gen5_emit_breadcrumb_sz = GEN5_WA_STORES * 3 + 2;
>  #undef GEN5_WA_STORES
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c3ef0f9bf321..479bd53d4ac6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -470,7 +470,7 @@ struct intel_engine_cs {
>  					 unsigned int dispatch_flags);
>  #define I915_DISPATCH_SECURE BIT(0)
>  #define I915_DISPATCH_PINNED BIT(1)
> -	void		(*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
> +	u32		*(*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
>  	int		emit_breadcrumb_sz;
>  
>  	/* Pass the request to the hardware queue (e.g. directly into
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 442ec2aeec81..905318b7ae18 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -159,9 +159,9 @@ static int mock_emit_flush(struct i915_request *request,
>  	return 0;
>  }
>  
> -static void mock_emit_breadcrumb(struct i915_request *request,
> -				 u32 *flags)
> +static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
>  {
> +	return cs;
>  }
>  
>  static void mock_submit_request(struct i915_request *request)
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2019-01-25  8:36 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-25  2:29 [PATCH 01/33] drm/i915/execlists: Move RPCS setup to context pin Chris Wilson
2019-01-25  2:29 ` [PATCH 02/33] drm/i915: Measure the required reserved size for request emission Chris Wilson
2019-01-25  8:34   ` Mika Kuoppala [this message]
2019-01-25  9:52     ` Chris Wilson
2019-01-25  2:29 ` [PATCH 03/33] drm/i915: Remove manual breadcumb counting Chris Wilson
2019-01-25  9:21   ` Mika Kuoppala
2019-01-25  2:29 ` [PATCH 04/33] drm/i915: Compute the HWS offsets explicitly Chris Wilson
2019-01-25  9:26   ` Mika Kuoppala
2019-01-25  2:29 ` [PATCH 05/33] drm/i915/execlists: Suppress preempting self Chris Wilson
2019-01-25  2:29 ` [PATCH 06/33] drm/i915/execlists: Suppress redundant preemption Chris Wilson
2019-01-25  2:29 ` [PATCH 07/33] drm/i915/selftests: Apply a subtest filter Chris Wilson
2019-01-25 11:44   ` Mika Kuoppala
2019-01-25 11:48     ` Chris Wilson
2019-01-29 10:37   ` Joonas Lahtinen
2019-01-25  2:29 ` [PATCH 08/33] drm/i915: Make all GPU resets atomic Chris Wilson
2019-01-25  2:29 ` [PATCH 09/33] drm/i915/guc: Disable global reset Chris Wilson
2019-01-25  2:29 ` [PATCH 10/33] drm/i915: Remove GPU reset dependence on struct_mutex Chris Wilson
2019-01-25 12:50   ` Mika Kuoppala
2019-01-25  2:29 ` [PATCH 11/33] drm/i915/selftests: Trim struct_mutex duration for set-wedged selftest Chris Wilson
2019-01-25  2:29 ` [PATCH 12/33] drm/i915: Issue engine resets onto idle engines Chris Wilson
2019-01-25  2:29 ` [PATCH 13/33] drm/i915: Stop tracking MRU activity on VMA Chris Wilson
2019-01-25  2:29 ` [PATCH 14/33] drm/i915: Pull VM lists under the VM mutex Chris Wilson
2019-01-25  2:29 ` [PATCH 15/33] drm/i915: Move vma lookup to its own lock Chris Wilson
2019-01-25  2:29 ` [PATCH 16/33] drm/i915: Always allocate an object/vma for the HWSP Chris Wilson
2019-01-25  2:29 ` [PATCH 17/33] drm/i915: Add timeline barrier support Chris Wilson
2019-01-25  2:29 ` [PATCH 18/33] drm/i915: Move list of timelines under its own lock Chris Wilson
2019-01-25  2:29 ` [PATCH 19/33] drm/i915: Introduce concept of per-timeline (context) HWSP Chris Wilson
2019-01-25  2:29 ` [PATCH 20/33] drm/i915: Enlarge vma->pin_count Chris Wilson
2019-01-25  2:29 ` [PATCH 21/33] drm/i915: Allocate a status page for each timeline Chris Wilson
2019-01-25  2:29 ` [PATCH 22/33] drm/i915: Share per-timeline HWSP using a slab suballocator Chris Wilson
2019-01-25  2:29 ` [PATCH 23/33] drm/i915: Track the context's seqno in its own timeline HWSP Chris Wilson
2019-01-25  2:29 ` [PATCH 24/33] drm/i915: Track active timelines Chris Wilson
2019-01-25  2:29 ` [PATCH 25/33] drm/i915: Identify active requests Chris Wilson
2019-01-25  2:29 ` [PATCH 26/33] drm/i915: Remove the intel_engine_notify tracepoint Chris Wilson
2019-01-25 14:10   ` Tvrtko Ursulin
2019-01-25  2:29 ` [PATCH 27/33] drm/i915: Replace global breadcrumbs with per-context interrupt tracking Chris Wilson
2019-01-25 13:54   ` Tvrtko Ursulin
2019-01-25 14:26     ` Chris Wilson
2019-01-25 14:39       ` Chris Wilson
2019-01-25  2:30 ` [PATCH 28/33] drm/i915: Drop fake breadcrumb irq Chris Wilson
2019-01-25 11:07   ` Tvrtko Ursulin
2019-01-25  2:30 ` [PATCH 29/33] drm/i915: Implement an "idle" barrier Chris Wilson
2019-01-25  8:43   ` Chris Wilson
2019-01-25  2:30 ` [PATCH 30/33] drm/i915: Keep timeline HWSP allocated until the system is idle Chris Wilson
2019-01-25  2:30 ` [PATCH 31/33] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2019-01-25  2:30 ` [PATCH 32/33] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Chris Wilson
2019-01-25  2:30 ` [PATCH 33/33] drm/i915: Prioritise non-busywait semaphore workloads Chris Wilson
2019-01-25  3:13 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/33] drm/i915/execlists: Move RPCS setup to context pin Patchwork
2019-01-25  3:27 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-01-25  3:34 ` ✗ Fi.CI.BAT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87sgxhkuea.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.