From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: stable@vger.kernel.org, Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH] drm/i915/gt: Use the local HWSP offset during submission
Date: Fri, 23 Oct 2020 14:25:08 +0300 [thread overview]
Message-ID: <87r1ppqj57.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20201022064127.10159-1-chris@chris-wilson.co.uk>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> We wrap the timeline on construction of the next request, but there may
> still be requests in flight that have not yet finalized the breadcrumb.
> (The breadcrumb is delayed as we need engine-local offsets, and for the
> virtual engine that is not known until execution.) As such, by the time
> we write to the timeline's HWSP offset it may have changed, and we
> should use the value we preserved in the request instead.
>
> Though the window is small and infrequent (at full flow we can expect a
> timeline's seqno to wrap once every 30 minutes), the impact of writing
> the old seqno into the new HWSP is severe: the old requests are never
> completed, and the new requests are completed before they are even
> submitted.
>
> Fixes: ebece7539242 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v5.2+
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 27 +++++++++++++------
> drivers/gpu/drm/i915/gt/intel_timeline.c | 18 +++++++------
> .../gpu/drm/i915/gt/intel_timeline_types.h | 2 ++
> 3 files changed, 31 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index c22d47cc6701..d0be98b67138 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3597,6 +3597,19 @@ static const struct intel_context_ops execlists_context_ops = {
> .destroy = execlists_context_destroy,
> };
>
> +static u32 hwsp_offset(const struct i915_request *rq)
> +{
> + const struct intel_timeline_cacheline *cl;
> +
> + /* Before the request is executed, the timeline/cachline is fixed */
s/cachline/cacheline
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> +
> + cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
> + if (cl)
> + return cl->ggtt_offset;
> +
> + return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
> +}
> +
> static int gen8_emit_init_breadcrumb(struct i915_request *rq)
> {
> u32 *cs;
> @@ -3619,7 +3632,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
> *cs++ = MI_NOOP;
>
> *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> - *cs++ = i915_request_timeline(rq)->hwsp_offset;
> + *cs++ = hwsp_offset(rq);
> *cs++ = 0;
> *cs++ = rq->fence.seqno - 1;
>
> @@ -4939,11 +4952,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
> return gen8_emit_wa_tail(request, cs);
> }
>
> -static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
> +static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
> {
> - u32 addr = i915_request_active_timeline(request)->hwsp_offset;
> -
> - return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
> + return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
> }
>
> static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
> @@ -4962,7 +4973,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
> cs = gen8_emit_ggtt_write_rcs(cs,
> request->fence.seqno,
> - i915_request_active_timeline(request)->hwsp_offset,
> + hwsp_offset(request),
> PIPE_CONTROL_FLUSH_ENABLE |
> PIPE_CONTROL_CS_STALL);
>
> @@ -4974,7 +4985,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> {
> cs = gen8_emit_ggtt_write_rcs(cs,
> request->fence.seqno,
> - i915_request_active_timeline(request)->hwsp_offset,
> + hwsp_offset(request),
> PIPE_CONTROL_CS_STALL |
> PIPE_CONTROL_TILE_CACHE_FLUSH |
> PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> @@ -5044,7 +5055,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> {
> cs = gen12_emit_ggtt_write_rcs(cs,
> request->fence.seqno,
> - i915_request_active_timeline(request)->hwsp_offset,
> + hwsp_offset(request),
> PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
> PIPE_CONTROL_CS_STALL |
> PIPE_CONTROL_TILE_CACHE_FLUSH |
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index a2f74cefe4c3..7ea94d201fe6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
> return cl;
> }
>
> -static void cacheline_acquire(struct intel_timeline_cacheline *cl)
> +static void cacheline_acquire(struct intel_timeline_cacheline *cl,
> + u32 ggtt_offset)
> {
> - if (cl)
> - i915_active_acquire(&cl->active);
> + if (!cl)
> + return;
> +
> + cl->ggtt_offset = ggtt_offset;
> + i915_active_acquire(&cl->active);
> }
>
> static void cacheline_release(struct intel_timeline_cacheline *cl)
> @@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
> GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
> tl->fence_context, tl->hwsp_offset);
>
> - cacheline_acquire(tl->hwsp_cacheline);
> + cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
> if (atomic_fetch_inc(&tl->pin_count)) {
> cacheline_release(tl->hwsp_cacheline);
> __i915_vma_unpin(tl->hwsp_ggtt);
> @@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
> GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
> tl->fence_context, tl->hwsp_offset);
>
> - cacheline_acquire(cl);
> + cacheline_acquire(cl, tl->hwsp_offset);
> tl->hwsp_cacheline = cl;
>
> *seqno = timeline_advance(tl);
> @@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
> if (err)
> goto out;
>
> - *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
> - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
> -
> + *hwsp = cl->ggtt_offset;
> out:
> i915_active_release(&cl->active);
> return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index 02181c5020db..4474f487f589 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
> struct intel_timeline_hwsp *hwsp;
> void *vaddr;
>
> + u32 ggtt_offset;
> +
> struct rcu_head rcu;
> };
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2020-10-23 11:26 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-21 22:04 [Intel-gfx] [PATCH 1/2] drm/i915/gt: Use the local HWSP offset during submission Chris Wilson
2020-10-21 22:04 ` [Intel-gfx] [PATCH 2/2] drm/i915/selftests: Exercise intel_timeline_read_hwsp() Chris Wilson
2020-10-23 11:26 ` Mika Kuoppala
2020-10-21 22:30 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/2] drm/i915/gt: Use the local HWSP offset during submission Patchwork
2020-10-21 22:31 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-10-21 22:55 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-10-22 1:16 ` [Intel-gfx] [PATCH 1/2] " kernel test robot
2020-10-22 3:36 ` [Intel-gfx] ✗ Fi.CI.IGT: failure for series starting with [1/2] " Patchwork
2020-10-22 6:41 ` [Intel-gfx] [PATCH] " Chris Wilson
2020-10-23 11:25 ` Mika Kuoppala [this message]
2020-10-22 7:01 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with drm/i915/gt: Use the local HWSP offset during submission (rev2) Patchwork
2020-10-22 7:02 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2020-10-22 7:26 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-10-22 9:50 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87r1ppqj57.fsf@gaia.fi.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox