From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore
Date: Tue, 25 Jun 2019 13:25:21 +0300 [thread overview]
Message-ID: <87blym55da.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190624130707.13292-1-chris@chris-wilson.co.uk>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Not all mediated environments yet support HW semaphores, so we should
> avoid using one for our preempt-to-busy barrier and instead request that
> the CS be paused and not advance on to the next execlist.
>
> There's a natural advantage with the register writes being serialised
> with the writes to the ELSP register that should allow the barrier to be
> much more constrained. On the other hand, we can no longer track the
> extents of the barrier and so must be a lot more lenient in accepting
> early CS events.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 8 +++
> drivers/gpu/drm/i915/gt/intel_lrc.c | 69 ++++++++------------
> 2 files changed, 35 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 7e056114344e..1353df264236 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -166,6 +166,8 @@ struct intel_engine_execlists {
> */
> bool no_priolist;
>
> + u16 ring_mode;
> +
> /**
> * @submit_reg: gen-specific execlist submission register
> * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
> @@ -179,6 +181,12 @@ struct intel_engine_execlists {
> */
> u32 __iomem *ctrl_reg;
>
> + /**
> + * @ring_mode: the engine control register, used to freeze the command
> + * streamer around preempt-to-busy
> + */
> + u32 __iomem *ring_reg;
> +
> #define EXECLIST_MAX_PORTS 2
> /**
> * @active: the currently known context executing on HW
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 28685ba91a2c..c2aaab4b743e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -224,24 +224,18 @@ static void execlists_init_reg_state(u32 *reg_state,
> struct intel_engine_cs *engine,
> struct intel_ring *ring);
>
> -static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> +static inline void ring_set_paused(struct intel_engine_cs *engine, u32 state)
> {
> - return (i915_ggtt_offset(engine->status_page.vma) +
> - I915_GEM_HWS_PREEMPT_ADDR);
> -}
> + u16 x;
>
> -static inline void
> -ring_set_paused(const struct intel_engine_cs *engine, int state)
> -{
> - /*
> - * We inspect HWS_PREEMPT with a semaphore inside
> - * engine->emit_fini_breadcrumb. If the dword is true,
> - * the ring is paused as the semaphore will busywait
> - * until the dword is false.
> - */
> - engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
> - if (state)
> - wmb();
> + x = engine->execlists.ring_mode;
> + x &= ~(state >> 16);
> + x |= state;
> + if (x == engine->execlists.ring_mode)
> + return;
You want to keep the state to reduce noneffective writes?
I would like ring_freeze() and ring_thaw() and
the state parameter removed, as I don't see the
value in callsites to macro the masked bits.
> +
> + engine->execlists.ring_mode = x;
> + writel(state, engine->execlists.ring_reg);
> }
>
> static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> @@ -981,7 +975,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> * as we unwind (and until we resubmit) so that we do
> * not accidentally tell it to go backwards.
> */
> - ring_set_paused(engine, 1);
> + ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
>
> /*
> * Note that we have not stopped the GPU at this point,
> @@ -1010,7 +1004,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> last->sched.attr.priority,
> execlists->queue_priority_hint);
>
> - ring_set_paused(engine, 1);
> + ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
> defer_active(engine);
>
> /*
> @@ -1244,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> *port = execlists_schedule_in(last, port - execlists->pending);
> memset(port + 1, 0, (last_port - port) * sizeof(*port));
> execlists_submit_ports(engine);
> - } else {
> - ring_set_paused(engine, 0);
> }
> +
> + if (!inject_preempt_hang(execlists))
> + ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
This is just a superset of the previous unpausing now?
> }
>
> void
> @@ -1355,9 +1350,6 @@ static void process_csb(struct intel_engine_cs *engine)
>
> if (enable_timeslice(engine))
> mod_timer(&execlists->timer, jiffies + 1);
> -
> - if (!inject_preempt_hang(execlists))
> - ring_set_paused(engine, 0);
> } else if (status & GEN8_CTX_STATUS_PREEMPTED) {
> struct i915_request * const *port = execlists->active;
>
> @@ -1378,8 +1370,15 @@ static void process_csb(struct intel_engine_cs *engine)
> * ordered, that the breadcrumb write is
> * coherent (visible from the CPU) before the
> * user interrupt and CSB is processed.
> + *
> + * The caveat here applies when we are injecting
> + * a completion event via manipulation of the
> + * RING_MI_MODE; this may occur before the
> + * request is completed and appears as a
> + * normal context-switch (0x14).
As in we unpause and get a completion event?
-Mika
> */
> - GEM_BUG_ON(!i915_request_completed(rq));
> + GEM_BUG_ON(!i915_request_completed(rq) &&
> + !execlists->pending[0]);
> execlists_schedule_out(rq);
>
> GEM_BUG_ON(execlists->active - execlists->inflight >
> @@ -2133,7 +2132,7 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
> struct intel_engine_execlists * const execlists = &engine->execlists;
> const unsigned int reset_value = execlists->csb_size - 1;
>
> - ring_set_paused(engine, 0);
> + ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
>
> /*
> * After a reset, the HW starts writing into CSB entry [0]. We
> @@ -2581,19 +2580,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
> return cs;
> }
>
> -static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
> -{
> - *cs++ = MI_SEMAPHORE_WAIT |
> - MI_SEMAPHORE_GLOBAL_GTT |
> - MI_SEMAPHORE_POLL |
> - MI_SEMAPHORE_SAD_EQ_SDD;
> - *cs++ = 0;
> - *cs++ = intel_hws_preempt_address(request->engine);
> - *cs++ = 0;
> -
> - return cs;
> -}
> -
> static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
> {
> cs = gen8_emit_ggtt_write(cs,
> @@ -2601,9 +2587,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
> request->timeline->hwsp_offset,
> 0);
> *cs++ = MI_USER_INTERRUPT;
> -
> *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> - cs = emit_preempt_busywait(request, cs);
>
> request->tail = intel_ring_offset(request, cs);
> assert_ring_tail_valid(request->ring, request->tail);
> @@ -2625,9 +2609,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> PIPE_CONTROL_CS_STALL,
> 0);
> *cs++ = MI_USER_INTERRUPT;
> -
> *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> - cs = emit_preempt_busywait(request, cs);
>
> request->tail = intel_ring_offset(request, cs);
> assert_ring_tail_valid(request->ring, request->tail);
> @@ -2807,6 +2789,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
> execlists->csb_write =
> &engine->status_page.addr[intel_hws_csb_write_index(i915)];
>
> + execlists->ring_reg =
> + uncore->regs + i915_mmio_reg_offset(RING_MI_MODE(base));
> +
> if (INTEL_GEN(i915) < 11)
> execlists->csb_size = GEN8_CSB_ENTRIES;
> else
> --
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-06-25 10:25 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-24 13:07 [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Chris Wilson
2019-06-24 13:07 ` [PATCH 2/3] drm/i915: Emit final breadcrumb in request_add Chris Wilson
2019-06-24 13:07 ` [PATCH 3/3] drm/i915/execlists: Convert recursive defer_request() into iterative Chris Wilson
2019-06-24 15:01 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Patchwork
2019-06-25 10:25 ` Mika Kuoppala [this message]
2019-06-25 10:34 ` [PATCH 1/3] " Chris Wilson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87blym55da.fsf@gaia.fi.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.