All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore
@ 2019-06-24 13:07 Chris Wilson
  2019-06-24 13:07 ` [PATCH 2/3] drm/i915: Emit final breadcrumb in request_add Chris Wilson
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-24 13:07 UTC (permalink / raw)
  To: intel-gfx

Not all mediated environments yet support HW semaphores, so we should
avoid using one for our preempt-to-busy barrier and instead request that
the CS be paused and not advance on to the next execlist.

There's a natural advantage with the register writes being serialised
with the writes to the ELSP register that should allow the barrier to be
much more constrained. On the other hand, we can no longer track the
extents of the barrier and so must be a lot more lenient in accepting
early CS events.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  8 +++
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 69 ++++++++------------
 2 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7e056114344e..1353df264236 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -166,6 +166,8 @@ struct intel_engine_execlists {
 	 */
 	bool no_priolist;
 
+	u16 ring_mode;
+
 	/**
 	 * @submit_reg: gen-specific execlist submission register
 	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
@@ -179,6 +181,12 @@ struct intel_engine_execlists {
 	 */
 	u32 __iomem *ctrl_reg;
 
+	/**
+	 * @ring_mode: the engine control register, used to freeze the command
+	 * streamer around preempt-to-busy
+	 */
+	u32 __iomem *ring_reg;
+
 #define EXECLIST_MAX_PORTS 2
 	/**
 	 * @active: the currently known context executing on HW
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 28685ba91a2c..c2aaab4b743e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -224,24 +224,18 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
 
-static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+static inline void ring_set_paused(struct intel_engine_cs *engine, u32 state)
 {
-	return (i915_ggtt_offset(engine->status_page.vma) +
-		I915_GEM_HWS_PREEMPT_ADDR);
-}
+	u16 x;
 
-static inline void
-ring_set_paused(const struct intel_engine_cs *engine, int state)
-{
-	/*
-	 * We inspect HWS_PREEMPT with a semaphore inside
-	 * engine->emit_fini_breadcrumb. If the dword is true,
-	 * the ring is paused as the semaphore will busywait
-	 * until the dword is false.
-	 */
-	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
-	if (state)
-		wmb();
+	x = engine->execlists.ring_mode;
+	x &= ~(state >> 16);
+	x |= state;
+	if (x == engine->execlists.ring_mode)
+		return;
+
+	engine->execlists.ring_mode = x;
+	writel(state, engine->execlists.ring_reg);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
@@ -981,7 +975,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			 * as we unwind (and until we resubmit) so that we do
 			 * not accidentally tell it to go backwards.
 			 */
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 
 			/*
 			 * Note that we have not stopped the GPU at this point,
@@ -1010,7 +1004,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				  last->sched.attr.priority,
 				  execlists->queue_priority_hint);
 
-			ring_set_paused(engine, 1);
+			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
 			defer_active(engine);
 
 			/*
@@ -1244,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		*port = execlists_schedule_in(last, port - execlists->pending);
 		memset(port + 1, 0, (last_port - port) * sizeof(*port));
 		execlists_submit_ports(engine);
-	} else {
-		ring_set_paused(engine, 0);
 	}
+
+	if (!inject_preempt_hang(execlists))
+		ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
 void
@@ -1355,9 +1350,6 @@ static void process_csb(struct intel_engine_cs *engine)
 
 			if (enable_timeslice(engine))
 				mod_timer(&execlists->timer, jiffies + 1);
-
-			if (!inject_preempt_hang(execlists))
-				ring_set_paused(engine, 0);
 		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
 			struct i915_request * const *port = execlists->active;
 
@@ -1378,8 +1370,15 @@ static void process_csb(struct intel_engine_cs *engine)
 			 * ordered, that the breadcrumb write is
 			 * coherent (visible from the CPU) before the
 			 * user interrupt and CSB is processed.
+			 *
+			 * The caveat here applies when we are injecting
+			 * a completion event via manipulation of the
+			 * RING_MI_MODE; this may occur before the
+			 * request is completed and appears as a
+			 * normal context-switch (0x14).
 			 */
-			GEM_BUG_ON(!i915_request_completed(rq));
+			GEM_BUG_ON(!i915_request_completed(rq) &&
+				   !execlists->pending[0]);
 			execlists_schedule_out(rq);
 
 			GEM_BUG_ON(execlists->active - execlists->inflight >
@@ -2133,7 +2132,7 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	const unsigned int reset_value = execlists->csb_size - 1;
 
-	ring_set_paused(engine, 0);
+	ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
 
 	/*
 	 * After a reset, the HW starts writing into CSB entry [0]. We
@@ -2581,19 +2580,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
 	return cs;
 }
 
-static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
-{
-	*cs++ = MI_SEMAPHORE_WAIT |
-		MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_EQ_SDD;
-	*cs++ = 0;
-	*cs++ = intel_hws_preempt_address(request->engine);
-	*cs++ = 0;
-
-	return cs;
-}
-
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write(cs,
@@ -2601,9 +2587,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  request->timeline->hwsp_offset,
 				  0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2625,9 +2609,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 				    PIPE_CONTROL_CS_STALL,
 				    0);
 	*cs++ = MI_USER_INTERRUPT;
-
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-	cs = emit_preempt_busywait(request, cs);
 
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
@@ -2807,6 +2789,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
+	execlists->ring_reg =
+		uncore->regs + i915_mmio_reg_offset(RING_MI_MODE(base));
+
 	if (INTEL_GEN(i915) < 11)
 		execlists->csb_size = GEN8_CSB_ENTRIES;
 	else
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] drm/i915: Emit final breadcrumb in request_add
  2019-06-24 13:07 [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Chris Wilson
@ 2019-06-24 13:07 ` Chris Wilson
  2019-06-24 13:07 ` [PATCH 3/3] drm/i915/execlists: Convert recursive defer_request() into iterative Chris Wilson
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-24 13:07 UTC (permalink / raw)
  To: intel-gfx

With everything now known at the point of adding the request to the
context's timeline, we can move the emission of the final breadcrumb to
the request add and pull it out of the CS interrupt service handler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_request.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5ff87c4a0cd5..f45dd2c260d4 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -437,9 +437,6 @@ void __i915_request_submit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	engine->emit_fini_breadcrumb(request,
-				     request->ring->vaddr + request->postfix);
-
 	engine->serial++;
 
 	trace_i915_request_execute(request);
@@ -1178,6 +1175,9 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
 	GEM_BUG_ON(IS_ERR(cs));
 	rq->postfix = intel_ring_offset(rq, cs);
 
+	cs = engine->emit_fini_breadcrumb(rq, cs);
+	GEM_BUG_ON(rq->ring->vaddr + rq->ring->emit != cs);
+
 	prev = __i915_request_add_to_timeline(rq);
 
 	list_add_tail(&rq->ring_link, &ring->request_list);
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] drm/i915/execlists: Convert recursive defer_request() into iterative
  2019-06-24 13:07 [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Chris Wilson
  2019-06-24 13:07 ` [PATCH 2/3] drm/i915: Emit final breadcrumb in request_add Chris Wilson
@ 2019-06-24 13:07 ` Chris Wilson
  2019-06-24 15:01 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Patchwork
  2019-06-25 10:25 ` [PATCH 1/3] " Mika Kuoppala
  3 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-24 13:07 UTC (permalink / raw)
  To: intel-gfx

As this engine owns the lock around rq->sched.link (for those waiters
submitted to this engine), we can use that link as an element in a local
list. We can thus replace the recursive algorithm with an iterative walk
over the ordered list of waiters.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 52 +++++++++++++++--------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c2aaab4b743e..17b178dba7c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -827,10 +827,9 @@ last_active(const struct intel_engine_execlists *execlists)
 	return *last;
 }
 
-static void
-defer_request(struct i915_request * const rq, struct list_head * const pl)
+static void defer_request(struct i915_request *rq, struct list_head * const pl)
 {
-	struct i915_dependency *p;
+	LIST_HEAD(list);
 
 	/*
 	 * We want to move the interrupted request to the back of
@@ -839,34 +838,37 @@ defer_request(struct i915_request * const rq, struct list_head * const pl)
 	 * flight and were waiting for the interrupted request to
 	 * be run after it again.
 	 */
-	list_move_tail(&rq->sched.link, pl);
+	do {
+		struct i915_dependency *p;
 
-	list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
-		struct i915_request *w =
-			container_of(p->waiter, typeof(*w), sched);
+		GEM_BUG_ON(i915_request_is_active(rq));
+		list_move_tail(&rq->sched.link, pl);
 
-		/* Leave semaphores spinning on the other engines */
-		if (w->engine != rq->engine)
-			continue;
+		list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+			struct i915_request *w =
+				container_of(p->waiter, typeof(*w), sched);
 
-		/* No waiter should start before the active request completed */
-		GEM_BUG_ON(i915_request_started(w));
+			/* Leave semaphores spinning on the other engines */
+			if (w->engine != rq->engine)
+				continue;
 
-		GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
-		if (rq_prio(w) < rq_prio(rq))
-			continue;
+			/* No waiter should start before its signaler */
+			GEM_BUG_ON(i915_request_started(w) &&
+				   !i915_request_completed(rq));
 
-		if (list_empty(&w->sched.link))
-			continue; /* Not yet submitted; unready */
+			GEM_BUG_ON(i915_request_is_active(w));
+			if (list_empty(&w->sched.link))
+				continue; /* Not yet submitted; unready */
 
-		/*
-		 * This should be very shallow as it is limited by the
-		 * number of requests that can fit in a ring (<64) and
-		 * the number of contexts that can be in flight on this
-		 * engine.
-		 */
-		defer_request(w, pl);
-	}
+			if (rq_prio(w) < rq_prio(rq))
+				continue;
+
+			GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+			list_move_tail(&w->sched.link, &list);
+		}
+
+		rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+	} while (rq);
 }
 
 static void defer_active(struct intel_engine_cs *engine)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore
  2019-06-24 13:07 [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Chris Wilson
  2019-06-24 13:07 ` [PATCH 2/3] drm/i915: Emit final breadcrumb in request_add Chris Wilson
  2019-06-24 13:07 ` [PATCH 3/3] drm/i915/execlists: Convert recursive defer_request() into iterative Chris Wilson
@ 2019-06-24 15:01 ` Patchwork
  2019-06-25 10:25 ` [PATCH 1/3] " Mika Kuoppala
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2019-06-24 15:01 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore
URL   : https://patchwork.freedesktop.org/series/62641/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_6334 -> Patchwork_13405
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_13405 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_13405, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13405/

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_13405:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live_execlists:
    - fi-cfl-8700k:       [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6334/fi-cfl-8700k/igt@i915_selftest@live_execlists.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13405/fi-cfl-8700k/igt@i915_selftest@live_execlists.html

  
Known issues
------------

  Here are the changes found in Patchwork_13405 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live_blt:
    - fi-skl-iommu:       [PASS][3] -> [INCOMPLETE][4] ([fdo#108602])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6334/fi-skl-iommu/igt@i915_selftest@live_blt.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13405/fi-skl-iommu/igt@i915_selftest@live_blt.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-u2:          [PASS][5] -> [FAIL][6] ([fdo#103167])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6334/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13405/fi-icl-u2/igt@kms_frontbuffer_tracking@basic.html

  
#### Possible fixes ####

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][7] ([fdo#109485]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6334/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13405/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-hsw-peppy:       [DMESG-WARN][9] ([fdo#102614]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6334/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13405/fi-hsw-peppy/igt@kms_frontbuffer_tracking@basic.html

  
  [fdo#102614]: https://bugs.freedesktop.org/show_bug.cgi?id=102614
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#108602]: https://bugs.freedesktop.org/show_bug.cgi?id=108602
  [fdo#109485]: https://bugs.freedesktop.org/show_bug.cgi?id=109485


Participating hosts (50 -> 40)
------------------------------

  Additional (1): fi-byt-j1900 
  Missing    (11): fi-kbl-soraka fi-cml-u2 fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-byt-clapper fi-icl-u3 fi-icl-y fi-icl-dsi fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_6334 -> Patchwork_13405

  CI_DRM_6334: 86d1a866f35634912d7699f1eb4d04a2007df18e @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5065: f454000b5ba221b19d696a27415fe5824d743284 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13405: 837e62fe62a4f050e66b61701428f2b640e7e2be @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

837e62fe62a4 drm/i915/execlists: Convert recursive defer_request() into iterative
68e7226beb2c drm/i915: Emit final breadcrumb in request_add
ccee12ecc162 drm/i915/execlists: Switch to using STOP_RING instead of a semaphore

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13405/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore
  2019-06-24 13:07 [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Chris Wilson
                   ` (2 preceding siblings ...)
  2019-06-24 15:01 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Patchwork
@ 2019-06-25 10:25 ` Mika Kuoppala
  2019-06-25 10:34   ` Chris Wilson
  3 siblings, 1 reply; 6+ messages in thread
From: Mika Kuoppala @ 2019-06-25 10:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Chris Wilson <chris@chris-wilson.co.uk> writes:

> Not all mediated environments yet support HW semaphores, so we should
> avoid using one for our preempt-to-busy barrier and instead request that
> the CS be paused and not advance on to the next execlist.
>
> There's a natural advantage with the register writes being serialised
> with the writes to the ELSP register that should allow the barrier to be
> much more constrained. On the other hand, we can no longer track the
> extents of the barrier and so must be a lot more lenient in accepting
> early CS events.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  8 +++
>  drivers/gpu/drm/i915/gt/intel_lrc.c          | 69 ++++++++------------
>  2 files changed, 35 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 7e056114344e..1353df264236 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -166,6 +166,8 @@ struct intel_engine_execlists {
>  	 */
>  	bool no_priolist;
>  
> +	u16 ring_mode;
> +
>  	/**
>  	 * @submit_reg: gen-specific execlist submission register
>  	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
> @@ -179,6 +181,12 @@ struct intel_engine_execlists {
>  	 */
>  	u32 __iomem *ctrl_reg;
>  
> +	/**
> +	 * @ring_mode: the engine control register, used to freeze the command
> +	 * streamer around preempt-to-busy
> +	 */
> +	u32 __iomem *ring_reg;
> +
>  #define EXECLIST_MAX_PORTS 2
>  	/**
>  	 * @active: the currently known context executing on HW
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 28685ba91a2c..c2aaab4b743e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -224,24 +224,18 @@ static void execlists_init_reg_state(u32 *reg_state,
>  				     struct intel_engine_cs *engine,
>  				     struct intel_ring *ring);
>  
> -static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> +static inline void ring_set_paused(struct intel_engine_cs *engine, u32 state)
>  {
> -	return (i915_ggtt_offset(engine->status_page.vma) +
> -		I915_GEM_HWS_PREEMPT_ADDR);
> -}
> +	u16 x;
>  
> -static inline void
> -ring_set_paused(const struct intel_engine_cs *engine, int state)
> -{
> -	/*
> -	 * We inspect HWS_PREEMPT with a semaphore inside
> -	 * engine->emit_fini_breadcrumb. If the dword is true,
> -	 * the ring is paused as the semaphore will busywait
> -	 * until the dword is false.
> -	 */
> -	engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
> -	if (state)
> -		wmb();
> +	x = engine->execlists.ring_mode;
> +	x &= ~(state >> 16);
> +	x |= state;
> +	if (x == engine->execlists.ring_mode)
> +		return;

You want to keep the state to reduce noneffective writes?

I would like ring_freeze() and ring_thaw() and
the state parameter removed, as I don't see the
value in callsites to macro the masked bits.

> +
> +	engine->execlists.ring_mode = x;
> +	writel(state, engine->execlists.ring_reg);
>  }
>  
>  static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> @@ -981,7 +975,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  			 * as we unwind (and until we resubmit) so that we do
>  			 * not accidentally tell it to go backwards.
>  			 */
> -			ring_set_paused(engine, 1);
> +			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
>  
>  			/*
>  			 * Note that we have not stopped the GPU at this point,
> @@ -1010,7 +1004,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  				  last->sched.attr.priority,
>  				  execlists->queue_priority_hint);
>  
> -			ring_set_paused(engine, 1);
> +			ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
>  			defer_active(engine);
>  
>  			/*
> @@ -1244,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		*port = execlists_schedule_in(last, port - execlists->pending);
>  		memset(port + 1, 0, (last_port - port) * sizeof(*port));
>  		execlists_submit_ports(engine);
> -	} else {
> -		ring_set_paused(engine, 0);
>  	}
> +
> +	if (!inject_preempt_hang(execlists))
> +		ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));

This is just a superset of the previous unpausing now?

>  }
>  
>  void
> @@ -1355,9 +1350,6 @@ static void process_csb(struct intel_engine_cs *engine)
>  
>  			if (enable_timeslice(engine))
>  				mod_timer(&execlists->timer, jiffies + 1);
> -
> -			if (!inject_preempt_hang(execlists))
> -				ring_set_paused(engine, 0);
>  		} else if (status & GEN8_CTX_STATUS_PREEMPTED) {
>  			struct i915_request * const *port = execlists->active;
>  
> @@ -1378,8 +1370,15 @@ static void process_csb(struct intel_engine_cs *engine)
>  			 * ordered, that the breadcrumb write is
>  			 * coherent (visible from the CPU) before the
>  			 * user interrupt and CSB is processed.
> +			 *
> +			 * The caveat here applies when we are injecting
> +			 * a completion event via manipulation of the
> +			 * RING_MI_MODE; this may occur before the
> +			 * request is completed and appears as a
> +			 * normal context-switch (0x14).

As in we unpause and get a completion event?

-Mika


>  			 */
> -			GEM_BUG_ON(!i915_request_completed(rq));
> +			GEM_BUG_ON(!i915_request_completed(rq) &&
> +				   !execlists->pending[0]);
>  			execlists_schedule_out(rq);
>  
>  			GEM_BUG_ON(execlists->active - execlists->inflight >
> @@ -2133,7 +2132,7 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
>  	const unsigned int reset_value = execlists->csb_size - 1;
>  
> -	ring_set_paused(engine, 0);
> +	ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
>  
>  	/*
>  	 * After a reset, the HW starts writing into CSB entry [0]. We
> @@ -2581,19 +2580,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
>  	return cs;
>  }
>  
> -static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
> -{
> -	*cs++ = MI_SEMAPHORE_WAIT |
> -		MI_SEMAPHORE_GLOBAL_GTT |
> -		MI_SEMAPHORE_POLL |
> -		MI_SEMAPHORE_SAD_EQ_SDD;
> -	*cs++ = 0;
> -	*cs++ = intel_hws_preempt_address(request->engine);
> -	*cs++ = 0;
> -
> -	return cs;
> -}
> -
>  static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>  {
>  	cs = gen8_emit_ggtt_write(cs,
> @@ -2601,9 +2587,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>  				  request->timeline->hwsp_offset,
>  				  0);
>  	*cs++ = MI_USER_INTERRUPT;
> -
>  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> -	cs = emit_preempt_busywait(request, cs);
>  
>  	request->tail = intel_ring_offset(request, cs);
>  	assert_ring_tail_valid(request->ring, request->tail);
> @@ -2625,9 +2609,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  				    PIPE_CONTROL_CS_STALL,
>  				    0);
>  	*cs++ = MI_USER_INTERRUPT;
> -
>  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> -	cs = emit_preempt_busywait(request, cs);
>  
>  	request->tail = intel_ring_offset(request, cs);
>  	assert_ring_tail_valid(request->ring, request->tail);
> @@ -2807,6 +2789,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
>  	execlists->csb_write =
>  		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
>  
> +	execlists->ring_reg =
> +		uncore->regs + i915_mmio_reg_offset(RING_MI_MODE(base));
> +
>  	if (INTEL_GEN(i915) < 11)
>  		execlists->csb_size = GEN8_CSB_ENTRIES;
>  	else
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore
  2019-06-25 10:25 ` [PATCH 1/3] " Mika Kuoppala
@ 2019-06-25 10:34   ` Chris Wilson
  0 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2019-06-25 10:34 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

Quoting Mika Kuoppala (2019-06-25 11:25:21)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Not all mediated environments yet support HW semaphores, so we should
> > avoid using one for our preempt-to-busy barrier and instead request that
> > the CS be paused and not advance on to the next execlist.
> >
> > There's a natural advantage with the register writes being serialised
> > with the writes to the ELSP register that should allow the barrier to be
> > much more constrained. On the other hand, we can no longer track the
> > extents of the barrier and so must be a lot more lenient in accepting
> > early CS events.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine_types.h |  8 +++
> >  drivers/gpu/drm/i915/gt/intel_lrc.c          | 69 ++++++++------------
> >  2 files changed, 35 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index 7e056114344e..1353df264236 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -166,6 +166,8 @@ struct intel_engine_execlists {
> >        */
> >       bool no_priolist;
> >  
> > +     u16 ring_mode;
> > +
> >       /**
> >        * @submit_reg: gen-specific execlist submission register
> >        * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
> > @@ -179,6 +181,12 @@ struct intel_engine_execlists {
> >        */
> >       u32 __iomem *ctrl_reg;
> >  
> > +     /**
> > +      * @ring_mode: the engine control register, used to freeze the command
> > +      * streamer around preempt-to-busy
> > +      */
> > +     u32 __iomem *ring_reg;
> > +
> >  #define EXECLIST_MAX_PORTS 2
> >       /**
> >        * @active: the currently known context executing on HW
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 28685ba91a2c..c2aaab4b743e 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -224,24 +224,18 @@ static void execlists_init_reg_state(u32 *reg_state,
> >                                    struct intel_engine_cs *engine,
> >                                    struct intel_ring *ring);
> >  
> > -static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
> > +static inline void ring_set_paused(struct intel_engine_cs *engine, u32 state)
> >  {
> > -     return (i915_ggtt_offset(engine->status_page.vma) +
> > -             I915_GEM_HWS_PREEMPT_ADDR);
> > -}
> > +     u16 x;
> >  
> > -static inline void
> > -ring_set_paused(const struct intel_engine_cs *engine, int state)
> > -{
> > -     /*
> > -      * We inspect HWS_PREEMPT with a semaphore inside
> > -      * engine->emit_fini_breadcrumb. If the dword is true,
> > -      * the ring is paused as the semaphore will busywait
> > -      * until the dword is false.
> > -      */
> > -     engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
> > -     if (state)
> > -             wmb();
> > +     x = engine->execlists.ring_mode;
> > +     x &= ~(state >> 16);
> > +     x |= state;
> > +     if (x == engine->execlists.ring_mode)
> > +             return;
> 
> You want to keep the state to reduce noneffective writes?

There are redundant writes, yes. No sense in a costly mmio for those.

> I would like ring_freeze() and ring_thaw() and
> the state parameter removed, as I don't see the
> value in callsites to macro the masked bits.
> 
> > +
> > +     engine->execlists.ring_mode = x;
> > +     writel(state, engine->execlists.ring_reg);
> >  }
> >  
> >  static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> > @@ -981,7 +975,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >                        * as we unwind (and until we resubmit) so that we do
> >                        * not accidentally tell it to go backwards.
> >                        */
> > -                     ring_set_paused(engine, 1);
> > +                     ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
> >  
> >                       /*
> >                        * Note that we have not stopped the GPU at this point,
> > @@ -1010,7 +1004,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >                                 last->sched.attr.priority,
> >                                 execlists->queue_priority_hint);
> >  
> > -                     ring_set_paused(engine, 1);
> > +                     ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING));
> >                       defer_active(engine);
> >  
> >                       /*
> > @@ -1244,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >               *port = execlists_schedule_in(last, port - execlists->pending);
> >               memset(port + 1, 0, (last_port - port) * sizeof(*port));
> >               execlists_submit_ports(engine);
> > -     } else {
> > -             ring_set_paused(engine, 0);
> >       }
> > +
> > +     if (!inject_preempt_hang(execlists))
> > +             ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING));
> 
> This is just a superset of the previous unpausing now?

The theory is that with the mmio we can take a different approach -- and
in fact need to mark the rings as unpaused before the preemption so that
on preemption, we don't get an immediate arbitration event as it spots
the ring is stopped.

> >  void
> > @@ -1355,9 +1350,6 @@ static void process_csb(struct intel_engine_cs *engine)
> >  
> >                       if (enable_timeslice(engine))
> >                               mod_timer(&execlists->timer, jiffies + 1);
> > -
> > -                     if (!inject_preempt_hang(execlists))
> > -                             ring_set_paused(engine, 0);
> >               } else if (status & GEN8_CTX_STATUS_PREEMPTED) {
> >                       struct i915_request * const *port = execlists->active;
> >  
> > @@ -1378,8 +1370,15 @@ static void process_csb(struct intel_engine_cs *engine)
> >                        * ordered, that the breadcrumb write is
> >                        * coherent (visible from the CPU) before the
> >                        * user interrupt and CSB is processed.
> > +                      *
> > +                      * The caveat here applies when we are injecting
> > +                      * a completion event via manipulation of the
> > +                      * RING_MI_MODE; this may occur before the
> > +                      * request is completed and appears as a
> > +                      * normal context-switch (0x14).
> 
> As in we unpause and get a completion event?

When the CS hits an arbitration point and it sees the STOP_RING bit, it
performs a context switch. /o\ Not the behaviour we need.

Using the mmio doesn't stand up to stability testing, we are not able to
control the RING_HEAD advancement carefully enough. There might be a
way, there's a few niceties to the approach that make it worth
investigating. But at the moment, it is inadequate.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-06-25 10:35 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-06-24 13:07 [PATCH 1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Chris Wilson
2019-06-24 13:07 ` [PATCH 2/3] drm/i915: Emit final breadcrumb in request_add Chris Wilson
2019-06-24 13:07 ` [PATCH 3/3] drm/i915/execlists: Convert recursive defer_request() into iterative Chris Wilson
2019-06-24 15:01 ` ✗ Fi.CI.BAT: failure for series starting with [1/3] drm/i915/execlists: Switch to using STOP_RING instead of a semaphore Patchwork
2019-06-25 10:25 ` [PATCH 1/3] " Mika Kuoppala
2019-06-25 10:34   ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.