All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion
@ 2020-02-09 13:19 Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 2/8] drm/i915/execlists: Ignore tracek for nop process_csb Chris Wilson
                   ` (9 more replies)
  0 siblings, 10 replies; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

In eliminating the recursion from walking the tree of signalers/waiters
for processing the hold/unhold operations, a crucial error crept in
where we looked at the parent request and not the list element when
processing the list.

Brown paper bag, much?

Closes: https://gitlab.freedesktop.org/drm/intel/issues/1166
Fixes: 748317386afb ("drm/i915/execlists: Offline error capture")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 4bedc66bebb1..21385070ad15 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2374,10 +2374,10 @@ static void __execlists_hold(struct i915_request *rq)
 		if (i915_request_is_active(rq))
 			__i915_request_unsubmit(rq);
 
-		RQ_TRACE(rq, "on hold\n");
 		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
 		i915_request_set_hold(rq);
+		RQ_TRACE(rq, "on hold\n");
 
 		for_each_waiter(p, rq) {
 			struct i915_request *w =
@@ -2393,7 +2393,7 @@ static void __execlists_hold(struct i915_request *rq)
 			if (i915_request_completed(w))
 				continue;
 
-			if (i915_request_on_hold(rq))
+			if (i915_request_on_hold(w))
 				continue;
 
 			list_move_tail(&w->sched.link, &list);
@@ -2451,6 +2451,7 @@ static bool execlists_hold(struct intel_engine_cs *engine,
 	GEM_BUG_ON(i915_request_on_hold(rq));
 	GEM_BUG_ON(rq->engine != engine);
 	__execlists_hold(rq);
+	GEM_BUG_ON(list_empty(&engine->active.hold));
 
 unlock:
 	spin_unlock_irq(&engine->active.lock);
@@ -2486,6 +2487,8 @@ static void __execlists_unhold(struct i915_request *rq)
 	do {
 		struct i915_dependency *p;
 
+		RQ_TRACE(rq, "hold release\n");
+
 		GEM_BUG_ON(!i915_request_on_hold(rq));
 		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
 
@@ -2494,7 +2497,6 @@ static void __execlists_unhold(struct i915_request *rq)
 			       i915_sched_lookup_priolist(rq->engine,
 							  rq_prio(rq)));
 		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
-		RQ_TRACE(rq, "hold release\n");
 
 		/* Also release any children on this engine that are ready */
 		for_each_waiter(p, rq) {
@@ -2504,11 +2506,11 @@ static void __execlists_unhold(struct i915_request *rq)
 			if (w->engine != rq->engine)
 				continue;
 
-			if (!i915_request_on_hold(rq))
+			if (!i915_request_on_hold(w))
 				continue;
 
 			/* Check that no other parents are also on hold */
-			if (hold_request(rq))
+			if (hold_request(w))
 				continue;
 
 			list_move_tail(&w->sched.link, &list);
@@ -2806,6 +2808,7 @@ static void execlists_submit_request(struct i915_request *request)
 	spin_lock_irqsave(&engine->active.lock, flags);
 
 	if (unlikely(ancestor_on_hold(engine, request))) {
+		RQ_TRACE(request, "ancestor on hold\n");
 		list_add_tail(&request->sched.link, &engine->active.hold);
 		i915_request_set_hold(request);
 	} else {
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH 2/8] drm/i915/execlists: Ignore tracek for nop process_csb
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
@ 2020-02-09 13:19 ` Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 3/8] drm/i915/selftests: Exercise timeslice rewinding Chris Wilson
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

Recording the frequent inspection of CSB head/tail when there is
expected to be no update adds noise to the debug trace. (Not entirely
useless, but since we know the sequence of function calls, we can
surmise the function was called -- so redundant.)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 21385070ad15..230470c58ec9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2248,7 +2248,6 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	head = execlists->csb_head;
 	tail = READ_ONCE(*execlists->csb_write);
-	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
 	if (unlikely(head == tail))
 		return;
 
@@ -2262,6 +2261,7 @@ static void process_csb(struct intel_engine_cs *engine)
 	 */
 	rmb();
 
+	ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
 	do {
 		bool promote;
 
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH 3/8] drm/i915/selftests: Exercise timeslice rewinding
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 2/8] drm/i915/execlists: Ignore tracek for nop process_csb Chris Wilson
@ 2020-02-09 13:19 ` Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 4/8] drm/i915/selftests: Remove erroneous intel_engine_pm_put Chris Wilson
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

Originally, I did not expect having to rewind a context upon
timeslicing: the point was to replace the executing context with an idle
one! However, given a second context that depends on requests from the
first, we may have to split the requests along the first context to
execute the second, causing us to replay the first context and have to
rewind the RING_TAIL.

References: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 201 ++++++++++++++++++++++++-
 1 file changed, 200 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 79b9f7d092e4..282833da7b26 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -76,8 +76,11 @@ static int wait_for_submit(struct intel_engine_cs *engine,
 	do {
 		cond_resched();
 		intel_engine_flush_submission(engine);
-		if (i915_request_is_active(rq))
+		if (i915_request_is_active(rq) &&
+		    !READ_ONCE(engine->execlists.pending[0])) {
+			tasklet_unlock_wait(&engine->execlists.tasklet);
 			return 0;
+		}
 	} while (time_before(jiffies, timeout));
 
 	return -ETIME;
@@ -772,6 +775,201 @@ static int live_timeslice_preempt(void *arg)
 	return err;
 }
 
+static struct i915_request *
+create_rewinder(struct intel_context *ce,
+		struct i915_request *wait,
+		int slot)
+{
+	struct i915_request *rq;
+	u32 offset = i915_ggtt_offset(ce->engine->status_page.vma) + 4000;
+	u32 *cs;
+	int err;
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		return rq;
+
+	if (wait) {
+		err = i915_request_await_dma_fence(rq, &wait->fence);
+		if (err)
+			goto err;
+	}
+
+	cs = intel_ring_begin(rq, 10);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err;
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD;
+	*cs++ = 0;
+	*cs++ = offset;
+	*cs++ = 0;
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+	*cs++ = offset + slot * sizeof(u32);
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	rq->sched.attr.priority = I915_PRIORITY_MASK;
+err:
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (err) {
+		i915_request_put(rq);
+		return ERR_PTR(err);
+	}
+
+	return rq;
+}
+
+static int live_timeslice_rewind(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * The usual presumption on timeslice expiration is that we replace
+	 * the active context with another. However, given a chain of
+	 * dependencies we may end up with replacing the context with itself,
+	 * but only a few of those requests, forcing us to rewind the
+	 * RING_TAIL of the original request.
+	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return 0;
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq[3] = {};
+		struct intel_context *ce;
+		unsigned long heartbeat;
+		unsigned long timeslice;
+		int i, err = 0;
+		u32 *slot;
+
+		if (!intel_engine_has_timeslices(engine))
+			continue;
+
+		/*
+		 * A:rq1 -- semaphore wait, timestamp X
+		 * A:rq2 -- write timestamp Y
+		 *
+		 * B:rq1 [await A:rq1] -- write timestamp Z
+		 *
+		 * Force timeslice, release sempahore.
+		 *
+		 * Expect evaluation order XZY
+		 */
+
+		engine_heartbeat_disable(engine, &heartbeat);
+		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
+
+		slot = memset(engine->status_page.addr + 1000,
+			      0, 4 * sizeof(u32));
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto err;
+		}
+
+		rq[0] = create_rewinder(ce, NULL, 1);
+		if (IS_ERR(rq[0])) {
+			intel_context_put(ce);
+			goto err;
+		}
+
+		rq[1] = create_rewinder(ce, NULL, 2);
+		intel_context_put(ce);
+		if (IS_ERR(rq[1]))
+			goto err;
+
+		err = wait_for_submit(engine, rq[1], HZ / 2);
+		if (err) {
+			pr_err("%s: failed to submit first context\n",
+			       engine->name);
+			goto err;
+		}
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto err;
+		}
+
+		rq[2] = create_rewinder(ce, rq[0], 3);
+		intel_context_put(ce);
+		if (IS_ERR(rq[2]))
+			goto err;
+
+		err = wait_for_submit(engine, rq[2], HZ / 2);
+		if (err) {
+			pr_err("%s: failed to submit second context\n",
+			       engine->name);
+			goto err;
+		}
+		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
+
+		/* Wait for the timeslice to kick in */
+		del_timer(&engine->execlists.timer);
+		tasklet_hi_schedule(&engine->execlists.tasklet);
+		intel_engine_flush_submission(engine);
+
+		/* Release the hounds! */
+		slot[0] = 1;
+		wmb();
+
+		for (i = 1; i <= 3; i++) {
+			unsigned long timeout = jiffies + HZ / 2;
+
+			while (!READ_ONCE(slot[i]) &&
+			       time_before(jiffies, timeout))
+				;
+
+			if (!time_before(jiffies, timeout)) {
+				pr_err("%s: rq[%d] timed out\n",
+				       engine->name, i - 1);
+				err = -ETIME;
+				goto err;
+			}
+
+			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
+		}
+
+		/* XZY: XZ < XY */
+		if (slot[3] - slot[1] >= slot[2] - slot[1]) {
+			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
+			       engine->name,
+			       slot[3] - slot[1],
+			       slot[2] - slot[1]);
+			err = -EINVAL;
+		}
+
+err:
+		memset(slot, 0xff, 4 * sizeof(u32));
+		wmb();
+
+		engine->props.timeslice_duration_ms = timeslice;
+		engine_heartbeat_enable(engine, heartbeat);
+		for (i = 0; i < 3; i++)
+			i915_request_put(rq[i]);
+		if (igt_flush_test(gt->i915))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static struct i915_request *nop_request(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
@@ -3740,6 +3938,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_hold_reset),
 		SUBTEST(live_error_interrupt),
 		SUBTEST(live_timeslice_preempt),
+		SUBTEST(live_timeslice_rewind),
 		SUBTEST(live_timeslice_queue),
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH 4/8] drm/i915/selftests: Remove erroneous intel_engine_pm_put
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 2/8] drm/i915/execlists: Ignore tracek for nop process_csb Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 3/8] drm/i915/selftests: Exercise timeslice rewinding Chris Wilson
@ 2020-02-09 13:19 ` Chris Wilson
  2020-02-10 12:09   ` Tvrtko Ursulin
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 5/8] drm/i915/selftests: Relax timeout for error-interrupt reset processing Chris Wilson
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

On an allocation error path for live_hwsp_alternate, we dropped the
engine wakeref before we had even acquired it.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_timeline.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index e59bf7e31d83..c2578a0f2f14 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -606,7 +606,6 @@ static int live_hwsp_alternate(void *arg)
 
 			tl = checked_intel_timeline_create(gt);
 			if (IS_ERR(tl)) {
-				intel_engine_pm_put(engine);
 				err = PTR_ERR(tl);
 				goto out;
 			}
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH 5/8] drm/i915/selftests: Relax timeout for error-interrupt reset processing
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
                   ` (2 preceding siblings ...)
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 4/8] drm/i915/selftests: Remove erroneous intel_engine_pm_put Chris Wilson
@ 2020-02-09 13:19 ` Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 6/8] drm/i915/gem: Don't leak non-persistent requests on changing engines Chris Wilson
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

We can not require that the system process a tasklet in reasonable time
(thanks be to ksoftirqd), but we can insist that having waited
sufficiently for the error interrupt to have been raised and having
kicked the tasklet, the reset has begun and the request will be marked
as in error (if not already completed).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 282833da7b26..f0b5af3b2746 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -503,14 +503,21 @@ static int live_error_interrupt(void *arg)
 			}
 
 			for (i = 0; i < ARRAY_SIZE(client); i++) {
-				if (i915_request_wait(client[i], 0, HZ / 5) < 0) {
-					pr_err("%s: %s request still executing!\n",
-					       engine->name,
-					       error_repr(p->error[i]));
+				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
+					pr_debug("%s: %s request incomplete!\n",
+						 engine->name,
+						 error_repr(p->error[i]));
+
+				if (!i915_request_started(client[i])) {
+					pr_debug("%s: %s request not stated!\n",
+						 engine->name,
+						 error_repr(p->error[i]));
 					err = -ETIME;
 					goto out;
 				}
 
+				/* Kick the tasklet to process the error */
+				intel_engine_flush_submission(engine);
 				if (client[i]->fence.error != p->error[i]) {
 					pr_err("%s: %s request completed with wrong error code: %d\n",
 					       engine->name,
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH 6/8] drm/i915/gem: Don't leak non-persistent requests on changing engines
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
                   ` (3 preceding siblings ...)
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 5/8] drm/i915/selftests: Relax timeout for error-interrupt reset processing Chris Wilson
@ 2020-02-09 13:19 ` Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 7/8] drm/i915: Disable use of hwsp_cacheline for kernel_context Chris Wilson
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

If we have a set of active engines marked as being non-persistent, we
lose track of those if the user replaces those engines with
I915_CONTEXT_PARAM_ENGINES. As part of our uABI contract is that
non-persistent requests are terminated if they are no longer being
tracked by the user's context (in order to prevent a lost request
causing an untracked and so unstoppable GPU hang), we need to apply the
same context cancellation upon changing engines.

v2: Track stale engines[] so we only reap at context closure.

Fixes: a0e047156cde ("drm/i915/gem: Make context persistence optional")
Testcase: igt/gem_ctx_peristence/replace
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 118 ++++++++++++++++--
 .../gpu/drm/i915/gem/i915_gem_context_types.h |  13 +-
 drivers/gpu/drm/i915/i915_sw_fence.c          |  15 ++-
 drivers/gpu/drm/i915/i915_sw_fence.h          |   2 +-
 4 files changed, 135 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index cfaf5bbdbcab..808a24606b47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -270,7 +270,8 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 	if (!e)
 		return ERR_PTR(-ENOMEM);
 
-	init_rcu_head(&e->rcu);
+	e->ctx = ctx;
+
 	for_each_engine(engine, gt, id) {
 		struct intel_context *ce;
 
@@ -450,7 +451,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	return engine;
 }
 
-static void kill_context(struct i915_gem_context *ctx)
+static void kill_engines(struct i915_gem_engines *engines)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
@@ -462,7 +463,7 @@ static void kill_context(struct i915_gem_context *ctx)
 	 * However, we only care about pending requests, so only include
 	 * engines on which there are incomplete requests.
 	 */
-	for_each_gem_engine(ce, __context_engines_static(ctx), it) {
+	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
 
 		if (intel_context_set_banned(ce))
@@ -484,10 +485,41 @@ static void kill_context(struct i915_gem_context *ctx)
 			 * the context from the GPU, we have to resort to a full
 			 * reset. We hope the collateral damage is worth it.
 			 */
-			__reset_context(ctx, engine);
+			__reset_context(engines->ctx, engine);
 	}
 }
 
+static void kill_stale_engines(struct i915_gem_context *ctx)
+{
+	struct i915_gem_engines *pos, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->stale.lock, flags);
+	list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) {
+		if (!i915_sw_fence_await(&pos->fence))
+			continue;
+
+		spin_unlock_irqrestore(&ctx->stale.lock, flags);
+
+		kill_engines(pos);
+
+		spin_lock_irqsave(&ctx->stale.lock, flags);
+		list_safe_reset_next(pos, next, link);
+		list_del_init(&pos->link);
+
+		i915_sw_fence_complete(&pos->fence);
+	}
+	spin_unlock_irqrestore(&ctx->stale.lock, flags);
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+	if (!list_empty(&ctx->stale.engines))
+		kill_stale_engines(ctx);
+
+	kill_engines(__context_engines_static(ctx));
+}
+
 static void set_closed_name(struct i915_gem_context *ctx)
 {
 	char *s;
@@ -602,6 +634,9 @@ __create_context(struct drm_i915_private *i915)
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 	mutex_init(&ctx->mutex);
 
+	spin_lock_init(&ctx->stale.lock);
+	INIT_LIST_HEAD(&ctx->stale.engines);
+
 	mutex_init(&ctx->engines_mutex);
 	e = default_engines(ctx);
 	if (IS_ERR(e)) {
@@ -1529,6 +1564,71 @@ static const i915_user_extension_fn set_engines__extensions[] = {
 	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
 };
 
+static int engines_notify(struct i915_sw_fence *fence,
+			  enum i915_sw_fence_notify state)
+{
+	struct i915_gem_engines *engines =
+		container_of(fence, typeof(*engines), fence);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		if (!list_empty(&engines->link)) {
+			struct i915_gem_context *ctx = engines->ctx;
+			unsigned long flags;
+
+			spin_lock_irqsave(&ctx->stale.lock, flags);
+			list_del(&engines->link);
+			spin_unlock_irqrestore(&ctx->stale.lock, flags);
+		}
+		break;
+
+	case FENCE_FREE:
+		init_rcu_head(&engines->rcu);
+		call_rcu(&engines->rcu, free_engines_rcu);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void engines_idle_release(struct i915_gem_engines *engines)
+{
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	unsigned long flags;
+
+	GEM_BUG_ON(!engines);
+	i915_sw_fence_init(&engines->fence, engines_notify);
+
+	spin_lock_irqsave(&engines->ctx->stale.lock, flags);
+	list_add(&engines->link, &engines->ctx->stale.engines);
+	spin_unlock_irqrestore(&engines->ctx->stale.lock, flags);
+
+	for_each_gem_engine(ce, engines, it) {
+		struct dma_fence *fence;
+		int err;
+
+		if (!ce->timeline)
+			continue;
+
+		fence = i915_active_fence_get(&ce->timeline->last_request);
+		if (!fence)
+			continue;
+
+		err = i915_sw_fence_await_dma_fence(&engines->fence,
+						    fence, 0,
+						    GFP_KERNEL);
+
+		dma_fence_put(fence);
+		if (err < 0) {
+			kill_engines(engines);
+			break;
+		}
+	}
+
+	i915_sw_fence_commit(&engines->fence);
+}
+
 static int
 set_engines(struct i915_gem_context *ctx,
 	    const struct drm_i915_gem_context_param *args)
@@ -1571,7 +1671,8 @@ set_engines(struct i915_gem_context *ctx,
 	if (!set.engines)
 		return -ENOMEM;
 
-	init_rcu_head(&set.engines->rcu);
+	set.engines->ctx = ctx;
+
 	for (n = 0; n < num_engines; n++) {
 		struct i915_engine_class_instance ci;
 		struct intel_engine_cs *engine;
@@ -1631,7 +1732,8 @@ set_engines(struct i915_gem_context *ctx,
 	set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1);
 	mutex_unlock(&ctx->engines_mutex);
 
-	call_rcu(&set.engines->rcu, free_engines_rcu);
+	/* Keep track of old engine sets for kill_context() */
+	engines_idle_release(set.engines);
 
 	return 0;
 }
@@ -1646,7 +1748,6 @@ __copy_engines(struct i915_gem_engines *e)
 	if (!copy)
 		return ERR_PTR(-ENOMEM);
 
-	init_rcu_head(&copy->rcu);
 	for (n = 0; n < e->num_engines; n++) {
 		if (e->engines[n])
 			copy->engines[n] = intel_context_get(e->engines[n]);
@@ -1890,7 +1991,8 @@ static int clone_engines(struct i915_gem_context *dst,
 	if (!clone)
 		goto err_unlock;
 
-	init_rcu_head(&clone->rcu);
+	clone->ctx = dst;
+
 	for (n = 0; n < e->num_engines; n++) {
 		struct intel_engine_cs *engine;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 017ca803ab47..8d996dde8046 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -20,6 +20,7 @@
 #include "gt/intel_context_types.h"
 
 #include "i915_scheduler.h"
+#include "i915_sw_fence.h"
 
 struct pid;
 
@@ -30,7 +31,12 @@ struct intel_timeline;
 struct intel_ring;
 
 struct i915_gem_engines {
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct list_head link;
+	};
+	struct i915_sw_fence fence;
+	struct i915_gem_context *ctx;
 	unsigned int num_engines;
 	struct intel_context *engines[];
 };
@@ -173,6 +179,11 @@ struct i915_gem_context {
 	 * context in messages.
 	 */
 	char name[TASK_COMM_LEN + 8];
+
+	struct {
+		struct spinlock lock;
+		struct list_head engines;
+	} stale;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 51ba97daf2a0..9a20b7246f91 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -211,10 +211,19 @@ void i915_sw_fence_complete(struct i915_sw_fence *fence)
 	__i915_sw_fence_complete(fence, NULL);
 }
 
-void i915_sw_fence_await(struct i915_sw_fence *fence)
+bool i915_sw_fence_await(struct i915_sw_fence *fence)
 {
-	debug_fence_assert(fence);
-	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
+	int old, new;
+
+	new = atomic_read(&fence->pending);
+	do {
+		if (new < 1)
+			return false;
+
+		old = new++;
+	} while ((new = atomic_cmpxchg(&fence->pending, old, new)) != old);
+
+	return true;
 }
 
 void __i915_sw_fence_init(struct i915_sw_fence *fence,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 19e806ce43bc..30a863353ee6 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -91,7 +91,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    unsigned long timeout,
 				    gfp_t gfp);
 
-void i915_sw_fence_await(struct i915_sw_fence *fence);
+bool i915_sw_fence_await(struct i915_sw_fence *fence);
 void i915_sw_fence_complete(struct i915_sw_fence *fence);
 
 static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence)
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH 7/8] drm/i915: Disable use of hwsp_cacheline for kernel_context
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
                   ` (4 preceding siblings ...)
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 6/8] drm/i915/gem: Don't leak non-persistent requests on changing engines Chris Wilson
@ 2020-02-09 13:19 ` Chris Wilson
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 8/8] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

Currently on execlists, we use a local hwsp for the kernel_context,
rather than the engine's HWSP, as this is the default for execlists.
However, seqno rollover requires allocating a new HWSP cachline, and may
require pinning a new HWSP page in the GTT. This operation requiring
pinning in the GGTT is not allowed within the kernel_context timeline,
as doing so may require re-entering the kernel_context in order to evict
from the GGTT. As we want to avoid requiring a new HWSP for the
kernel_context, we can use the permanently pinned engine's HWSP instead.
However to do so we must prevent the use of semaphores reading the
kernel_context's HWSP, as the use of semaphores do not support rollover
onto the same cacheline. Fortunately, the kernel_context is mostly
isolated, so unlikely to give benefit to semaphores.

Reported-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c    | 14 ++++++++++++--
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 12 +++++++++---
 drivers/gpu/drm/i915/i915_request.c    | 25 ++++++++++++++++++++-----
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 230470c58ec9..259d74f9325b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2963,7 +2963,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
 
-	GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
+	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
+		return 0;
 
 	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
@@ -4615,8 +4616,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
 
 	if (!ce->timeline) {
 		struct intel_timeline *tl;
+		struct i915_vma *hwsp;
+
+		/*
+		 * Use the static global HWSP for the kernel context, and
+		 * a dynamically allocated cacheline for everyone else.
+		 */
+		hwsp = NULL;
+		if (unlikely(intel_context_is_barrier(ce)))
+			hwsp = engine->status_page.vma;
 
-		tl = intel_timeline_create(engine->gt, NULL);
+		tl = intel_timeline_create(engine->gt, hwsp);
 		if (IS_ERR(tl)) {
 			ret = PTR_ERR(tl);
 			goto error_deref_obj;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f0b5af3b2746..09657bb16523 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -3614,15 +3614,21 @@ static int bond_virtual_engine(struct intel_gt *gt,
 	rq[0] = ERR_PTR(-ENOMEM);
 	for_each_engine(master, gt, id) {
 		struct i915_sw_fence fence = {};
+		struct intel_context *ce;
 
 		if (master->class == class)
 			continue;
 
+		ce = intel_context_create(master);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto out;
+		}
+
 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
 
-		rq[0] = igt_spinner_create_request(&spin,
-						   master->kernel_context,
-						   MI_NOOP);
+		rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP);
+		intel_context_put(ce);
 		if (IS_ERR(rq[0])) {
 			err = PTR_ERR(rq[0]);
 			goto out;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 0ecc2cf64216..ec3449d47c95 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -881,11 +881,28 @@ __emit_semaphore_wait(struct i915_request *to,
 	return 0;
 }
 
+static bool can_use_semaphore(const struct i915_request *rq)
+{
+	bool ok;
+
+	rcu_read_lock();
+	ok = rcu_dereference(rq->timeline)->hwsp_cacheline;
+	rcu_read_unlock();
+
+	return ok;
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
 		    gfp_t gfp)
 {
+	if (!intel_context_use_semaphores(to->context))
+		goto await_fence;
+
+	if (!can_use_semaphore(from))
+		goto await_fence;
+
 	/* Just emit the first semaphore we see as request space is limited. */
 	if (already_busywaiting(to) & from->engine->mask)
 		goto await_fence;
@@ -931,12 +948,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
 						       I915_FENCE_GFP);
-	else if (intel_context_use_semaphores(to->context))
-		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
 	else
-		ret = i915_sw_fence_await_dma_fence(&to->submit,
-						    &from->fence, 0,
-						    I915_FENCE_GFP);
+		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
 	if (ret < 0)
 		return ret;
 
@@ -1035,6 +1048,8 @@ __i915_request_await_execution(struct i915_request *to,
 {
 	int err;
 
+	GEM_BUG_ON(intel_context_is_barrier(from->context));
+
 	/* Submit both requests at the same time */
 	err = __await_execution(to, from, hook, I915_FENCE_GFP);
 	if (err)
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] [PATCH 8/8] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
                   ` (5 preceding siblings ...)
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 7/8] drm/i915: Disable use of hwsp_cacheline for kernel_context Chris Wilson
@ 2020-02-09 13:19 ` Chris Wilson
  2020-02-09 13:35 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915/execlists: Fix hold/unhold recursion Patchwork
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2020-02-09 13:19 UTC (permalink / raw)
  To: intel-gfx

If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  6 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +++++
 drivers/gpu/drm/i915/gt/intel_gt_irq.c       | 13 ++++++-
 drivers/gpu/drm/i915/gt/intel_lrc.c          | 40 +++++++++++++++++---
 drivers/gpu/drm/i915/i915_reg.h              |  1 +
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f6f5e1ec48fc..89f201a5a219 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1288,6 +1288,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 
 	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
 		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+	if (HAS_EXECLISTS(dev_priv)) {
+		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+	}
 	drm_printf(m, "\tRING_START: 0x%08x\n",
 		   ENGINE_READ(engine, RING_START));
 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 45e36d963ea7..8eb7365b4230 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -157,6 +157,15 @@ struct intel_engine_execlists {
 	 */
 	struct i915_priolist default_priolist;
 
+	/**
+	 * @yield: CCID at the time of the last semaphore-wait interrupt.
+	 *
+	 * Instead of leaving a semaphore busy-spinning on an engine, we would
+	 * like to switch to another ready context, i.e. yielding the semaphore
+	 * timeslice.
+	 */
+	u32 yield;
+
 	/**
 	 * @error_interrupt: CS Master EIR
 	 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..875bd0392ffc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 		}
 	}
 
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		tasklet = true;
 
@@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	struct intel_uncore *uncore = gt->uncore;
 	const u32 dmask = irqs << 16 | irqs;
 	const u32 smask = irqs << 16;
@@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	const u32 gt_interrupts[] = {
 		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
 		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 259d74f9325b..4b6a707e7268 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1685,7 +1685,8 @@ static void defer_active(struct intel_engine_cs *engine)
 }
 
 static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+	       const struct i915_request *rq)
 {
 	int hint;
 
@@ -1701,6 +1702,31 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 	return hint >= effective_prio(rq);
 }
 
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+		const struct i915_request *rq)
+{
+	/*
+	 * Once bitten, forever smitten!
+	 *
+	 * If the active context ever busy-waited on a semaphore,
+	 * it will be treated as a hog until the end of its timeslice.
+	 * The HW only sends an interrupt on the first miss, and we
+	 * do know if that semaphore has been signaled, or even if it
+	 * is now stuck on another semaphore. Play safe, yield if it
+	 * might be stuck -- it will be given a fresh timeslice in
+	 * the near future.
+	 */
+	return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+		  const struct i915_request *rq)
+{
+	return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
 static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
@@ -1716,8 +1742,7 @@ timeslice(const struct intel_engine_cs *engine)
 	return READ_ONCE(engine->props.timeslice_duration_ms);
 }
 
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
 {
 	const struct i915_request *rq = *engine->execlists.active;
 
@@ -1860,13 +1885,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
-			   timer_expired(&engine->execlists.timer)) {
+			   timeslice_expired(execlists, last)) {
 			ENGINE_TRACE(engine,
-				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
+				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
 				     last->fence.context,
 				     last->fence.seqno,
 				     last->sched.attr.priority,
-				     execlists->queue_priority_hint);
+				     execlists->queue_priority_hint,
+				     yesno(timeslice_yield(execlists, last)));
 
 			ring_set_paused(engine, 1);
 			defer_active(engine);
@@ -2126,6 +2152,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		}
 		clear_ports(port + 1, last_port - port);
 
+		WRITE_ONCE(execlists->yield, -1);
 		execlists_submit_ports(engine);
 		set_preempt_timeout(engine);
 	} else {
@@ -4336,6 +4363,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b09c1d6dc0aa..0f1fcc863f3d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3090,6 +3090,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
 #define GT_BSD_USER_INTERRUPT			(1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11) /* bdw+ */
 #define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915/execlists: Fix hold/unhold recursion
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
                   ` (6 preceding siblings ...)
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 8/8] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
@ 2020-02-09 13:35 ` Patchwork
  2020-02-09 14:21 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-02-10 12:04 ` [Intel-gfx] [PATCH 1/8] " Tvrtko Ursulin
  9 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2020-02-09 13:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915/execlists: Fix hold/unhold recursion
URL   : https://patchwork.freedesktop.org/series/73213/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
f109a5e034a5 drm/i915/execlists: Fix hold/unhold recursion
ea73f5349664 drm/i915/execlists: Ignore tracek for nop process_csb
b64ee83d5fb8 drm/i915/selftests: Exercise timeslice rewinding
-:13: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#13: 
References: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL")

-:13: ERROR:GIT_COMMIT_ID: Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' - ie: 'commit 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL")'
#13: 
References: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL")

-:188: WARNING:MEMORY_BARRIER: memory barrier without comment
#188: FILE: drivers/gpu/drm/i915/gt/selftest_lrc.c:928:
+		wmb();

-:218: WARNING:MEMORY_BARRIER: memory barrier without comment
#218: FILE: drivers/gpu/drm/i915/gt/selftest_lrc.c:958:
+		wmb();

total: 1 errors, 3 warnings, 0 checks, 220 lines checked
7d37baa5d994 drm/i915/selftests: Remove erroneous intel_engine_pm_put
e1d68a2e4980 drm/i915/selftests: Relax timeout for error-interrupt reset processing
b9a3a68f8194 drm/i915/gem: Don't leak non-persistent requests on changing engines
-:249: WARNING:USE_SPINLOCK_T: struct spinlock should be spinlock_t
#249: FILE: drivers/gpu/drm/i915/gem/i915_gem_context_types.h:184:
+		struct spinlock lock;

total: 0 errors, 1 warnings, 0 checks, 242 lines checked
821d98111deb drm/i915: Disable use of hwsp_cacheline for kernel_context
148f14e2d25f drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/8] drm/i915/execlists: Fix hold/unhold recursion
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
                   ` (7 preceding siblings ...)
  2020-02-09 13:35 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915/execlists: Fix hold/unhold recursion Patchwork
@ 2020-02-09 14:21 ` Patchwork
  2020-02-10 12:04 ` [Intel-gfx] [PATCH 1/8] " Tvrtko Ursulin
  9 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2020-02-09 14:21 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/8] drm/i915/execlists: Fix hold/unhold recursion
URL   : https://patchwork.freedesktop.org/series/73213/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7893 -> Patchwork_16500
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/index.html

Known issues
------------

  Here are the changes found in Patchwork_16500 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_close_race@basic-threads:
    - fi-byt-n2820:       [PASS][1] -> [INCOMPLETE][2] ([i915#45])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7893/fi-byt-n2820/igt@gem_close_race@basic-threads.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/fi-byt-n2820/igt@gem_close_race@basic-threads.html

  
#### Possible fixes ####

  * igt@i915_selftest@live_blt:
    - fi-hsw-4770r:       [DMESG-FAIL][3] ([i915#553] / [i915#725]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7893/fi-hsw-4770r/igt@i915_selftest@live_blt.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/fi-hsw-4770r/igt@i915_selftest@live_blt.html
    - fi-ivb-3770:        [DMESG-FAIL][5] ([i915#725]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7893/fi-ivb-3770/igt@i915_selftest@live_blt.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/fi-ivb-3770/igt@i915_selftest@live_blt.html

  * igt@i915_selftest@live_execlists:
    - fi-icl-y:           [DMESG-FAIL][7] ([fdo#108569]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7893/fi-icl-y/igt@i915_selftest@live_execlists.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/fi-icl-y/igt@i915_selftest@live_execlists.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-cfl-guc:         [INCOMPLETE][9] ([fdo#106070] / [i915#424]) -> [PASS][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7893/fi-cfl-guc/igt@i915_selftest@live_gem_contexts.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/fi-cfl-guc/igt@i915_selftest@live_gem_contexts.html

  * igt@kms_flip@basic-flip-vs-dpms:
    - fi-skl-6770hq:      [SKIP][11] ([fdo#109271]) -> [PASS][12] +27 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7893/fi-skl-6770hq/igt@kms_flip@basic-flip-vs-dpms.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/fi-skl-6770hq/igt@kms_flip@basic-flip-vs-dpms.html

  
  [fdo#106070]: https://bugs.freedesktop.org/show_bug.cgi?id=106070
  [fdo#108569]: https://bugs.freedesktop.org/show_bug.cgi?id=108569
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [i915#424]: https://gitlab.freedesktop.org/drm/intel/issues/424
  [i915#45]: https://gitlab.freedesktop.org/drm/intel/issues/45
  [i915#553]: https://gitlab.freedesktop.org/drm/intel/issues/553
  [i915#725]: https://gitlab.freedesktop.org/drm/intel/issues/725


Participating hosts (37 -> 40)
------------------------------

  Additional (9): fi-byt-j1900 fi-hsw-peppy fi-glk-dsi fi-gdg-551 fi-bsw-kefka fi-skl-lmem fi-blb-e6850 fi-bsw-nick fi-kbl-r 
  Missing    (6): fi-kbl-7500u fi-ctg-p8600 fi-icl-dsi fi-icl-guc fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_7893 -> Patchwork_16500

  CI-20190529: 20190529
  CI_DRM_7893: 0d2e3557edaad8a29a76c056207b312fcd761ac9 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5428: ff86abde4a54c15dfe7ad70e5a8c75fd41757683 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_16500: 148f14e2d25f997977e545280f426230283729d0 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

148f14e2d25f drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore
821d98111deb drm/i915: Disable use of hwsp_cacheline for kernel_context
b9a3a68f8194 drm/i915/gem: Don't leak non-persistent requests on changing engines
e1d68a2e4980 drm/i915/selftests: Relax timeout for error-interrupt reset processing
7d37baa5d994 drm/i915/selftests: Remove erroneous intel_engine_pm_put
b64ee83d5fb8 drm/i915/selftests: Exercise timeslice rewinding
ea73f5349664 drm/i915/execlists: Ignore tracek for nop process_csb
f109a5e034a5 drm/i915/execlists: Fix hold/unhold recursion

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_16500/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion
  2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
                   ` (8 preceding siblings ...)
  2020-02-09 14:21 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-02-10 12:04 ` Tvrtko Ursulin
  9 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2020-02-10 12:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 09/02/2020 13:19, Chris Wilson wrote:
> In eliminating the recursion from walking the tree of signalers/waiters
> for processing the hold/unhold operations, a crucial error crept in
> where we looked at the parent request and not the list element when
> processing the list.
> 
> Brown paper bag, much?
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/issues/1166
> Fixes: 748317386afb ("drm/i915/execlists: Offline error capture")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 13 ++++++++-----
>   1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 4bedc66bebb1..21385070ad15 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2374,10 +2374,10 @@ static void __execlists_hold(struct i915_request *rq)
>   		if (i915_request_is_active(rq))
>   			__i915_request_unsubmit(rq);
>   
> -		RQ_TRACE(rq, "on hold\n");
>   		clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>   		list_move_tail(&rq->sched.link, &rq->engine->active.hold);
>   		i915_request_set_hold(rq);
> +		RQ_TRACE(rq, "on hold\n");
>   
>   		for_each_waiter(p, rq) {
>   			struct i915_request *w =
> @@ -2393,7 +2393,7 @@ static void __execlists_hold(struct i915_request *rq)
>   			if (i915_request_completed(w))
>   				continue;
>   
> -			if (i915_request_on_hold(rq))
> +			if (i915_request_on_hold(w))
>   				continue;
>   
>   			list_move_tail(&w->sched.link, &list);
> @@ -2451,6 +2451,7 @@ static bool execlists_hold(struct intel_engine_cs *engine,
>   	GEM_BUG_ON(i915_request_on_hold(rq));
>   	GEM_BUG_ON(rq->engine != engine);
>   	__execlists_hold(rq);
> +	GEM_BUG_ON(list_empty(&engine->active.hold));
>   
>   unlock:
>   	spin_unlock_irq(&engine->active.lock);
> @@ -2486,6 +2487,8 @@ static void __execlists_unhold(struct i915_request *rq)
>   	do {
>   		struct i915_dependency *p;
>   
> +		RQ_TRACE(rq, "hold release\n");
> +
>   		GEM_BUG_ON(!i915_request_on_hold(rq));
>   		GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
>   
> @@ -2494,7 +2497,6 @@ static void __execlists_unhold(struct i915_request *rq)
>   			       i915_sched_lookup_priolist(rq->engine,
>   							  rq_prio(rq)));
>   		set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> -		RQ_TRACE(rq, "hold release\n");
>   
>   		/* Also release any children on this engine that are ready */
>   		for_each_waiter(p, rq) {
> @@ -2504,11 +2506,11 @@ static void __execlists_unhold(struct i915_request *rq)
>   			if (w->engine != rq->engine)
>   				continue;
>   
> -			if (!i915_request_on_hold(rq))
> +			if (!i915_request_on_hold(w))
>   				continue;
>   
>   			/* Check that no other parents are also on hold */
> -			if (hold_request(rq))
> +			if (hold_request(w))
>   				continue;
>   
>   			list_move_tail(&w->sched.link, &list);
> @@ -2806,6 +2808,7 @@ static void execlists_submit_request(struct i915_request *request)
>   	spin_lock_irqsave(&engine->active.lock, flags);
>   
>   	if (unlikely(ancestor_on_hold(engine, request))) {
> +		RQ_TRACE(request, "ancestor on hold\n");
>   		list_add_tail(&request->sched.link, &engine->active.hold);
>   		i915_request_set_hold(request);
>   	} else {
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [Intel-gfx] [PATCH 4/8] drm/i915/selftests: Remove erroneous intel_engine_pm_put
  2020-02-09 13:19 ` [Intel-gfx] [PATCH 4/8] drm/i915/selftests: Remove erroneous intel_engine_pm_put Chris Wilson
@ 2020-02-10 12:09   ` Tvrtko Ursulin
  0 siblings, 0 replies; 12+ messages in thread
From: Tvrtko Ursulin @ 2020-02-10 12:09 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 09/02/2020 13:19, Chris Wilson wrote:
> On an allocation error path for live_hwsp_alternate, we dropped the
> engine wakeref before we had even acquired it.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/selftest_timeline.c | 1 -
>   1 file changed, 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index e59bf7e31d83..c2578a0f2f14 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -606,7 +606,6 @@ static int live_hwsp_alternate(void *arg)
>   
>   			tl = checked_intel_timeline_create(gt);
>   			if (IS_ERR(tl)) {
> -				intel_engine_pm_put(engine);
>   				err = PTR_ERR(tl);
>   				goto out;
>   			}
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-02-10 12:09 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-02-09 13:19 [Intel-gfx] [PATCH 1/8] drm/i915/execlists: Fix hold/unhold recursion Chris Wilson
2020-02-09 13:19 ` [Intel-gfx] [PATCH 2/8] drm/i915/execlists: Ignore tracek for nop process_csb Chris Wilson
2020-02-09 13:19 ` [Intel-gfx] [PATCH 3/8] drm/i915/selftests: Exercise timeslice rewinding Chris Wilson
2020-02-09 13:19 ` [Intel-gfx] [PATCH 4/8] drm/i915/selftests: Remove erroneous intel_engine_pm_put Chris Wilson
2020-02-10 12:09   ` Tvrtko Ursulin
2020-02-09 13:19 ` [Intel-gfx] [PATCH 5/8] drm/i915/selftests: Relax timeout for error-interrupt reset processing Chris Wilson
2020-02-09 13:19 ` [Intel-gfx] [PATCH 6/8] drm/i915/gem: Don't leak non-persistent requests on changing engines Chris Wilson
2020-02-09 13:19 ` [Intel-gfx] [PATCH 7/8] drm/i915: Disable use of hwsp_cacheline for kernel_context Chris Wilson
2020-02-09 13:19 ` [Intel-gfx] [PATCH 8/8] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore Chris Wilson
2020-02-09 13:35 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/8] drm/i915/execlists: Fix hold/unhold recursion Patchwork
2020-02-09 14:21 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-02-10 12:04 ` [Intel-gfx] [PATCH 1/8] " Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.