All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Restore context and pd for ringbuffer submission after reset
@ 2017-02-04 19:37 Chris Wilson
  2017-02-04 19:46 ` Chris Wilson
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Chris Wilson @ 2017-02-04 19:37 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

Following a reset, the context and page directory registers are lost.
However, the queue of requests that we resubmit after the reset may
depend upon them - the registers are restored from a context image, but
that restore may be inhibited and may simply be absent from the request
if it was in the middle of a sequence using the same context. If we
prime the CCID/PD registers with the first request in the queue (even
for the hung request), we prevent invalid memory access for the
following requests (and continually hung engines).

Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---

This could do with going to stable but requires a few odds and ends, such
as dma_fence_set_error(). Oh well, fortunately it is not as bad it might
seem since these registers are restored from the context - but that then
requires a mesa context to reset the GPU state (as fortunately we called
MI_SET_CONTEXT at the start of every batch!), but any other request in
the meantime will likely hang again.

(Also I left gen8/ringbuffer reset_hw as an exercise for the reader)
-Chris

---
 drivers/gpu/drm/i915/i915_gem.c         | 18 +++++++-----------
 drivers/gpu/drm/i915/intel_lrc.c        |  6 +++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 30 +++++++++++++++++++++++++++---
 3 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fe32b1edbda2..7a16c859c875 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2735,21 +2735,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 		engine->irq_seqno_barrier(engine);
 
 	request = i915_gem_find_active_request(engine);
-	if (!request)
-		return;
-
-	if (!i915_gem_reset_request(request))
-		return;
+	if (request && i915_gem_reset_request(request)) {
+		DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
+				 engine->name, request->global_seqno);
 
-	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-			 engine->name, request->global_seqno);
+		/* If this context is now banned, skip all pending requests. */
+		if (i915_gem_context_is_banned(request->ctx))
+			engine_skip_context(request);
+	}
 
 	/* Setup the CS to resume from the breadcrumb of the hung request */
 	engine->reset_hw(engine, request);
-
-	/* If this context is now banned, skip all of its pending requests. */
-	if (i915_gem_context_is_banned(request->ctx))
-		engine_skip_context(request);
 }
 
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 44a92ea464ba..f44dd42e46b5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1324,7 +1324,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 			      struct drm_i915_gem_request *request)
 {
 	struct execlist_port *port = engine->execlist_port;
-	struct intel_context *ce = &request->ctx->engine[engine->id];
+	struct intel_context *ce;
+
+	if (!request || request->fence.error != -EIO)
+		return;
 
 	/* We want a simple context + ring to execute the breadcrumb update.
 	 * We cannot rely on the context being intact across the GPU hang,
@@ -1333,6 +1336,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
+	ce = &request->ctx->engine[engine->id];
 	execlists_init_reg_state(ce->lrc_reg_state,
 				 request->ctx, engine, ce->ring);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d32cbba25d98..b30c1dc3061c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -599,10 +599,34 @@ static int init_ring_common(struct intel_engine_cs *engine)
 static void reset_ring_common(struct intel_engine_cs *engine,
 			      struct drm_i915_gem_request *request)
 {
-	struct intel_ring *ring = request->ring;
+	if (request) {
+		struct drm_i915_private *dev_priv = request->i915;
+		struct intel_context *ce = &request->ctx->engine[engine->id];
+		struct i915_hw_ppgtt *ppgtt;
+
+		if (ce->state) {
+			I915_WRITE(CCID,
+				   i915_ggtt_offset(ce->state) |
+				   BIT(3) | BIT(2) | CCID_EN);
+		}
 
-	ring->head = request->postfix;
-	ring->last_retired_head = -1;
+		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
+		if (ppgtt) {
+			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
+
+			I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
+			I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);
+		}
+
+		if (request->fence.error == -EIO) {
+			struct intel_ring *ring = request->ring;
+
+			ring->head = request->postfix;
+			ring->last_retired_head = -1;
+		}
+	} else {
+		engine->legacy_active_context = NULL;
+	}
 }
 
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-02-07 21:47 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-02-04 19:37 [PATCH] drm/i915: Restore context and pd for ringbuffer submission after reset Chris Wilson
2017-02-04 19:46 ` Chris Wilson
2017-02-06  9:10 ` ✗ Fi.CI.BAT: failure for " Patchwork
2017-02-06 15:33 ` [PATCH] " Mika Kuoppala
2017-02-06 15:51   ` Chris Wilson
2017-02-07  9:58   ` [PATCH v2] " Chris Wilson
2017-02-07 15:24     ` [PATCH v4] " Chris Wilson
2017-02-07 15:54       ` Mika Kuoppala
2017-02-07 21:47         ` Chris Wilson
2017-02-07 10:17 ` ✗ Fi.CI.BAT: warning for drm/i915: Restore context and pd for ringbuffer submission after reset (rev2) Patchwork
2017-02-07 16:52 ` ✓ Fi.CI.BAT: success for drm/i915: Restore context and pd for ringbuffer submission after reset (rev3) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.