public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc
@ 2017-03-14  9:34 Chris Wilson
  2017-03-14  9:34 ` [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Chris Wilson @ 2017-03-14  9:34 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

It turns out that we may want to restore the original
engine->submit_request (and engine->schedule) callbacks from more than
just the guc <-> execlists transition. Move this to a vfunc so we can
have a common interface.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     | 10 ++++++++++
 drivers/gpu/drm/i915/intel_lrc.c           | 15 +++++----------
 drivers/gpu/drm/i915/intel_lrc.h           |  1 -
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 15 +++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  4 ++++
 6 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index b4d24cd7639a..119b5c073833 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1051,7 +1051,7 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 		return;
 
 	/* Revert back to manual ELSP submission */
-	intel_execlists_enable_submission(dev_priv);
+	intel_engines_enable_submission(dev_priv);
 }
 
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 73fe718516a5..5663ebab851f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 			goto cleanup;
 		}
 
+		engine->enable_submission(engine);
 		mask |= ENGINE_MASK(id);
 	}
 
@@ -1115,6 +1116,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
 	return true;
 }
 
+void intel_engines_enable_submission(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id)
+		engine->enable_submission(engine);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 89f38e7def9f..f79df7a51e60 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1560,15 +1560,10 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	kfree(engine);
 }
 
-void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
+static void logical_ring_enable_submission(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id) {
-		engine->submit_request = execlists_submit_request;
-		engine->schedule = execlists_schedule;
-	}
+	engine->submit_request = execlists_submit_request;
+	engine->schedule = execlists_schedule;
 }
 
 static void
@@ -1586,8 +1581,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_breadcrumb = gen8_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
-	engine->submit_request = execlists_submit_request;
-	engine->schedule = execlists_schedule;
+
+	engine->enable_submission = logical_ring_enable_submission;
 
 	engine->irq_enable = gen8_logical_ring_enable_irq;
 	engine->irq_disable = gen8_logical_ring_disable_irq;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 5fc07761caff..e8015e7bf4e9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,6 +87,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);
-void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4a864f8c9387..5b141f6639b6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2050,6 +2050,16 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void i9xx_enable_submission(struct intel_engine_cs *engine)
+{
+	engine->submit_request = i9xx_submit_request;
+}
+
+static void gen6_bsd_enable_submission(struct intel_engine_cs *engine)
+{
+	engine->submit_request = gen6_bsd_submit_request;
+}
+
 static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 				      struct intel_engine_cs *engine)
 {
@@ -2080,7 +2090,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 				engine->emit_breadcrumb_sz++;
 		}
 	}
-	engine->submit_request = i9xx_submit_request;
+
+	engine->enable_submission = i9xx_enable_submission;
 
 	if (INTEL_GEN(dev_priv) >= 8)
 		engine->emit_bb_start = gen8_emit_bb_start;
@@ -2165,7 +2176,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) >= 6) {
 		/* gen6 bsd needs a special wa for tail updates */
 		if (IS_GEN6(dev_priv))
-			engine->submit_request = gen6_bsd_submit_request;
+			engine->enable_submission = gen6_bsd_enable_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
 		if (INTEL_GEN(dev_priv) < 8)
 			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0ef491df5b4e..30d9820d978c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -273,6 +273,8 @@ struct intel_engine_cs {
 	void		(*reset_hw)(struct intel_engine_cs *engine,
 				    struct drm_i915_gem_request *req);
 
+	void		(*enable_submission)(struct intel_engine_cs *engine);
+
 	int		(*context_pin)(struct intel_engine_cs *engine,
 				       struct i915_gem_context *ctx);
 	void		(*context_unpin)(struct intel_engine_cs *engine,
@@ -669,4 +671,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
+void intel_engines_enable_submission(struct drm_i915_private *i915);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging
  2017-03-14  9:34 [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc Chris Wilson
@ 2017-03-14  9:34 ` Chris Wilson
  2017-03-15  9:23   ` Tvrtko Ursulin
  2017-03-14 16:17 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/2] drm/i915: Move engine->submit_request selection to a vfunc Patchwork
  2017-03-14 16:31 ` [PATCH v3 1/2] " Tvrtko Ursulin
  2 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2017-03-14  9:34 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

When we wedge the device, we override engine->submit_request with a nop
to ensure that all in-flight requests are marked in error. However, igt
would like to unwedge the device to test -EIO handling. This requires us
to flush those in-flight requests and restore the original
engine->submit_request.

v2: Use a vfunc to unify enabling request submission to engines
v3: Split new vfunc to a separate patch.

Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Testcase: igt/gem_eio
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c |  2 +-
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e312b61ba6bb..11d1066b673c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1822,7 +1822,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
 		return;
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
-	__clear_bit(I915_WEDGED, &error->flags);
+	i915_gem_unset_wedged(dev_priv);
 	error->reset_count++;
 
 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 48ff64812289..53a791d8d992 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3406,6 +3406,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
+void i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 202bb850f260..e06830916a05 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3000,6 +3000,57 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
 
+void i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+	struct i915_gem_timeline *tl;
+	int i;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+		return;
+
+	/* Before unwedging, make sure that all pending operations
+	 * are flushed and errored out. No more can be submitted until
+	 * we reset the wedged bit.
+	 */
+	list_for_each_entry(tl, &i915->gt.timelines, link) {
+		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
+			struct drm_i915_gem_request *rq;
+
+			rq = i915_gem_active_peek(&tl->engine[i].last_request,
+						  &i915->drm.struct_mutex);
+			if (!rq)
+				continue;
+
+			/* We can't use our normal waiter as we want to
+			 * avoid recursively trying to handle the current
+			 * reset. The basic dma_fence_default_wait() installs
+			 * a callback for dma_fence_signal(), which is
+			 * triggered by our nop handler (indirectly, the
+			 * callback enables the signaler thread which is
+			 * woken by the nop_submit_request() advancing the seqno
+			 * and when the seqno passes the fence, the signaler
+			 * then signals the fence waking us up).
+			 */
+			dma_fence_default_wait(&rq->fence, false,
+					       MAX_SCHEDULE_TIMEOUT);
+		}
+	}
+
+	/* Undo nop_submit_request. We prevent all new i915 requests from
+	 * being queued (by disallowing execbuf whilst wedged) so having
+	 * waited for all active requests above, we know the system is idle
+	 * and do not have to worry about a thread being inside
+	 * engine->submit_request() as we swap over. So unlike installing
+	 * the nop_submit_request on reset, we can do this from normal
+	 * context and do not require stop_machine().
+	 */
+	intel_engines_enable_submission(i915);
+
+	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+}
+
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* ✓ Fi.CI.BAT: success for series starting with [v3,1/2] drm/i915: Move engine->submit_request selection to a vfunc
  2017-03-14  9:34 [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc Chris Wilson
  2017-03-14  9:34 ` [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging Chris Wilson
@ 2017-03-14 16:17 ` Patchwork
  2017-03-14 16:31 ` [PATCH v3 1/2] " Tvrtko Ursulin
  2 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2017-03-14 16:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [v3,1/2] drm/i915: Move engine->submit_request selection to a vfunc
URL   : https://patchwork.freedesktop.org/series/21203/
State : success

== Summary ==

Series 21203v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/21203/revisions/1/mbox/

Test gem_exec_flush:
        Subgroup basic-batch-kernel-default-uc:
                fail       -> PASS       (fi-snb-2600) fdo#100007
Test gem_exec_suspend:
        Subgroup basic-s4-devices:
                dmesg-warn -> PASS       (fi-bxt-t5700) fdo#100125
Test kms_cursor_legacy:
        Subgroup basic-flip-after-cursor-atomic:
                fail       -> PASS       (fi-ivb-3770)

fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#100125 https://bugs.freedesktop.org/show_bug.cgi?id=100125

fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time: 456s
fi-bsw-n3050     total:278  pass:239  dwarn:0   dfail:0   fail:0   skip:39  time: 584s
fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time: 531s
fi-bxt-t5700     total:278  pass:258  dwarn:0   dfail:0   fail:0   skip:20  time: 555s
fi-byt-j1900     total:278  pass:251  dwarn:0   dfail:0   fail:0   skip:27  time: 497s
fi-byt-n2820     total:278  pass:247  dwarn:0   dfail:0   fail:0   skip:31  time: 495s
fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 435s
fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time: 430s
fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time: 435s
fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 517s
fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 499s
fi-kbl-7500u     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time: 481s
fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 507s
fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time: 609s
fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time: 486s
fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time: 546s
fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time: 546s
fi-snb-2600      total:278  pass:249  dwarn:0   dfail:0   fail:0   skip:29  time: 417s

c8ddc827b1fe6c0860e1171faca769c37c700131 drm-tip: 2017y-03m-14d-14h-49m-17s UTC integration manifest
afce339 drm/i915: Restore engine->submit_request before unwedging
e1d8ea3 drm/i915: Move engine->submit_request selection to a vfunc

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4160/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc
  2017-03-14  9:34 [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc Chris Wilson
  2017-03-14  9:34 ` [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging Chris Wilson
  2017-03-14 16:17 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/2] drm/i915: Move engine->submit_request selection to a vfunc Patchwork
@ 2017-03-14 16:31 ` Tvrtko Ursulin
  2017-03-14 21:33   ` Chris Wilson
  2 siblings, 1 reply; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-03-14 16:31 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala


On 14/03/2017 09:34, Chris Wilson wrote:
> It turns out that we may want to restore the original
> engine->submit_request (and engine->schedule) callbacks from more than
> just the guc <-> execlists transition. Move this to a vfunc so we can
> have a common interface.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
>  drivers/gpu/drm/i915/intel_engine_cs.c     | 10 ++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c           | 15 +++++----------
>  drivers/gpu/drm/i915/intel_lrc.h           |  1 -
>  drivers/gpu/drm/i915/intel_ringbuffer.c    | 15 +++++++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |  4 ++++
>  6 files changed, 33 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index b4d24cd7639a..119b5c073833 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -1051,7 +1051,7 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
>  		return;
>
>  	/* Revert back to manual ELSP submission */
> -	intel_execlists_enable_submission(dev_priv);
> +	intel_engines_enable_submission(dev_priv);

intel_engines_default_submission came to my mind but that will also be 
misleading once the guc switch is toggled. But I think less misleading 
than enable_submission. And vfunc name maybe as assign_default_submission?

>  }
>
>  void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 73fe718516a5..5663ebab851f 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
>  			goto cleanup;
>  		}
>
> +		engine->enable_submission(engine);

Could be moved to intel_engines_setup_common if the logical_ring_setup 
was re-arranged a bit so that the default_vfuncs are assigned before it. 
Legacy looks like it would be alright with that approach already.

My thinking here is not to expose this vfunc so prominently in the code 
since it is a bit of a low level internal implementation thing.

Thoughts?

Regards,

Tvrtko


>  		mask |= ENGINE_MASK(id);
>  	}
>
> @@ -1115,6 +1116,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
>  	return true;
>  }
>
> +void intel_engines_enable_submission(struct drm_i915_private *i915)
> +{
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +
> +	for_each_engine(engine, i915, id)
> +		engine->enable_submission(engine);
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftests/mock_engine.c"
>  #endif
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 89f38e7def9f..f79df7a51e60 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1560,15 +1560,10 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
>  	kfree(engine);
>  }
>
> -void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
> +static void logical_ring_enable_submission(struct intel_engine_cs *engine)
>  {
> -	struct intel_engine_cs *engine;
> -	enum intel_engine_id id;
> -
> -	for_each_engine(engine, dev_priv, id) {
> -		engine->submit_request = execlists_submit_request;
> -		engine->schedule = execlists_schedule;
> -	}
> +	engine->submit_request = execlists_submit_request;
> +	engine->schedule = execlists_schedule;
>  }
>
>  static void
> @@ -1586,8 +1581,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>  	engine->emit_flush = gen8_emit_flush;
>  	engine->emit_breadcrumb = gen8_emit_breadcrumb;
>  	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
> -	engine->submit_request = execlists_submit_request;
> -	engine->schedule = execlists_schedule;
> +
> +	engine->enable_submission = logical_ring_enable_submission;
>
>  	engine->irq_enable = gen8_logical_ring_enable_irq;
>  	engine->irq_disable = gen8_logical_ring_disable_irq;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 5fc07761caff..e8015e7bf4e9 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -87,6 +87,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
>  /* Execlists */
>  int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
>  				    int enable_execlists);
> -void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
>
>  #endif /* _INTEL_LRC_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 4a864f8c9387..5b141f6639b6 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2050,6 +2050,16 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
>  	}
>  }
>
> +static void i9xx_enable_submission(struct intel_engine_cs *engine)
> +{
> +	engine->submit_request = i9xx_submit_request;
> +}
> +
> +static void gen6_bsd_enable_submission(struct intel_engine_cs *engine)
> +{
> +	engine->submit_request = gen6_bsd_submit_request;
> +}
> +
>  static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>  				      struct intel_engine_cs *engine)
>  {
> @@ -2080,7 +2090,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>  				engine->emit_breadcrumb_sz++;
>  		}
>  	}
> -	engine->submit_request = i9xx_submit_request;
> +
> +	engine->enable_submission = i9xx_enable_submission;
>
>  	if (INTEL_GEN(dev_priv) >= 8)
>  		engine->emit_bb_start = gen8_emit_bb_start;
> @@ -2165,7 +2176,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
>  	if (INTEL_GEN(dev_priv) >= 6) {
>  		/* gen6 bsd needs a special wa for tail updates */
>  		if (IS_GEN6(dev_priv))
> -			engine->submit_request = gen6_bsd_submit_request;
> +			engine->enable_submission = gen6_bsd_enable_submission;
>  		engine->emit_flush = gen6_bsd_ring_flush;
>  		if (INTEL_GEN(dev_priv) < 8)
>  			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0ef491df5b4e..30d9820d978c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -273,6 +273,8 @@ struct intel_engine_cs {
>  	void		(*reset_hw)(struct intel_engine_cs *engine,
>  				    struct drm_i915_gem_request *req);
>
> +	void		(*enable_submission)(struct intel_engine_cs *engine);
> +
>  	int		(*context_pin)(struct intel_engine_cs *engine,
>  				       struct i915_gem_context *ctx);
>  	void		(*context_unpin)(struct intel_engine_cs *engine,
> @@ -669,4 +671,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
>  bool intel_engine_is_idle(struct intel_engine_cs *engine);
>  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
>
> +void intel_engines_enable_submission(struct drm_i915_private *i915);
> +
>  #endif /* _INTEL_RINGBUFFER_H_ */
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc
  2017-03-14 16:31 ` [PATCH v3 1/2] " Tvrtko Ursulin
@ 2017-03-14 21:33   ` Chris Wilson
  2017-03-15  8:14     ` Tvrtko Ursulin
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2017-03-14 21:33 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Mika Kuoppala

On Tue, Mar 14, 2017 at 04:31:58PM +0000, Tvrtko Ursulin wrote:
> 
> On 14/03/2017 09:34, Chris Wilson wrote:
> >It turns out that we may want to restore the original
> >engine->submit_request (and engine->schedule) callbacks from more than
> >just the guc <-> execlists transition. Move this to a vfunc so we can
> >have a common interface.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
> > drivers/gpu/drm/i915/intel_engine_cs.c     | 10 ++++++++++
> > drivers/gpu/drm/i915/intel_lrc.c           | 15 +++++----------
> > drivers/gpu/drm/i915/intel_lrc.h           |  1 -
> > drivers/gpu/drm/i915/intel_ringbuffer.c    | 15 +++++++++++++--
> > drivers/gpu/drm/i915/intel_ringbuffer.h    |  4 ++++
> > 6 files changed, 33 insertions(+), 14 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> >index b4d24cd7639a..119b5c073833 100644
> >--- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >@@ -1051,7 +1051,7 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
> > 		return;
> >
> > 	/* Revert back to manual ELSP submission */
> >-	intel_execlists_enable_submission(dev_priv);
> >+	intel_engines_enable_submission(dev_priv);
> 
> intel_engines_default_submission came to my mind but that will also
> be misleading once the guc switch is toggled. But I think less
> misleading than enable_submission. And vfunc name maybe as
> assign_default_submission?

intel_engines_reset_default_submission
engine->set_default_submission

Not overly enamoured, but the above seems like the best compromise so
far.

> 
> > }
> >
> > void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
> >diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> >index 73fe718516a5..5663ebab851f 100644
> >--- a/drivers/gpu/drm/i915/intel_engine_cs.c
> >+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> >@@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
> > 			goto cleanup;
> > 		}
> >
> >+		engine->enable_submission(engine);
> 
> Could be moved to intel_engines_setup_common if the
> logical_ring_setup was re-arranged a bit so that the default_vfuncs
> are assigned before it. Legacy looks like it would be alright with
> that approach already.
> 
> My thinking here is not to expose this vfunc so prominently in the
> code since it is a bit of a low level internal implementation thing.

The concern is reasonable, but equally moving it to
intel_engine_setup_common() is hairy. Otoh, I think it is suitable for
intel_engine_init_common(). Happy?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc
  2017-03-14 21:33   ` Chris Wilson
@ 2017-03-15  8:14     ` Tvrtko Ursulin
  2017-03-15  9:41       ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-03-15  8:14 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Mika Kuoppala


On 14/03/2017 21:33, Chris Wilson wrote:
> On Tue, Mar 14, 2017 at 04:31:58PM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/03/2017 09:34, Chris Wilson wrote:
>>> It turns out that we may want to restore the original
>>> engine->submit_request (and engine->schedule) callbacks from more than
>>> just the guc <-> execlists transition. Move this to a vfunc so we can
>>> have a common interface.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
>>> drivers/gpu/drm/i915/intel_engine_cs.c     | 10 ++++++++++
>>> drivers/gpu/drm/i915/intel_lrc.c           | 15 +++++----------
>>> drivers/gpu/drm/i915/intel_lrc.h           |  1 -
>>> drivers/gpu/drm/i915/intel_ringbuffer.c    | 15 +++++++++++++--
>>> drivers/gpu/drm/i915/intel_ringbuffer.h    |  4 ++++
>>> 6 files changed, 33 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index b4d24cd7639a..119b5c073833 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -1051,7 +1051,7 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
>>> 		return;
>>>
>>> 	/* Revert back to manual ELSP submission */
>>> -	intel_execlists_enable_submission(dev_priv);
>>> +	intel_engines_enable_submission(dev_priv);
>>
>> intel_engines_default_submission came to my mind but that will also
>> be misleading once the guc switch is toggled. But I think less
>> misleading than enable_submission. And vfunc name maybe as
>> assign_default_submission?
>
> intel_engines_reset_default_submission
> engine->set_default_submission
>
> Not overly enamoured, but the above seems like the best compromise so
> far.

Sounds good.

>>> }
>>>
>>> void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> index 73fe718516a5..5663ebab851f 100644
>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> @@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
>>> 			goto cleanup;
>>> 		}
>>>
>>> +		engine->enable_submission(engine);
>>
>> Could be moved to intel_engines_setup_common if the
>> logical_ring_setup was re-arranged a bit so that the default_vfuncs
>> are assigned before it. Legacy looks like it would be alright with
>> that approach already.
>>
>> My thinking here is not to expose this vfunc so prominently in the
>> code since it is a bit of a low level internal implementation thing.
>
> The concern is reasonable, but equally moving it to
> intel_engine_setup_common() is hairy. Otoh, I think it is suitable for
> intel_engine_init_common(). Happy?

Why do you think it is hairy for intel_engine_setup_common? It keeps the 
setup/init split for lrc, where all vfuncs are initialized in the setup 
phase which was the intention.

Legacy is a bit more uncleanly split now that I look at it, but putting 
it at intel_engine_init_common would be just more of the same.

Regards,

Tvrtko



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging
  2017-03-14  9:34 ` [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging Chris Wilson
@ 2017-03-15  9:23   ` Tvrtko Ursulin
  2017-03-15  9:34     ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-03-15  9:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Mika Kuoppala


On 14/03/2017 09:34, Chris Wilson wrote:
> When we wedge the device, we override engine->submit_request with a nop
> to ensure that all in-flight requests are marked in error. However, igt
> would like to unwedge the device to test -EIO handling. This requires us
> to flush those in-flight requests and restore the original
> engine->submit_request.
>
> v2: Use a vfunc to unify enabling request submission to engines
> v3: Split new vfunc to a separate patch.
>
> Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> Testcase: igt/gem_eio
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 53 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index e312b61ba6bb..11d1066b673c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1822,7 +1822,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
>  		return;
>
>  	/* Clear any previous failed attempts at recovery. Time to try again. */
> -	__clear_bit(I915_WEDGED, &error->flags);
> +	i915_gem_unset_wedged(dev_priv);
>  	error->reset_count++;
>
>  	pr_notice("drm/i915: Resetting chip after gpu hang\n");
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 48ff64812289..53a791d8d992 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3406,6 +3406,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
>  void i915_gem_reset(struct drm_i915_private *dev_priv);
>  void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
>  void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
> +void i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
>
>  void i915_gem_init_mmio(struct drm_i915_private *i915);
>  int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 202bb850f260..e06830916a05 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3000,6 +3000,57 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
>  	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
>  }
>
> +void i915_gem_unset_wedged(struct drm_i915_private *i915)
> +{
> +	struct i915_gem_timeline *tl;
> +	int i;
> +
> +	lockdep_assert_held(&i915->drm.struct_mutex);
> +	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
> +		return;
> +
> +	/* Before unwedging, make sure that all pending operations
> +	 * are flushed and errored out. No more can be submitted until
> +	 * we reset the wedged bit.
> +	 */
> +	list_for_each_entry(tl, &i915->gt.timelines, link) {
> +		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
> +			struct drm_i915_gem_request *rq;
> +
> +			rq = i915_gem_active_peek(&tl->engine[i].last_request,
> +						  &i915->drm.struct_mutex);
> +			if (!rq)
> +				continue;
> +
> +			/* We can't use our normal waiter as we want to
> +			 * avoid recursively trying to handle the current
> +			 * reset. The basic dma_fence_default_wait() installs
> +			 * a callback for dma_fence_signal(), which is
> +			 * triggered by our nop handler (indirectly, the
> +			 * callback enables the signaler thread which is
> +			 * woken by the nop_submit_request() advancing the seqno
> +			 * and when the seqno passes the fence, the signaler
> +			 * then signals the fence waking us up).
> +			 */
> +			dma_fence_default_wait(&rq->fence, false,
> +					       MAX_SCHEDULE_TIMEOUT);
> +		}
> +	}
> +
> +	/* Undo nop_submit_request. We prevent all new i915 requests from
> +	 * being queued (by disallowing execbuf whilst wedged) so having
> +	 * waited for all active requests above, we know the system is idle
> +	 * and do not have to worry about a thread being inside
> +	 * engine->submit_request() as we swap over. So unlike installing
> +	 * the nop_submit_request on reset, we can do this from normal
> +	 * context and do not require stop_machine().
> +	 */
> +	intel_engines_enable_submission(i915);

So the point of the dma_fence_default_wait above is it to ensure all 
nop_submit_request call backs have completed? I don't at the moment 
understand how could there be such callbacks since unwedge is happening 
after the wedge. So the wedge already installed the nop handler, and by 
the time we get to another reset attempt, isn't it already guaranteed 
all of those have exited?

Regards,

Tvrtko

> +
> +	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
> +	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
> +}
> +
>  static void
>  i915_gem_retire_work_handler(struct work_struct *work)
>  {
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging
  2017-03-15  9:23   ` Tvrtko Ursulin
@ 2017-03-15  9:34     ` Chris Wilson
  2017-03-15 10:04       ` Tvrtko Ursulin
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2017-03-15  9:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Mika Kuoppala

On Wed, Mar 15, 2017 at 09:23:01AM +0000, Tvrtko Ursulin wrote:
> 
> On 14/03/2017 09:34, Chris Wilson wrote:
> >When we wedge the device, we override engine->submit_request with a nop
> >to ensure that all in-flight requests are marked in error. However, igt
> >would like to unwedge the device to test -EIO handling. This requires us
> >to flush those in-flight requests and restore the original
> >engine->submit_request.
> >
> >v2: Use a vfunc to unify enabling request submission to engines
> >v3: Split new vfunc to a separate patch.
> >
> >Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> >Testcase: igt/gem_eio
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_drv.c |  2 +-
> > drivers/gpu/drm/i915/i915_drv.h |  1 +
> > drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 53 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> >index e312b61ba6bb..11d1066b673c 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.c
> >+++ b/drivers/gpu/drm/i915/i915_drv.c
> >@@ -1822,7 +1822,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
> > 		return;
> >
> > 	/* Clear any previous failed attempts at recovery. Time to try again. */
> >-	__clear_bit(I915_WEDGED, &error->flags);
> >+	i915_gem_unset_wedged(dev_priv);
> > 	error->reset_count++;
> >
> > 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index 48ff64812289..53a791d8d992 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -3406,6 +3406,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
> > void i915_gem_reset(struct drm_i915_private *dev_priv);
> > void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
> > void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
> >+void i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
> >
> > void i915_gem_init_mmio(struct drm_i915_private *i915);
> > int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 202bb850f260..e06830916a05 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -3000,6 +3000,57 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
> > 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
> > }
> >
> >+void i915_gem_unset_wedged(struct drm_i915_private *i915)
> >+{
> >+	struct i915_gem_timeline *tl;
> >+	int i;
> >+
> >+	lockdep_assert_held(&i915->drm.struct_mutex);
> >+	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
> >+		return;
> >+
> >+	/* Before unwedging, make sure that all pending operations
> >+	 * are flushed and errored out. No more can be submitted until
> >+	 * we reset the wedged bit.
> >+	 */
> >+	list_for_each_entry(tl, &i915->gt.timelines, link) {
> >+		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
> >+			struct drm_i915_gem_request *rq;
> >+
> >+			rq = i915_gem_active_peek(&tl->engine[i].last_request,
> >+						  &i915->drm.struct_mutex);
> >+			if (!rq)
> >+				continue;
> >+
> >+			/* We can't use our normal waiter as we want to
> >+			 * avoid recursively trying to handle the current
> >+			 * reset. The basic dma_fence_default_wait() installs
> >+			 * a callback for dma_fence_signal(), which is
> >+			 * triggered by our nop handler (indirectly, the
> >+			 * callback enables the signaler thread which is
> >+			 * woken by the nop_submit_request() advancing the seqno
> >+			 * and when the seqno passes the fence, the signaler
> >+			 * then signals the fence waking us up).
> >+			 */
> >+			dma_fence_default_wait(&rq->fence, false,
> >+					       MAX_SCHEDULE_TIMEOUT);
> >+		}
> >+	}
> >+
> >+	/* Undo nop_submit_request. We prevent all new i915 requests from
> >+	 * being queued (by disallowing execbuf whilst wedged) so having
> >+	 * waited for all active requests above, we know the system is idle
> >+	 * and do not have to worry about a thread being inside
> >+	 * engine->submit_request() as we swap over. So unlike installing
> >+	 * the nop_submit_request on reset, we can do this from normal
> >+	 * context and do not require stop_machine().
> >+	 */
> >+	intel_engines_enable_submission(i915);
> 
> So the point of the dma_fence_default_wait above is it to ensure all
> nop_submit_request call backs have completed? I don't at the moment
> understand how could there be such callbacks since unwedge is
> happening after the wedge. So the wedge already installed the nop
> handler, and by the time we get to another reset attempt, isn't it
> already guaranteed all of those have exited?

There's no such guarantee. The nop_submit_request() is to ensure that all
third party driven requests are flushed and our requests are marked with
dma_fence_set_error(-EIO) and not submitted to hw.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc
  2017-03-15  8:14     ` Tvrtko Ursulin
@ 2017-03-15  9:41       ` Chris Wilson
  2017-03-15 10:05         ` Tvrtko Ursulin
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2017-03-15  9:41 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx, Mika Kuoppala

On Wed, Mar 15, 2017 at 08:14:04AM +0000, Tvrtko Ursulin wrote:
> 
> On 14/03/2017 21:33, Chris Wilson wrote:
> >On Tue, Mar 14, 2017 at 04:31:58PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 14/03/2017 09:34, Chris Wilson wrote:
> >>>}
> >>>
> >>>void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
> >>>diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>>index 73fe718516a5..5663ebab851f 100644
> >>>--- a/drivers/gpu/drm/i915/intel_engine_cs.c
> >>>+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>>@@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
> >>>			goto cleanup;
> >>>		}
> >>>
> >>>+		engine->enable_submission(engine);
> >>
> >>Could be moved to intel_engines_setup_common if the
> >>logical_ring_setup was re-arranged a bit so that the default_vfuncs
> >>are assigned before it. Legacy looks like it would be alright with
> >>that approach already.
> >>
> >>My thinking here is not to expose this vfunc so prominently in the
> >>code since it is a bit of a low level internal implementation thing.
> >
> >The concern is reasonable, but equally moving it to
> >intel_engine_setup_common() is hairy. Otoh, I think it is suitable for
> >intel_engine_init_common(). Happy?
> 
> Why do you think it is hairy for intel_engine_setup_common? It keeps
> the setup/init split for lrc, where all vfuncs are initialized in
> the setup phase which was the intention.

At the moment, setup_common() has no dependencies. Having the callback
for engine->set_default_submission inside init_common() doesn't break
the pattern of all vfuncs being in setup_common() - if you no longer
regard the two vfuncs set by the callback as being part of that set.

In init_common() we already utilize the state set on the engine, the
callback seems consistent there.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging
  2017-03-15  9:34     ` Chris Wilson
@ 2017-03-15 10:04       ` Tvrtko Ursulin
  0 siblings, 0 replies; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-03-15 10:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Mika Kuoppala


On 15/03/2017 09:34, Chris Wilson wrote:
> On Wed, Mar 15, 2017 at 09:23:01AM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/03/2017 09:34, Chris Wilson wrote:
>>> When we wedge the device, we override engine->submit_request with a nop
>>> to ensure that all in-flight requests are marked in error. However, igt
>>> would like to unwedge the device to test -EIO handling. This requires us
>>> to flush those in-flight requests and restore the original
>>> engine->submit_request.
>>>
>>> v2: Use a vfunc to unify enabling request submission to engines
>>> v3: Split new vfunc to a separate patch.
>>>
>>> Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
>>> Testcase: igt/gem_eio
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_drv.c |  2 +-
>>> drivers/gpu/drm/i915/i915_drv.h |  1 +
>>> drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++++++++++++
>>> 3 files changed, 53 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>>> index e312b61ba6bb..11d1066b673c 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.c
>>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>>> @@ -1822,7 +1822,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
>>> 		return;
>>>
>>> 	/* Clear any previous failed attempts at recovery. Time to try again. */
>>> -	__clear_bit(I915_WEDGED, &error->flags);
>>> +	i915_gem_unset_wedged(dev_priv);
>>> 	error->reset_count++;
>>>
>>> 	pr_notice("drm/i915: Resetting chip after gpu hang\n");
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 48ff64812289..53a791d8d992 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -3406,6 +3406,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
>>> void i915_gem_reset(struct drm_i915_private *dev_priv);
>>> void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
>>> void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
>>> +void i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
>>>
>>> void i915_gem_init_mmio(struct drm_i915_private *i915);
>>> int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 202bb850f260..e06830916a05 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -3000,6 +3000,57 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
>>> 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
>>> }
>>>
>>> +void i915_gem_unset_wedged(struct drm_i915_private *i915)
>>> +{
>>> +	struct i915_gem_timeline *tl;
>>> +	int i;
>>> +
>>> +	lockdep_assert_held(&i915->drm.struct_mutex);
>>> +	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
>>> +		return;
>>> +
>>> +	/* Before unwedging, make sure that all pending operations
>>> +	 * are flushed and errored out. No more can be submitted until
>>> +	 * we reset the wedged bit.
>>> +	 */
>>> +	list_for_each_entry(tl, &i915->gt.timelines, link) {
>>> +		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
>>> +			struct drm_i915_gem_request *rq;
>>> +
>>> +			rq = i915_gem_active_peek(&tl->engine[i].last_request,
>>> +						  &i915->drm.struct_mutex);
>>> +			if (!rq)
>>> +				continue;
>>> +
>>> +			/* We can't use our normal waiter as we want to
>>> +			 * avoid recursively trying to handle the current
>>> +			 * reset. The basic dma_fence_default_wait() installs
>>> +			 * a callback for dma_fence_signal(), which is
>>> +			 * triggered by our nop handler (indirectly, the
>>> +			 * callback enables the signaler thread which is
>>> +			 * woken by the nop_submit_request() advancing the seqno
>>> +			 * and when the seqno passes the fence, the signaler
>>> +			 * then signals the fence waking us up).
>>> +			 */
>>> +			dma_fence_default_wait(&rq->fence, false,
>>> +					       MAX_SCHEDULE_TIMEOUT);
>>> +		}
>>> +	}
>>> +
>>> +	/* Undo nop_submit_request. We prevent all new i915 requests from
>>> +	 * being queued (by disallowing execbuf whilst wedged) so having
>>> +	 * waited for all active requests above, we know the system is idle
>>> +	 * and do not have to worry about a thread being inside
>>> +	 * engine->submit_request() as we swap over. So unlike installing
>>> +	 * the nop_submit_request on reset, we can do this from normal
>>> +	 * context and do not require stop_machine().
>>> +	 */
>>> +	intel_engines_enable_submission(i915);
>>
>> So the point of the dma_fence_default_wait above is it to ensure all
>> nop_submit_request call backs have completed? I don't at the moment
>> understand how could there be such callbacks since unwedge is
>> happening after the wedge. So the wedge already installed the nop
>> handler, and by the time we get to another reset attempt, isn't it
>> already guaranteed all of those have exited?
>
> There's no such guarantee. The nop_submit_request() is to ensure that all
> third party driven requests are flushed and our requests are marked with
> dma_fence_set_error(-EIO) and not submitted to hw.

I was thinking that the above loop is only about the runnable requests 
but it's not. Okay, I think I get it now.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc
  2017-03-15  9:41       ` Chris Wilson
@ 2017-03-15 10:05         ` Tvrtko Ursulin
  0 siblings, 0 replies; 11+ messages in thread
From: Tvrtko Ursulin @ 2017-03-15 10:05 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Mika Kuoppala


On 15/03/2017 09:41, Chris Wilson wrote:
> On Wed, Mar 15, 2017 at 08:14:04AM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/03/2017 21:33, Chris Wilson wrote:
>>> On Tue, Mar 14, 2017 at 04:31:58PM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 14/03/2017 09:34, Chris Wilson wrote:
>>>>> }
>>>>>
>>>>> void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
>>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> index 73fe718516a5..5663ebab851f 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> @@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
>>>>> 			goto cleanup;
>>>>> 		}
>>>>>
>>>>> +		engine->enable_submission(engine);
>>>>
>>>> Could be moved to intel_engines_setup_common if the
>>>> logical_ring_setup was re-arranged a bit so that the default_vfuncs
>>>> are assigned before it. Legacy looks like it would be alright with
>>>> that approach already.
>>>>
>>>> My thinking here is not to expose this vfunc so prominently in the
>>>> code since it is a bit of a low level internal implementation thing.
>>>
>>> The concern is reasonable, but equally moving it to
>>> intel_engine_setup_common() is hairy. Otoh, I think it is suitable for
>>> intel_engine_init_common(). Happy?
>>
>> Why do you think it is hairy for intel_engine_setup_common? It keeps
>> the setup/init split for lrc, where all vfuncs are initialized in
>> the setup phase which was the intention.
>
> At the moment, setup_common() has no dependencies. Having the callback
> for engine->set_default_submission inside init_common() doesn't break
> the pattern of all vfuncs being in setup_common() - if you no longer
> regard the two vfuncs set by the callback as being part of that set.
>
> In init_common() we already utilize the state set on the engine, the
> callback seems consistent there.

Okay, you convinced me. :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-03-15 10:05 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-03-14  9:34 [PATCH v3 1/2] drm/i915: Move engine->submit_request selection to a vfunc Chris Wilson
2017-03-14  9:34 ` [PATCH v3 2/2] drm/i915: Restore engine->submit_request before unwedging Chris Wilson
2017-03-15  9:23   ` Tvrtko Ursulin
2017-03-15  9:34     ` Chris Wilson
2017-03-15 10:04       ` Tvrtko Ursulin
2017-03-14 16:17 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/2] drm/i915: Move engine->submit_request selection to a vfunc Patchwork
2017-03-14 16:31 ` [PATCH v3 1/2] " Tvrtko Ursulin
2017-03-14 21:33   ` Chris Wilson
2017-03-15  8:14     ` Tvrtko Ursulin
2017-03-15  9:41       ` Chris Wilson
2017-03-15 10:05         ` Tvrtko Ursulin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox