From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 7/8] drm/i915: Serialise resets with wedging
Date: Fri, 08 Feb 2019 16:30:17 +0200 [thread overview]
Message-ID: <87bm3m2w06.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20190207071829.5574-7-chris@chris-wilson.co.uk>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Prevent concurrent set-wedge with ongoing resets (and vice versa) by
> taking the same wedge_mutex around both operations.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_reset.c | 68 ++++++++++++++++++-------------
> 1 file changed, 40 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
> index c4fcb450bd80..9494b015185a 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -794,17 +794,14 @@ static void nop_submit_request(struct i915_request *request)
> intel_engine_queue_breadcrumbs(engine);
> }
>
> -void i915_gem_set_wedged(struct drm_i915_private *i915)
> +static void __i915_gem_set_wedged(struct drm_i915_private *i915)
> {
> struct i915_gpu_error *error = &i915->gpu_error;
> struct intel_engine_cs *engine;
> enum intel_engine_id id;
>
> - mutex_lock(&error->wedge_mutex);
> - if (test_bit(I915_WEDGED, &error->flags)) {
> - mutex_unlock(&error->wedge_mutex);
> + if (test_bit(I915_WEDGED, &error->flags))
> return;
> - }
>
> if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
> struct drm_printer p = drm_debug_printer(__func__);
> @@ -853,12 +850,18 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
> set_bit(I915_WEDGED, &error->flags);
>
> GEM_TRACE("end\n");
> - mutex_unlock(&error->wedge_mutex);
> +}
>
> - wake_up_all(&error->reset_queue);
> +void i915_gem_set_wedged(struct drm_i915_private *i915)
> +{
> + struct i915_gpu_error *error = &i915->gpu_error;
> +
> + mutex_lock(&error->wedge_mutex);
> + __i915_gem_set_wedged(i915);
> + mutex_unlock(&error->wedge_mutex);
> }
>
> -bool i915_gem_unset_wedged(struct drm_i915_private *i915)
> +static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
> {
> struct i915_gpu_error *error = &i915->gpu_error;
> struct i915_timeline *tl;
> @@ -869,8 +872,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
> if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
> return false;
>
> - mutex_lock(&error->wedge_mutex);
> -
> GEM_TRACE("start\n");
>
> /*
> @@ -921,11 +922,21 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
> smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
> clear_bit(I915_WEDGED, &i915->gpu_error.flags);
>
> - mutex_unlock(&i915->gpu_error.wedge_mutex);
> -
> return true;
> }
>
> +bool i915_gem_unset_wedged(struct drm_i915_private *i915)
> +{
> + struct i915_gpu_error *error = &i915->gpu_error;
> + bool result;
> +
> + mutex_lock(&error->wedge_mutex);
> + result = __i915_gem_unset_wedged(i915);
> + mutex_unlock(&error->wedge_mutex);
> +
> + return result;
> +}
> +
> static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
> {
> int err, i;
> @@ -975,7 +986,7 @@ void i915_reset(struct drm_i915_private *i915,
> GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
>
> /* Clear any previous failed attempts at recovery. Time to try again. */
> - if (!i915_gem_unset_wedged(i915))
> + if (!__i915_gem_unset_wedged(i915))
> return;
>
> if (reason)
> @@ -1037,7 +1048,7 @@ void i915_reset(struct drm_i915_private *i915,
> */
> add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
> error:
> - i915_gem_set_wedged(i915);
> + __i915_gem_set_wedged(i915);
> goto finish;
> }
>
> @@ -1129,7 +1140,9 @@ static void i915_reset_device(struct drm_i915_private *i915,
> i915_wedge_on_timeout(&w, i915, 5 * HZ) {
> intel_prepare_reset(i915);
>
> + mutex_lock(&error->wedge_mutex);
> i915_reset(i915, engine_mask, reason);
> + mutex_unlock(&error->wedge_mutex);
>
> intel_finish_reset(i915);
> }
> @@ -1197,6 +1210,7 @@ void i915_handle_error(struct drm_i915_private *i915,
> unsigned long flags,
> const char *fmt, ...)
> {
> + struct i915_gpu_error *error = &i915->gpu_error;
> struct intel_engine_cs *engine;
> intel_wakeref_t wakeref;
> unsigned int tmp;
> @@ -1233,20 +1247,19 @@ void i915_handle_error(struct drm_i915_private *i915,
> * Try engine reset when available. We fall back to full reset if
> * single reset fails.
> */
> - if (intel_has_reset_engine(i915) &&
> - !i915_terminally_wedged(&i915->gpu_error)) {
> + if (intel_has_reset_engine(i915) && !i915_terminally_wedged(error)) {
> for_each_engine_masked(engine, i915, engine_mask, tmp) {
> BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
> if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
> - &i915->gpu_error.flags))
> + &error->flags))
> continue;
>
> if (i915_reset_engine(engine, msg) == 0)
> engine_mask &= ~intel_engine_flag(engine);
>
> clear_bit(I915_RESET_ENGINE + engine->id,
> - &i915->gpu_error.flags);
> - wake_up_bit(&i915->gpu_error.flags,
> + &error->flags);
> + wake_up_bit(&error->flags,
> I915_RESET_ENGINE + engine->id);
> }
> }
> @@ -1255,10 +1268,9 @@ void i915_handle_error(struct drm_i915_private *i915,
> goto out;
>
> /* Full reset needs the mutex, stop any other user trying to do so. */
> - if (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) {
> - wait_event(i915->gpu_error.reset_queue,
> - !test_bit(I915_RESET_BACKOFF,
> - &i915->gpu_error.flags));
> + if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) {
> + wait_event(error->reset_queue,
> + !test_bit(I915_RESET_BACKOFF, &error->flags));
> goto out; /* piggy-back on the other reset */
> }
>
> @@ -1268,8 +1280,8 @@ void i915_handle_error(struct drm_i915_private *i915,
> /* Prevent any other reset-engine attempt. */
> for_each_engine(engine, i915, tmp) {
> while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
> - &i915->gpu_error.flags))
> - wait_on_bit(&i915->gpu_error.flags,
> + &error->flags))
> + wait_on_bit(&error->flags,
> I915_RESET_ENGINE + engine->id,
> TASK_UNINTERRUPTIBLE);
> }
> @@ -1278,11 +1290,11 @@ void i915_handle_error(struct drm_i915_private *i915,
>
> for_each_engine(engine, i915, tmp) {
> clear_bit(I915_RESET_ENGINE + engine->id,
> - &i915->gpu_error.flags);
> + &error->flags);
> }
>
> - clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
> - wake_up_all(&i915->gpu_error.reset_queue);
> + clear_bit(I915_RESET_BACKOFF, &error->flags);
> + wake_up_all(&error->reset_queue);
>
> out:
> intel_runtime_pm_put(i915, wakeref);
> --
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-02-08 14:30 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-07 7:18 [PATCH 1/8] drm/i915: Hack and slash, throttle execbuffer hogs Chris Wilson
2019-02-07 7:18 ` [PATCH 2/8] drm/i915: Defer removing fence register tracking to rpm wakeup Chris Wilson
2019-02-07 13:22 ` Mika Kuoppala
2019-02-07 13:38 ` Chris Wilson
2019-02-07 14:09 ` Mika Kuoppala
2019-02-07 14:13 ` Chris Wilson
2019-02-07 7:18 ` [PATCH 3/8] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
2019-02-07 15:05 ` Mika Kuoppala
2019-02-07 7:18 ` [PATCH 4/8] drm/i915: Force the GPU reset upon wedging Chris Wilson
2019-02-08 9:31 ` Mika Kuoppala
2019-02-08 9:47 ` Chris Wilson
2019-02-07 7:18 ` [PATCH 5/8] drm/i915: Uninterruptibly drain the timelines on unwedging Chris Wilson
2019-02-08 9:46 ` Mika Kuoppala
2019-02-08 10:00 ` Chris Wilson
2019-02-08 15:07 ` Mika Kuoppala
2019-02-08 15:13 ` Chris Wilson
2019-02-07 7:18 ` [PATCH 6/8] drm/i915: Wait for old resets before applying debugfs/i915_wedged Chris Wilson
2019-02-08 9:56 ` Mika Kuoppala
2019-02-08 10:01 ` Chris Wilson
2019-02-07 7:18 ` [PATCH 7/8] drm/i915: Serialise resets with wedging Chris Wilson
2019-02-08 14:30 ` Mika Kuoppala [this message]
2019-02-07 7:18 ` [PATCH 8/8] drm/i915: Don't claim an unstarted request was guilty Chris Wilson
2019-02-07 7:41 ` [PATCH] " Chris Wilson
2019-02-08 14:47 ` Mika Kuoppala
2019-02-08 14:58 ` Chris Wilson
2019-02-08 15:31 ` Mika Kuoppala
2019-02-07 8:08 ` ✗ Fi.CI.SPARSE: warning for series starting with [1/8] drm/i915: Hack and slash, throttle execbuffer hogs (rev2) Patchwork
2019-02-07 8:25 ` ✓ Fi.CI.BAT: success " Patchwork
2019-02-07 9:53 ` ✓ Fi.CI.IGT: " Patchwork
2019-02-07 16:01 ` [PATCH 1/8] drm/i915: Hack and slash, throttle execbuffer hogs Joonas Lahtinen
2019-02-07 16:05 ` Chris Wilson
2019-02-07 16:21 ` Chris Wilson
-- strict thread matches above, loose matches on Subject: below --
2019-02-06 17:11 Chris Wilson
2019-02-06 17:11 ` [PATCH 7/8] drm/i915: Serialise resets with wedging Chris Wilson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87bm3m2w06.fsf@gaia.fi.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.