All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/7] drm/i915/selftests: Flush old resets between engines
Date: Mon, 05 Feb 2018 16:27:35 +0200	[thread overview]
Message-ID: <87372fv7e0.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20180205092201.19476-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> When injecting rapid resets, we must be careful to at least wait for the
> previous reset to have taken effect and the engine restarted. If we
> perform a second reset before that has happened, we will notice that the
> engine hasn't recovered and declare it lost, wedging the device and
> failing. In practice, since we wait for each hanging batch to start
> before injecting the reset, this too-fast-reset condition can only be
> triggered when moving onto the next engine in the test, so we need only
> wait for the existing reset to complete before switching engines.
>
> v2: Wrap up the wait inside a safety net to bail out in case of angry hw.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 65 ++++++++++++++++++++++--
>  1 file changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index d1f91a533afa..a4f4ff22389b 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -244,6 +244,57 @@ static u32 hws_seqno(const struct hang *h,
>  	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
>  }
>  
> +struct wedge_me {
> +	struct delayed_work work;
> +	struct drm_i915_private *i915;
> +	const void *symbol;
> +};
> +
> +static void wedge_me(struct work_struct *work)
> +{
> +	struct wedge_me *w = container_of(work, typeof(*w), work.work);
> +
> +	pr_err("%pS timed out, cancelling all further testing.\n",
> +	       w->symbol);
> +	i915_gem_set_wedged(w->i915);
> +}
> +
> +static void __init_wedge(struct wedge_me *w,
> +			 struct drm_i915_private *i915,
> +			 long timeout,
> +			 const void *symbol)
> +{
> +	w->i915 = i915;
> +	w->symbol = symbol;
> +
> +	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
> +	schedule_delayed_work(&w->work, timeout);
> +}
> +
> +static void __fini_wedge(struct wedge_me *w)
> +{
> +	cancel_delayed_work_sync(&w->work);
> +	destroy_delayed_work_on_stack(&w->work);
> +	w->i915 = NULL;
> +}
> +
> +#define wedge_on_timeout(W, DEV, TIMEOUT)				\
> +	for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \
> +	     (W)->i915;							\
> +	     __fini_wedge((W)))
> +
> +static int flush_test(struct drm_i915_private *i915, unsigned int flags)
> +{
> +	struct wedge_me w;
> +
> +	cond_resched();
> +
> +	wedge_on_timeout(&w, i915, HZ)
> +		i915_gem_wait_for_idle(i915, flags);
> +
> +	return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
> +}
> +
>  static void hang_fini(struct hang *h)
>  {
>  	*h->batch = MI_BATCH_BUFFER_END;
> @@ -255,7 +306,7 @@ static void hang_fini(struct hang *h)
>  	i915_gem_object_unpin_map(h->hws);
>  	i915_gem_object_put(h->hws);
>  
> -	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
> +	flush_test(h->i915, I915_WAIT_LOCKED);
>  }
>  
>  static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
> @@ -487,7 +538,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>  		if (err)
>  			break;
>  
> -		cond_resched();
> +		err = flush_test(i915, 0);
> +		if (err)
> +			break;
>  	}
>  
>  	if (i915_terminally_wedged(&i915->gpu_error))
> @@ -726,7 +779,9 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
>  		if (err)
>  			break;
>  
> -		cond_resched();
> +		err = flush_test(i915, 0);
> +		if (err)
> +			break;
>  	}
>  
>  	if (i915_terminally_wedged(&i915->gpu_error))
> @@ -952,6 +1007,10 @@ static int igt_reset_queue(void *arg)
>  		i915_gem_chipset_flush(i915);
>  
>  		i915_gem_request_put(prev);
> +
> +		err = flush_test(i915, I915_WAIT_LOCKED);
> +		if (err)
> +			break;
>  	}
>  
>  fini:
> -- 
> 2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

      parent reply	other threads:[~2018-02-05 14:28 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-05  9:21 [PATCH 1/7] drm/i915/selftests: Flush old resets between engines Chris Wilson
2018-02-05  9:21 ` [PATCH 2/7] drm/i915/selftests: Use a sacrificial context for hang testing Chris Wilson
2018-02-05 14:02   ` Mika Kuoppala
2018-02-05 14:06     ` Chris Wilson
2018-02-05  9:21 ` [PATCH 3/7] drm/i915/execlists: Move the reset bits to a more natural home Chris Wilson
2018-02-05 15:19   ` Mika Kuoppala
2018-02-05  9:21 ` [PATCH 4/7] drm/i915: Skip post-reset request emission if the engine is not idle Chris Wilson
2018-02-05  9:21 ` [PATCH 5/7] drm/i915: Show the GPU state when declaring wedged Chris Wilson
2018-02-05  9:51   ` Mika Kuoppala
2018-02-05 10:02     ` Chris Wilson
2018-02-05  9:22 ` [PATCH 6/7] drm/i915/execlists: Remove the startup spam Chris Wilson
2018-02-05  9:22 ` [PATCH 7/7] drm/i915: Remove unbannable context spam from reset Chris Wilson
2018-02-05  9:30   ` Chris Wilson
2018-02-05 13:27     ` Chris Wilson
2018-02-05  9:58 ` ✗ Fi.CI.BAT: warning for series starting with [1/7] drm/i915/selftests: Flush old resets between engines Patchwork
2018-02-05 14:27 ` Mika Kuoppala [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87372fv7e0.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.