public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Arun Siluvery <arun.siluvery@linux.intel.com>
To: Mika Kuoppala <mika.kuoppala@linux.intel.com>,
	intel-gfx@lists.freedesktop.org
Cc: miku@iki.fi
Subject: Re: [PATCH] drm/i915: Inspect subunit states on hangcheck
Date: Tue, 1 Dec 2015 12:56:55 +0000	[thread overview]
Message-ID: <565D9917.7020006@linux.intel.com> (raw)
In-Reply-To: <1448972245-13236-1-git-send-email-mika.kuoppala@intel.com>

On 01/12/2015 12:17, Mika Kuoppala wrote:
> If head seems stuck and engine in question is rcs,
> inspect subunit state transitions before deciding that
> this really is a hang instead of limited progress.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=93029
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c         | 49 +++++++++++++++++++++++++++++----
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>   2 files changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e88d692..e6ae54f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>   		ring->hangcheck.deadlock = 0;
>   }
>
> -static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +static bool subunits_stuck(struct intel_engine_cs *ring)
>   {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 tmp;
> +	int i;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
> +	bool stuck;
> +
> +	if (ring->id != RCS)
> +		return true;
> +
> +	i915_get_extra_instdone(ring->dev, instdone);
>
> +	stuck = true;
> +	for (i = 0; i < I915_NUM_INSTDONE_REG; i++) {
> +		if (instdone[i] != ring->hangcheck.instdone[i])
> +			stuck = false;

This may not be completely reliable. In his TDR tests, Tomas Elf observed
that INSTDONE kept changing even when the CS was hung and in a stable state.

regards
Arun

> +
> +		ring->hangcheck.instdone[i] = instdone[i];
> +	}
> +
> +	return stuck;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +head_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
>   	if (acthd != ring->hangcheck.acthd) {
>   		if (acthd > ring->hangcheck.max_acthd) {
>   			ring->hangcheck.max_acthd = acthd;
> @@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
>   		return HANGCHECK_ACTIVE_LOOP;
>   	}
>
> +	if (!subunits_stuck(ring))
> +		return HANGCHECK_ACTIVE_LOOP;
> +
> +	return HANGCHECK_HUNG;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	enum intel_ring_hangcheck_action ha;
> +	u32 tmp;
> +
> +	ha = head_stuck(ring, acthd);
> +	if (ha != HANGCHECK_HUNG)
> +		return ha;
> +
>   	if (IS_GEN2(dev))
>   		return HANGCHECK_HUNG;
>
> @@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   				ring->hangcheck.score--;
>
>   			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
> +
> +			memset(ring->hangcheck.instdone, 0,
> +			       sizeof(ring->hangcheck.instdone));
>   		}
>
>   		ring->hangcheck.seqno = seqno;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5d1eb20..b8fe92e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -93,6 +93,7 @@ struct intel_ring_hangcheck {
>   	int score;
>   	enum intel_ring_hangcheck_action action;
>   	int deadlock;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
>   };
>
>   struct intel_ringbuffer {
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-12-01 12:56 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-01 12:17 [PATCH] drm/i915: Inspect subunit states on hangcheck Mika Kuoppala
2015-12-01 12:31 ` Chris Wilson
2015-12-01 12:56 ` Arun Siluvery [this message]
2015-12-01 12:58 ` Mika Kuoppala
2015-12-01 15:56 ` Mika Kuoppala
2015-12-10 13:54   ` Chris Wilson
2015-12-10 16:30     ` Mika Kuoppala
2016-01-08 14:54     ` Mika Kuoppala
2016-01-08 15:10       ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=565D9917.7020006@linux.intel.com \
    --to=arun.siluvery@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@linux.intel.com \
    --cc=miku@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox