All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arun Siluvery <arun.siluvery@linux.intel.com>
To: Mika Kuoppala <mika.kuoppala@linux.intel.com>,
	intel-gfx@lists.freedesktop.org
Cc: miku@iki.fi
Subject: Re: [PATCH] drm/i915: Inspect subunit states on hangcheck
Date: Tue, 1 Dec 2015 12:56:55 +0000	[thread overview]
Message-ID: <565D9917.7020006@linux.intel.com> (raw)
In-Reply-To: <1448972245-13236-1-git-send-email-mika.kuoppala@intel.com>

On 01/12/2015 12:17, Mika Kuoppala wrote:
> If head seems stuck and engine in question is rcs,
> inspect subunit state transitions before deciding that
> this really is a hang instead of limited progress.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=93029
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_irq.c         | 49 +++++++++++++++++++++++++++++----
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>   2 files changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e88d692..e6ae54f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>   		ring->hangcheck.deadlock = 0;
>   }
>
> -static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +static bool subunits_stuck(struct intel_engine_cs *ring)
>   {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 tmp;
> +	int i;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
> +	bool stuck;
> +
> +	if (ring->id != RCS)
> +		return true;
> +
> +	i915_get_extra_instdone(ring->dev, instdone);
>
> +	stuck = true;
> +	for (i = 0; i < I915_NUM_INSTDONE_REG; i++) {
> +		if (instdone[i] != ring->hangcheck.instdone[i])
> +			stuck = false;

This may not be completely reliable. In his TDR tests, Tomas Elf observed
that instdone kept changing even when the CS was hung and in a stable state.

regards
Arun

> +
> +		ring->hangcheck.instdone[i] = instdone[i];
> +	}
> +
> +	return stuck;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +head_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
>   	if (acthd != ring->hangcheck.acthd) {
>   		if (acthd > ring->hangcheck.max_acthd) {
>   			ring->hangcheck.max_acthd = acthd;
> @@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
>   		return HANGCHECK_ACTIVE_LOOP;
>   	}
>
> +	if (!subunits_stuck(ring))
> +		return HANGCHECK_ACTIVE_LOOP;
> +
> +	return HANGCHECK_HUNG;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	enum intel_ring_hangcheck_action ha;
> +	u32 tmp;
> +
> +	ha = head_stuck(ring, acthd);
> +	if (ha != HANGCHECK_HUNG)
> +		return ha;
> +
>   	if (IS_GEN2(dev))
>   		return HANGCHECK_HUNG;
>
> @@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>   				ring->hangcheck.score--;
>
>   			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
> +
> +			memset(ring->hangcheck.instdone, 0,
> +			       sizeof(ring->hangcheck.instdone));
>   		}
>
>   		ring->hangcheck.seqno = seqno;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5d1eb20..b8fe92e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -93,6 +93,7 @@ struct intel_ring_hangcheck {
>   	int score;
>   	enum intel_ring_hangcheck_action action;
>   	int deadlock;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
>   };
>
>   struct intel_ringbuffer {
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2015-12-01 12:56 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-01 12:17 [PATCH] drm/i915: Inspect subunit states on hangcheck Mika Kuoppala
2015-12-01 12:31 ` Chris Wilson
2015-12-01 12:56 ` Arun Siluvery [this message]
2015-12-01 12:58 ` Mika Kuoppala
2015-12-01 15:56 ` Mika Kuoppala
2015-12-10 13:54   ` Chris Wilson
2015-12-10 16:30     ` Mika Kuoppala
2016-01-08 14:54     ` Mika Kuoppala
2016-01-08 15:10       ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=565D9917.7020006@linux.intel.com \
    --to=arun.siluvery@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@linux.intel.com \
    --cc=miku@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.