All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/3] drm/i915: Seek only one guilty batch per hanged ring
Date: Fri, 17 Jan 2014 16:50:25 +0200	[thread overview]
Message-ID: <8761pizmq6.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <1389968431-24123-2-git-send-email-mika.kuoppala@intel.com>

Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:

> Instead of going through all the requests to find a batch that
> hung the machine, use the hangcheck score and the fact that
> the first non-completed request on a hung ring is, with great
> probability, the guilty one. This also ensures that we get one
> guilty batch per hang instead of possibly more (for each ring).
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73652

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>

The Suggested-by tag above is missing from this patch.
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c         |   19 ++++++++++---------
>  drivers/gpu/drm/i915/i915_irq.c         |    3 +--
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    2 ++
>  3 files changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d270351..27a97c3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2322,20 +2322,17 @@ static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs)
>  
>  static void i915_set_reset_status(struct intel_ring_buffer *ring,
>  				  struct drm_i915_gem_request *request,
> -				  u32 acthd)
> +				  u32 acthd, const bool guilty)
>  {
>  	struct i915_ctx_hang_stats *hs = NULL;
> -	bool inside, guilty;
> +	bool inside;
>  	unsigned long offset = 0;
>  
> -	/* Innocent until proven guilty */
> -	guilty = false;
> -
>  	if (request->batch_obj)
>  		offset = i915_gem_obj_offset(request->batch_obj,
>  					     request_to_vm(request));
>  
> -	if (ring->hangcheck.action != HANGCHECK_WAIT &&
> +	if (guilty &&
>  	    i915_request_guilty(request, acthd, &inside)) {
>  		DRM_DEBUG("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
>  			  ring->name,
> @@ -2343,8 +2340,6 @@ static void i915_set_reset_status(struct intel_ring_buffer *ring,
>  			  offset,
>  			  request->ctx ? request->ctx->id : 0,
>  			  acthd);
> -
> -		guilty = true;
>  	}
>  
>  	/* If contexts are disabled or this is the default context, use
> @@ -2383,12 +2378,18 @@ static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
>  	u32 completed_seqno = ring->get_seqno(ring, false);
>  	u32 acthd = intel_ring_get_active_head(ring);
>  	struct drm_i915_gem_request *request;
> +	bool guilty = false;
>  
>  	list_for_each_entry(request, &ring->request_list, list) {
>  		if (i915_seqno_passed(completed_seqno, request->seqno))
>  			continue;
>  
> -		i915_set_reset_status(ring, request, acthd);
> +		if (!guilty && ring->hangcheck.score >= HANGCHECK_SCORE_GUILTY) {
> +			guilty = true;
> +			i915_set_reset_status(ring, request, acthd, true);
> +		} else {
> +			i915_set_reset_status(ring, request, acthd, false);
> +		}
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 6d11e25..e24f9ef 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2473,7 +2473,6 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  #define BUSY 1
>  #define KICK 5
>  #define HUNG 20
> -#define FIRE 30
>  
>  	if (!i915_enable_hangcheck)
>  		return;
> @@ -2557,7 +2556,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  	}
>  
>  	for_each_ring(ring, dev_priv, i) {
> -		if (ring->hangcheck.score > FIRE) {
> +		if (ring->hangcheck.score >= HANGCHECK_SCORE_GUILTY) {
>  			DRM_INFO("%s on %s\n",
>  				 stuck[i] ? "stuck" : "no progress",
>  				 ring->name);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 71a73f4..6018793 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -41,6 +41,8 @@ enum intel_ring_hangcheck_action {
>  	HANGCHECK_HUNG,
>  };
>  
> +#define HANGCHECK_SCORE_GUILTY 31
> +
>  struct intel_ring_hangcheck {
>  	bool deadlock;
>  	u32 seqno;
> -- 
> 1.7.9.5

  reply	other threads:[~2014-01-17 14:55 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-17 14:20 [PATCH 1/3] drm/i915: Tune down output when context is banned Mika Kuoppala
2014-01-17 14:20 ` [PATCH 2/3] drm/i915: Seek only one guilty batch per hanged ring Mika Kuoppala
2014-01-17 14:50   ` Mika Kuoppala [this message]
2014-01-26 18:49   ` Ben Widawsky
2014-01-29 15:20     ` Mika Kuoppala
2014-01-17 14:20 ` [PATCH 3/3] drm/i915: Get rid of acthd based batch search on reset stats Mika Kuoppala
2014-01-26 18:58   ` Ben Widawsky
2014-01-28 16:30     ` Rodrigo Vivi
2014-01-17 14:27 ` [PATCH 1/3] drm/i915: Tune down output when context is banned Chris Wilson
2014-01-22 15:41 ` [PATCH v2 1/3] drm/i915: Tune down debug " Mika Kuoppala
2014-01-26 18:17   ` Ben Widawsky
2014-01-29 15:28     ` Mika Kuoppala

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8761pizmq6.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.