From: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org, Ben Widawsky <ben@bwidawsk.net>,
stable@vger.kernel.org
Subject: Re: [PATCH] drm/i915: Decouple GPU error reporting from ring initialisation
Date: Mon, 27 Jan 2014 16:05:24 +0200 [thread overview]
Message-ID: <20140127140524.GT9454@intel.com> (raw)
In-Reply-To: <1390830754-952-1-git-send-email-chris@chris-wilson.co.uk>
On Mon, Jan 27, 2014 at 01:52:34PM +0000, Chris Wilson wrote:
> Currently we report through our error state only the rings that have
> been initialised (as detected by ring->obj). This check is done after
> the GPU reset and ring re-initialisation, which means that the software
> state may not be the same as when we captured the hardware error and we
> may not print out any of the vital information for debugging the hang.
>
> This (and the implied object leak) is a regression from
>
> commit 3d57e5bd1284f44e325f3a52d966259ed42f9e05
> Author: Ben Widawsky <ben@bwidawsk.net>
> Date: Mon Oct 14 10:01:36 2013 -0700
>
> drm/i915: Do a fuller init after reset
>
> Note that we are already starting to get bug reports with incomplete
> error states from 3.13.
>
> v2: Prevent a NULL dereference on 830gm/845g after a GPU reset where
> the scratch obj may be NULL.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <ben@bwidawsk.net>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=74094
> Cc: stable@vger.kernel.org
Looks OK to me.
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_gpu_error.c | 22 +++++++++++++++-------
> 2 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2e6c67d944eb..0249c9aa345a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -335,6 +335,7 @@ struct drm_i915_error_state {
> struct timeval time;
>
> struct drm_i915_error_ring {
> + bool valid;
> struct drm_i915_error_object {
> int page_count;
> u32 gtt_offset;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6832473bc386..96e945c3d44f 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -240,6 +240,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
> unsigned ring)
> {
> BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */
> + if (!error->ring[ring].valid)
> + return;
> +
> err_printf(m, "%s command stream:\n", ring_str(ring));
> err_printf(m, " HEAD: 0x%08x\n", error->head[ring]);
> err_printf(m, " TAIL: 0x%08x\n", error->tail[ring]);
> @@ -295,7 +298,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> struct drm_device *dev = error_priv->dev;
> drm_i915_private_t *dev_priv = dev->dev_private;
> struct drm_i915_error_state *error = error_priv->error;
> - struct intel_ring_buffer *ring;
> int i, j, page, offset, elt;
>
> if (!error) {
> @@ -330,7 +332,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> if (INTEL_INFO(dev)->gen == 7)
> err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
>
> - for_each_ring(ring, dev_priv, i)
> + for (i = 0; i < ARRAY_SIZE(error->ring); i++)
> i915_ring_error_state(m, dev, error, i);
>
> for (i = 0; i < error->vm_count; i++) {
> @@ -405,8 +407,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> }
> }
>
> - obj = error->ring[i].ctx;
> - if (obj) {
> + if ((obj = error->ring[i].ctx)) {
> err_printf(m, "%s --- HW Context = 0x%08x\n",
> dev_priv->ring[i].name,
> obj->gtt_offset);
> @@ -730,7 +731,8 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> return NULL;
>
> obj = ring->scratch.obj;
> - if (acthd >= i915_gem_obj_ggtt_offset(obj) &&
> + if (obj != NULL &&
> + acthd >= i915_gem_obj_ggtt_offset(obj) &&
> acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
> return i915_error_ggtt_object_create(dev_priv, obj);
> }
> @@ -875,11 +877,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
> struct drm_i915_error_state *error)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> - struct intel_ring_buffer *ring;
> struct drm_i915_gem_request *request;
> int i, count;
>
> - for_each_ring(ring, dev_priv, i) {
> + for (i = 0; i < I915_NUM_RINGS; i++) {
> + struct intel_ring_buffer *ring = &dev_priv->ring[i];
> +
> + if (ring->dev == NULL)
> + continue;
> +
> + error->ring[i].valid = true;
> +
> i915_record_ring_state(dev, error, ring);
>
> error->ring[i].batchbuffer =
> --
> 1.8.5.3
--
Ville Syrjälä
Intel OTC
next prev parent reply other threads:[~2014-01-27 14:05 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-01-23 21:49 [PATCH] drm/i915: Decouple GPU error reporting from ring initialisation Chris Wilson
2014-01-24 11:50 ` Ville Syrjälä
2014-01-24 11:55 ` Chris Wilson
2014-01-24 12:06 ` Ville Syrjälä
2014-01-27 13:52 ` Chris Wilson
2014-01-27 14:05 ` Ville Syrjälä [this message]
2014-01-27 16:13 ` [Intel-gfx] " Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140127140524.GT9454@intel.com \
--to=ville.syrjala@linux.intel.com \
--cc=ben@bwidawsk.net \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.