All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/5] drm/i915: Print captured bo for all VM in	error state
Date: Wed, 13 Aug 2014 17:50:38 +0300	[thread overview]
Message-ID: <87lhqs1m0x.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <1407870351-6064-1-git-send-email-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> The current error state harks back to the era of just a single VM. For
> full-ppgtt, we capture every bo on every VM. It behoves us to then print
> every bo for every VM, which we currently fail to do and so miss vital
> information in the error state.
>
> v2: Use the vma address rather than -1!
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Offsets can collide between different vm areas.

If we add vm index also to the captured batchbuffer objects,
we could print it part of the offset '%d:0x%x' that would easily
identify vm and we would immediately see what vm was active on a ring.

-Mika

> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  2 +
>  drivers/gpu/drm/i915/i915_gpu_error.c | 80 ++++++++++++++++++++++++-----------
>  2 files changed, 58 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1bf2cea..e0dcd70 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -396,6 +396,7 @@ struct drm_i915_error_state {
>  		pid_t pid;
>  		char comm[TASK_COMM_LEN];
>  	} ring[I915_NUM_RINGS];
> +
>  	struct drm_i915_error_buffer {
>  		u32 size;
>  		u32 name;
> @@ -414,6 +415,7 @@ struct drm_i915_error_state {
>  	} **active_bo, **pinned_bo;
>  
>  	u32 *active_bo_count, *pinned_bo_count;
> +	u32 vm_count;
>  };
>  
>  struct intel_connector;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index fc11ac6..35e70d5 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>  				struct drm_i915_error_buffer *err,
>  				int count)
>  {
> -	err_printf(m, "%s [%d]:\n", name, count);
> +	err_printf(m, "  %s [%d]:\n", name, count);
>  
>  	while (count--) {
> -		err_printf(m, "  %08x %8u %02x %02x %x %x",
> +		err_printf(m, "    %08x %8u %02x %02x %x %x",
>  			   err->gtt_offset,
>  			   err->size,
>  			   err->read_domains,
> @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  		i915_ring_error_state(m, dev, &error->ring[i]);
>  	}
>  
> -	if (error->active_bo)
> +	for (i = 0; i < error->vm_count; i++) {
> +		err_printf(m, "vm[%d]\n", i);
> +
>  		print_error_buffers(m, "Active",
> -				    error->active_bo[0],
> -				    error->active_bo_count[0]);
> +				    error->active_bo[i],
> +				    error->active_bo_count[i]);
>  
> -	if (error->pinned_bo)
>  		print_error_buffers(m, "Pinned",
> -				    error->pinned_bo[0],
> -				    error->pinned_bo_count[0]);
> +				    error->pinned_bo[i],
> +				    error->pinned_bo_count[i]);
> +	}
>  
>  	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
>  		obj = error->ring[i].batchbuffer;
> @@ -644,13 +646,15 @@ unwind:
>  				       (src)->base.size>>PAGE_SHIFT)
>  
>  static void capture_bo(struct drm_i915_error_buffer *err,
> -		       struct drm_i915_gem_object *obj)
> +		       struct i915_vma *vma)
>  {
> +	struct drm_i915_gem_object *obj = vma->obj;
> +
>  	err->size = obj->base.size;
>  	err->name = obj->base.name;
>  	err->rseqno = obj->last_read_seqno;
>  	err->wseqno = obj->last_write_seqno;
> -	err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
> +	err->gtt_offset = vma->node.start;
>  	err->read_domains = obj->base.read_domains;
>  	err->write_domain = obj->base.write_domain;
>  	err->fence_reg = obj->fence_reg;
> @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
>  	int i = 0;
>  
>  	list_for_each_entry(vma, head, mm_list) {
> -		capture_bo(err++, vma->obj);
> +		capture_bo(err++, vma);
>  		if (++i == count)
>  			break;
>  	}
> @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
>  }
>  
>  static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
> -			     int count, struct list_head *head)
> +			     int count, struct list_head *head,
> +			     struct i915_address_space *vm)
>  {
>  	struct drm_i915_gem_object *obj;
> -	int i = 0;
> +	struct drm_i915_error_buffer * const first = err;
> +	struct drm_i915_error_buffer * const last = err + count;
>  
>  	list_for_each_entry(obj, head, global_list) {
> -		if (!i915_gem_obj_is_pinned(obj))
> -			continue;
> +		struct i915_vma *vma;
>  
> -		capture_bo(err++, obj);
> -		if (++i == count)
> +		if (err == last)
>  			break;
> +
> +		list_for_each_entry(vma, &obj->vma_list, vma_link)
> +			if (vma->vm == vm && vma->pin_count > 0) {
> +				capture_bo(err++, vma);
> +				break;
> +			}
>  	}
>  
> -	return i;
> +	return err - first;
>  }
>  
>  /* Generate a semi-unique error code. The code is not meant to have meaning, The
> @@ -1053,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>  	list_for_each_entry(vma, &vm->active_list, mm_list)
>  		i++;
>  	error->active_bo_count[ndx] = i;
> -	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> -		if (i915_gem_obj_is_pinned(obj))
> -			i++;
> +
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> +		list_for_each_entry(vma, &obj->vma_list, vma_link)
> +			if (vma->vm == vm && vma->pin_count > 0) {
> +				i++;
> +				break;
> +			}
> +	}
>  	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
>  
>  	if (i) {
> @@ -1074,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>  		error->pinned_bo_count[ndx] =
>  			capture_pinned_bo(pinned_bo,
>  					  error->pinned_bo_count[ndx],
> -					  &dev_priv->mm.bound_list);
> +					  &dev_priv->mm.bound_list, vm);
>  	error->active_bo[ndx] = active_bo;
>  	error->pinned_bo[ndx] = pinned_bo;
>  }
> @@ -1095,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
>  	error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
>  					 GFP_ATOMIC);
>  
> -	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
> -		i915_gem_capture_vm(dev_priv, error, vm, i++);
> +	if (error->active_bo == NULL ||
> +	    error->pinned_bo == NULL ||
> +	    error->active_bo_count == NULL ||
> +	    error->pinned_bo_count == NULL) {
> +		kfree(error->active_bo);
> +		kfree(error->active_bo_count);
> +		kfree(error->pinned_bo);
> +		kfree(error->pinned_bo_count);
> +
> +		error->active_bo = NULL;
> +		error->active_bo_count = NULL;
> +		error->pinned_bo = NULL;
> +		error->pinned_bo_count = NULL;
> +	} else {
> +		list_for_each_entry(vm, &dev_priv->vm_list, global_link)
> +			i915_gem_capture_vm(dev_priv, error, vm, i++);
> +
> +		error->vm_count = cnt;
> +	}
>  }
>  
>  /* Capture all registers which don't fit into another category. */
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2014-08-13 14:51 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
2014-08-14 14:51   ` Mika Kuoppala
2014-08-14 19:35     ` Chris Wilson
2014-08-15 11:11   ` Mika Kuoppala
2014-08-15 18:07     ` Mika Kuoppala
2014-08-12 19:05 ` [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create() Chris Wilson
2014-08-15 18:07   ` Mika Kuoppala
2014-08-12 19:05 ` [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang Chris Wilson
2014-08-15 18:09   ` Mika Kuoppala
2014-08-25 21:27     ` Daniel Vetter
2014-08-12 19:05 ` [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
2014-08-27  9:55   ` Daniel Vetter
2014-08-27 10:39     ` Chris Wilson
2014-09-02 10:06       ` John Harrison
2014-09-06  9:12         ` Chris Wilson
2014-08-13 14:50 ` Mika Kuoppala [this message]
2014-08-14  6:50   ` [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
2014-08-14 10:18     ` Mika Kuoppala
2014-08-14 15:03       ` Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87lhqs1m0x.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.