From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
Date: Wed, 13 Aug 2014 17:50:38 +0300 [thread overview]
Message-ID: <87lhqs1m0x.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <1407870351-6064-1-git-send-email-chris@chris-wilson.co.uk>
Chris Wilson <chris@chris-wilson.co.uk> writes:
> The current error state harks back to the era of just a single VM. For
> full-ppgtt, we capture every bo on every VM. It behoves us to then print
> every bo for every VM, which we currently fail to do and so miss vital
> information in the error state.
>
> v2: Use the vma address rather than -1!
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Offsets can collide between different vm areas.
If we add vm index also to the captured batchbuffer objects,
we could print it part of the offset '%d:0x%x' that would easily
identify vm and we would immediately see what vm was active on a ring.
-Mika
> ---
> drivers/gpu/drm/i915/i915_drv.h | 2 +
> drivers/gpu/drm/i915/i915_gpu_error.c | 80 ++++++++++++++++++++++++-----------
> 2 files changed, 58 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1bf2cea..e0dcd70 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -396,6 +396,7 @@ struct drm_i915_error_state {
> pid_t pid;
> char comm[TASK_COMM_LEN];
> } ring[I915_NUM_RINGS];
> +
> struct drm_i915_error_buffer {
> u32 size;
> u32 name;
> @@ -414,6 +415,7 @@ struct drm_i915_error_state {
> } **active_bo, **pinned_bo;
>
> u32 *active_bo_count, *pinned_bo_count;
> + u32 vm_count;
> };
>
> struct intel_connector;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index fc11ac6..35e70d5 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
> struct drm_i915_error_buffer *err,
> int count)
> {
> - err_printf(m, "%s [%d]:\n", name, count);
> + err_printf(m, " %s [%d]:\n", name, count);
>
> while (count--) {
> - err_printf(m, " %08x %8u %02x %02x %x %x",
> + err_printf(m, " %08x %8u %02x %02x %x %x",
> err->gtt_offset,
> err->size,
> err->read_domains,
> @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> i915_ring_error_state(m, dev, &error->ring[i]);
> }
>
> - if (error->active_bo)
> + for (i = 0; i < error->vm_count; i++) {
> + err_printf(m, "vm[%d]\n", i);
> +
> print_error_buffers(m, "Active",
> - error->active_bo[0],
> - error->active_bo_count[0]);
> + error->active_bo[i],
> + error->active_bo_count[i]);
>
> - if (error->pinned_bo)
> print_error_buffers(m, "Pinned",
> - error->pinned_bo[0],
> - error->pinned_bo_count[0]);
> + error->pinned_bo[i],
> + error->pinned_bo_count[i]);
> + }
>
> for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
> obj = error->ring[i].batchbuffer;
> @@ -644,13 +646,15 @@ unwind:
> (src)->base.size>>PAGE_SHIFT)
>
> static void capture_bo(struct drm_i915_error_buffer *err,
> - struct drm_i915_gem_object *obj)
> + struct i915_vma *vma)
> {
> + struct drm_i915_gem_object *obj = vma->obj;
> +
> err->size = obj->base.size;
> err->name = obj->base.name;
> err->rseqno = obj->last_read_seqno;
> err->wseqno = obj->last_write_seqno;
> - err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
> + err->gtt_offset = vma->node.start;
> err->read_domains = obj->base.read_domains;
> err->write_domain = obj->base.write_domain;
> err->fence_reg = obj->fence_reg;
> @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
> int i = 0;
>
> list_for_each_entry(vma, head, mm_list) {
> - capture_bo(err++, vma->obj);
> + capture_bo(err++, vma);
> if (++i == count)
> break;
> }
> @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
> }
>
> static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
> - int count, struct list_head *head)
> + int count, struct list_head *head,
> + struct i915_address_space *vm)
> {
> struct drm_i915_gem_object *obj;
> - int i = 0;
> + struct drm_i915_error_buffer * const first = err;
> + struct drm_i915_error_buffer * const last = err + count;
>
> list_for_each_entry(obj, head, global_list) {
> - if (!i915_gem_obj_is_pinned(obj))
> - continue;
> + struct i915_vma *vma;
>
> - capture_bo(err++, obj);
> - if (++i == count)
> + if (err == last)
> break;
> +
> + list_for_each_entry(vma, &obj->vma_list, vma_link)
> + if (vma->vm == vm && vma->pin_count > 0) {
> + capture_bo(err++, vma);
> + break;
> + }
> }
>
> - return i;
> + return err - first;
> }
>
> /* Generate a semi-unique error code. The code is not meant to have meaning, The
> @@ -1053,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
> list_for_each_entry(vma, &vm->active_list, mm_list)
> i++;
> error->active_bo_count[ndx] = i;
> - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> - if (i915_gem_obj_is_pinned(obj))
> - i++;
> +
> + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> + list_for_each_entry(vma, &obj->vma_list, vma_link)
> + if (vma->vm == vm && vma->pin_count > 0) {
> + i++;
> + break;
> + }
> + }
> error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
>
> if (i) {
> @@ -1074,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
> error->pinned_bo_count[ndx] =
> capture_pinned_bo(pinned_bo,
> error->pinned_bo_count[ndx],
> - &dev_priv->mm.bound_list);
> + &dev_priv->mm.bound_list, vm);
> error->active_bo[ndx] = active_bo;
> error->pinned_bo[ndx] = pinned_bo;
> }
> @@ -1095,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
> error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
> GFP_ATOMIC);
>
> - list_for_each_entry(vm, &dev_priv->vm_list, global_link)
> - i915_gem_capture_vm(dev_priv, error, vm, i++);
> + if (error->active_bo == NULL ||
> + error->pinned_bo == NULL ||
> + error->active_bo_count == NULL ||
> + error->pinned_bo_count == NULL) {
> + kfree(error->active_bo);
> + kfree(error->active_bo_count);
> + kfree(error->pinned_bo);
> + kfree(error->pinned_bo_count);
> +
> + error->active_bo = NULL;
> + error->active_bo_count = NULL;
> + error->pinned_bo = NULL;
> + error->pinned_bo_count = NULL;
> + } else {
> + list_for_each_entry(vm, &dev_priv->vm_list, global_link)
> + i915_gem_capture_vm(dev_priv, error, vm, i++);
> +
> + error->vm_count = cnt;
> + }
> }
>
> /* Capture all registers which don't fit into another category. */
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2014-08-13 14:51 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-12 19:05 [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
2014-08-12 19:05 ` [PATCH 2/5] drm/i915: Do not access stolen memory directly by the CPU, even for error capture Chris Wilson
2014-08-14 14:51 ` Mika Kuoppala
2014-08-14 19:35 ` Chris Wilson
2014-08-15 11:11 ` Mika Kuoppala
2014-08-15 18:07 ` Mika Kuoppala
2014-08-12 19:05 ` [PATCH 3/5] drm/i915: Remove num_pages parameter to i915_error_object_create() Chris Wilson
2014-08-15 18:07 ` Mika Kuoppala
2014-08-12 19:05 ` [PATCH 4/5] drm/i915: Suppress a WARN on reading an object back for a GPU hang Chris Wilson
2014-08-15 18:09 ` Mika Kuoppala
2014-08-25 21:27 ` Daniel Vetter
2014-08-12 19:05 ` [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects Chris Wilson
2014-08-27 9:55 ` Daniel Vetter
2014-08-27 10:39 ` Chris Wilson
2014-09-02 10:06 ` John Harrison
2014-09-06 9:12 ` Chris Wilson
2014-08-13 14:50 ` Mika Kuoppala [this message]
2014-08-14 6:50 ` [PATCH 1/5] drm/i915: Print captured bo for all VM in error state Chris Wilson
2014-08-14 10:18 ` Mika Kuoppala
2014-08-14 15:03 ` Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87lhqs1m0x.fsf@gaia.fi.intel.com \
--to=mika.kuoppala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox