All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Subject: Re: [Intel-gfx] [PATCH] drm/i915: Drop rq->ring->vma peeking from error capture
Date: Fri, 24 Apr 2020 23:38:24 +0300	[thread overview]
Message-ID: <87k124liv3.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20200424191410.27570-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We only hold the active spinlock while dumping the error state, and this
> does not prevent another thread from retiring the request -- as it is
> quite possible that despite us capturing the current state, the GPU has
> completed the request. As such, it is dangerous to dereference state
> below the request as it may already be freed, and the simplest way to
> avoid the danger is not include it in the error state.
>
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1788
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Andi Shyti <andi.shyti@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 18 ++++++++++++------
>  drivers/gpu/drm/i915/i915_gpu_error.h |  1 -
>  2 files changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 424ad975a360..6dd2fc0b0d47 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -467,14 +467,14 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
>  	if (!erq->seqno)
>  		return;
>  
> -	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, start %08x, head %08x, tail %08x\n",
> +	err_printf(m, "%s pid %d, seqno %8x:%08x%s%s, prio %d, head %08x, tail %08x\n",
>  		   prefix, erq->pid, erq->context, erq->seqno,
>  		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>  			    &erq->flags) ? "!" : "",
>  		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
>  			    &erq->flags) ? "+" : "",
>  		   erq->sched_attr.priority,
> -		   erq->start, erq->head, erq->tail);
> +		   erq->head, erq->tail);
>  }
>  
>  static void error_print_context(struct drm_i915_error_state_buf *m,
> @@ -1204,16 +1204,18 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
>  	}
>  }
>  
> -static void record_request(const struct i915_request *request,
> +static bool record_request(const struct i915_request *request,
>  			   struct i915_request_coredump *erq)
>  {
>  	const struct i915_gem_context *ctx;
>  
> +	if (i915_request_completed(request))
> +		return false;
> +
>  	erq->flags = request->fence.flags;
>  	erq->context = request->fence.context;
>  	erq->seqno = request->fence.seqno;
>  	erq->sched_attr = request->sched.attr;
> -	erq->start = i915_ggtt_offset(request->ring->vma);
>  	erq->head = request->head;
>  	erq->tail = request->tail;
>  
> @@ -1223,6 +1225,8 @@ static void record_request(const struct i915_request *request,
>  	if (ctx)
>  		erq->pid = pid_nr(ctx->pid);
>  	rcu_read_unlock();
> +
> +	return true;
>  }
>  
>  static void engine_record_execlists(struct intel_engine_coredump *ee)
> @@ -1231,8 +1235,10 @@ static void engine_record_execlists(struct intel_engine_coredump *ee)
>  	struct i915_request * const *port = el->active;
>  	unsigned int n = 0;
>  
> -	while (*port)
> -		record_request(*port++, &ee->execlist[n++]);
> +	while (*port) {
> +		if (record_request(*port++, &ee->execlist[n]))
> +			n++;
> +	}
>  
>  	ee->num_ports = n;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 0d1f6c8ff355..fa2d82a6de04 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -50,7 +50,6 @@ struct i915_request_coredump {
>  	pid_t pid;
>  	u32 context;
>  	u32 seqno;
> -	u32 start;
>  	u32 head;
>  	u32 tail;
>  	struct i915_sched_attr sched_attr;
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2020-04-24 20:40 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-24 19:14 [Intel-gfx] [PATCH] drm/i915: Drop rq->ring->vma peeking from error capture Chris Wilson
2020-04-24 19:48 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for " Patchwork
2020-04-24 20:38 ` Mika Kuoppala [this message]
2020-04-24 20:51 ` [Intel-gfx] [PATCH] " Andi Shyti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87k124liv3.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.