* [PATCH] drm/i915: Record the current requests queue for execlists upon hang
@ 2016-10-12 16:14 Chris Wilson
2016-10-12 18:20 ` ✓ Fi.CI.BAT: success for " Patchwork
2016-10-13 9:51 ` [PATCH] " Mika Kuoppala
0 siblings, 2 replies; 4+ messages in thread
From: Chris Wilson @ 2016-10-12 16:14 UTC (permalink / raw)
To: intel-gfx; +Cc: Mika Kuoppala
Mika wanted to know what requests were pending at the time of a hang as
we now track which requests we have submitted to the hardware.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 3 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 64 ++++++++++++++++++++++++-----------
2 files changed, 47 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bf397b643cc0..6360e807c6ba 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -832,10 +832,11 @@ struct drm_i915_error_state {
struct drm_i915_error_request {
long jiffies;
pid_t pid;
+ u32 context;
u32 seqno;
u32 head;
u32 tail;
- } *requests;
+ } *requests, execlist[2];
struct drm_i915_error_waiter {
char comm[TASK_COMM_LEN];
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 78cc13b9b2a5..026b78c66219 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -363,6 +363,20 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
ee->instdone.row[slice][subslice]);
}
+static void error_print_request(struct drm_i915_error_state_buf *m,
+ const char *prefix,
+ struct drm_i915_error_request *erq)
+{
+ if (!erq->seqno)
+ return;
+
+ err_printf(m, "%s pid %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
+ prefix, erq->pid,
+ erq->context, erq->seqno,
+ jiffies_to_msecs(jiffies - erq->jiffies),
+ erq->head, erq->tail);
+}
+
static void error_print_engine(struct drm_i915_error_state_buf *m,
struct drm_i915_error_engine *ee)
{
@@ -434,6 +448,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
err_printf(m, " hangcheck: %s [%d]\n",
hangcheck_action_to_str(ee->hangcheck_action),
ee->hangcheck_score);
+ error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
+ error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
}
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -649,14 +665,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, "%s --- %d requests\n",
dev_priv->engine[i].name,
ee->num_requests);
- for (j = 0; j < ee->num_requests; j++) {
- err_printf(m, " pid %d, seqno 0x%08x, emitted %ld, head 0x%08x, tail 0x%08x\n",
- ee->requests[j].pid,
- ee->requests[j].seqno,
- ee->requests[j].jiffies,
- ee->requests[j].head,
- ee->requests[j].tail);
- }
+ for (j = 0; j < ee->num_requests; j++)
+ error_print_request(m, " ", &ee->requests[j]);
}
if (IS_ERR(ee->waiters)) {
@@ -1155,6 +1165,20 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
}
}
+static void record_request(struct drm_i915_gem_request *request,
+ struct drm_i915_error_request *erq)
+{
+ erq->context = request->ctx->hw_id;
+ erq->seqno = request->fence.seqno;
+ erq->jiffies = request->emitted_jiffies;
+ erq->head = request->head;
+ erq->tail = request->tail;
+
+ rcu_read_lock();
+ erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
+ rcu_read_unlock();
+}
+
static void engine_record_requests(struct intel_engine_cs *engine,
struct drm_i915_gem_request *first,
struct drm_i915_error_engine *ee)
@@ -1178,8 +1202,6 @@ static void engine_record_requests(struct intel_engine_cs *engine,
count = 0;
request = first;
list_for_each_entry_from(request, &engine->request_list, link) {
- struct drm_i915_error_request *erq;
-
if (count >= ee->num_requests) {
/*
* If the ring request list was changed in
@@ -1199,19 +1221,22 @@ static void engine_record_requests(struct intel_engine_cs *engine,
break;
}
- erq = &ee->requests[count++];
- erq->seqno = request->fence.seqno;
- erq->jiffies = request->emitted_jiffies;
- erq->head = request->head;
- erq->tail = request->tail;
-
- rcu_read_lock();
- erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
- rcu_read_unlock();
+ record_request(request, &ee->requests[count++]);
}
ee->num_requests = count;
}
+static void error_record_engine_execlists(struct intel_engine_cs *engine,
+ struct drm_i915_error_engine *ee)
+{
+ unsigned int n;
+
+ for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+ if (engine->execlist_port[n].request)
+ record_request(engine->execlist_port[n].request,
+ &ee->execlist[n]);
+}
+
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
{
@@ -1236,6 +1261,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
error_record_engine_registers(error, engine, ee);
error_record_engine_waiters(engine, ee);
+ error_record_engine_execlists(engine, ee);
request = i915_gem_find_active_request(engine);
if (request) {
--
2.9.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915: Record the current requests queue for execlists upon hang
2016-10-12 16:14 [PATCH] drm/i915: Record the current requests queue for execlists upon hang Chris Wilson
@ 2016-10-12 18:20 ` Patchwork
2016-10-13 9:51 ` [PATCH] " Mika Kuoppala
1 sibling, 0 replies; 4+ messages in thread
From: Patchwork @ 2016-10-12 18:20 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Record the current requests queue for execlists upon hang
URL : https://patchwork.freedesktop.org/series/13660/
State : success
== Summary ==
Series 13660v1 drm/i915: Record the current requests queue for execlists upon hang
https://patchwork.freedesktop.org/api/1.0/series/13660/revisions/1/mbox/
Test drv_module_reload_basic:
skip -> PASS (fi-skl-6770hq)
Test kms_flip:
Subgroup basic-flip-vs-modeset:
dmesg-warn -> PASS (fi-skl-6770hq)
Test kms_psr_sink_crc:
Subgroup psr_basic:
dmesg-warn -> PASS (fi-skl-6700hq)
Test vgem_basic:
Subgroup unload:
skip -> PASS (fi-kbl-7200u)
skip -> PASS (fi-hsw-4770)
fi-bdw-5557u total:248 pass:232 dwarn:0 dfail:0 fail:0 skip:16
fi-bsw-n3050 total:248 pass:205 dwarn:0 dfail:0 fail:0 skip:43
fi-bxt-t5700 total:248 pass:217 dwarn:0 dfail:0 fail:0 skip:31
fi-byt-j1900 total:248 pass:213 dwarn:2 dfail:0 fail:1 skip:32
fi-byt-n2820 total:248 pass:211 dwarn:0 dfail:0 fail:1 skip:36
fi-hsw-4770 total:248 pass:225 dwarn:0 dfail:0 fail:0 skip:23
fi-hsw-4770r total:248 pass:225 dwarn:0 dfail:0 fail:0 skip:23
fi-ivb-3520m total:248 pass:222 dwarn:0 dfail:0 fail:0 skip:26
fi-ivb-3770 total:248 pass:222 dwarn:0 dfail:0 fail:0 skip:26
fi-kbl-7200u total:248 pass:223 dwarn:0 dfail:0 fail:0 skip:25
fi-skl-6260u total:248 pass:233 dwarn:0 dfail:0 fail:0 skip:15
fi-skl-6700hq total:248 pass:225 dwarn:0 dfail:0 fail:0 skip:23
fi-skl-6700k total:248 pass:222 dwarn:1 dfail:0 fail:0 skip:25
fi-skl-6770hq total:248 pass:231 dwarn:1 dfail:0 fail:1 skip:15
fi-snb-2520m total:248 pass:211 dwarn:0 dfail:0 fail:0 skip:37
fi-snb-2600 total:248 pass:210 dwarn:0 dfail:0 fail:0 skip:38
Results at /archive/results/CI_IGT_test/Patchwork_2692/
14740bb25ec36fe4ce8042af3eb48aeb45e5bc13 drm-intel-nightly: 2016y-10m-12d-16h-18m-24s UTC integration manifest
988e045 drm/i915: Record the current requests queue for execlists upon hang
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/i915: Record the current requests queue for execlists upon hang
2016-10-12 16:14 [PATCH] drm/i915: Record the current requests queue for execlists upon hang Chris Wilson
2016-10-12 18:20 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2016-10-13 9:51 ` Mika Kuoppala
2016-10-13 10:16 ` Chris Wilson
1 sibling, 1 reply; 4+ messages in thread
From: Mika Kuoppala @ 2016-10-13 9:51 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Mika wanted to know what requests were pending at the time of a hang as
> we now track which requests we have submitted to the hardware.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 3 +-
> drivers/gpu/drm/i915/i915_gpu_error.c | 64 ++++++++++++++++++++++++-----------
> 2 files changed, 47 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bf397b643cc0..6360e807c6ba 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -832,10 +832,11 @@ struct drm_i915_error_state {
> struct drm_i915_error_request {
> long jiffies;
> pid_t pid;
> + u32 context;
> u32 seqno;
> u32 head;
> u32 tail;
> - } *requests;
> + } *requests, execlist[2];
>
> struct drm_i915_error_waiter {
> char comm[TASK_COMM_LEN];
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 78cc13b9b2a5..026b78c66219 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -363,6 +363,20 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
> ee->instdone.row[slice][subslice]);
> }
>
> +static void error_print_request(struct drm_i915_error_state_buf *m,
> + const char *prefix,
> + struct drm_i915_error_request *erq)
> +{
> + if (!erq->seqno)
> + return;
> +
> + err_printf(m, "%s pid %d, seqno %8x:%08x, emitted %dms ago, head %08x, tail %08x\n",
> + prefix, erq->pid,
> + erq->context, erq->seqno,
> + jiffies_to_msecs(jiffies - erq->jiffies),
> + erq->head, erq->tail);
> +}
> +
> static void error_print_engine(struct drm_i915_error_state_buf *m,
> struct drm_i915_error_engine *ee)
> {
> @@ -434,6 +448,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
> err_printf(m, " hangcheck: %s [%d]\n",
> hangcheck_action_to_str(ee->hangcheck_action),
> ee->hangcheck_score);
> + error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
> + error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
> }
>
> void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
> @@ -649,14 +665,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> err_printf(m, "%s --- %d requests\n",
> dev_priv->engine[i].name,
> ee->num_requests);
> - for (j = 0; j < ee->num_requests; j++) {
> - err_printf(m, " pid %d, seqno 0x%08x, emitted %ld, head 0x%08x, tail 0x%08x\n",
> - ee->requests[j].pid,
> - ee->requests[j].seqno,
> - ee->requests[j].jiffies,
> - ee->requests[j].head,
> - ee->requests[j].tail);
> - }
> + for (j = 0; j < ee->num_requests; j++)
> + error_print_request(m, " ", &ee->requests[j]);
> }
>
> if (IS_ERR(ee->waiters)) {
> @@ -1155,6 +1165,20 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
> }
> }
>
> +static void record_request(struct drm_i915_gem_request *request,
> + struct drm_i915_error_request *erq)
> +{
> + erq->context = request->ctx->hw_id;
> + erq->seqno = request->fence.seqno;
> + erq->jiffies = request->emitted_jiffies;
> + erq->head = request->head;
> + erq->tail = request->tail;
> +
> + rcu_read_lock();
> + erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
This lock is only for the pid_nr and has nothing to do with the ctx dereference?
Not that it was added by this patch...
> + rcu_read_unlock();
> +}
> +
> static void engine_record_requests(struct intel_engine_cs *engine,
> struct drm_i915_gem_request *first,
> struct drm_i915_error_engine *ee)
> @@ -1178,8 +1202,6 @@ static void engine_record_requests(struct intel_engine_cs *engine,
> count = 0;
> request = first;
> list_for_each_entry_from(request, &engine->request_list, link) {
> - struct drm_i915_error_request *erq;
> -
> if (count >= ee->num_requests) {
> /*
> * If the ring request list was changed in
> @@ -1199,19 +1221,22 @@ static void engine_record_requests(struct intel_engine_cs *engine,
> break;
> }
>
> - erq = &ee->requests[count++];
> - erq->seqno = request->fence.seqno;
> - erq->jiffies = request->emitted_jiffies;
> - erq->head = request->head;
> - erq->tail = request->tail;
> -
> - rcu_read_lock();
> - erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
> - rcu_read_unlock();
> + record_request(request, &ee->requests[count++]);
> }
> ee->num_requests = count;
> }
>
> +static void error_record_engine_execlists(struct intel_engine_cs *engine,
> + struct drm_i915_error_engine *ee)
> +{
> + unsigned int n;
> +
> + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
> + if (engine->execlist_port[n].request)
> + record_request(engine->execlist_port[n].request,
> + &ee->execlist[n]);
Ok, even if we get an interrupt around here and the ports are reset,
the pointer should stay in the request_list, so at that point we should be
safe.
And with retirement, we are in no less safe waters than with the other
requests.
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> +}
> +
> static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> struct drm_i915_error_state *error)
> {
> @@ -1236,6 +1261,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
>
> error_record_engine_registers(error, engine, ee);
> error_record_engine_waiters(engine, ee);
> + error_record_engine_execlists(engine, ee);
>
> request = i915_gem_find_active_request(engine);
> if (request) {
> --
> 2.9.3
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/i915: Record the current requests queue for execlists upon hang
2016-10-13 9:51 ` [PATCH] " Mika Kuoppala
@ 2016-10-13 10:16 ` Chris Wilson
0 siblings, 0 replies; 4+ messages in thread
From: Chris Wilson @ 2016-10-13 10:16 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx
On Thu, Oct 13, 2016 at 12:51:26PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > +static void record_request(struct drm_i915_gem_request *request,
> > + struct drm_i915_error_request *erq)
> > +{
> > + erq->context = request->ctx->hw_id;
> > + erq->seqno = request->fence.seqno;
> > + erq->jiffies = request->emitted_jiffies;
> > + erq->head = request->head;
> > + erq->tail = request->tail;
> > +
> > + rcu_read_lock();
> > + erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0;
>
> This lock is only for the pid_nr and nothing to do with ctx dereference?
> Not that it was added by this patch...
It's for the struct task lookup inside pid_nr.
But...
> > + for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
> > + if (engine->execlist_port[n].request)
> > + record_request(engine->execlist_port[n].request,
> > + &ee->execlist[n]);
>
> Ok even if we get interrupt at around here and reset the ports,
> the pointer should stay in request_list and at that part we should be
> safe.
Note that we don't even get interrupts anymore as we completely stop the
machine whilst capturing. So even rcu_read_lock() above is overkill,
mere documentation.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-10-13 10:16 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-12 16:14 [PATCH] drm/i915: Record the current requests queue for execlists upon hang Chris Wilson
2016-10-12 18:20 ` ✓ Fi.CI.BAT: success for " Patchwork
2016-10-13 9:51 ` [PATCH] " Mika Kuoppala
2016-10-13 10:16 ` Chris Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox