All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Print error state times relative to capture
@ 2018-04-27 15:12 Mika Kuoppala
  2018-04-27 15:32 ` Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Mika Kuoppala @ 2018-04-27 15:12 UTC (permalink / raw)
  To: intel-gfx

Using plain jiffies in error state output makes the output
time differences relative to the current system time. This
is wrong as it makes output time differences dependent
of when the error state is printed rather than when it is
captured.

Store capture jiffies into error state and use it
when outputting the state to fix time differences output.

v2: use engine timestamp as epoch, output formatting (Chris)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 48 +++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gpu_error.h |  2 ++
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 671ffa37614e..fca550954be0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -403,16 +403,17 @@ static const char *bannable(const struct drm_i915_error_context *ctx)
 }
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
+				const struct i915_gpu_state *error,
 				const char *prefix,
 				const struct drm_i915_error_request *erq)
 {
 	if (!erq->seqno)
 		return;
 
-	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
+	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->ban_score,
 		   erq->context, erq->seqno, erq->sched_attr.priority,
-		   jiffies_to_msecs(jiffies - erq->jiffies),
+		   jiffies_to_msecs(erq->jiffies - error->epoch),
 		   erq->head, erq->tail);
 }
 
@@ -427,8 +428,10 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee)
+			       const struct i915_gpu_state *error,
+			       const int engine_n)
 {
+	const struct drm_i915_error_engine *ee = &error->engine[engine_n];
 	int n;
 
 	err_printf(m, "%s command stream:\n",
@@ -497,14 +500,18 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
 	err_printf(m, "  hangcheck action: %s\n",
 		   hangcheck_action_to_str(ee->hangcheck_action));
-	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
+
+	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n",
+		   jiffies_to_msecs(ee->hangcheck_timestamp -
+				    error->epoch),
 		   ee->hangcheck_timestamp,
-		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+		   ee->hangcheck_timestamp == error->epoch ? "; epoch" : "");
+
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n]);
+		error_print_request(m, error, " ", &ee->execlist[n]);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -650,6 +657,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
+	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
+	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
+		   error->capture,
+		   jiffies_to_msecs(jiffies - error->capture),
+		   jiffies_to_msecs(error->capture - error->epoch));
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].hangcheck_stalled &&
@@ -710,7 +722,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].engine_id != -1)
-			error_print_engine(m, &error->engine[i]);
+			error_print_engine(m, error, i);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
@@ -769,7 +781,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->engine[i]->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ", &ee->requests[j]);
+				error_print_request(m, error, " ",
+						    &ee->requests[j]);
 		}
 
 		if (IS_ERR(ee->waiters)) {
@@ -1735,6 +1748,22 @@ static void capture_params(struct i915_gpu_state *error)
 #undef DUP
 }
 
+static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
+{
+	unsigned long epoch = error->capture;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
+		const struct drm_i915_error_engine *ee = &error->engine[i];
+
+		if (ee->hangcheck_stalled &&
+		    time_before(ee->hangcheck_timestamp, epoch))
+			epoch = ee->hangcheck_timestamp;
+	}
+
+	return epoch;
+}
+
 static int capture(void *data)
 {
 	struct i915_gpu_state *error = data;
@@ -1743,6 +1772,7 @@ static int capture(void *data)
 	error->boottime = ktime_get_boottime();
 	error->uptime = ktime_sub(ktime_get(),
 				  error->i915->gt.last_init_time);
+	error->capture = jiffies;
 
 	capture_params(error);
 	capture_gen_state(error);
@@ -1756,6 +1786,8 @@ static int capture(void *data)
 	error->overlay = intel_overlay_capture_error_state(error->i915);
 	error->display = intel_display_capture_error_state(error->i915);
 
+	error->epoch = capture_find_epoch(error);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 5d6fdcbc092c..0accd2ed72d9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -31,6 +31,8 @@ struct i915_gpu_state {
 	ktime_t time;
 	ktime_t boottime;
 	ktime_t uptime;
+	unsigned long capture;
+	unsigned long epoch;
 
 	struct drm_i915_private *i915;
 
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread
* [PATCH] drm/i915: Print error state times relative to capture
@ 2018-04-16 13:40 Mika Kuoppala
  2018-04-16 13:52 ` Chris Wilson
  0 siblings, 1 reply; 7+ messages in thread
From: Mika Kuoppala @ 2018-04-16 13:40 UTC (permalink / raw)
  To: intel-gfx

Using plain jiffies in error state output makes the output
time differences relative to the current system time. This
is wrong as it makes output time differences dependent
of when the error state is printed rather than when it is
captured.

Store capture jiffies into error state and use it
when outputting the state to fix time differences output.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 20 +++++++++++++-------
 drivers/gpu/drm/i915/i915_gpu_error.h |  1 +
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index effaf982b19b..fc7e76f21111 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -404,7 +404,8 @@ static const char *bannable(const struct drm_i915_error_context *ctx)
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
-				const struct drm_i915_error_request *erq)
+				const struct drm_i915_error_request *erq,
+				const unsigned long timestamp)
 {
 	if (!erq->seqno)
 		return;
@@ -412,7 +413,7 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
 	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->ban_score,
 		   erq->context, erq->seqno, erq->priority,
-		   jiffies_to_msecs(jiffies - erq->jiffies),
+		   jiffies_to_msecs(timestamp - erq->jiffies),
 		   erq->head, erq->tail);
 }
 
@@ -427,7 +428,8 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee)
+			       const struct drm_i915_error_engine *ee,
+			       const unsigned long timestamp)
 {
 	int n;
 
@@ -499,12 +501,12 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 		   hangcheck_action_to_str(ee->hangcheck_action));
 	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
 		   ee->hangcheck_timestamp,
-		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+		   jiffies_to_msecs(timestamp - ee->hangcheck_timestamp));
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n]);
+		error_print_request(m, " ", &ee->execlist[n], timestamp);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -650,6 +652,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
+	err_printf(m, "Jiffies: %lu (now %lu)\n", error->timestamp, jiffies);
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].hangcheck_stalled &&
@@ -710,7 +713,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].engine_id != -1)
-			error_print_engine(m, &error->engine[i]);
+			error_print_engine(m, &error->engine[i],
+					   error->timestamp);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
@@ -769,7 +773,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->engine[i]->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ", &ee->requests[j]);
+				error_print_request(m, " ", &ee->requests[j],
+						    error->timestamp);
 		}
 
 		if (IS_ERR(ee->waiters)) {
@@ -1743,6 +1748,7 @@ static int capture(void *data)
 	error->boottime = ktime_get_boottime();
 	error->uptime = ktime_sub(ktime_get(),
 				  error->i915->gt.last_init_time);
+	error->timestamp = jiffies;
 
 	capture_params(error);
 	capture_gen_state(error);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index c05b6034d718..1f36ad526bc5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -30,6 +30,7 @@ struct i915_gpu_state {
 	ktime_t time;
 	ktime_t boottime;
 	ktime_t uptime;
+	unsigned long timestamp; /* jiffies */
 
 	struct drm_i915_private *i915;
 
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-05-02  8:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-04-27 15:12 [PATCH] drm/i915: Print error state times relative to capture Mika Kuoppala
2018-04-27 15:32 ` Chris Wilson
2018-05-02  8:34   ` Mika Kuoppala
2018-04-27 15:38 ` ✓ Fi.CI.BAT: success for drm/i915: Print error state times relative to capture (rev2) Patchwork
2018-04-27 19:16 ` ✓ Fi.CI.IGT: " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2018-04-16 13:40 [PATCH] drm/i915: Print error state times relative to capture Mika Kuoppala
2018-04-16 13:52 ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.