[PATCH 1/2] drm/i915: collect per ring page fault info on error

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] drm/i915: collect per ring page fault info on error
@ 2011-09-19 19:07 Ben Widawsky
  2011-09-19 19:07 ` [PATCH 2/2] drm/i915: check acthd for all rings Ben Widawsky
  2011-09-20  9:42 ` [PATCH 1/2] drm/i915: collect per ring page fault info on error Chris Wilson
  0 siblings, 2 replies; 4+ messages in thread
From: Ben Widawsky @ 2011-09-19 19:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c |    3 +++
 drivers/gpu/drm/i915/i915_drv.h     |    1 +
 drivers/gpu/drm/i915/i915_irq.c     |    3 +++
 drivers/gpu/drm/i915/i915_reg.h     |    3 +++
 4 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3c395a5..3cdf638 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -750,6 +750,9 @@ static int i915_error_state(struct seq_file *m, void *unused)
 	seq_printf(m, "EIR: 0x%08x\n", error->eir);
 	seq_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
 	if (INTEL_INFO(dev)->gen >= 6) {
+		seq_printf(m, "GFX Page Fault: 0x%08x\n", error->page_fault[RCS]);
+		seq_printf(m, "Media Page Fault: 0x%08x\n", error->page_fault[VCS]);
+		seq_printf(m, "Blitter Page Fault: 0x%08x\n", error->page_fault[BCS]);
 		seq_printf(m, "ERROR: 0x%08x\n", error->error);
 		seq_printf(m, "Blitter command stream:\n");
 		seq_printf(m, "  ACTHD:    0x%08x\n", error->bcs_acthd);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7916bd9..0447461 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -153,6 +153,7 @@ struct drm_i915_error_state {
 	u32 ipehr;
 	u32 instdone;
 	u32 acthd;
+	u32 page_fault[I915_NUM_RINGS];
 	u32 error; /* gen6+ */
 	u32 bcs_acthd; /* gen6+ blt engine */
 	u32 bcs_ipehr;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9cbb0cd..99bd330 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -914,6 +914,9 @@ static void i915_capture_error_state(struct drm_device *dev)
 	error->instpm = I915_READ(INSTPM);
 	error->error = 0;
 	if (INTEL_INFO(dev)->gen >= 6) {
+		error->page_fault[RCS] = I915_READ(GEN6_GFX_FAULT);
+		error->page_fault[VCS] = I915_READ(GEN6_MED_FAULT);
+		error->page_fault[BCS] = I915_READ(GEN6_BLT_FAULT);
 		error->error = I915_READ(ERROR_GEN6);
 
 		error->bcs_acthd = I915_READ(BCS_ACTHD);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 542453f..5a74f89 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -355,6 +355,9 @@
 #define BCS_IPEHR	0x22068
 #define BCS_ACTHD	0x22074
 
+#define GEN6_GFX_FAULT	0x04094
+#define GEN6_MED_FAULT	0x04194
+#define GEN6_BLT_FAULT	0x04294
 #define ERROR_GEN6	0x040a0
 
 /* GM45+ chicken bits -- debug workaround bits that may be required
-- 
1.7.6.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] drm/i915: check acthd for all rings
  2011-09-19 19:07 [PATCH 1/2] drm/i915: collect per ring page fault info on error Ben Widawsky
@ 2011-09-19 19:07 ` Ben Widawsky
  2011-09-19 20:36   ` Ben Widawsky
  2011-09-20  9:42 ` [PATCH 1/2] drm/i915: collect per ring page fault info on error Chris Wilson
  1 sibling, 1 reply; 4+ messages in thread
From: Ben Widawsky @ 2011-09-19 19:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

On Gen6+ we have other rings which may be in use. The GPU hasn't hung if
the blit or media ring is still making progress.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c |    6 +-
 drivers/gpu/drm/i915/i915_drv.h     |    6 +-
 drivers/gpu/drm/i915/i915_irq.c     |  113 +++++++++++++++++++++--------------
 3 files changed, 73 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3cdf638..0431358 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -755,20 +755,20 @@ static int i915_error_state(struct seq_file *m, void *unused)
 		seq_printf(m, "Blitter Page Fault: 0x%08x\n", error->page_fault[BCS]);
 		seq_printf(m, "ERROR: 0x%08x\n", error->error);
 		seq_printf(m, "Blitter command stream:\n");
-		seq_printf(m, "  ACTHD:    0x%08x\n", error->bcs_acthd);
+		seq_printf(m, "  ACTHD:    0x%08x\n", error->acthd[BCS]);
 		seq_printf(m, "  IPEIR:    0x%08x\n", error->bcs_ipeir);
 		seq_printf(m, "  IPEHR:    0x%08x\n", error->bcs_ipehr);
 		seq_printf(m, "  INSTDONE: 0x%08x\n", error->bcs_instdone);
 		seq_printf(m, "  seqno:    0x%08x\n", error->bcs_seqno);
 		seq_printf(m, "Video (BSD) command stream:\n");
-		seq_printf(m, "  ACTHD:    0x%08x\n", error->vcs_acthd);
+		seq_printf(m, "  ACTHD:    0x%08x\n", error->acthd[VCS]);
 		seq_printf(m, "  IPEIR:    0x%08x\n", error->vcs_ipeir);
 		seq_printf(m, "  IPEHR:    0x%08x\n", error->vcs_ipehr);
 		seq_printf(m, "  INSTDONE: 0x%08x\n", error->vcs_instdone);
 		seq_printf(m, "  seqno:    0x%08x\n", error->vcs_seqno);
 	}
 	seq_printf(m, "Render command stream:\n");
-	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd);
+	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd[RCS]);
 	seq_printf(m, "  IPEIR: 0x%08x\n", error->ipeir);
 	seq_printf(m, "  IPEHR: 0x%08x\n", error->ipehr);
 	seq_printf(m, "  INSTDONE: 0x%08x\n", error->instdone);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0447461..36ecae8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -152,15 +152,13 @@ struct drm_i915_error_state {
 	u32 ipeir;
 	u32 ipehr;
 	u32 instdone;
-	u32 acthd;
+	u32 acthd[I915_NUM_RINGS];
 	u32 page_fault[I915_NUM_RINGS];
 	u32 error; /* gen6+ */
-	u32 bcs_acthd; /* gen6+ blt engine */
 	u32 bcs_ipehr;
 	u32 bcs_ipeir;
 	u32 bcs_instdone;
 	u32 bcs_seqno;
-	u32 vcs_acthd; /* gen6+ bsd engine */
 	u32 vcs_ipehr;
 	u32 vcs_ipeir;
 	u32 vcs_instdone;
@@ -330,7 +328,7 @@ typedef struct drm_i915_private {
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
 	struct timer_list hangcheck_timer;
 	int hangcheck_count;
-	uint32_t last_acthd;
+	uint32_t last_acthd[I915_NUM_RINGS];
 	uint32_t last_instdone;
 	uint32_t last_instdone1;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 99bd330..ddbee8c 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -919,7 +919,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->page_fault[BCS] = I915_READ(GEN6_BLT_FAULT);
 		error->error = I915_READ(ERROR_GEN6);
 
-		error->bcs_acthd = I915_READ(BCS_ACTHD);
+		error->acthd[BCS] = I915_READ(BCS_ACTHD);
 		error->bcs_ipehr = I915_READ(BCS_IPEHR);
 		error->bcs_ipeir = I915_READ(BCS_IPEIR);
 		error->bcs_instdone = I915_READ(BCS_INSTDONE);
@@ -927,7 +927,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		if (dev_priv->ring[BCS].get_seqno)
 			error->bcs_seqno = dev_priv->ring[BCS].get_seqno(&dev_priv->ring[BCS]);
 
-		error->vcs_acthd = I915_READ(VCS_ACTHD);
+		error->acthd[VCS] = I915_READ(VCS_ACTHD);
 		error->vcs_ipehr = I915_READ(VCS_IPEHR);
 		error->vcs_ipeir = I915_READ(VCS_IPEIR);
 		error->vcs_instdone = I915_READ(VCS_INSTDONE);
@@ -941,13 +941,13 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->instdone = I915_READ(INSTDONE_I965);
 		error->instps = I915_READ(INSTPS);
 		error->instdone1 = I915_READ(INSTDONE1);
-		error->acthd = I915_READ(ACTHD_I965);
+		error->acthd[RCS] = I915_READ(ACTHD_I965);
 		error->bbaddr = I915_READ64(BB_ADDR);
 	} else {
 		error->ipeir = I915_READ(IPEIR);
 		error->ipehr = I915_READ(IPEHR);
 		error->instdone = I915_READ(INSTDONE);
-		error->acthd = I915_READ(ACTHD);
+		error->acthd[RCS] = I915_READ(ACTHD);
 		error->bbaddr = 0;
 	}
 	i915_gem_record_fences(dev, error);
@@ -1659,6 +1659,50 @@ static bool kick_ring(struct intel_ring_buffer *ring)
 	return false;
 }
 
+static bool
+acthd_stuck(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t acthd, vcs_acthd, bcs_acthd;
+	uint32_t instdone = 0, instdone1 = 0;
+	bool rcs_stuck, others_stuck = true;
+
+	acthd = intel_ring_get_active_head(&dev_priv->ring[RCS]);
+	switch (INTEL_INFO(dev)->gen) {
+	case 7:
+	case 6:
+		vcs_acthd = intel_ring_get_active_head(&dev_priv->ring[VCS]);
+		bcs_acthd = intel_ring_get_active_head(&dev_priv->ring[BCS]);
+		others_stuck = (dev_priv->last_acthd[2] == bcs_acthd) &&
+			       (dev_priv->last_acthd[1] == vcs_acthd);
+		dev_priv->last_acthd[2] = bcs_acthd;
+		dev_priv->last_acthd[1] = vcs_acthd;
+		break;
+	case 5:
+	case 4:
+		instdone = I915_READ(INSTDONE_I965);
+		instdone1 = I915_READ(INSTDONE1);
+		break;
+	case 3:
+	case 2:
+		instdone = I915_READ(INSTDONE);
+		instdone1 = 0;
+		break;
+	default:
+		DRM_ERROR("bad\n");
+		return false;
+	}
+	rcs_stuck = dev_priv->last_acthd[0] == acthd;
+
+	dev_priv->last_acthd[0] = acthd;
+	dev_priv->last_instdone = instdone;
+	dev_priv->last_instdone1 = instdone1;
+
+	if (dev_priv->hangcheck_count++ == 0)
+		return false;
+
+	return rcs_stuck && others_stuck;
+}
 /**
  * This is called when the chip hasn't reported back with completed
  * batchbuffers in a long time. The first time this is called we simply record
@@ -1669,7 +1713,6 @@ void i915_hangcheck_elapsed(unsigned long data)
 {
 	struct drm_device *dev = (struct drm_device *)data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t acthd, instdone, instdone1;
 	bool err = false;
 
 	if (!i915_enable_hangcheck)
@@ -1685,50 +1728,30 @@ void i915_hangcheck_elapsed(unsigned long data)
 		return;
 	}
 
-	if (INTEL_INFO(dev)->gen < 4) {
-		acthd = I915_READ(ACTHD);
-		instdone = I915_READ(INSTDONE);
-		instdone1 = 0;
-	} else {
-		acthd = I915_READ(ACTHD_I965);
-		instdone = I915_READ(INSTDONE_I965);
-		instdone1 = I915_READ(INSTDONE1);
-	}
-
-	if (dev_priv->last_acthd == acthd &&
-	    dev_priv->last_instdone == instdone &&
-	    dev_priv->last_instdone1 == instdone1) {
-		if (dev_priv->hangcheck_count++ > 1) {
-			DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
+	if (acthd_stuck(dev)) {
+		DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
 
-			if (!IS_GEN2(dev)) {
-				/* Is the chip hanging on a WAIT_FOR_EVENT?
-				 * If so we can simply poke the RB_WAIT bit
-				 * and break the hang. This should work on
-				 * all but the second generation chipsets.
-				 */
-
-				if (kick_ring(&dev_priv->ring[RCS]))
-					goto repeat;
+		if (!IS_GEN2(dev)) {
+			/* Is the chip hanging on a WAIT_FOR_EVENT?
+			 * If so we can simply poke the RB_WAIT bit
+			 * and break the hang. This should work on
+			 * all but the second generation chipsets.
+			 */
 
-				if (HAS_BSD(dev) &&
-				    kick_ring(&dev_priv->ring[VCS]))
-					goto repeat;
+			if (kick_ring(&dev_priv->ring[RCS]))
+				goto repeat;
 
-				if (HAS_BLT(dev) &&
-				    kick_ring(&dev_priv->ring[BCS]))
-					goto repeat;
-			}
+			if (HAS_BSD(dev) &&
+			    kick_ring(&dev_priv->ring[VCS]))
+				goto repeat;
 
-			i915_handle_error(dev, true);
-			return;
+			if (HAS_BLT(dev) &&
+			    kick_ring(&dev_priv->ring[BCS]))
+				goto repeat;
 		}
-	} else {
-		dev_priv->hangcheck_count = 0;
 
-		dev_priv->last_acthd = acthd;
-		dev_priv->last_instdone = instdone;
-		dev_priv->last_instdone1 = instdone1;
+		i915_handle_error(dev, true);
+		return;
 	}
 
 repeat:
-- 
1.7.6.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] drm/i915: check acthd for all rings
  2011-09-19 19:07 ` [PATCH 2/2] drm/i915: check acthd for all rings Ben Widawsky
@ 2011-09-19 20:36   ` Ben Widawsky
  0 siblings, 0 replies; 4+ messages in thread
From: Ben Widawsky @ 2011-09-19 20:36 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

On Gen6+ we have other rings which may be in use. The GPU hasn't hung if
the blit or media ring is still making progress.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c |    6 +-
 drivers/gpu/drm/i915/i915_drv.h     |    6 +-
 drivers/gpu/drm/i915/i915_irq.c     |  113 +++++++++++++++++++++--------------
 3 files changed, 73 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3cdf638..0431358 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -755,20 +755,20 @@ static int i915_error_state(struct seq_file *m, void *unused)
 		seq_printf(m, "Blitter Page Fault: 0x%08x\n", error->page_fault[BCS]);
 		seq_printf(m, "ERROR: 0x%08x\n", error->error);
 		seq_printf(m, "Blitter command stream:\n");
-		seq_printf(m, "  ACTHD:    0x%08x\n", error->bcs_acthd);
+		seq_printf(m, "  ACTHD:    0x%08x\n", error->acthd[BCS]);
 		seq_printf(m, "  IPEIR:    0x%08x\n", error->bcs_ipeir);
 		seq_printf(m, "  IPEHR:    0x%08x\n", error->bcs_ipehr);
 		seq_printf(m, "  INSTDONE: 0x%08x\n", error->bcs_instdone);
 		seq_printf(m, "  seqno:    0x%08x\n", error->bcs_seqno);
 		seq_printf(m, "Video (BSD) command stream:\n");
-		seq_printf(m, "  ACTHD:    0x%08x\n", error->vcs_acthd);
+		seq_printf(m, "  ACTHD:    0x%08x\n", error->acthd[VCS]);
 		seq_printf(m, "  IPEIR:    0x%08x\n", error->vcs_ipeir);
 		seq_printf(m, "  IPEHR:    0x%08x\n", error->vcs_ipehr);
 		seq_printf(m, "  INSTDONE: 0x%08x\n", error->vcs_instdone);
 		seq_printf(m, "  seqno:    0x%08x\n", error->vcs_seqno);
 	}
 	seq_printf(m, "Render command stream:\n");
-	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd);
+	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd[RCS]);
 	seq_printf(m, "  IPEIR: 0x%08x\n", error->ipeir);
 	seq_printf(m, "  IPEHR: 0x%08x\n", error->ipehr);
 	seq_printf(m, "  INSTDONE: 0x%08x\n", error->instdone);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0447461..36ecae8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -152,15 +152,13 @@ struct drm_i915_error_state {
 	u32 ipeir;
 	u32 ipehr;
 	u32 instdone;
-	u32 acthd;
+	u32 acthd[I915_NUM_RINGS];
 	u32 page_fault[I915_NUM_RINGS];
 	u32 error; /* gen6+ */
-	u32 bcs_acthd; /* gen6+ blt engine */
 	u32 bcs_ipehr;
 	u32 bcs_ipeir;
 	u32 bcs_instdone;
 	u32 bcs_seqno;
-	u32 vcs_acthd; /* gen6+ bsd engine */
 	u32 vcs_ipehr;
 	u32 vcs_ipeir;
 	u32 vcs_instdone;
@@ -330,7 +328,7 @@ typedef struct drm_i915_private {
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
 	struct timer_list hangcheck_timer;
 	int hangcheck_count;
-	uint32_t last_acthd;
+	uint32_t last_acthd[I915_NUM_RINGS];
 	uint32_t last_instdone;
 	uint32_t last_instdone1;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 99bd330..df14c28 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -919,7 +919,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->page_fault[BCS] = I915_READ(GEN6_BLT_FAULT);
 		error->error = I915_READ(ERROR_GEN6);
 
-		error->bcs_acthd = I915_READ(BCS_ACTHD);
+		error->acthd[BCS] = I915_READ(BCS_ACTHD);
 		error->bcs_ipehr = I915_READ(BCS_IPEHR);
 		error->bcs_ipeir = I915_READ(BCS_IPEIR);
 		error->bcs_instdone = I915_READ(BCS_INSTDONE);
@@ -927,7 +927,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		if (dev_priv->ring[BCS].get_seqno)
 			error->bcs_seqno = dev_priv->ring[BCS].get_seqno(&dev_priv->ring[BCS]);
 
-		error->vcs_acthd = I915_READ(VCS_ACTHD);
+		error->acthd[VCS] = I915_READ(VCS_ACTHD);
 		error->vcs_ipehr = I915_READ(VCS_IPEHR);
 		error->vcs_ipeir = I915_READ(VCS_IPEIR);
 		error->vcs_instdone = I915_READ(VCS_INSTDONE);
@@ -941,13 +941,13 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->instdone = I915_READ(INSTDONE_I965);
 		error->instps = I915_READ(INSTPS);
 		error->instdone1 = I915_READ(INSTDONE1);
-		error->acthd = I915_READ(ACTHD_I965);
+		error->acthd[RCS] = I915_READ(ACTHD_I965);
 		error->bbaddr = I915_READ64(BB_ADDR);
 	} else {
 		error->ipeir = I915_READ(IPEIR);
 		error->ipehr = I915_READ(IPEHR);
 		error->instdone = I915_READ(INSTDONE);
-		error->acthd = I915_READ(ACTHD);
+		error->acthd[RCS] = I915_READ(ACTHD);
 		error->bbaddr = 0;
 	}
 	i915_gem_record_fences(dev, error);
@@ -1659,6 +1659,50 @@ static bool kick_ring(struct intel_ring_buffer *ring)
 	return false;
 }
 
+static bool
+acthd_stuck(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t acthd, vcs_acthd, bcs_acthd;
+	uint32_t instdone = 0, instdone1 = 0;
+	bool rcs_stuck, others_stuck = true;
+
+	acthd = intel_ring_get_active_head(&dev_priv->ring[RCS]);
+	switch (INTEL_INFO(dev)->gen) {
+	case 7:
+	case 6:
+		vcs_acthd = intel_ring_get_active_head(&dev_priv->ring[VCS]);
+		bcs_acthd = intel_ring_get_active_head(&dev_priv->ring[BCS]);
+		others_stuck = (dev_priv->last_acthd[2] == bcs_acthd) &&
+			       (dev_priv->last_acthd[1] == vcs_acthd);
+		dev_priv->last_acthd[2] = bcs_acthd;
+		dev_priv->last_acthd[1] = vcs_acthd;
+		break;
+	case 5:
+	case 4:
+		instdone = I915_READ(INSTDONE_I965);
+		instdone1 = I915_READ(INSTDONE1);
+		break;
+	case 3:
+	case 2:
+		instdone = I915_READ(INSTDONE);
+		instdone1 = 0;
+		break;
+	default:
+		BUG();
+		return false;
+	}
+	rcs_stuck = dev_priv->last_acthd[0] == acthd;
+
+	dev_priv->last_acthd[0] = acthd;
+	dev_priv->last_instdone = instdone;
+	dev_priv->last_instdone1 = instdone1;
+
+	if (dev_priv->hangcheck_count++ == 0)
+		return false;
+
+	return rcs_stuck && others_stuck;
+}
 /**
  * This is called when the chip hasn't reported back with completed
  * batchbuffers in a long time. The first time this is called we simply record
@@ -1669,7 +1713,6 @@ void i915_hangcheck_elapsed(unsigned long data)
 {
 	struct drm_device *dev = (struct drm_device *)data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t acthd, instdone, instdone1;
 	bool err = false;
 
 	if (!i915_enable_hangcheck)
@@ -1685,50 +1728,30 @@ void i915_hangcheck_elapsed(unsigned long data)
 		return;
 	}
 
-	if (INTEL_INFO(dev)->gen < 4) {
-		acthd = I915_READ(ACTHD);
-		instdone = I915_READ(INSTDONE);
-		instdone1 = 0;
-	} else {
-		acthd = I915_READ(ACTHD_I965);
-		instdone = I915_READ(INSTDONE_I965);
-		instdone1 = I915_READ(INSTDONE1);
-	}
-
-	if (dev_priv->last_acthd == acthd &&
-	    dev_priv->last_instdone == instdone &&
-	    dev_priv->last_instdone1 == instdone1) {
-		if (dev_priv->hangcheck_count++ > 1) {
-			DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
+	if (acthd_stuck(dev)) {
+		DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
 
-			if (!IS_GEN2(dev)) {
-				/* Is the chip hanging on a WAIT_FOR_EVENT?
-				 * If so we can simply poke the RB_WAIT bit
-				 * and break the hang. This should work on
-				 * all but the second generation chipsets.
-				 */
-
-				if (kick_ring(&dev_priv->ring[RCS]))
-					goto repeat;
+		if (!IS_GEN2(dev)) {
+			/* Is the chip hanging on a WAIT_FOR_EVENT?
+			 * If so we can simply poke the RB_WAIT bit
+			 * and break the hang. This should work on
+			 * all but the second generation chipsets.
+			 */
 
-				if (HAS_BSD(dev) &&
-				    kick_ring(&dev_priv->ring[VCS]))
-					goto repeat;
+			if (kick_ring(&dev_priv->ring[RCS]))
+				goto repeat;
 
-				if (HAS_BLT(dev) &&
-				    kick_ring(&dev_priv->ring[BCS]))
-					goto repeat;
-			}
+			if (HAS_BSD(dev) &&
+			    kick_ring(&dev_priv->ring[VCS]))
+				goto repeat;
 
-			i915_handle_error(dev, true);
-			return;
+			if (HAS_BLT(dev) &&
+			    kick_ring(&dev_priv->ring[BCS]))
+				goto repeat;
 		}
-	} else {
-		dev_priv->hangcheck_count = 0;
 
-		dev_priv->last_acthd = acthd;
-		dev_priv->last_instdone = instdone;
-		dev_priv->last_instdone1 = instdone1;
+		i915_handle_error(dev, true);
+		return;
 	}
 
 repeat:
-- 
1.7.6.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] drm/i915: collect per ring page fault info on error
  2011-09-19 19:07 [PATCH 1/2] drm/i915: collect per ring page fault info on error Ben Widawsky
  2011-09-19 19:07 ` [PATCH 2/2] drm/i915: check acthd for all rings Ben Widawsky
@ 2011-09-20  9:42 ` Chris Wilson
  1 sibling, 0 replies; 4+ messages in thread
From: Chris Wilson @ 2011-09-20  9:42 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ben Widawsky

On Mon, 19 Sep 2011 12:07:54 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

These are a nice set of cleanups and the extra fault reporting may come
in handy. (I thought that fault register was to be used in conjunction
with ppgtt in order for the gpu to fault in pages...)

As Daniel mentioned we need to include the VCS for the acthd_stuck()
check. I mentioned that I thought the general ERROR register was
actually per-ring, but checking the specs I don't see it replicated. The
only silly thing is that this introduces an array of register values for
only some of the per-ring registers, hence a bit of inconsistency and
just calling out for a complete overhaul of those per-ring structures
and hopefully code reduction ;-)

Per-ring per-gen hangcheck/error recording/error reporting may be
useful. More likely overkill. ;-)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-09-20  9:43 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-19 19:07 [PATCH 1/2] drm/i915: collect per ring page fault info on error Ben Widawsky
2011-09-19 19:07 ` [PATCH 2/2] drm/i915: check acthd for all rings Ben Widawsky
2011-09-19 20:36   ` Ben Widawsky
2011-09-20  9:42 ` [PATCH 1/2] drm/i915: collect per ring page fault info on error Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.