intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Subject: [PATCH 3/5] drm/i915: Harden detection of missed interrupts
Date: Tue, 16 Feb 2016 11:47:46 +0000	[thread overview]
Message-ID: <1455623268-10023-4-git-send-email-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <1455623268-10023-1-git-send-email-chris@chris-wilson.co.uk>

Only declare a missed interrupt if we find that the GPU is idle with
waiters and a hangcheck interval has passed in which no new user
interrupts have been raised.

v2: Clear the stuck interrupt marker between successful batches

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 11 +++++++----
 drivers/gpu/drm/i915/i915_irq.c         | 10 +++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c4df580ed0de..f3ba97ad3e00 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -730,10 +730,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 static void i915_ring_seqno_info(struct seq_file *m,
 				 struct intel_engine_cs *ring)
 {
-	if (ring->get_seqno) {
-		seq_printf(m, "Current sequence (%s): %x\n",
-			   ring->name, ring->get_seqno(ring));
-	}
+	seq_printf(m, "Current sequence (%s): %x\n",
+		   ring->name, ring->get_seqno(ring));
+	seq_printf(m, "Current user interrupts (%s): %x\n",
+		   ring->name, READ_ONCE(ring->user_interrupts));
 }
 
 static int i915_gem_seqno_info(struct seq_file *m, void *data)
@@ -1361,6 +1361,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seq_printf(m, "%s:\n", ring->name);
 		seq_printf(m, "\tseqno = %x [current %x]\n",
 			   ring->hangcheck.seqno, seqno[i]);
+		seq_printf(m, "\tuser interrupts = %x [current %x]\n",
+			   ring->hangcheck.user_interrupts,
+			   ring->user_interrupts);
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)ring->hangcheck.acthd,
 			   (long long)acthd[i]);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 07bc2cdd6252..c0aeff607130 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1000,6 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
 		return;
 
 	trace_i915_gem_request_notify(ring);
+	ring->user_interrupts++;
 
 	wake_up_all(&ring->irq_queue);
 }
@@ -3097,6 +3098,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	for_each_ring(ring, dev_priv, i) {
 		u64 acthd;
 		u32 seqno;
+		unsigned user_interrupts;
 		bool busy = true;
 
 		semaphore_clear_deadlocks(dev_priv);
@@ -3113,6 +3115,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 		acthd = intel_ring_get_active_head(ring);
 		seqno = ring->get_seqno(ring);
+		user_interrupts = READ_ONCE(ring->user_interrupts);
 
 		if (ring->hangcheck.seqno == seqno) {
 			if (ring_idle(ring, seqno)) {
@@ -3120,7 +3123,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 				if (waitqueue_active(&ring->irq_queue)) {
 					/* Issue a wake-up to catch stuck h/w. */
-					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
+					if (ring->hangcheck.user_interrupts == user_interrupts &&
+					    !test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
 						if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
 							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
 								  ring->name);
@@ -3183,10 +3187,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 			memset(ring->hangcheck.instdone, 0,
 			       sizeof(ring->hangcheck.instdone));
+
+			/* Reset stuck interrupts between batch advances */
+			user_interrupts = 0;
 		}
 
 		ring->hangcheck.seqno = seqno;
 		ring->hangcheck.acthd = acthd;
+		ring->hangcheck.user_interrupts = user_interrupts;
 		busy_count += busy;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4cea04491392..dfb14bfe5bc8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
 	u64 acthd;
 	u64 max_acthd;
 	u32 seqno;
+	unsigned user_interrupts;
 	int score;
 	enum intel_ring_hangcheck_action action;
 	int deadlock;
@@ -306,6 +307,7 @@ struct  intel_engine_cs {
 	 * inspecting request list.
 	 */
 	u32 last_submitted_seqno;
+	unsigned user_interrupts;
 
 	bool gpu_caches_dirty;
 
-- 
2.7.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-02-16 11:48 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-16 11:47 Missed interrupt false positives mitigation Chris Wilson
2016-02-16 11:47 ` [PATCH 1/5] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Chris Wilson
2016-02-16 11:47 ` [PATCH 2/5] drm/i915: Separate out the seqno-barrier from engine->get_seqno Chris Wilson
2016-02-16 12:51   ` Mika Kuoppala
2016-02-16 11:47 ` Chris Wilson [this message]
2016-02-16 12:51   ` [PATCH 3/5] drm/i915: Harden detection of missed interrupts Mika Kuoppala
2016-02-16 11:47 ` [PATCH 4/5] drm/i915: Use simplest form for flushing the single cacheline in the HWS Chris Wilson
2016-02-16 12:58   ` Mika Kuoppala
2016-02-19 11:49     ` Chris Wilson
2016-02-19 13:42   ` Mika Kuoppala
2016-02-16 11:47 ` [PATCH 5/5] drm/i915: Replace manual barrier() with READ_ONCE() in HWS accessor Chris Wilson
2016-02-16 12:14 ` ✗ Fi.CI.BAT: warning for series starting with [1/5] drm/i915: Remove forcewake dance from seqno/irq barrier on legacy gen6+ Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1455623268-10023-4-git-send-email-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).