All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
To: "Christian König" <deathsimple@vodafone.de>,
	"Deucher, Alexander" <Alexander.Deucher@amd.com>
Cc: "dri-devel@lists.freedesktop.org" <dri-devel@lists.freedesktop.org>
Subject: [PATCH v3 2/3] drm/radeon: handle lockup in delayed work, v3
Date: Tue, 19 Aug 2014 14:27:18 +0200	[thread overview]
Message-ID: <53F342A6.7030803@canonical.com> (raw)
In-Reply-To: <53F341A3.6060902@canonical.com>

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
V1 had a nasty bug breaking gpu lockup recovery. The fix is not
allowing radeon_fence_driver_check_lockup to take exclusive_lock,
and kill it during lockup recovery instead.
V2 used delayed work that ran during lockup recovery, but required
read lock. I've fixed this by downgrading the write, and retrying if
recovery fails.

Current v3 uses queue_delayed_work instead of mod_delayed_work, because
of a livelock with ttm delayed destroy.
It also only enables the delayed work if kms irq's are enabled, and because of
that it looked better to move sw_irq_get/put in radeon_fence_wait_seq a little
to only be called once.

---
 drivers/gpu/drm/radeon/radeon.h       |   2 +
 drivers/gpu/drm/radeon/radeon_fence.c | 162 ++++++++++++++++++++--------------
 2 files changed, 100 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 9d97409c0443..8774231631c5 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -349,6 +349,7 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
  * Fences.
  */
 struct radeon_fence_driver {
+	struct radeon_device		*rdev;
 	uint32_t			scratch_reg;
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
@@ -356,6 +357,7 @@ struct radeon_fence_driver {
 	uint64_t			sync_seq[RADEON_NUM_RINGS];
 	atomic64_t			last_seq;
 	bool				initialized;
+	struct delayed_work		fence_check_work;
 };
 
 struct radeon_fence {
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 913787085dfa..5755abf81e01 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -97,6 +97,20 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 	return seq;
 }
 
+static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
+{
+	if (!atomic_read(&rdev->irq.ring_int[ring]))
+		return;
+
+	/*
+	 * Do not reset the timer here with mod_delayed_work,
+	 * this can livelock in an interaction with TTM delayed destroy.
+	 */
+	queue_delayed_work(system_power_efficient_wq,
+			   &rdev->fence_drv[ring].fence_check_work,
+			   RADEON_FENCE_JIFFIES_TIMEOUT);
+}
+
 /**
  * radeon_fence_emit - emit a fence on the requested ring
  *
@@ -122,19 +136,23 @@ int radeon_fence_emit(struct radeon_device *rdev,
 	(*fence)->ring = ring;
 	radeon_fence_ring_emit(rdev, ring, *fence);
 	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
+	radeon_fence_schedule_check(rdev, ring);
 	return 0;
 }
 
 /**
- * radeon_fence_process - process a fence
+ * radeon_fence_process_nowake - process a fence without waking up the fence queue
  *
  * @rdev: radeon_device pointer
  * @ring: ring index the fence is associated with
  *
  * Checks the current fence value and wakes the fence queue
  * if the sequence number has increased (all asics).
+ *
+ * Returns true if activity occured on the ring, and the fence_queue should
+ * be waken up.
  */
-void radeon_fence_process(struct radeon_device *rdev, int ring)
+static bool radeon_fence_process_nowake(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq, last_seq, last_emitted;
 	unsigned count_loop = 0;
@@ -190,7 +208,51 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 		}
 	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
 
-	if (wake)
+	if (seq < last_emitted)
+		radeon_fence_schedule_check(rdev, ring);
+
+	return wake;
+}
+
+static void radeon_fence_driver_check_lockup(struct work_struct *work)
+{
+	struct radeon_fence_driver *fence_drv;
+	struct radeon_device *rdev;
+	unsigned long iring;
+
+	fence_drv = container_of(work, struct radeon_fence_driver, fence_check_work.work);
+	rdev = fence_drv->rdev;
+	iring = fence_drv - &rdev->fence_drv[0];
+
+	down_read(&rdev->exclusive_lock);
+	if (radeon_fence_process_nowake(rdev, iring))
+		wake_up_all(&rdev->fence_queue);
+	else if (radeon_ring_is_lockup(rdev, iring, &rdev->ring[iring])) {
+		/* good news we believe it's a lockup */
+		dev_warn(rdev->dev, "GPU lockup (current fence id "
+			 "0x%016llx last fence id 0x%016llx on ring %ld)\n",
+			 (uint64_t)atomic64_read(&fence_drv->last_seq),
+			 fence_drv->sync_seq[iring], iring);
+
+		/* remember that we need an reset */
+		rdev->needs_reset = true;
+		wake_up_all(&rdev->fence_queue);
+	}
+	up_read(&rdev->exclusive_lock);
+}
+
+/**
+ * radeon_fence_process - process a fence
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
+ */
+void radeon_fence_process(struct radeon_device *rdev, int ring)
+{
+	if (radeon_fence_process_nowake(rdev, ring))
 		wake_up_all(&rdev->fence_queue);
 }
 
@@ -302,84 +364,52 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
 {
 	uint64_t last_seq[RADEON_NUM_RINGS];
 	bool signaled;
-	int i, r;
+	long r;
+	int i;
 
-	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
+	signaled = radeon_fence_any_seq_signaled(rdev, target_seq);
+	if (signaled)
+		return 0;
 
-		/* Save current sequence values, used to check for GPU lockups */
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-			if (!target_seq[i])
-				continue;
+	/* Save current sequence values, used to check for GPU lockups */
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i])
+			continue;
 
-			last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
-			trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
-			radeon_irq_kms_sw_irq_get(rdev, i);
-		}
+		last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
+		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
+		radeon_irq_kms_sw_irq_get(rdev, i);
+	}
 
+	while (!signaled) {
 		if (intr) {
 			r = wait_event_interruptible_timeout(rdev->fence_queue, (
 				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-				 || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
+				 || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
 		} else {
 			r = wait_event_timeout(rdev->fence_queue, (
 				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-				 || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
+				 || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
 		}
 
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-			if (!target_seq[i])
-				continue;
+		if (r < 0)
+			break;
 
-			radeon_irq_kms_sw_irq_put(rdev, i);
-			trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
+		if (rdev->needs_reset) {
+			r = -EDEADLK;
+			break;
 		}
+	}
 
-		if (unlikely(r < 0))
-			return r;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i])
+			continue;
 
-		if (unlikely(!signaled)) {
-			if (rdev->needs_reset)
-				return -EDEADLK;
-
-			/* we were interrupted for some reason and fence
-			 * isn't signaled yet, resume waiting */
-			if (r)
-				continue;
-
-			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-				if (!target_seq[i])
-					continue;
-
-				if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq))
-					break;
-			}
-
-			if (i != RADEON_NUM_RINGS)
-				continue;
-
-			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-				if (!target_seq[i])
-					continue;
-
-				if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i]))
-					break;
-			}
-
-			if (i < RADEON_NUM_RINGS) {
-				/* good news we believe it's a lockup */
-				dev_warn(rdev->dev, "GPU lockup (waiting for "
-					 "0x%016llx last fence id 0x%016llx on"
-					 " ring %d)\n",
-					 target_seq[i], last_seq[i], i);
-
-				/* remember that we need an reset */
-				rdev->needs_reset = true;
-				wake_up_all(&rdev->fence_queue);
-				return -EDEADLK;
-			}
-		}
+		radeon_irq_kms_sw_irq_put(rdev, i);
+		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
 	}
-	return 0;
+
+	return r < 0 ? r : 0;
 }
 
 /**
@@ -711,6 +741,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 		rdev->fence_drv[ring].sync_seq[i] = 0;
 	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
 	rdev->fence_drv[ring].initialized = false;
+	INIT_DELAYED_WORK(&rdev->fence_drv[ring].fence_check_work,
+			  radeon_fence_driver_check_lockup);
+	rdev->fence_drv[ring].rdev = rdev;
 }
 
 /**
@@ -760,6 +793,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 			/* no need to trigger GPU reset as we are unloading */
 			radeon_fence_driver_force_completion(rdev);
 		}
+		cancel_delayed_work_sync(&rdev->fence_drv[ring].fence_check_work);
 		wake_up_all(&rdev->fence_queue);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 		rdev->fence_drv[ring].initialized = false;
-- 
2.0.4

  reply	other threads:[~2014-08-19 12:27 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-19 12:22 [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Maarten Lankhorst
2014-08-19 12:27 ` Maarten Lankhorst [this message]
2014-08-19 12:27 ` [PATCH v3 3/3] drm/radeon: add timeout argument to, radeon_fence_wait_seq Maarten Lankhorst
2014-08-19 13:06 ` [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Christian König
2014-08-19 13:57   ` Maarten Lankhorst
2014-08-20 13:20   ` Maarten Lankhorst

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53F342A6.7030803@canonical.com \
    --to=maarten.lankhorst@canonical.com \
    --cc=Alexander.Deucher@amd.com \
    --cc=deathsimple@vodafone.de \
    --cc=dri-devel@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.