* [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3
@ 2014-08-19 12:22 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 2/3] drm/radeon: handle lockup in delayed work, v3 Maarten Lankhorst
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Maarten Lankhorst @ 2014-08-19 12:22 UTC (permalink / raw)
To: Christian König, Deucher, Alexander; +Cc: dri-devel@lists.freedesktop.org
This is needed for the next commit, because the lockup detection
will need the read lock to run.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
radeon_pm_compute_clocks already checks whether dpm is enabled, so there is no need to check a second time.
Because of the locking and waiting involved, the radeon_pm_compute_clocks and resume_force_mode calls
have to be done with the read lock held.
Seems to survive on my radeon when catting /sys/kernel/debug/dri/0/radeon_gpu_reset, although
uvd fails to reset and that ring gets disabled as a result.
drivers/gpu/drm/radeon/radeon.h | 2 +-
drivers/gpu/drm/radeon/radeon_device.c | 57 +++++++++++++++++++--------------
drivers/gpu/drm/radeon/radeon_display.c | 4 ++-
3 files changed, 37 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b281886f6f51..9d97409c0443 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2315,7 +2315,7 @@ struct radeon_device {
bool need_dma32;
bool accel_working;
bool fastfb_working; /* IGP feature*/
- bool needs_reset;
+ bool needs_reset, in_reset;
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
const struct firmware *me_fw; /* all family ME firmware */
const struct firmware *pfp_fw; /* r6/700 PFP firmware */
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 6a219bcee66d..124d8994e02a 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1671,6 +1671,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
down_write(&rdev->exclusive_lock);
if (!rdev->needs_reset) {
+ WARN_ON(rdev->in_reset);
up_write(&rdev->exclusive_lock);
return 0;
}
@@ -1683,17 +1684,21 @@ int radeon_gpu_reset(struct radeon_device *rdev)
radeon_suspend(rdev);
radeon_hpd_fini(rdev);
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
- &ring_data[i]);
- if (ring_sizes[i]) {
- saved = true;
- dev_info(rdev->dev, "Saved %d dwords of commands "
- "on ring %d.\n", ring_sizes[i], i);
+ if (!rdev->in_reset) {
+ rdev->in_reset = true;
+
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
+ &ring_data[i]);
+ if (ring_sizes[i]) {
+ saved = true;
+ dev_info(rdev->dev, "Saved %d dwords of commands "
+ "on ring %d.\n", ring_sizes[i], i);
+ }
}
- }
+ } else
+ memset(ring_data, 0, sizeof(ring_data));
-retry:
r = radeon_asic_reset(rdev);
if (!r) {
dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
@@ -1709,16 +1714,6 @@ retry:
ring_sizes[i] = 0;
ring_data[i] = NULL;
}
-
- r = radeon_ib_ring_tests(rdev);
- if (r) {
- dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
- if (saved) {
- saved = false;
- radeon_suspend(rdev);
- goto retry;
- }
- }
} else {
radeon_fence_driver_force_completion(rdev);
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1728,8 +1723,7 @@ retry:
if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
/* do dpm late init */
- r = radeon_pm_late_init(rdev);
- if (r) {
+ if (radeon_pm_late_init(rdev)) {
rdev->pm.dpm_enabled = false;
DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n");
}
@@ -1753,19 +1747,34 @@ retry:
/* reset hpd state */
radeon_hpd_init(rdev);
+ ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
+ downgrade_write(&rdev->exclusive_lock);
+
drm_helper_resume_force_mode(rdev->ddev);
/* set the power state here in case we are a PX system or headless */
- if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
+ if ((rdev->pm.pm_method == PM_METHOD_DPM))
radeon_pm_compute_clocks(rdev);
- ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
+ if (!r) {
+ r = radeon_ib_ring_tests(rdev);
+ if (r) {
+ dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
+ if (saved) {
+ rdev->needs_reset = true;
+ up_read(&rdev->exclusive_lock);
+ return -EAGAIN;
+ }
+ }
+ }
+
if (r) {
/* bad news, how to tell it to userspace ? */
dev_info(rdev->dev, "GPU reset failed\n");
}
- up_write(&rdev->exclusive_lock);
+ rdev->in_reset = false;
+ up_read(&rdev->exclusive_lock);
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 3fdf87318069..bd0d687379ee 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
r = radeon_fence_wait(work->fence, false);
if (r == -EDEADLK) {
up_read(&rdev->exclusive_lock);
- r = radeon_gpu_reset(rdev);
+ do {
+ r = radeon_gpu_reset(rdev);
+ } while (r == -EAGAIN);
down_read(&rdev->exclusive_lock);
}
if (r)
--
2.0.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 2/3] drm/radeon: handle lockup in delayed work, v3
2014-08-19 12:22 [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Maarten Lankhorst
@ 2014-08-19 12:27 ` Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 3/3] drm/radeon: add timeout argument to, radeon_fence_wait_seq Maarten Lankhorst
2014-08-19 13:06 ` [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Christian König
2 siblings, 0 replies; 6+ messages in thread
From: Maarten Lankhorst @ 2014-08-19 12:27 UTC (permalink / raw)
To: Christian König, Deucher, Alexander; +Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
V1 had a nasty bug breaking gpu lockup recovery. The fix is to not
allow radeon_fence_driver_check_lockup to take exclusive_lock,
and to kill it during lockup recovery instead.
V2 used delayed work that ran during lockup recovery, but required
the read lock. I've fixed this by downgrading the write lock, and retrying if
recovery fails.
The current v3 uses queue_delayed_work instead of mod_delayed_work, because
of a livelock with ttm delayed destroy.
It also only enables the delayed work if kms irqs are enabled, and because of
that it looked better to move sw_irq_get/put in radeon_fence_wait_seq a little
so that they are only called once.
---
drivers/gpu/drm/radeon/radeon.h | 2 +
drivers/gpu/drm/radeon/radeon_fence.c | 162 ++++++++++++++++++++--------------
2 files changed, 100 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 9d97409c0443..8774231631c5 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -349,6 +349,7 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
* Fences.
*/
struct radeon_fence_driver {
+ struct radeon_device *rdev;
uint32_t scratch_reg;
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
@@ -356,6 +357,7 @@ struct radeon_fence_driver {
uint64_t sync_seq[RADEON_NUM_RINGS];
atomic64_t last_seq;
bool initialized;
+ struct delayed_work fence_check_work;
};
struct radeon_fence {
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 913787085dfa..5755abf81e01 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -97,6 +97,20 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
return seq;
}
+static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
+{
+ if (!atomic_read(&rdev->irq.ring_int[ring]))
+ return;
+
+ /*
+ * Do not reset the timer here with mod_delayed_work,
+ * this can livelock in an interaction with TTM delayed destroy.
+ */
+ queue_delayed_work(system_power_efficient_wq,
+ &rdev->fence_drv[ring].fence_check_work,
+ RADEON_FENCE_JIFFIES_TIMEOUT);
+}
+
/**
* radeon_fence_emit - emit a fence on the requested ring
*
@@ -122,19 +136,23 @@ int radeon_fence_emit(struct radeon_device *rdev,
(*fence)->ring = ring;
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
+ radeon_fence_schedule_check(rdev, ring);
return 0;
}
/**
- * radeon_fence_process - process a fence
+ * radeon_fence_process_nowake - process a fence without waking up the fence queue
*
* @rdev: radeon_device pointer
* @ring: ring index the fence is associated with
*
* Checks the current fence value and wakes the fence queue
* if the sequence number has increased (all asics).
+ *
+ * Returns true if activity occured on the ring, and the fence_queue should
+ * be waken up.
*/
-void radeon_fence_process(struct radeon_device *rdev, int ring)
+static bool radeon_fence_process_nowake(struct radeon_device *rdev, int ring)
{
uint64_t seq, last_seq, last_emitted;
unsigned count_loop = 0;
@@ -190,7 +208,51 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
}
} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
- if (wake)
+ if (seq < last_emitted)
+ radeon_fence_schedule_check(rdev, ring);
+
+ return wake;
+}
+
+static void radeon_fence_driver_check_lockup(struct work_struct *work)
+{
+ struct radeon_fence_driver *fence_drv;
+ struct radeon_device *rdev;
+ unsigned long iring;
+
+ fence_drv = container_of(work, struct radeon_fence_driver, fence_check_work.work);
+ rdev = fence_drv->rdev;
+ iring = fence_drv - &rdev->fence_drv[0];
+
+ down_read(&rdev->exclusive_lock);
+ if (radeon_fence_process_nowake(rdev, iring))
+ wake_up_all(&rdev->fence_queue);
+ else if (radeon_ring_is_lockup(rdev, iring, &rdev->ring[iring])) {
+ /* good news we believe it's a lockup */
+ dev_warn(rdev->dev, "GPU lockup (current fence id "
+ "0x%016llx last fence id 0x%016llx on ring %ld)\n",
+ (uint64_t)atomic64_read(&fence_drv->last_seq),
+ fence_drv->sync_seq[iring], iring);
+
+ /* remember that we need an reset */
+ rdev->needs_reset = true;
+ wake_up_all(&rdev->fence_queue);
+ }
+ up_read(&rdev->exclusive_lock);
+}
+
+/**
+ * radeon_fence_process - process a fence
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
+ */
+void radeon_fence_process(struct radeon_device *rdev, int ring)
+{
+ if (radeon_fence_process_nowake(rdev, ring))
wake_up_all(&rdev->fence_queue);
}
@@ -302,84 +364,52 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
{
uint64_t last_seq[RADEON_NUM_RINGS];
bool signaled;
- int i, r;
+ long r;
+ int i;
- while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
+ signaled = radeon_fence_any_seq_signaled(rdev, target_seq);
+ if (signaled)
+ return 0;
- /* Save current sequence values, used to check for GPU lockups */
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
+ /* Save current sequence values, used to check for GPU lockups */
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ if (!target_seq[i])
+ continue;
- last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
- trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
- radeon_irq_kms_sw_irq_get(rdev, i);
- }
+ last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
+ trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
+ radeon_irq_kms_sw_irq_get(rdev, i);
+ }
+ while (!signaled) {
if (intr) {
r = wait_event_interruptible_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
- || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
+ || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
} else {
r = wait_event_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
- || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
+ || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
}
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
+ if (r < 0)
+ break;
- radeon_irq_kms_sw_irq_put(rdev, i);
- trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
+ if (rdev->needs_reset) {
+ r = -EDEADLK;
+ break;
}
+ }
- if (unlikely(r < 0))
- return r;
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ if (!target_seq[i])
+ continue;
- if (unlikely(!signaled)) {
- if (rdev->needs_reset)
- return -EDEADLK;
-
- /* we were interrupted for some reason and fence
- * isn't signaled yet, resume waiting */
- if (r)
- continue;
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
-
- if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq))
- break;
- }
-
- if (i != RADEON_NUM_RINGS)
- continue;
-
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
- if (!target_seq[i])
- continue;
-
- if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i]))
- break;
- }
-
- if (i < RADEON_NUM_RINGS) {
- /* good news we believe it's a lockup */
- dev_warn(rdev->dev, "GPU lockup (waiting for "
- "0x%016llx last fence id 0x%016llx on"
- " ring %d)\n",
- target_seq[i], last_seq[i], i);
-
- /* remember that we need an reset */
- rdev->needs_reset = true;
- wake_up_all(&rdev->fence_queue);
- return -EDEADLK;
- }
- }
+ radeon_irq_kms_sw_irq_put(rdev, i);
+ trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
}
- return 0;
+
+ return r < 0 ? r : 0;
}
/**
@@ -711,6 +741,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
rdev->fence_drv[ring].sync_seq[i] = 0;
atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
rdev->fence_drv[ring].initialized = false;
+ INIT_DELAYED_WORK(&rdev->fence_drv[ring].fence_check_work,
+ radeon_fence_driver_check_lockup);
+ rdev->fence_drv[ring].rdev = rdev;
}
/**
@@ -760,6 +793,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
/* no need to trigger GPU reset as we are unloading */
radeon_fence_driver_force_completion(rdev);
}
+ cancel_delayed_work_sync(&rdev->fence_drv[ring].fence_check_work);
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
rdev->fence_drv[ring].initialized = false;
--
2.0.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v3 3/3] drm/radeon: add timeout argument to, radeon_fence_wait_seq
2014-08-19 12:22 [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 2/3] drm/radeon: handle lockup in delayed work, v3 Maarten Lankhorst
@ 2014-08-19 12:27 ` Maarten Lankhorst
2014-08-19 13:06 ` [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Christian König
2 siblings, 0 replies; 6+ messages in thread
From: Maarten Lankhorst @ 2014-08-19 12:27 UTC (permalink / raw)
To: Christian König, Deucher, Alexander; +Cc: dri-devel@lists.freedesktop.org
This makes it possible to wait for a specific amount of time,
rather than wait until infinity.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Reviewed-by: Christian König <deathsimple@vodafone.de>
---
drivers/gpu/drm/radeon/radeon_fence.c | 54 ++++++++++++++++++++---------------
1 file changed, 31 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 5755abf81e01..ad9ae251cd9d 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -345,31 +345,34 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
}
/**
- * radeon_fence_wait_seq - wait for a specific sequence numbers
+ * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
*
* @rdev: radeon device pointer
* @target_seq: sequence number(s) we want to wait for
* @intr: use interruptable sleep
+ * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
*
* Wait for the requested sequence number(s) to be written by any ring
* (all asics). Sequnce number array is indexed by ring id.
* @intr selects whether to use interruptable (true) or non-interruptable
* (false) sleep when waiting for the sequence number. Helper function
* for radeon_fence_wait_*().
- * Returns 0 if the sequence number has passed, error for all other cases.
+ * Returns remaining time if the sequence number has passed, 0 when
+ * the wait timeout, or an error for all other cases.
* -EDEADLK is returned when a GPU lockup has been detected.
*/
-static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
- bool intr)
+static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
+ u64 *target_seq, bool intr,
+ long timeout)
{
uint64_t last_seq[RADEON_NUM_RINGS];
bool signaled;
- long r;
+ long r = timeout;
int i;
signaled = radeon_fence_any_seq_signaled(rdev, target_seq);
- if (signaled)
- return 0;
+ if (signaled || !r)
+ return r;
/* Save current sequence values, used to check for GPU lockups */
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -385,14 +388,14 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
if (intr) {
r = wait_event_interruptible_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
- || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
+ || rdev->needs_reset), r);
} else {
r = wait_event_timeout(rdev->fence_queue, (
(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
- || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT);
+ || rdev->needs_reset), r);
}
- if (r < 0)
+ if (r <= 0)
break;
if (rdev->needs_reset) {
@@ -409,14 +412,14 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
}
- return r < 0 ? r : 0;
+ return r;
}
/**
* radeon_fence_wait - wait for a fence to signal
*
* @fence: radeon fence object
- * @intr: use interruptable sleep
+ * @intr: use interruptible sleep
*
* Wait for the requested fence to signal (all asics).
* @intr selects whether to use interruptable (true) or non-interruptable
@@ -426,7 +429,7 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
- int r;
+ long r;
if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
@@ -437,9 +440,10 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
return 0;
- r = radeon_fence_wait_seq(fence->rdev, seq, intr);
- if (r)
+ r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0) {
return r;
+ }
fence->seq = RADEON_FENCE_SIGNALED_SEQ;
return 0;
@@ -464,7 +468,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
{
uint64_t seq[RADEON_NUM_RINGS];
unsigned i, num_rings = 0;
- int r;
+ long r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
seq[i] = 0;
@@ -485,8 +489,8 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
if (num_rings == 0)
return -ENOENT;
- r = radeon_fence_wait_seq(rdev, seq, intr);
- if (r) {
+ r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0) {
return r;
}
return 0;
@@ -505,6 +509,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
+ long r;
seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
@@ -512,7 +517,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
already the last emited fence */
return -ENOENT;
}
- return radeon_fence_wait_seq(rdev, seq, false);
+ r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+ return 0;
}
/**
@@ -528,18 +536,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
- int r;
+ long r;
seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
if (!seq[ring])
return 0;
- r = radeon_fence_wait_seq(rdev, seq, false);
- if (r) {
+ r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0) {
if (r == -EDEADLK)
return -EDEADLK;
- dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+ dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
ring, r);
}
return 0;
--
2.0.4
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3
2014-08-19 12:22 [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 2/3] drm/radeon: handle lockup in delayed work, v3 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 3/3] drm/radeon: add timeout argument to, radeon_fence_wait_seq Maarten Lankhorst
@ 2014-08-19 13:06 ` Christian König
2014-08-19 13:57 ` Maarten Lankhorst
2014-08-20 13:20 ` Maarten Lankhorst
2 siblings, 2 replies; 6+ messages in thread
From: Christian König @ 2014-08-19 13:06 UTC (permalink / raw)
To: Maarten Lankhorst, Deucher, Alexander; +Cc: dri-devel@lists.freedesktop.org
Am 19.08.2014 um 14:22 schrieb Maarten Lankhorst:
> This is needed for the next commit, because the lockup detection
> will need the read lock to run.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
> ---
> radeon_pm_compute_clocks already checks if dpm is enabled, so no need to check a second time.
>
> Because of locking and waiting stuff the radeon_pm_compute_clocks and resume_force_mode calls
> have to be done with read lock held.
>
> Seems to survive on my radeon when catting /sys/kernel/debug/dri/0/radeon_gpu_reset although
> uvd fails to reset, and that ring gets disabled as a result.
Depending on what hardware you have it's normal that UVD doesn't reset
properly. I still haven't figured out the correct sequence in which I
need to disable/enable the different UVD blocks on all hardware generations.
It seems to work fine on my Cayman, but doesn't, for example, on Turks
(which at least theoretically should have the same UVD block). It should
be fine as long as the engines get properly disabled when the IB test
fails after a reset.
Another common source of reset instability is DPM, while it now seems to
be stable on NI and BTC I can't get a single reset to work once I use it.
Regarding the patch it looks good now, but I still want to test it a bit,
Christian.
>
> drivers/gpu/drm/radeon/radeon.h | 2 +-
> drivers/gpu/drm/radeon/radeon_device.c | 57 +++++++++++++++++++--------------
> drivers/gpu/drm/radeon/radeon_display.c | 4 ++-
> 3 files changed, 37 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index b281886f6f51..9d97409c0443 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2315,7 +2315,7 @@ struct radeon_device {
> bool need_dma32;
> bool accel_working;
> bool fastfb_working; /* IGP feature*/
> - bool needs_reset;
> + bool needs_reset, in_reset;
> struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
> const struct firmware *me_fw; /* all family ME firmware */
> const struct firmware *pfp_fw; /* r6/700 PFP firmware */
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index 6a219bcee66d..124d8994e02a 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -1671,6 +1671,7 @@ int radeon_gpu_reset(struct radeon_device *rdev)
> down_write(&rdev->exclusive_lock);
>
> if (!rdev->needs_reset) {
> + WARN_ON(rdev->in_reset);
> up_write(&rdev->exclusive_lock);
> return 0;
> }
> @@ -1683,17 +1684,21 @@ int radeon_gpu_reset(struct radeon_device *rdev)
> radeon_suspend(rdev);
> radeon_hpd_fini(rdev);
>
> - for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> - ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
> - &ring_data[i]);
> - if (ring_sizes[i]) {
> - saved = true;
> - dev_info(rdev->dev, "Saved %d dwords of commands "
> - "on ring %d.\n", ring_sizes[i], i);
> + if (!rdev->in_reset) {
> + rdev->in_reset = true;
> +
> + for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> + ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
> + &ring_data[i]);
> + if (ring_sizes[i]) {
> + saved = true;
> + dev_info(rdev->dev, "Saved %d dwords of commands "
> + "on ring %d.\n", ring_sizes[i], i);
> + }
> }
> - }
> + } else
> + memset(ring_data, 0, sizeof(ring_data));
>
> -retry:
> r = radeon_asic_reset(rdev);
> if (!r) {
> dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
> @@ -1709,16 +1714,6 @@ retry:
> ring_sizes[i] = 0;
> ring_data[i] = NULL;
> }
> -
> - r = radeon_ib_ring_tests(rdev);
> - if (r) {
> - dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
> - if (saved) {
> - saved = false;
> - radeon_suspend(rdev);
> - goto retry;
> - }
> - }
> } else {
> radeon_fence_driver_force_completion(rdev);
> for (i = 0; i < RADEON_NUM_RINGS; ++i) {
> @@ -1728,8 +1723,7 @@ retry:
>
> if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
> /* do dpm late init */
> - r = radeon_pm_late_init(rdev);
> - if (r) {
> + if (radeon_pm_late_init(rdev)) {
> rdev->pm.dpm_enabled = false;
> DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n");
> }
> @@ -1753,19 +1747,34 @@ retry:
> /* reset hpd state */
> radeon_hpd_init(rdev);
>
> + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> + downgrade_write(&rdev->exclusive_lock);
> +
> drm_helper_resume_force_mode(rdev->ddev);
>
> /* set the power state here in case we are a PX system or headless */
> - if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
> + if ((rdev->pm.pm_method == PM_METHOD_DPM))
> radeon_pm_compute_clocks(rdev);
>
> - ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> + if (!r) {
> + r = radeon_ib_ring_tests(rdev);
> + if (r) {
> + dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
> + if (saved) {
> + rdev->needs_reset = true;
> + up_read(&rdev->exclusive_lock);
> + return -EAGAIN;
> + }
> + }
> + }
> +
> if (r) {
> /* bad news, how to tell it to userspace ? */
> dev_info(rdev->dev, "GPU reset failed\n");
> }
>
> - up_write(&rdev->exclusive_lock);
> + rdev->in_reset = false;
> + up_read(&rdev->exclusive_lock);
> return r;
> }
>
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
> index 3fdf87318069..bd0d687379ee 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
> r = radeon_fence_wait(work->fence, false);
> if (r == -EDEADLK) {
> up_read(&rdev->exclusive_lock);
> - r = radeon_gpu_reset(rdev);
> + do {
> + r = radeon_gpu_reset(rdev);
> + } while (r == -EAGAIN);
> down_read(&rdev->exclusive_lock);
> }
> if (r)
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3
2014-08-19 13:06 ` [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Christian König
@ 2014-08-19 13:57 ` Maarten Lankhorst
2014-08-20 13:20 ` Maarten Lankhorst
1 sibling, 0 replies; 6+ messages in thread
From: Maarten Lankhorst @ 2014-08-19 13:57 UTC (permalink / raw)
To: Christian König, Deucher, Alexander; +Cc: dri-devel@lists.freedesktop.org
Op 19-08-14 om 15:06 schreef Christian König:
> Am 19.08.2014 um 14:22 schrieb Maarten Lankhorst:
>> This is needed for the next commit, because the lockup detection
>> will need the read lock to run.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>> ---
>> radeon_pm_compute_clocks already checks if dpm is enabled, so no need to check a second time.
>>
>> Because of locking and waiting stuff the radeon_pm_compute_clocks and resume_force_mode calls
>> have to be done with read lock held.
>>
>> Seems to survive on my radeon when catting /sys/kernel/debug/dri/0/radeon_gpu_reset although
>> uvd fails to reset, and that ring gets disabled as a result.
>
> Depending on what hardware you have it's normal that UVD doesn't reset properly. I still haven't figured out the correct sequence in which I need to disable/enable the different UVD blocks on all hardware generations.
>
> It seems to work fine on my Cayman, but doesn't for example on Turks (which at least theoretically should have the same UVD block). It should be fine as long as the engines gets properly disabled when the IB test fails after an reset.
>
> Another common source of reset instability is DPM, while it now seems to be stable on NI and BTC I can't get a single reset to work once I use it.
Ah, in my testing I've hit both (on redwood). :-) The DPM failures are far less frequent than the UVD failures, though, which trigger consistently.
Turks had some issues before, but my laptop fails completely early during boot with 3.17-rc1, so I can't test it.
~Maarten
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3
2014-08-19 13:06 ` [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Christian König
2014-08-19 13:57 ` Maarten Lankhorst
@ 2014-08-20 13:20 ` Maarten Lankhorst
1 sibling, 0 replies; 6+ messages in thread
From: Maarten Lankhorst @ 2014-08-20 13:20 UTC (permalink / raw)
To: Christian König, Deucher, Alexander; +Cc: dri-devel@lists.freedesktop.org
[-- Attachment #1: Type: text/plain, Size: 10237 bytes --]
Hey,
I found another bug related to gpu reset, one that seems to trigger more reliably with the delayed work changes.
Op 19-08-14 om 15:06 schreef Christian König:
> Am 19.08.2014 um 14:22 schrieb Maarten Lankhorst:
>> This is needed for the next commit, because the lockup detection
>> will need the read lock to run.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>> ---
>> radeon_pm_compute_clocks already checks if dpm is enabled, so no need to check a second time.
>>
>> Because of locking and waiting stuff the radeon_pm_compute_clocks and resume_force_mode calls
>> have to be done with read lock held.
>>
>> Seems to survive on my radeon when catting /sys/kernel/debug/dri/0/radeon_gpu_reset although
>> uvd fails to reset, and that ring gets disabled as a result.
>
> Depending on what hardware you have it's normal that UVD doesn't reset properly. I still haven't figured out the correct sequence in which I need to disable/enable the different UVD blocks on all hardware generations.
>
> It seems to work fine on my Cayman, but doesn't for example on Turks (which at least theoretically should have the same UVD block). It should be fine as long as the engines gets properly disabled when the IB test fails after an reset.
>
> Another common source of reset instability is DPM, while it now seems to be stable on NI and BTC I can't get a single reset to work once I use it.
>
> Regarding the patch it looks good now, but I still want to test it a bit,
> Christian.
It seems that if UVD fails, a second lockup recovery could in theory be
attempted because the uvd ring is locked up, and the delayed work doesn't
check whether the ring is still ready or not.
The changes in "drm/radeon: handle lockup in delayed work, v3" expose
this bug reliably, because they force hangups to always be checked.
This results in repeated lockup recovery occurring.
Does this alternate patch look better?
I've also attached a patch to this mail that tests for lockups on all rings; if any of them have
saved ring data, the radeon_gpu_reset function will be retried.
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b281886f6f51..eca6382f259f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -370,7 +370,7 @@ struct radeon_fence {
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
int radeon_fence_driver_init(struct radeon_device *rdev);
void radeon_fence_driver_fini(struct radeon_device *rdev);
-void radeon_fence_driver_force_completion(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
void radeon_fence_process(struct radeon_device *rdev, int ring);
bool radeon_fence_signaled(struct radeon_fence *fence);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 6a219bcee66d..e13e408832e5 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1395,9 +1395,7 @@ int radeon_device_init(struct radeon_device *rdev,
if (r)
return r;
- r = radeon_ib_ring_tests(rdev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
+ radeon_ib_ring_tests(rdev);
r = radeon_gem_debugfs_init(rdev);
if (r) {
@@ -1486,7 +1484,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
struct drm_crtc *crtc;
struct drm_connector *connector;
int i, r;
- bool force_completion = false;
if (dev == NULL || dev->dev_private == NULL) {
return -ENODEV;
@@ -1530,12 +1527,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
r = radeon_fence_wait_empty(rdev, i);
if (r) {
/* delay GPU reset to resume */
- force_completion = true;
+ radeon_fence_driver_force_completion(rdev, i);
}
}
- if (force_completion) {
- radeon_fence_driver_force_completion(rdev);
- }
radeon_save_bios_scratch_regs(rdev);
@@ -1595,9 +1589,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
radeon_agp_resume(rdev);
radeon_resume(rdev);
- r = radeon_ib_ring_tests(rdev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
+ radeon_ib_ring_tests(rdev);
if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
/* do dpm late init */
@@ -1663,13 +1655,10 @@ int radeon_gpu_reset(struct radeon_device *rdev)
unsigned ring_sizes[RADEON_NUM_RINGS];
uint32_t *ring_data[RADEON_NUM_RINGS];
- bool saved = false;
-
int i, r;
int resched;
down_write(&rdev->exclusive_lock);
-
if (!rdev->needs_reset) {
up_write(&rdev->exclusive_lock);
return 0;
@@ -1687,13 +1676,11 @@ int radeon_gpu_reset(struct radeon_device *rdev)
ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
&ring_data[i]);
if (ring_sizes[i]) {
- saved = true;
dev_info(rdev->dev, "Saved %d dwords of commands "
"on ring %d.\n", ring_sizes[i], i);
}
}
-retry:
r = radeon_asic_reset(rdev);
if (!r) {
dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
@@ -1702,34 +1689,19 @@ retry:
radeon_restore_bios_scratch_regs(rdev);
- if (!r) {
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ if (!r && ring_data[i]) {
radeon_ring_restore(rdev, &rdev->ring[i],
ring_sizes[i], ring_data[i]);
- ring_sizes[i] = 0;
- ring_data[i] = NULL;
- }
-
- r = radeon_ib_ring_tests(rdev);
- if (r) {
- dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
- if (saved) {
- saved = false;
- radeon_suspend(rdev);
- goto retry;
- }
- }
- } else {
- radeon_fence_driver_force_completion(rdev);
- for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ } else {
+ radeon_fence_driver_force_completion(rdev, i);
kfree(ring_data[i]);
}
}
if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
/* do dpm late init */
- r = radeon_pm_late_init(rdev);
- if (r) {
+ if (radeon_pm_late_init(rdev)) {
rdev->pm.dpm_enabled = false;
DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n");
}
@@ -1753,19 +1725,27 @@ retry:
/* reset hpd state */
radeon_hpd_init(rdev);
+ ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
+ downgrade_write(&rdev->exclusive_lock);
+
drm_helper_resume_force_mode(rdev->ddev);
/* set the power state here in case we are a PX system or headless */
- if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
+ if ((rdev->pm.pm_method == PM_METHOD_DPM))
radeon_pm_compute_clocks(rdev);
- ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
- if (r) {
+ if (!r) {
+ r = radeon_ib_ring_tests(rdev);
+ if (r && ring_data[RADEON_RING_TYPE_GFX_INDEX])
+ r = -EAGAIN;
+ }
+
+ if (r && r != -EAGAIN) {
/* bad news, how to tell it to userspace ? */
dev_info(rdev->dev, "GPU reset failed\n");
}
- up_write(&rdev->exclusive_lock);
+ up_read(&rdev->exclusive_lock);
return r;
}
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 3fdf87318069..bd0d687379ee 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
r = radeon_fence_wait(work->fence, false);
if (r == -EDEADLK) {
up_read(&rdev->exclusive_lock);
- r = radeon_gpu_reset(rdev);
+ do {
+ r = radeon_gpu_reset(rdev);
+ } while (r == -EAGAIN);
down_read(&rdev->exclusive_lock);
}
if (r)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 913787085dfa..f17dfea6465c 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -758,7 +758,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
r = radeon_fence_wait_empty(rdev, ring);
if (r) {
/* no need to trigger GPU reset as we are unloading */
- radeon_fence_driver_force_completion(rdev);
+ radeon_fence_driver_force_completion(rdev, ring);
}
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
@@ -771,19 +771,15 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
* radeon_fence_driver_force_completion - force all fence waiter to complete
*
* @rdev: radeon device pointer
+ * @ring: the ring to complete
*
* In case of GPU reset failure make sure no process keep waiting on fence
* that will never complete.
*/
-void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
- int ring;
-
- for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
- if (!rdev->fence_drv[ring].initialized)
- continue;
+ if (rdev->fence_drv[ring].initialized)
radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
- }
}
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 5bf2c0a05827..f2b2e2ee2f3f 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -271,6 +271,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
if (r) {
ring->ready = false;
rdev->needs_reset = false;
+ radeon_fence_driver_force_completion(rdev, i);
if (i == RADEON_RING_TYPE_GFX_INDEX) {
/* oh, oh, that's really bad */
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index d65607902537..3b4e69f26014 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -304,7 +304,7 @@ unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring
mutex_lock(&rdev->ring_lock);
*data = NULL;
- if (ring->ring_obj == NULL) {
+ if (ring->ring_obj == NULL || !ring->ready) {
mutex_unlock(&rdev->ring_lock);
return 0;
}
[-- Attachment #2: radeon-test-all-rings.diff --]
[-- Type: text/x-patch, Size: 1118 bytes --]
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e13e408832e5..9253e1253780 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1735,8 +1735,38 @@ int radeon_gpu_reset(struct radeon_device *rdev)
radeon_pm_compute_clocks(rdev);
if (!r) {
- r = radeon_ib_ring_tests(rdev);
- if (r && ring_data[RADEON_RING_TYPE_GFX_INDEX])
+ bool retry = false;
+
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ struct radeon_ring *ring = &rdev->ring[i];
+
+ if (!ring->ready)
+ continue;
+
+ r = radeon_ib_test(rdev, i, ring);
+ if (!r)
+ continue;
+
+ ring->ready = false;
+ dev_err(rdev->dev, "failed testing IB on ring %d (%d)\n", i, r);
+
+ if (ring_data[i]) {
+ retry = true;
+ continue;
+ } else {
+ radeon_fence_driver_force_completion(rdev, i);
+ rdev->needs_reset = false;
+ r = 0;
+ }
+
+ if (i == RADEON_RING_TYPE_GFX_INDEX) {
+ dev_err(rdev->dev, "disabling acceleration\n");
+ rdev->accel_working = false;
+ break;
+ }
+ }
+
+ if (retry)
r = -EAGAIN;
}
[-- Attachment #3: Type: text/plain, Size: 159 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2014-08-20 13:20 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-08-19 12:22 [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 2/3] drm/radeon: handle lockup in delayed work, v3 Maarten Lankhorst
2014-08-19 12:27 ` [PATCH v3 3/3] drm/radeon: add timeout argument to, radeon_fence_wait_seq Maarten Lankhorst
2014-08-19 13:06 ` [PATCH v3 1/3] drm/radeon: take exclusive_lock in read mode during, ring tests, v3 Christian König
2014-08-19 13:57 ` Maarten Lankhorst
2014-08-20 13:20 ` Maarten Lankhorst
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.