All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/radeon: move ring locking out of reset path
@ 2012-06-29 14:45 Christian König
  2012-06-29 14:45 ` [PATCH 2/3] drm/radeon: add error handling to fence_wait_empty_locked Christian König
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Christian König @ 2012-06-29 14:45 UTC (permalink / raw)
  To: dri-devel

Hold the ring lock the whole time the reset is in progress,
otherwise another process can submit new jobs.

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/evergreen.c     |    8 ++++----
 drivers/gpu/drm/radeon/ni.c            |    8 ++++----
 drivers/gpu/drm/radeon/r100.c          |    8 ++++----
 drivers/gpu/drm/radeon/r300.c          |    4 ++--
 drivers/gpu/drm/radeon/r420.c          |    8 ++++----
 drivers/gpu/drm/radeon/r600.c          |    8 ++++----
 drivers/gpu/drm/radeon/radeon_device.c |   14 ++++++++------
 drivers/gpu/drm/radeon/rv515.c         |    4 ++--
 drivers/gpu/drm/radeon/si.c            |   12 ++++++------
 9 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index f716e08..09e6da8 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1418,7 +1418,7 @@ static int evergreen_cp_start(struct radeon_device *rdev)
 	int r, i;
 	uint32_t cp_me;
 
-	r = radeon_ring_lock(rdev, ring, 7);
+	r = radeon_ring_alloc(rdev, ring, 7);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -1430,12 +1430,12 @@ static int evergreen_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 
 	cp_me = 0xff;
 	WREG32(CP_ME_CNTL, cp_me);
 
-	r = radeon_ring_lock(rdev, ring, evergreen_default_size + 19);
+	r = radeon_ring_alloc(rdev, ring, evergreen_default_size + 19);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -1473,7 +1473,7 @@ static int evergreen_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	radeon_ring_write(ring, 0x00000010); /*  */
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 2366be3..7e516ce 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -918,7 +918,7 @@ static int cayman_cp_start(struct radeon_device *rdev)
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r, i;
 
-	r = radeon_ring_lock(rdev, ring, 7);
+	r = radeon_ring_alloc(rdev, ring, 7);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -930,11 +930,11 @@ static int cayman_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 
 	cayman_cp_enable(rdev, true);
 
-	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
+	r = radeon_ring_alloc(rdev, ring, cayman_default_size + 19);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -972,7 +972,7 @@ static int cayman_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	radeon_ring_write(ring, 0x00000010); /*  */
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 
 	/* XXX init other rings */
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 35825bf..3901a68 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -955,7 +955,7 @@ void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	int r;
 
-	r = radeon_ring_lock(rdev, ring, 2);
+	r = radeon_ring_alloc(rdev, ring, 2);
 	if (r) {
 		return;
 	}
@@ -965,7 +965,7 @@ void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 			  RADEON_ISYNC_ANY3D_IDLE2D |
 			  RADEON_ISYNC_WAIT_IDLEGUI |
 			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 }
 
 
@@ -3631,7 +3631,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ring_lock(rdev, ring, 2);
+	r = radeon_ring_alloc(rdev, ring, 2);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		radeon_scratch_free(rdev, scratch);
@@ -3639,7 +3639,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	}
 	radeon_ring_write(ring, PACKET0(scratch, 0));
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF) {
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 97722a3..32d3510 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -229,7 +229,7 @@ void r300_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 		break;
 	}
 
-	r = radeon_ring_lock(rdev, ring, 64);
+	r = radeon_ring_alloc(rdev, ring, 64);
 	if (r) {
 		return;
 	}
@@ -293,7 +293,7 @@ void r300_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_write(ring,
 			  R300_GEOMETRY_ROUND_NEAREST |
 			  R300_COLOR_ROUND_NEAREST);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 }
 
 void r300_errata(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 99137be..b9a4560 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -208,11 +208,11 @@ static void r420_cp_errata_init(struct radeon_device *rdev)
 	 * of the CP init, apparently.
 	 */
 	radeon_scratch_get(rdev, &rdev->config.r300.resync_scratch);
-	radeon_ring_lock(rdev, ring, 8);
+	radeon_ring_alloc(rdev, ring, 8);
 	radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
 	radeon_ring_write(ring, rdev->config.r300.resync_scratch);
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 }
 
 static void r420_cp_errata_fini(struct radeon_device *rdev)
@@ -222,10 +222,10 @@ static void r420_cp_errata_fini(struct radeon_device *rdev)
 	/* Catch the RESYNC we dispatched all the way back,
 	 * at the very beginning of the CP init.
 	 */
-	radeon_ring_lock(rdev, ring, 8);
+	radeon_ring_alloc(rdev, ring, 8);
 	radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 	radeon_ring_write(ring, R300_RB3D_DC_FINISH);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 	radeon_scratch_free(rdev, rdev->config.r300.resync_scratch);
 }
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 43d0c41..fe60bf8 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2066,7 +2066,7 @@ int r600_cp_start(struct radeon_device *rdev)
 	int r;
 	uint32_t cp_me;
 
-	r = radeon_ring_lock(rdev, ring, 7);
+	r = radeon_ring_alloc(rdev, ring, 7);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -2083,7 +2083,7 @@ int r600_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 
 	cp_me = 0xff;
 	WREG32(R_0086D8_CP_ME_CNTL, cp_me);
@@ -2198,7 +2198,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = radeon_ring_lock(rdev, ring, 3);
+	r = radeon_ring_alloc(rdev, ring, 3);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ridx, r);
 		radeon_scratch_free(rdev, scratch);
@@ -2207,7 +2207,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
 	radeon_ring_write(ring, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
 	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index f654ba8..9df4130 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -991,26 +991,28 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
-	radeon_suspend(rdev);
 
+	mutex_lock(&rdev->ring_lock);
+	radeon_suspend(rdev);
 	r = radeon_asic_reset(rdev);
 	if (!r) {
 		dev_info(rdev->dev, "GPU reset succeed\n");
 		radeon_resume(rdev);
-		radeon_restore_bios_scratch_regs(rdev);
-		drm_helper_resume_force_mode(rdev->ddev);
-		ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
 	}
+	mutex_unlock(&rdev->ring_lock);
+
+	/* no matter what restore video mode */
+	radeon_restore_bios_scratch_regs(rdev);
+	drm_helper_resume_force_mode(rdev->ddev);
+	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
 
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(rdev->dev, "GPU reset failed\n");
 	}
-
 	return r;
 }
 
-
 /*
  * Debugfs
  */
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 7f08ced..48441ce 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -57,7 +57,7 @@ void rv515_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	int r;
 
-	r = radeon_ring_lock(rdev, ring, 64);
+	r = radeon_ring_alloc(rdev, ring, 64);
 	if (r) {
 		return;
 	}
@@ -118,7 +118,7 @@ void rv515_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_write(ring, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST);
 	radeon_ring_write(ring, PACKET0(0x20C8, 0));
 	radeon_ring_write(ring, 0);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 }
 
 int rv515_mc_wait_for_idle(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 34603b3c8..e38a360 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1851,7 +1851,7 @@ static int si_cp_start(struct radeon_device *rdev)
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	int r, i;
 
-	r = radeon_ring_lock(rdev, ring, 7 + 4);
+	r = radeon_ring_alloc(rdev, ring, 7 + 4);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -1870,11 +1870,11 @@ static int si_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
 	radeon_ring_write(ring, 0xc000);
 	radeon_ring_write(ring, 0xe000);
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 
 	si_cp_enable(rdev, true);
 
-	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
+	r = radeon_ring_alloc(rdev, ring, si_default_size + 10);
 	if (r) {
 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -1899,17 +1899,17 @@ static int si_cp_start(struct radeon_device *rdev)
 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-	radeon_ring_unlock_commit(rdev, ring);
+	radeon_ring_commit(rdev, ring);
 
 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
 		ring = &rdev->ring[i];
-		r = radeon_ring_lock(rdev, ring, 2);
+		r = radeon_ring_alloc(rdev, ring, 2);
 
 		/* clear the compute context state */
 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
 		radeon_ring_write(ring, 0);
 
-		radeon_ring_unlock_commit(rdev, ring);
+		radeon_ring_commit(rdev, ring);
 	}
 
 	return 0;
-- 
1.7.9.5

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/3] drm/radeon: add error handling to fence_wait_empty_locked
  2012-06-29 14:45 [PATCH 1/3] drm/radeon: move ring locking out of reset path Christian König
@ 2012-06-29 14:45 ` Christian König
  2012-06-29 15:19   ` Michel Dänzer
  2012-06-29 14:45 ` [PATCH 3/3] drm/radeon: add error handling to radeon_vm_unbind_locked Christian König
  2012-06-29 15:09 ` [PATCH 1/3] drm/radeon: move ring locking out of reset path Michel Dänzer
  2 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2012-06-29 14:45 UTC (permalink / raw)
  To: dri-devel

Instead of returning the error handle it directly
and while at it fix the comments about the ring lock.

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h       |    2 +-
 drivers/gpu/drm/radeon/radeon_fence.c |   33 +++++++++++++++++++++------------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 77b4519b..5861ec8 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -239,7 +239,7 @@ void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
 int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
-int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
+void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_any(struct radeon_device *rdev,
 			  struct radeon_fence **fences,
 			  bool intr);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 7b55625..be4e4f3 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -440,14 +440,11 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 	return 0;
 }
 
+/* caller must hold ring lock */
 int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq;
 
-	/* We are not protected by ring lock when reading current seq but
-	 * it's ok as worst case is we return to early while we could have
-	 * wait.
-	 */
 	seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
 	if (seq >= rdev->fence_drv[ring].sync_seq[ring]) {
 		/* nothing to wait for, last_seq is
@@ -457,15 +454,27 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 	return radeon_fence_wait_seq(rdev, seq, ring, false, false);
 }
 
-int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+/* caller must hold ring lock */
+void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 {
-	/* We are not protected by ring lock when reading current seq
-	 * but it's ok as wait empty is call from place where no more
-	 * activity can be scheduled so there won't be concurrent access
-	 * to seq value.
-	 */
-	return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].sync_seq[ring],
-				     ring, false, false);
+	uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
+
+	while(1) {
+		int r;
+		r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+		if (r == -EDEADLK) {
+			mutex_unlock(&rdev->ring_lock);
+			r = radeon_gpu_reset(rdev);
+			mutex_lock(&rdev->ring_lock);
+			if (!r)
+				continue;
+		}
+		if (r) {
+			dev_err(rdev->dev, "error waiting for ring to become"
+				" idle (%d)\n", r);
+		}
+		return;
+	}
 }
 
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
-- 
1.7.9.5

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/3] drm/radeon: add error handling to radeon_vm_unbind_locked
  2012-06-29 14:45 [PATCH 1/3] drm/radeon: move ring locking out of reset path Christian König
  2012-06-29 14:45 ` [PATCH 2/3] drm/radeon: add error handling to fence_wait_empty_locked Christian König
@ 2012-06-29 14:45 ` Christian König
  2012-06-29 15:13   ` Michel Dänzer
  2012-06-29 15:09 ` [PATCH 1/3] drm/radeon: move ring locking out of reset path Michel Dänzer
  2 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2012-06-29 14:45 UTC (permalink / raw)
  To: dri-devel

Waiting for a fence can fail for different reasons,
the most common is a deadlock.

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon_gart.c |   17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 2b34c1a..ee11c50 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -316,10 +316,21 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 	}
 
 	/* wait for vm use to end */
-	if (vm->fence) {
-		radeon_fence_wait(vm->fence, false);
-		radeon_fence_unref(&vm->fence);
+	while (vm->fence) {
+		int r;
+		r = radeon_fence_wait(vm->fence, false);
+		if (r)
+			DRM_ERROR("error while waiting for fence: %d\n", r);
+		if (r == -EDEADLK) {
+			mutex_unlock(&rdev->vm_manager.lock);
+			r = radeon_gpu_reset(rdev);
+			mutex_lock(&rdev->vm_manager.lock);
+			if (!r)
+				continue;
+		}
+		break;
 	}
+	radeon_fence_unref(&vm->fence);
 
 	/* hw unbind */
 	rdev->vm_manager.funcs->unbind(rdev, vm);
-- 
1.7.9.5

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/radeon: move ring locking out of reset path
  2012-06-29 14:45 [PATCH 1/3] drm/radeon: move ring locking out of reset path Christian König
  2012-06-29 14:45 ` [PATCH 2/3] drm/radeon: add error handling to fence_wait_empty_locked Christian König
  2012-06-29 14:45 ` [PATCH 3/3] drm/radeon: add error handling to radeon_vm_unbind_locked Christian König
@ 2012-06-29 15:09 ` Michel Dänzer
  2012-06-29 15:18   ` Christian König
  2 siblings, 1 reply; 12+ messages in thread
From: Michel Dänzer @ 2012-06-29 15:09 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote: 
> Hold the ring lock the whole time the reset is in progress,
> otherwise another process can submit new jobs.

Sounds good, but doesn't this create other paths (e.g. initialization,
resume) where the ring is being accessed without holding the lock? Isn't
that a problem?


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/3] drm/radeon: add error handling to radeon_vm_unbind_locked
  2012-06-29 14:45 ` [PATCH 3/3] drm/radeon: add error handling to radeon_vm_unbind_locked Christian König
@ 2012-06-29 15:13   ` Michel Dänzer
  0 siblings, 0 replies; 12+ messages in thread
From: Michel Dänzer @ 2012-06-29 15:13 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote: 
> Waiting for a fence can fail for different reasons,
> the most common is a deadlock.
> 
> Signed-off-by: Christian König <deathsimple@vodafone.de>

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/radeon: move ring locking out of reset path
  2012-06-29 15:09 ` [PATCH 1/3] drm/radeon: move ring locking out of reset path Michel Dänzer
@ 2012-06-29 15:18   ` Christian König
  2012-06-29 16:15     ` Michel Dänzer
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2012-06-29 15:18 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

On 29.06.2012 17:09, Michel Dänzer wrote:
> On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote:
>> Hold the ring lock the whole time the reset is in progress,
>> otherwise another process can submit new jobs.
> Sounds good, but doesn't this create other paths (e.g. initialization,
> resume) where the ring is being accessed without holding the lock? Isn't
> that a problem?

Thought about that also.

For init I'm pretty sure that no application can submit commands before 
we are done, otherwise we are doomed anyway.

For resume I'm not really sure, but I think that applications are 
resumed after the hardware driver had a chance of doing so.

Christian.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/3] drm/radeon: add error handling to fence_wait_empty_locked
  2012-06-29 14:45 ` [PATCH 2/3] drm/radeon: add error handling to fence_wait_empty_locked Christian König
@ 2012-06-29 15:19   ` Michel Dänzer
  0 siblings, 0 replies; 12+ messages in thread
From: Michel Dänzer @ 2012-06-29 15:19 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote: 
> Instead of returning the error handle it directly
> and while at it fix the comments about the ring lock.

Sounds like this should really be two separate changes?

> Signed-off-by: Christian König <deathsimple@vodafone.de>

Either way,

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/radeon: move ring locking out of reset path
  2012-06-29 15:18   ` Christian König
@ 2012-06-29 16:15     ` Michel Dänzer
  2012-07-02 15:41       ` Jerome Glisse
  0 siblings, 1 reply; 12+ messages in thread
From: Michel Dänzer @ 2012-06-29 16:15 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-06-29 at 17:18 +0200, Christian König wrote: 
> On 29.06.2012 17:09, Michel Dänzer wrote:
> > On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote:
> >> Hold the ring lock the whole time the reset is in progress,
> >> otherwise another process can submit new jobs.
> > Sounds good, but doesn't this create other paths (e.g. initialization,
> > resume) where the ring is being accessed without holding the lock? Isn't
> > that a problem?
> 
> Thought about that also.
> 
> For init I'm pretty sure that no application can submit commands before 
> we are done, otherwise we are doomed anyway.
> 
> For resume I'm not really sure, but I think that applications are 
> resumed after the hardware driver had a chance of doing so.

I hope you're right... but if it's not too much trouble, it might be
better to be safe than sorry and take the lock for those paths as well.


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/radeon: move ring locking out of reset path
  2012-06-29 16:15     ` Michel Dänzer
@ 2012-07-02 15:41       ` Jerome Glisse
  2012-07-02 15:58         ` Christian König
  0 siblings, 1 reply; 12+ messages in thread
From: Jerome Glisse @ 2012-07-02 15:41 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

On Fri, Jun 29, 2012 at 12:15 PM, Michel Dänzer <michel@daenzer.net> wrote:
> On Fre, 2012-06-29 at 17:18 +0200, Christian König wrote:
>> On 29.06.2012 17:09, Michel Dänzer wrote:
>> > On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote:
>> >> Hold the ring lock the whole time the reset is in progress,
>> >> otherwise another process can submit new jobs.
>> > Sounds good, but doesn't this create other paths (e.g. initialization,
>> > resume) where the ring is being accessed without holding the lock? Isn't
>> > that a problem?
>>
>> Thought about that also.
>>
>> For init I'm pretty sure that no application can submit commands before
>> we are done, otherwise we are doomed anyway.
>>
>> For resume I'm not really sure, but I think that applications are
>> resumed after the hardware driver had a chance of doing so.
>
> I hope you're right... but if it's not too much trouble, it might be
> better to be safe than sorry and take the lock for those paths as well.
>
>

NAK this is the wrong way to solve the issue, we need a global lock on
all path that can trigger gpu activities. Previously it was the cs
mutex, but i haven't thought about it too much when it got removed. So
to fix the situation i am sending a patch with rw semaphore.

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/radeon: move ring locking out of reset path
  2012-07-02 15:41       ` Jerome Glisse
@ 2012-07-02 15:58         ` Christian König
  2012-07-02 16:01           ` Jerome Glisse
  2012-07-02 16:15           ` Jerome Glisse
  0 siblings, 2 replies; 12+ messages in thread
From: Christian König @ 2012-07-02 15:58 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: Michel Dänzer, dri-devel

On 02.07.2012 17:41, Jerome Glisse wrote:
> On Fri, Jun 29, 2012 at 12:15 PM, Michel Dänzer <michel@daenzer.net> wrote:
>> On Fre, 2012-06-29 at 17:18 +0200, Christian König wrote:
>>> On 29.06.2012 17:09, Michel Dänzer wrote:
>>>> On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote:
>>>>> Hold the ring lock the whole time the reset is in progress,
>>>>> otherwise another process can submit new jobs.
>>>> Sounds good, but doesn't this create other paths (e.g. initialization,
>>>> resume) where the ring is being accessed without holding the lock? Isn't
>>>> that a problem?
>>> Thought about that also.
>>>
>>> For init I'm pretty sure that no application can submit commands before
>>> we are done, otherwise we are doomed anyway.
>>>
>>> For resume I'm not really sure, but I think that applications are
>>> resumed after the hardware driver had a chance of doing so.
>> I hope you're right... but if it's not too much trouble, it might be
>> better to be safe than sorry and take the lock for those paths as well.
>>
>>
> NAK this is the wrong way to solve the issue, we need a global lock on
> all path that can trigger gpu activities. Previously it was the cs
> mutex, but i haven't thought about it too much when it got removed. So
> to fix the situation i am sending a patch with rw semaphore.
So what I'm missing? What else can trigger GPU activity when not the rings?

I'm currently working on ring-partial resets and also resets where you 
only skip over a single faulty IB instead of flushing the whole ring. 
And my current idea for that to work is that we hold the ring lock while 
we do suspend, ring_save, asic_reset, resume and ring_restore.

Christian.

>
> Cheers,
> Jerome
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/radeon: move ring locking out of reset path
  2012-07-02 15:58         ` Christian König
@ 2012-07-02 16:01           ` Jerome Glisse
  2012-07-02 16:15           ` Jerome Glisse
  1 sibling, 0 replies; 12+ messages in thread
From: Jerome Glisse @ 2012-07-02 16:01 UTC (permalink / raw)
  To: Christian König; +Cc: Michel Dänzer, dri-devel

On Mon, Jul 2, 2012 at 11:58 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 02.07.2012 17:41, Jerome Glisse wrote:
>>
>> On Fri, Jun 29, 2012 at 12:15 PM, Michel Dänzer <michel@daenzer.net>
>> wrote:
>>>
>>> On Fre, 2012-06-29 at 17:18 +0200, Christian König wrote:
>>>>
>>>> On 29.06.2012 17:09, Michel Dänzer wrote:
>>>>>
>>>>> On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote:
>>>>>>
>>>>>> Hold the ring lock the whole time the reset is in progress,
>>>>>> otherwise another process can submit new jobs.
>>>>>
>>>>> Sounds good, but doesn't this create other paths (e.g. initialization,
>>>>> resume) where the ring is being accessed without holding the lock?
>>>>> Isn't
>>>>> that a problem?
>>>>
>>>> Thought about that also.
>>>>
>>>> For init I'm pretty sure that no application can submit commands before
>>>> we are done, otherwise we are doomed anyway.
>>>>
>>>> For resume I'm not really sure, but I think that applications are
>>>> resumed after the hardware driver had a chance of doing so.
>>>
>>> I hope you're right... but if it's not too much trouble, it might be
>>> better to be safe than sorry and take the lock for those paths as well.
>>>
>>>
>> NAK this is the wrong way to solve the issue, we need a global lock on
>> all path that can trigger gpu activities. Previously it was the cs
>> mutex, but i haven't thought about it too much when it got removed. So
>> to fix the situation i am sending a patch with rw semaphore.
>
> So what I'm missing? What else can trigger GPU activity when not the rings?
>
> I'm currently working on ring-partial resets and also resets where you only
> skip over a single faulty IB instead of flushing the whole ring. And my
> current idea for that to work is that we hold the ring lock while we do
> suspend, ring_save, asic_reset, resume and ring_restore.
>
> Christian.
>

I just sent a patch, the idea is that you want gpu reset to be an
exclusive operation like gpu init, or gpu resume. So by taking rw
semaphore you allow the gpu reset to be exclusive and so you know
nobody can trigger gpu activies while still allowing concurrency in
case no gpu reset is on going.

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/3] drm/radeon: move ring locking out of reset path
  2012-07-02 15:58         ` Christian König
  2012-07-02 16:01           ` Jerome Glisse
@ 2012-07-02 16:15           ` Jerome Glisse
  1 sibling, 0 replies; 12+ messages in thread
From: Jerome Glisse @ 2012-07-02 16:15 UTC (permalink / raw)
  To: Christian König; +Cc: Michel Dänzer, dri-devel

On Mon, Jul 2, 2012 at 11:58 AM, Christian König
<deathsimple@vodafone.de> wrote:
> On 02.07.2012 17:41, Jerome Glisse wrote:
>>
>> On Fri, Jun 29, 2012 at 12:15 PM, Michel Dänzer <michel@daenzer.net>
>> wrote:
>>>
>>> On Fre, 2012-06-29 at 17:18 +0200, Christian König wrote:
>>>>
>>>> On 29.06.2012 17:09, Michel Dänzer wrote:
>>>>>
>>>>> On Fre, 2012-06-29 at 16:45 +0200, Christian König wrote:
>>>>>>
>>>>>> Hold the ring lock the whole time the reset is in progress,
>>>>>> otherwise another process can submit new jobs.
>>>>>
>>>>> Sounds good, but doesn't this create other paths (e.g. initialization,
>>>>> resume) where the ring is being accessed without holding the lock?
>>>>> Isn't
>>>>> that a problem?
>>>>
>>>> Thought about that also.
>>>>
>>>> For init I'm pretty sure that no application can submit commands before
>>>> we are done, otherwise we are doomed anyway.
>>>>
>>>> For resume I'm not really sure, but I think that applications are
>>>> resumed after the hardware driver had a chance of doing so.
>>>
>>> I hope you're right... but if it's not too much trouble, it might be
>>> better to be safe than sorry and take the lock for those paths as well.
>>>
>>>
>> NAK this is the wrong way to solve the issue, we need a global lock on
>> all path that can trigger gpu activities. Previously it was the cs
>> mutex, but i haven't thought about it too much when it got removed. So
>> to fix the situation i am sending a patch with rw semaphore.
>
> So what I'm missing? What else can trigger GPU activity when not the rings?
>
> I'm currently working on ring-partial resets and also resets where you only
> skip over a single faulty IB instead of flushing the whole ring. And my
> current idea for that to work is that we hold the ring lock while we do
> suspend, ring_save, asic_reset, resume and ring_restore.
>
> Christian.
>

I should add that gpu_reset should be an heavy reset, and if you want
to only reset one ring and see if gpu can continue without heavy reset
then you should do it as a special ring reset that doesn't reset mc
and some other block but only the affected ring (and i am assuming
that hw behave properly here). If that light weight ring reset doesn't
work than let the heavy reset kicks in. So yes your light weight per
ring reset would only need to take the ring lock but now need to
change the ring lock usage we have now.

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2012-07-02 16:15 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-06-29 14:45 [PATCH 1/3] drm/radeon: move ring locking out of reset path Christian König
2012-06-29 14:45 ` [PATCH 2/3] drm/radeon: add error handling to fence_wait_empty_locked Christian König
2012-06-29 15:19   ` Michel Dänzer
2012-06-29 14:45 ` [PATCH 3/3] drm/radeon: add error handling to radeon_vm_unbind_locked Christian König
2012-06-29 15:13   ` Michel Dänzer
2012-06-29 15:09 ` [PATCH 1/3] drm/radeon: move ring locking out of reset path Michel Dänzer
2012-06-29 15:18   ` Christian König
2012-06-29 16:15     ` Michel Dänzer
2012-07-02 15:41       ` Jerome Glisse
2012-07-02 15:58         ` Christian König
2012-07-02 16:01           ` Jerome Glisse
2012-07-02 16:15           ` Jerome Glisse

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.