[PATCH 1/2] drm/radeon: Make sure CS mutex is held across GPU reset.

* [PATCH 1/2] drm/radeon: Make sure CS mutex is held across GPU reset.
@ 2011-09-15 18:21 Michel Dänzer
  2011-09-15 18:21 ` [PATCH 2/2] drm/radeon: Hold the CS mutex across suspend/resume Michel Dänzer
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Michel Dänzer @ 2011-09-15 18:21 UTC (permalink / raw)
  To: dri-devel

From: Michel Dänzer <michel.daenzer@amd.com>

This was only the case if the GPU reset was triggered from the CS ioctl,
otherwise other processes could happily enter the CS ioctl and wreak havoc
during the GPU reset.

This is a little complicated because the GPU reset can be triggered from the
CS ioctl, in which case we're already holding the mutex, or from other call
paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't
allow nested locking or finding out the mutex owner, so we need to handle this
with some helper functions.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h        |   60 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_cs.c     |   14 ++++----
 drivers/gpu/drm/radeon/radeon_device.c |   16 ++++++++
 3 files changed, 83 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index ef0e0e0..89304d9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1203,6 +1203,8 @@ struct radeon_device {
 	struct radeon_pm		pm;
 	uint32_t			bios_scratch[RADEON_BIOS_NUM_SCRATCH];
 	struct mutex			cs_mutex;
+	struct task_struct		*cs_mutex_owner;
+	struct mutex			cs_mutex_owner_mutex;
 	struct radeon_wb		wb;
 	struct radeon_dummy_page	dummy_page;
 	bool				gpu_lockup;
@@ -1241,6 +1243,64 @@ struct radeon_device {
 	struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
 };
 
+
+/*
+ * CS mutex helpers
+ */
+
+static inline void cs_mutex_lock(struct radeon_device *rdev)
+{
+	mutex_lock(&rdev->cs_mutex);
+
+	mutex_lock(&rdev->cs_mutex_owner_mutex);
+	rdev->cs_mutex_owner = current;
+	mutex_unlock(&rdev->cs_mutex_owner_mutex);
+}
+
+static inline void cs_mutex_unlock(struct radeon_device *rdev)
+{
+	mutex_lock(&rdev->cs_mutex_owner_mutex);
+	rdev->cs_mutex_owner = NULL;
+	mutex_unlock(&rdev->cs_mutex_owner_mutex);
+
+	mutex_unlock(&rdev->cs_mutex);
+}
+
+/*
+ * Check if this process locked the CS mutex already; if it didn't, lock it.
+ *
+ * Returns:
+ * true: This function locked the mutex.
+ * false: This function didn't lock the mutex (this process already locked it
+ * before), so the caller probably shouldn't unlock it.
+ */
+static inline int cs_mutex_ensure_locked(struct radeon_device *rdev)
+{
+	int took_mutex;
+	int have_mutex;
+
+	mutex_lock(&rdev->cs_mutex_owner_mutex);
+	took_mutex = mutex_trylock(&rdev->cs_mutex);
+	if (took_mutex) {
+		/* The mutex was unlocked before, so it's ours now */
+		rdev->cs_mutex_owner = current;
+		have_mutex = true;
+	} else {
+		/* Somebody already locked the mutex. Was it this process? */
+		have_mutex = (rdev->cs_mutex_owner == current);
+	}
+	mutex_unlock(&rdev->cs_mutex_owner_mutex);
+
+	if (!have_mutex) {
+		/* Another process locked the mutex, take it */
+		cs_mutex_lock(rdev);
+		took_mutex = true;
+	}
+
+	return took_mutex;
+}
+
+
 int radeon_device_init(struct radeon_device *rdev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index fae00c0..61e3063 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	struct radeon_cs_chunk *ib_chunk;
 	int r;
 
-	mutex_lock(&rdev->cs_mutex);
+	cs_mutex_lock(rdev);
 	/* initialize parser */
 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
 	parser.filp = filp;
@@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r) {
 		DRM_ERROR("Failed to initialize parser !\n");
 		radeon_cs_parser_fini(&parser, r);
-		mutex_unlock(&rdev->cs_mutex);
+		cs_mutex_unlock(rdev);
 		return r;
 	}
 	r =  radeon_ib_get(rdev, &parser.ib);
 	if (r) {
 		DRM_ERROR("Failed to get ib !\n");
 		radeon_cs_parser_fini(&parser, r);
-		mutex_unlock(&rdev->cs_mutex);
+		cs_mutex_unlock(rdev);
 		return r;
 	}
 	r = radeon_cs_parser_relocs(&parser);
@@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("Failed to parse relocation %d!\n", r);
 		radeon_cs_parser_fini(&parser, r);
-		mutex_unlock(&rdev->cs_mutex);
+		cs_mutex_unlock(rdev);
 		return r;
 	}
 	/* Copy the packet into the IB, the parser will read from the
@@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r || parser.parser_error) {
 		DRM_ERROR("Invalid command stream !\n");
 		radeon_cs_parser_fini(&parser, r);
-		mutex_unlock(&rdev->cs_mutex);
+		cs_mutex_unlock(rdev);
 		return r;
 	}
 	r = radeon_cs_finish_pages(&parser);
 	if (r) {
 		DRM_ERROR("Invalid command stream !\n");
 		radeon_cs_parser_fini(&parser, r);
-		mutex_unlock(&rdev->cs_mutex);
+		cs_mutex_unlock(rdev);
 		return r;
 	}
 	r = radeon_ib_schedule(rdev, parser.ib);
@@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		DRM_ERROR("Failed to schedule IB !\n");
 	}
 	radeon_cs_parser_fini(&parser, r);
-	mutex_unlock(&rdev->cs_mutex);
+	cs_mutex_unlock(rdev);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 5f0fd85..5f3d02d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev,
 
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
+	mutex_init(&rdev->cs_mutex_owner_mutex);
+	rdev->cs_mutex_owner = NULL;
 	mutex_init(&rdev->cs_mutex);
 	mutex_init(&rdev->ib_pool.mutex);
 	mutex_init(&rdev->cp.mutex);
@@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 {
 	int r;
 	int resched;
+	int took_cs_mutex;
+
+	/* Prevent CS ioctl from interfering */
+	took_cs_mutex = cs_mutex_ensure_locked(rdev);
+	if (!rdev->gpu_lockup) {
+		DRM_DEBUG("GPU already reset by previous CS mutex holder\n");
+		if (took_cs_mutex)
+			cs_mutex_unlock(rdev);
+		return 0;
+	}
 
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
@@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 		radeon_restore_bios_scratch_regs(rdev);
 		drm_helper_resume_force_mode(rdev->ddev);
 		ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
+		if (took_cs_mutex)
+			cs_mutex_unlock(rdev);
 		return 0;
 	}
+	if (took_cs_mutex)
+		cs_mutex_unlock(rdev);
 	/* bad news, how to tell it to userspace ? */
 	dev_info(rdev->dev, "GPU reset failed\n");
 	return r;
-- 
1.7.6.3

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread