All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] drm/msm: GPU debugging enhancements
@ 2017-10-24 13:22 Rob Clark
       [not found] ` <20171024132256.20286-1-robdclark-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-10-24 13:22   ` Rob Clark
  0 siblings, 2 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Jordan Crouse,
	freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Rob Clark

Something that I've been using locally for a while, and found quite
helpful for debugging gallium driver issues:

 1) Better messages about what process triggered a crash, which is
    in particular particularly useful for piglit
 2) Mechanism to dump just submits which triggered GPU hangs.

Eventually I want to extend #2 to also dump submits that trigger
faults, but this is going to require spiffing out IOMMU framework
somewhat.. in particular we can't dump from IRQ context, so a
workqueue is needed, but we want to keep the IOMMU stalled until
after we dump the submit.  Maybe we'll get something useable out
of the currently discussed SVM patches.

Rob Clark (6):
  drm/msm: show task cmdline in gpu recovery messages
  drm/msm: add special _get_vaddr_active() for cmdstream dumps
  drm/msm: split rd debugfs file
  drm/msm/rd: allow adding addition msg to top of dump
  drm/msm: preserve IOVAs in submit's bo table
  drm/msm: dump submits which triggered gpu hang

 drivers/gpu/drm/msm/msm_drv.h        |   7 ++-
 drivers/gpu/drm/msm/msm_gem.c        |  22 ++++++-
 drivers/gpu/drm/msm/msm_gem_submit.c |  11 ++--
 drivers/gpu/drm/msm/msm_gpu.c        |  90 +++++++++++++++++++--------
 drivers/gpu/drm/msm/msm_rd.c         | 116 ++++++++++++++++++++++++++---------
 5 files changed, 183 insertions(+), 63 deletions(-)

-- 
2.13.6

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/6] drm/msm: show task cmdline in gpu recovery messages
  2017-10-24 13:22 [PATCH 0/6] drm/msm: GPU debugging enhancements Rob Clark
@ 2017-10-24 13:22     ` Rob Clark
  2017-10-24 13:22   ` Rob Clark
  1 sibling, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: David Airlie, linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Rob Clark,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jordan Crouse,
	freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Now that freedreno gallium driver defaults to using submit_queue task
(render reordering), just showing task->comm is not so useful (ie. it is
always "flush_queue:0"), so also dump the cmdline.  This should also be
more useful for piglit/shader_runner.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gpu.c | 54 +++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index a05aa119f22b..d26a7282466e 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -235,6 +235,20 @@ static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
 	}
 }
 
+static struct msm_gem_submit *
+find_submit(struct msm_ringbuffer *ring, uint32_t fence)
+{
+	struct msm_gem_submit *submit;
+
+	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));
+
+	list_for_each_entry(submit, &ring->submits, node)
+		if (submit->seqno == fence)
+			return submit;
+
+	return NULL;
+}
+
 static void retire_submits(struct msm_gpu *gpu);
 
 static void recover_worker(struct work_struct *work)
@@ -268,19 +282,35 @@ static void recover_worker(struct work_struct *work)
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
 	fence = cur_ring->memptrs->fence + 1;
 
-	list_for_each_entry(submit, &cur_ring->submits, node) {
-		if (submit->seqno == fence) {
-			struct task_struct *task;
-
-			rcu_read_lock();
-			task = pid_task(submit->pid, PIDTYPE_PID);
-			if (task) {
-				dev_err(dev->dev, "%s: offending task: %s\n",
-						gpu->name, task->comm);
-			}
-			rcu_read_unlock();
-			break;
+	submit = find_submit(cur_ring, fence);
+	if (submit) {
+		struct task_struct *task;
+
+		rcu_read_lock();
+		task = pid_task(submit->pid, PIDTYPE_PID);
+		if (task) {
+			char buf[256];
+			int len;
+
+			/*
+			 * so slightly annoying, in other paths like mmap'ing gem
+			 * buffers, mmap_sem is acquired before struct_mutex, which
+			 * means we can't hold struct_mutex across the call to
+			 * get_cmdline().  But we don't need 'submit' any further
+			 * so no issue if hypothetically it goes away when dropping
+			 * struct_mutex.  For good measure null out the submit ptr
+			 * to make this obvious.
+			 */
+			submit = NULL;
+			mutex_unlock(&dev->struct_mutex);
+			len = get_cmdline(task, buf, sizeof(buf));
+			mutex_lock(&dev->struct_mutex);
+
+			dev_err(dev->dev, "%s: offending task: %s (%-*s)\n",
+					gpu->name, task->comm, len, buf);
 		}
+		rcu_read_unlock();
+
 	}
 
 	if (msm_gpu_active(gpu)) {
-- 
2.13.6

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 1/6] drm/msm: show task cmdline in gpu recovery messages
@ 2017-10-24 13:22     ` Rob Clark
  0 siblings, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Jordan Crouse, Rob Clark, David Airlie,
	linux-kernel

Now that freedreno gallium driver defaults to using submit_queue task
(render reordering), just showing task->comm is not so useful (ie. it is
always "flush_queue:0"), so also dump the cmdline.  This should also be
more useful for piglit/shader_runner.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gpu.c | 54 +++++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index a05aa119f22b..d26a7282466e 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -235,6 +235,20 @@ static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
 	}
 }
 
+static struct msm_gem_submit *
+find_submit(struct msm_ringbuffer *ring, uint32_t fence)
+{
+	struct msm_gem_submit *submit;
+
+	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));
+
+	list_for_each_entry(submit, &ring->submits, node)
+		if (submit->seqno == fence)
+			return submit;
+
+	return NULL;
+}
+
 static void retire_submits(struct msm_gpu *gpu);
 
 static void recover_worker(struct work_struct *work)
@@ -268,19 +282,35 @@ static void recover_worker(struct work_struct *work)
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
 	fence = cur_ring->memptrs->fence + 1;
 
-	list_for_each_entry(submit, &cur_ring->submits, node) {
-		if (submit->seqno == fence) {
-			struct task_struct *task;
-
-			rcu_read_lock();
-			task = pid_task(submit->pid, PIDTYPE_PID);
-			if (task) {
-				dev_err(dev->dev, "%s: offending task: %s\n",
-						gpu->name, task->comm);
-			}
-			rcu_read_unlock();
-			break;
+	submit = find_submit(cur_ring, fence);
+	if (submit) {
+		struct task_struct *task;
+
+		rcu_read_lock();
+		task = pid_task(submit->pid, PIDTYPE_PID);
+		if (task) {
+			char buf[256];
+			int len;
+
+			/*
+			 * so slightly annoying, in other paths like mmap'ing gem
+			 * buffers, mmap_sem is acquired before struct_mutex, which
+			 * means we can't hold struct_mutex across the call to
+			 * get_cmdline().  But we don't need 'submit' any further
+			 * so no issue if hypothetically it goes away when dropping
+			 * struct_mutex.  For good measure null out the submit ptr
+			 * to make this obvious.
+			 */
+			submit = NULL;
+			mutex_unlock(&dev->struct_mutex);
+			len = get_cmdline(task, buf, sizeof(buf));
+			mutex_lock(&dev->struct_mutex);
+
+			dev_err(dev->dev, "%s: offending task: %s (%-*s)\n",
+					gpu->name, task->comm, len, buf);
 		}
+		rcu_read_unlock();
+
 	}
 
 	if (msm_gpu_active(gpu)) {
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/6] drm/msm: add special _get_vaddr_active() for cmdstream dumps
  2017-10-24 13:22 [PATCH 0/6] drm/msm: GPU debugging enhancements Rob Clark
@ 2017-10-24 13:22     ` Rob Clark
  2017-10-24 13:22   ` Rob Clark
  1 sibling, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: David Airlie, linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Rob Clark,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jordan Crouse,
	freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Prep work for adding a debugfs file that dumps just submits which
trigger hangs/faults.  In this case the bo may already be in the
MADV_DONTNEED state, but will be still on the active list (since
the submit hasn't completed yet).  So the normal check that the
bo is in the WILLNEED state does not apply.  (But of course the bo
should definitely not be in the PURGED state!)

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_drv.h |  1 +
 drivers/gpu/drm/msm/msm_gem.c | 22 ++++++++++++++++++++--
 drivers/gpu/drm/msm/msm_rd.c  |  2 +-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index c46dc12dbbcc..666fce66f9dd 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -213,6 +213,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
 int msm_gem_prime_pin(struct drm_gem_object *obj);
 void msm_gem_prime_unpin(struct drm_gem_object *obj);
 void *msm_gem_get_vaddr(struct drm_gem_object *obj);
+void *msm_gem_get_vaddr_active(struct drm_gem_object *obj);
 void msm_gem_put_vaddr(struct drm_gem_object *obj);
 int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
 int msm_gem_sync_object(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index ea5bb0e1632c..81fe6d6740ce 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -470,14 +470,16 @@ int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
 	return ret;
 }
 
-void *msm_gem_get_vaddr(struct drm_gem_object *obj)
+static void *get_vaddr(struct drm_gem_object *obj, unsigned madv)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	int ret = 0;
 
 	mutex_lock(&msm_obj->lock);
 
-	if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) {
+	if (WARN_ON(msm_obj->madv > madv)) {
+		dev_err(obj->dev->dev, "Invalid madv state: %u vs %u\n",
+			msm_obj->madv, madv);
 		mutex_unlock(&msm_obj->lock);
 		return ERR_PTR(-EBUSY);
 	}
@@ -513,6 +515,22 @@ void *msm_gem_get_vaddr(struct drm_gem_object *obj)
 	return ERR_PTR(ret);
 }
 
+void *msm_gem_get_vaddr(struct drm_gem_object *obj)
+{
+	return get_vaddr(obj, MSM_MADV_WILLNEED);
+}
+
+/*
+ * Don't use this!  It is for the very special case of dumping
+ * submits from GPU hangs or faults, were the bo may already
+ * be MSM_MADV_DONTNEED, but we know the buffer is still on the
+ * active list.
+ */
+void *msm_gem_get_vaddr_active(struct drm_gem_object *obj)
+{
+	return get_vaddr(obj, __MSM_MADV_PURGED);
+}
+
 void msm_gem_put_vaddr(struct drm_gem_object *obj)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 35da0a07e567..981bb17c11a7 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -294,7 +294,7 @@ static void snapshot_buf(struct msm_rd_state *rd,
 	if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ))
 		return;
 
-	buf = msm_gem_get_vaddr(&obj->base);
+	buf = msm_gem_get_vaddr_active(&obj->base);
 	if (IS_ERR(buf))
 		return;
 
-- 
2.13.6

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/6] drm/msm: add special _get_vaddr_active() for cmdstream dumps
@ 2017-10-24 13:22     ` Rob Clark
  0 siblings, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Jordan Crouse, Rob Clark, David Airlie,
	linux-kernel

Prep work for adding a debugfs file that dumps just submits which
trigger hangs/faults.  In this case the bo may already be in the
MADV_DONTNEED state, but will be still on the active list (since
the submit hasn't completed yet).  So the normal check that the
bo is in the WILLNEED state does not apply.  (But of course the bo
should definitely not be in the PURGED state!)

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_drv.h |  1 +
 drivers/gpu/drm/msm/msm_gem.c | 22 ++++++++++++++++++++--
 drivers/gpu/drm/msm/msm_rd.c  |  2 +-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index c46dc12dbbcc..666fce66f9dd 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -213,6 +213,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
 int msm_gem_prime_pin(struct drm_gem_object *obj);
 void msm_gem_prime_unpin(struct drm_gem_object *obj);
 void *msm_gem_get_vaddr(struct drm_gem_object *obj);
+void *msm_gem_get_vaddr_active(struct drm_gem_object *obj);
 void msm_gem_put_vaddr(struct drm_gem_object *obj);
 int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
 int msm_gem_sync_object(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index ea5bb0e1632c..81fe6d6740ce 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -470,14 +470,16 @@ int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
 	return ret;
 }
 
-void *msm_gem_get_vaddr(struct drm_gem_object *obj)
+static void *get_vaddr(struct drm_gem_object *obj, unsigned madv)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	int ret = 0;
 
 	mutex_lock(&msm_obj->lock);
 
-	if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) {
+	if (WARN_ON(msm_obj->madv > madv)) {
+		dev_err(obj->dev->dev, "Invalid madv state: %u vs %u\n",
+			msm_obj->madv, madv);
 		mutex_unlock(&msm_obj->lock);
 		return ERR_PTR(-EBUSY);
 	}
@@ -513,6 +515,22 @@ void *msm_gem_get_vaddr(struct drm_gem_object *obj)
 	return ERR_PTR(ret);
 }
 
+void *msm_gem_get_vaddr(struct drm_gem_object *obj)
+{
+	return get_vaddr(obj, MSM_MADV_WILLNEED);
+}
+
+/*
+ * Don't use this!  It is for the very special case of dumping
+ * submits from GPU hangs or faults, were the bo may already
+ * be MSM_MADV_DONTNEED, but we know the buffer is still on the
+ * active list.
+ */
+void *msm_gem_get_vaddr_active(struct drm_gem_object *obj)
+{
+	return get_vaddr(obj, __MSM_MADV_PURGED);
+}
+
 void msm_gem_put_vaddr(struct drm_gem_object *obj)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 35da0a07e567..981bb17c11a7 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -294,7 +294,7 @@ static void snapshot_buf(struct msm_rd_state *rd,
 	if (!(submit->bos[idx].flags & MSM_SUBMIT_BO_READ))
 		return;
 
-	buf = msm_gem_get_vaddr(&obj->base);
+	buf = msm_gem_get_vaddr_active(&obj->base);
 	if (IS_ERR(buf))
 		return;
 
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/6] drm/msm: split rd debugfs file
  2017-10-24 13:22 [PATCH 0/6] drm/msm: GPU debugging enhancements Rob Clark
@ 2017-10-24 13:22     ` Rob Clark
  2017-10-24 13:22   ` Rob Clark
  1 sibling, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: David Airlie, linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Rob Clark,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jordan Crouse,
	freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Split into two instances, the existing $debugfs/rd which continues to
dump all submits, and $debugfs/hangrd which will be used to dump just
submits that cause gpu hangs (and eventually faults, but that will
require some iommu framework enhancements).

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_drv.h |   5 ++-
 drivers/gpu/drm/msm/msm_gpu.c |   2 +-
 drivers/gpu/drm/msm/msm_rd.c  | 101 +++++++++++++++++++++++++++++++-----------
 3 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 666fce66f9dd..b01bd7fa9c2b 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -108,7 +108,8 @@ struct msm_drm_private {
 
 	struct drm_fb_helper *fbdev;
 
-	struct msm_rd_state *rd;
+	struct msm_rd_state *rd;       /* debugfs to dump all submits */
+	struct msm_rd_state *hangrd;   /* debugfs to dump hanging submits */
 	struct msm_perf_state *perf;
 
 	/* list of GEM objects: */
@@ -298,7 +299,7 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
 int msm_debugfs_late_init(struct drm_device *dev);
 int msm_rd_debugfs_init(struct drm_minor *minor);
 void msm_rd_debugfs_cleanup(struct msm_drm_private *priv);
-void msm_rd_dump_submit(struct msm_gem_submit *submit);
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit);
 int msm_perf_debugfs_init(struct drm_minor *minor);
 void msm_perf_debugfs_cleanup(struct msm_drm_private *priv);
 #else
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index d26a7282466e..5baeb34f098e 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -556,7 +556,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	list_add_tail(&submit->node, &ring->submits);
 
-	msm_rd_dump_submit(submit);
+	msm_rd_dump_submit(priv->rd, submit);
 
 	update_sw_cntrs(gpu);
 
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 981bb17c11a7..d689407c809a 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -19,11 +19,17 @@
  *
  *   tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd
  *
- * To log the cmdstream in a format that is understood by freedreno/cffdump
+ * to log the cmdstream in a format that is understood by freedreno/cffdump
  * utility.  By comparing the last successfully completed fence #, to the
  * cmdstream for the next fence, you can narrow down which process and submit
  * caused the gpu crash/lockup.
  *
+ * Additionally:
+ *
+ *   tail -f /sys/kernel/debug/dri/<minor>/hangrd > logfile.rd
+ *
+ * will capture just the cmdstream from submits which triggered a GPU hang.
+ *
  * This bypasses drm_debugfs_create_files() mainly because we need to use
  * our own fops for a bit more control.  In particular, we don't want to
  * do anything if userspace doesn't have the debugfs file open.
@@ -220,53 +226,89 @@ static const struct file_operations rd_debugfs_fops = {
 	.release = rd_release,
 };
 
-int msm_rd_debugfs_init(struct drm_minor *minor)
+
+static void rd_cleanup(struct msm_rd_state *rd)
+{
+	if (!rd)
+		return;
+
+	mutex_destroy(&rd->read_lock);
+	kfree(rd);
+}
+
+static struct msm_rd_state *rd_init(struct drm_minor *minor, const char *name)
 {
-	struct msm_drm_private *priv = minor->dev->dev_private;
 	struct msm_rd_state *rd;
 	struct dentry *ent;
-
-	/* only create on first minor: */
-	if (priv->rd)
-		return 0;
+	int ret = 0;
 
 	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
 	if (!rd)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	rd->dev = minor->dev;
 	rd->fifo.buf = rd->buf;
 
 	mutex_init(&rd->read_lock);
-	priv->rd = rd;
 
 	init_waitqueue_head(&rd->fifo_event);
 
-	ent = debugfs_create_file("rd", S_IFREG | S_IRUGO,
+	ent = debugfs_create_file(name, S_IFREG | S_IRUGO,
 			minor->debugfs_root, rd, &rd_debugfs_fops);
 	if (!ent) {
-		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/rd\n",
-				minor->debugfs_root);
+		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n",
+				minor->debugfs_root, name);
+		ret = -ENOMEM;
 		goto fail;
 	}
 
+	return rd;
+
+fail:
+	rd_cleanup(rd);
+	return ERR_PTR(ret);
+}
+
+int msm_rd_debugfs_init(struct drm_minor *minor)
+{
+	struct msm_drm_private *priv = minor->dev->dev_private;
+	struct msm_rd_state *rd;
+	int ret;
+
+	/* only create on first minor: */
+	if (priv->rd)
+		return 0;
+
+	rd = rd_init(minor, "rd");
+	if (IS_ERR(rd)) {
+		ret = PTR_ERR(rd);
+		goto fail;
+	}
+
+	priv->rd = rd;
+
+	rd = rd_init(minor, "hangrd");
+	if (IS_ERR(rd)) {
+		ret = PTR_ERR(rd);
+		goto fail;
+	}
+
+	priv->hangrd = rd;
+
 	return 0;
 
 fail:
 	msm_rd_debugfs_cleanup(priv);
-	return -1;
+	return ret;
 }
 
 void msm_rd_debugfs_cleanup(struct msm_drm_private *priv)
 {
-	struct msm_rd_state *rd = priv->rd;
-
-	if (!rd)
-		return;
-
+	rd_cleanup(priv->rd);
 	priv->rd = NULL;
-	mutex_destroy(&rd->read_lock);
-	kfree(rd);
+
+	rd_cleanup(priv->hangrd);
+	priv->hangrd = NULL;
 }
 
 static void snapshot_buf(struct msm_rd_state *rd,
@@ -304,11 +346,10 @@ static void snapshot_buf(struct msm_rd_state *rd,
 }
 
 /* called under struct_mutex */
-void msm_rd_dump_submit(struct msm_gem_submit *submit)
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit)
 {
 	struct drm_device *dev = submit->dev;
-	struct msm_drm_private *priv = dev->dev_private;
-	struct msm_rd_state *rd = priv->rd;
+	struct task_struct *task;
 	char msg[128];
 	int i, n;
 
@@ -320,9 +361,17 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 	 */
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u",
-			TASK_COMM_LEN, current->comm, task_pid_nr(current),
-			submit->fence->seqno);
+	rcu_read_lock();
+	task = pid_task(submit->pid, PIDTYPE_PID);
+	if (task) {
+		n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u",
+				TASK_COMM_LEN, task->comm,
+				pid_nr(submit->pid), submit->seqno);
+	} else {
+		n = snprintf(msg, sizeof(msg), "???/%d: fence=%u",
+				pid_nr(submit->pid), submit->seqno);
+	}
+	rcu_read_unlock();
 
 	rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
 
-- 
2.13.6

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/6] drm/msm: split rd debugfs file
@ 2017-10-24 13:22     ` Rob Clark
  0 siblings, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Jordan Crouse, Rob Clark, David Airlie,
	linux-kernel

Split into two instances, the existing $debugfs/rd which continues to
dump all submits, and $debugfs/hangrd which will be used to dump just
submits that cause gpu hangs (and eventually faults, but that will
require some iommu framework enhancements).

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_drv.h |   5 ++-
 drivers/gpu/drm/msm/msm_gpu.c |   2 +-
 drivers/gpu/drm/msm/msm_rd.c  | 101 +++++++++++++++++++++++++++++++-----------
 3 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 666fce66f9dd..b01bd7fa9c2b 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -108,7 +108,8 @@ struct msm_drm_private {
 
 	struct drm_fb_helper *fbdev;
 
-	struct msm_rd_state *rd;
+	struct msm_rd_state *rd;       /* debugfs to dump all submits */
+	struct msm_rd_state *hangrd;   /* debugfs to dump hanging submits */
 	struct msm_perf_state *perf;
 
 	/* list of GEM objects: */
@@ -298,7 +299,7 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
 int msm_debugfs_late_init(struct drm_device *dev);
 int msm_rd_debugfs_init(struct drm_minor *minor);
 void msm_rd_debugfs_cleanup(struct msm_drm_private *priv);
-void msm_rd_dump_submit(struct msm_gem_submit *submit);
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit);
 int msm_perf_debugfs_init(struct drm_minor *minor);
 void msm_perf_debugfs_cleanup(struct msm_drm_private *priv);
 #else
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index d26a7282466e..5baeb34f098e 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -556,7 +556,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	list_add_tail(&submit->node, &ring->submits);
 
-	msm_rd_dump_submit(submit);
+	msm_rd_dump_submit(priv->rd, submit);
 
 	update_sw_cntrs(gpu);
 
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 981bb17c11a7..d689407c809a 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -19,11 +19,17 @@
  *
  *   tail -f /sys/kernel/debug/dri/<minor>/rd > logfile.rd
  *
- * To log the cmdstream in a format that is understood by freedreno/cffdump
+ * to log the cmdstream in a format that is understood by freedreno/cffdump
  * utility.  By comparing the last successfully completed fence #, to the
  * cmdstream for the next fence, you can narrow down which process and submit
  * caused the gpu crash/lockup.
  *
+ * Additionally:
+ *
+ *   tail -f /sys/kernel/debug/dri/<minor>/hangrd > logfile.rd
+ *
+ * will capture just the cmdstream from submits which triggered a GPU hang.
+ *
  * This bypasses drm_debugfs_create_files() mainly because we need to use
  * our own fops for a bit more control.  In particular, we don't want to
  * do anything if userspace doesn't have the debugfs file open.
@@ -220,53 +226,89 @@ static const struct file_operations rd_debugfs_fops = {
 	.release = rd_release,
 };
 
-int msm_rd_debugfs_init(struct drm_minor *minor)
+
+static void rd_cleanup(struct msm_rd_state *rd)
+{
+	if (!rd)
+		return;
+
+	mutex_destroy(&rd->read_lock);
+	kfree(rd);
+}
+
+static struct msm_rd_state *rd_init(struct drm_minor *minor, const char *name)
 {
-	struct msm_drm_private *priv = minor->dev->dev_private;
 	struct msm_rd_state *rd;
 	struct dentry *ent;
-
-	/* only create on first minor: */
-	if (priv->rd)
-		return 0;
+	int ret = 0;
 
 	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
 	if (!rd)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	rd->dev = minor->dev;
 	rd->fifo.buf = rd->buf;
 
 	mutex_init(&rd->read_lock);
-	priv->rd = rd;
 
 	init_waitqueue_head(&rd->fifo_event);
 
-	ent = debugfs_create_file("rd", S_IFREG | S_IRUGO,
+	ent = debugfs_create_file(name, S_IFREG | S_IRUGO,
 			minor->debugfs_root, rd, &rd_debugfs_fops);
 	if (!ent) {
-		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/rd\n",
-				minor->debugfs_root);
+		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n",
+				minor->debugfs_root, name);
+		ret = -ENOMEM;
 		goto fail;
 	}
 
+	return rd;
+
+fail:
+	rd_cleanup(rd);
+	return ERR_PTR(ret);
+}
+
+int msm_rd_debugfs_init(struct drm_minor *minor)
+{
+	struct msm_drm_private *priv = minor->dev->dev_private;
+	struct msm_rd_state *rd;
+	int ret;
+
+	/* only create on first minor: */
+	if (priv->rd)
+		return 0;
+
+	rd = rd_init(minor, "rd");
+	if (IS_ERR(rd)) {
+		ret = PTR_ERR(rd);
+		goto fail;
+	}
+
+	priv->rd = rd;
+
+	rd = rd_init(minor, "hangrd");
+	if (IS_ERR(rd)) {
+		ret = PTR_ERR(rd);
+		goto fail;
+	}
+
+	priv->hangrd = rd;
+
 	return 0;
 
 fail:
 	msm_rd_debugfs_cleanup(priv);
-	return -1;
+	return ret;
 }
 
 void msm_rd_debugfs_cleanup(struct msm_drm_private *priv)
 {
-	struct msm_rd_state *rd = priv->rd;
-
-	if (!rd)
-		return;
-
+	rd_cleanup(priv->rd);
 	priv->rd = NULL;
-	mutex_destroy(&rd->read_lock);
-	kfree(rd);
+
+	rd_cleanup(priv->hangrd);
+	priv->hangrd = NULL;
 }
 
 static void snapshot_buf(struct msm_rd_state *rd,
@@ -304,11 +346,10 @@ static void snapshot_buf(struct msm_rd_state *rd,
 }
 
 /* called under struct_mutex */
-void msm_rd_dump_submit(struct msm_gem_submit *submit)
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit)
 {
 	struct drm_device *dev = submit->dev;
-	struct msm_drm_private *priv = dev->dev_private;
-	struct msm_rd_state *rd = priv->rd;
+	struct task_struct *task;
 	char msg[128];
 	int i, n;
 
@@ -320,9 +361,17 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 	 */
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u",
-			TASK_COMM_LEN, current->comm, task_pid_nr(current),
-			submit->fence->seqno);
+	rcu_read_lock();
+	task = pid_task(submit->pid, PIDTYPE_PID);
+	if (task) {
+		n = snprintf(msg, sizeof(msg), "%.*s/%d: fence=%u",
+				TASK_COMM_LEN, task->comm,
+				pid_nr(submit->pid), submit->seqno);
+	} else {
+		n = snprintf(msg, sizeof(msg), "???/%d: fence=%u",
+				pid_nr(submit->pid), submit->seqno);
+	}
+	rcu_read_unlock();
 
 	rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
 
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/6] drm/msm/rd: allow adding addition msg to top of dump
  2017-10-24 13:22 [PATCH 0/6] drm/msm: GPU debugging enhancements Rob Clark
@ 2017-10-24 13:22     ` Rob Clark
  2017-10-24 13:22   ` Rob Clark
  1 sibling, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: David Airlie, linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Rob Clark,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jordan Crouse,
	freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

For faults or hangs, it is nice to be able to include a bit more
information.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_drv.h |  3 ++-
 drivers/gpu/drm/msm/msm_gpu.c |  2 +-
 drivers/gpu/drm/msm/msm_rd.c  | 15 +++++++++++++--
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index b01bd7fa9c2b..a007bbf4d94d 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -299,7 +299,8 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
 int msm_debugfs_late_init(struct drm_device *dev);
 int msm_rd_debugfs_init(struct drm_minor *minor);
 void msm_rd_debugfs_cleanup(struct msm_drm_private *priv);
-void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit);
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
+		const char *fmt, ...);
 int msm_perf_debugfs_init(struct drm_minor *minor);
 void msm_perf_debugfs_cleanup(struct msm_drm_private *priv);
 #else
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 5baeb34f098e..403baea19329 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -556,7 +556,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	list_add_tail(&submit->node, &ring->submits);
 
-	msm_rd_dump_submit(priv->rd, submit);
+	msm_rd_dump_submit(priv->rd, submit, NULL);
 
 	update_sw_cntrs(gpu);
 
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index d689407c809a..3aa8a8576abe 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -346,11 +346,12 @@ static void snapshot_buf(struct msm_rd_state *rd,
 }
 
 /* called under struct_mutex */
-void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit)
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
+		const char *fmt, ...)
 {
 	struct drm_device *dev = submit->dev;
 	struct task_struct *task;
-	char msg[128];
+	char msg[256];
 	int i, n;
 
 	if (!rd->open)
@@ -361,6 +362,16 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit)
 	 */
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
+	if (fmt) {
+		va_list args;
+
+		va_start(args, fmt);
+		n = vsnprintf(msg, sizeof(msg), fmt, args);
+		va_end(args);
+
+		rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
+	}
+
 	rcu_read_lock();
 	task = pid_task(submit->pid, PIDTYPE_PID);
 	if (task) {
-- 
2.13.6

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 4/6] drm/msm/rd: allow adding addition msg to top of dump
@ 2017-10-24 13:22     ` Rob Clark
  0 siblings, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Jordan Crouse, Rob Clark, David Airlie,
	linux-kernel

For faults or hangs, it is nice to be able to include a bit more
information.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_drv.h |  3 ++-
 drivers/gpu/drm/msm/msm_gpu.c |  2 +-
 drivers/gpu/drm/msm/msm_rd.c  | 15 +++++++++++++--
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index b01bd7fa9c2b..a007bbf4d94d 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -299,7 +299,8 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
 int msm_debugfs_late_init(struct drm_device *dev);
 int msm_rd_debugfs_init(struct drm_minor *minor);
 void msm_rd_debugfs_cleanup(struct msm_drm_private *priv);
-void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit);
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
+		const char *fmt, ...);
 int msm_perf_debugfs_init(struct drm_minor *minor);
 void msm_perf_debugfs_cleanup(struct msm_drm_private *priv);
 #else
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 5baeb34f098e..403baea19329 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -556,7 +556,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	list_add_tail(&submit->node, &ring->submits);
 
-	msm_rd_dump_submit(priv->rd, submit);
+	msm_rd_dump_submit(priv->rd, submit, NULL);
 
 	update_sw_cntrs(gpu);
 
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index d689407c809a..3aa8a8576abe 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -346,11 +346,12 @@ static void snapshot_buf(struct msm_rd_state *rd,
 }
 
 /* called under struct_mutex */
-void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit)
+void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit,
+		const char *fmt, ...)
 {
 	struct drm_device *dev = submit->dev;
 	struct task_struct *task;
-	char msg[128];
+	char msg[256];
 	int i, n;
 
 	if (!rd->open)
@@ -361,6 +362,16 @@ void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit)
 	 */
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
+	if (fmt) {
+		va_list args;
+
+		va_start(args, fmt);
+		n = vsnprintf(msg, sizeof(msg), fmt, args);
+		va_end(args);
+
+		rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
+	}
+
 	rcu_read_lock();
 	task = pid_task(submit->pid, PIDTYPE_PID);
 	if (task) {
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 5/6] drm/msm: preserve IOVAs in submit's bo table
  2017-10-24 13:22 [PATCH 0/6] drm/msm: GPU debugging enhancements Rob Clark
@ 2017-10-24 13:22   ` Rob Clark
  2017-10-24 13:22   ` Rob Clark
  1 sibling, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel; +Cc: linux-arm-msm, linux-kernel, freedreno

We need this if we want to dump the submit after cleanup (ie. from hang
or fault).  But in the backoff/unpin case we want to clear them.  So add
a flag so we can skip clearing the IOVAs in at cleanup.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 69868682f8f4..b8dc8f96caf2 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -161,7 +161,8 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 	return ret;
 }
 
-static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
+static void submit_unlock_unpin_bo(struct msm_gem_submit *submit,
+		int i, bool backoff)
 {
 	struct msm_gem_object *msm_obj = submit->bos[i].obj;
 
@@ -171,7 +172,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
 	if (submit->bos[i].flags & BO_LOCKED)
 		ww_mutex_unlock(&msm_obj->resv->lock);
 
-	if (!(submit->bos[i].flags & BO_VALID))
+	if (backoff && !(submit->bos[i].flags & BO_VALID))
 		submit->bos[i].iova = 0;
 
 	submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED);
@@ -206,10 +207,10 @@ static int submit_lock_objects(struct msm_gem_submit *submit)
 
 fail:
 	for (; i >= 0; i--)
-		submit_unlock_unpin_bo(submit, i);
+		submit_unlock_unpin_bo(submit, i, true);
 
 	if (slow_locked > 0)
-		submit_unlock_unpin_bo(submit, slow_locked);
+		submit_unlock_unpin_bo(submit, slow_locked, true);
 
 	if (ret == -EDEADLK) {
 		struct msm_gem_object *msm_obj = submit->bos[contended].obj;
@@ -393,7 +394,7 @@ static void submit_cleanup(struct msm_gem_submit *submit)
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
-		submit_unlock_unpin_bo(submit, i);
+		submit_unlock_unpin_bo(submit, i, false);
 		list_del_init(&msm_obj->submit_entry);
 		drm_gem_object_unreference(&msm_obj->base);
 	}
-- 
2.13.6

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 5/6] drm/msm: preserve IOVAs in submit's bo table
@ 2017-10-24 13:22   ` Rob Clark
  0 siblings, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Jordan Crouse, Rob Clark, David Airlie,
	linux-kernel

We need this if we want to dump the submit after cleanup (ie. from hang
or fault).  But in the backoff/unpin case we want to clear them.  So add
a flag so we can skip clearing the IOVAs in at cleanup.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 69868682f8f4..b8dc8f96caf2 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -161,7 +161,8 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 	return ret;
 }
 
-static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
+static void submit_unlock_unpin_bo(struct msm_gem_submit *submit,
+		int i, bool backoff)
 {
 	struct msm_gem_object *msm_obj = submit->bos[i].obj;
 
@@ -171,7 +172,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
 	if (submit->bos[i].flags & BO_LOCKED)
 		ww_mutex_unlock(&msm_obj->resv->lock);
 
-	if (!(submit->bos[i].flags & BO_VALID))
+	if (backoff && !(submit->bos[i].flags & BO_VALID))
 		submit->bos[i].iova = 0;
 
 	submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED);
@@ -206,10 +207,10 @@ static int submit_lock_objects(struct msm_gem_submit *submit)
 
 fail:
 	for (; i >= 0; i--)
-		submit_unlock_unpin_bo(submit, i);
+		submit_unlock_unpin_bo(submit, i, true);
 
 	if (slow_locked > 0)
-		submit_unlock_unpin_bo(submit, slow_locked);
+		submit_unlock_unpin_bo(submit, slow_locked, true);
 
 	if (ret == -EDEADLK) {
 		struct msm_gem_object *msm_obj = submit->bos[contended].obj;
@@ -393,7 +394,7 @@ static void submit_cleanup(struct msm_gem_submit *submit)
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
-		submit_unlock_unpin_bo(submit, i);
+		submit_unlock_unpin_bo(submit, i, false);
 		list_del_init(&msm_obj->submit_entry);
 		drm_gem_object_unreference(&msm_obj->base);
 	}
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 6/6] drm/msm: dump submits which triggered gpu hang
  2017-10-24 13:22 [PATCH 0/6] drm/msm: GPU debugging enhancements Rob Clark
@ 2017-10-24 13:22     ` Rob Clark
  2017-10-24 13:22   ` Rob Clark
  1 sibling, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: David Airlie, linux-arm-msm-u79uwXL29TY76Z2rM5mHXA, Rob Clark,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Jordan Crouse,
	freedreno-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Note we need to move update_fences() to after msm_rd_dump_submit(),
otherwise the bo's referenced by the submit may no longer be valid.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gpu.c | 52 +++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 403baea19329..939ea98908b8 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -255,34 +255,16 @@ static void recover_worker(struct work_struct *work)
 {
 	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
 	struct drm_device *dev = gpu->dev;
+	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_gem_submit *submit;
 	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
-	uint64_t fence;
 	int i;
 
-	/* Update all the rings with the latest and greatest fence */
-	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
-		struct msm_ringbuffer *ring = gpu->rb[i];
-
-		fence = ring->memptrs->fence;
-
-		/*
-		 * For the current (faulting?) ring/submit advance the fence by
-		 * one more to clear the faulting submit
-		 */
-		if (ring == cur_ring)
-			fence = fence + 1;
-
-		update_fences(gpu, ring, fence);
-	}
-
 	mutex_lock(&dev->struct_mutex);
 
-
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
-	fence = cur_ring->memptrs->fence + 1;
 
-	submit = find_submit(cur_ring, fence);
+	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
 	if (submit) {
 		struct task_struct *task;
 
@@ -306,11 +288,37 @@ static void recover_worker(struct work_struct *work)
 			len = get_cmdline(task, buf, sizeof(buf));
 			mutex_lock(&dev->struct_mutex);
 
-			dev_err(dev->dev, "%s: offending task: %s (%-*s)\n",
-					gpu->name, task->comm, len, buf);
+			dev_err(dev->dev, "%s: offending task: %s (%.*s)\n",
+				gpu->name, task->comm, len, buf);
+
+			msm_rd_dump_submit(priv->hangrd, submit,
+				"offending task: %s (%.*s)", task->comm,
+				len, buf);
+		} else {
+			msm_rd_dump_submit(priv->hangrd, submit, NULL);
 		}
 		rcu_read_unlock();
+	}
+
+
+	/*
+	 * Update all the rings with the latest and greatest fence.. this
+	 * needs to happen after msm_rd_dump_submit() to ensure that the
+	 * bo's referenced by the offending submit are still around.
+	 */
+	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+		struct msm_ringbuffer *ring = gpu->rb[i];
+
+		uint32_t fence = ring->memptrs->fence;
 
+		/*
+		 * For the current (faulting?) ring/submit advance the fence by
+		 * one more to clear the faulting submit
+		 */
+		if (ring == cur_ring)
+			fence++;
+
+		update_fences(gpu, ring, fence);
 	}
 
 	if (msm_gpu_active(gpu)) {
-- 
2.13.6

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 6/6] drm/msm: dump submits which triggered gpu hang
@ 2017-10-24 13:22     ` Rob Clark
  0 siblings, 0 replies; 13+ messages in thread
From: Rob Clark @ 2017-10-24 13:22 UTC (permalink / raw)
  To: dri-devel
  Cc: linux-arm-msm, freedreno, Jordan Crouse, Rob Clark, David Airlie,
	linux-kernel

Note we need to move update_fences() to after msm_rd_dump_submit(),
otherwise the bo's referenced by the submit may no longer be valid.

Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gpu.c | 52 +++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 403baea19329..939ea98908b8 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -255,34 +255,16 @@ static void recover_worker(struct work_struct *work)
 {
 	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
 	struct drm_device *dev = gpu->dev;
+	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_gem_submit *submit;
 	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
-	uint64_t fence;
 	int i;
 
-	/* Update all the rings with the latest and greatest fence */
-	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
-		struct msm_ringbuffer *ring = gpu->rb[i];
-
-		fence = ring->memptrs->fence;
-
-		/*
-		 * For the current (faulting?) ring/submit advance the fence by
-		 * one more to clear the faulting submit
-		 */
-		if (ring == cur_ring)
-			fence = fence + 1;
-
-		update_fences(gpu, ring, fence);
-	}
-
 	mutex_lock(&dev->struct_mutex);
 
-
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
-	fence = cur_ring->memptrs->fence + 1;
 
-	submit = find_submit(cur_ring, fence);
+	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
 	if (submit) {
 		struct task_struct *task;
 
@@ -306,11 +288,37 @@ static void recover_worker(struct work_struct *work)
 			len = get_cmdline(task, buf, sizeof(buf));
 			mutex_lock(&dev->struct_mutex);
 
-			dev_err(dev->dev, "%s: offending task: %s (%-*s)\n",
-					gpu->name, task->comm, len, buf);
+			dev_err(dev->dev, "%s: offending task: %s (%.*s)\n",
+				gpu->name, task->comm, len, buf);
+
+			msm_rd_dump_submit(priv->hangrd, submit,
+				"offending task: %s (%.*s)", task->comm,
+				len, buf);
+		} else {
+			msm_rd_dump_submit(priv->hangrd, submit, NULL);
 		}
 		rcu_read_unlock();
+	}
+
+
+	/*
+	 * Update all the rings with the latest and greatest fence.. this
+	 * needs to happen after msm_rd_dump_submit() to ensure that the
+	 * bo's referenced by the offending submit are still around.
+	 */
+	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+		struct msm_ringbuffer *ring = gpu->rb[i];
+
+		uint32_t fence = ring->memptrs->fence;
 
+		/*
+		 * For the current (faulting?) ring/submit advance the fence by
+		 * one more to clear the faulting submit
+		 */
+		if (ring == cur_ring)
+			fence++;
+
+		update_fences(gpu, ring, fence);
 	}
 
 	if (msm_gpu_active(gpu)) {
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-10-24 13:24 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-10-24 13:22 [PATCH 0/6] drm/msm: GPU debugging enhancements Rob Clark
     [not found] ` <20171024132256.20286-1-robdclark-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-24 13:22   ` [PATCH 1/6] drm/msm: show task cmdline in gpu recovery messages Rob Clark
2017-10-24 13:22     ` Rob Clark
2017-10-24 13:22   ` [PATCH 2/6] drm/msm: add special _get_vaddr_active() for cmdstream dumps Rob Clark
2017-10-24 13:22     ` Rob Clark
2017-10-24 13:22   ` [PATCH 3/6] drm/msm: split rd debugfs file Rob Clark
2017-10-24 13:22     ` Rob Clark
2017-10-24 13:22   ` [PATCH 4/6] drm/msm/rd: allow adding addition msg to top of dump Rob Clark
2017-10-24 13:22     ` Rob Clark
2017-10-24 13:22   ` [PATCH 6/6] drm/msm: dump submits which triggered gpu hang Rob Clark
2017-10-24 13:22     ` Rob Clark
2017-10-24 13:22 ` [PATCH 5/6] drm/msm: preserve IOVAs in submit's bo table Rob Clark
2017-10-24 13:22   ` Rob Clark

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.