* [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item
@ 2015-01-23 12:44 Mika Kuoppala
2015-01-23 12:44 ` [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Mika Kuoppala @ 2015-01-23 12:44 UTC (permalink / raw)
To: intel-gfx; +Cc: Jani Nikula, Daniel Vetter, miku
From: Chris Wilson <chris@chris-wilson.co.uk>
When run as a timer, i915_hangcheck_elapsed() must adhere to all the
rules of running in a softirq context. This is advantageous to us as we
want to minimise the risk that a driver bug will prevent us from
detecting a hung GPU. However, that is irrelevant if the driver bug
prevents us from resetting and recovering. Still it is prudent not to
rely on mutexes inside the checker, but given the coarseness of
dev->struct_mutex doing so is extremely hard.
Give in and run from a work queue, i.e. outside of softirq.
v2:
The conversion does have one significant change: switching from
mod_timer to schedule_delayed_work means that the time at which we execute
the first hangcheck is fixed and not continually deferred by later work.
This has the advantage of not allowing userspace to fill the ring before
hangcheck can finally run. At the same time, it removes the ability for
the interrupt to defer the hangcheck as well. This is sensible because
an interrupt is only for a single engine, whereas we perform hangcheck
globally, so whilst one ring may have hung, the other could be running
normally and preventing the hangcheck from firing.
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 2 +-
drivers/gpu/drm/i915/i915_drv.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 2 +-
drivers/gpu/drm/i915/i915_irq.c | 23 +++++++++--------------
5 files changed, 13 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 51e8fe5..7c64669 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -934,7 +934,7 @@ int i915_driver_unload(struct drm_device *dev)
}
/* Free error state after interrupts are fully disabled. */
- del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
cancel_work_sync(&dev_priv->gpu_error.work);
i915_destroy_error_state(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 66c72bd..cb1468d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1396,7 +1396,7 @@ static int intel_runtime_suspend(struct device *device)
return ret;
}
- del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
intel_uncore_forcewake_reset(dev, false);
dev_priv->pm.suspended = true;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0d67b17..ce2acdd 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1345,7 +1345,7 @@ struct i915_gpu_error {
/* Hang gpu twice in this window and your context gets banned */
#define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
- struct timer_list hangcheck_timer;
+ struct delayed_work hangcheck_work;
/* For reset and error_state handling. */
spinlock_t lock;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc81889..8a178cd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4615,7 +4615,7 @@ i915_gem_suspend(struct drm_device *dev)
i915_gem_stop_ringbuffers(dev);
mutex_unlock(&dev->struct_mutex);
- del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
+ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
cancel_delayed_work_sync(&dev_priv->mm.retire_work);
flush_delayed_work(&dev_priv->mm.idle_work);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 25b2299..a188c7d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2980,10 +2980,12 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
* we kick the ring. If we see no progress on three subsequent calls
* we assume chip is wedged and try to fix it by resetting the chip.
*/
-static void i915_hangcheck_elapsed(unsigned long data)
+static void i915_hangcheck_elapsed(struct work_struct *work)
{
- struct drm_device *dev = (struct drm_device *)data;
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_private *dev_priv =
+ container_of(work, typeof(*dev_priv),
+ gpu_error.hangcheck_work.work);
+ struct drm_device *dev = dev_priv->dev;
struct intel_engine_cs *ring;
int i;
int busy_count = 0, rings_hung = 0;
@@ -3097,17 +3099,11 @@ static void i915_hangcheck_elapsed(unsigned long data)
void i915_queue_hangcheck(struct drm_device *dev)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct timer_list *timer = &dev_priv->gpu_error.hangcheck_timer;
-
if (!i915.enable_hangcheck)
return;
- /* Don't continually defer the hangcheck, but make sure it is active */
- if (timer_pending(timer))
- return;
- mod_timer(timer,
- round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
+ schedule_delayed_work(&to_i915(dev)->gpu_error.hangcheck_work,
+ round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES));
}
static void ibx_irq_reset(struct drm_device *dev)
@@ -4351,9 +4347,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
else
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
- setup_timer(&dev_priv->gpu_error.hangcheck_timer,
- i915_hangcheck_elapsed,
- (unsigned long) dev);
+ INIT_DELAYED_WORK(&dev_priv->gpu_error.hangcheck_work,
+ i915_hangcheck_elapsed);
INIT_DELAYED_WORK(&dev_priv->hotplug_reenable_work,
intel_hpd_irq_reenable_work);
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs
2015-01-23 12:44 [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
@ 2015-01-23 12:44 ` Mika Kuoppala
2015-01-23 13:19 ` Ville Syrjälä
2015-01-23 18:24 ` shuang.he
2015-01-23 15:43 ` [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item Daniel Vetter
2015-01-23 20:58 ` Chris Wilson
2 siblings, 2 replies; 8+ messages in thread
From: Mika Kuoppala @ 2015-01-23 12:44 UTC (permalink / raw)
To: intel-gfx; +Cc: miku
From: Chris Wilson <chris@chris-wilson.co.uk>
For example,
/sys/kernel/debug/dri/0/i915_hangcheck_info:
Hangcheck active, fires in 15887800ms
render ring:
seqno = -4059 [current -583]
action = 2
score = 0
ACTHD = 1ee8 [current 21f980]
max ACTHD = 0
v2: Include expiration ETA. Can anyone spot a problem?
v3: Convert for workqueued hangcheck (Mika)
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com) (v2)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (v2)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2ad4c48..1502c96 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1219,6 +1219,41 @@ out:
return ret;
}
+static int i915_hangcheck_info(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = m->private;
+ struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
+ struct intel_engine_cs *ring;
+ int i;
+
+ if (!i915.enable_hangcheck) {
+ seq_printf(m, "Hangcheck disabled\n");
+ return 0;
+ }
+
+ if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
+ seq_printf(m, "Hangcheck active, fires in %dms\n",
+ jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
+ jiffies));
+ } else
+ seq_printf(m, "Hangcheck inactive\n");
+
+ for_each_ring(ring, dev_priv, i) {
+ seq_printf(m, "%s:\n", ring->name);
+ seq_printf(m, "\tseqno = %d [current %d]\n",
+ ring->hangcheck.seqno, ring->get_seqno(ring, false));
+ seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
+ seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
+ seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
+ (long long)ring->hangcheck.acthd,
+ (long long)intel_ring_get_active_head(ring));
+ seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
+ (long long)ring->hangcheck.max_acthd);
+ }
+
+ return 0;
+}
+
static int ironlake_drpc_info(struct seq_file *m)
{
struct drm_info_node *node = m->private;
@@ -4407,6 +4442,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
+ {"i915_hangcheck_info", i915_hangcheck_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
{"i915_emon_status", i915_emon_status, 0},
{"i915_ring_freq_table", i915_ring_freq_table, 0},
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs
2015-01-23 12:44 ` [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
@ 2015-01-23 13:19 ` Ville Syrjälä
2015-01-23 14:16 ` Mika Kuoppala
2015-01-23 20:53 ` Chris Wilson
2015-01-23 18:24 ` shuang.he
1 sibling, 2 replies; 8+ messages in thread
From: Ville Syrjälä @ 2015-01-23 13:19 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx, miku
On Fri, Jan 23, 2015 at 02:44:08PM +0200, Mika Kuoppala wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> For example,
>
> /sys/kernel/debug/dri/0/i915_hangcheck_info:
>
> Hangcheck active, fires in 15887800ms
> render ring:
> seqno = -4059 [current -583]
> action = 2
> score = 0
> ACTHD = 1ee8 [current 21f980]
> max ACTHD = 0
>
> v2: Include expiration ETA. Can anyone spot a problem?
> v3: Convert for workqueued hangcheck (Mika)
>
> Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com) (v2)
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (v2)
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 36 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 2ad4c48..1502c96 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1219,6 +1219,41 @@ out:
> return ret;
> }
>
> +static int i915_hangcheck_info(struct seq_file *m, void *unused)
> +{
> + struct drm_info_node *node = m->private;
> + struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
> + struct intel_engine_cs *ring;
> + int i;
> +
> + if (!i915.enable_hangcheck) {
> + seq_printf(m, "Hangcheck disabled\n");
> + return 0;
> + }
> +
> + if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
> + seq_printf(m, "Hangcheck active, fires in %dms\n",
> + jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
> + jiffies));
> + } else
> + seq_printf(m, "Hangcheck inactive\n");
> +
> + for_each_ring(ring, dev_priv, i) {
> + seq_printf(m, "%s:\n", ring->name);
> + seq_printf(m, "\tseqno = %d [current %d]\n",
%u ?
> + ring->hangcheck.seqno, ring->get_seqno(ring, false));
> + seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
> + seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
> + seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
> + (long long)ring->hangcheck.acthd,
> + (long long)intel_ring_get_active_head(ring));
> + seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
> + (long long)ring->hangcheck.max_acthd);
> + }
> +
> + return 0;
> +}
> +
> static int ironlake_drpc_info(struct seq_file *m)
> {
> struct drm_info_node *node = m->private;
> @@ -4407,6 +4442,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
> {"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
> {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
> {"i915_frequency_info", i915_frequency_info, 0},
> + {"i915_hangcheck_info", i915_hangcheck_info, 0},
> {"i915_drpc_info", i915_drpc_info, 0},
> {"i915_emon_status", i915_emon_status, 0},
> {"i915_ring_freq_table", i915_ring_freq_table, 0},
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs
2015-01-23 13:19 ` Ville Syrjälä
@ 2015-01-23 14:16 ` Mika Kuoppala
2015-01-23 20:53 ` Chris Wilson
1 sibling, 0 replies; 8+ messages in thread
From: Mika Kuoppala @ 2015-01-23 14:16 UTC (permalink / raw)
To: intel-gfx; +Cc: miku
From: Chris Wilson <chris@chris-wilson.co.uk>
For example,
/sys/kernel/debug/dri/0/i915_hangcheck_info:
Hangcheck active, fires in 15887800ms
render ring:
seqno = -4059 [current -583]
action = 2
score = 0
ACTHD = 1ee8 [current 21f980]
max ACTHD = 0
v2: Include expiration ETA. Can anyone spot a problem?
v3: Convert for workqueued hangcheck (Mika)
v4: Print seqnos as unsigned ints (Ville)
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com) (v2)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (v2)
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2ad4c48..01b00e2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1219,6 +1219,41 @@ out:
return ret;
}
+static int i915_hangcheck_info(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = m->private;
+ struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
+ struct intel_engine_cs *ring;
+ int i;
+
+ if (!i915.enable_hangcheck) {
+ seq_printf(m, "Hangcheck disabled\n");
+ return 0;
+ }
+
+ if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
+ seq_printf(m, "Hangcheck active, fires in %dms\n",
+ jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
+ jiffies));
+ } else
+ seq_printf(m, "Hangcheck inactive\n");
+
+ for_each_ring(ring, dev_priv, i) {
+ seq_printf(m, "%s:\n", ring->name);
+ seq_printf(m, "\tseqno = %u [current %u]\n",
+ ring->hangcheck.seqno, ring->get_seqno(ring, false));
+ seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
+ seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
+ seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
+ (long long)ring->hangcheck.acthd,
+ (long long)intel_ring_get_active_head(ring));
+ seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
+ (long long)ring->hangcheck.max_acthd);
+ }
+
+ return 0;
+}
+
static int ironlake_drpc_info(struct seq_file *m)
{
struct drm_info_node *node = m->private;
@@ -4407,6 +4442,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_frequency_info", i915_frequency_info, 0},
+ {"i915_hangcheck_info", i915_hangcheck_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
{"i915_emon_status", i915_emon_status, 0},
{"i915_ring_freq_table", i915_ring_freq_table, 0},
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item
2015-01-23 12:44 [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
2015-01-23 12:44 ` [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
@ 2015-01-23 15:43 ` Daniel Vetter
2015-01-23 20:58 ` Chris Wilson
2 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2015-01-23 15:43 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: Jani Nikula, Daniel Vetter, intel-gfx, miku
On Fri, Jan 23, 2015 at 02:44:07PM +0200, Mika Kuoppala wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> When run as a timer, i915_hangcheck_elapsed() must adhere to all the
> rules of running in a softirq context. This is advantageous to us as we
> want to minimise the risk that a driver bug will prevent us from
> detecting a hung GPU. However, that is irrelevant if the driver bug
> prevents us from resetting and recovering. Still it is prudent not to
> rely on mutexes inside the checker, but given the coarseness of
> dev->struct_mutex doing so is extremely hard.
>
> Give in and run from a work queue, i.e. outside of softirq.
>
> v2:
>
> The conversion does have one significant change, from the use of
> mod_timer to schedule_delayed_work, means that the time that we execute
> the first hangcheck is fixed and not continually deferred by later work.
> This has the advantage of not allowing userspace to fill the ring before
> hangcheck can finally run. At the same time, it removes the ability for
> the interrupt to defer the hangcheck as well. This is sensible for that
> an interrupt is only for a single engine, whereas we perform hangcheck
> globally, so whilst one ring may have hung, the other could be running
> normally and preventing the hangcheck from firing.
>
> Cc: Jani Nikula <jani.nikula@intel.com>
> Cc: Daniel Vetter <dnaiel.vetter@ffwll.chm>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
One thing special with timers is that they'll always run, and if you do a
del_timer_sync from process context you can't deadlock with timer A
because you hold some locks for timer B that's in front of A in the
queues. With workqueues that's not the case, and it's really easy to cause
deadlocks by blocking some random work item in front of the queue by
accident.
I think for this switch we need our own, dedicated hangcheck work queue,
with its own thread to make sure it gets run reliably. But besides that I
really like this change.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs
2015-01-23 12:44 ` [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
2015-01-23 13:19 ` Ville Syrjälä
@ 2015-01-23 18:24 ` shuang.he
1 sibling, 0 replies; 8+ messages in thread
From: shuang.he @ 2015-01-23 18:24 UTC (permalink / raw)
To: shuang.he, ethan.gao, intel-gfx, mika.kuoppala
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5634
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
PNV -1 353/353 352/353
ILK 353/353 353/353
SNB +1 399/422 400/422
IVB +1 486/487 487/487
BYT 296/296 296/296
HSW +1-2 507/508 506/508
BDW 399/402 399/402
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
*PNV igt_gen3_render_linear_blits PASS(3, M7M23) CRASH(1, M23)
SNB igt_kms_flip_nonexisting-fb NSPT(1, M35)PASS(5, M35M22) PASS(1, M22)
IVB igt_gem_pwrite_pread_snooped-pwrite-blt-cpu_mmap-performance DMESG_WARN(2, M21)PASS(1, M4) PASS(1, M21)
*HSW igt_gem_storedw_batches_loop_normal PASS(2, M19M20) DMESG_WARN(1, M20)
HSW igt_gem_storedw_loop_blt DMESG_WARN(2, M19M20)PASS(4, M20M40) PASS(1, M20)
*HSW igt_pm_rpm_debugfs-read PASS(2, M19M20) DMESG_WARN(1, M20)
Note: You need to pay more attention to lines starting with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs
2015-01-23 13:19 ` Ville Syrjälä
2015-01-23 14:16 ` Mika Kuoppala
@ 2015-01-23 20:53 ` Chris Wilson
1 sibling, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2015-01-23 20:53 UTC (permalink / raw)
To: Ville Syrjälä; +Cc: intel-gfx, miku
On Fri, Jan 23, 2015 at 03:19:16PM +0200, Ville Syrjälä wrote:
> On Fri, Jan 23, 2015 at 02:44:08PM +0200, Mika Kuoppala wrote:
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> >
> > For example,
> >
> > /sys/kernel/debug/dri/0/i915_hangcheck_info:
> >
> > Hangcheck active, fires in 15887800ms
> > render ring:
> > seqno = -4059 [current -583]
> > action = 2
> > score = 0
> > ACTHD = 1ee8 [current 21f980]
> > max ACTHD = 0
> >
> > v2: Include expiration ETA. Can anyone spot a problem?
> > v3: Convert for workqueued hangcheck (Mika)
> >
> > Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com) (v2)
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v2)
> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> (v2)
> > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_debugfs.c | 36 ++++++++++++++++++++++++++++++++++++
> > 1 file changed, 36 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 2ad4c48..1502c96 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -1219,6 +1219,41 @@ out:
> > return ret;
> > }
> >
> > +static int i915_hangcheck_info(struct seq_file *m, void *unused)
> > +{
> > + struct drm_info_node *node = m->private;
> > + struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
> > + struct intel_engine_cs *ring;
> > + int i;
> > +
> > + if (!i915.enable_hangcheck) {
> > + seq_printf(m, "Hangcheck disabled\n");
> > + return 0;
> > + }
> > +
> > + if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
> > + seq_printf(m, "Hangcheck active, fires in %dms\n",
> > + jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
> > + jiffies));
> > + } else
> > + seq_printf(m, "Hangcheck inactive\n");
> > +
> > + for_each_ring(ring, dev_priv, i) {
> > + seq_printf(m, "%s:\n", ring->name);
> > + seq_printf(m, "\tseqno = %d [current %d]\n",
>
> %u ?
%x. Then it will be more consistent with how we print it elsewhere. (No one
has ever accused me of being consistent!)
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item
2015-01-23 12:44 [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
2015-01-23 12:44 ` [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
2015-01-23 15:43 ` [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item Daniel Vetter
@ 2015-01-23 20:58 ` Chris Wilson
2 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2015-01-23 20:58 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: Jani Nikula, Daniel Vetter, intel-gfx, miku
On Fri, Jan 23, 2015 at 02:44:07PM +0200, Mika Kuoppala wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> When run as a timer, i915_hangcheck_elapsed() must adhere to all the
> rules of running in a softirq context. This is advantageous to us as we
> want to minimise the risk that a driver bug will prevent us from
> detecting a hung GPU. However, that is irrelevant if the driver bug
> prevents us from resetting and recovering. Still it is prudent not to
> rely on mutexes inside the checker, but given the coarseness of
> dev->struct_mutex doing so is extremely hard.
>
> Give in and run from a work queue, i.e. outside of softirq.
>
> v2:
>
> The conversion does have one significant change, from the use of
> mod_timer to schedule_delayed_work, means that the time that we execute
> the first hangcheck is fixed and not continually deferred by later work.
> This has the advantage of not allowing userspace to fill the ring before
> hangcheck can finally run. At the same time, it removes the ability for
> the interrupt to defer the hangcheck as well. This is sensible for that
> an interrupt is only for a single engine, whereas we perform hangcheck
> globally, so whilst one ring may have hung, the other could be running
> normally and preventing the hangcheck from firing.
We can drop this comment since we have already applied this change in an
earlier patch.
> @@ -3097,17 +3099,11 @@ static void i915_hangcheck_elapsed(unsigned long data)
>
> void i915_queue_hangcheck(struct drm_device *dev)
> {
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - struct timer_list *timer = &dev_priv->gpu_error.hangcheck_timer;
> -
> if (!i915.enable_hangcheck)
> return;
>
> - /* Don't continually defer the hangcheck, but make sure it is active */
Keep the comment, or preferably let's capture the reason why we
don't want to continually defer the hangcheck:
/* Don't continually defer the hangcheck so that it is always run at
* least once after work has been scheduled on any ring. Otherwise,
* we will ignore a hung ring if a second ring is kept busy.
*/
> - if (timer_pending(timer))
> - return;
> - mod_timer(timer,
> - round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
> + schedule_delayed_work(&to_i915(dev)->gpu_error.hangcheck_work,
> + round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES));
> }
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2015-01-23 20:58 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-23 12:44 [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item Mika Kuoppala
2015-01-23 12:44 ` [PATCH 2/2] drm/i915: Display current hangcheck status in debugfs Mika Kuoppala
2015-01-23 13:19 ` Ville Syrjälä
2015-01-23 14:16 ` Mika Kuoppala
2015-01-23 20:53 ` Chris Wilson
2015-01-23 18:24 ` shuang.he
2015-01-23 15:43 ` [PATCH 1/2] drm/i915: Convert hangcheck from a timer into a delayed work item Daniel Vetter
2015-01-23 20:58 ` Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.