* [PATCH] drm/i915: hangcheck timeout for debugfs
@ 2011-06-23 22:49 Ben Widawsky
2011-06-23 23:48 ` Chris Wilson
0 siblings, 1 reply; 6+ messages in thread
From: Ben Widawsky @ 2011-06-23 22:49 UTC (permalink / raw)
To: intel-gfx
Provide a user accessible way to change the hangcheck timer. This is
useful mostly for disabling the timer completely (value <= 0).
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Cc: Keith Packard <keithp@keithp.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 88 +++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_dma.c | 1 +
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_gem.c | 7 ++-
drivers/gpu/drm/i915/i915_irq.c | 17 +++++--
5 files changed, 107 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4d46441..b6582de 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1395,6 +1395,88 @@ static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops);
}
+static int i915_hangcheck_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+static ssize_t i915_hangcheck_read(struct file *filp,
+ char __user *ubuf,
+ size_t max,
+ loff_t *ppos)
+{
+ struct drm_device *dev = filp->private_data;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ char buf[7];
+ int len, timeout;
+
+ timeout = atomic_read(&dev_priv->hangcheck_timeout);
+ len = snprintf(buf, sizeof(buf), "%d\n", timeout);
+
+ return simple_read_from_buffer(ubuf, max, ppos, buf, len);
+}
+
+static ssize_t i915_hangcheck_write(struct file *filp,
+ const char __user *ubuf,
+ size_t cnt,
+ loff_t *ppos)
+{
+ struct drm_device *dev = filp->private_data;
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ char buf[7]; /* 99.9 seconds max */
+ int new_timeout = 0;
+ int ret;
+
+ if (cnt > 0) {
+ if (cnt > sizeof (buf) - 1)
+ return -EINVAL;
+
+ if (copy_from_user(buf, ubuf, cnt))
+ return -EFAULT;
+ buf[cnt] = 0;
+
+ new_timeout = simple_strtol(buf, NULL, 0);
+ }
+
+ ret = mutex_lock_interruptible(&dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ if (new_timeout <= 0) {
+ new_timeout = 0;
+ del_timer_sync(&dev_priv->hangcheck_timer);
+ }
+
+ atomic_set(&dev_priv->hangcheck_timeout, new_timeout);
+
+ mutex_unlock(&dev->struct_mutex);
+
+ return cnt;
+}
+
+static const struct file_operations i915_hangcheck_fops = {
+ .owner = THIS_MODULE,
+ .open = i915_hangcheck_open,
+ .read = i915_hangcheck_read,
+ .write = i915_hangcheck_write,
+};
+
+static int i915_hangcheck_create(struct dentry *root, struct drm_minor *minor)
+{
+ struct drm_device *dev = minor->dev;
+ struct dentry *ent;
+
+ ent = debugfs_create_file("i915_hangcheck_timeout",
+ S_IRUGO | S_IWUSR,
+ root, dev,
+ &i915_hangcheck_fops);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ return drm_add_fake_info_node(minor, ent, &i915_hangcheck_fops);
+}
+
static struct drm_info_list i915_debugfs_list[] = {
{"i915_capabilities", i915_capabilities, 0},
{"i915_gem_objects", i915_gem_object_info, 0},
@@ -1448,6 +1530,10 @@ int i915_debugfs_init(struct drm_minor *minor)
if (ret)
return ret;
+ ret = i915_hangcheck_create(minor->debugfs_root, minor);
+ if (ret)
+ return ret;
+
return drm_debugfs_create_files(i915_debugfs_list,
I915_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
@@ -1457,6 +1543,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor)
{
drm_debugfs_remove_files(i915_debugfs_list,
I915_DEBUGFS_ENTRIES, minor);
+ drm_debugfs_remove_files((struct drm_info_list *) &i915_hangcheck_fops,
+ 1, minor);
drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
1, minor);
drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops,
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0239e99..e44a4a6 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2092,6 +2092,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
(unsigned long) dev);
+ atomic_set(&dev_priv->hangcheck_timeout, DRM_I915_HANGCHECK_PERIOD);
spin_lock(&mchdev_lock);
i915_mch_dev = dev_priv;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8a9fd91..f518e76 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -320,6 +320,7 @@ typedef struct drm_i915_private {
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
struct timer_list hangcheck_timer;
int hangcheck_count;
+ atomic_t hangcheck_timeout;
uint32_t last_acthd;
uint32_t last_instdone;
uint32_t last_instdone1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cb1f61d..48b140e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1777,8 +1777,11 @@ i915_add_request(struct intel_ring_buffer *ring,
ring->outstanding_lazy_request = false;
if (!dev_priv->mm.suspended) {
- mod_timer(&dev_priv->hangcheck_timer,
- jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+ int timeout = atomic_read(&dev_priv->hangcheck_timeout);
+ if (timeout) {
+ mod_timer(&dev_priv->hangcheck_timer, jiffies +
+ msecs_to_jiffies(timeout));
+ }
if (was_empty)
queue_delayed_work(dev_priv->wq,
&dev_priv->mm.retire_work, HZ);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b9fafe3..20316d3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -352,6 +352,7 @@ static void notify_ring(struct drm_device *dev,
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 seqno;
+ int timeout;
if (ring->obj == NULL)
return;
@@ -362,9 +363,12 @@ static void notify_ring(struct drm_device *dev,
ring->irq_seqno = seqno;
wake_up_all(&ring->irq_queue);
- dev_priv->hangcheck_count = 0;
- mod_timer(&dev_priv->hangcheck_timer,
- jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+ timeout = atomic_read(&dev_priv->hangcheck_timeout);
+ if (timeout) {
+ dev_priv->hangcheck_count = 0;
+ mod_timer(&dev_priv->hangcheck_timer, jiffies +
+ msecs_to_jiffies(timeout));
+ }
}
static void gen6_pm_rps_work(struct work_struct *work)
@@ -1721,9 +1725,12 @@ void i915_hangcheck_elapsed(unsigned long data)
}
repeat:
+ /* should never be 0, we should drain the timer before setting to 0 */
+ WARN_ON(atomic_read(&dev_priv->hangcheck_timeout) == 0);
+
/* Reset timer case chip hangs without another request being added */
- mod_timer(&dev_priv->hangcheck_timer,
- jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+ mod_timer(&dev_priv->hangcheck_timer, jiffies +
+ msecs_to_jiffies(atomic_read(&dev_priv->hangcheck_timeout));
}
/* drm_dma.h hooks
--
1.7.5.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/i915: hangcheck timeout for debugfs
2011-06-23 22:49 [PATCH] drm/i915: hangcheck timeout for debugfs Ben Widawsky
@ 2011-06-23 23:48 ` Chris Wilson
2011-06-24 15:46 ` Ben Widawsky
0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2011-06-23 23:48 UTC (permalink / raw)
To: Ben Widawsky, intel-gfx
On Thu, 23 Jun 2011 15:49:14 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> Provide a user accessible way to change the hangcheck timer. This is
> useful mostly for disabling the timer completely (value <= 0).
Having i915.hangcheck_interval as a read/write module parameter was
better. :-p
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/i915: hangcheck timeout for debugfs
2011-06-23 23:48 ` Chris Wilson
@ 2011-06-24 15:46 ` Ben Widawsky
2011-06-25 9:23 ` Chris Wilson
0 siblings, 1 reply; 6+ messages in thread
From: Ben Widawsky @ 2011-06-24 15:46 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Fri, Jun 24, 2011 at 12:48:22AM +0100, Chris Wilson wrote:
> On Thu, 23 Jun 2011 15:49:14 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > Provide a user accessible way to change the hangcheck timer. This is
> > useful mostly for disabling the timer completely (value <= 0).
>
> Having i915.hangcheck_interval as a read/write module parameter was
> better. :-p
> -Chris
I considered this, but I wasn't sure how to manage the sysfs parameters,
and prevent users from doing stupid things. Furthermore, I think to be
correct we must del_timer_sync() the timer if the user requests an interval
of 0, and we can only do that if we hold struct_mutex (again the sysfs
problem).
Ben
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/i915: hangcheck timeout for debugfs
2011-06-24 15:46 ` Ben Widawsky
@ 2011-06-25 9:23 ` Chris Wilson
2011-06-26 0:20 ` Ben Widawsky
0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2011-06-25 9:23 UTC (permalink / raw)
To: Ben Widawsky; +Cc: intel-gfx
On Fri, 24 Jun 2011 08:46:53 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Fri, Jun 24, 2011 at 12:48:22AM +0100, Chris Wilson wrote:
> > On Thu, 23 Jun 2011 15:49:14 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > > Provide a user accessible way to change the hangcheck timer. This is
> > > useful mostly for disabling the timer completely (value <= 0).
> >
> > Having i915.hangcheck_interval as a read/write module parameter was
> > better. :-p
> > -Chris
>
> I considered this, but I wasn't sure how to manage the sysfs parameters,
> and prevent users from doing stupid things. Furthermore, I think to be
> correct we must delete sync the timer if the user requests an interval
> of 0, and we can only do that if we have struct mutex (again the sysfs
> problem).
You can register a callback for when the parameter changes, but in
this case it is as easy as deleting the timer in the next hangcheck before
touching any GPU state. In that scenario the timer will only be enabled
again after the next execbuffer; that's a restriction I can live with for
simple code and keeping parameters out of debugfs.
On the other hand, a debugfs would allow for a per-device parameter. For
that day in the far far future with multiprocessor igfx. Surreal isn't it?
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/i915: hangcheck timeout for debugfs
2011-06-25 9:23 ` Chris Wilson
@ 2011-06-26 0:20 ` Ben Widawsky
2011-06-26 8:38 ` Chris Wilson
0 siblings, 1 reply; 6+ messages in thread
From: Ben Widawsky @ 2011-06-26 0:20 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Sat, Jun 25, 2011 at 10:23:28AM +0100, Chris Wilson wrote:
> On Fri, 24 Jun 2011 08:46:53 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > On Fri, Jun 24, 2011 at 12:48:22AM +0100, Chris Wilson wrote:
> > > On Thu, 23 Jun 2011 15:49:14 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > > > Provide a user accessible way to change the hangcheck timer. This is
> > > > useful mostly for disabling the timer completely (value <= 0).
> > >
> > > Having i915.hangcheck_interval as a read/write module parameter was
> > > better. :-p
> > > -Chris
> >
> > I considered this, but I wasn't sure how to manage the sysfs parameters,
> > and prevent users from doing stupid things. Furthermore, I think to be
> > correct we must delete sync the timer if the user requests an interval
> > of 0, and we can only do that if we have struct mutex (again the sysfs
> > problem).
>
> You can either register a callback for when the parameter changes, but in
> this case it is as easy as deleting the timer in the next hangcheck before
> touching any GPU state. In that scenario the timer will only be enabled
> again after the next execbuffers, that's a restriction I can live with for
> simple code and keeping parameters out of debugfs.
>
> On the other hand, a debugfs would allow for a per-device parameter. For
> that day in the far far future with multiprocessor igfx. Surreal isn't it?
> -Chris
So what's the verdict? In terms of LOC, your suggestion would probably be
smaller, but in terms of complexity I actually think the current patch
would be easier to understand, although to be fair, I didn't actually
try coding it to see.
If you feel strongly about a module parameter being the better solution,
I will code it up.
Ben
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/i915: hangcheck timeout for debugfs
2011-06-26 0:20 ` Ben Widawsky
@ 2011-06-26 8:38 ` Chris Wilson
0 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2011-06-26 8:38 UTC (permalink / raw)
To: Ben Widawsky; +Cc: intel-gfx
On Sat, 25 Jun 2011 17:20:11 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> If you feel strongly about a module parameter being the better solution,
> I will code it up.
The more I think about it, the more opposed I am to putting
parameters in debugfs, or more correctly being caught putting them there.
;-)
So please give the check for extinction of the timer in hangcheck a whirl.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2011-06-26 8:38 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-23 22:49 [PATCH] drm/i915: hangcheck timeout for debugfs Ben Widawsky
2011-06-23 23:48 ` Chris Wilson
2011-06-24 15:46 ` Ben Widawsky
2011-06-25 9:23 ` Chris Wilson
2011-06-26 0:20 ` Ben Widawsky
2011-06-26 8:38 ` Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.