* [PATCH 1/2] drm/i915: properly lock gt_fifo_count @ 2011-11-06 0:31 Daniel Vetter 2011-11-06 0:31 ` [PATCH 2/2] drm/i915: protect force_wake_(get|put) with the gt_lock Daniel Vetter 2011-11-06 0:41 ` [PATCH 1/2] drm/i915: properly lock gt_fifo_count Daniel Vetter 0 siblings, 2 replies; 10+ messages in thread From: Daniel Vetter @ 2011-11-06 0:31 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter Use a combination of atomic_t and a spinlocked slow-path to make most writes fast. Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- drivers/gpu/drm/i915/i915_drv.c | 28 ++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_drv.h | 8 +++++--- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 548e04b..cefc3b9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -377,17 +377,29 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) { - if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { - int loop = 500; - u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); - while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { - udelay(10); + unsigned long irqflags; + int loop; + u32 fifo; + + if (atomic_sub_return(1, &dev_priv->gt_fifo_count) + < GT_FIFO_NUM_RESERVED_ENTRIES) { + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + /* Check whether we've raced with somebody else reading the fifo + * counter for the hw. */ + if (atomic_sub_return(1, &dev_priv->gt_fifo_count) + < GT_FIFO_FREE_ENTRIES) { + loop = 500; fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { + udelay(10); + fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + } + WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES); + fifo = fifo ? fifo - 1 : 0; /* prevent underflow */ + atomic_set(&dev_priv->gt_fifo_count, fifo); } - WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES); - dev_priv->gt_fifo_count = fifo; + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } - dev_priv->gt_fifo_count--; } static int i915_drm_freeze(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd98fb3..74cf78e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -276,7 +276,11 @@ typedef struct drm_i915_private { int relative_constants_mode; void __iomem *regs; - u32 gt_fifo_count; + /** Concurrent writes to gt_fifo_count are protected by gt_lock */ + atomic_t gt_fifo_count; + atomic_t forcewake_count; + /** gt_lock is taking in irq contexts. */ + struct spinlock gt_lock; struct intel_gmbus { struct i2c_adapter adapter; @@ -725,8 +729,6 @@ typedef struct drm_i915_private { struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; - - atomic_t forcewake_count; } drm_i915_private_t; enum i915_cache_level { -- 1.7.6.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/2] drm/i915: protect force_wake_(get|put) with the gt_lock 2011-11-06 0:31 [PATCH 1/2] drm/i915: properly lock gt_fifo_count Daniel Vetter @ 2011-11-06 0:31 ` Daniel Vetter 2011-11-06 0:41 ` [PATCH 1/2] drm/i915: properly lock gt_fifo_count Daniel Vetter 1 sibling, 0 replies; 10+ messages in thread From: Daniel Vetter @ 2011-11-06 0:31 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter We don't have any read in a fastpath that needs forcewake, so I've decided to not care much about overhead. This prevents tests/gem_hangcheck_forcewake from i-g-t from killing my snb on recent kernels - something must have slightly changed the timings. Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++++-- drivers/gpu/drm/i915/i915_drv.c | 19 +++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 3 ++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5ba63ad..51b21eb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1320,9 +1320,13 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; + unsigned forcewake_count; - seq_printf(m, "forcewake count = %d\n", - atomic_read(&dev_priv->forcewake_count)); + spin_lock_irq(&dev_priv->gt_lock); + forcewake_count = dev_priv->forcewake_count; + spin_unlock_irq(&dev_priv->gt_lock); + + seq_printf(m, "forcewake count = %d\n", forcewake_count); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index cefc3b9..14753f5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -351,11 +351,12 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; - /* Forcewake is atomic in case we get in here without the lock */ - if (atomic_add_return(1, &dev_priv->forcewake_count) == 1) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count++ == 0) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) @@ -369,10 +370,13 @@ static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; + + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); - if (atomic_dec_and_test(&dev_priv->forcewake_count)) + if (--dev_priv->forcewake_count == 0) __gen6_gt_force_wake_put(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) @@ -615,6 +619,7 @@ int i915_reset(struct drm_device *dev, u8 flags) * need to */ bool need_display = true; + unsigned long irqflags; int ret; if (!i915_try_reset) @@ -633,8 +638,10 @@ int i915_reset(struct drm_device *dev, u8 flags) case 6: ret = gen6_do_reset(dev, flags); /* If reset with a user forcewake, try to restore */ - if (atomic_read(&dev_priv->forcewake_count)) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); break; case 5: ret = ironlake_do_reset(dev, flags); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 74cf78e..cbac640 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -278,7 +278,8 @@ typedef struct drm_i915_private { void __iomem *regs; /** Concurrent writes to gt_fifo_count are protected by gt_lock */ atomic_t gt_fifo_count; - atomic_t forcewake_count; + /** forcewake_count is protected by gt_lock */ + unsigned forcewake_count; /** gt_lock is taking in irq contexts. */ struct spinlock gt_lock; -- 1.7.6.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 1/2] drm/i915: properly lock gt_fifo_count 2011-11-06 0:31 [PATCH 1/2] drm/i915: properly lock gt_fifo_count Daniel Vetter 2011-11-06 0:31 ` [PATCH 2/2] drm/i915: protect force_wake_(get|put) with the gt_lock Daniel Vetter @ 2011-11-06 0:41 ` Daniel Vetter 2011-11-06 8:39 ` Chris Wilson 1 sibling, 1 reply; 10+ messages in thread From: Daniel Vetter @ 2011-11-06 0:41 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter Use a combination of atomic_t and a spinlocked slow-path to make most writes fast. v2: Properly initialize the gt_lock. Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.c | 28 ++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_drv.h | 8 +++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2eac955..ab3a3fd 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2031,6 +2031,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_I945G(dev) && !IS_I945GM(dev)) pci_enable_msi(dev->pdev); + spin_lock_init(&dev_priv->gt_lock); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->error_lock); spin_lock_init(&dev_priv->rps_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 548e04b..cefc3b9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -377,17 +377,29 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) { - if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { - int loop = 500; - u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); - while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { - udelay(10); + unsigned long irqflags; + int loop; + u32 fifo; + + if (atomic_sub_return(1, &dev_priv->gt_fifo_count) + < GT_FIFO_NUM_RESERVED_ENTRIES) { + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + /* Check whether we've raced with somebody else reading the fifo + * counter for the hw. */ + if (atomic_sub_return(1, &dev_priv->gt_fifo_count) + < GT_FIFO_FREE_ENTRIES) { + loop = 500; fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { + udelay(10); + fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + } + WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES); + fifo = fifo ? fifo - 1 : 0; /* prevent underflow */ + atomic_set(&dev_priv->gt_fifo_count, fifo); } - WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES); - dev_priv->gt_fifo_count = fifo; + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } - dev_priv->gt_fifo_count--; } static int i915_drm_freeze(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd98fb3..74cf78e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -276,7 +276,11 @@ typedef struct drm_i915_private { int relative_constants_mode; void __iomem *regs; - u32 gt_fifo_count; + /** Concurrent writes to gt_fifo_count are protected by gt_lock */ + atomic_t gt_fifo_count; + atomic_t forcewake_count; + /** gt_lock is taking in irq contexts. */ + struct spinlock gt_lock; struct intel_gmbus { struct i2c_adapter adapter; @@ -725,8 +729,6 @@ typedef struct drm_i915_private { struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; - - atomic_t forcewake_count; } drm_i915_private_t; enum i915_cache_level { -- 1.7.6.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] drm/i915: properly lock gt_fifo_count 2011-11-06 0:41 ` [PATCH 1/2] drm/i915: properly lock gt_fifo_count Daniel Vetter @ 2011-11-06 8:39 ` Chris Wilson 2011-11-06 10:46 ` Daniel Vetter 0 siblings, 1 reply; 10+ messages in thread From: Chris Wilson @ 2011-11-06 8:39 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter On Sun, 6 Nov 2011 01:41:35 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote: > Use a combination of atomic_t and a spinlocked slow-path to make most > writes fast. What happened to the rule that this was protected by struct_mutex? Did you find a violation? Or is this step 1 in the hundred step plan to clarify and fix the locking around register access? -Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/2] drm/i915: properly lock gt_fifo_count 2011-11-06 8:39 ` Chris Wilson @ 2011-11-06 10:46 ` Daniel Vetter 2011-11-06 11:31 ` [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock Daniel Vetter 0 siblings, 1 reply; 10+ messages in thread From: Daniel Vetter @ 2011-11-06 10:46 UTC (permalink / raw) To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx On Sun, Nov 06, 2011 at 08:39:46AM +0000, Chris Wilson wrote: > On Sun, 6 Nov 2011 01:41:35 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote: > > Use a combination of atomic_t and a spinlocked slow-path to make most > > writes fast. > > What happened to the rule that this was protected by struct_mutex? > Did you find a violation? Or is this step 1 in the hundred step plan to > clarify and fix the locking around register access? The real problem is the read side, which isn't really protected by struct_mutex everywhere. And we can't change that because we want to capture the error_state without taking struct_mutex. Somehow I've gotten a bit overenthusiastic about all this and decided to start with that 5 year plan to clean up our locking around register access. After some more thinking this morning I've noticed that my trick is fundamentally racy. Before I embarass myself even more I'll drop this patch and just resend the read side locking fix so that the hangcheck won't kill my machine anymore. -Daniel -- Daniel Vetter Mail: daniel@ffwll.ch Mobile: +41 (0)79 365 57 48 ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock 2011-11-06 10:46 ` Daniel Vetter @ 2011-11-06 11:31 ` Daniel Vetter 2011-11-06 11:57 ` Chris Wilson 0 siblings, 1 reply; 10+ messages in thread From: Daniel Vetter @ 2011-11-06 11:31 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter We don't have any read in a fastpath that needs forcewake, so I've decided to not care much about overhead. This prevents tests/gem_hangcheck_forcewake from i-g-t from killing my snb on recent kernels - something must have slightly changed the timings. v2: Drop the previous patch for the register writes. Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++++-- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.c | 19 +++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 10 +++++++--- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5ba63ad..51b21eb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1320,9 +1320,13 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; + unsigned forcewake_count; - seq_printf(m, "forcewake count = %d\n", - atomic_read(&dev_priv->forcewake_count)); + spin_lock_irq(&dev_priv->gt_lock); + forcewake_count = dev_priv->forcewake_count; + spin_unlock_irq(&dev_priv->gt_lock); + + seq_printf(m, "forcewake count = %d\n", forcewake_count); return 0; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2eac955..ab3a3fd 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2031,6 +2031,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_I945G(dev) && !IS_I945GM(dev)) pci_enable_msi(dev->pdev); + spin_lock_init(&dev_priv->gt_lock); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->error_lock); spin_lock_init(&dev_priv->rps_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 548e04b..bab4e08 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -351,11 +351,12 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; - /* Forcewake is atomic in case we get in here without the lock */ - if (atomic_add_return(1, &dev_priv->forcewake_count) == 1) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count++ == 0) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) @@ -369,10 +370,13 @@ static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; - if (atomic_dec_and_test(&dev_priv->forcewake_count)) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + + if (--dev_priv->forcewake_count == 0) __gen6_gt_force_wake_put(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) @@ -603,6 +607,7 @@ int i915_reset(struct drm_device *dev, u8 flags) * need to */ bool need_display = true; + unsigned long irqflags; int ret; if (!i915_try_reset) @@ -621,8 +626,10 @@ int i915_reset(struct drm_device *dev, u8 flags) case 6: ret = gen6_do_reset(dev, flags); /* If reset with a user forcewake, try to restore */ - if (atomic_read(&dev_priv->forcewake_count)) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); break; case 5: ret = ironlake_do_reset(dev, flags); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd98fb3..25036f5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -276,7 +276,13 @@ typedef struct drm_i915_private { int relative_constants_mode; void __iomem *regs; - u32 gt_fifo_count; + /** gt_fifo_count and the subsequent register write are synchronized + * with dev->struct_mutex. */ + unsigned gt_fifo_count; + /** forcewake_count is protected by gt_lock */ + unsigned forcewake_count; + /** gt_lock is also taken in irq contexts. */ + struct spinlock gt_lock; struct intel_gmbus { struct i2c_adapter adapter; @@ -725,8 +731,6 @@ typedef struct drm_i915_private { struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; - - atomic_t forcewake_count; } drm_i915_private_t; enum i915_cache_level { -- 1.7.6.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock 2011-11-06 11:31 ` [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock Daniel Vetter @ 2011-11-06 11:57 ` Chris Wilson 2011-11-06 12:35 ` Daniel Vetter 0 siblings, 1 reply; 10+ messages in thread From: Chris Wilson @ 2011-11-06 11:57 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter On Sun, 6 Nov 2011 12:31:34 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote: > We don't have any read in a fastpath that needs forcewake, so I've > decided to not care much about overhead. > > This prevents tests/gem_hangcheck_forcewake from i-g-t from killing my > snb on recent kernels - something must have slightly changed the > timings. Almost there. You just haven't explained the rationale for *this* patch, which is that hangcheck needs to acquire the forcewake in order to read the registers and hangcheck must not take the struct_mutex (or else deadlock with wait_request and a hung GPU). So there is a choice here: introduce a new locking rule for forcewake, or use the existing struct_mutex inside hangcheck and therefore drop the mutex for wait_request. The first definitely feels safer than dropping struct_mutex on waits, and I haven't thought of any tangible benefits for doing so (other than concurrent clients might see an improvement). -Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock 2011-11-06 11:57 ` Chris Wilson @ 2011-11-06 12:35 ` Daniel Vetter 2011-11-06 21:01 ` Chris Wilson 0 siblings, 1 reply; 10+ messages in thread From: Daniel Vetter @ 2011-11-06 12:35 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter The problem this patch solves is that the forcewake accounting necessary for register reads is protected by dev->struct_mutex. But the hangcheck and error_capture code need to access registers without grabbing this mutex because we hold it while waiting for the gpu. So a new lock is required. Because currently the error_state capture is called from the error irq handler and the hangcheck code runs from a timer, it needs to be an irqsafe spinlock (note that the registers used by the irq handler (neglecting the error handling part) only uses registers that don't need the forcewake dance). We could tune this down to a normal spinlock when we rework the error_state capture and hangcheck code to run from a workqueue. But we don't have any read in a fastpath that needs forcewake, so I've decided to not care much about overhead. This prevents tests/gem_hangcheck_forcewake from i-g-t from killing my snb on recent kernels - something must have slightly changed the timings. On previous kernels it only trigger a WARN about the broken locking. v2: Drop the previous patch for the register writes. v3: Improve the commit message per Chris Wilson's suggestions. Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> --- drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++++-- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.c | 19 +++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 10 +++++++--- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5ba63ad..51b21eb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1320,9 +1320,13 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; + unsigned forcewake_count; - seq_printf(m, "forcewake count = %d\n", - atomic_read(&dev_priv->forcewake_count)); + spin_lock_irq(&dev_priv->gt_lock); + forcewake_count = dev_priv->forcewake_count; + spin_unlock_irq(&dev_priv->gt_lock); + + seq_printf(m, "forcewake count = %d\n", forcewake_count); return 0; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2eac955..ab3a3fd 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2031,6 +2031,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_I945G(dev) && !IS_I945GM(dev)) pci_enable_msi(dev->pdev); + spin_lock_init(&dev_priv->gt_lock); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->error_lock); spin_lock_init(&dev_priv->rps_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 548e04b..bab4e08 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -351,11 +351,12 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; - /* Forcewake is atomic in case we get in here without the lock */ - if (atomic_add_return(1, &dev_priv->forcewake_count) == 1) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count++ == 0) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) @@ -369,10 +370,13 @@ static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; - if (atomic_dec_and_test(&dev_priv->forcewake_count)) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + + if (--dev_priv->forcewake_count == 0) __gen6_gt_force_wake_put(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) @@ -603,6 +607,7 @@ int i915_reset(struct drm_device *dev, u8 flags) * need to */ bool need_display = true; + unsigned long irqflags; int ret; if (!i915_try_reset) @@ -621,8 +626,10 @@ int i915_reset(struct drm_device *dev, u8 flags) case 6: ret = gen6_do_reset(dev, flags); /* If reset with a user forcewake, try to restore */ - if (atomic_read(&dev_priv->forcewake_count)) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); break; case 5: ret = ironlake_do_reset(dev, flags); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd98fb3..25036f5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -276,7 +276,13 @@ typedef struct drm_i915_private { int relative_constants_mode; void __iomem *regs; - u32 gt_fifo_count; + /** gt_fifo_count and the subsequent register write are synchronized + * with dev->struct_mutex. */ + unsigned gt_fifo_count; + /** forcewake_count is protected by gt_lock */ + unsigned forcewake_count; + /** gt_lock is also taken in irq contexts. */ + struct spinlock gt_lock; struct intel_gmbus { struct i2c_adapter adapter; @@ -725,8 +731,6 @@ typedef struct drm_i915_private { struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; - - atomic_t forcewake_count; } drm_i915_private_t; enum i915_cache_level { -- 1.7.6.4 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock 2011-11-06 12:35 ` Daniel Vetter @ 2011-11-06 21:01 ` Chris Wilson 2011-11-06 22:06 ` Daniel Vetter 0 siblings, 1 reply; 10+ messages in thread From: Chris Wilson @ 2011-11-06 21:01 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter On Sun, 6 Nov 2011 13:35:22 +0100, Daniel Vetter <daniel.vetter@ffwll.ch> wrote: > The problem this patch solves is that the forcewake accounting > necessary for register reads is protected by dev->struct_mutex. But the > hangcheck and error_capture code need to access registers without > grabbing this mutex because we hold it while waiting for the gpu. > So a new lock is required. Because currently the error_state capture > is called from the error irq handler and the hangcheck code runs from > a timer, it needs to be an irqsafe spinlock (note that the registers > used by the irq handler (neglecting the error handling part) only uses > registers that don't need the forcewake dance). > > We could tune this down to a normal spinlock when we rework the > error_state capture and hangcheck code to run from a workqueue. But > we don't have any read in a fastpath that needs forcewake, so I've > decided to not care much about overhead. > > This prevents tests/gem_hangcheck_forcewake from i-g-t from killing my > snb on recent kernels - something must have slightly changed the > timings. On previous kernels it only trigger a WARN about the broken > locking. > > v2: Drop the previous patch for the register writes. > > v3: Improve the commit message per Chris Wilson's suggestions. > > Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> One minor oddity left ;-) > --- > drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++++-- > drivers/gpu/drm/i915/i915_dma.c | 1 + > drivers/gpu/drm/i915/i915_drv.c | 19 +++++++++++++------ > drivers/gpu/drm/i915/i915_drv.h | 10 +++++++--- > 4 files changed, 27 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 5ba63ad..51b21eb 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1320,9 +1320,13 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) > struct drm_info_node *node = (struct drm_info_node *) m->private; > struct drm_device *dev = node->minor->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > + unsigned forcewake_count; > > - seq_printf(m, "forcewake count = %d\n", > - atomic_read(&dev_priv->forcewake_count)); > + spin_lock_irq(&dev_priv->gt_lock); > + forcewake_count = dev_priv->forcewake_count; > + spin_unlock_irq(&dev_priv->gt_lock); > + > + seq_printf(m, "forcewake count = %d\n", forcewake_count); Is it signed or unsigned? Who cares? ;-) -Chris -- Chris Wilson, Intel Open Source Technology Centre ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock 2011-11-06 21:01 ` Chris Wilson @ 2011-11-06 22:06 ` Daniel Vetter 0 siblings, 0 replies; 10+ messages in thread From: Daniel Vetter @ 2011-11-06 22:06 UTC (permalink / raw) To: intel-gfx; +Cc: Daniel Vetter The problem this patch solves is that the forcewake accounting necessary for register reads is protected by dev->struct_mutex. But the hangcheck and error_capture code need to access registers without grabbing this mutex because we hold it while waiting for the gpu. So a new lock is required. Because currently the error_state capture is called from the error irq handler and the hangcheck code runs from a timer, it needs to be an irqsafe spinlock (note that the registers used by the irq handler (neglecting the error handling part) only uses registers that don't need the forcewake dance). We could tune this down to a normal spinlock when we rework the error_state capture and hangcheck code to run from a workqueue. But we don't have any read in a fastpath that needs forcewake, so I've decided to not care much about overhead. This prevents tests/gem_hangcheck_forcewake from i-g-t from killing my snb on recent kernels - something must have slightly changed the timings. On previous kernels it only trigger a WARN about the broken locking. v2: Drop the previous patch for the register writes. v3: Improve the commit message per Chris Wilson's suggestions. Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_debugfs.c | 8 ++++++-- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.c | 19 +++++++++++++------ drivers/gpu/drm/i915/i915_drv.h | 10 +++++++--- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5ba63ad..397f76c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1320,9 +1320,13 @@ static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; + unsigned forcewake_count; - seq_printf(m, "forcewake count = %d\n", - atomic_read(&dev_priv->forcewake_count)); + spin_lock_irq(&dev_priv->gt_lock); + forcewake_count = dev_priv->forcewake_count; + spin_unlock_irq(&dev_priv->gt_lock); + + seq_printf(m, "forcewake count = %u\n", forcewake_count); return 0; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2eac955..ab3a3fd 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2031,6 +2031,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_I945G(dev) && !IS_I945GM(dev)) pci_enable_msi(dev->pdev); + spin_lock_init(&dev_priv->gt_lock); spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->error_lock); spin_lock_init(&dev_priv->rps_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 548e04b..bab4e08 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -351,11 +351,12 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; - /* Forcewake is atomic in case we get in here without the lock */ - if (atomic_add_return(1, &dev_priv->forcewake_count) == 1) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count++ == 0) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) @@ -369,10 +370,13 @@ static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) */ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) { - WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); + unsigned long irqflags; - if (atomic_dec_and_test(&dev_priv->forcewake_count)) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + + if (--dev_priv->forcewake_count == 0) __gen6_gt_force_wake_put(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); } void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) @@ -603,6 +607,7 @@ int i915_reset(struct drm_device *dev, u8 flags) * need to */ bool need_display = true; + unsigned long irqflags; int ret; if (!i915_try_reset) @@ -621,8 +626,10 @@ int i915_reset(struct drm_device *dev, u8 flags) case 6: ret = gen6_do_reset(dev, flags); /* If reset with a user forcewake, try to restore */ - if (atomic_read(&dev_priv->forcewake_count)) + spin_lock_irqsave(&dev_priv->gt_lock, irqflags); + if (dev_priv->forcewake_count) __gen6_gt_force_wake_get(dev_priv); + spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); break; case 5: ret = ironlake_do_reset(dev, flags); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd98fb3..25036f5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -276,7 +276,13 @@ typedef struct drm_i915_private { int relative_constants_mode; void __iomem *regs; - u32 gt_fifo_count; + /** gt_fifo_count and the subsequent register write are synchronized + * with dev->struct_mutex. */ + unsigned gt_fifo_count; + /** forcewake_count is protected by gt_lock */ + unsigned forcewake_count; + /** gt_lock is also taken in irq contexts. */ + struct spinlock gt_lock; struct intel_gmbus { struct i2c_adapter adapter; @@ -725,8 +731,6 @@ typedef struct drm_i915_private { struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; - - atomic_t forcewake_count; } drm_i915_private_t; enum i915_cache_level { -- 1.7.6.3 ^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2011-11-06 22:07 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2011-11-06 0:31 [PATCH 1/2] drm/i915: properly lock gt_fifo_count Daniel Vetter 2011-11-06 0:31 ` [PATCH 2/2] drm/i915: protect force_wake_(get|put) with the gt_lock Daniel Vetter 2011-11-06 0:41 ` [PATCH 1/2] drm/i915: properly lock gt_fifo_count Daniel Vetter 2011-11-06 8:39 ` Chris Wilson 2011-11-06 10:46 ` Daniel Vetter 2011-11-06 11:31 ` [PATCH] drm/i915: protect force_wake_(get|put) with the gt_lock Daniel Vetter 2011-11-06 11:57 ` Chris Wilson 2011-11-06 12:35 ` Daniel Vetter 2011-11-06 21:01 ` Chris Wilson 2011-11-06 22:06 ` Daniel Vetter
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox