* [RFC] drm/i915: Reduce locking in command submission
@ 2014-12-11 15:41 Tvrtko Ursulin
2014-12-15 13:06 ` Daniel Vetter
2015-01-15 11:21 ` [PATCH] " Tvrtko Ursulin
0 siblings, 2 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2014-12-11 15:41 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Eliminate six needless spin lock/unlock pairs when writing ELSP.
RFC for now with some #define copy and paste.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a82020e..f2f4a28 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -276,6 +276,10 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
return desc;
}
+#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
+#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
+#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
+
static void execlists_elsp_write(struct intel_engine_cs *ring,
struct drm_i915_gem_object *ctx_obj0,
struct drm_i915_gem_object *ctx_obj1)
@@ -323,19 +327,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
dev_priv->uncore.funcs.force_wake_get(dev_priv,
FORCEWAKE_ALL);
}
- spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
- I915_WRITE(RING_ELSP(ring), desc[1]);
- I915_WRITE(RING_ELSP(ring), desc[0]);
- I915_WRITE(RING_ELSP(ring), desc[3]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[1]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[0]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[3]);
/* The context is automatically loaded after the following */
- I915_WRITE(RING_ELSP(ring), desc[2]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[2]);
/* ELSP is a wo register, so use another nearby reg for posting instead */
- POSTING_READ(RING_EXECLIST_STATUS(ring));
+ __raw_posting_read(dev_priv, RING_EXECLIST_STATUS(ring));
/* Release Force Wakeup (see the big comment above). */
- spin_lock_irqsave(&dev_priv->uncore.lock, flags);
if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
if (--dev_priv->uncore.fw_rendercount == 0)
dev_priv->uncore.funcs.force_wake_put(dev_priv,
--
2.1.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [RFC] drm/i915: Reduce locking in command submission
2014-12-11 15:41 [RFC] drm/i915: Reduce locking in command submission Tvrtko Ursulin
@ 2014-12-15 13:06 ` Daniel Vetter
2014-12-16 13:34 ` Tvrtko Ursulin
2015-01-14 10:13 ` Tvrtko Ursulin
2015-01-15 11:21 ` [PATCH] " Tvrtko Ursulin
1 sibling, 2 replies; 9+ messages in thread
From: Daniel Vetter @ 2014-12-15 13:06 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: Intel-gfx
On Thu, Dec 11, 2014 at 03:41:34PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Eliminate six needless spin lock/unlock pairs when writing ELSP.
>
> RFC for now with some #define copy and paste.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Dave Gordon <david.s.gordon@intel.com>
Yeah makes sense. I'm on the fence whether we should do an all-uppercase
conversion of the raw mmio macros, would be a notch more consistent. And
some perf data for this patch would be good, too.
-Daniel
> ---
> drivers/gpu/drm/i915/intel_lrc.c | 16 +++++++++-------
> 1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a82020e..f2f4a28 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -276,6 +276,10 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
> return desc;
> }
>
> +#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
> +#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
> +#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
> +
> static void execlists_elsp_write(struct intel_engine_cs *ring,
> struct drm_i915_gem_object *ctx_obj0,
> struct drm_i915_gem_object *ctx_obj1)
> @@ -323,19 +327,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
> dev_priv->uncore.funcs.force_wake_get(dev_priv,
> FORCEWAKE_ALL);
> }
> - spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>
> - I915_WRITE(RING_ELSP(ring), desc[1]);
> - I915_WRITE(RING_ELSP(ring), desc[0]);
> - I915_WRITE(RING_ELSP(ring), desc[3]);
> + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[1]);
> + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[0]);
> + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[3]);
> /* The context is automatically loaded after the following */
> - I915_WRITE(RING_ELSP(ring), desc[2]);
> + __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[2]);
>
> /* ELSP is a wo register, so use another nearby reg for posting instead */
> - POSTING_READ(RING_EXECLIST_STATUS(ring));
> + __raw_posting_read(dev_priv, RING_EXECLIST_STATUS(ring));
>
> /* Release Force Wakeup (see the big comment above). */
> - spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
> if (--dev_priv->uncore.fw_rendercount == 0)
> dev_priv->uncore.funcs.force_wake_put(dev_priv,
> --
> 2.1.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC] drm/i915: Reduce locking in command submission
2014-12-15 13:06 ` Daniel Vetter
@ 2014-12-16 13:34 ` Tvrtko Ursulin
2015-01-14 10:13 ` Tvrtko Ursulin
1 sibling, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2014-12-16 13:34 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Intel-gfx
On 12/15/2014 01:06 PM, Daniel Vetter wrote:
> On Thu, Dec 11, 2014 at 03:41:34PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Eliminate six needless spin lock/unlock pairs when writing ELSP.
>>
>> RFC for now with some #define copy and paste.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>
> Yeah makes sense. I'm on the fence whether we should do an all-uppercase
> conversion of the raw mmio macros, would be a notch more consistent. And
> some perf data for this patch would be good, too.
I know perf data would be good but I had no time to set up a suitable
platform for testing. This was more like a drive-by since it's not
pretty so it annoyed me. Will see if I can get back to this in the near
future.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC] drm/i915: Reduce locking in command submission
2014-12-15 13:06 ` Daniel Vetter
2014-12-16 13:34 ` Tvrtko Ursulin
@ 2015-01-14 10:13 ` Tvrtko Ursulin
1 sibling, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2015-01-14 10:13 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Intel-gfx, Ben Widawsky
On 12/15/2014 01:06 PM, Daniel Vetter wrote:
> On Thu, Dec 11, 2014 at 03:41:34PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Eliminate six needless spin lock/unlock pairs when writing ELSP.
>>
>> RFC for now with some #define copy and paste.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>
> Yeah makes sense. I'm on the fence whether we should do an all-uppercase
> conversion of the raw mmio macros, would be a notch more consistent. And
> some perf data for this patch would be good, too.
With regards to perf data, Ben Widawsky was kind enough to give this
patch a spin on his perf test bed (CHV), on a range of OGL benchmarks.
Apparently only two results have "confidence t-score" > 95% (statistics
is not my area), bench_OglBatch4 and bench_OglDeferred which show 0.51%
and 0.73% gains respectively.
Looking just on the basis of those two, I'd say the patch is worth
cleaning up since it is a good gain for such a simple change.
Other results show anything from 4.29% slowdown (!*)
(bench_OglTexFilterAniso) to a 7.08% gain (bench_OglMultithreaded).
Average across all benchmarks is a 0.38% gain.
Thoughts?
Regards,
Tvrtko
* I can't really understand regressions for some tests?!
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] drm/i915: Reduce locking in command submission
2014-12-11 15:41 [RFC] drm/i915: Reduce locking in command submission Tvrtko Ursulin
2014-12-15 13:06 ` Daniel Vetter
@ 2015-01-15 11:21 ` Tvrtko Ursulin
2015-01-15 16:54 ` Ben Widawsky
2015-01-16 0:19 ` shuang.he
1 sibling, 2 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2015-01-15 11:21 UTC (permalink / raw)
To: Intel-gfx; +Cc: Daniel Vetter, Ben Widawsky
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
This eliminates six needless spin lock/unlock pairs when writing out ELSP. Apart
from tidier code, the main benefit is a speedup of 0.51% and 0.73% on two OGL
tests under CHV (bench_OglBatch4 and bench_OglDeferred respectively).
Kindly benchmarked by Ben Widawsky.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ben Widawsky <ben@bwidawsk.net>
---
drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 13 ++++++-------
drivers/gpu/drm/i915/intel_uncore.c | 14 --------------
3 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 66f0c60..33d577a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3197,6 +3197,21 @@ int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
#define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg)
#define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg)
+/* Raw MMIO access with no forcewake handling, use with care. */
+#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__))
+#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__))
+#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
+#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__))
+#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
+
/* "Broadcast RGB" property */
#define INTEL_BROADCAST_RGB_AUTO 0
#define INTEL_BROADCAST_RGB_FULL 1
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e405b61..e22b866 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -305,6 +305,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
* Instead, we do the runtime_pm_get/put when creating/destroying requests.
*/
spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+
if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
if (dev_priv->uncore.fw_rendercount++ == 0)
dev_priv->uncore.funcs.force_wake_get(dev_priv,
@@ -322,19 +323,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
dev_priv->uncore.funcs.force_wake_get(dev_priv,
FORCEWAKE_ALL);
}
- spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
- I915_WRITE(RING_ELSP(ring), desc[1]);
- I915_WRITE(RING_ELSP(ring), desc[0]);
- I915_WRITE(RING_ELSP(ring), desc[3]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[1]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[0]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[3]);
/* The context is automatically loaded after the following */
- I915_WRITE(RING_ELSP(ring), desc[2]);
+ __raw_i915_write32(dev_priv, RING_ELSP(ring), desc[2]);
/* ELSP is a wo register, so use another nearby reg for posting instead */
- POSTING_READ(RING_EXECLIST_STATUS(ring));
+ __raw_posting_read(dev_priv, RING_EXECLIST_STATUS(ring));
/* Release Force Wakeup (see the big comment above). */
- spin_lock_irqsave(&dev_priv->uncore.lock, flags);
if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
if (--dev_priv->uncore.fw_rendercount == 0)
dev_priv->uncore.funcs.force_wake_put(dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e9561de..9a31932 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -26,20 +26,6 @@
#define FORCEWAKE_ACK_TIMEOUT_MS 2
-#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__))
-#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__))
-#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
-#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__))
-#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
-
static void
assert_device_not_suspended(struct drm_i915_private *dev_priv)
{
--
2.2.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/i915: Reduce locking in command submission
2015-01-15 11:21 ` [PATCH] " Tvrtko Ursulin
@ 2015-01-15 16:54 ` Ben Widawsky
2015-01-15 17:05 ` Tvrtko Ursulin
2015-01-16 0:19 ` shuang.he
1 sibling, 1 reply; 9+ messages in thread
From: Ben Widawsky @ 2015-01-15 16:54 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: Daniel Vetter, Intel-gfx
On Thu, Jan 15, 2015 at 11:21:30AM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> This eliminates six needless spin lock/unlock pairs when writing out ELSP. Apart
> from tidier code main benefit is between 0.51% and 0.73% speedup on some OGL
> tests under CHV (bench_OglBatch4 bench_OglDeferred respectively).
With 95% confidence t-test on n=5
>
> Kindly benchmarked by Ben Widawsky.
FWIW, as I mentioned on IRC, I think the reduction of the unnecessary forcewake
(someone should fix the shadow register list) is probably more beneficial than
removing the spin on an uncontested lock. I was tempted to try that myself, but
I didn't have time or much interest since your patch accomplishes the same
thing.
The sucky thing, which I actually care about since I've been doing a lot of
profiling, is the raw MMIO doesn't show up with our i915 trace functions. It's
obtainable still, but then I get a mess of other stuff I don't want.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ben Widawsky <ben@bwidawsk.net>
[snip]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/i915: Reduce locking in command submission
2015-01-15 16:54 ` Ben Widawsky
@ 2015-01-15 17:05 ` Tvrtko Ursulin
2015-01-15 23:42 ` Ben Widawsky
0 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2015-01-15 17:05 UTC (permalink / raw)
To: Ben Widawsky; +Cc: Daniel Vetter, Intel-gfx
On 01/15/2015 04:54 PM, Ben Widawsky wrote:
> On Thu, Jan 15, 2015 at 11:21:30AM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> This eliminates six needless spin lock/unlock pairs when writing out ELSP. Apart
>> from tidier code main benefit is between 0.51% and 0.73% speedup on some OGL
>> tests under CHV (bench_OglBatch4 bench_OglDeferred respectively).
>
> With 95% confidence t-test on n=5
>
>>
>> Kindly benchmarked by Ben Widawsky.
>
> FWIW, as I mentioned on IRC, I think the reduction of the unnecessary forcewake
> (someone should fix the shadow register list) is probably more beneficial than
> removing the spin on an uncontested lock. I was tempted to try that myself, but
> I didn't have time or much interest since your patch accomplishes the same
> thing.
I missed that IRC discussion, but I don't think it was doing forcewakes
since the outer block in execlists_elsp_write bumps the counters which
made I915_WRITE & co skip them.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/i915: Reduce locking in command submission
2015-01-15 17:05 ` Tvrtko Ursulin
@ 2015-01-15 23:42 ` Ben Widawsky
0 siblings, 0 replies; 9+ messages in thread
From: Ben Widawsky @ 2015-01-15 23:42 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: Daniel Vetter, Intel-gfx
On Thu, Jan 15, 2015 at 05:05:30PM +0000, Tvrtko Ursulin wrote:
>
> On 01/15/2015 04:54 PM, Ben Widawsky wrote:
> >On Thu, Jan 15, 2015 at 11:21:30AM +0000, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>This eliminates six needless spin lock/unlock pairs when writing out ELSP. Apart
> >>from tidier code main benefit is between 0.51% and 0.73% speedup on some OGL
> >>tests under CHV (bench_OglBatch4 bench_OglDeferred respectively).
> >
> >With 95% confidence t-test on n=5
> >
> >>
> >>Kindly benchmarked by Ben Widawsky.
> >
> >FWIW, as I mentioned on IRC, I think the reduction of the unnecessary forcewake
> >(someone should fix the shadow register list) is probably more beneficial than
> >removing the spin on an uncontested lock. I was tempted to try that myself, but
> >I didn't have time or much interest since your patch accomplishes the same
> >thing.
>
> I missed that IRC discussion, but I don't think it was doing forcewakes
> since the outer block in execlists_elsp_write bumps the counters which made
> I915_WRITE & co skip them.
>
> Regards,
>
> Tvrtko
I didn't check the locking but it looks like it could actually get decremented
once the spinlock is released. Probably never happens, but I think it's
possible.
I completely missed that block somehow. I think my eyes skipped over it because
how could getting forcewake take like 10+ lines :D
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/i915: Reduce locking in command submission
2015-01-15 11:21 ` [PATCH] " Tvrtko Ursulin
2015-01-15 16:54 ` Ben Widawsky
@ 2015-01-16 0:19 ` shuang.he
1 sibling, 0 replies; 9+ messages in thread
From: shuang.he @ 2015-01-16 0:19 UTC (permalink / raw)
To: shuang.he, ethan.gao, intel-gfx, tvrtko.ursulin
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5585
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
PNV 353/353 353/353
ILK -1 200/200 199/200
SNB 400/422 400/422
IVB 487/487 487/487
BYT 296/296 296/296
HSW +22-1 486/508 507/508
BDW -1 402/402 401/402
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
*ILK igt_gem_concurrent_blit_gtt-bcs-overwrite-source PASS(2, M37) NO_RESULT(1, M37)
HSW igt_kms_cursor_crc_cursor-size-change NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_kms_fence_pin_leak NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_kms_flip_event_leak NSPT(2, M40)PASS(3, M20) PASS(1, M20)
HSW igt_kms_flip_flip-vs-dpms-off-vs-modeset DMESG_WARN(2, M20M40)PASS(1, M40) DMESG_WARN(1, M20)
HSW igt_kms_mmio_vs_cs_flip_setcrtc_vs_cs_flip NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_kms_mmio_vs_cs_flip_setplane_vs_cs_flip NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_lpsp_non-edp NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_cursor NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_cursor-dpms NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_dpms-mode-unset-non-lpsp NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_dpms-non-lpsp NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_drm-resources-equal NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_fences NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_fences-dpms NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_gem-execbuf NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_gem-mmap-cpu NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_gem-mmap-gtt NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_gem-pread NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_i2c NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_modeset-non-lpsp NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_modeset-non-lpsp-stress-no-wait NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_pci-d3-state NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
HSW igt_pm_rpm_rte NSPT(1, M40)PASS(4, M20M40) PASS(1, M20)
*BDW igt_gem_concurrent_blit_gtt-rcs-early-read-interruptible PASS(7, M30M28) DMESG_WARN(1, M30)
Note: You need to pay more attention to lines starting with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2015-01-16 0:19 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-11 15:41 [RFC] drm/i915: Reduce locking in command submission Tvrtko Ursulin
2014-12-15 13:06 ` Daniel Vetter
2014-12-16 13:34 ` Tvrtko Ursulin
2015-01-14 10:13 ` Tvrtko Ursulin
2015-01-15 11:21 ` [PATCH] " Tvrtko Ursulin
2015-01-15 16:54 ` Ben Widawsky
2015-01-15 17:05 ` Tvrtko Ursulin
2015-01-15 23:42 ` Ben Widawsky
2015-01-16 0:19 ` shuang.he
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox