* [PATCH] drm/i915: blitter ring workaround for gen6
@ 2011-10-03 1:27 Ben Widawsky
2011-10-03 4:20 ` Keith Packard
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Ben Widawsky @ 2011-10-03 1:27 UTC (permalink / raw)
To: intel-gfx; +Cc: Ben Widawsky
Found this through doc inspection. I don't have a failing test case that this
fixes, but the docs specify we need to do it in addition to the A0 workaround.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
drivers/gpu/drm/i915/intel_ringbuffer.c | 58 +++++++++++++++++++++++++++++--
1 files changed, 55 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0e99589..163f734 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1233,13 +1233,18 @@ blt_ring_put_irq(struct intel_ring_buffer *ring)
}
+/* SNB C0 and D0 need ever more workarounds */
+#define NEED_MORE_BLT_WORKAROUND(dev) \
+ (dev->pdev->revision == 9 || dev->pdev->revision == 10)
+
/* Workaround for some stepping of SNB,
* each time when BLT engine ring tail moved,
* the first command in the ring to be parsed
* should be MI_BATCH_BUFFER_START
*/
#define NEED_BLT_WORKAROUND(dev) \
- (IS_GEN6(dev) && (dev->pdev->revision < 8))
+ ((IS_GEN6(dev) && (dev->pdev->revision < 8)) || \
+ (IS_GEN6(dev) && NEED_MORE_BLT_WORKAROUND(dev)))
static inline struct drm_i915_gem_object *
to_blt_workaround(struct intel_ring_buffer *ring)
@@ -1286,10 +1291,20 @@ static int blt_ring_begin(struct intel_ring_buffer *ring,
int num_dwords)
{
if (ring->private) {
- int ret = intel_ring_begin(ring, num_dwords+2);
+ int ret;
+ if (NEED_MORE_BLT_WORKAROUND(ring->dev))
+ num_dwords += 46;
+ else
+ num_dwords += 2;
+ ret = intel_ring_begin(ring, num_dwords);
if (ret)
return ret;
+ if (NEED_MORE_BLT_WORKAROUND(ring->dev)) {
+ int i = 0;
+ for (i = 0; i < 32; i++)
+ intel_ring_emit(ring, MI_NOOP);
+ }
intel_ring_emit(ring, MI_BATCH_BUFFER_START);
intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset);
@@ -1298,6 +1313,25 @@ static int blt_ring_begin(struct intel_ring_buffer *ring,
return intel_ring_begin(ring, 4);
}
+static void blt_ring_begin2(struct intel_ring_buffer *ring)
+{
+ if (!ring->private)
+ return;
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, 0x2209c);
+ intel_ring_emit(ring, 0x20002);
+ intel_ring_emit(ring, MI_FLUSH_DW);
+ intel_ring_emit(ring, 0x2209c);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, 0x2209c);
+ intel_ring_emit(ring, 0x20000);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, 0x2209c);
+ intel_ring_emit(ring, 0x20000);
+ intel_ring_emit(ring, MI_NOOP);
+}
+
static int blt_ring_flush(struct intel_ring_buffer *ring,
u32 invalidate, u32 flush)
{
@@ -1315,10 +1349,28 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
intel_ring_emit(ring, 0);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
+ blt_ring_begin2(ring);
intel_ring_advance(ring);
return 0;
}
+static int
+blt_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+ u32 offset, u32 len)
+{
+ int ret;
+
+ ret = blt_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+ intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+ /* bit0-7 is the length on GEN6+ */
+ intel_ring_emit(ring, offset);
+ blt_ring_begin2(ring);
+ intel_ring_advance(ring);
+
+ return 0;
+}
static void blt_ring_cleanup(struct intel_ring_buffer *ring)
{
if (!ring->private)
@@ -1341,7 +1393,7 @@ static const struct intel_ring_buffer gen6_blt_ring = {
.get_seqno = ring_get_seqno,
.irq_get = blt_ring_get_irq,
.irq_put = blt_ring_put_irq,
- .dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
+ .dispatch_execbuffer = blt_ring_dispatch_execbuffer,
.cleanup = blt_ring_cleanup,
.sync_to = gen6_blt_ring_sync_to,
.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
--
1.7.6.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/i915: blitter ring workaround for gen6
2011-10-03 1:27 [PATCH] drm/i915: blitter ring workaround for gen6 Ben Widawsky
@ 2011-10-03 4:20 ` Keith Packard
2011-10-03 4:38 ` Ben Widawsky
2011-10-03 7:00 ` Chris Wilson
2011-10-03 7:41 ` Daniel Vetter
2 siblings, 1 reply; 8+ messages in thread
From: Keith Packard @ 2011-10-03 4:20 UTC (permalink / raw)
To: intel-gfx; +Cc: Ben Widawsky
[-- Attachment #1.1: Type: text/plain, Size: 778 bytes --]
On Sun, 2 Oct 2011 18:27:12 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> +static void blt_ring_begin2(struct intel_ring_buffer *ring)
> +{
> + if (!ring->private)
> + return;
> +
> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> + intel_ring_emit(ring, 0x2209c);
> + intel_ring_emit(ring, 0x20002);
> + intel_ring_emit(ring, MI_FLUSH_DW);
> + intel_ring_emit(ring, 0x2209c);
> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> + intel_ring_emit(ring, 0x2209c);
> + intel_ring_emit(ring, 0x20000);
> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> + intel_ring_emit(ring, 0x2209c);
> + intel_ring_emit(ring, 0x20000);
> + intel_ring_emit(ring, MI_NOOP);
> +}
> +
Surely you can provide symbolic names here.
--
keith.packard@intel.com
[-- Attachment #1.2: Type: application/pgp-signature, Size: 189 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/i915: blitter ring workaround for gen6
2011-10-03 4:20 ` Keith Packard
@ 2011-10-03 4:38 ` Ben Widawsky
0 siblings, 0 replies; 8+ messages in thread
From: Ben Widawsky @ 2011-10-03 4:38 UTC (permalink / raw)
To: Keith Packard; +Cc: intel-gfx
[-- Attachment #1.1: Type: text/plain, Size: 1041 bytes --]
On Sun, Oct 02, 2011 at 09:20:32PM -0700, Keith Packard wrote:
> On Sun, 2 Oct 2011 18:27:12 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
>
> > +static void blt_ring_begin2(struct intel_ring_buffer *ring)
> > +{
> > + if (!ring->private)
> > + return;
> > +
> > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> > + intel_ring_emit(ring, 0x2209c);
> > + intel_ring_emit(ring, 0x20002);
> > + intel_ring_emit(ring, MI_FLUSH_DW);
> > + intel_ring_emit(ring, 0x2209c);
> > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> > + intel_ring_emit(ring, 0x2209c);
> > + intel_ring_emit(ring, 0x20000);
> > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> > + intel_ring_emit(ring, 0x2209c);
> > + intel_ring_emit(ring, 0x20000);
> > + intel_ring_emit(ring, MI_NOOP);
> > +}
> > +
>
> Surely you can provide symbolic names here.
>
Of course. I lazily just copied the docs hoping someone would come up with some
reason not to bother going further.
Assuming nobody has any complaints, I'll fix it up.
Ben
[-- Attachment #1.2: Type: application/pgp-signature, Size: 490 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/i915: blitter ring workaround for gen6
2011-10-03 1:27 [PATCH] drm/i915: blitter ring workaround for gen6 Ben Widawsky
2011-10-03 4:20 ` Keith Packard
@ 2011-10-03 7:00 ` Chris Wilson
2011-10-03 7:57 ` Ben Widawsky
2011-10-03 8:55 ` Ben Widawsky
2011-10-03 7:41 ` Daniel Vetter
2 siblings, 2 replies; 8+ messages in thread
From: Chris Wilson @ 2011-10-03 7:00 UTC (permalink / raw)
To: intel-gfx; +Cc: Ben Widawsky
On Sun, 2 Oct 2011 18:27:12 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> Found this through doc inspection. I don't have a failing test case that this
> fixes, but the docs specify we need to do it in addition to the A0 workaround.
Can you try running /usr/lib/xscreensaver/slip with and without this
patch? That reliably hangs my machine and those of various reporters, with
some being unlucky enough to hit it during x11perf -copywinwin. I'll try to
do so before my flight...
Oh and rebase it against keithp/drm-intel-fixes ;-).
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/i915: blitter ring workaround for gen6
2011-10-03 1:27 [PATCH] drm/i915: blitter ring workaround for gen6 Ben Widawsky
2011-10-03 4:20 ` Keith Packard
2011-10-03 7:00 ` Chris Wilson
@ 2011-10-03 7:41 ` Daniel Vetter
2011-10-03 7:59 ` Ben Widawsky
2 siblings, 1 reply; 8+ messages in thread
From: Daniel Vetter @ 2011-10-03 7:41 UTC (permalink / raw)
To: Ben Widawsky; +Cc: intel-gfx
On Sun, Oct 02, 2011 at 06:27:12PM -0700, Ben Widawsky wrote:
> Found this through doc inspection. I don't have a failing test case that this
> fixes, but the docs specify we need to do it in addition to the A0 workaround.
Can you confirm that the A0 workaround is really needed in addition to
this new workaround on production hw? I ask because I have patches lying
around to kill that mess (there is no need to carry around workarounds that
only apply to pre-production hw after hw bringup), and your patch seems to
extend that A0 workaround to all chips.
-Daniel
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] drm/i915: blitter ring workaround for gen6
2011-10-03 7:00 ` Chris Wilson
@ 2011-10-03 7:57 ` Ben Widawsky
2011-10-03 8:55 ` Ben Widawsky
1 sibling, 0 replies; 8+ messages in thread
From: Ben Widawsky @ 2011-10-03 7:57 UTC (permalink / raw)
To: Chris Wilson; +Cc: Ben Widawsky, intel-gfx
I found this workaround in the docs while trying to debug a certain test
case I stumbled upon. The patch is in flux as I try to get it to be
useful. Both my test case and xscreensaver slip have similar scenarios
which I'm hoping some variation of this patch will fix.
Again, this doesn't fix anything yet.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
drivers/gpu/drm/i915/i915_reg.h | 2 +
drivers/gpu/drm/i915/intel_ringbuffer.c | 84 ++++++++++++++++++++++++++++--
2 files changed, 80 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4fd736e..44f72bd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -543,6 +543,8 @@
#define GEN6_RENDER_USER_INTERRUPT (1 << 0)
#define GEN6_BLITTER_HWSTAM 0x22098
+#define GEN6_BCS_MI_MODE 0x2209c
+#define GEN6_BCS_BYPASS_FENCE (1 << 1)
#define GEN6_BLITTER_IMR 0x220a8
#define GEN6_BLITTER_MI_FLUSH_DW_NOTIFY_INTERRUPT (1 << 26)
#define GEN6_BLITTER_COMMAND_PARSER_MASTER_ERROR (1 << 25)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0e99589..3bf2dea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,9 @@
#include "i915_trace.h"
#include "intel_drv.h"
+static int blt_ring_begin(struct intel_ring_buffer *ring, int num_dwords);
+static void blt_ring_begin2(struct intel_ring_buffer *ring);
+
static inline int ring_space(struct intel_ring_buffer *ring)
{
int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -344,7 +347,10 @@ gen6_add_request(struct intel_ring_buffer *ring,
u32 mbox2_reg;
int ret;
- ret = intel_ring_begin(ring, 10);
+ if (ring->id == RING_BLT)
+ ret = blt_ring_begin(ring, 10);
+ else
+ ret = intel_ring_begin(ring, 10);
if (ret)
return ret;
@@ -359,6 +365,8 @@ gen6_add_request(struct intel_ring_buffer *ring,
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, *seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
+// if (ring->id == RING_BLT)
+// blt_ring_begin2(ring);
intel_ring_advance(ring);
return 0;
@@ -382,7 +390,10 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER;
- ret = intel_ring_begin(waiter, 4);
+ if (waiter->id == RING_BLT)
+ ret = blt_ring_begin(waiter, 4);
+ else
+ ret = intel_ring_begin(waiter, 4);
if (ret)
return ret;
@@ -390,6 +401,8 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
intel_ring_emit(waiter, seqno);
intel_ring_emit(waiter, 0);
intel_ring_emit(waiter, MI_NOOP);
+// if (waiter->id == RING_BLT)
+// blt_ring_begin2(waiter);
intel_ring_advance(waiter);
return 0;
@@ -1233,13 +1246,19 @@ blt_ring_put_irq(struct intel_ring_buffer *ring)
}
+/* SNB C0 and D0 need ever more workarounds */
+#define NEED_MORE_BLT_WORKAROUND(dev) \
+ (dev->pdev->revision == 9 || dev->pdev->revision == 10)
+
+
/* Workaround for some stepping of SNB,
* each time when BLT engine ring tail moved,
* the first command in the ring to be parsed
* should be MI_BATCH_BUFFER_START
*/
#define NEED_BLT_WORKAROUND(dev) \
- (IS_GEN6(dev) && (dev->pdev->revision < 8))
+ ((IS_GEN6(dev) && (dev->pdev->revision < 8)) || \
+ (IS_GEN6(dev) && NEED_MORE_BLT_WORKAROUND(dev)))
static inline struct drm_i915_gem_object *
to_blt_workaround(struct intel_ring_buffer *ring)
@@ -1286,18 +1305,53 @@ static int blt_ring_begin(struct intel_ring_buffer *ring,
int num_dwords)
{
if (ring->private) {
- int ret = intel_ring_begin(ring, num_dwords+2);
+ int ret;
+ if (NEED_MORE_BLT_WORKAROUND(ring->dev))
+ num_dwords += 48;
+ else
+ num_dwords += 2;
+ ret = intel_ring_begin(ring, num_dwords);
if (ret)
return ret;
+ if (NEED_MORE_BLT_WORKAROUND(ring->dev)) {
+ int i = 0;
+ for (i = 0; i < 32; i++)
+ intel_ring_emit(ring, MI_NOOP);
+ }
intel_ring_emit(ring, MI_BATCH_BUFFER_START);
intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset);
-
+ blt_ring_begin2(ring);
return 0;
} else
return intel_ring_begin(ring, 4);
}
+static void blt_ring_begin2(struct intel_ring_buffer *ring)
+{
+ if (!ring->private)
+ return;
+
+ if (!NEED_MORE_BLT_WORKAROUND(ring->dev))
+ return;
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+ intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16 |
+ GEN6_BCS_BYPASS_FENCE);
+ intel_ring_emit(ring, MI_FLUSH_DW);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+ intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+ intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+ intel_ring_emit(ring, MI_NOOP);
+}
+
static int blt_ring_flush(struct intel_ring_buffer *ring,
u32 invalidate, u32 flush)
{
@@ -1315,10 +1369,28 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
intel_ring_emit(ring, 0);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
+ //blt_ring_begin2(ring);
intel_ring_advance(ring);
return 0;
}
+static int
+blt_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+ u32 offset, u32 len)
+{
+ int ret;
+
+ ret = blt_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+ intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+ /* bit0-7 is the length on GEN6+ */
+ intel_ring_emit(ring, offset);
+ //blt_ring_begin2(ring);
+ intel_ring_advance(ring);
+
+ return 0;
+}
static void blt_ring_cleanup(struct intel_ring_buffer *ring)
{
if (!ring->private)
@@ -1341,7 +1413,7 @@ static const struct intel_ring_buffer gen6_blt_ring = {
.get_seqno = ring_get_seqno,
.irq_get = blt_ring_get_irq,
.irq_put = blt_ring_put_irq,
- .dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
+ .dispatch_execbuffer = blt_ring_dispatch_execbuffer,
.cleanup = blt_ring_cleanup,
.sync_to = gen6_blt_ring_sync_to,
.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
--
1.7.6.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] drm/i915: blitter ring workaround for gen6
2011-10-03 7:41 ` Daniel Vetter
@ 2011-10-03 7:59 ` Ben Widawsky
0 siblings, 0 replies; 8+ messages in thread
From: Ben Widawsky @ 2011-10-03 7:59 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On Mon, 3 Oct 2011 09:41:28 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:
> On Sun, Oct 02, 2011 at 06:27:12PM -0700, Ben Widawsky wrote:
> > Found this through doc inspection. I don't have a failing test case that this
> > fixes, but the docs specify we need to do it in addition to the A0 workaround.
>
> Can you confirm that the A0 workaround is really needed in addition to
> this new work-around on production-hw? Because I have patches lying around
> to kill that mess (no need to carry around workarounds that only apply
> to pre-production hw after hw bringup) and your patch seems to extend that
> A0 workaround to all chips.
> -Daniel
It appears it is needed, assuming I can make the patch do something
useful. The docs are terribly vague about what this actually fixes and
how it should be implemented.
By the way, it's not all chips; again, once I get this to do something
useful, I'll clean things up.
Ben
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH] drm/i915: blitter ring workaround for gen6
2011-10-03 7:00 ` Chris Wilson
2011-10-03 7:57 ` Ben Widawsky
@ 2011-10-03 8:55 ` Ben Widawsky
1 sibling, 0 replies; 8+ messages in thread
From: Ben Widawsky @ 2011-10-03 8:55 UTC (permalink / raw)
To: Chris Wilson; +Cc: Ben Widawsky, intel-gfx
I found this workaround in the docs while trying to debug a certain test
case I stumbled upon. The patch is in flux as I try to get it to be
useful. Both my test case and xscreensaver slip have similar scenarios
which I'm hoping some variation of this patch will fix.
Again, this doesn't fix anything yet.
v2: bugfixes + cleanups... still doesn't help, still WIP
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
drivers/gpu/drm/i915/i915_reg.h | 2 +
drivers/gpu/drm/i915/intel_ringbuffer.c | 153 ++++++++++++++++++++++++++-----
2 files changed, 133 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4fd736e..44f72bd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -543,6 +543,8 @@
#define GEN6_RENDER_USER_INTERRUPT (1 << 0)
#define GEN6_BLITTER_HWSTAM 0x22098
+#define GEN6_BCS_MI_MODE 0x2209c
+#define GEN6_BCS_BYPASS_FENCE (1 << 1)
#define GEN6_BLITTER_IMR 0x220a8
#define GEN6_BLITTER_MI_FLUSH_DW_NOTIFY_INTERRUPT (1 << 26)
#define GEN6_BLITTER_COMMAND_PARSER_MASTER_ERROR (1 << 25)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0e99589..4c712f0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,31 @@
#include "i915_trace.h"
#include "intel_drv.h"
+static int blt_ring_begin(struct intel_ring_buffer *ring, int num_dwords);
+static void blt_ring_begin2(struct intel_ring_buffer *ring);
+
+#define BLT_WA_AFTER_WORK 1
+
+/* SNB C0 and D0 need ever more workarounds */
+#define NEED_MORE_BLT_WORKAROUND(dev) \
+ (dev->pdev->revision == 9 || dev->pdev->revision == 10)
+
+
+/* Workaround for some stepping of SNB,
+ * each time when BLT engine ring tail moved,
+ * the first command in the ring to be parsed
+ * should be MI_BATCH_BUFFER_START
+ */
+#define NEED_BLT_WORKAROUND(dev) \
+ ((IS_GEN6(dev) && (dev->pdev->revision < 8)) || \
+ (IS_GEN6(dev) && NEED_MORE_BLT_WORKAROUND(dev)))
+
+static inline struct drm_i915_gem_object *
+to_blt_workaround(struct intel_ring_buffer *ring)
+{
+ return ring->private;
+}
+
static inline int ring_space(struct intel_ring_buffer *ring)
{
int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -344,7 +369,10 @@ gen6_add_request(struct intel_ring_buffer *ring,
u32 mbox2_reg;
int ret;
- ret = intel_ring_begin(ring, 10);
+ if (ring->id == RING_BLT)
+ ret = blt_ring_begin(ring, 10);
+ else
+ ret = intel_ring_begin(ring, 10);
if (ret)
return ret;
@@ -359,6 +387,10 @@ gen6_add_request(struct intel_ring_buffer *ring,
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, *seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
+#if (BLT_WA_AFTER_WORK == 1)
+ if (ring->id == RING_BLT)
+ blt_ring_begin2(ring);
+#endif
intel_ring_advance(ring);
return 0;
@@ -382,7 +414,10 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER;
- ret = intel_ring_begin(waiter, 4);
+ if (waiter->id == RING_BLT)
+ ret = blt_ring_begin(waiter, 4);
+ else
+ ret = intel_ring_begin(waiter, 4);
if (ret)
return ret;
@@ -390,6 +425,10 @@ intel_ring_sync(struct intel_ring_buffer *waiter,
intel_ring_emit(waiter, seqno);
intel_ring_emit(waiter, 0);
intel_ring_emit(waiter, MI_NOOP);
+#if (BLT_WA_AFTER_WORK == 1)
+ if (waiter->id == RING_BLT)
+ blt_ring_begin2(waiter);
+#endif
intel_ring_advance(waiter);
return 0;
@@ -967,6 +1006,10 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
{
unsigned int *virt;
int rem = ring->size - ring->tail;
+ if (ring->id == RING_BLT)
+ rem-=(128+64); /* 32 noops, 14 wa, 2 wa * 4 */
+
+ BUG_ON(rem < 0);
if (ring->space < rem) {
int ret = intel_wait_ring_buffer(ring, rem);
@@ -975,6 +1018,27 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
}
virt = (unsigned int *)(ring->virtual_start + ring->tail);
+ if (ring->id == RING_BLT) {
+ int i = 0;
+ for (i = 0; i < 32; i++)
+ *virt++ = MI_NOOP;
+ *virt++ = MI_BATCH_BUFFER_START;
+ *virt++ = to_blt_workaround(ring)->gtt_offset;
+ *virt++ = MI_LOAD_REGISTER_IMM(1);
+ *virt++ = GEN6_BCS_MI_MODE;
+ *virt++ = GEN6_BCS_BYPASS_FENCE << 16 | GEN6_BCS_BYPASS_FENCE;
+ *virt++ = MI_FLUSH_DW;
+ *virt++ = 0;
+ *virt++ = 0;
+ *virt++ = MI_NOOP;
+ *virt++ = MI_LOAD_REGISTER_IMM(1);
+ *virt++ = GEN6_BCS_MI_MODE;
+ *virt++ = GEN6_BCS_BYPASS_FENCE << 16;
+ *virt++ = MI_LOAD_REGISTER_IMM(1);
+ *virt++ = GEN6_BCS_MI_MODE;
+ *virt++ = GEN6_BCS_BYPASS_FENCE << 16;
+ *virt++ = MI_NOOP;
+ }
rem /= 8;
while (rem--) {
*virt++ = MI_NOOP;
@@ -1232,21 +1296,6 @@ blt_ring_put_irq(struct intel_ring_buffer *ring)
GEN6_BLITTER_USER_INTERRUPT);
}
-
-/* Workaround for some stepping of SNB,
- * each time when BLT engine ring tail moved,
- * the first command in the ring to be parsed
- * should be MI_BATCH_BUFFER_START
- */
-#define NEED_BLT_WORKAROUND(dev) \
- (IS_GEN6(dev) && (dev->pdev->revision < 8))
-
-static inline struct drm_i915_gem_object *
-to_blt_workaround(struct intel_ring_buffer *ring)
-{
- return ring->private;
-}
-
static int blt_ring_init(struct intel_ring_buffer *ring)
{
if (NEED_BLT_WORKAROUND(ring->dev)) {
@@ -1275,7 +1324,6 @@ static int blt_ring_init(struct intel_ring_buffer *ring)
drm_gem_object_unreference(&obj->base);
return ret;
}
-
ring->private = obj;
}
@@ -1286,18 +1334,52 @@ static int blt_ring_begin(struct intel_ring_buffer *ring,
int num_dwords)
{
if (ring->private) {
- int ret = intel_ring_begin(ring, num_dwords+2);
+ int ret;
+ if (NEED_MORE_BLT_WORKAROUND(ring->dev))
+ num_dwords += (32 + 14 + 2); /* 32 noops, begin2, a0 wa */
+ else
+ num_dwords += 2;
+ ret = intel_ring_begin(ring, num_dwords);
if (ret)
return ret;
+ if (NEED_MORE_BLT_WORKAROUND(ring->dev)) {
+ int i = 0;
+ for (i = 0; i < 32; i++)
+ intel_ring_emit(ring, MI_NOOP);
+ }
intel_ring_emit(ring, MI_BATCH_BUFFER_START);
intel_ring_emit(ring, to_blt_workaround(ring)->gtt_offset);
-
+#if (BLT_WA_AFTER_WORK == 0)
+ blt_ring_begin2(ring);
+#endif
return 0;
} else
return intel_ring_begin(ring, 4);
}
+static void blt_ring_begin2(struct intel_ring_buffer *ring)
+{
+ if (!NEED_MORE_BLT_WORKAROUND(ring->dev))
+ return;
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+ intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16 |
+ GEN6_BCS_BYPASS_FENCE);
+ intel_ring_emit(ring, MI_FLUSH_DW);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+ intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN6_BCS_MI_MODE);
+ intel_ring_emit(ring, GEN6_BCS_BYPASS_FENCE << 16);
+ intel_ring_emit(ring, MI_NOOP);
+}
+
static int blt_ring_flush(struct intel_ring_buffer *ring,
u32 invalidate, u32 flush)
{
@@ -1315,10 +1397,32 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
intel_ring_emit(ring, 0);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
+#if (BLT_WA_AFTER_WORK == 1)
+ blt_ring_begin2(ring);
+#endif
intel_ring_advance(ring);
return 0;
}
+static int
+blt_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+ u32 offset, u32 len)
+{
+ int ret;
+
+ ret = blt_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+ intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+ /* bit0-7 is the length on GEN6+ */
+ intel_ring_emit(ring, offset);
+#if (BLT_WA_AFTER_WORK == 1)
+ blt_ring_begin2(ring);
+#endif
+ intel_ring_advance(ring);
+
+ return 0;
+}
static void blt_ring_cleanup(struct intel_ring_buffer *ring)
{
if (!ring->private)
@@ -1341,7 +1445,7 @@ static const struct intel_ring_buffer gen6_blt_ring = {
.get_seqno = ring_get_seqno,
.irq_get = blt_ring_get_irq,
.irq_put = blt_ring_put_irq,
- .dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
+ .dispatch_execbuffer = blt_ring_dispatch_execbuffer,
.cleanup = blt_ring_cleanup,
.sync_to = gen6_blt_ring_sync_to,
.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
@@ -1435,8 +1539,13 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+ int ret;
*ring = gen6_blt_ring;
- return intel_init_ring_buffer(dev, ring);
+ ret = intel_init_ring_buffer(dev, ring);
+
+ ring->effective_size -= 192;
+
+ return ret;
}
--
1.7.6.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2011-10-03 8:55 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-10-03 1:27 [PATCH] drm/i915: blitter ring workaround for gen6 Ben Widawsky
2011-10-03 4:20 ` Keith Packard
2011-10-03 4:38 ` Ben Widawsky
2011-10-03 7:00 ` Chris Wilson
2011-10-03 7:57 ` Ben Widawsky
2011-10-03 8:55 ` Ben Widawsky
2011-10-03 7:41 ` Daniel Vetter
2011-10-03 7:59 ` Ben Widawsky
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.