* [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 3/8] drm/i915: implement WaForceL3Serialization " Jesse Barnes
` (5 subsequent siblings)
6 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
v2: use correct register
v3: remove extra hunks, pull in register definitions & offset check directly
v4: add GT1 vs GT2 distinction for IVB portion (Ben)
References: https://bugs.freedesktop.org/show_bug.cgi?id=50233
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_drv.c | 7 +++++++
drivers/gpu/drm/i915/i915_drv.h | 3 +++
drivers/gpu/drm/i915/i915_reg.h | 4 ++++
drivers/gpu/drm/i915/intel_pm.c | 13 ++++++++++++-
4 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9e7e647..39282a4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1129,6 +1129,13 @@ static bool IS_DISPLAYREG(u32 reg)
if (reg == GEN6_GDRST)
return false;
+ switch (reg) {
+ case GEN7_ROW_CHICKEN2:
+ return false;
+ default:
+ break;
+ }
+
return true;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4728d30..beef67f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1128,6 +1128,9 @@ struct drm_i915_file_private {
#define IS_IRONLAKE_D(dev) ((dev)->pci_device == 0x0042)
#define IS_IRONLAKE_M(dev) ((dev)->pci_device == 0x0046)
#define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge)
+#define IS_IVB_GT1(dev) ((dev)->pci_device == 0x0156 || \
+ (dev)->pci_device == 0x0152 || \
+ (dev)->pci_device == 0x015a)
#define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview)
#define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell)
#define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 876ef96..b07d309 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4266,6 +4266,10 @@
#define GEN7_L3LOG_BASE 0xB070
#define GEN7_L3LOG_SIZE 0x80
+#define GEN7_ROW_CHICKEN2 0xe4f4
+#define GEN7_ROW_CHICKEN2_GT2 0xf4f4
+#define DOP_CLOCK_GATING_DISABLE (1<<0)
+
#define G4X_AUD_VID_DID 0x62020
#define INTEL_AUDIO_DEVCL 0x808629FB
#define INTEL_AUDIO_DEVBLC 0x80862801
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 049c7e2..15328a3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3535,7 +3535,14 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN7_L3CNTLREG1,
GEN7_WA_FOR_GEN7_L3_CONTROL);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
- GEN7_WA_L3_CHICKEN_MODE);
+ GEN7_WA_L3_CHICKEN_MODE);
+ if (IS_IVB_GT1(dev))
+ I915_WRITE(GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ else
+ I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
/* WaForceL3Serialization */
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
@@ -3620,6 +3627,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+ /* WaDisableDopClockGating */
+ I915_WRITE(GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
/* This is required by WaCatErrorRejectionIssue */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 3/8] drm/i915: implement WaForceL3Serialization on VLV and IVB
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
2012-10-18 18:07 ` [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 4/8] drm/i915: implement WaDisableVLVClockGating_VBIIssue on VLV Jesse Barnes
` (4 subsequent siblings)
6 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
References: https://bugs.freedesktop.org/show_bug.cgi?id=50250
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/intel_pm.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 15328a3..969687f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3631,6 +3631,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN7_ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+ /* WaForceL3Serialization */
+ I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+ ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
/* This is required by WaCatErrorRejectionIssue */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 4/8] drm/i915: implement WaDisableVLVClockGating_VBIIssue on VLV
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
2012-10-18 18:07 ` [PATCH 2/8] drm/i915: implement WaDisableDopClockGatingisable on VLV and IVB Jesse Barnes
2012-10-18 18:07 ` [PATCH 3/8] drm/i915: implement WaForceL3Serialization " Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 5/8] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB & VLV Jesse Barnes
` (3 subsequent siblings)
6 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
This allows us to get the right vblank interrupt frequency.
v2: pull in register definition
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 2 ++
drivers/gpu/drm/i915/intel_pm.c | 7 +++++++
2 files changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b07d309..ecb28be 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -552,6 +552,8 @@
#define IIR 0x020a4
#define IMR 0x020a8
#define ISR 0x020ac
+#define VLV_GUNIT_CLOCK_GATE 0x182060
+#define GCFG_DIS (1<<8)
#define VLV_IIR_RW 0x182084
#define VLV_IER 0x1820a0
#define VLV_IIR 0x1820a4
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 969687f..4c86549 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3691,6 +3691,13 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
PIPEA_HLINE_INT_EN | PIPEA_VBLANK_INT_EN |
SPRITEB_FLIPDONE_INT_EN | SPRITEA_FLIPDONE_INT_EN |
PLANEA_FLIPDONE_INT_EN);
+
+ /*
+ * WaDisableVLVClockGating_VBIIssue
+ * Disable clock gating on th GCFG unit to prevent a delay
+ * in the reporting of vblank events.
+ */
+ I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
}
static void g4x_init_clock_gating(struct drm_device *dev)
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 5/8] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB & VLV
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (2 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 4/8] drm/i915: implement WaDisableVLVClockGating_VBIIssue on VLV Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-18 18:07 ` [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
` (2 subsequent siblings)
6 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
Workaround for dual port PS dispatch on GT1.
v2: pull in register definition & offset handling
v3: use IVB GT1 macro to get the right regs (Ben)
v4: add for VLV too (Ben)
v5: don't read the reg, it's masked so we'll only enable the one extra bit (Chris)
v6: use a _GT2 suffix for the second reg (Chris)
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_drv.c | 1 +
drivers/gpu/drm/i915/i915_reg.h | 5 +++++
drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++++
3 files changed, 17 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 39282a4..6f03b26 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1131,6 +1131,7 @@ static bool IS_DISPLAYREG(u32 reg)
switch (reg) {
case GEN7_ROW_CHICKEN2:
+ case GEN7_HALF_SLICE_CHICKEN1:
return false;
default:
break;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ecb28be..34067b5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4268,6 +4268,11 @@
#define GEN7_L3LOG_BASE 0xB070
#define GEN7_L3LOG_SIZE 0x80
+#define GEN7_HALF_SLICE_CHICKEN1 0xe100 /* IVB GT1 + VLV */
+#define GEN7_HALF_SLICE_CHICKEN1_GT2 0xf100
+#define GEN7_MAX_PS_THREAD_DEP (8<<12)
+#define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1<<3)
+
#define GEN7_ROW_CHICKEN2 0xe4f4
#define GEN7_ROW_CHICKEN2_GT2 0xf4f4
#define DOP_CLOCK_GATING_DISABLE (1<<0)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4c86549..0c1b270 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3527,6 +3527,14 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
+ /* WaDisablePSDDualDispatchEnable */
+ if (IS_IVB_GT1(dev))
+ I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+ else
+ I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
+ _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -3615,6 +3623,9 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
+ I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+ _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (3 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 5/8] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB & VLV Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-23 11:22 ` Chris Wilson
2012-10-18 18:07 ` [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
2012-10-18 18:07 ` [PATCH 8/8] drm/i915: add clock gating regs to VLV offset check function Jesse Barnes
6 siblings, 1 reply; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
So store into the scratch space of the HWS to make sure the invalidate
occurs.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 6 ++++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++++++++++++++++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
3 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 34067b5..c6f63a4 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -241,8 +241,10 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
-#define MI_INVALIDATE_TLB (1<<18)
-#define MI_INVALIDATE_BSD (1<<7)
+#define MI_FLUSH_DW_STORE_INDEX (1<<21)
+#define MI_INVALIDATE_TLB (1<<18)
+#define MI_FLUSH_DW_OP_STOREDW (1<<14)
+#define MI_INVALIDATE_BSD (1<<7)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6c6f95a..e7daa90 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1396,10 +1396,17 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
return ret;
cmd = MI_FLUSH_DW;
+ /*
+ * Bspec vol 1c.5 - video engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
if (invalidate & I915_GEM_GPU_DOMAINS)
- cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+ cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+ MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
@@ -1461,10 +1468,17 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
return ret;
cmd = MI_FLUSH_DW;
+ /*
+ * Bspec vol 1c.3 - blitter engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
if (invalidate & I915_GEM_DOMAIN_RENDER)
- cmd |= MI_INVALIDATE_TLB;
+ cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+ MI_FLUSH_DW_OP_STOREDW;
intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3745d1d..d089520 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -183,6 +183,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
* The area from dword 0x20 to 0x3ff is available for driver usage.
*/
#define I915_GEM_HWS_INDEX 0x20
+#define I915_GEM_HWS_SCRATCH_INDEX 0x28
void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
2012-10-18 18:07 ` [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
@ 2012-10-23 11:22 ` Chris Wilson
2012-10-23 14:28 ` Jesse Barnes
0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2012-10-23 11:22 UTC (permalink / raw)
To: Jesse Barnes, intel-gfx
On Thu, 18 Oct 2012 13:07:17 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> So store into the scratch space of the HWS to make sure the invalidate
> occurs.
Whoops, instant hang. Probably doesn't agree with being called FLUSH_SW
and not FLUSH_DW! ;-)
> + /*
> + * Bspec vol 1c.5 - video engine command streamer:
> + * "If ENABLED, all TLBs will be invalidated once the flush
> + * operation is complete. This bit is only valid when the
> + * Post-Sync Operation field is a value of 1h or 3h."
> + */
> if (invalidate & I915_GEM_GPU_DOMAINS)
> - cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
> + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
> + MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
> intel_ring_emit(ring, cmd);
> - intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
And here is where the error lies. Perhaps this would be clearer if you
do:
#define MI_FLUSH_DW_USE_PPGTT 0
#define MI_FLUSH_DW_USE_GTT (1<<2)
#define I915_GEM_HWS_SCRATCH_INDEX 0x30
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
Then:
intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
Hangs begone!
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
2012-10-23 11:22 ` Chris Wilson
@ 2012-10-23 14:28 ` Jesse Barnes
0 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-23 14:28 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, 23 Oct 2012 12:22:16 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Thu, 18 Oct 2012 13:07:17 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > So store into the scratch space of the HWS to make sure the invalidate
> > occurs.
>
> Whoops, instant hang. Probably doesn't agree with being called FLUSH_SW
> and not FLUSH_DW! ;-)
>
> > + /*
> > + * Bspec vol 1c.5 - video engine command streamer:
> > + * "If ENABLED, all TLBs will be invalidated once the flush
> > + * operation is complete. This bit is only valid when the
> > + * Post-Sync Operation field is a value of 1h or 3h."
> > + */
> > if (invalidate & I915_GEM_GPU_DOMAINS)
> > - cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
> > + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
> > + MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
> > intel_ring_emit(ring, cmd);
> > - intel_ring_emit(ring, 0);
> > + intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
> And here is where the error lies. Perhaps this would be clearer if you
> do:
>
> #define MI_FLUSH_DW_USE_PPGTT 0
> #define MI_FLUSH_DW_USE_GTT (1<<2)
>
> #define I915_GEM_HWS_SCRATCH_INDEX 0x30
> #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>
> Then:
> intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
>
> Hangs begone!
Ah cool, was hoping it was something simple. Damn PPGTT vs GTT always
gets us.
I'll respin with the change.
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (4 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
2012-10-23 11:42 ` Chris Wilson
2012-10-18 18:07 ` [PATCH 8/8] drm/i915: add clock gating regs to VLV offset check function Jesse Barnes
6 siblings, 1 reply; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
"If ENABLED, PIPE_CONTROL command will flush the in flight data written
out by render engine to Global Observation point on flush done. Also
Requires stall bit ([20] of DW1) set."
So set the stall bit to ensure proper invalidation.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e7daa90..1e09c62 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -246,7 +246,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
/*
* TLB invalidate requires a post-sync write.
*/
- flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
}
ret = intel_ring_begin(ring, 4);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall
2012-10-18 18:07 ` [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
@ 2012-10-23 11:42 ` Chris Wilson
2012-10-25 18:28 ` Jesse Barnes
0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2012-10-23 11:42 UTC (permalink / raw)
To: Jesse Barnes, intel-gfx
On Thu, 18 Oct 2012 13:07:18 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> "If ENABLED, PIPE_CONTROL command will flush the in flight data written
> out by render engine to Global Observation point on flush done. Also
> Requires stall bit ([20] of DW1) set."
That quotation doesn't make sense in the context of TLB invalidation,
and the programming guide here very carefully avoids the mention of
requiring any stall bit set for the post-sync op of TLB invalidation.
Maybe quote chapter and verse as well?
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall
2012-10-23 11:42 ` Chris Wilson
@ 2012-10-25 18:28 ` Jesse Barnes
0 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-25 18:28 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Tue, 23 Oct 2012 12:42:07 +0100
Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Thu, 18 Oct 2012 13:07:18 -0500, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > "If ENABLED, PIPE_CONTROL command will flush the in flight data written
> > out by render engine to Global Observation point on flush done. Also
> > Requires stall bit ([20] of DW1) set."
>
> That quotation doesn't make sense in the context of TLB invalidation,
> and the programming guide here very carefully avoids the mention of
> requiring any stall bit set for the post-sync op of TLB invalidation.
>
> Maybe quote chapter and verse as well?
I thought the "Also Requires stall bit ([20] of DW1) set." was pretty
clear?
--
Jesse Barnes, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 8/8] drm/i915: add clock gating regs to VLV offset check function
2012-10-18 18:07 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
` (5 preceding siblings ...)
2012-10-18 18:07 ` [PATCH 7/8] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
@ 2012-10-18 18:07 ` Jesse Barnes
6 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 18:07 UTC (permalink / raw)
To: intel-gfx
So we can write them properly.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_drv.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6f03b26..39c53ad 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1130,8 +1130,17 @@ static bool IS_DISPLAYREG(u32 reg)
return false;
switch (reg) {
+ case _3D_CHICKEN3:
+ case IVB_CHICKEN3:
+ case GEN7_COMMON_SLICE_CHICKEN1:
+ case GEN7_L3CNTLREG1:
+ case GEN7_L3_CHICKEN_MODE_REGISTER:
case GEN7_ROW_CHICKEN2:
+ case GEN7_L3SQCREG4:
+ case GEN7_SQ_CHICKEN_MBCUNIT_CONFIG:
case GEN7_HALF_SLICE_CHICKEN1:
+ case GEN6_MBCTL:
+ case GEN6_UCGCTL2:
return false;
default:
break;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 6/8] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
2012-10-18 15:43 [PATCH 1/8] drm/i915: implement WaDisableL3CacheAging on VLV Jesse Barnes
@ 2012-10-18 15:43 ` Jesse Barnes
0 siblings, 0 replies; 13+ messages in thread
From: Jesse Barnes @ 2012-10-18 15:43 UTC (permalink / raw)
To: intel-gfx
So store into the scratch space of the HWS to make sure the invalidate
occurs.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_reg.h | 6 ++++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 22 ++++++++++++++++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
3 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 12816bb..9d7e4f5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -241,8 +241,10 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*x-1)
#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */
-#define MI_INVALIDATE_TLB (1<<18)
-#define MI_INVALIDATE_BSD (1<<7)
+#define MI_FLUSH_DW_STORE_INDEX (1<<21)
+#define MI_INVALIDATE_TLB (1<<18)
+#define MI_FLUSH_DW_OP_STOREDW (1<<14)
+#define MI_INVALIDATE_BSD (1<<7)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6c6f95a..e7daa90 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1396,10 +1396,17 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
return ret;
cmd = MI_FLUSH_DW;
+ /*
+ * Bspec vol 1c.5 - video engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
if (invalidate & I915_GEM_GPU_DOMAINS)
- cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+ cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+ MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
@@ -1461,10 +1468,17 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
return ret;
cmd = MI_FLUSH_DW;
+ /*
+ * Bspec vol 1c.3 - blitter engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
if (invalidate & I915_GEM_DOMAIN_RENDER)
- cmd |= MI_INVALIDATE_TLB;
+ cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+ MI_FLUSH_DW_OP_STOREDW;
intel_ring_emit(ring, cmd);
- intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_INDEX << 3);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3745d1d..d089520 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -183,6 +183,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
* The area from dword 0x20 to 0x3ff is available for driver usage.
*/
#define I915_GEM_HWS_INDEX 0x20
+#define I915_GEM_HWS_SCRATCH_INDEX 0x28
void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
--
1.7.9.5
^ permalink raw reply related [flat|nested] 13+ messages in thread