All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB
@ 2012-09-19 20:28 Jesse Barnes
  2012-09-19 20:28 ` [PATCH 2/9] drm/i915: implement WaForceL3Serialization " Jesse Barnes
                   ` (8 more replies)
  0 siblings, 9 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:28 UTC (permalink / raw)
  To: intel-gfx

References: https://bugs.freedesktop.org/show_bug.cgi?id=50233
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_reg.h |    1 +
 drivers/gpu/drm/i915/intel_pm.c |    6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a828e90..b89e5f5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3443,6 +3443,7 @@
 
 #define GEN7_L3_CHICKEN_MODE_REGISTER		0xB030
 #define  GEN7_WA_L3_CHICKEN_MODE				0x20000000
+#define  GEN7_WA_DOP_CLOCK_GATING_DISABLE			0x08000000
 
 /* WaCatErrorRejectionIssue */
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 36c6409..7ec4b28 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3541,7 +3541,8 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN7_L3CNTLREG1,
 			GEN7_WA_FOR_GEN7_L3_CONTROL);
 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
-			GEN7_WA_L3_CHICKEN_MODE);
+		   GEN7_WA_L3_CHICKEN_MODE |
+		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
 
 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
 	 * gating disable must be set.  Failure to set it results in
@@ -3611,7 +3612,8 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
 
 	/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
 	I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
-	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
+	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE |
+		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
 
 	/* This is required by WaCatErrorRejectionIssue */
 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 2/9] drm/i915: implement WaForceL3Serialization on VLV and IVB
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
@ 2012-09-19 20:28 ` Jesse Barnes
  2012-09-19 20:28 ` [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands Jesse Barnes
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:28 UTC (permalink / raw)
  To: intel-gfx

References: https://bugs.freedesktop.org/show_bug.cgi?id=50250
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_reg.h |    3 +++
 drivers/gpu/drm/i915/intel_pm.c |    8 ++++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b89e5f5..54d15cd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3445,6 +3445,9 @@
 #define  GEN7_WA_L3_CHICKEN_MODE				0x20000000
 #define  GEN7_WA_DOP_CLOCK_GATING_DISABLE			0x08000000
 
+#define GEN7_L3SQCREG4				0xb034
+#define  L3SQ_URB_READ_CAM_MATCH_DISABLE	(1<<27)
+
 /* WaCatErrorRejectionIssue */
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 7ec4b28..56a84ff 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3544,6 +3544,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 		   GEN7_WA_L3_CHICKEN_MODE |
 		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
 
+	/* WaForceL3Serialization */
+	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
 	 * gating disable must be set.  Failure to set it results in
 	 * flickering pixels due to Z write ordering failures after
@@ -3615,6 +3619,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE |
 		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
 
+	/* WaForceL3Serialization */
+	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
 	/* This is required by WaCatErrorRejectionIssue */
 	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
 		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
  2012-09-19 20:28 ` [PATCH 2/9] drm/i915: implement WaForceL3Serialization " Jesse Barnes
@ 2012-09-19 20:28 ` Jesse Barnes
  2012-09-25  8:54   ` Daniel Vetter
  2012-09-19 20:28 ` [PATCH 4/9] drm/i915: add post-flush store dw workaround Jesse Barnes
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:28 UTC (permalink / raw)
  To: intel-gfx

Some commands and workarounds require stores to occur to function
correctly, so add some scratch space to the HWS page to accommodate
them.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2ea7a31..ef85742 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -181,6 +181,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
  * The area from dword 0x20 to 0x3ff is available for driver usage.
  */
 #define I915_GEM_HWS_INDEX		0x20
+#define I915_GEM_SCRATCH_INDEX		0x28 /* Some commands need a scratch store */
 
 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 4/9] drm/i915: add post-flush store dw workaround
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
  2012-09-19 20:28 ` [PATCH 2/9] drm/i915: implement WaForceL3Serialization " Jesse Barnes
  2012-09-19 20:28 ` [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands Jesse Barnes
@ 2012-09-19 20:28 ` Jesse Barnes
  2012-09-25  8:49   ` Daniel Vetter
  2012-09-19 20:28 ` [PATCH 5/9] drm/i915: implement WaDisableEarlyCull for VLV and IVB Jesse Barnes
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:28 UTC (permalink / raw)
  To: intel-gfx

Several platforms need this to flush the CS write buffers.

References: https://bugs.freedesktop.org/show_bug.cgi?id=50241
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |   15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 55cdb4d..ef5101f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -216,7 +216,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 	u32 flags = 0;
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
-	int ret;
+	int ret, i;
 
 	/* Force SNB workarounds for PIPE_CONTROL flushes */
 	ret = intel_emit_post_sync_nonzero_flush(ring);
@@ -259,6 +259,19 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
+	ret = intel_ring_begin(ring, 4 * 8);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 8; i++) {
+		intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
+		intel_ring_emit(ring, I915_GEM_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, MI_NOOP);
+	}
+	intel_ring_advance(ring);
+
+
 	return 0;
 }
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 5/9] drm/i915: implement WaDisableEarlyCull for VLV and IVB
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
                   ` (2 preceding siblings ...)
  2012-09-19 20:28 ` [PATCH 4/9] drm/i915: add post-flush store dw workaround Jesse Barnes
@ 2012-09-19 20:28 ` Jesse Barnes
  2012-09-19 20:29 ` [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV Jesse Barnes
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:28 UTC (permalink / raw)
  To: intel-gfx

Workaround for a culling optimization.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_reg.h |    1 +
 drivers/gpu/drm/i915/intel_pm.c |    8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 54d15cd..e570aea 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -521,6 +521,7 @@
  */
 # define _3D_CHICKEN2_WM_READ_PIPELINED			(1 << 14)
 #define _3D_CHICKEN3	0x02090
+#define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL		(1 << 10)
 #define  _3D_CHICKEN_SF_DISABLE_FASTCLIP_CULL		(1 << 5)
 
 #define MI_MODE		0x0209c
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 56a84ff..372a398 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3529,6 +3529,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 
 	I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
 
+	/* WaDisableEarlyCull */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
+
 	I915_WRITE(IVB_CHICKEN3,
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
@@ -3606,6 +3610,10 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
 
 	I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
 
+	/* WaDisableEarlyCull */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
+
 	I915_WRITE(IVB_CHICKEN3,
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
                   ` (3 preceding siblings ...)
  2012-09-19 20:28 ` [PATCH 5/9] drm/i915: implement WaDisableEarlyCull for VLV and IVB Jesse Barnes
@ 2012-09-19 20:29 ` Jesse Barnes
  2012-09-25  8:51   ` Daniel Vetter
  2012-10-01 16:57   ` Lespiau, Damien
  2012-09-19 20:29 ` [PATCH 7/9] drm/i915: limit VLV IRQ enables to those we use Jesse Barnes
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:29 UTC (permalink / raw)
  To: intel-gfx

Workaround for dual port PS dispatch on GT1.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_reg.h |    4 ++++
 drivers/gpu/drm/i915/intel_pm.c |   16 ++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e570aea..e863973 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4249,6 +4249,10 @@
 #define GEN7_L3LOG_BASE			0xB070
 #define GEN7_L3LOG_SIZE			0x80
 
+#define GEN7_HALF_SLICE_CHICKEN1	0xe100 /* IVB GT1 + VLV */
+#define GEN7_HALF_SLICE_CHICKEN1_IVB	0xf100
+#define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE	(1<<3)
+
 #define G4X_AUD_VID_DID			0x62020
 #define INTEL_AUDIO_DEVCL		0x808629FB
 #define INTEL_AUDIO_DEVBLC		0x80862801
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 372a398..f860cdd 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3537,6 +3537,17 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+	/* WaDisablePSDDualDispatchEnable */
+	if (dev->pci_device == 0x0156 ||
+	    dev->pci_device == 0x0152)
+		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+			   I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
+			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+	else
+		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_IVB,
+			   I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
+			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
 	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -3618,6 +3629,11 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
 		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
 		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
+	/* WaDisablePSDDualDispatchEnable */
+	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+		   I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
+		   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+
 	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 7/9] drm/i915: limit VLV IRQ enables to those we use
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
                   ` (4 preceding siblings ...)
  2012-09-19 20:29 ` [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV Jesse Barnes
@ 2012-09-19 20:29 ` Jesse Barnes
  2012-09-26 14:16   ` Daniel Vetter
  2012-09-19 20:29 ` [PATCH 8/9] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:29 UTC (permalink / raw)
  To: intel-gfx

To match IVB.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_irq.c |   18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d601013..731f066 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1948,6 +1948,7 @@ static int valleyview_irq_postinstall(struct drm_device *dev)
 	u32 enable_mask;
 	u32 hotplug_en = I915_READ(PORT_HOTPLUG_EN);
 	u32 pipestat_enable = PLANE_FLIP_DONE_INT_EN_VLV;
+	u32 render_irqs;
 	u16 msid;
 
 	enable_mask = I915_DISPLAY_PORT_INTERRUPT;
@@ -1987,21 +1988,12 @@ static int valleyview_irq_postinstall(struct drm_device *dev)
 	I915_WRITE(VLV_IIR, 0xffffffff);
 	I915_WRITE(VLV_IIR, 0xffffffff);
 
-	dev_priv->gt_irq_mask = ~0;
-
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-	I915_WRITE(GTIER, GT_GEN6_BLT_FLUSHDW_NOTIFY_INTERRUPT |
-		   GT_GEN6_BLT_CS_ERROR_INTERRUPT |
-		   GT_GEN6_BLT_USER_INTERRUPT |
-		   GT_GEN6_BSD_USER_INTERRUPT |
-		   GT_GEN6_BSD_CS_ERROR_INTERRUPT |
-		   GT_GEN7_L3_PARITY_ERROR_INTERRUPT |
-		   GT_PIPE_NOTIFY |
-		   GT_RENDER_CS_ERROR_INTERRUPT |
-		   GT_SYNC_STATUS |
-		   GT_USER_INTERRUPT);
+
+	render_irqs = GT_USER_INTERRUPT | GEN6_BSD_USER_INTERRUPT |
+		GEN6_BLITTER_USER_INTERRUPT;
+	I915_WRITE(GTIER, render_irqs);
 	POSTING_READ(GTIER);
 
 	/* ack & enable invalid PTE error interrupts */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 8/9] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
                   ` (5 preceding siblings ...)
  2012-09-19 20:29 ` [PATCH 7/9] drm/i915: limit VLV IRQ enables to those we use Jesse Barnes
@ 2012-09-19 20:29 ` Jesse Barnes
  2012-09-19 20:29 ` [PATCH 9/9] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
  2012-09-19 21:41 ` [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Ben Widawsky
  8 siblings, 0 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:29 UTC (permalink / raw)
  To: intel-gfx

So store into the scratch space of the HWS to make sure the invalidate
occurs.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/i915_reg.h         |    6 ++++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |   22 ++++++++++++++++++----
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e863973..e768bf5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -241,8 +241,10 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*x-1)
 #define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
-#define   MI_INVALIDATE_TLB	(1<<18)
-#define   MI_INVALIDATE_BSD	(1<<7)
+#define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
+#define   MI_INVALIDATE_TLB		(1<<18)
+#define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_INVALIDATE_BSD		(1<<7)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE	(1)
 #define   MI_BATCH_NON_SECURE_I965 (1<<8)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ef5101f..253b542 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1404,10 +1404,17 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.5 - video engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_SCRATCH_INDEX << 3);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1445,10 +1452,17 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB;
+		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+			MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_SCRATCH_INDEX << 3);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH 9/9] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
                   ` (6 preceding siblings ...)
  2012-09-19 20:29 ` [PATCH 8/9] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
@ 2012-09-19 20:29 ` Jesse Barnes
  2012-09-19 21:41 ` [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Ben Widawsky
  8 siblings, 0 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 20:29 UTC (permalink / raw)
  To: intel-gfx

"If ENABLED, PIPE_CONTROL command will flush the in flight data written
out by render engine to Global Observation point on flush done. Also
Requires stall bit ([20] of DW1) set."

So set the stall bit to ensure proper invalidation.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 253b542..5caea78 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -246,7 +246,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
-		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
 	ret = intel_ring_begin(ring, 4);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB
  2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
                   ` (7 preceding siblings ...)
  2012-09-19 20:29 ` [PATCH 9/9] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
@ 2012-09-19 21:41 ` Ben Widawsky
  2012-09-19 22:06   ` Jesse Barnes
  8 siblings, 1 reply; 23+ messages in thread
From: Ben Widawsky @ 2012-09-19 21:41 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, 19 Sep 2012 13:28:55 -0700
Jesse Barnes <jbarnes@virtuousgeek.org> wrote:

> References: https://bugs.freedesktop.org/show_bug.cgi?id=50233
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h |    1 +
>  drivers/gpu/drm/i915/intel_pm.c |    6 ++++--
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h index a828e90..b89e5f5 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -3443,6 +3443,7 @@
>  
>  #define GEN7_L3_CHICKEN_MODE_REGISTER		0xB030
>  #define  GEN7_WA_L3_CHICKEN_MODE
> 0x20000000 +#define
> GEN7_WA_DOP_CLOCK_GATING_DISABLE			0x08000000 
>  /* WaCatErrorRejectionIssue */
>  #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c index 36c6409..7ec4b28 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3541,7 +3541,8 @@ static void ivybridge_init_clock_gating(struct
> drm_device *dev) I915_WRITE(GEN7_L3CNTLREG1,
>  			GEN7_WA_FOR_GEN7_L3_CONTROL);
>  	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
> -			GEN7_WA_L3_CHICKEN_MODE);
> +		   GEN7_WA_L3_CHICKEN_MODE |
> +		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
>  
>  	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
>  	 * gating disable must be set.  Failure to set it results in
> @@ -3611,7 +3612,8 @@ static void valleyview_init_clock_gating(struct
> drm_device *dev) 
>  	/* WaApplyL3ControlAndL3ChickenMode requires those two on
> Ivy Bridge */ I915_WRITE(GEN7_L3CNTLREG1,
> GEN7_WA_FOR_GEN7_L3_CONTROL);
> -	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
> GEN7_WA_L3_CHICKEN_MODE);
> +	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
> GEN7_WA_L3_CHICKEN_MODE |
> +		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
>  
>  	/* This is required by WaCatErrorRejectionIssue */
>  	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,

As discussed on IRC, can you confirm that the register I use in the L3
parity work isn't the right one? You can remove that disabling there
as well with this patch.

-- 
Ben Widawsky, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB
  2012-09-19 21:41 ` [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Ben Widawsky
@ 2012-09-19 22:06   ` Jesse Barnes
  0 siblings, 0 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-19 22:06 UTC (permalink / raw)
  To: Ben Widawsky; +Cc: intel-gfx

On Wed, 19 Sep 2012 14:41:31 -0700
Ben Widawsky <ben@bwidawsk.net> wrote:

> On Wed, 19 Sep 2012 13:28:55 -0700
> Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> 
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=50233
> > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h |    1 +
> >  drivers/gpu/drm/i915/intel_pm.c |    6 ++++--
> >  2 files changed, 5 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > b/drivers/gpu/drm/i915/i915_reg.h index a828e90..b89e5f5 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -3443,6 +3443,7 @@
> >  
> >  #define GEN7_L3_CHICKEN_MODE_REGISTER		0xB030
> >  #define  GEN7_WA_L3_CHICKEN_MODE
> > 0x20000000 +#define
> > GEN7_WA_DOP_CLOCK_GATING_DISABLE			0x08000000 
> >  /* WaCatErrorRejectionIssue */
> >  #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > b/drivers/gpu/drm/i915/intel_pm.c index 36c6409..7ec4b28 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -3541,7 +3541,8 @@ static void ivybridge_init_clock_gating(struct
> > drm_device *dev) I915_WRITE(GEN7_L3CNTLREG1,
> >  			GEN7_WA_FOR_GEN7_L3_CONTROL);
> >  	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
> > -			GEN7_WA_L3_CHICKEN_MODE);
> > +		   GEN7_WA_L3_CHICKEN_MODE |
> > +		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
> >  
> >  	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
> >  	 * gating disable must be set.  Failure to set it results in
> > @@ -3611,7 +3612,8 @@ static void valleyview_init_clock_gating(struct
> > drm_device *dev) 
> >  	/* WaApplyL3ControlAndL3ChickenMode requires those two on
> > Ivy Bridge */ I915_WRITE(GEN7_L3CNTLREG1,
> > GEN7_WA_FOR_GEN7_L3_CONTROL);
> > -	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
> > GEN7_WA_L3_CHICKEN_MODE);
> > +	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
> > GEN7_WA_L3_CHICKEN_MODE |
> > +		   GEN7_WA_DOP_CLOCK_GATING_DISABLE);
> >  
> >  	/* This is required by WaCatErrorRejectionIssue */
> >  	I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
> 
> As discussed on IRC, can you confirm that the register I use in the L3
> parity work isn't the right one? You can remove that disabling there
> as well with this patch.

This appears to be the wrong register.  Looks like the correct one is
0x9424 bit 0 or 0xe4f4 or 0xf4f4 bit 0.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/9] drm/i915: add post-flush store dw workaround
  2012-09-19 20:28 ` [PATCH 4/9] drm/i915: add post-flush store dw workaround Jesse Barnes
@ 2012-09-25  8:49   ` Daniel Vetter
  2012-09-25 11:07     ` Jesse Barnes
  0 siblings, 1 reply; 23+ messages in thread
From: Daniel Vetter @ 2012-09-25  8:49 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Sep 19, 2012 at 01:28:58PM -0700, Jesse Barnes wrote:
> Several platforms need this to flush the CS write buffers.

Chris spent quite some effort to dump less crap into the rings on gen6,
and your description here sounds like we only need this when flushing
write caches. Or it might only apply to CS writes (in which case this is
at the wrong spot). In any case, can you please double check where exactly
we need this and only add it there, with a neat comment explaining things
added?

I'm bitching because afair the CS stuff the Windows driver emits (of which
I've seen some traces) only emits one such 8x MI_WRITE block per batch,
whereas your code here would emit 2 such 8x MI_WRITE blocks.

Thanks, Daniel
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=50241
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 55cdb4d..ef5101f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -216,7 +216,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
>  	u32 flags = 0;
>  	struct pipe_control *pc = ring->private;
>  	u32 scratch_addr = pc->gtt_offset + 128;
> -	int ret;
> +	int ret, i;
>  
>  	/* Force SNB workarounds for PIPE_CONTROL flushes */
>  	ret = intel_emit_post_sync_nonzero_flush(ring);
> @@ -259,6 +259,19 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
>  	intel_ring_emit(ring, 0);
>  	intel_ring_advance(ring);
>  
> +	ret = intel_ring_begin(ring, 4 * 8);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < 8; i++) {
> +		intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
> +		intel_ring_emit(ring, I915_GEM_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> +		intel_ring_emit(ring, 0);
> +		intel_ring_emit(ring, MI_NOOP);
> +	}
> +	intel_ring_advance(ring);
> +
> +
>  	return 0;
>  }
>  
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV
  2012-09-19 20:29 ` [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV Jesse Barnes
@ 2012-09-25  8:51   ` Daniel Vetter
  2012-10-01 16:52     ` Lespiau, Damien
  2012-10-01 16:57   ` Lespiau, Damien
  1 sibling, 1 reply; 23+ messages in thread
From: Daniel Vetter @ 2012-09-25  8:51 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Sep 19, 2012 at 01:29:00PM -0700, Jesse Barnes wrote:
> Workaround for dual port PS dispatch on GT1.
> 
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h |    4 ++++
>  drivers/gpu/drm/i915/intel_pm.c |   16 ++++++++++++++++
>  2 files changed, 20 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e570aea..e863973 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -4249,6 +4249,10 @@
>  #define GEN7_L3LOG_BASE			0xB070
>  #define GEN7_L3LOG_SIZE			0x80
>  
> +#define GEN7_HALF_SLICE_CHICKEN1	0xe100 /* IVB GT1 + VLV */
> +#define GEN7_HALF_SLICE_CHICKEN1_IVB	0xf100
> +#define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE	(1<<3)
> +
>  #define G4X_AUD_VID_DID			0x62020
>  #define INTEL_AUDIO_DEVCL		0x808629FB
>  #define INTEL_AUDIO_DEVBLC		0x80862801
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 372a398..f860cdd 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3537,6 +3537,17 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
>  		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
>  		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
>  
> +	/* WaDisablePSDDualDispatchEnable */
> +	if (dev->pci_device == 0x0156 ||
> +	    dev->pci_device == 0x0152)

Can we please have a comment explaining which PCI IDs we're matching
here? Just in case we add yet another variant sometime. Or just plainly
use IS_MOBILE, which seems to match here ...
-Daniel

> +		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> +			   I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
> +			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
> +	else
> +		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_IVB,
> +			   I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
> +			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
> +
>  	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
>  	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
>  		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
> @@ -3618,6 +3629,11 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
>  		   CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
>  		   CHICKEN3_DGMG_DONE_FIX_DISABLE);
>  
> +	/* WaDisablePSDDualDispatchEnable */
> +	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> +		   I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
> +		   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
> +
>  	/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
>  	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
>  		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands
  2012-09-19 20:28 ` [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands Jesse Barnes
@ 2012-09-25  8:54   ` Daniel Vetter
  2012-09-25 11:08     ` Jesse Barnes
  0 siblings, 1 reply; 23+ messages in thread
From: Daniel Vetter @ 2012-09-25  8:54 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Sep 19, 2012 at 01:28:57PM -0700, Jesse Barnes wrote:
> Some commands and workarounds require stores to occur to function
> correctly, so add some scratch space to the HWS page to accommodate
> them.
> 
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 2ea7a31..ef85742 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -181,6 +181,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
>   * The area from dword 0x20 to 0x3ff is available for driver usage.
>   */
>  #define I915_GEM_HWS_INDEX		0x20
> +#define I915_GEM_SCRATCH_INDEX		0x28 /* Some commands need a scratch store */

Any specific reason for using an index divisible by 8? Afaik this is an
index, and the hw multiplies by 4 on its own. So looks a bit puzzling
when reading (since iirc only 0x21 is used anywhere else, in some dri1
stuff).
-Daniel
>  
>  void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
>  
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/9] drm/i915: add post-flush store dw workaround
  2012-09-25  8:49   ` Daniel Vetter
@ 2012-09-25 11:07     ` Jesse Barnes
  0 siblings, 0 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-25 11:07 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Tue, 25 Sep 2012 10:49:28 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Wed, Sep 19, 2012 at 01:28:58PM -0700, Jesse Barnes wrote:
> > Several platforms need this to flush the CS write buffers.
> 
> Chris spent quite some effort to dump less crap into the rings on gen6,
> and your description here sounds like we only need this when flushing
> write caches. Or it might only apply to CS writes (in which case this is
> at the wrong spot). In any case, can you please double check where exactly
> we need this and only add it there, with a neat comment explaining things
> added?

"write caches" as in "any time we do a store dw and want to read the
result coherently" is my understanding.

> I'm bitching because afair the CS stuff the windows driver emits (of which
> I've seen some traces) only emits one such 8x MI_WRITE block per batch,
> whereas your code here would emit 2 such 2x MI_WRITE blocks.

Doing it once should be sufficient, I guess I need to split this out
(probably a good idea anyway for comment & naming purposes).

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands
  2012-09-25  8:54   ` Daniel Vetter
@ 2012-09-25 11:08     ` Jesse Barnes
  2012-09-25 11:47       ` Daniel Vetter
  0 siblings, 1 reply; 23+ messages in thread
From: Jesse Barnes @ 2012-09-25 11:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Tue, 25 Sep 2012 10:54:00 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Wed, Sep 19, 2012 at 01:28:57PM -0700, Jesse Barnes wrote:
> > Some commands and workarounds require stores to occur to function
> > correctly, so add some scratch space to the HWS page to accommodate
> > them.
> > 
> > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> > ---
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > index 2ea7a31..ef85742 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> > @@ -181,6 +181,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
> >   * The area from dword 0x20 to 0x3ff is available for driver usage.
> >   */
> >  #define I915_GEM_HWS_INDEX		0x20
> > +#define I915_GEM_SCRATCH_INDEX		0x28 /* Some commands need a scratch store */
> 
> Any specific reason for using an index divisible by 8? Afaik this is an
> > index, and the hw multiplies by 4 on its own. So looks a bit puzzling
> when reading (since iirc only 0x21 is used anywhere else, in some dri1
> stuff).

I got scared when I saw something about qword alignment in the docs.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands
  2012-09-25 11:08     ` Jesse Barnes
@ 2012-09-25 11:47       ` Daniel Vetter
  2012-09-25 12:08         ` Jesse Barnes
  0 siblings, 1 reply; 23+ messages in thread
From: Daniel Vetter @ 2012-09-25 11:47 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Tue, Sep 25, 2012 at 1:08 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Tue, 25 Sep 2012 10:54:00 +0200
> Daniel Vetter <daniel@ffwll.ch> wrote:
>
>> On Wed, Sep 19, 2012 at 01:28:57PM -0700, Jesse Barnes wrote:
>> > Some commands and workarounds require stores to occur to function
>> > correctly, so add some scratch space to the HWS page to accommodate
>> > them.
>> >
>> > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
>> > ---
>> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
>> >  1 file changed, 1 insertion(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> > index 2ea7a31..ef85742 100644
>> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> > @@ -181,6 +181,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
>> >   * The area from dword 0x20 to 0x3ff is available for driver usage.
>> >   */
>> >  #define I915_GEM_HWS_INDEX         0x20
>> > +#define I915_GEM_SCRATCH_INDEX             0x28 /* Some commands need a scratch store */
>>
>> Any specific reason for using an index divisible by 8? Afaik this is an
>> index, and the hw multiplies by 4 on its own. So looks a bit puzzling
>> when reading (since iirc only 0x21 is used anywhere else, in some dri1
>> stuff).
>
> I got scared when I saw something about qword alignment in the docs.

Afaik you need the qword alignment only when doing a qword write. And
iirc that only works with writes to gtt address (not status page
offsets). And for 64bit store_dw writes I remember some further
restrictions (at least on some pipes).

One thing I wonder is whether these workarounds still work when using
a status page store dw and not a direct write to a gtt address though
...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands
  2012-09-25 11:47       ` Daniel Vetter
@ 2012-09-25 12:08         ` Jesse Barnes
  0 siblings, 0 replies; 23+ messages in thread
From: Jesse Barnes @ 2012-09-25 12:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Tue, 25 Sep 2012 13:47:54 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Tue, Sep 25, 2012 at 1:08 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > On Tue, 25 Sep 2012 10:54:00 +0200
> > Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> >> On Wed, Sep 19, 2012 at 01:28:57PM -0700, Jesse Barnes wrote:
> >> > Some commands and workarounds require stores to occur to function
> >> > correctly, so add some scratch space to the HWS page to accommodate
> >> > them.
> >> >
> >> > Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> >> > ---
> >> >  drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
> >> >  1 file changed, 1 insertion(+)
> >> >
> >> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >> > index 2ea7a31..ef85742 100644
> >> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >> > @@ -181,6 +181,7 @@ intel_read_status_page(struct intel_ring_buffer *ring,
> >> >   * The area from dword 0x20 to 0x3ff is available for driver usage.
> >> >   */
> >> >  #define I915_GEM_HWS_INDEX         0x20
> >> > +#define I915_GEM_SCRATCH_INDEX             0x28 /* Some commands need a scratch store */
> >>
> >> Any specific reason for using an index divisible by 8? Afaik this is an
> >> index, and the hw multiplies by 4 on its own. So looks a bit puzzling
> >> when reading (since iirc only 0x21 is used anywhere else, in some dri1
> >> stuff).
> >
> > I got scared when I saw something about qword alignment in the docs.
> 
> Afaik you need the qword alignment only when doing a qword write. And
> iirc that only works with writes to gtt address (not status page
> offsets). And for 64bit store_dw writes I remember some further
> restrictions (at least on some pipes).
> 
> One thing I wonder is whether these workarounds still work when using
> a status page store dw and not a direct write to a gtt address though
> ...

I think they do, but we should come up with a torture test for stuff
like this...

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 7/9] drm/i915: limit VLV IRQ enables to those we use
  2012-09-19 20:29 ` [PATCH 7/9] drm/i915: limit VLV IRQ enables to those we use Jesse Barnes
@ 2012-09-26 14:16   ` Daniel Vetter
  0 siblings, 0 replies; 23+ messages in thread
From: Daniel Vetter @ 2012-09-26 14:16 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Sep 19, 2012 at 01:29:01PM -0700, Jesse Barnes wrote:
> To match IVB.
> 
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV
  2012-09-25  8:51   ` Daniel Vetter
@ 2012-10-01 16:52     ` Lespiau, Damien
  2012-10-01 16:56       ` Jesse Barnes
  0 siblings, 1 reply; 23+ messages in thread
From: Lespiau, Damien @ 2012-10-01 16:52 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Tue, Sep 25, 2012 at 9:51 AM, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Wed, Sep 19, 2012 at 01:29:00PM -0700, Jesse Barnes wrote:
>> Workaround for dual port PS dispatch on GT1.
>>
>> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
>> +     /* WaDisablePSDDualDispatchEnable */
>> +     if (dev->pci_device == 0x0156 ||
>> +         dev->pci_device == 0x0152)
>
> Can we please have a comment here explaining which pci ids we're matching
> here? Just in case we add yet another variant sometime. Or just plainly
> use IS_MOBILE, which seems to match here ...
> -Daniel

This looks like testing if we're running on an IVB GT1 device (and
then we're missing 0x015a?). The address of the register is different
on GT1 and GT2 devices.

-- 
Damien

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV
  2012-10-01 16:52     ` Lespiau, Damien
@ 2012-10-01 16:56       ` Jesse Barnes
  2012-10-01 17:07         ` Lespiau, Damien
  0 siblings, 1 reply; 23+ messages in thread
From: Jesse Barnes @ 2012-10-01 16:56 UTC (permalink / raw)
  To: Lespiau, Damien; +Cc: intel-gfx

On Mon, 1 Oct 2012 17:52:21 +0100
"Lespiau, Damien" <damien.lespiau@intel.com> wrote:

> On Tue, Sep 25, 2012 at 9:51 AM, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Wed, Sep 19, 2012 at 01:29:00PM -0700, Jesse Barnes wrote:
> >> Workaround for dual port PS dispatch on GT1.
> >>
> >> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> >> +     /* WaDisablePSDDualDispatchEnable */
> >> +     if (dev->pci_device == 0x0156 ||
> >> +         dev->pci_device == 0x0152)
> >
> > Can we please have a comment here explaining which pci ids we're matching
> > here? Just in case we add yet another variant sometime. Or just plainly
> > use IS_MOBILE, which seems to match here ...
> > -Daniel
> 
> This looks like testing if we're running on an IVB GT1 device (and
> then we're missing 0x015a?). The address of the register is different
> on GT1 and GT2 devices.
> 
Yeah I have an updated version that uses IS_MOBILE since I think that
matches the IDs we want.

Jesse

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV
  2012-09-19 20:29 ` [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV Jesse Barnes
  2012-09-25  8:51   ` Daniel Vetter
@ 2012-10-01 16:57   ` Lespiau, Damien
  1 sibling, 0 replies; 23+ messages in thread
From: Lespiau, Damien @ 2012-10-01 16:57 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Sep 19, 2012 at 9:29 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> Workaround for dual port PS dispatch on GT1.
>
> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/i915_reg.h |    4 ++++
>  drivers/gpu/drm/i915/intel_pm.c |   16 ++++++++++++++++
>  2 files changed, 20 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e570aea..e863973 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -4249,6 +4249,10 @@
>  #define GEN7_L3LOG_BASE                        0xB070
>  #define GEN7_L3LOG_SIZE                        0x80
>
> +#define GEN7_HALF_SLICE_CHICKEN1       0xe100 /* IVB GT1 + VLV */
> +#define GEN7_HALF_SLICE_CHICKEN1_IVB   0xf100
> +#define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1<<3)
> +
>  #define G4X_AUD_VID_DID                        0x62020
>  #define INTEL_AUDIO_DEVCL              0x808629FB
>  #define INTEL_AUDIO_DEVBLC             0x80862801
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 372a398..f860cdd 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3537,6 +3537,17 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
>                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
>                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
>
> +       /* WaDisablePSDDualDispatchEnable */
> +       if (dev->pci_device == 0x0156 ||
> +           dev->pci_device == 0x0152)
> +               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> +                          I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
> +                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
> +       else
> +               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_IVB,
> +                          I915_READ(GEN7_HALF_SLICE_CHICKEN1) |
> +                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
> +

It looks like we don't need to force the single port dispatch on IVB
GT2 devices, maybe just drop the else then?
0x015a is also a GT1 device that seems to be missing from the if. I
guess it'd be nice to have an IS_IVB_GT1() macro there.

Other than those 2 points the rest looks good to me:
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>

-- 
Damien

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV
  2012-10-01 16:56       ` Jesse Barnes
@ 2012-10-01 17:07         ` Lespiau, Damien
  0 siblings, 0 replies; 23+ messages in thread
From: Lespiau, Damien @ 2012-10-01 17:07 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Mon, Oct 1, 2012 at 5:56 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> Yeah I have an updated version that uses IS_MOBILE since I think that
> matches the IDs we want.

I don't think IS_MOBILE() is what we want here, IS_MOBILE() would be
matching 0x0156 and 0x0166 while your code tests for 0x0156 and 0x0152
(and, to me, is missing 0x015a) which are GT1 devices (and the _IVB
version of the register is valid for GT2 devices).

-- 
Damien

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2012-10-01 17:07 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-19 20:28 [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Jesse Barnes
2012-09-19 20:28 ` [PATCH 2/9] drm/i915: implement WaForceL3Serialization " Jesse Barnes
2012-09-19 20:28 ` [PATCH 3/9] drm/i915: add a HSW scratch location for flush commands Jesse Barnes
2012-09-25  8:54   ` Daniel Vetter
2012-09-25 11:08     ` Jesse Barnes
2012-09-25 11:47       ` Daniel Vetter
2012-09-25 12:08         ` Jesse Barnes
2012-09-19 20:28 ` [PATCH 4/9] drm/i915: add post-flush store dw workaround Jesse Barnes
2012-09-25  8:49   ` Daniel Vetter
2012-09-25 11:07     ` Jesse Barnes
2012-09-19 20:28 ` [PATCH 5/9] drm/i915: implement WaDisableEarlyCull for VLV and IVB Jesse Barnes
2012-09-19 20:29 ` [PATCH 6/9] drm/i915: implement WaDisablePSDDualDispatchEnable on IVB and VLV Jesse Barnes
2012-09-25  8:51   ` Daniel Vetter
2012-10-01 16:52     ` Lespiau, Damien
2012-10-01 16:56       ` Jesse Barnes
2012-10-01 17:07         ` Lespiau, Damien
2012-10-01 16:57   ` Lespiau, Damien
2012-09-19 20:29 ` [PATCH 7/9] drm/i915: limit VLV IRQ enables to those we use Jesse Barnes
2012-09-26 14:16   ` Daniel Vetter
2012-09-19 20:29 ` [PATCH 8/9] drm/i915: TLB invalidation with MI_FLUSH_SW requires a post-sync op Jesse Barnes
2012-09-19 20:29 ` [PATCH 9/9] drm/i915: PIPE_CONTROL TLB invalidate requires CS stall Jesse Barnes
2012-09-19 21:41 ` [PATCH 1/9] drm/i915: disable DOP clock gating on VLV and IVB Ben Widawsky
2012-09-19 22:06   ` Jesse Barnes

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.