public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming
@ 2016-04-25 12:38 Imre Deak
  2016-04-25 12:38 ` [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions Imre Deak
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Imre Deak @ 2016-04-25 12:38 UTC (permalink / raw)
  To: intel-gfx

BSpec requires us to wait ~100 clocks before re-enabling clock gating,
so make sure we do this.

Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 702f683..a6fd4dd 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6711,6 +6711,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
 	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+	/*
+	 * Wait at least 100 clocks before re-enabling clock gating. See
+	 * the definition of L3SQCREG1 in BSpec.
+	 */
+	POSTING_READ(GEN8_L3SQCREG1);
+	udelay(1);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 
 	/*
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions
  2016-04-25 12:38 [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Imre Deak
@ 2016-04-25 12:38 ` Imre Deak
  2016-04-26  8:11   ` Mika Kuoppala
  2016-04-26 16:55   ` Ville Syrjälä
  2016-04-25 12:38 ` [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies Imre Deak
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 16+ messages in thread
From: Imre Deak @ 2016-04-25 12:38 UTC (permalink / raw)
  To: intel-gfx

No need for hard-coding the register value, the corresponding fields are
defined properly in BSpec.

No functional change.

Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h | 3 ++-
 drivers/gpu/drm/i915/intel_pm.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c21b71c..0cb2e17 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6073,7 +6073,8 @@ enum skl_disp_power_wells {
 #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 
 #define GEN8_L3SQCREG1				_MMIO(0xB100)
-#define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
+#define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
+#define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
 
 #define GEN7_L3CNTLREG1				_MMIO(0xB01C)
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a6fd4dd..a9b7626 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6710,7 +6710,8 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 	 */
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
+				   L3_HIGH_PRIO_CREDITS(2));
 	/*
 	 * Wait at least 100 clocks before re-enabling clock gating. See
 	 * the definition of L3SQCREG1 in BSpec.
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies
  2016-04-25 12:38 [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Imre Deak
  2016-04-25 12:38 ` [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions Imre Deak
@ 2016-04-25 12:38 ` Imre Deak
  2016-04-25 13:16   ` Ville Syrjälä
  2016-04-26 16:39   ` [PATCH v2 " Imre Deak
  2016-04-25 12:38 ` [PATCH 4/4] drm/i915/bxt: " Imre Deak
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 16+ messages in thread
From: Imre Deak @ 2016-04-25 12:38 UTC (permalink / raw)
  To: intel-gfx

While browsing BSpec I bumped into a note saying we need to tune these
values based on actual measurements done after initial enabling. I've
checked that it indeed improves things on BXT. I haven't checked this on
CHV, but here it is if someone wants to give it a go.

CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a9b7626..fcfdb7f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev)
 	}
 }
 
+static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
+				   int general_prio_credits,
+				   int high_prio_credits)
+{
+	u32 misccpctl;
+
+	misccpctl = I915_READ(GEN7_MISCCPCTL);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+
+	I915_WRITE(GEN8_L3SQCREG1,
+		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
+		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
+
+	/*
+	 * Wait at least 100 clocks before re-enabling clock gating.
+	 * See the definition of L3SQCREG1 in BSpec.
+	 */
+	POSTING_READ(GEN8_L3SQCREG1);
+	udelay(1);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+}
+
 static void broadwell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe;
-	uint32_t misccpctl;
 
 	ilk_init_lp_watermarks(dev);
 
@@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 	 * WaProgramL3SqcReg1Default:bdw
 	 * WaTempDisableDOPClkGating:bdw
 	 */
-	misccpctl = I915_READ(GEN7_MISCCPCTL);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
-				   L3_HIGH_PRIO_CREDITS(2));
-	/*
-	 * Wait at least 100 clocks before re-enabling clock gating. See
-	 * the definition of L3SQCREG1 in BSpec.
-	 */
-	POSTING_READ(GEN8_L3SQCREG1);
-	udelay(1);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+	gen8_set_l3sqc_credits(dev_priv, 30, 2);
 
 	/*
 	 * WaGttCachingOffByDefault:bdw
@@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
 	/*
+	 * Adjust credits based on actual latencies, see BSpec LSQC Setting
+	 * Recommendations.
+	 */
+	gen8_set_l3sqc_credits(dev_priv, 38, 2);
+
+	/*
 	 * GTT cache may not work with big pages, so if those
 	 * are ever enabled GTT cache may need to be disabled.
 	 */
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/4] drm/i915/bxt: Tune L3 SQC credits based on actual latencies
  2016-04-25 12:38 [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Imre Deak
  2016-04-25 12:38 ` [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions Imre Deak
  2016-04-25 12:38 ` [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies Imre Deak
@ 2016-04-25 12:38 ` Imre Deak
  2016-04-26 16:41   ` [PATCH v2 " Imre Deak
  2016-04-25 14:03 ` ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Patchwork
  2016-04-26 16:55 ` [PATCH 1/4] " Ville Syrjälä
  4 siblings, 1 reply; 16+ messages in thread
From: Imre Deak @ 2016-04-25 12:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

BSpec says we need to fine tune these values, so comply. I checked this
with random GPU benchmarks and it does seem to improve things.

Note that I considered to program this from the ring as part of the
context specific workarounds there, I decided against that for the
following reasons:
- It's not a context specific setting, it's part of whatever (power-)
  context the GPU manages regardless of context scheduling to
  save/restore things across power transitions. So it's enough to
  program it once.
- Atm, we don't apply workarounds for engines other than the render
  engine from the ring (although this could be added if needed).
- The same setting is programmed via MMIO for BDW and it makes
  sense to program it the same way on BXT too.

CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fcfdb7f..101035b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -76,6 +76,16 @@ static void bxt_init_clock_gating(struct drm_device *dev)
 	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
 		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
 			   PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+	/*
+	 * Note that for dynamic reprogramming we'd need to do a stalling flush
+	 * operation, but we can do away with that here, since the GPU is idle
+	 * at this point.
+	 */
+	if (IS_BXT_REVID(dev_priv, BXT_REVID_A1, REVID_FOREVER))
+		I915_WRITE(GEN8_L3SQCREG1,
+			   L3_GENERAL_PRIO_CREDITS(62) |
+			   L3_HIGH_PRIO_CREDITS(2));
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies
  2016-04-25 12:38 ` [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies Imre Deak
@ 2016-04-25 13:16   ` Ville Syrjälä
  2016-04-26 16:19     ` Ville Syrjälä
  2016-04-26 16:39   ` [PATCH v2 " Imre Deak
  1 sibling, 1 reply; 16+ messages in thread
From: Ville Syrjälä @ 2016-04-25 13:16 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx

On Mon, Apr 25, 2016 at 03:38:07PM +0300, Imre Deak wrote:
> While browsing BSpec I bumped into a note saying we need to tune these
> values based on actual measurements done after initial enabling. I've
> checked that it indeed improves things on BXT. I haven't checked this on
> CHV, but here it is if someone wants to give it a go.
> 
> CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------
>  1 file changed, 29 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a9b7626..fcfdb7f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev)
>  	}
>  }
>  
> +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
> +				   int general_prio_credits,
> +				   int high_prio_credits)
> +{
> +	u32 misccpctl;
> +
> +	misccpctl = I915_READ(GEN7_MISCCPCTL);
> +	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> +
> +	I915_WRITE(GEN8_L3SQCREG1,
> +		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
> +		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
> +
> +	/*
> +	 * Wait at least 100 clocks before re-enabling clock gating.
> +	 * See the definition of L3SQCREG1 in BSpec.
> +	 */
> +	POSTING_READ(GEN8_L3SQCREG1);
> +	udelay(1);
> +	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +}
> +
>  static void broadwell_init_clock_gating(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	enum pipe pipe;
> -	uint32_t misccpctl;
>  
>  	ilk_init_lp_watermarks(dev);
>  
> @@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
>  	 * WaProgramL3SqcReg1Default:bdw
>  	 * WaTempDisableDOPClkGating:bdw

The w/a note should be moved as well then.

>  	 */
> -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> -	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> -				   L3_HIGH_PRIO_CREDITS(2));
> -	/*
> -	 * Wait at least 100 clocks before re-enabling clock gating. See
> -	 * the definition of L3SQCREG1 in BSpec.
> -	 */
> -	POSTING_READ(GEN8_L3SQCREG1);
> -	udelay(1);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +	gen8_set_l3sqc_credits(dev_priv, 30, 2);
>  
>  	/*
>  	 * WaGttCachingOffByDefault:bdw
> @@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
>  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>  
>  	/*
> +	 * Adjust credits based on actual latencies, see BSpec LSQC Setting
> +	 * Recommendations.
> +	 */
> +	gen8_set_l3sqc_credits(dev_priv, 38, 2);

Where exactly in Bspec is this? Last I looked CHV was supposed to be
fine with the defaults.

> +
> +	/*
>  	 * GTT cache may not work with big pages, so if those
>  	 * are ever enabled GTT cache may need to be disabled.
>  	 */
> -- 
> 2.5.0

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming
  2016-04-25 12:38 [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Imre Deak
                   ` (2 preceding siblings ...)
  2016-04-25 12:38 ` [PATCH 4/4] drm/i915/bxt: " Imre Deak
@ 2016-04-25 14:03 ` Patchwork
  2016-04-26 16:55 ` [PATCH 1/4] " Ville Syrjälä
  4 siblings, 0 replies; 16+ messages in thread
From: Patchwork @ 2016-04-25 14:03 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming
URL   : https://patchwork.freedesktop.org/series/6252/
State : failure

== Summary ==

Series 6252v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/6252/revisions/1/mbox/

Test drv_module_reload_basic:
                pass       -> INCOMPLETE (skl-nuci5)
Test gem_sync:
        Subgroup basic-each:
                pass       -> DMESG-FAIL (hsw-brixbox)
Test kms_flip:
        Subgroup basic-flip-vs-wf_vblank:
                pass       -> FAIL       (hsw-gt2)
Test kms_pipe_crc_basic:
        Subgroup read-crc-pipe-b-frame-sequence:
                skip       -> PASS       (bdw-nuci7)

bdw-nuci7        total:200  pass:188  dwarn:0   dfail:0   fail:0   skip:12 
bdw-ultra        total:200  pass:175  dwarn:0   dfail:0   fail:0   skip:25 
bsw-nuc-2        total:199  pass:158  dwarn:0   dfail:0   fail:0   skip:41 
byt-nuc          total:199  pass:155  dwarn:0   dfail:0   fail:0   skip:44 
hsw-brixbox      total:200  pass:173  dwarn:0   dfail:1   fail:0   skip:26 
hsw-gt2          total:200  pass:178  dwarn:0   dfail:0   fail:1   skip:21 
ilk-hp8440p      total:200  pass:137  dwarn:0   dfail:0   fail:0   skip:63 
ivb-t430s        total:200  pass:166  dwarn:0   dfail:0   fail:0   skip:34 
skl-i7k-2        total:200  pass:173  dwarn:0   dfail:0   fail:0   skip:27 
skl-nuci5        total:87   pass:84   dwarn:0   dfail:0   fail:0   skip:2  
snb-dellxps      total:19   pass:16   dwarn:0   dfail:0   fail:0   skip:2  

Results at /archive/results/CI_IGT_test/Patchwork_2063/

f814551aa7232ed36d71244dd148b48660b53a78 drm-intel-nightly: 2016y-04m-25d-11h-36m-27s UTC integration manifest
38bf344 drm/i915/bdw: Add missing delay during L3 SQC credit programming

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions
  2016-04-25 12:38 ` [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions Imre Deak
@ 2016-04-26  8:11   ` Mika Kuoppala
  2016-04-26  9:03     ` Imre Deak
  2016-04-26 16:55   ` Ville Syrjälä
  1 sibling, 1 reply; 16+ messages in thread
From: Mika Kuoppala @ 2016-04-26  8:11 UTC (permalink / raw)
  To: Imre Deak, intel-gfx

Imre Deak <imre.deak@intel.com> writes:

> [ text/plain ]
> No need for hard-coding the register value, the corresponding fields are
> defined properly in BSpec.
>
> No functional change.
>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h | 3 ++-
>  drivers/gpu/drm/i915/intel_pm.c | 3 ++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index c21b71c..0cb2e17 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6073,7 +6073,8 @@ enum skl_disp_power_wells {
>  #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
>  
>  #define GEN8_L3SQCREG1				_MMIO(0xB100)
> -#define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
> +#define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
> +#define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
>  

The values listed for CHV it seems that the macros would produce
one less than intended. For example chv 34 -> 0b10010

-Mika


>  #define GEN7_L3CNTLREG1				_MMIO(0xB01C)
>  #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a6fd4dd..a9b7626 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6710,7 +6710,8 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
>  	 */
>  	misccpctl = I915_READ(GEN7_MISCCPCTL);
>  	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> -	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
> +	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> +				   L3_HIGH_PRIO_CREDITS(2));
>  	/*
>  	 * Wait at least 100 clocks before re-enabling clock gating. See
>  	 * the definition of L3SQCREG1 in BSpec.
> -- 
> 2.5.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions
  2016-04-26  8:11   ` Mika Kuoppala
@ 2016-04-26  9:03     ` Imre Deak
  2016-04-26  9:21       ` Imre Deak
  0 siblings, 1 reply; 16+ messages in thread
From: Imre Deak @ 2016-04-26  9:03 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

On ti, 2016-04-26 at 11:11 +0300, Mika Kuoppala wrote:
> Imre Deak <imre.deak@intel.com> writes:
> 
> > [ text/plain ]
> > No need for hard-coding the register value, the corresponding
> > fields are
> > defined properly in BSpec.
> > 
> > No functional change.
> > 
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h | 3 ++-
> >  drivers/gpu/drm/i915/intel_pm.c | 3 ++-
> >  2 files changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index c21b71c..0cb2e17 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -6073,7 +6073,8 @@ enum skl_disp_power_wells {
> >  #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
> >  
> >  #define GEN8_L3SQCREG1				_MMIO(0xB100
> > )
> > -#define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
> > +#define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) <<
> > 19)
> > +#define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
> >  
> 
> The values listed for CHV it seems that the macros would produce
> one less than intended. For example chv 34 -> 0b10010

Yes, Ville noticed this too. I assumed the same formula as used on the
other platforms (BDW, SKL, BXT) and haven't noticed the difference wrt.
CHV.

According to Ville's tests using the formula in the spec (and setting
the new credits 30, 2) leads to a hang. So my suspicion is that it's a
bug in the spec, I will file a change request there.

--Imre

> 
> -Mika
> 
> 
> >  #define GEN7_L3CNTLREG1				_MMIO(0xB01
> > C)
> >  #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47
> > FF8C
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > b/drivers/gpu/drm/i915/intel_pm.c
> > index a6fd4dd..a9b7626 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -6710,7 +6710,8 @@ static void
> > broadwell_init_clock_gating(struct drm_device *dev)
> >  	 */
> >  	misccpctl = I915_READ(GEN7_MISCCPCTL);
> >  	I915_WRITE(GEN7_MISCCPCTL, misccpctl &
> > ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > -	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
> > +	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> > +				   L3_HIGH_PRIO_CREDITS(2));
> >  	/*
> >  	 * Wait at least 100 clocks before re-enabling clock
> > gating. See
> >  	 * the definition of L3SQCREG1 in BSpec.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions
  2016-04-26  9:03     ` Imre Deak
@ 2016-04-26  9:21       ` Imre Deak
  0 siblings, 0 replies; 16+ messages in thread
From: Imre Deak @ 2016-04-26  9:21 UTC (permalink / raw)
  To: Mika Kuoppala, intel-gfx

On ti, 2016-04-26 at 12:03 +0300, Imre Deak wrote:
> On ti, 2016-04-26 at 11:11 +0300, Mika Kuoppala wrote:
> > Imre Deak <imre.deak@intel.com> writes:
> > 
> > > [ text/plain ]
> > > No need for hard-coding the register value, the corresponding
> > > fields are
> > > defined properly in BSpec.
> > > 
> > > No functional change.
> > > 
> > > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_reg.h | 3 ++-
> > >  drivers/gpu/drm/i915/intel_pm.c | 3 ++-
> > >  2 files changed, 4 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > > b/drivers/gpu/drm/i915/i915_reg.h
> > > index c21b71c..0cb2e17 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -6073,7 +6073,8 @@ enum skl_disp_power_wells {
> > >  #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
> > >  
> > >  #define GEN8_L3SQCREG1				_MMIO(0xB100
> > > )
> > > -#define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
> > > +#define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) <<
> > > 19)
> > > +#define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
> > >  
> > 
> > The values listed for CHV it seems that the macros would produce
> > one less than intended. For example chv 34 -> 0b10010
> 
> Yes, Ville noticed this too. I assumed the same formula as used on the
> other platforms (BDW, SKL, BXT) and haven't noticed the difference wrt.
> CHV.
> 
> According to Ville's tests using the formula in the spec (and setting
> the new credits 30, 2) leads to a hang. So my suspicion is that it's a

Sorry, I meant setting the new credits 38, 2.

> bug in the spec, I will file a change request there.
> 
> --Imre
> 
> > 
> > -Mika
> > 
> > 
> > >  #define GEN7_L3CNTLREG1				_MMIO(0xB01
> > > C)
> > >  #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47
> > > FF8C
> > > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > > b/drivers/gpu/drm/i915/intel_pm.c
> > > index a6fd4dd..a9b7626 100644
> > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > @@ -6710,7 +6710,8 @@ static void
> > > broadwell_init_clock_gating(struct drm_device *dev)
> > >  	 */
> > >  	misccpctl = I915_READ(GEN7_MISCCPCTL);
> > >  	I915_WRITE(GEN7_MISCCPCTL, misccpctl &
> > > ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > > -	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
> > > +	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> > > +				   L3_HIGH_PRIO_CREDITS(2));
> > >  	/*
> > >  	 * Wait at least 100 clocks before re-enabling clock
> > > gating. See
> > >  	 * the definition of L3SQCREG1 in BSpec.
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies
  2016-04-25 13:16   ` Ville Syrjälä
@ 2016-04-26 16:19     ` Ville Syrjälä
  0 siblings, 0 replies; 16+ messages in thread
From: Ville Syrjälä @ 2016-04-26 16:19 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx

On Mon, Apr 25, 2016 at 04:16:38PM +0300, Ville Syrjälä wrote:
> On Mon, Apr 25, 2016 at 03:38:07PM +0300, Imre Deak wrote:
> > While browsing BSpec I bumped into a note saying we need to tune these
> > values based on actual measurements done after initial enabling. I've
> > checked that it indeed improves things on BXT. I haven't checked this on
> > CHV, but here it is if someone wants to give it a go.
> > 
> > CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_pm.c | 41 +++++++++++++++++++++++++++++------------
> >  1 file changed, 29 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index a9b7626..fcfdb7f 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -6669,11 +6669,32 @@ static void lpt_suspend_hw(struct drm_device *dev)
> >  	}
> >  }
> >  
> > +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
> > +				   int general_prio_credits,
> > +				   int high_prio_credits)
> > +{
> > +	u32 misccpctl;
> > +
> > +	misccpctl = I915_READ(GEN7_MISCCPCTL);
> > +	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > +
> > +	I915_WRITE(GEN8_L3SQCREG1,
> > +		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
> > +		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
> > +
> > +	/*
> > +	 * Wait at least 100 clocks before re-enabling clock gating.
> > +	 * See the definition of L3SQCREG1 in BSpec.
> > +	 */
> > +	POSTING_READ(GEN8_L3SQCREG1);
> > +	udelay(1);
> > +	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > +}
> > +
> >  static void broadwell_init_clock_gating(struct drm_device *dev)
> >  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  	enum pipe pipe;
> > -	uint32_t misccpctl;
> >  
> >  	ilk_init_lp_watermarks(dev);
> >  
> > @@ -6708,17 +6729,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
> >  	 * WaProgramL3SqcReg1Default:bdw
> >  	 * WaTempDisableDOPClkGating:bdw
> 
> The w/a note should be moved as well then.
> 
> >  	 */
> > -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> > -	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> > -				   L3_HIGH_PRIO_CREDITS(2));
> > -	/*
> > -	 * Wait at least 100 clocks before re-enabling clock gating. See
> > -	 * the definition of L3SQCREG1 in BSpec.
> > -	 */
> > -	POSTING_READ(GEN8_L3SQCREG1);
> > -	udelay(1);
> > -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> > +	gen8_set_l3sqc_credits(dev_priv, 30, 2);
> >  
> >  	/*
> >  	 * WaGttCachingOffByDefault:bdw
> > @@ -6989,6 +7000,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
> >  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
> >  
> >  	/*
> > +	 * Adjust credits based on actual latencies, see BSpec LSQC Setting
> > +	 * Recommendations.
> > +	 */
> > +	gen8_set_l3sqc_credits(dev_priv, 38, 2);
> 
> Where exactly in Bspec is this? Last I looked CHV was supposed to be
> fine with the defaults.

OK, so the table of recommended values is in the "performance guide".
I don't actually know where the chv numbers came from since there's no
w/a for this, nor could I find a related hsd with any numbers.

Anyway, using these values Harri's membwtester texturing micro benchmark
gained >10% which seems nice. The other numbers seemed unchanged. I also
ran xonotic [1] but didn't see any significant changes in performance.

So based on that I'm fine with adjusting this on CHV.

[1] First I got crap results, but that turned out to be the CPU
    side making a mess of things. So it seems we're back to having
    to use taskset to pin the process to one core on chv :(

> 
> > +
> > +	/*
> >  	 * GTT cache may not work with big pages, so if those
> >  	 * are ever enabled GTT cache may need to be disabled.
> >  	 */
> > -- 
> > 2.5.0
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies
  2016-04-25 12:38 ` [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies Imre Deak
  2016-04-25 13:16   ` Ville Syrjälä
@ 2016-04-26 16:39   ` Imre Deak
  2016-04-26 16:51     ` Ville Syrjälä
  1 sibling, 1 reply; 16+ messages in thread
From: Imre Deak @ 2016-04-26 16:39 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

While browsing BSpec I bumped into a note saying we need to tune these
values based on actual measurements done after initial enabling. I've
checked that it indeed improves things on BXT. I haven't checked this on
CHV, but here it is if someone wants to give it a go.

v2:
- Add note about the discrepancy wrt. to the spec in the formula
  calculating the credit encodings. (Mika, Ville)
- Move the WA comment to the new function. (Ville)

CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
CC: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h |  6 ++++++
 drivers/gpu/drm/i915/intel_pm.c | 47 +++++++++++++++++++++++++++--------------
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 0cb2e17..e25e78f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6073,6 +6073,12 @@ enum skl_disp_power_wells {
 #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 
 #define GEN8_L3SQCREG1				_MMIO(0xB100)
+/*
+ * Note that on CHV the following has an off-by-one error wrt. to BSpec.
+ * Using the formula in BSpec leads to a hang, while the formula here works
+ * fine and matches the formulas for all other platforms. A BSpec change
+ * request has been filed to clarify this.
+ */
 #define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
 #define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a9b7626..b217c44 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6669,11 +6669,38 @@ static void lpt_suspend_hw(struct drm_device *dev)
 	}
 }
 
+static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
+				   int general_prio_credits,
+				   int high_prio_credits)
+{
+	u32 misccpctl;
+
+	/*
+	 * WaProgramL3SqcReg1Default:bdw
+	 * WaTempDisableDOPClkGating:bdw
+	 * For CHV see gfxspecs/Related Documents/Performance Guide/
+	 * LSQC Setting Recommendations[CHV,BXT].
+	 */
+	misccpctl = I915_READ(GEN7_MISCCPCTL);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+
+	I915_WRITE(GEN8_L3SQCREG1,
+		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
+		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
+
+	/*
+	 * Wait at least 100 clocks before re-enabling clock gating.
+	 * See the definition of L3SQCREG1 in BSpec.
+	 */
+	POSTING_READ(GEN8_L3SQCREG1);
+	udelay(1);
+	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+}
+
 static void broadwell_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe;
-	uint32_t misccpctl;
 
 	ilk_init_lp_watermarks(dev);
 
@@ -6704,21 +6731,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
-	/*
-	 * WaProgramL3SqcReg1Default:bdw
-	 * WaTempDisableDOPClkGating:bdw
-	 */
-	misccpctl = I915_READ(GEN7_MISCCPCTL);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
-	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
-				   L3_HIGH_PRIO_CREDITS(2));
-	/*
-	 * Wait at least 100 clocks before re-enabling clock gating. See
-	 * the definition of L3SQCREG1 in BSpec.
-	 */
-	POSTING_READ(GEN8_L3SQCREG1);
-	udelay(1);
-	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+	gen8_set_l3sqc_credits(dev_priv, 30, 2);
 
 	/*
 	 * WaGttCachingOffByDefault:bdw
@@ -6988,6 +7001,8 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
+	gen8_set_l3sqc_credits(dev_priv, 38, 2);
+
 	/*
 	 * GTT cache may not work with big pages, so if those
 	 * are ever enabled GTT cache may need to be disabled.
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 4/4] drm/i915/bxt: Tune L3 SQC credits based on actual latencies
  2016-04-25 12:38 ` [PATCH 4/4] drm/i915/bxt: " Imre Deak
@ 2016-04-26 16:41   ` Imre Deak
  2016-04-26 16:53     ` Ville Syrjälä
  0 siblings, 1 reply; 16+ messages in thread
From: Imre Deak @ 2016-04-26 16:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Mika Kuoppala

BSpec says we need to fine tune these values, so comply. I checked this
with random GPU benchmarks and it does seem to improve things.

Note that I considered to program this from the ring as part of the
context specific workarounds there, I decided against that for the
following reasons:
- It's not a context specific setting, it's part of whatever (power-)
  context the GPU manages regardless of context scheduling to
  save/restore things across power transitions. So it's enough to
  program it once.
- Atm, we don't apply workarounds for engines other than the render
  engine from the ring (although this could be added if needed).
- The same setting is programmed via MMIO for BDW/CHV/VLV and it
  makes sense to program it the same way on BXT too.

v2:
- Specify the actual WA we're implementing. (Ville)

CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Mika Kuoppala <mika.kuoppala@intel.com>
CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b217c44..6853bf8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -76,6 +76,17 @@ static void bxt_init_clock_gating(struct drm_device *dev)
 	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
 		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
 			   PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+	/*
+	 * WaProgramL3SqcReg1DefaultForPerf:bxt
+	 * Note that for dynamic reprogramming we'd need to do a stalling flush
+	 * operation, but we can do away with that here, since the GPU is idle
+	 * at this point.
+	 */
+	if (IS_BXT_REVID(dev_priv, BXT_REVID_A1, REVID_FOREVER))
+		I915_WRITE(GEN8_L3SQCREG1,
+			   L3_GENERAL_PRIO_CREDITS(62) |
+			   L3_HIGH_PRIO_CREDITS(2));
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
-- 
2.5.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies
  2016-04-26 16:39   ` [PATCH v2 " Imre Deak
@ 2016-04-26 16:51     ` Ville Syrjälä
  0 siblings, 0 replies; 16+ messages in thread
From: Ville Syrjälä @ 2016-04-26 16:51 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx, Mika Kuoppala

On Tue, Apr 26, 2016 at 07:39:33PM +0300, Imre Deak wrote:
> While browsing BSpec I bumped into a note saying we need to tune these
> values based on actual measurements done after initial enabling. I've
> checked that it indeed improves things on BXT. I haven't checked this on
> CHV, but here it is if someone wants to give it a go.
> 
> v2:
> - Add note about the discrepancy wrt. to the spec in the formula
>   calculating the credit encodings. (Mika, Ville)
> - Move the WA comment to the new function. (Ville)
> 
> CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> CC: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h |  6 ++++++
>  drivers/gpu/drm/i915/intel_pm.c | 47 +++++++++++++++++++++++++++--------------
>  2 files changed, 37 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 0cb2e17..e25e78f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6073,6 +6073,12 @@ enum skl_disp_power_wells {
>  #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
>  
>  #define GEN8_L3SQCREG1				_MMIO(0xB100)
> +/*
> + * Note that on CHV the following has an off-by-one error wrt. to BSpec.
> + * Using the formula in BSpec leads to a hang, while the formula here works
> + * fine and matches the formulas for all other platforms. A BSpec change
> + * request has been filed to clarify this.
> + */
>  #define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
>  #define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a9b7626..b217c44 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6669,11 +6669,38 @@ static void lpt_suspend_hw(struct drm_device *dev)
>  	}
>  }
>  
> +static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
> +				   int general_prio_credits,
> +				   int high_prio_credits)
> +{
> +	u32 misccpctl;
> +
> +	/*
> +	 * WaProgramL3SqcReg1Default:bdw

I would have left this first w/a comment in the caller.

> +	 * WaTempDisableDOPClkGating:bdw

This one seems good here.

> +	 * For CHV see gfxspecs/Related Documents/Performance Guide/
> +	 * LSQC Setting Recommendations[CHV,BXT].

The chv comment would also seems better placed in the caller.

With those 
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> +	 */
> +	misccpctl = I915_READ(GEN7_MISCCPCTL);
> +	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> +
> +	I915_WRITE(GEN8_L3SQCREG1,
> +		   L3_GENERAL_PRIO_CREDITS(general_prio_credits) |
> +		   L3_HIGH_PRIO_CREDITS(high_prio_credits));
> +
> +	/*
> +	 * Wait at least 100 clocks before re-enabling clock gating.
> +	 * See the definition of L3SQCREG1 in BSpec.
> +	 */
> +	POSTING_READ(GEN8_L3SQCREG1);
> +	udelay(1);
> +	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +}
> +
>  static void broadwell_init_clock_gating(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	enum pipe pipe;
> -	uint32_t misccpctl;
>  
>  	ilk_init_lp_watermarks(dev);
>  
> @@ -6704,21 +6731,7 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
>  	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
>  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>  
> -	/*
> -	 * WaProgramL3SqcReg1Default:bdw
> -	 * WaTempDisableDOPClkGating:bdw
> -	 */
> -	misccpctl = I915_READ(GEN7_MISCCPCTL);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> -	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> -				   L3_HIGH_PRIO_CREDITS(2));
> -	/*
> -	 * Wait at least 100 clocks before re-enabling clock gating. See
> -	 * the definition of L3SQCREG1 in BSpec.
> -	 */
> -	POSTING_READ(GEN8_L3SQCREG1);
> -	udelay(1);
> -	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
> +	gen8_set_l3sqc_credits(dev_priv, 30, 2);
>  
>  	/*
>  	 * WaGttCachingOffByDefault:bdw
> @@ -6988,6 +7001,8 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
>  	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
>  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>  
> +	gen8_set_l3sqc_credits(dev_priv, 38, 2);
> +
>  	/*
>  	 * GTT cache may not work with big pages, so if those
>  	 * are ever enabled GTT cache may need to be disabled.
> -- 
> 2.5.0

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 4/4] drm/i915/bxt: Tune L3 SQC credits based on actual latencies
  2016-04-26 16:41   ` [PATCH v2 " Imre Deak
@ 2016-04-26 16:53     ` Ville Syrjälä
  0 siblings, 0 replies; 16+ messages in thread
From: Ville Syrjälä @ 2016-04-26 16:53 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx, Mika Kuoppala

On Tue, Apr 26, 2016 at 07:41:32PM +0300, Imre Deak wrote:
> BSpec says we need to fine tune these values, so comply. I checked this
> with random GPU benchmarks and it does seem to improve things.
> 
> Note that I considered to program this from the ring as part of the
> context specific workarounds there, I decided against that for the
> following reasons:
> - It's not a context specific setting, it's part of whatever (power-)
>   context the GPU manages regardless of context scheduling to
>   save/restore things across power transitions. So it's enough to
>   program it once.
> - Atm, we don't apply workarounds for engines other than the render
>   engine from the ring (although this could be added if needed).
> - The same setting is programmed via MMIO for BDW/CHV/VLV and it
>   makes sense to program it the same way on BXT too.
> 
> v2:
> - Specify the actual WA we're implementing. (Ville)
> 
> CC: Chris Wilson <chris@chris-wilson.co.uk>
> CC: Mika Kuoppala <mika.kuoppala@intel.com>
> CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak@intel.com>

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b217c44..6853bf8 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -76,6 +76,17 @@ static void bxt_init_clock_gating(struct drm_device *dev)
>  	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
>  		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
>  			   PWM1_GATING_DIS | PWM2_GATING_DIS);
> +
> +	/*
> +	 * WaProgramL3SqcReg1DefaultForPerf:bxt
> +	 * Note that for dynamic reprogramming we'd need to do a stalling flush
> +	 * operation, but we can do away with that here, since the GPU is idle
> +	 * at this point.
> +	 */
> +	if (IS_BXT_REVID(dev_priv, BXT_REVID_A1, REVID_FOREVER))
> +		I915_WRITE(GEN8_L3SQCREG1,
> +			   L3_GENERAL_PRIO_CREDITS(62) |
> +			   L3_HIGH_PRIO_CREDITS(2));
>  }
>  
>  static void i915_pineview_get_mem_freq(struct drm_device *dev)
> -- 
> 2.5.0

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions
  2016-04-25 12:38 ` [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions Imre Deak
  2016-04-26  8:11   ` Mika Kuoppala
@ 2016-04-26 16:55   ` Ville Syrjälä
  1 sibling, 0 replies; 16+ messages in thread
From: Ville Syrjälä @ 2016-04-26 16:55 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx

On Mon, Apr 25, 2016 at 03:38:06PM +0300, Imre Deak wrote:
> No need for hard-coding the register value, the corresponding fields are
> defined properly in BSpec.
> 
> No functional change.
> 
> Signed-off-by: Imre Deak <imre.deak@intel.com>

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_reg.h | 3 ++-
>  drivers/gpu/drm/i915/intel_pm.c | 3 ++-
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index c21b71c..0cb2e17 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6073,7 +6073,8 @@ enum skl_disp_power_wells {
>  #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
>  
>  #define GEN8_L3SQCREG1				_MMIO(0xB100)
> -#define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
> +#define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
> +#define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
>  
>  #define GEN7_L3CNTLREG1				_MMIO(0xB01C)
>  #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a6fd4dd..a9b7626 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6710,7 +6710,8 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
>  	 */
>  	misccpctl = I915_READ(GEN7_MISCCPCTL);
>  	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
> -	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
> +	I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(30) |
> +				   L3_HIGH_PRIO_CREDITS(2));
>  	/*
>  	 * Wait at least 100 clocks before re-enabling clock gating. See
>  	 * the definition of L3SQCREG1 in BSpec.
> -- 
> 2.5.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming
  2016-04-25 12:38 [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Imre Deak
                   ` (3 preceding siblings ...)
  2016-04-25 14:03 ` ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Patchwork
@ 2016-04-26 16:55 ` Ville Syrjälä
  4 siblings, 0 replies; 16+ messages in thread
From: Ville Syrjälä @ 2016-04-26 16:55 UTC (permalink / raw)
  To: Imre Deak; +Cc: intel-gfx

On Mon, Apr 25, 2016 at 03:38:05PM +0300, Imre Deak wrote:
> BSpec requires us to wait ~100 clocks before re-enabling clock gating,
> so make sure we do this.
> 
> Signed-off-by: Imre Deak <imre.deak@intel.com>

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_pm.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 702f683..a6fd4dd 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6711,6 +6711,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
>  	misccpctl = I915_READ(GEN7_MISCCPCTL);
>  	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
>  	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
> +	/*
> +	 * Wait at least 100 clocks before re-enabling clock gating. See
> +	 * the definition of L3SQCREG1 in BSpec.
> +	 */
> +	POSTING_READ(GEN8_L3SQCREG1);
> +	udelay(1);
>  	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
>  
>  	/*
> -- 
> 2.5.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-04-26 16:55 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-04-25 12:38 [PATCH 1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Imre Deak
2016-04-25 12:38 ` [PATCH 2/4] drm/i915: Clean up L3 SQC register field definitions Imre Deak
2016-04-26  8:11   ` Mika Kuoppala
2016-04-26  9:03     ` Imre Deak
2016-04-26  9:21       ` Imre Deak
2016-04-26 16:55   ` Ville Syrjälä
2016-04-25 12:38 ` [PATCH 3/4] drm/i915/chv: Tune L3 SQC credits based on actual latencies Imre Deak
2016-04-25 13:16   ` Ville Syrjälä
2016-04-26 16:19     ` Ville Syrjälä
2016-04-26 16:39   ` [PATCH v2 " Imre Deak
2016-04-26 16:51     ` Ville Syrjälä
2016-04-25 12:38 ` [PATCH 4/4] drm/i915/bxt: " Imre Deak
2016-04-26 16:41   ` [PATCH v2 " Imre Deak
2016-04-26 16:53     ` Ville Syrjälä
2016-04-25 14:03 ` ✗ Fi.CI.BAT: failure for series starting with [1/4] drm/i915/bdw: Add missing delay during L3 SQC credit programming Patchwork
2016-04-26 16:55 ` [PATCH 1/4] " Ville Syrjälä

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox