public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together.
@ 2014-03-03  6:05 deepak.s
  2014-03-04 14:20 ` S, Deepak
  2014-03-05 12:11 ` Ville Syrjälä
  0 siblings, 2 replies; 30+ messages in thread
From: deepak.s @ 2014-03-03  6:05 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@intel.com>

With RC6 enabled, BYT has an HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (ville)

Signed-off-by: Deepak S <deepak.s@intel.com>

---
 drivers/gpu/drm/i915/i915_drv.h |  19 ++++++
 drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_reg.h |  15 +++++
 drivers/gpu/drm/i915/intel_pm.c |  50 ++++++++++----
 4 files changed, 213 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 728b9c3..2baeeef 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -957,6 +957,12 @@ struct i915_suspend_saved_registers {
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+	u32 cz_ts_ei;
+	u32 render_ei_c0;
+	u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt {
 	u8 rp1_delay;
 	u8 rp0_delay;
 	u8 hw_max;
+	u8 hw_min;
 
 	bool rp_up_masked;
 	bool rp_down_masked;
 
+	u32 cz_freq;
+	u32 ei_interrupt_count;
+
+	bool use_RC0_residency_for_turbo;
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1531,6 +1543,13 @@ typedef struct drm_i915_private {
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
+	/* rps wa up ei calculation */
+	struct intel_rps_ei_calc rps_up_ei;
+
+	/* rps wa down ei calculation */
+	struct intel_rps_ei_calc rps_down_ei;
+
+
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
 	struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 56edff3..93b6ebf 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
 	}
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+				struct  intel_rps_ei_calc *rps_ei)
+{
+	u32 cz_ts, cz_freq_khz;
+	u32 render_count, media_count;
+	u32 elapsed_render, elapsed_media, elapsed_time;
+	u32 residency = 0;
+
+	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (rps_ei->cz_ts_ei == 0) {
+		rps_ei->cz_ts_ei = cz_ts;
+		rps_ei->render_ei_c0 = render_count;
+		rps_ei->media_ei_c0 = media_count;
+
+		return dev_priv->rps.cur_delay;
+	}
+
+	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+	rps_ei->cz_ts_ei = cz_ts;
+
+	elapsed_render = render_count - rps_ei->render_ei_c0;
+	rps_ei->render_ei_c0 = render_count;
+
+	elapsed_media = media_count - rps_ei->media_ei_c0;
+	rps_ei->media_ei_c0 = media_count;
+
+	/* Convert all the counters into common unit of milli sec */
+	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+	elapsed_render /=  cz_freq_khz;
+	elapsed_media /= cz_freq_khz;
+
+	/* Calculate overall C0 residency percentage only
+	* if elapsed time is non zero
+	*/
+	if (elapsed_time) {
+		residency =
+			((max(elapsed_render, elapsed_media) * 100)
+				/ elapsed_time);
+	}
+
+	return residency;
+}
+
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	u8 new_delay;
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_delay;
+	}
+
+
+	/* To down throttle, C0 residency should be less than down threshold
+	* for continous EI intervals. So calculate down EI counters
+	* once in VLV_INT_COUNT_FOR_DOWN_EI
+	*/
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down =  vlv_c0_residency(dev_priv,
+						&dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up =  vlv_c0_residency(dev_priv,
+						&dev_priv->rps_up_ei);
+	}
+
+	new_delay = dev_priv->rps.cur_delay;
+
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+
+		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
+			new_delay = dev_priv->rps.cur_delay + 1;
+
+		/*
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < dev_priv->rps.rpe_delay)
+			new_delay = dev_priv->rps.rpe_delay;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		/* This means, C0 residency is less than down threshold over
+		* a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		*/
+		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
+			new_delay = dev_priv->rps.cur_delay - 1;
+	}
+
+	return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	pm_iir = dev_priv->rps.pm_iir;
 	dev_priv->rps.pm_iir = 0;
 	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
-	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+	if (dev_priv->rps.use_RC0_residency_for_turbo)
+		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
+	else
+		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/* Make sure we didn't queue anything we're not going to process. */
-	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
+	WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED));
 
-	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
+	if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
 		return;
 
 	mutex_lock(&dev_priv->rps.hw_lock);
@@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_delay;
 		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
@@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 		queue_work(dev_priv->wq, &dev_priv->rps.work);
 	}
 
+	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		spin_lock(&dev_priv->irq_lock);
+		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
+		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
+		spin_unlock(&dev_priv->irq_lock);
+		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
+
+		queue_work(dev_priv->wq, &dev_priv->rps.work);
+	}
+
 	if (HAS_VEBOX(dev_priv->dev)) {
 		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
 			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
@@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 		if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
 			gmbus_irq_handler(dev);
 
-		if (pm_iir)
+		if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED))
 			gen6_rps_irq_handler(dev_priv, pm_iir);
 
 		I915_WRITE(GTIIR, gt_iir);
@@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
+
+		if (dev_priv->rps.use_RC0_residency_for_turbo) {
+			dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
+			pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED;
+		} else {
+			dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS;
+			pm_irqs |= GEN6_PM_RPS_EVENTS;
+		}
+
 		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
 		I915_WRITE(GEN6_PMIER, pm_irqs);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f73a49d..e58b37e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -391,6 +391,7 @@
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
 #define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
+#define PUNIT_REG_CZ_TIMESTAMP			0xce
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -406,6 +407,11 @@
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
+#define VLV_RP_UP_EI_THRESHOLD			90
+#define VLV_RP_DOWN_EI_THRESHOLD		70
+#define VLV_INT_COUNT_FOR_DOWN_EI		5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -4857,6 +4863,7 @@
 #define  VLV_GTLC_PW_STATUS			0x130094
 #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
 #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
@@ -4864,6 +4871,11 @@
 #define  ECOBUS					0xa180
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
 
+#define VLV_GFX_CLK_FORCE_ON_BIT                (1<<2)
+#define VLV_GFX_CLK_STATUS_BIT                  (1<<3)
+
+#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
+
 #define  GTFIFODBG				0x120000
 #define    GT_FIFO_SBDROPERR			(1<<6)
 #define    GT_FIFO_BLOBDROPERR			(1<<5)
@@ -4979,6 +4991,9 @@
 #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
 #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
 
+#define VLV_RENDER_C0_COUNT_REG		0x138118
+#define VLV_MEDIA_C0_COUNT_REG			0x13811C
+
 #define GEN6_GT_GFX_RC6_LOCKED			0x138104
 #define VLV_COUNTER_CONTROL			0x138104
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9ab3883..8002ac7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
 				~VLV_GFX_CLK_FORCE_ON_BIT);
 
-	/* Unmask Up interrupts */
-	dev_priv->rps.rp_up_masked = true;
-	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+	/* Unmask Turbo interrupts */
+	if (dev_priv->rps.use_RC0_residency_for_turbo)
+		I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED);
+	else {
+		dev_priv->rps.rp_up_masked = true;
+		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
 						dev_priv->rps.min_delay);
+	}
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
-	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
+	if (dev_priv->rps.use_RC0_residency_for_turbo) {
+		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
+						~GEN6_PM_RP_UP_EI_EXPIRED);
+	} else {
+		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
+						~GEN6_PM_RPS_EVENTS);
+	}
 	/* Complete PM interrupt masking here doesn't race with the rps work
 	 * item again unmasking PM interrupts because that is using a different
 	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
@@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
 	dev_priv->rps.pm_iir = 0;
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	if (dev_priv->rps.use_RC0_residency_for_turbo)
+		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
+	else
+		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
 }
 
 static void gen6_disable_rps(struct drm_device *dev)
@@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 enabled_intrs;
 
+	/* Clear out any stale interrupts first */
 	spin_lock_irq(&dev_priv->irq_lock);
 	WARN_ON(dev_priv->rps.pm_iir);
-	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
-	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	if (dev_priv->rps.use_RC0_residency_for_turbo) {
+		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
+		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
+	} else {
+		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	}
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/* only unmask PM interrupts we need. Mask all others. */
-	enabled_intrs = GEN6_PM_RPS_EVENTS;
+	if (dev_priv->rps.use_RC0_residency_for_turbo)
+		enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		enabled_intrs = GEN6_PM_RPS_EVENTS;
 
 	/* IVB and SNB hard hangs on looping batchbuffer
 	 * if GEN6_PM_UP_EI_EXPIRED is masked.
 	 */
-	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
+	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) &&
+			!dev_priv->rps.use_RC0_residency_for_turbo)
 		enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
 
 	I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
@@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
@@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
 
 	/* allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
+	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 
@@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	dev_priv->rps.rp_up_masked = false;
 	dev_priv->rps.rp_down_masked = false;
 
+	/* enable WA for RC6+turbo to work together */
+	dev_priv->rps.use_RC0_residency_for_turbo = true;
+
 	gen6_enable_rps_interrupts(dev);
 
 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
-- 
1.8.5.2

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-03  6:05 [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
@ 2014-03-04 14:20 ` S, Deepak
  2014-03-05 12:11 ` Ville Syrjälä
  1 sibling, 0 replies; 30+ messages in thread
From: S, Deepak @ 2014-03-04 14:20 UTC (permalink / raw)
  To: intel-gfx, Ville Syrjälä

Hi Ville,

Please review the patch and share your comments.

Thanks
Deepak

On 3/3/2014 11:35 AM, deepak.s@intel.com wrote:
> From: Deepak S <deepak.s@intel.com>
>
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
> on increasing/decreasing the freq. This logic will monitor C0
> counters of render/media power-wells over EI period and takes
> necessary action based on these values
>
> v2: Refactor duplicate code. (ville)
>
> Signed-off-by: Deepak S <deepak.s@intel.com>
>
> ---
>   drivers/gpu/drm/i915/i915_drv.h |  19 ++++++
>   drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_reg.h |  15 +++++
>   drivers/gpu/drm/i915/intel_pm.c |  50 ++++++++++----
>   4 files changed, 213 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 728b9c3..2baeeef 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -957,6 +957,12 @@ struct i915_suspend_saved_registers {
>   	u32 savePCH_PORT_HOTPLUG;
>   };
>
> +struct intel_rps_ei_calc {
> +	u32 cz_ts_ei;
> +	u32 render_ei_c0;
> +	u32 media_ei_c0;
> +};
> +
>   struct intel_gen6_power_mgmt {
>   	/* work and pm_iir are protected by dev_priv->irq_lock */
>   	struct work_struct work;
> @@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt {
>   	u8 rp1_delay;
>   	u8 rp0_delay;
>   	u8 hw_max;
> +	u8 hw_min;
>
>   	bool rp_up_masked;
>   	bool rp_down_masked;
>
> +	u32 cz_freq;
> +	u32 ei_interrupt_count;
> +
> +	bool use_RC0_residency_for_turbo;
> +
>   	int last_adj;
>   	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>
> @@ -1531,6 +1543,13 @@ typedef struct drm_i915_private {
>   	/* gen6+ rps state */
>   	struct intel_gen6_power_mgmt rps;
>
> +	/* rps wa up ei calculation */
> +	struct intel_rps_ei_calc rps_up_ei;
> +
> +	/* rps wa down ei calculation */
> +	struct intel_rps_ei_calc rps_down_ei;
> +
> +
>   	/* ilk-only ips/rps state. Everything in here is protected by the global
>   	 * mchdev_lock in intel_pm.c */
>   	struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 56edff3..93b6ebf 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>   	}
>   }
>
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> +				struct  intel_rps_ei_calc *rps_ei)
> +{
> +	u32 cz_ts, cz_freq_khz;
> +	u32 render_count, media_count;
> +	u32 elapsed_render, elapsed_media, elapsed_time;
> +	u32 residency = 0;
> +
> +	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> +	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> +	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> +	if (rps_ei->cz_ts_ei == 0) {
> +		rps_ei->cz_ts_ei = cz_ts;
> +		rps_ei->render_ei_c0 = render_count;
> +		rps_ei->media_ei_c0 = media_count;
> +
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> +	rps_ei->cz_ts_ei = cz_ts;
> +
> +	elapsed_render = render_count - rps_ei->render_ei_c0;
> +	rps_ei->render_ei_c0 = render_count;
> +
> +	elapsed_media = media_count - rps_ei->media_ei_c0;
> +	rps_ei->media_ei_c0 = media_count;
> +
> +	/* Convert all the counters into common unit of milli sec */
> +	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> +	elapsed_render /=  cz_freq_khz;
> +	elapsed_media /= cz_freq_khz;
> +
> +	/* Calculate overall C0 residency percentage only
> +	* if elapsed time is non zero
> +	*/
> +	if (elapsed_time) {
> +		residency =
> +			((max(elapsed_render, elapsed_media) * 100)
> +				/ elapsed_time);
> +	}
> +
> +	return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> +	u32 residency_C0_up = 0, residency_C0_down = 0;
> +	u8 new_delay;
> +
> +	dev_priv->rps.ei_interrupt_count++;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> +	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +
> +	/* To down throttle, C0 residency should be less than down threshold
> +	* for continous EI intervals. So calculate down EI counters
> +	* once in VLV_INT_COUNT_FOR_DOWN_EI
> +	*/
> +	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> +		dev_priv->rps.ei_interrupt_count = 0;
> +
> +		residency_C0_down =  vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_down_ei);
> +	} else {
> +		residency_C0_up =  vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_up_ei);
> +	}
> +
> +	new_delay = dev_priv->rps.cur_delay;
> +
> +	/* C0 residency is greater than UP threshold. Increase Frequency */
> +	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +
> +		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
> +			new_delay = dev_priv->rps.cur_delay + 1;
> +
> +		/*
> +		 * For better performance, jump directly
> +		 * to RPe if we're below it.
> +		 */
> +		if (new_delay < dev_priv->rps.rpe_delay)
> +			new_delay = dev_priv->rps.rpe_delay;
> +
> +	} else if (!dev_priv->rps.ei_interrupt_count &&
> +			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> +		/* This means, C0 residency is less than down threshold over
> +		* a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> +		*/
> +		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
> +			new_delay = dev_priv->rps.cur_delay - 1;
> +	}
> +
> +	return new_delay;
> +}
> +
>   static void gen6_pm_rps_work(struct work_struct *work)
>   {
>   	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
>   	pm_iir = dev_priv->rps.pm_iir;
>   	dev_priv->rps.pm_iir = 0;
>   	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> +	else
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
>   	spin_unlock_irq(&dev_priv->irq_lock);
>
>   	/* Make sure we didn't queue anything we're not going to process. */
> -	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
> +	WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED));
>
> -	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
> +	if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
>   		return;
>
>   	mutex_lock(&dev_priv->rps.hw_lock);
> @@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>   		else
>   			new_delay = dev_priv->rps.min_delay;
>   		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>   	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>   		if (adj < 0)
>   			adj *= 2;
> @@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>   		queue_work(dev_priv->wq, &dev_priv->rps.work);
>   	}
>
> +	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		spin_lock(&dev_priv->irq_lock);
> +		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
> +		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
> +		spin_unlock(&dev_priv->irq_lock);
> +		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
> +
> +		queue_work(dev_priv->wq, &dev_priv->rps.work);
> +	}
> +
>   	if (HAS_VEBOX(dev_priv->dev)) {
>   		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
>   			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
> @@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
>   		if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
>   			gmbus_irq_handler(dev);
>
> -		if (pm_iir)
> +		if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED))
>   			gen6_rps_irq_handler(dev_priv, pm_iir);
>
>   		I915_WRITE(GTIIR, gt_iir);
> @@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>   			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>
>   		dev_priv->pm_irq_mask = 0xffffffff;
> +
> +		if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
> +			pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED;
> +		} else {
> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS;
> +			pm_irqs |= GEN6_PM_RPS_EVENTS;
> +		}
> +
>   		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>   		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>   		I915_WRITE(GEN6_PMIER, pm_irqs);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f73a49d..e58b37e 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -391,6 +391,7 @@
>   #define PUNIT_REG_GPU_FREQ_STS			0xd8
>   #define   GENFREQSTATUS				(1<<0)
>   #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>
>   #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>   #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
> @@ -406,6 +407,11 @@
>   #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>   #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> +#define VLV_RP_UP_EI_THRESHOLD			90
> +#define VLV_RP_DOWN_EI_THRESHOLD		70
> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
> +
>   /* vlv2 north clock has */
>   #define CCK_FUSE_REG				0x8
>   #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
> @@ -4857,6 +4863,7 @@
>   #define  VLV_GTLC_PW_STATUS			0x130094
>   #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>   #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>   #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>   #define   FORCEWAKE_KERNEL			0x1
>   #define   FORCEWAKE_USER			0x2
> @@ -4864,6 +4871,11 @@
>   #define  ECOBUS					0xa180
>   #define    FORCEWAKE_MT_ENABLE			(1<<5)
>
> +#define VLV_GFX_CLK_FORCE_ON_BIT                (1<<2)
> +#define VLV_GFX_CLK_STATUS_BIT                  (1<<3)
> +
> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
> +
>   #define  GTFIFODBG				0x120000
>   #define    GT_FIFO_SBDROPERR			(1<<6)
>   #define    GT_FIFO_BLOBDROPERR			(1<<5)
> @@ -4979,6 +4991,9 @@
>   #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
>   #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
>
> +#define VLV_RENDER_C0_COUNT_REG		0x138118
> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
> +
>   #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>   #define VLV_COUNTER_CONTROL			0x138104
>   #define   VLV_COUNT_RANGE_HIGH			(1<<15)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 9ab3883..8002ac7 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>   				~VLV_GFX_CLK_FORCE_ON_BIT);
>
> -	/* Unmask Up interrupts */
> -	dev_priv->rps.rp_up_masked = true;
> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> +	/* Unmask Turbo interrupts */
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED);
> +	else {
> +		dev_priv->rps.rp_up_masked = true;
> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>   						dev_priv->rps.min_delay);
> +	}
>   }
>
>   void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>
>   	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> -	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> +						~GEN6_PM_RP_UP_EI_EXPIRED);
> +	} else {
> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> +						~GEN6_PM_RPS_EVENTS);
> +	}
>   	/* Complete PM interrupt masking here doesn't race with the rps work
>   	 * item again unmasking PM interrupts because that is using a different
>   	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
> @@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>   	dev_priv->rps.pm_iir = 0;
>   	spin_unlock_irq(&dev_priv->irq_lock);
>
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> +	else
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>   }
>
>   static void gen6_disable_rps(struct drm_device *dev)
> @@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	u32 enabled_intrs;
>
> +	/* Clear out any stale interrupts first */
>   	spin_lock_irq(&dev_priv->irq_lock);
>   	WARN_ON(dev_priv->rps.pm_iir);
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> +	} else {
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	}
>   	spin_unlock_irq(&dev_priv->irq_lock);
>
>   	/* only unmask PM interrupts we need. Mask all others. */
> -	enabled_intrs = GEN6_PM_RPS_EVENTS;
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED;
> +	else
> +		enabled_intrs = GEN6_PM_RPS_EVENTS;
>
>   	/* IVB and SNB hard hangs on looping batchbuffer
>   	 * if GEN6_PM_UP_EI_EXPIRED is masked.
>   	 */
> -	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
> +	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) &&
> +			!dev_priv->rps.use_RC0_residency_for_turbo)
>   		enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
>
>   	I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
> @@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>   	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>
>   	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>
>   	I915_WRITE(GEN6_RP_CONTROL,
>   		   GEN6_RP_MEDIA_TURBO |
> @@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>   	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
>
>   	/* allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> +	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
>   	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>   		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>
> @@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
>   	dev_priv->rps.rp_up_masked = false;
>   	dev_priv->rps.rp_down_masked = false;
>
> +	/* enable WA for RC6+turbo to work together */
> +	dev_priv->rps.use_RC0_residency_for_turbo = true;
> +
>   	gen6_enable_rps_interrupts(dev);
>
>   	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-03  6:05 [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
  2014-03-04 14:20 ` S, Deepak
@ 2014-03-05 12:11 ` Ville Syrjälä
  2014-03-05 12:30   ` S, Deepak
  1 sibling, 1 reply; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-05 12:11 UTC (permalink / raw)
  To: deepak.s; +Cc: intel-gfx

On Mon, Mar 03, 2014 at 11:35:50AM +0530, deepak.s@intel.com wrote:
> From: Deepak S <deepak.s@intel.com>
> 
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
> on increasing/decreasing the freq. This logic will monitor C0
> counters of render/media power-wells over EI period and takes
> necessary action based on these values
> 
> v2: Refactor duplicate code. (ville)
> 
> Signed-off-by: Deepak S <deepak.s@intel.com>

Did we reach some conclusion about this approach? It seemed to save
power in some workloads at least, but there's still the question
whether it ramps up the frequency fast enough to provide a good user
experience. Maybe we should make it optional even on VLV?

> 
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  19 ++++++
>  drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_reg.h |  15 +++++
>  drivers/gpu/drm/i915/intel_pm.c |  50 ++++++++++----
>  4 files changed, 213 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 728b9c3..2baeeef 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -957,6 +957,12 @@ struct i915_suspend_saved_registers {
>  	u32 savePCH_PORT_HOTPLUG;
>  };
>  
> +struct intel_rps_ei_calc {
> +	u32 cz_ts_ei;
> +	u32 render_ei_c0;
> +	u32 media_ei_c0;
> +};
> +
>  struct intel_gen6_power_mgmt {
>  	/* work and pm_iir are protected by dev_priv->irq_lock */
>  	struct work_struct work;
> @@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt {
>  	u8 rp1_delay;
>  	u8 rp0_delay;
>  	u8 hw_max;
> +	u8 hw_min;

Some leftover still?

>  
>  	bool rp_up_masked;
>  	bool rp_down_masked;
>  
> +	u32 cz_freq;

This too seems unused.

> +	u32 ei_interrupt_count;
> +
> +	bool use_RC0_residency_for_turbo;
> +
>  	int last_adj;
>  	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
> @@ -1531,6 +1543,13 @@ typedef struct drm_i915_private {
>  	/* gen6+ rps state */
>  	struct intel_gen6_power_mgmt rps;
>  
> +	/* rps wa up ei calculation */
> +	struct intel_rps_ei_calc rps_up_ei;
> +
> +	/* rps wa down ei calculation */
> +	struct intel_rps_ei_calc rps_down_ei;
> +
> +
>  	/* ilk-only ips/rps state. Everything in here is protected by the global
>  	 * mchdev_lock in intel_pm.c */
>  	struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 56edff3..93b6ebf 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> +				struct  intel_rps_ei_calc *rps_ei)
> +{
> +	u32 cz_ts, cz_freq_khz;
> +	u32 render_count, media_count;
> +	u32 elapsed_render, elapsed_media, elapsed_time;
> +	u32 residency = 0;
> +
> +	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> +	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> +	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> +	if (rps_ei->cz_ts_ei == 0) {
> +		rps_ei->cz_ts_ei = cz_ts;
> +		rps_ei->render_ei_c0 = render_count;
> +		rps_ei->media_ei_c0 = media_count;
> +
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> +	rps_ei->cz_ts_ei = cz_ts;
> +
> +	elapsed_render = render_count - rps_ei->render_ei_c0;
> +	rps_ei->render_ei_c0 = render_count;
> +
> +	elapsed_media = media_count - rps_ei->media_ei_c0;
> +	rps_ei->media_ei_c0 = media_count;
> +
> +	/* Convert all the counters into common unit of milli sec */
> +	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> +	elapsed_render /=  cz_freq_khz;
> +	elapsed_media /= cz_freq_khz;
> +
> +	/* Calculate overall C0 residency percentage only
> +	* if elapsed time is non zero
> +	*/

Badly formatted comment.

> +	if (elapsed_time) {
> +		residency =
> +			((max(elapsed_render, elapsed_media) * 100)
> +				/ elapsed_time);
> +	}
> +
> +	return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> +	u32 residency_C0_up = 0, residency_C0_down = 0;
> +	u8 new_delay;
> +
> +	dev_priv->rps.ei_interrupt_count++;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> +	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +
> +	/* To down throttle, C0 residency should be less than down threshold
> +	* for continous EI intervals. So calculate down EI counters
> +	* once in VLV_INT_COUNT_FOR_DOWN_EI
> +	*/

Badly formatted comment.

> +	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> +		dev_priv->rps.ei_interrupt_count = 0;
> +
> +		residency_C0_down =  vlv_c0_residency(dev_priv,
                                   ^

Extra space we don't need

> +						&dev_priv->rps_down_ei);
> +	} else {
> +		residency_C0_up =  vlv_c0_residency(dev_priv,
                                 ^

Another

> +						&dev_priv->rps_up_ei);
> +	}
> +
> +	new_delay = dev_priv->rps.cur_delay;
> +
> +	/* C0 residency is greater than UP threshold. Increase Frequency */
> +	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +
> +		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
> +			new_delay = dev_priv->rps.cur_delay + 1;
> +
> +		/*
> +		 * For better performance, jump directly
> +		 * to RPe if we're below it.
> +		 */
> +		if (new_delay < dev_priv->rps.rpe_delay)
> +			new_delay = dev_priv->rps.rpe_delay;
> +
> +	} else if (!dev_priv->rps.ei_interrupt_count &&
> +			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> +		/* This means, C0 residency is less than down threshold over
> +		* a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> +		*/

Comment is badly formatted.

> +		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
> +			new_delay = dev_priv->rps.cur_delay - 1;
> +	}
> +
> +	return new_delay;
> +}
> +
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>  	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  	pm_iir = dev_priv->rps.pm_iir;
>  	dev_priv->rps.pm_iir = 0;
>  	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> +	else
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);

This pattern keeps repeating many times in the patch. Maybe it would be
better to track the enabled PM interrupts in dev_priv somewhere, and
use that instead of GEN6_PM_RPS_EVENTS vs. GEN6_PM_RP_UP_EI_EXPIRED
everywhere. Maybe call it dev_priv->pm_rps_events to keep in line with
the GEN6_PM_RPS_EVENTS name. I'd make it a separate preparation patch.

>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
>  	/* Make sure we didn't queue anything we're not going to process. */
> -	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
> +	WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED));
>  
> -	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
> +	if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
>  		return;
>  
>  	mutex_lock(&dev_priv->rps.hw_lock);
> @@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  		else
>  			new_delay = dev_priv->rps.min_delay;
>  		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);


>  	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>  		if (adj < 0)
>  			adj *= 2;
> @@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>  		queue_work(dev_priv->wq, &dev_priv->rps.work);
>  	}
>  
> +	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		spin_lock(&dev_priv->irq_lock);
> +		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
> +		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
> +		spin_unlock(&dev_priv->irq_lock);
> +		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
> +
> +		queue_work(dev_priv->wq, &dev_priv->rps.work);
> +	}
> +
>  	if (HAS_VEBOX(dev_priv->dev)) {
>  		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
>  			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
> @@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
>  		if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
>  			gmbus_irq_handler(dev);
>  
> -		if (pm_iir)
> +		if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED))
>  			gen6_rps_irq_handler(dev_priv, pm_iir);

>  
>  		I915_WRITE(GTIIR, gt_iir);
> @@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>  			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>  
>  		dev_priv->pm_irq_mask = 0xffffffff;
> +
> +		if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
> +			pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED;
> +		} else {
> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS;
> +			pm_irqs |= GEN6_PM_RPS_EVENTS;
> +		}
> +
>  		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>  		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>  		I915_WRITE(GEN6_PMIER, pm_irqs);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f73a49d..e58b37e 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -391,6 +391,7 @@
>  #define PUNIT_REG_GPU_FREQ_STS			0xd8
>  #define   GENFREQSTATUS				(1<<0)
>  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>  
>  #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>  #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
> @@ -406,6 +407,11 @@
>  #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>  #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>  
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> +#define VLV_RP_UP_EI_THRESHOLD			90
> +#define VLV_RP_DOWN_EI_THRESHOLD		70
> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
> +
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG				0x8
>  #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
> @@ -4857,6 +4863,7 @@
>  #define  VLV_GTLC_PW_STATUS			0x130094
>  #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>  #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>  #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>  #define   FORCEWAKE_KERNEL			0x1
>  #define   FORCEWAKE_USER			0x2
> @@ -4864,6 +4871,11 @@
>  #define  ECOBUS					0xa180
>  #define    FORCEWAKE_MT_ENABLE			(1<<5)
>  
> +#define VLV_GFX_CLK_FORCE_ON_BIT                (1<<2)
> +#define VLV_GFX_CLK_STATUS_BIT                  (1<<3)

Leftovers from somewhere.

> +
> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF

This should be below the register define, but it would be better to
use the names of the bits properly. Also do we really want to enable
all of the counters when you only use the rc0 counters?

I'm also wondering why we're currently enabling the rc6 counters.
I don't see that listed as a requirement for rc6 to work in any
document, and we don't seem to expose those counters through debugfs
either.

> +
>  #define  GTFIFODBG				0x120000
>  #define    GT_FIFO_SBDROPERR			(1<<6)
>  #define    GT_FIFO_BLOBDROPERR			(1<<5)
> @@ -4979,6 +4991,9 @@
>  #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
>  #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
>  
> +#define VLV_RENDER_C0_COUNT_REG		0x138118
> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C

Maybe put these in the correct numerical place between
GEN6_GT_GFX_RC6pp and GEN6_PCODE_MAILBOX.

> +
>  #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>  #define VLV_COUNTER_CONTROL			0x138104
>  #define   VLV_COUNT_RANGE_HIGH			(1<<15)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 9ab3883..8002ac7 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>  		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>  				~VLV_GFX_CLK_FORCE_ON_BIT);
>  
> -	/* Unmask Up interrupts */
> -	dev_priv->rps.rp_up_masked = true;
> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> +	/* Unmask Turbo interrupts */
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED);
> +	else {
> +		dev_priv->rps.rp_up_masked = true;
> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>  						dev_priv->rps.min_delay);
> +	}
>  }
>  
>  void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> -	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> +						~GEN6_PM_RP_UP_EI_EXPIRED);
> +	} else {
> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> +						~GEN6_PM_RPS_EVENTS);
> +	}
>  	/* Complete PM interrupt masking here doesn't race with the rps work
>  	 * item again unmasking PM interrupts because that is using a different
>  	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
> @@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>  	dev_priv->rps.pm_iir = 0;
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> +	else
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>  }
>  
>  static void gen6_disable_rps(struct drm_device *dev)
> @@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	u32 enabled_intrs;
>  
> +	/* Clear out any stale interrupts first */
>  	spin_lock_irq(&dev_priv->irq_lock);
>  	WARN_ON(dev_priv->rps.pm_iir);
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> +	} else {
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	}
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
>  	/* only unmask PM interrupts we need. Mask all others. */
> -	enabled_intrs = GEN6_PM_RPS_EVENTS;
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED;
> +	else
> +		enabled_intrs = GEN6_PM_RPS_EVENTS;
>  
>  	/* IVB and SNB hard hangs on looping batchbuffer
>  	 * if GEN6_PM_UP_EI_EXPIRED is masked.
>  	 */
> -	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
> +	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) &&
> +			!dev_priv->rps.use_RC0_residency_for_turbo)
>  		enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
>  
>  	I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
> @@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>  
>  	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>  
>  	I915_WRITE(GEN6_RP_CONTROL,
>  		   GEN6_RP_MEDIA_TURBO |
> @@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
>  
>  	/* allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> +	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
>  	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>  		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>  
> @@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	dev_priv->rps.rp_up_masked = false;
>  	dev_priv->rps.rp_down_masked = false;
>  
> +	/* enable WA for RC6+turbo to work together */
> +	dev_priv->rps.use_RC0_residency_for_turbo = true;
> +
>  	gen6_enable_rps_interrupts(dev);
>  
>  	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> -- 
> 1.8.5.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-05 12:11 ` Ville Syrjälä
@ 2014-03-05 12:30   ` S, Deepak
  2014-03-13 16:00     ` [PATCH v3 0/3] " deepak.s
  0 siblings, 1 reply; 30+ messages in thread
From: S, Deepak @ 2014-03-05 12:30 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx



On 3/5/2014 5:41 PM, Ville Syrjälä wrote:
> On Mon, Mar 03, 2014 at 11:35:50AM +0530, deepak.s@intel.com wrote:
>> From: Deepak S <deepak.s@intel.com>
>>
>> With RC6 enabled, BYT has an HW issue in determining the right
>> Gfx busyness.
>> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
>> on increasing/decreasing the freq. This logic will monitor C0
>> counters of render/media power-wells over EI period and takes
>> necessary action based on these values
>>
>> v2: Refactor duplicate code. (ville)
>>
>> Signed-off-by: Deepak S <deepak.s@intel.com>
>
> Did we reach some conclusion about this approach? It seemed to save
> power in some workloads at least, but there's still the question
> whether it ramps up the frquency fast enoguh to provide a good user
> experience. Maybe we should make it optional even on VLV?

I have made this optional for VLV. The boost logic is enabled
by default. If we need power savings, then we can turn off the boost and
the regular turbo logic will be enabled.

I will be working on the other options that Daniel suggested for identifying
the libva workload and controlling the boost at run time.

I will address the other comments and upload the patch for review.

>>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h |  19 ++++++
>>   drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++--
>>   drivers/gpu/drm/i915/i915_reg.h |  15 +++++
>>   drivers/gpu/drm/i915/intel_pm.c |  50 ++++++++++----
>>   4 files changed, 213 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 728b9c3..2baeeef 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -957,6 +957,12 @@ struct i915_suspend_saved_registers {
>>   	u32 savePCH_PORT_HOTPLUG;
>>   };
>>
>> +struct intel_rps_ei_calc {
>> +	u32 cz_ts_ei;
>> +	u32 render_ei_c0;
>> +	u32 media_ei_c0;
>> +};
>> +
>>   struct intel_gen6_power_mgmt {
>>   	/* work and pm_iir are protected by dev_priv->irq_lock */
>>   	struct work_struct work;
>> @@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt {
>>   	u8 rp1_delay;
>>   	u8 rp0_delay;
>>   	u8 hw_max;
>> +	u8 hw_min;
>
> Some leftover still?
>
>>
>>   	bool rp_up_masked;
>>   	bool rp_down_masked;
>>
>> +	u32 cz_freq;
>
> This too seems unused.
>
>> +	u32 ei_interrupt_count;
>> +
>> +	bool use_RC0_residency_for_turbo;
>> +
>>   	int last_adj;
>>   	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>>
>> @@ -1531,6 +1543,13 @@ typedef struct drm_i915_private {
>>   	/* gen6+ rps state */
>>   	struct intel_gen6_power_mgmt rps;
>>
>> +	/* rps wa up ei calculation */
>> +	struct intel_rps_ei_calc rps_up_ei;
>> +
>> +	/* rps wa down ei calculation */
>> +	struct intel_rps_ei_calc rps_down_ei;
>> +
>> +
>>   	/* ilk-only ips/rps state. Everything in here is protected by the global
>>   	 * mchdev_lock in intel_pm.c */
>>   	struct intel_ilk_power_mgmt ips;
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>> index 56edff3..93b6ebf 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>>   	}
>>   }
>>
>> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
>> +				struct  intel_rps_ei_calc *rps_ei)
>> +{
>> +	u32 cz_ts, cz_freq_khz;
>> +	u32 render_count, media_count;
>> +	u32 elapsed_render, elapsed_media, elapsed_time;
>> +	u32 residency = 0;
>> +
>> +	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
>> +	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
>> +
>> +	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
>> +	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
>> +
>> +	if (rps_ei->cz_ts_ei == 0) {
>> +		rps_ei->cz_ts_ei = cz_ts;
>> +		rps_ei->render_ei_c0 = render_count;
>> +		rps_ei->media_ei_c0 = media_count;
>> +
>> +		return dev_priv->rps.cur_delay;
>> +	}
>> +
>> +	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
>> +	rps_ei->cz_ts_ei = cz_ts;
>> +
>> +	elapsed_render = render_count - rps_ei->render_ei_c0;
>> +	rps_ei->render_ei_c0 = render_count;
>> +
>> +	elapsed_media = media_count - rps_ei->media_ei_c0;
>> +	rps_ei->media_ei_c0 = media_count;
>> +
>> +	/* Convert all the counters into common unit of milli sec */
>> +	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
>> +	elapsed_render /=  cz_freq_khz;
>> +	elapsed_media /= cz_freq_khz;
>> +
>> +	/* Calculate overall C0 residency percentage only
>> +	* if elapsed time is non zero
>> +	*/
>
> Badly formatted comment.
>
>> +	if (elapsed_time) {
>> +		residency =
>> +			((max(elapsed_render, elapsed_media) * 100)
>> +				/ elapsed_time);
>> +	}
>> +
>> +	return residency;
>> +}
>> +
>> +
>> +/**
>> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
>> + * busy-ness calculated from C0 counters of render & media power wells
>> + * @dev_priv: DRM device private
>> + *
>> + */
>> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
>> +{
>> +	u32 residency_C0_up = 0, residency_C0_down = 0;
>> +	u8 new_delay;
>> +
>> +	dev_priv->rps.ei_interrupt_count++;
>> +
>> +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
>> +
>> +
>> +	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
>> +		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
>> +		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
>> +		return dev_priv->rps.cur_delay;
>> +	}
>> +
>> +
>> +	/* To down throttle, C0 residency should be less than down threshold
>> +	* for continous EI intervals. So calculate down EI counters
>> +	* once in VLV_INT_COUNT_FOR_DOWN_EI
>> +	*/
>
> Badly formatted comment.
>
>> +	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
>> +
>> +		dev_priv->rps.ei_interrupt_count = 0;
>> +
>> +		residency_C0_down =  vlv_c0_residency(dev_priv,
>                                     ^
>
> Extra space we don't need
>
>> +						&dev_priv->rps_down_ei);
>> +	} else {
>> +		residency_C0_up =  vlv_c0_residency(dev_priv,
>                                   ^
>
> Another
>
>> +						&dev_priv->rps_up_ei);
>> +	}
>> +
>> +	new_delay = dev_priv->rps.cur_delay;
>> +
>> +	/* C0 residency is greater than UP threshold. Increase Frequency */
>> +	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
>> +
>> +		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
>> +			new_delay = dev_priv->rps.cur_delay + 1;
>> +
>> +		/*
>> +		 * For better performance, jump directly
>> +		 * to RPe if we're below it.
>> +		 */
>> +		if (new_delay < dev_priv->rps.rpe_delay)
>> +			new_delay = dev_priv->rps.rpe_delay;
>> +
>> +	} else if (!dev_priv->rps.ei_interrupt_count &&
>> +			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
>> +		/* This means, C0 residency is less than down threshold over
>> +		* a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
>> +		*/
>
> Comment is badly formatted.
>
>> +		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
>> +			new_delay = dev_priv->rps.cur_delay - 1;
>> +	}
>> +
>> +	return new_delay;
>> +}
>> +
>>   static void gen6_pm_rps_work(struct work_struct *work)
>>   {
>>   	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
>> @@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
>>   	pm_iir = dev_priv->rps.pm_iir;
>>   	dev_priv->rps.pm_iir = 0;
>>   	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
>> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
>> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
>> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
>> +	else
>> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
>
> This pattern keeps reeating many times in the patch. Maybe it would be
> better to track the enabled PM interrupts in dev_priv somewhere, and
> use that instead of GEN6_PM_RPS_EVENTS vs. GEN6_PM_RP_UP_EI_EXPIRED
> everywhere. Maybe call it dev_priv->pm_rps_events to keep in line with
> the GEN6_PM_RPS_EVENTS name. I'd make it a separate preparation patch.
>
>>   	spin_unlock_irq(&dev_priv->irq_lock);
>>
>>   	/* Make sure we didn't queue anything we're not going to process. */
>> -	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
>> +	WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED));
>>
>> -	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
>> +	if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
>>   		return;
>>
>>   	mutex_lock(&dev_priv->rps.hw_lock);
>> @@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>>   		else
>>   			new_delay = dev_priv->rps.min_delay;
>>   		adj = 0;
>> +	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
>> +		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>
>
>>   	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>>   		if (adj < 0)
>>   			adj *= 2;
>> @@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>>   		queue_work(dev_priv->wq, &dev_priv->rps.work);
>>   	}
>>
>> +	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
>> +		spin_lock(&dev_priv->irq_lock);
>> +		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
>> +		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
>> +		spin_unlock(&dev_priv->irq_lock);
>> +		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
>> +
>> +		queue_work(dev_priv->wq, &dev_priv->rps.work);
>> +	}
>> +
>>   	if (HAS_VEBOX(dev_priv->dev)) {
>>   		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
>>   			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
>> @@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
>>   		if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
>>   			gmbus_irq_handler(dev);
>>
>> -		if (pm_iir)
>> +		if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED))
>>   			gen6_rps_irq_handler(dev_priv, pm_iir);
>
>>
>>   		I915_WRITE(GTIIR, gt_iir);
>> @@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>>   			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>>
>>   		dev_priv->pm_irq_mask = 0xffffffff;
>> +
>> +		if (dev_priv->rps.use_RC0_residency_for_turbo) {
>> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
>> +			pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED;
>> +		} else {
>> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS;
>> +			pm_irqs |= GEN6_PM_RPS_EVENTS;
>> +		}
>> +
>>   		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>>   		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>>   		I915_WRITE(GEN6_PMIER, pm_irqs);
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index f73a49d..e58b37e 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -391,6 +391,7 @@
>>   #define PUNIT_REG_GPU_FREQ_STS			0xd8
>>   #define   GENFREQSTATUS				(1<<0)
>>   #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
>> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>>
>>   #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>>   #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
>> @@ -406,6 +407,11 @@
>>   #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>>   #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>>
>> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
>> +#define VLV_RP_UP_EI_THRESHOLD			90
>> +#define VLV_RP_DOWN_EI_THRESHOLD		70
>> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
>> +
>>   /* vlv2 north clock has */
>>   #define CCK_FUSE_REG				0x8
>>   #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
>> @@ -4857,6 +4863,7 @@
>>   #define  VLV_GTLC_PW_STATUS			0x130094
>>   #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>>   #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
>> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>>   #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>>   #define   FORCEWAKE_KERNEL			0x1
>>   #define   FORCEWAKE_USER			0x2
>> @@ -4864,6 +4871,11 @@
>>   #define  ECOBUS					0xa180
>>   #define    FORCEWAKE_MT_ENABLE			(1<<5)
>>
>> +#define VLV_GFX_CLK_FORCE_ON_BIT                (1<<2)
>> +#define VLV_GFX_CLK_STATUS_BIT                  (1<<3)
>
> Leftovers from somewhere.
>
>> +
>> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
>
> This should be below the register define, but it would be better to
> use the names of the bits properly. Also do we really want to enable
> all of the counters when you only use the rc0 counters?
>
> I'm also wondering why we're currently enabling the rc6 counters.
> I don't see that listed as a requirement for rc6 to work in any
> document, and we don't seem to expose those counters through debugfs
> either.
>
>> +
>>   #define  GTFIFODBG				0x120000
>>   #define    GT_FIFO_SBDROPERR			(1<<6)
>>   #define    GT_FIFO_BLOBDROPERR			(1<<5)
>> @@ -4979,6 +4991,9 @@
>>   #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
>>   #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
>>
>> +#define VLV_RENDER_C0_COUNT_REG		0x138118
>> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
>
> Maybe put these in the correct numerical place between
> GEN6_GT_GFX_RC6pp and GEN6_PCODE_MAILBOX.
>
>> +
>>   #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>>   #define VLV_COUNTER_CONTROL			0x138104
>>   #define   VLV_COUNT_RANGE_HIGH			(1<<15)
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 9ab3883..8002ac7 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>>   		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>>   				~VLV_GFX_CLK_FORCE_ON_BIT);
>>
>> -	/* Unmask Up interrupts */
>> -	dev_priv->rps.rp_up_masked = true;
>> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>> +	/* Unmask Turbo interrupts */
>> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
>> +		I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED);
>> +	else {
>> +		dev_priv->rps.rp_up_masked = true;
>> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>>   						dev_priv->rps.min_delay);
>> +	}
>>   }
>>
>>   void gen6_rps_idle(struct drm_i915_private *dev_priv)
>> @@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>
>>   	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
>> -	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
>> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
>> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
>> +						~GEN6_PM_RP_UP_EI_EXPIRED);
>> +	} else {
>> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
>> +						~GEN6_PM_RPS_EVENTS);
>> +	}
>>   	/* Complete PM interrupt masking here doesn't race with the rps work
>>   	 * item again unmasking PM interrupts because that is using a different
>>   	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
>> @@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>>   	dev_priv->rps.pm_iir = 0;
>>   	spin_unlock_irq(&dev_priv->irq_lock);
>>
>> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
>> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
>> +	else
>> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>>   }
>>
>>   static void gen6_disable_rps(struct drm_device *dev)
>> @@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>   	u32 enabled_intrs;
>>
>> +	/* Clear out any stale interrupts first */
>>   	spin_lock_irq(&dev_priv->irq_lock);
>>   	WARN_ON(dev_priv->rps.pm_iir);
>> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
>> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
>> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
>> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
>> +	} else {
>> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
>> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>> +	}
>>   	spin_unlock_irq(&dev_priv->irq_lock);
>>
>>   	/* only unmask PM interrupts we need. Mask all others. */
>> -	enabled_intrs = GEN6_PM_RPS_EVENTS;
>> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
>> +		enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED;
>> +	else
>> +		enabled_intrs = GEN6_PM_RPS_EVENTS;
>>
>>   	/* IVB and SNB hard hangs on looping batchbuffer
>>   	 * if GEN6_PM_UP_EI_EXPIRED is masked.
>>   	 */
>> -	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
>> +	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) &&
>> +			!dev_priv->rps.use_RC0_residency_for_turbo)
>>   		enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
>>
>>   	I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
>> @@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>>   	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>>
>>   	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
>> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>>
>>   	I915_WRITE(GEN6_RP_CONTROL,
>>   		   GEN6_RP_MEDIA_TURBO |
>> @@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>>   	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
>>
>>   	/* allows RC6 residency counter to work */
>> -	I915_WRITE(VLV_COUNTER_CONTROL,
>> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
>> -				      VLV_MEDIA_RC6_COUNT_EN |
>> -				      VLV_RENDER_RC6_COUNT_EN));
>> +	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
>>   	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>>   		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>>
>> @@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
>>   	dev_priv->rps.rp_up_masked = false;
>>   	dev_priv->rps.rp_down_masked = false;
>>
>> +	/* enable WA for RC6+turbo to work together */
>> +	dev_priv->rps.use_RC0_residency_for_turbo = true;
>> +
>>   	gen6_enable_rps_interrupts(dev);
>>
>>   	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>> --
>> 1.8.5.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH v3 0/3] WA for Turbo and RC6 to work together.
  2014-03-05 12:30   ` S, Deepak
@ 2014-03-13 16:00     ` deepak.s
  2014-03-13 16:00       ` [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
                         ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: deepak.s @ 2014-03-13 16:00 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@linux.intel.com>

This series adds WA patches to enable RC6 and Turbo to work together and also adds a patch to control the rps boost at runtime.

Deepak S (3):
  drm/i915: Track the enabled PM interrupts in dev_priv.
  drm/i915/vlv: WA for Turbo and RC6 to work together.
  drm/i915: Add boot paramter to control rps boost at boot time.

 drivers/gpu/drm/i915/i915_drv.h    |  19 +++++
 drivers/gpu/drm/i915/i915_gem.c    |  16 +++-
 drivers/gpu/drm/i915/i915_irq.c    | 147 +++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_params.c |   5 ++
 drivers/gpu/drm/i915/i915_reg.h    |  13 +++-
 drivers/gpu/drm/i915/intel_pm.c    |  31 +++++---
 6 files changed, 210 insertions(+), 21 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv.
  2014-03-13 16:00     ` [PATCH v3 0/3] " deepak.s
@ 2014-03-13 16:00       ` deepak.s
  2014-03-13 18:16         ` Ville Syrjälä
  2014-03-13 16:00       ` [PATCH v3 2/3] " deepak.s
  2014-03-13 16:00       ` [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
  2 siblings, 1 reply; 30+ messages in thread
From: deepak.s @ 2014-03-13 16:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Deepak S

From: Deepak S <deepak.s@intel.com>

When we use different rps events for different platforms or due to a WA, we
might end up doing (vs) everywhere. Instead of this, let's use a variable
in dev_priv to track the enabled PM interrupts.

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_irq.c | 14 +++++++-------
 drivers/gpu/drm/i915/intel_pm.c | 14 +++++++++-----
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 70fbe90..d522313 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1487,6 +1487,7 @@ typedef struct drm_i915_private {
 	};
 	u32 gt_irq_mask;
 	u32 pm_irq_mask;
+	u32 pm_rps_events;
 	u32 pipestat_irq_mask[I915_MAX_PIPES];
 
 	struct work_struct hotplug_work;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 37f852d..04f2742 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1132,13 +1132,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	pm_iir = dev_priv->rps.pm_iir;
 	dev_priv->rps.pm_iir = 0;
 	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
-	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/* Make sure we didn't queue anything we're not going to process. */
-	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
+	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
 
-	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
+	if ((pm_iir & dev_priv->pm_rps_events) == 0)
 		return;
 
 	mutex_lock(&dev_priv->rps.hw_lock);
@@ -1555,10 +1555,10 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device *dev, enum pipe pipe)
  * the work queue. */
 static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 {
-	if (pm_iir & GEN6_PM_RPS_EVENTS) {
+	if (pm_iir & dev_priv->pm_rps_events) {
 		spin_lock(&dev_priv->irq_lock);
-		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS;
-		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS);
+		dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events;
+		snb_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
 		spin_unlock(&dev_priv->irq_lock);
 
 		queue_work(dev_priv->wq, &dev_priv->rps.work);
@@ -2983,7 +2983,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 	POSTING_READ(GTIER);
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		pm_irqs |= GEN6_PM_RPS_EVENTS;
+		pm_irqs |= dev_priv->pm_rps_events;
 
 		if (HAS_VEBOX(dev))
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ad58ce3..bf6baa6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3160,7 +3160,7 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
-	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
+	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~dev_priv->pm_rps_events);
 	/* Complete PM interrupt masking here doesn't race with the rps work
 	 * item again unmasking PM interrupts because that is using a different
 	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
@@ -3170,7 +3170,7 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
 	dev_priv->rps.pm_iir = 0;
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
 }
 
 static void gen6_disable_rps(struct drm_device *dev)
@@ -3232,12 +3232,12 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	WARN_ON(dev_priv->rps.pm_iir);
-	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
-	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/* only unmask PM interrupts we need. Mask all others. */
-	enabled_intrs = GEN6_PM_RPS_EVENTS;
+	enabled_intrs = dev_priv->pm_rps_events;
 
 	/* IVB and SNB hard hangs on looping batchbuffer
 	 * if GEN6_PM_UP_EI_EXPIRED is masked.
@@ -3311,6 +3311,8 @@ static void gen8_enable_rps(struct drm_device *dev)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_AVG);
 
+	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+
 	/* 6: Ring frequency + overclocking (our driver does this later */
 
 	gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
@@ -3430,6 +3432,7 @@ static void gen6_enable_rps(struct drm_device *dev)
 	dev_priv->rps.power = HIGH_POWER; /* force a reset */
 	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
 
+	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 	gen6_enable_rps_interrupts(dev);
 
 	rc6vids = 0;
@@ -3688,6 +3691,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	dev_priv->rps.rp_up_masked = false;
 	dev_priv->rps.rp_down_masked = false;
 
+	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 	gen6_enable_rps_interrupts(dev);
 
 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH v3 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-13 16:00     ` [PATCH v3 0/3] " deepak.s
  2014-03-13 16:00       ` [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
@ 2014-03-13 16:00       ` deepak.s
  2014-03-13 18:17         ` Ville Syrjälä
  2014-03-13 16:00       ` [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
  2 siblings, 1 reply; 30+ messages in thread
From: deepak.s @ 2014-03-13 16:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Deepak S

From: Deepak S <deepak.s@intel.com>

With RC6 enabled, BYT has an HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
on increasing/decreasing the freq. This logic will monitor C0
counters of render/media power-wells over EI period and takes
necessary action based on these values

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  17 +++++
 drivers/gpu/drm/i915/i915_irq.c | 133 ++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h |  13 +++-
 drivers/gpu/drm/i915/intel_pm.c |  19 +++---
 4 files changed, 173 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d522313..607042b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -974,6 +974,12 @@ struct i915_suspend_saved_registers {
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+	u32 cz_ts_ei;
+	u32 render_ei_c0;
+	u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -990,6 +996,10 @@ struct intel_gen6_power_mgmt {
 	bool rp_up_masked;
 	bool rp_down_masked;
 
+	u32 ei_interrupt_count;
+
+	bool use_RC0_residency_for_turbo;
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1583,6 +1593,13 @@ typedef struct drm_i915_private {
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
+	/* rps wa up ei calculation */
+	struct intel_rps_ei_calc rps_up_ei;
+
+	/* rps wa down ei calculation */
+	struct intel_rps_ei_calc rps_down_ei;
+
+
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
 	struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 04f2742..618a442 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1121,6 +1121,123 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
 	}
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+				struct  intel_rps_ei_calc *rps_ei)
+{
+	u32 cz_ts, cz_freq_khz;
+	u32 render_count, media_count;
+	u32 elapsed_render, elapsed_media, elapsed_time;
+	u32 residency = 0;
+
+	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (rps_ei->cz_ts_ei == 0) {
+		rps_ei->cz_ts_ei = cz_ts;
+		rps_ei->render_ei_c0 = render_count;
+		rps_ei->media_ei_c0 = media_count;
+
+		return dev_priv->rps.cur_delay;
+	}
+
+	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+	rps_ei->cz_ts_ei = cz_ts;
+
+	elapsed_render = render_count - rps_ei->render_ei_c0;
+	rps_ei->render_ei_c0 = render_count;
+
+	elapsed_media = media_count - rps_ei->media_ei_c0;
+	rps_ei->media_ei_c0 = media_count;
+
+	/* Convert all the counters into common unit of milli sec */
+	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+	elapsed_render /=  cz_freq_khz;
+	elapsed_media /= cz_freq_khz;
+
+	/**
+	 * Calculate overall C0 residency percentage
+	 * only if elapsed time is non zero
+	 */
+	if (elapsed_time) {
+		residency =
+			((max(elapsed_render, elapsed_media) * 100)
+				/ elapsed_time);
+	}
+
+	return residency;
+}
+
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	u8 new_delay;
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_delay;
+	}
+
+
+	/**
+	 * To down throttle, C0 residency should be less than down threshold
+	 * for continous EI intervals. So calculate down EI counters
+	 * once in VLV_INT_COUNT_FOR_DOWN_EI
+	 */
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_up_ei);
+	}
+
+	new_delay = dev_priv->rps.cur_delay;
+
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+
+		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
+			new_delay = dev_priv->rps.cur_delay + 1;
+
+		/**
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < dev_priv->rps.rpe_delay)
+			new_delay = dev_priv->rps.rpe_delay;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		/**
+		 * This means, C0 residency is less than down threshold over
+		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		 */
+		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
+			new_delay = dev_priv->rps.cur_delay - 1;
+	}
+
+	return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1163,6 +1280,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_delay;
 		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
@@ -1564,6 +1683,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 		queue_work(dev_priv->wq, &dev_priv->rps.work);
 	}
 
+	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		spin_lock(&dev_priv->irq_lock);
+		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
+		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
+		spin_unlock(&dev_priv->irq_lock);
+		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
+
+		queue_work(dev_priv->wq, &dev_priv->rps.work);
+	}
+
 	if (HAS_VEBOX(dev_priv->dev)) {
 		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
 			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
@@ -2989,6 +3118,10 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
+
+		dev_priv->pm_irq_mask &= ~dev_priv->pm_rps_events;
+		pm_irqs |= dev_priv->pm_rps_events;
+
 		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
 		I915_WRITE(GEN6_PMIER, pm_irqs);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6174fda..d978b46 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -419,6 +419,7 @@ enum punit_power_well {
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
 #define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
+#define PUNIT_REG_CZ_TIMESTAMP			0xce
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -434,6 +435,11 @@ enum punit_power_well {
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
+#define VLV_RP_UP_EI_THRESHOLD			90
+#define VLV_RP_DOWN_EI_THRESHOLD		70
+#define VLV_INT_COUNT_FOR_DOWN_EI		5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -4892,6 +4898,7 @@ enum punit_power_well {
 #define  VLV_GTLC_PW_STATUS			0x130094
 #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
 #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
@@ -5019,13 +5026,17 @@ enum punit_power_well {
 
 #define GEN6_GT_GFX_RC6_LOCKED			0x138104
 #define VLV_COUNTER_CONTROL			0x138104
+#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
+#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
+#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
 #define GEN6_GT_GFX_RC6				0x138108
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
-
+#define VLV_RENDER_C0_COUNT_REG		0x138118
+#define VLV_MEDIA_C0_COUNT_REG			0x13811C
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
 #define   GEN6_READ_OC_PARAMS			0xc
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index bf6baa6..8a791b7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3096,10 +3096,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
 				~VLV_GFX_CLK_FORCE_ON_BIT);
 
-	/* Unmask Up interrupts */
-	dev_priv->rps.rp_up_masked = true;
-	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+	/* Unmask Turbo interrupts */
+	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
+	else {
+		dev_priv->rps.rp_up_masked = true;
+		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
 						dev_priv->rps.min_delay);
+	}
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3620,6 +3624,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
@@ -3639,10 +3644,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
 
 	/* allows RC6 residency counter to work */
-	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-				      VLV_MEDIA_RC6_COUNT_EN |
-				      VLV_RENDER_RC6_COUNT_EN));
+	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 
@@ -3691,7 +3693,8 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	dev_priv->rps.rp_up_masked = false;
 	dev_priv->rps.rp_down_masked = false;
 
-	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+	/* WAUseRC0ResidenncyTurbo:VLV */
+	dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
 	gen6_enable_rps_interrupts(dev);
 
 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time.
  2014-03-13 16:00     ` [PATCH v3 0/3] " deepak.s
  2014-03-13 16:00       ` [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
  2014-03-13 16:00       ` [PATCH v3 2/3] " deepak.s
@ 2014-03-13 16:00       ` deepak.s
  2014-03-13 18:16         ` Ville Syrjälä
  2 siblings, 1 reply; 30+ messages in thread
From: deepak.s @ 2014-03-13 16:00 UTC (permalink / raw)
  To: intel-gfx; +Cc: Deepak S

From: Deepak S <deepak.s@intel.com>

We are adding a module parameter to control rps boost. By default, we
enable the boost for better performance. Based on the need (perf/power)
we can either enable/disable.

v2: Addressed rps default comment (Jani)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h    |  1 +
 drivers/gpu/drm/i915/i915_gem.c    | 16 +++++++++++++++-
 drivers/gpu/drm/i915/i915_params.c |  5 +++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 607042b..7808319 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2106,6 +2106,7 @@ struct i915_params {
 	int panel_use_ssc;
 	int vbt_sdvo_panel_type;
 	int enable_rc6;
+	int enable_rps_boost;
 	int enable_fbc;
 	int enable_ppgtt;
 	int enable_psr;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 92b0b41..23a4700 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1002,6 +1002,17 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
 	return !atomic_xchg(&file_priv->rps_wait_boost, true);
 }
 
+static int  intel_enable_rps_boost(struct drm_device *dev)
+{
+	/* No RPS Boost before Ironlake */
+	if (INTEL_INFO(dev)->gen < 6)
+		return 0;
+
+	/* Respect the kernel parameter if it is set */
+	return i915.enable_rps_boost;
+
+}
+
 /**
  * __wait_seqno - wait until execution of seqno has finished
  * @ring: the ring expected to report seqno
@@ -1042,8 +1053,11 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 
 	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
 
-	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
+	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv) && 
+			intel_enable_rps_boost(ring->dev)) {
+		
 		gen6_rps_boost(dev_priv);
+
 		if (file_priv)
 			mod_delayed_work(dev_priv->wq,
 					 &file_priv->mm.idle_work,
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index a66ffb6..2d207e3 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -34,6 +34,7 @@ struct i915_params i915 __read_mostly = {
 	.panel_use_ssc = -1,
 	.vbt_sdvo_panel_type = -1,
 	.enable_rc6 = -1,
+	.enable_rps_boost = 1,
 	.enable_fbc = -1,
 	.enable_hangcheck = true,
 	.enable_ppgtt = -1,
@@ -78,6 +79,10 @@ MODULE_PARM_DESC(enable_rc6,
 	"For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. "
 	"default: -1 (use per-chip default)");
 
+module_param_named(enable_rps_boost, i915.enable_rps_boost, int, 0600);
+MODULE_PARM_DESC(enable_rps_boost,
+		"Enable/Disable boost RPS frequency (default: enabled (1))");
+
 module_param_named(enable_fbc, i915.enable_fbc, int, 0600);
 MODULE_PARM_DESC(enable_fbc,
 	"Enable frame buffer compression for power savings "
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time.
  2014-03-13 16:00       ` [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
@ 2014-03-13 18:16         ` Ville Syrjälä
  2014-03-13 18:46           ` S, Deepak
  0 siblings, 1 reply; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-13 18:16 UTC (permalink / raw)
  To: deepak.s; +Cc: Deepak S, intel-gfx

On Thu, Mar 13, 2014 at 09:30:18PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@intel.com>
> 
> We are adding a module paramter to control rps boost. By default, we
> enable the boost for better performace. Based on the need (perf/power)
> we can either enable/disable.
> 
> v2: Addressed rps default comment (Jani)
> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h    |  1 +
>  drivers/gpu/drm/i915/i915_gem.c    | 16 +++++++++++++++-
>  drivers/gpu/drm/i915/i915_params.c |  5 +++++
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 607042b..7808319 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2106,6 +2106,7 @@ struct i915_params {
>  	int panel_use_ssc;
>  	int vbt_sdvo_panel_type;
>  	int enable_rc6;
> +	int enable_rps_boost;

Should be bool like Jani said. And then it should be thrown somewhere
at the end of the structure next to the other bools.

>  	int enable_fbc;
>  	int enable_ppgtt;
>  	int enable_psr;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 92b0b41..23a4700 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1002,6 +1002,17 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>  	return !atomic_xchg(&file_priv->rps_wait_boost, true);
>  }
>  
> +static int  intel_enable_rps_boost(struct drm_device *dev)
> +{
> +	/* No RPS Boost before Ironlake */

This comment is still wrong. I'd just drop it, everyone should know what
the gen check below means.

> +	if (INTEL_INFO(dev)->gen < 6)
> +		return 0;
> +
> +	/* Respect the kernel parameter if it is set */

This comment too seems rather obvious. I'd drop it as well.

> +	return i915.enable_rps_boost;
> +
> +}

This function is still just a wrapper for i915.enable_rps_boost since
__wait_seqno() already does the gen check. You could just check
i915.enable_rps_boost directly in __wait_seqno(). The other option is
to just drop the gen check from __wait_seqno() and just let this
function take care of it. Hmm. Yeah that might be the nicest choice in
fact.

> +
>  /**
>   * __wait_seqno - wait until execution of seqno has finished
>   * @ring: the ring expected to report seqno
> @@ -1042,8 +1053,11 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  
>  	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
>  
> -	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
> +	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv) && 
> +			intel_enable_rps_boost(ring->dev)) {

Indentation is quite wrong. There's also trailing whitespace around
these parts. Please run patches through checkpatch.pl before submitting.

> +		
>  		gen6_rps_boost(dev_priv);
> +
>  		if (file_priv)
>  			mod_delayed_work(dev_priv->wq,
>  					 &file_priv->mm.idle_work,
> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> index a66ffb6..2d207e3 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -34,6 +34,7 @@ struct i915_params i915 __read_mostly = {
>  	.panel_use_ssc = -1,
>  	.vbt_sdvo_panel_type = -1,
>  	.enable_rc6 = -1,
> +	.enable_rps_boost = 1,

true

>  	.enable_fbc = -1,
>  	.enable_hangcheck = true,
>  	.enable_ppgtt = -1,
> @@ -78,6 +79,10 @@ MODULE_PARM_DESC(enable_rc6,
>  	"For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. "
>  	"default: -1 (use per-chip default)");
>  
> +module_param_named(enable_rps_boost, i915.enable_rps_boost, int, 0600);

bool

> +MODULE_PARM_DESC(enable_rps_boost,
> +		"Enable/Disable boost RPS frequency (default: enabled (1))");

default: true

> +
>  module_param_named(enable_fbc, i915.enable_fbc, int, 0600);
>  MODULE_PARM_DESC(enable_fbc,
>  	"Enable frame buffer compression for power savings "
> -- 
> 1.8.4.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv.
  2014-03-13 16:00       ` [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
@ 2014-03-13 18:16         ` Ville Syrjälä
  2014-03-13 18:43           ` S, Deepak
  0 siblings, 1 reply; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-13 18:16 UTC (permalink / raw)
  To: deepak.s; +Cc: Deepak S, intel-gfx

On Thu, Mar 13, 2014 at 09:30:16PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@intel.com>
> 
> When we use different rps events for different platform or due to wa, we
> mgiht end up doing (vs) everywahere. Insted of this, Let's use a variable
> in dev_priv to track the enabled PM interrupts
> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/i915_irq.c | 14 +++++++-------
>  drivers/gpu/drm/i915/intel_pm.c | 14 +++++++++-----
>  3 files changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 70fbe90..d522313 100644
<snip>
> @@ -3311,6 +3311,8 @@ static void gen8_enable_rps(struct drm_device *dev)
>  		   GEN6_RP_UP_BUSY_AVG |
>  		   GEN6_RP_DOWN_IDLE_AVG);
>  
> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> +
>  	/* 6: Ring frequency + overclocking (our driver does this later */
>  
>  	gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
> @@ -3430,6 +3432,7 @@ static void gen6_enable_rps(struct drm_device *dev)
>  	dev_priv->rps.power = HIGH_POWER; /* force a reset */
>  	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
>  
> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  	gen6_enable_rps_interrupts(dev);
>  
>  	rc6vids = 0;
> @@ -3688,6 +3691,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	dev_priv->rps.rp_up_masked = false;
>  	dev_priv->rps.rp_down_masked = false;
>  
> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  	gen6_enable_rps_interrupts(dev);
>  
>  	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);

I think we need to initialize pm_rps_events somewhere earlier since we
depend on it already in irq postinstall. Otherwise the patch looks
good.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v3 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-13 16:00       ` [PATCH v3 2/3] " deepak.s
@ 2014-03-13 18:17         ` Ville Syrjälä
  2014-03-13 18:40           ` S, Deepak
  0 siblings, 1 reply; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-13 18:17 UTC (permalink / raw)
  To: deepak.s; +Cc: Deepak S, intel-gfx

On Thu, Mar 13, 2014 at 09:30:17PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@intel.com>
> 
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
> on increasing/decreasing the freq. This logic will monitor C0
> counters of render/media power-wells over EI period and takes
> necessary action based on these values
> 
> v2: Refactor duplicate code. (Ville)
> 
> v3: Reformat the comments. (Ville)
> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  17 +++++
>  drivers/gpu/drm/i915/i915_irq.c | 133 ++++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_reg.h |  13 +++-
>  drivers/gpu/drm/i915/intel_pm.c |  19 +++---
>  4 files changed, 173 insertions(+), 9 deletions(-)
> 
<snip>
> @@ -1564,6 +1683,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>  		queue_work(dev_priv->wq, &dev_priv->rps.work);
>  	}
>  
> +	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		spin_lock(&dev_priv->irq_lock);
> +		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
> +		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
> +		spin_unlock(&dev_priv->irq_lock);
> +		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");

This debug message still seems rather pointless. Just drop it.

Oh actually isn't this entire block of code useless now that
pm_rps_events is used? The previous if block already checked
pm_rps_events which will include GEN6_PM_RP_UP_EI_EXPIRED on
VLV, so this code here will just repeat the work already done.

> +
> +		queue_work(dev_priv->wq, &dev_priv->rps.work);
> +	}
> +
>  	if (HAS_VEBOX(dev_priv->dev)) {
>  		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
>  			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
> @@ -2989,6 +3118,10 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>  			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>  
>  		dev_priv->pm_irq_mask = 0xffffffff;
> +
> +		dev_priv->pm_irq_mask &= ~dev_priv->pm_rps_events;
> +		pm_irqs |= dev_priv->pm_rps_events;

What's this stuff doing here?

> +
>  		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>  		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>  		I915_WRITE(GEN6_PMIER, pm_irqs);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 6174fda..d978b46 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -419,6 +419,7 @@ enum punit_power_well {
>  #define PUNIT_REG_GPU_FREQ_STS			0xd8
>  #define   GENFREQSTATUS				(1<<0)
>  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>  
>  #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>  #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
> @@ -434,6 +435,11 @@ enum punit_power_well {
>  #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>  #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>  
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> +#define VLV_RP_UP_EI_THRESHOLD			90
> +#define VLV_RP_DOWN_EI_THRESHOLD		70
> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
> +
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG				0x8
>  #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
> @@ -4892,6 +4898,7 @@ enum punit_power_well {
>  #define  VLV_GTLC_PW_STATUS			0x130094
>  #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>  #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>  #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>  #define   FORCEWAKE_KERNEL			0x1
>  #define   FORCEWAKE_USER			0x2
> @@ -5019,13 +5026,17 @@ enum punit_power_well {
>  
>  #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>  #define VLV_COUNTER_CONTROL			0x138104
> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF

I'd still like to see names for all the bits we frob, and I'd
still like to have some kind of an answer to the question whether
we really need to enable them all when the w/a is only interested
in the rc0 counters.

>  #define   VLV_COUNT_RANGE_HIGH			(1<<15)
> +#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
> +#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
>  #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
>  #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
>  #define GEN6_GT_GFX_RC6				0x138108
>  #define GEN6_GT_GFX_RC6p			0x13810C
>  #define GEN6_GT_GFX_RC6pp			0x138110
> -
> +#define VLV_RENDER_C0_COUNT_REG		0x138118
> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
>  #define GEN6_PCODE_MAILBOX			0x138124
>  #define   GEN6_PCODE_READY			(1<<31)
>  #define   GEN6_READ_OC_PARAMS			0xc
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index bf6baa6..8a791b7 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3096,10 +3096,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>  		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>  				~VLV_GFX_CLK_FORCE_ON_BIT);
>  
> -	/* Unmask Up interrupts */
> -	dev_priv->rps.rp_up_masked = true;
> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> +	/* Unmask Turbo interrupts */
> +	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> +		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
> +	else {
> +		dev_priv->rps.rp_up_masked = true;
> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>  						dev_priv->rps.min_delay);
> +	}
>  }
>  
>  void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3620,6 +3624,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>  
>  	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>  
>  	I915_WRITE(GEN6_RP_CONTROL,
>  		   GEN6_RP_MEDIA_TURBO |
> @@ -3639,10 +3644,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
>  
>  	/* allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> +	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
>  	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>  		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>  
> @@ -3691,7 +3693,8 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	dev_priv->rps.rp_up_masked = false;
>  	dev_priv->rps.rp_down_masked = false;
>  
> -	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> +	/* WAUseRC0ResidenncyTurbo:VLV */
> +	dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;

I'm still wondering if we should have the option of using the old
fashioned method...

>  	gen6_enable_rps_interrupts(dev);
>  
>  	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> -- 
> 1.8.4.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v3 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-13 18:17         ` Ville Syrjälä
@ 2014-03-13 18:40           ` S, Deepak
  2014-03-13 18:57             ` Ville Syrjälä
  0 siblings, 1 reply; 30+ messages in thread
From: S, Deepak @ 2014-03-13 18:40 UTC (permalink / raw)
  To: Ville Syrjälä, deepak.s; +Cc: intel-gfx



On 3/13/2014 11:47 PM, Ville Syrjälä wrote:
> On Thu, Mar 13, 2014 at 09:30:17PM +0530, deepak.s@linux.intel.com wrote:
>> From: Deepak S <deepak.s@intel.com>
>>
>> With RC6 enabled, BYT has an HW issue in determining the right
>> Gfx busyness.
>> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
>> on increasing/decreasing the freq. This logic will monitor C0
>> counters of render/media power-wells over EI period and takes
>> necessary action based on these values
>>
>> v2: Refactor duplicate code. (Ville)
>>
>> v3: Reformat the comments. (Ville)
>>
>> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h |  17 +++++
>>   drivers/gpu/drm/i915/i915_irq.c | 133 ++++++++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_reg.h |  13 +++-
>>   drivers/gpu/drm/i915/intel_pm.c |  19 +++---
>>   4 files changed, 173 insertions(+), 9 deletions(-)
>>
> <snip>
>> @@ -1564,6 +1683,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>>   		queue_work(dev_priv->wq, &dev_priv->rps.work);
>>   	}
>>
>> +	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
>> +		spin_lock(&dev_priv->irq_lock);
>> +		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
>> +		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
>> +		spin_unlock(&dev_priv->irq_lock);
>> +		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
>
> This debug message still seems rather pointless. Just drop it.
>
> Oh actually isn't this entire block of code useless now that
> pm_rps_events is used? The previous if block already checked
> pm_rps_events which will include GEN6_PM_RP_UP_EI_EXPIRED on
> VLV, so this code here will just repeat the work already done.

Hmm, I think I missed this in internal review. With pm_rps_events, I
think this block is redundant.

>> +
>> +		queue_work(dev_priv->wq, &dev_priv->rps.work);
>> +	}
>> +
>>   	if (HAS_VEBOX(dev_priv->dev)) {
>>   		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
>>   			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
>> @@ -2989,6 +3118,10 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>>   			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>>
>>   		dev_priv->pm_irq_mask = 0xffffffff;
>> +
>> +		dev_priv->pm_irq_mask &= ~dev_priv->pm_rps_events;
>> +		pm_irqs |= dev_priv->pm_rps_events;
>
> What's this stuff doing here?
>> +
>>   		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>>   		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>>   		I915_WRITE(GEN6_PMIER, pm_irqs);
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 6174fda..d978b46 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -419,6 +419,7 @@ enum punit_power_well {
>>   #define PUNIT_REG_GPU_FREQ_STS			0xd8
>>   #define   GENFREQSTATUS				(1<<0)
>>   #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
>> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>>
>>   #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>>   #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
>> @@ -434,6 +435,11 @@ enum punit_power_well {
>>   #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>>   #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>>
>> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
>> +#define VLV_RP_UP_EI_THRESHOLD			90
>> +#define VLV_RP_DOWN_EI_THRESHOLD		70
>> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
>> +
>>   /* vlv2 north clock has */
>>   #define CCK_FUSE_REG				0x8
>>   #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
>> @@ -4892,6 +4898,7 @@ enum punit_power_well {
>>   #define  VLV_GTLC_PW_STATUS			0x130094
>>   #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>>   #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
>> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>>   #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>>   #define   FORCEWAKE_KERNEL			0x1
>>   #define   FORCEWAKE_USER			0x2
>> @@ -5019,13 +5026,17 @@ enum punit_power_well {
>>
>>   #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>>   #define VLV_COUNTER_CONTROL			0x138104
>> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
>
> I'd still like to see names for all the bits we frob, and I'd
> still like to have some kind of an answer to the question whether
> we really need to enable them all when the w/a is only interested
> in the rc0 counters.

I did try with enabling only the rc0 counters, but the busyness 
calculation was not right. Let me do some more investigation and get 
back to you on this.

>>   #define   VLV_COUNT_RANGE_HIGH			(1<<15)
>> +#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
>> +#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
>>   #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
>>   #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
>>   #define GEN6_GT_GFX_RC6				0x138108
>>   #define GEN6_GT_GFX_RC6p			0x13810C
>>   #define GEN6_GT_GFX_RC6pp			0x138110
>> -
>> +#define VLV_RENDER_C0_COUNT_REG		0x138118
>> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
>>   #define GEN6_PCODE_MAILBOX			0x138124
>>   #define   GEN6_PCODE_READY			(1<<31)
>>   #define   GEN6_READ_OC_PARAMS			0xc
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index bf6baa6..8a791b7 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3096,10 +3096,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>>   		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>>   				~VLV_GFX_CLK_FORCE_ON_BIT);
>>
>> -	/* Unmask Up interrupts */
>> -	dev_priv->rps.rp_up_masked = true;
>> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>> +	/* Unmask Turbo interrupts */
>> +	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
>> +		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
>> +	else {
>> +		dev_priv->rps.rp_up_masked = true;
>> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>>   						dev_priv->rps.min_delay);
>> +	}
>>   }
>>
>>   void gen6_rps_idle(struct drm_i915_private *dev_priv)
>> @@ -3620,6 +3624,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>>   	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>>
>>   	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
>> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>>
>>   	I915_WRITE(GEN6_RP_CONTROL,
>>   		   GEN6_RP_MEDIA_TURBO |
>> @@ -3639,10 +3644,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>>   	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
>>
>>   	/* allows RC6 residency counter to work */
>> -	I915_WRITE(VLV_COUNTER_CONTROL,
>> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
>> -				      VLV_MEDIA_RC6_COUNT_EN |
>> -				      VLV_RENDER_RC6_COUNT_EN));
>> +	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
>>   	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>>   		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>>
>> @@ -3691,7 +3693,8 @@ static void valleyview_enable_rps(struct drm_device *dev)
>>   	dev_priv->rps.rp_up_masked = false;
>>   	dev_priv->rps.rp_down_masked = false;
>>
>> -	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>> +	/* WAUseRC0ResidenncyTurbo:VLV */
>> +	dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
>
> I'm still wondering if we should have the option of using the old
> fashioned method...

I think we can have an "if turbo_wa" check. This will let us switch
between the two methods by toggling a flag.

>>   	gen6_enable_rps_interrupts(dev);
>>
>>   	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>> --
>> 1.8.4.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv.
  2014-03-13 18:16         ` Ville Syrjälä
@ 2014-03-13 18:43           ` S, Deepak
  2014-03-13 18:59             ` Ville Syrjälä
  0 siblings, 1 reply; 30+ messages in thread
From: S, Deepak @ 2014-03-13 18:43 UTC (permalink / raw)
  To: Ville Syrjälä, deepak.s; +Cc: intel-gfx



On 3/13/2014 11:46 PM, Ville Syrjälä wrote:
> On Thu, Mar 13, 2014 at 09:30:16PM +0530, deepak.s@linux.intel.com wrote:
>> From: Deepak S <deepak.s@intel.com>
>>
>> When we use different rps events for different platform or due to wa, we
>> mgiht end up doing (vs) everywahere. Insted of this, Let's use a variable
>> in dev_priv to track the enabled PM interrupts
>>
>> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h |  1 +
>>   drivers/gpu/drm/i915/i915_irq.c | 14 +++++++-------
>>   drivers/gpu/drm/i915/intel_pm.c | 14 +++++++++-----
>>   3 files changed, 17 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 70fbe90..d522313 100644
> <snip>
>> @@ -3311,6 +3311,8 @@ static void gen8_enable_rps(struct drm_device *dev)
>>   		   GEN6_RP_UP_BUSY_AVG |
>>   		   GEN6_RP_DOWN_IDLE_AVG);
>>
>> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>> +
>>   	/* 6: Ring frequency + overclocking (our driver does this later */
>>
>>   	gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
>> @@ -3430,6 +3432,7 @@ static void gen6_enable_rps(struct drm_device *dev)
>>   	dev_priv->rps.power = HIGH_POWER; /* force a reset */
>>   	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
>>
>> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>>   	gen6_enable_rps_interrupts(dev);
>>
>>   	rc6vids = 0;
>> @@ -3688,6 +3691,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>>   	dev_priv->rps.rp_up_masked = false;
>>   	dev_priv->rps.rp_down_masked = false;
>>
>> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>>   	gen6_enable_rps_interrupts(dev);
>>
>>   	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>
> I think we need to initialize pm_rps_events somewhere earlier since we
> depend on it already in irq postinstall. Othwewise the patch looks
> good.
Should I add it in the function "intel_uncore_early_sanitize" or "pm_init",
as these get executed before irq_install in driver_load?

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time.
  2014-03-13 18:16         ` Ville Syrjälä
@ 2014-03-13 18:46           ` S, Deepak
  0 siblings, 0 replies; 30+ messages in thread
From: S, Deepak @ 2014-03-13 18:46 UTC (permalink / raw)
  To: Ville Syrjälä, deepak.s; +Cc: intel-gfx



On 3/13/2014 11:46 PM, Ville Syrjälä wrote:
> On Thu, Mar 13, 2014 at 09:30:18PM +0530, deepak.s@linux.intel.com wrote:
>> From: Deepak S <deepak.s@intel.com>
>>
>> We are adding a module paramter to control rps boost. By default, we
>> enable the boost for better performace. Based on the need (perf/power)
>> we can either enable/disable.
>>
>> v2: Addressed rps default comment (Jani)
>>
>> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h    |  1 +
>>   drivers/gpu/drm/i915/i915_gem.c    | 16 +++++++++++++++-
>>   drivers/gpu/drm/i915/i915_params.c |  5 +++++
>>   3 files changed, 21 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 607042b..7808319 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2106,6 +2106,7 @@ struct i915_params {
>>   	int panel_use_ssc;
>>   	int vbt_sdvo_panel_type;
>>   	int enable_rc6;
>> +	int enable_rps_boost;
>
> Should be bool like Jani said. And then it should be thrown somewhere
> somewhere at the end of the structure next to the other bools.

I will address this.
>>   	int enable_fbc;
>>   	int enable_ppgtt;
>>   	int enable_psr;
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 92b0b41..23a4700 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1002,6 +1002,17 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>>   	return !atomic_xchg(&file_priv->rps_wait_boost, true);
>>   }
>>
>> +static int  intel_enable_rps_boost(struct drm_device *dev)
>> +{
>> +	/* No RPS Boost before Ironlake */
>
> This comment is still wrong. I'd just drop it, everyone should know what
> the gen check below means.
Ok
>> +	if (INTEL_INFO(dev)->gen < 6)
>> +		return 0;
>> +
>> +	/* Respect the kernel parameter if it is set */
>
> This comment too seems rather obvious. I'd drop it as well.
Ok
>> +	return i915.enable_rps_boost;
>> +
>> +}
>
> This function is still just a wrapper for i915.enable_rps_boost since
> __wait_seqno() already does the gen check. You could just check
> i915.enable_rps_boost directly in __wait_seqno(). The other option is
> to just drop the gen check from __wait_seqno() and just let this
> function take care of it. Hmm. Yeah that might be the nicest choice in
> fact.
Agreed. It does not make sense to have multiple platform checks.
>> +
>>   /**
>>    * __wait_seqno - wait until execution of seqno has finished
>>    * @ring: the ring expected to report seqno
>> @@ -1042,8 +1053,11 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>>
>>   	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
>>
>> -	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
>> +	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv) &&
>> +			intel_enable_rps_boost(ring->dev)) {
>
> Indentation is quite wrong. There's also trailing whitespace around
> these parts. Please run patches through checkpatch.pl before submitting.
>
>> +		
>>   		gen6_rps_boost(dev_priv);
>> +
>>   		if (file_priv)
>>   			mod_delayed_work(dev_priv->wq,
>>   					 &file_priv->mm.idle_work,
>> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
>> index a66ffb6..2d207e3 100644
>> --- a/drivers/gpu/drm/i915/i915_params.c
>> +++ b/drivers/gpu/drm/i915/i915_params.c
>> @@ -34,6 +34,7 @@ struct i915_params i915 __read_mostly = {
>>   	.panel_use_ssc = -1,
>>   	.vbt_sdvo_panel_type = -1,
>>   	.enable_rc6 = -1,
>> +	.enable_rps_boost = 1,
>
> true
>
>>   	.enable_fbc = -1,
>>   	.enable_hangcheck = true,
>>   	.enable_ppgtt = -1,
>> @@ -78,6 +79,10 @@ MODULE_PARM_DESC(enable_rc6,
>>   	"For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. "
>>   	"default: -1 (use per-chip default)");
>>
>> +module_param_named(enable_rps_boost, i915.enable_rps_boost, int, 0600);
>
> bool
>
>> +MODULE_PARM_DESC(enable_rps_boost,
>> +		"Enable/Disable boost RPS frequency (default: enabled (1))");
>
> default: true
>
>> +
>>   module_param_named(enable_fbc, i915.enable_fbc, int, 0600);
>>   MODULE_PARM_DESC(enable_fbc,
>>   	"Enable frame buffer compression for power savings "
>> --
>> 1.8.4.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v3 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-13 18:40           ` S, Deepak
@ 2014-03-13 18:57             ` Ville Syrjälä
  0 siblings, 0 replies; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-13 18:57 UTC (permalink / raw)
  To: S, Deepak; +Cc: intel-gfx

On Fri, Mar 14, 2014 at 12:10:45AM +0530, S, Deepak wrote:
> 
> 
> On 3/13/2014 11:47 PM, Ville Syrjälä wrote:
> > On Thu, Mar 13, 2014 at 09:30:17PM +0530, deepak.s@linux.intel.com wrote:
<snip>
> >> @@ -5019,13 +5026,17 @@ enum punit_power_well {
> >>
> >>   #define GEN6_GT_GFX_RC6_LOCKED			0x138104
> >>   #define VLV_COUNTER_CONTROL			0x138104
> >> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
> >
> > I'd still like to see names for all the bits we frob, and I'd
> > still like to have some kind of an answer to the question whether
> > we really need to enable them all when the w/a is only interested
> > in the rc0 counters.
> 
> I did try with enabling only the rc0 counters, but the busyness 
> calculation was not right. Let me do some more investigation and get 
> back to you on this.

Well, if you tried it and it didn't work right, then I'm already fairly
satisfied with that. It just needs a comment to make it clear why we
enable them all. Of course if you can dig out more details, that's
always a bonus.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv.
  2014-03-13 18:43           ` S, Deepak
@ 2014-03-13 18:59             ` Ville Syrjälä
  2014-03-15 14:53               ` [PATCH v4 0/3] WA for Turbo and RC6 to work together deepak.s
  0 siblings, 1 reply; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-13 18:59 UTC (permalink / raw)
  To: S, Deepak; +Cc: intel-gfx

On Fri, Mar 14, 2014 at 12:13:30AM +0530, S, Deepak wrote:
> 
> 
> On 3/13/2014 11:46 PM, Ville Syrjälä wrote:
> > On Thu, Mar 13, 2014 at 09:30:16PM +0530, deepak.s@linux.intel.com wrote:
> >> From: Deepak S <deepak.s@intel.com>
> >>
> >> When we use different rps events for different platform or due to wa, we
> >> mgiht end up doing (vs) everywahere. Insted of this, Let's use a variable
> >> in dev_priv to track the enabled PM interrupts
> >>
> >> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_drv.h |  1 +
> >>   drivers/gpu/drm/i915/i915_irq.c | 14 +++++++-------
> >>   drivers/gpu/drm/i915/intel_pm.c | 14 +++++++++-----
> >>   3 files changed, 17 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index 70fbe90..d522313 100644
> > <snip>
> >> @@ -3311,6 +3311,8 @@ static void gen8_enable_rps(struct drm_device *dev)
> >>   		   GEN6_RP_UP_BUSY_AVG |
> >>   		   GEN6_RP_DOWN_IDLE_AVG);
> >>
> >> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> >> +
> >>   	/* 6: Ring frequency + overclocking (our driver does this later */
> >>
> >>   	gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
> >> @@ -3430,6 +3432,7 @@ static void gen6_enable_rps(struct drm_device *dev)
> >>   	dev_priv->rps.power = HIGH_POWER; /* force a reset */
> >>   	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
> >>
> >> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> >>   	gen6_enable_rps_interrupts(dev);
> >>
> >>   	rc6vids = 0;
> >> @@ -3688,6 +3691,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
> >>   	dev_priv->rps.rp_up_masked = false;
> >>   	dev_priv->rps.rp_down_masked = false;
> >>
> >> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> >>   	gen6_enable_rps_interrupts(dev);
> >>
> >>   	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> >
> > I think we need to initialize pm_rps_events somewhere earlier since we
> > depend on it already in irq postinstall. Othwewise the patch looks
> > good.
> Adding it in functions "intel_uncore_early_sanitize" or "pm_init" as 
> this gets executed before irq_install in driver_load?

intel_irq_init() might be a good choice since that's where we also
initialize the rps.work, and then it's clear it gets executed before any
other irq setup code.

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH v4 0/3] WA for Turbo and RC6 to work together.
  2014-03-13 18:59             ` Ville Syrjälä
@ 2014-03-15 14:53               ` deepak.s
  2014-03-15 14:53                 ` [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
                                   ` (3 more replies)
  0 siblings, 4 replies; 30+ messages in thread
From: deepak.s @ 2014-03-15 14:53 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@linux.intel.com>

This series adds WA patches to enable RC6 and Turbo to work together, and
also adds a patch to control the rps boost at runtime.

Deepak S (3):
  drm/i915: Track the enabled PM interrupts in dev_priv.
  drm/i915/vlv: WA for Turbo and RC6 to work together.
  drm/i915: Add boot paramter to control rps boost at boot time.

 drivers/gpu/drm/i915/i915_drv.h    |  17 +++++
 drivers/gpu/drm/i915/i915_gem.c    |   2 +-
 drivers/gpu/drm/i915/i915_irq.c    | 141 +++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_params.c |   5 ++
 drivers/gpu/drm/i915/i915_reg.h    |  13 +++-
 drivers/gpu/drm/i915/intel_pm.c    |  26 ++++---
 6 files changed, 186 insertions(+), 18 deletions(-)

-- 
1.8.4.2

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv.
  2014-03-15 14:53               ` [PATCH v4 0/3] WA for Turbo and RC6 to work together deepak.s
@ 2014-03-15 14:53                 ` deepak.s
  2014-03-24 19:26                   ` Ville Syrjälä
  2014-03-15 14:53                 ` [PATCH v4 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
                                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 30+ messages in thread
From: deepak.s @ 2014-03-15 14:53 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@linux.intel.com>

When we use different rps events for different platforms or due to a wa, we
might end up doing (vs) everywhere. Instead of this, let's use a variable
in dev_priv to track the enabled PM interrupts.

v2: Initialize pm_rps_events in intel_irq_init() (Ville).

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_irq.c | 17 ++++++++++-------
 drivers/gpu/drm/i915/intel_pm.c | 11 ++++++-----
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 70fbe90..d522313 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1487,6 +1487,7 @@ typedef struct drm_i915_private {
 	};
 	u32 gt_irq_mask;
 	u32 pm_irq_mask;
+	u32 pm_rps_events;
 	u32 pipestat_irq_mask[I915_MAX_PIPES];
 
 	struct work_struct hotplug_work;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 37f852d..00b3bfc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1132,13 +1132,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	pm_iir = dev_priv->rps.pm_iir;
 	dev_priv->rps.pm_iir = 0;
 	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
-	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/* Make sure we didn't queue anything we're not going to process. */
-	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
+	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
 
-	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
+	if ((pm_iir & dev_priv->pm_rps_events) == 0)
 		return;
 
 	mutex_lock(&dev_priv->rps.hw_lock);
@@ -1555,10 +1555,10 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device *dev, enum pipe pipe)
  * the work queue. */
 static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 {
-	if (pm_iir & GEN6_PM_RPS_EVENTS) {
+	if (pm_iir & dev_priv->pm_rps_events) {
 		spin_lock(&dev_priv->irq_lock);
-		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS;
-		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS);
+		dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events;
+		snb_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
 		spin_unlock(&dev_priv->irq_lock);
 
 		queue_work(dev_priv->wq, &dev_priv->rps.work);
@@ -2983,7 +2983,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 	POSTING_READ(GTIER);
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		pm_irqs |= GEN6_PM_RPS_EVENTS;
+		pm_irqs |= dev_priv->pm_rps_events;
 
 		if (HAS_VEBOX(dev))
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
@@ -4030,6 +4030,9 @@ void intel_irq_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
+	/* Let's track the enabled rps events */
+	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
 		    (unsigned long) dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ad58ce3..95b133a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3160,7 +3160,8 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
-	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
+	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
+				~dev_priv->pm_rps_events);
 	/* Complete PM interrupt masking here doesn't race with the rps work
 	 * item again unmasking PM interrupts because that is using a different
 	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
@@ -3170,7 +3171,7 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
 	dev_priv->rps.pm_iir = 0;
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
 }
 
 static void gen6_disable_rps(struct drm_device *dev)
@@ -3232,12 +3233,12 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	WARN_ON(dev_priv->rps.pm_iir);
-	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
-	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
 	/* only unmask PM interrupts we need. Mask all others. */
-	enabled_intrs = GEN6_PM_RPS_EVENTS;
+	enabled_intrs = dev_priv->pm_rps_events;
 
 	/* IVB and SNB hard hangs on looping batchbuffer
 	 * if GEN6_PM_UP_EI_EXPIRED is masked.
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH v4 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-15 14:53               ` [PATCH v4 0/3] WA for Turbo and RC6 to work together deepak.s
  2014-03-15 14:53                 ` [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
@ 2014-03-15 14:53                 ` deepak.s
  2014-03-24 19:26                   ` Ville Syrjälä
  2014-03-15 14:53                 ` [PATCH v3 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
  2014-03-27  6:35                 ` [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
  3 siblings, 1 reply; 30+ messages in thread
From: deepak.s @ 2014-03-15 14:53 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@linux.intel.com>

With RC6 enabled, BYT has an HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power-wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +++++
 drivers/gpu/drm/i915/i915_irq.c | 126 +++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h |  13 ++++-
 drivers/gpu/drm/i915/intel_pm.c |  15 +++--
 4 files changed, 163 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d522313..f23c258 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -974,6 +974,12 @@ struct i915_suspend_saved_registers {
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+	u32 cz_ts_ei;
+	u32 render_ei_c0;
+	u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -990,6 +996,8 @@ struct intel_gen6_power_mgmt {
 	bool rp_up_masked;
 	bool rp_down_masked;
 
+	u32 ei_interrupt_count;
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1583,6 +1591,13 @@ typedef struct drm_i915_private {
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
+	/* rps wa up ei calculation */
+	struct intel_rps_ei_calc rps_up_ei;
+
+	/* rps wa down ei calculation */
+	struct intel_rps_ei_calc rps_down_ei;
+
+
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
 	struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 00b3bfc..a817c43 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1121,6 +1121,123 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
 	}
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+				struct  intel_rps_ei_calc *rps_ei)
+{
+	u32 cz_ts, cz_freq_khz;
+	u32 render_count, media_count;
+	u32 elapsed_render, elapsed_media, elapsed_time;
+	u32 residency = 0;
+
+	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (rps_ei->cz_ts_ei == 0) {
+		rps_ei->cz_ts_ei = cz_ts;
+		rps_ei->render_ei_c0 = render_count;
+		rps_ei->media_ei_c0 = media_count;
+
+		return dev_priv->rps.cur_delay;
+	}
+
+	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+	rps_ei->cz_ts_ei = cz_ts;
+
+	elapsed_render = render_count - rps_ei->render_ei_c0;
+	rps_ei->render_ei_c0 = render_count;
+
+	elapsed_media = media_count - rps_ei->media_ei_c0;
+	rps_ei->media_ei_c0 = media_count;
+
+	/* Convert all the counters into common unit of milli sec */
+	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+	elapsed_render /=  cz_freq_khz;
+	elapsed_media /= cz_freq_khz;
+
+	/**
+	 * Calculate overall C0 residency percentage
+	 * only if elapsed time is non zero
+	 */
+	if (elapsed_time) {
+		residency =
+			((max(elapsed_render, elapsed_media) * 100)
+				/ elapsed_time);
+	}
+
+	return residency;
+}
+
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	u8 new_delay;
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_delay;
+	}
+
+
+	/**
+	 * To down throttle, C0 residency should be less than down threshold
+	 * for continous EI intervals. So calculate down EI counters
+	 * once in VLV_INT_COUNT_FOR_DOWN_EI
+	 */
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_up_ei);
+	}
+
+	new_delay = dev_priv->rps.cur_delay;
+
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+
+		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
+			new_delay = dev_priv->rps.cur_delay + 1;
+
+		/**
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < dev_priv->rps.rpe_delay)
+			new_delay = dev_priv->rps.rpe_delay;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		/**
+		 * This means, C0 residency is less than down threshold over
+		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		 */
+		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
+			new_delay = dev_priv->rps.cur_delay - 1;
+	}
+
+	return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1163,6 +1280,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_delay;
 		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
@@ -2989,6 +3108,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
+
 		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
 		I915_WRITE(GEN6_PMIER, pm_irqs);
@@ -4031,7 +4151,11 @@ void intel_irq_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
 	/* Let's track the enabled rps events */
-	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+	if (IS_VALLEYVIEW(dev))
+		/* WAUseRC0ResidenncyTurbo:VLV */
+		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 6174fda..d978b46 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -419,6 +419,7 @@ enum punit_power_well {
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
 #define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
+#define PUNIT_REG_CZ_TIMESTAMP			0xce
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -434,6 +435,11 @@ enum punit_power_well {
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
+#define VLV_RP_UP_EI_THRESHOLD			90
+#define VLV_RP_DOWN_EI_THRESHOLD		70
+#define VLV_INT_COUNT_FOR_DOWN_EI		5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -4892,6 +4898,7 @@ enum punit_power_well {
 #define  VLV_GTLC_PW_STATUS			0x130094
 #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
 #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
@@ -5019,13 +5026,17 @@ enum punit_power_well {
 
 #define GEN6_GT_GFX_RC6_LOCKED			0x138104
 #define VLV_COUNTER_CONTROL			0x138104
+#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
+#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
+#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
 #define GEN6_GT_GFX_RC6				0x138108
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
-
+#define VLV_RENDER_C0_COUNT_REG		0x138118
+#define VLV_MEDIA_C0_COUNT_REG			0x13811C
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
 #define   GEN6_READ_OC_PARAMS			0xc
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 95b133a..858cd1f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3096,10 +3096,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
 				~VLV_GFX_CLK_FORCE_ON_BIT);
 
-	/* Unmask Up interrupts */
-	dev_priv->rps.rp_up_masked = true;
-	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+	/* Unmask Turbo interrupts */
+	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
+	else {
+		dev_priv->rps.rp_up_masked = true;
+		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
 						dev_priv->rps.min_delay);
+	}
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3618,6 +3622,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
@@ -3638,9 +3643,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
 	/* allows RC6 residency counter to work */
 	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
+				      VLV_RENDER_RC0_COUNT_EN |
 				      VLV_MEDIA_RC6_COUNT_EN |
 				      VLV_RENDER_RC6_COUNT_EN));
+
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH v3 3/3] drm/i915: Add boot paramter to control rps boost at boot time.
  2014-03-15 14:53               ` [PATCH v4 0/3] WA for Turbo and RC6 to work together deepak.s
  2014-03-15 14:53                 ` [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
  2014-03-15 14:53                 ` [PATCH v4 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
@ 2014-03-15 14:53                 ` deepak.s
  2014-03-24 19:27                   ` Ville Syrjälä
  2014-03-27  6:35                 ` [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
  3 siblings, 1 reply; 30+ messages in thread
From: deepak.s @ 2014-03-15 14:53 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@linux.intel.com>

We are adding a module parameter to control rps boost. By default, we
enable the boost for better performance. Based on the need (perf/power)
we can either enable or disable it.

v2: Addressed rps default comment (Jani)

v3: Use bool to represent the boot parameter (Ville).

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h    | 1 +
 drivers/gpu/drm/i915/i915_gem.c    | 2 +-
 drivers/gpu/drm/i915/i915_params.c | 5 +++++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f23c258..6d91b1c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2120,6 +2120,7 @@ struct i915_params {
 	bool prefault_disable;
 	bool reset;
 	bool disable_display;
+	bool enable_rps_boost;
 };
 extern struct i915_params i915 __read_mostly;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 92b0b41..b9c52b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1042,7 +1042,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 
 	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
 
-	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
+	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv) && i915.enable_rps_boost) {
 		gen6_rps_boost(dev_priv);
 		if (file_priv)
 			mod_delayed_work(dev_priv->wq,
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index a66ffb6..772659f 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -49,6 +49,7 @@ struct i915_params i915 __read_mostly = {
 	.invert_brightness = 0,
 	.disable_display = 0,
 	.enable_cmd_parser = 0,
+	.enable_rps_boost = true,
 };
 
 module_param_named(modeset, i915.modeset, int, 0400);
@@ -162,3 +163,7 @@ MODULE_PARM_DESC(disable_display, "Disable display (default: false)");
 module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
 		 "Enable command parsing (1=enabled, 0=disabled [default])");
+
+module_param_named(enable_rps_boost, i915.enable_rps_boost, bool, 0600);
+MODULE_PARM_DESC(enable_rps_boost,
+		 "Enable/Disable boost RPS frequency (default: true)");
-- 
1.8.4.2

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH v4 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-15 14:53                 ` [PATCH v4 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
@ 2014-03-24 19:26                   ` Ville Syrjälä
  0 siblings, 0 replies; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-24 19:26 UTC (permalink / raw)
  To: deepak.s; +Cc: intel-gfx

On Sat, Mar 15, 2014 at 08:23:23PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@linux.intel.com>
> 
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
> on increasing/decreasing the freq. This logic monitors the C0
> counters of the render/media power-wells over the EI period and takes
> the necessary action based on these values.
> 
> v2: Refactor duplicate code. (Ville)
> 
> v3: Reformat the comments. (Ville)
> 
> v4: Enable required counters and remove unwanted code (Ville)
> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  15 +++++
>  drivers/gpu/drm/i915/i915_irq.c | 126 +++++++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h |  13 ++++-
>  drivers/gpu/drm/i915/intel_pm.c |  15 +++--
>  4 files changed, 163 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d522313..f23c258 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -974,6 +974,12 @@ struct i915_suspend_saved_registers {
>  	u32 savePCH_PORT_HOTPLUG;
>  };
>  
> +struct intel_rps_ei_calc {
> +	u32 cz_ts_ei;
> +	u32 render_ei_c0;
> +	u32 media_ei_c0;
> +};
> +
>  struct intel_gen6_power_mgmt {
>  	/* work and pm_iir are protected by dev_priv->irq_lock */
>  	struct work_struct work;
> @@ -990,6 +996,8 @@ struct intel_gen6_power_mgmt {
>  	bool rp_up_masked;
>  	bool rp_down_masked;
>  
> +	u32 ei_interrupt_count;
> +
>  	int last_adj;
>  	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
> @@ -1583,6 +1591,13 @@ typedef struct drm_i915_private {
>  	/* gen6+ rps state */
>  	struct intel_gen6_power_mgmt rps;
>  
> +	/* rps wa up ei calculation */
> +	struct intel_rps_ei_calc rps_up_ei;
> +
> +	/* rps wa down ei calculation */
> +	struct intel_rps_ei_calc rps_down_ei;
> +
> +
>  	/* ilk-only ips/rps state. Everything in here is protected by the global
>  	 * mchdev_lock in intel_pm.c */
>  	struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 00b3bfc..a817c43 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1121,6 +1121,123 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> +				struct  intel_rps_ei_calc *rps_ei)
> +{
> +	u32 cz_ts, cz_freq_khz;
> +	u32 render_count, media_count;
> +	u32 elapsed_render, elapsed_media, elapsed_time;
> +	u32 residency = 0;
> +
> +	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> +	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> +	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> +	if (rps_ei->cz_ts_ei == 0) {
> +		rps_ei->cz_ts_ei = cz_ts;
> +		rps_ei->render_ei_c0 = render_count;
> +		rps_ei->media_ei_c0 = media_count;
> +
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> +	rps_ei->cz_ts_ei = cz_ts;
> +
> +	elapsed_render = render_count - rps_ei->render_ei_c0;
> +	rps_ei->render_ei_c0 = render_count;
> +
> +	elapsed_media = media_count - rps_ei->media_ei_c0;
> +	rps_ei->media_ei_c0 = media_count;
> +
> +	/* Convert all the counters into common unit of milli sec */
> +	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> +	elapsed_render /=  cz_freq_khz;
> +	elapsed_media /= cz_freq_khz;
> +
> +	/**

This isn't a kernel-doc comment so the double ** shouldn't be here.

> +	 * Calculate overall C0 residency percentage
> +	 * only if elapsed time is non zero
> +	 */
> +	if (elapsed_time) {
> +		residency =
> +			((max(elapsed_render, elapsed_media) * 100)
> +				/ elapsed_time);
> +	}
> +
> +	return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> +	u32 residency_C0_up = 0, residency_C0_down = 0;
> +	u8 new_delay;
> +
> +	dev_priv->rps.ei_interrupt_count++;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> +	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +
> +	/**

here too

> +	 * To down throttle, C0 residency should be less than down threshold
> +	 * for continous EI intervals. So calculate down EI counters
> +	 * once in VLV_INT_COUNT_FOR_DOWN_EI
> +	 */
> +	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> +		dev_priv->rps.ei_interrupt_count = 0;
> +
> +		residency_C0_down = vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_down_ei);
> +	} else {
> +		residency_C0_up = vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_up_ei);
> +	}
> +
> +	new_delay = dev_priv->rps.cur_delay;
> +
> +	/* C0 residency is greater than UP threshold. Increase Frequency */
> +	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +
> +		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
> +			new_delay = dev_priv->rps.cur_delay + 1;
> +
> +		/**

and here

> +		 * For better performance, jump directly
> +		 * to RPe if we're below it.
> +		 */
> +		if (new_delay < dev_priv->rps.rpe_delay)
> +			new_delay = dev_priv->rps.rpe_delay;
> +
> +	} else if (!dev_priv->rps.ei_interrupt_count &&
> +			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> +		/**

ditto

> +		 * This means, C0 residency is less than down threshold over
> +		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> +		 */
> +		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
> +			new_delay = dev_priv->rps.cur_delay - 1;
> +	}
> +
> +	return new_delay;
> +}
> +
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>  	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1163,6 +1280,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  		else
>  			new_delay = dev_priv->rps.min_delay;
>  		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>  	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>  		if (adj < 0)
>  			adj *= 2;
> @@ -2989,6 +3108,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>  			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>  
>  		dev_priv->pm_irq_mask = 0xffffffff;
> +
>  		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>  		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>  		I915_WRITE(GEN6_PMIER, pm_irqs);
> @@ -4031,7 +4151,11 @@ void intel_irq_init(struct drm_device *dev)
>  	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
>  
>  	/* Let's track the enabled rps events */
> -	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> +	if (IS_VALLEYVIEW(dev))
> +		/* WAUseRC0ResidenncyTurbo:VLV */

I can't see this name in the database (even with the typo fixed).
I do see something called WaGsvRC0ResidencyMethod:vlv which I'm
assuming is the right one.

So apart from the minor nits I listed, my only two remaining issues here
are whether we want to make this optional or not, and whether we should
apply acceleration to the up/down transitions like gen6_pm_rps_work()
does (to ramp the freq up/down faster). I forget if you tested how that
affects the power consumption numbers, or if you just tested the effect
of the __wait_seqno() boost mechanism.

> +		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> +	else
> +		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  
>  	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
>  		    i915_hangcheck_elapsed,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 6174fda..d978b46 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -419,6 +419,7 @@ enum punit_power_well {
>  #define PUNIT_REG_GPU_FREQ_STS			0xd8
>  #define   GENFREQSTATUS				(1<<0)
>  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>  
>  #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>  #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
> @@ -434,6 +435,11 @@ enum punit_power_well {
>  #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>  #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>  
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> +#define VLV_RP_UP_EI_THRESHOLD			90
> +#define VLV_RP_DOWN_EI_THRESHOLD		70
> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
> +
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG				0x8
>  #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
> @@ -4892,6 +4898,7 @@ enum punit_power_well {
>  #define  VLV_GTLC_PW_STATUS			0x130094
>  #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>  #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>  #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>  #define   FORCEWAKE_KERNEL			0x1
>  #define   FORCEWAKE_USER			0x2
> @@ -5019,13 +5026,17 @@ enum punit_power_well {
>  
>  #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>  #define VLV_COUNTER_CONTROL			0x138104
> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
>  #define   VLV_COUNT_RANGE_HIGH			(1<<15)
> +#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
> +#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
>  #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
>  #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
>  #define GEN6_GT_GFX_RC6				0x138108
>  #define GEN6_GT_GFX_RC6p			0x13810C
>  #define GEN6_GT_GFX_RC6pp			0x138110
> -
> +#define VLV_RENDER_C0_COUNT_REG		0x138118
> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
>  #define GEN6_PCODE_MAILBOX			0x138124
>  #define   GEN6_PCODE_READY			(1<<31)
>  #define   GEN6_READ_OC_PARAMS			0xc
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 95b133a..858cd1f 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3096,10 +3096,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>  		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>  				~VLV_GFX_CLK_FORCE_ON_BIT);
>  
> -	/* Unmask Up interrupts */
> -	dev_priv->rps.rp_up_masked = true;
> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> +	/* Unmask Turbo interrupts */
> +	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> +		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
> +	else {
> +		dev_priv->rps.rp_up_masked = true;
> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>  						dev_priv->rps.min_delay);
> +	}
>  }
>  
>  void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3618,6 +3622,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>  
>  	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>  
>  	I915_WRITE(GEN6_RP_CONTROL,
>  		   GEN6_RP_MEDIA_TURBO |
> @@ -3638,9 +3643,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  
>  	/* allows RC6 residency counter to work */
>  	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> +		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
> +				      VLV_RENDER_RC0_COUNT_EN |
>  				      VLV_MEDIA_RC6_COUNT_EN |
>  				      VLV_RENDER_RC6_COUNT_EN));
> +
>  	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>  		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>  
> -- 
> 1.8.4.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv.
  2014-03-15 14:53                 ` [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
@ 2014-03-24 19:26                   ` Ville Syrjälä
  2014-03-24 20:22                     ` Daniel Vetter
  0 siblings, 1 reply; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-24 19:26 UTC (permalink / raw)
  To: deepak.s; +Cc: intel-gfx

On Sat, Mar 15, 2014 at 08:23:22PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@linux.intel.com>
> 
> When we use different rps events for different platforms or due to a WA, we
> might end up doing (vs) everywhere. Instead of this, let's use a variable
> in dev_priv to track the enabled PM interrupts.
> 
> v2: Initialize pm_rps_events in intel_irq_init() (Ville).
> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/i915_irq.c | 17 ++++++++++-------
>  drivers/gpu/drm/i915/intel_pm.c | 11 ++++++-----
>  3 files changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 70fbe90..d522313 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1487,6 +1487,7 @@ typedef struct drm_i915_private {
>  	};
>  	u32 gt_irq_mask;
>  	u32 pm_irq_mask;
> +	u32 pm_rps_events;
>  	u32 pipestat_irq_mask[I915_MAX_PIPES];
>  
>  	struct work_struct hotplug_work;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 37f852d..00b3bfc 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1132,13 +1132,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  	pm_iir = dev_priv->rps.pm_iir;
>  	dev_priv->rps.pm_iir = 0;
>  	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> +	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
>  	/* Make sure we didn't queue anything we're not going to process. */
> -	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
> +	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
>  
> -	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
> +	if ((pm_iir & dev_priv->pm_rps_events) == 0)
>  		return;
>  
>  	mutex_lock(&dev_priv->rps.hw_lock);
> @@ -1555,10 +1555,10 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device *dev, enum pipe pipe)
>   * the work queue. */
>  static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>  {
> -	if (pm_iir & GEN6_PM_RPS_EVENTS) {
> +	if (pm_iir & dev_priv->pm_rps_events) {
>  		spin_lock(&dev_priv->irq_lock);
> -		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS;
> -		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS);
> +		dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events;
> +		snb_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
>  		spin_unlock(&dev_priv->irq_lock);
>  
>  		queue_work(dev_priv->wq, &dev_priv->rps.work);
> @@ -2983,7 +2983,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>  	POSTING_READ(GTIER);
>  
>  	if (INTEL_INFO(dev)->gen >= 6) {
> -		pm_irqs |= GEN6_PM_RPS_EVENTS;
> +		pm_irqs |= dev_priv->pm_rps_events;
>  
>  		if (HAS_VEBOX(dev))
>  			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
> @@ -4030,6 +4030,9 @@ void intel_irq_init(struct drm_device *dev)
>  	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
>  	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
>  
> +	/* Let's track the enabled rps events */
> +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> +
>  	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
>  		    i915_hangcheck_elapsed,
>  		    (unsigned long) dev);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index ad58ce3..95b133a 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3160,7 +3160,8 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> -	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
> +	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> +				~dev_priv->pm_rps_events);
>  	/* Complete PM interrupt masking here doesn't race with the rps work
>  	 * item again unmasking PM interrupts because that is using a different
>  	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
> @@ -3170,7 +3171,7 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>  	dev_priv->rps.pm_iir = 0;
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
>  }
>  
>  static void gen6_disable_rps(struct drm_device *dev)
> @@ -3232,12 +3233,12 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
>  
>  	spin_lock_irq(&dev_priv->irq_lock);
>  	WARN_ON(dev_priv->rps.pm_iir);
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
> +	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  
>  	/* only unmask PM interrupts we need. Mask all others. */
> -	enabled_intrs = GEN6_PM_RPS_EVENTS;
> +	enabled_intrs = dev_priv->pm_rps_events;
>  
>  	/* IVB and SNB hard hangs on looping batchbuffer
>  	 * if GEN6_PM_UP_EI_EXPIRED is masked.
> -- 
> 1.8.4.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v3 3/3] drm/i915: Add boot paramter to control rps boost at boot time.
  2014-03-15 14:53                 ` [PATCH v3 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
@ 2014-03-24 19:27                   ` Ville Syrjälä
  0 siblings, 0 replies; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-24 19:27 UTC (permalink / raw)
  To: deepak.s; +Cc: intel-gfx

On Sat, Mar 15, 2014 at 08:23:24PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@linux.intel.com>
> 
> We are adding a module paramter to control rps boost. By default, we
> enable the boost for better performace. Based on the need (perf/power)
> we can either enable/disable.
> 
> v2: Addressed rps default comment (Jani)
> 
> v3: Use bool to represent the boot parameter (Ville).
> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_drv.h    | 1 +
>  drivers/gpu/drm/i915/i915_gem.c    | 2 +-
>  drivers/gpu/drm/i915/i915_params.c | 5 +++++
>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f23c258..6d91b1c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2120,6 +2120,7 @@ struct i915_params {
>  	bool prefault_disable;
>  	bool reset;
>  	bool disable_display;
> +	bool enable_rps_boost;
>  };
>  extern struct i915_params i915 __read_mostly;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 92b0b41..b9c52b8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1042,7 +1042,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  
>  	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
>  
> -	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv)) {
> +	if (INTEL_INFO(dev)->gen >= 6 && can_wait_boost(file_priv) && i915.enable_rps_boost) {
>  		gen6_rps_boost(dev_priv);
>  		if (file_priv)
>  			mod_delayed_work(dev_priv->wq,
> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> index a66ffb6..772659f 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -49,6 +49,7 @@ struct i915_params i915 __read_mostly = {
>  	.invert_brightness = 0,
>  	.disable_display = 0,
>  	.enable_cmd_parser = 0,
> +	.enable_rps_boost = true,
>  };
>  
>  module_param_named(modeset, i915.modeset, int, 0400);
> @@ -162,3 +163,7 @@ MODULE_PARM_DESC(disable_display, "Disable display (default: false)");
>  module_param_named(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
>  MODULE_PARM_DESC(enable_cmd_parser,
>  		 "Enable command parsing (1=enabled, 0=disabled [default])");
> +
> +module_param_named(enable_rps_boost, i915.enable_rps_boost, bool, 0600);
> +MODULE_PARM_DESC(enable_rps_boost,
> +		 "Enable/Disable boost RPS frequency (default: true)");
> -- 
> 1.8.4.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv.
  2014-03-24 19:26                   ` Ville Syrjälä
@ 2014-03-24 20:22                     ` Daniel Vetter
  0 siblings, 0 replies; 30+ messages in thread
From: Daniel Vetter @ 2014-03-24 20:22 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Mon, Mar 24, 2014 at 09:26:52PM +0200, Ville Syrjälä wrote:
> On Sat, Mar 15, 2014 at 08:23:22PM +0530, deepak.s@linux.intel.com wrote:
> > From: Deepak S <deepak.s@linux.intel.com>
> > 
> > When we use different rps events for different platform or due to wa, we
> > mgiht end up doing (vs) everywahere. Insted of this, Let's use a variable
> > in dev_priv to track the enabled PM interrupts

I didn't follow this really, so I've reworded it a bit and fixed the
spelling - or tried to at least ;-)
> > 
> > v2: Initialize pm_rps_events in intel_irq_init() (Ville).
> > 
> > Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> 
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Queued for -next, thanks for the patch.
-Daniel

> 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h |  1 +
> >  drivers/gpu/drm/i915/i915_irq.c | 17 ++++++++++-------
> >  drivers/gpu/drm/i915/intel_pm.c | 11 ++++++-----
> >  3 files changed, 17 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 70fbe90..d522313 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1487,6 +1487,7 @@ typedef struct drm_i915_private {
> >  	};
> >  	u32 gt_irq_mask;
> >  	u32 pm_irq_mask;
> > +	u32 pm_rps_events;
> >  	u32 pipestat_irq_mask[I915_MAX_PIPES];
> >  
> >  	struct work_struct hotplug_work;
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 37f852d..00b3bfc 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1132,13 +1132,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
> >  	pm_iir = dev_priv->rps.pm_iir;
> >  	dev_priv->rps.pm_iir = 0;
> >  	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
> > -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> > +	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
> >  	spin_unlock_irq(&dev_priv->irq_lock);
> >  
> >  	/* Make sure we didn't queue anything we're not going to process. */
> > -	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
> > +	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
> >  
> > -	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
> > +	if ((pm_iir & dev_priv->pm_rps_events) == 0)
> >  		return;
> >  
> >  	mutex_lock(&dev_priv->rps.hw_lock);
> > @@ -1555,10 +1555,10 @@ static void i9xx_pipe_crc_irq_handler(struct drm_device *dev, enum pipe pipe)
> >   * the work queue. */
> >  static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
> >  {
> > -	if (pm_iir & GEN6_PM_RPS_EVENTS) {
> > +	if (pm_iir & dev_priv->pm_rps_events) {
> >  		spin_lock(&dev_priv->irq_lock);
> > -		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS;
> > -		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS);
> > +		dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events;
> > +		snb_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
> >  		spin_unlock(&dev_priv->irq_lock);
> >  
> >  		queue_work(dev_priv->wq, &dev_priv->rps.work);
> > @@ -2983,7 +2983,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
> >  	POSTING_READ(GTIER);
> >  
> >  	if (INTEL_INFO(dev)->gen >= 6) {
> > -		pm_irqs |= GEN6_PM_RPS_EVENTS;
> > +		pm_irqs |= dev_priv->pm_rps_events;
> >  
> >  		if (HAS_VEBOX(dev))
> >  			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
> > @@ -4030,6 +4030,9 @@ void intel_irq_init(struct drm_device *dev)
> >  	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
> >  	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
> >  
> > +	/* Let's track the enabled rps events */
> > +	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> > +
> >  	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
> >  		    i915_hangcheck_elapsed,
> >  		    (unsigned long) dev);
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index ad58ce3..95b133a 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -3160,7 +3160,8 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  
> >  	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> > -	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
> > +	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> > +				~dev_priv->pm_rps_events);
> >  	/* Complete PM interrupt masking here doesn't race with the rps work
> >  	 * item again unmasking PM interrupts because that is using a different
> >  	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
> > @@ -3170,7 +3171,7 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
> >  	dev_priv->rps.pm_iir = 0;
> >  	spin_unlock_irq(&dev_priv->irq_lock);
> >  
> > -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> > +	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
> >  }
> >  
> >  static void gen6_disable_rps(struct drm_device *dev)
> > @@ -3232,12 +3233,12 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
> >  
> >  	spin_lock_irq(&dev_priv->irq_lock);
> >  	WARN_ON(dev_priv->rps.pm_iir);
> > -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> > -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> > +	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
> > +	I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
> >  	spin_unlock_irq(&dev_priv->irq_lock);
> >  
> >  	/* only unmask PM interrupts we need. Mask all others. */
> > -	enabled_intrs = GEN6_PM_RPS_EVENTS;
> > +	enabled_intrs = dev_priv->pm_rps_events;
> >  
> >  	/* IVB and SNB hard hangs on looping batchbuffer
> >  	 * if GEN6_PM_UP_EI_EXPIRED is masked.
> > -- 
> > 1.8.4.2
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-15 14:53               ` [PATCH v4 0/3] WA for Turbo and RC6 to work together deepak.s
                                   ` (2 preceding siblings ...)
  2014-03-15 14:53                 ` [PATCH v3 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
@ 2014-03-27  6:35                 ` deepak.s
  2014-03-28 12:53                   ` Ville Syrjälä
  2014-03-30  6:28                   ` [PATCH v6] " deepak.s
  3 siblings, 2 replies; 30+ messages in thread
From: deepak.s @ 2014-03-27  6:35 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@linux.intel.com>

With RC6 enabled, BYT has a HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Resolved comments and remove kernel-doc style comments. (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +++++
 drivers/gpu/drm/i915/i915_irq.c | 135 +++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h |  12 +++-
 drivers/gpu/drm/i915/intel_pm.c |  13 +++-
 4 files changed, 170 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c212f3..c48ea93 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -816,6 +816,12 @@ struct i915_suspend_saved_registers {
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+	u32 cz_ts_ei;
+	u32 render_ei_c0;
+	u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
 	bool rp_up_masked;
 	bool rp_down_masked;
 
+	u32 ei_interrupt_count;
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
+	/* rps wa up ei calculation */
+	struct intel_rps_ei_calc rps_up_ei;
+
+	/* rps wa down ei calculation */
+	struct intel_rps_ei_calc rps_down_ei;
+
+
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
 	struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 300f127..4b421b4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1121,6 +1121,132 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
 	}
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+				struct  intel_rps_ei_calc *rps_ei)
+{
+	u32 cz_ts, cz_freq_khz;
+	u32 render_count, media_count;
+	u32 elapsed_render, elapsed_media, elapsed_time;
+	u32 residency = 0;
+
+	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (rps_ei->cz_ts_ei == 0) {
+		rps_ei->cz_ts_ei = cz_ts;
+		rps_ei->render_ei_c0 = render_count;
+		rps_ei->media_ei_c0 = media_count;
+
+		return dev_priv->rps.cur_freq;
+	}
+
+	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+	rps_ei->cz_ts_ei = cz_ts;
+
+	elapsed_render = render_count - rps_ei->render_ei_c0;
+	rps_ei->render_ei_c0 = render_count;
+
+	elapsed_media = media_count - rps_ei->media_ei_c0;
+	rps_ei->media_ei_c0 = media_count;
+
+	/* Convert all the counters into common unit of milli sec */
+	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+	elapsed_render /=  cz_freq_khz;
+	elapsed_media /= cz_freq_khz;
+
+	/*
+	 * Calculate overall C0 residency percentage
+	 * only if elapsed time is non zero
+	 */
+	if (elapsed_time) {
+		residency =
+			((max(elapsed_render, elapsed_media) * 100)
+				/ elapsed_time);
+	}
+
+	return residency;
+}
+
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	u8 new_delay, adj;
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_freq;
+	}
+
+
+	/*
+	 * To down throttle, C0 residency should be less than down threshold
+	 * for continous EI intervals. So calculate down EI counters
+	 * once in VLV_INT_COUNT_FOR_DOWN_EI
+	 */
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_up_ei);
+	}
+
+	new_delay = dev_priv->rps.cur_freq;
+
+	adj = dev_priv->rps.last_adj;
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else
+			adj = 1;
+
+		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+
+		/*
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < dev_priv->rps.efficient_freq)
+			new_delay = dev_priv->rps.efficient_freq;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		if (adj < 0)
+			adj *= 2;
+		else
+			adj = -1;
+		/*
+		 * This means, C0 residency is less than down threshold over
+		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		 */
+		if (dev_priv->rps.cur_freq > dev_priv->rps.max_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+	}
+
+	return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1163,6 +1289,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
@@ -3053,6 +3181,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
+
 		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
 		I915_WRITE(GEN6_PMIER, pm_irqs);
@@ -4095,7 +4224,11 @@ void intel_irq_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
 	/* Let's track the enabled rps events */
-	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+	if (IS_VALLEYVIEW(dev))
+		/* WAUseRC0ResidenncyTurbo:VLV */
+		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 927a7c1..e334bf1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -506,6 +506,7 @@ enum punit_power_well {
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
 #define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
+#define PUNIT_REG_CZ_TIMESTAMP			0xce
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -521,6 +522,11 @@ enum punit_power_well {
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
+#define VLV_RP_UP_EI_THRESHOLD			90
+#define VLV_RP_DOWN_EI_THRESHOLD		70
+#define VLV_INT_COUNT_FOR_DOWN_EI		5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -4984,6 +4990,7 @@ enum punit_power_well {
 #define  VLV_GTLC_PW_STATUS			0x130094
 #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
 #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
@@ -5112,12 +5119,15 @@ enum punit_power_well {
 #define GEN6_GT_GFX_RC6_LOCKED			0x138104
 #define VLV_COUNTER_CONTROL			0x138104
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
+#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
+#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
 #define GEN6_GT_GFX_RC6				0x138108
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
-
+#define VLV_RENDER_C0_COUNT_REG		0x138118
+#define VLV_MEDIA_C0_COUNT_REG			0x13811C
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
 #define   GEN6_READ_OC_PARAMS			0xc
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b66a43b..30730be 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3097,9 +3097,13 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 				~VLV_GFX_CLK_FORCE_ON_BIT);
 
 	/* Unmask Up interrupts */
-	dev_priv->rps.rp_up_masked = true;
-	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
+	else {
+		dev_priv->rps.rp_up_masked = true;
+		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
 						dev_priv->rps.min_freq_softlimit);
+	}
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3619,6 +3623,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
@@ -3639,9 +3644,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
 	/* allows RC6 residency counter to work */
 	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
+				      VLV_RENDER_RC0_COUNT_EN |
 				      VLV_MEDIA_RC6_COUNT_EN |
 				      VLV_RENDER_RC6_COUNT_EN));
+
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-27  6:35                 ` [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
@ 2014-03-28 12:53                   ` Ville Syrjälä
  2014-03-28 13:06                     ` Chris Wilson
  2014-03-30  6:28                   ` [PATCH v6] " deepak.s
  1 sibling, 1 reply; 30+ messages in thread
From: Ville Syrjälä @ 2014-03-28 12:53 UTC (permalink / raw)
  To: deepak.s; +Cc: intel-gfx

On Thu, Mar 27, 2014 at 12:05:01PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@linux.intel.com>
> 
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
> on increasing/decreasing the freq. This logic will monitor C0
> counters of render/media power-wells over EI period and takes
> necessary action based on these values
> 
> v2: Refactor duplicate code. (Ville)
> 
> v3: Reformat the comments. (Ville)
> 
> v4: Enable required counters and remove unwanted code (Ville)
> 
> v5: Resolved comments and remove kernel-doc style comments. (Ville)

That doesn't tell me that you added the frequency change acceleration
support.

> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  15 +++++
>  drivers/gpu/drm/i915/i915_irq.c | 135 +++++++++++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_reg.h |  12 +++-
>  drivers/gpu/drm/i915/intel_pm.c |  13 +++-
>  4 files changed, 170 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7c212f3..c48ea93 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -816,6 +816,12 @@ struct i915_suspend_saved_registers {
>  	u32 savePCH_PORT_HOTPLUG;
>  };
>  
> +struct intel_rps_ei_calc {
> +	u32 cz_ts_ei;
> +	u32 render_ei_c0;
> +	u32 media_ei_c0;
> +};
> +
>  struct intel_gen6_power_mgmt {
>  	/* work and pm_iir are protected by dev_priv->irq_lock */
>  	struct work_struct work;
> @@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
>  	bool rp_up_masked;
>  	bool rp_down_masked;
>  
> +	u32 ei_interrupt_count;
> +
>  	int last_adj;
>  	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
> @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
>  	/* gen6+ rps state */
>  	struct intel_gen6_power_mgmt rps;
>  
> +	/* rps wa up ei calculation */
> +	struct intel_rps_ei_calc rps_up_ei;
> +
> +	/* rps wa down ei calculation */
> +	struct intel_rps_ei_calc rps_down_ei;
> +
> +
>  	/* ilk-only ips/rps state. Everything in here is protected by the global
>  	 * mchdev_lock in intel_pm.c */
>  	struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 300f127..4b421b4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1121,6 +1121,132 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>  	}
>  }
>  
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> +				struct  intel_rps_ei_calc *rps_ei)
> +{
> +	u32 cz_ts, cz_freq_khz;
> +	u32 render_count, media_count;
> +	u32 elapsed_render, elapsed_media, elapsed_time;
> +	u32 residency = 0;
> +
> +	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> +	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> +	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> +	if (rps_ei->cz_ts_ei == 0) {
> +		rps_ei->cz_ts_ei = cz_ts;
> +		rps_ei->render_ei_c0 = render_count;
> +		rps_ei->media_ei_c0 = media_count;
> +
> +		return dev_priv->rps.cur_freq;
> +	}
> +
> +	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> +	rps_ei->cz_ts_ei = cz_ts;
> +
> +	elapsed_render = render_count - rps_ei->render_ei_c0;
> +	rps_ei->render_ei_c0 = render_count;
> +
> +	elapsed_media = media_count - rps_ei->media_ei_c0;
> +	rps_ei->media_ei_c0 = media_count;
> +
> +	/* Convert all the counters into common unit of milli sec */
> +	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> +	elapsed_render /=  cz_freq_khz;
> +	elapsed_media /= cz_freq_khz;
> +
> +	/*
> +	 * Calculate overall C0 residency percentage
> +	 * only if elapsed time is non zero
> +	 */
> +	if (elapsed_time) {
> +		residency =
> +			((max(elapsed_render, elapsed_media) * 100)
> +				/ elapsed_time);
> +	}
> +
> +	return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> +	u32 residency_C0_up = 0, residency_C0_down = 0;
> +	u8 new_delay, adj;
> +
> +	dev_priv->rps.ei_interrupt_count++;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> +	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> +		return dev_priv->rps.cur_freq;
> +	}
> +
> +
> +	/*
> +	 * To down throttle, C0 residency should be less than down threshold
> +	 * for continous EI intervals. So calculate down EI counters
> +	 * once in VLV_INT_COUNT_FOR_DOWN_EI
> +	 */
> +	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> +		dev_priv->rps.ei_interrupt_count = 0;
> +
> +		residency_C0_down = vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_down_ei);
> +	} else {
> +		residency_C0_up = vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_up_ei);
> +	}
> +
> +	new_delay = dev_priv->rps.cur_freq;
> +
> +	adj = dev_priv->rps.last_adj;
> +	/* C0 residency is greater than UP threshold. Increase Frequency */
> +	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +		if (adj > 0)
> +			adj *= 2;
> +		else
> +			adj = 1;
> +
> +		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
> +			new_delay = dev_priv->rps.cur_freq + adj;
> +
> +		/*
> +		 * For better performance, jump directly
> +		 * to RPe if we're below it.
> +		 */
> +		if (new_delay < dev_priv->rps.efficient_freq)
> +			new_delay = dev_priv->rps.efficient_freq;
> +
> +	} else if (!dev_priv->rps.ei_interrupt_count &&
> +			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> +		if (adj < 0)
> +			adj *= 2;
> +		else
> +			adj = -1;
> +		/*
> +		 * This means, C0 residency is less than down threshold over
> +		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> +		 */
> +		if (dev_priv->rps.cur_freq > dev_priv->rps.max_freq_softlimit)
> +			new_delay = dev_priv->rps.cur_freq + adj;
> +	}
> +
> +	return new_delay;
> +}
> +
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>  	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1163,6 +1289,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  		else
>  			new_delay = dev_priv->rps.min_freq_softlimit;
>  		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>  	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>  		if (adj < 0)
>  			adj *= 2;
> @@ -3053,6 +3181,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>  			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>  
>  		dev_priv->pm_irq_mask = 0xffffffff;
> +
>  		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>  		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>  		I915_WRITE(GEN6_PMIER, pm_irqs);
> @@ -4095,7 +4224,11 @@ void intel_irq_init(struct drm_device *dev)
>  	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
>  
>  	/* Let's track the enabled rps events */
> -	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> +	if (IS_VALLEYVIEW(dev))
> +		/* WAUseRC0ResidenncyTurbo:VLV */

Looks like you forgot to fix the w/a comment.

Otherwise the patch looks good to me, so if you fix those two small
issues you can add:
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> +		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> +	else
> +		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  
>  	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
>  		    i915_hangcheck_elapsed,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 927a7c1..e334bf1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -506,6 +506,7 @@ enum punit_power_well {
>  #define PUNIT_REG_GPU_FREQ_STS			0xd8
>  #define   GENFREQSTATUS				(1<<0)
>  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>  
>  #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>  #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
> @@ -521,6 +522,11 @@ enum punit_power_well {
>  #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>  #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>  
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> +#define VLV_RP_UP_EI_THRESHOLD			90
> +#define VLV_RP_DOWN_EI_THRESHOLD		70
> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
> +
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG				0x8
>  #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
> @@ -4984,6 +4990,7 @@ enum punit_power_well {
>  #define  VLV_GTLC_PW_STATUS			0x130094
>  #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>  #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>  #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>  #define   FORCEWAKE_KERNEL			0x1
>  #define   FORCEWAKE_USER			0x2
> @@ -5112,12 +5119,15 @@ enum punit_power_well {
>  #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>  #define VLV_COUNTER_CONTROL			0x138104
>  #define   VLV_COUNT_RANGE_HIGH			(1<<15)
> +#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
> +#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
>  #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
>  #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
>  #define GEN6_GT_GFX_RC6				0x138108
>  #define GEN6_GT_GFX_RC6p			0x13810C
>  #define GEN6_GT_GFX_RC6pp			0x138110
> -
> +#define VLV_RENDER_C0_COUNT_REG		0x138118
> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
>  #define GEN6_PCODE_MAILBOX			0x138124
>  #define   GEN6_PCODE_READY			(1<<31)
>  #define   GEN6_READ_OC_PARAMS			0xc
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b66a43b..30730be 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3097,9 +3097,13 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>  				~VLV_GFX_CLK_FORCE_ON_BIT);
>  
>  	/* Unmask Up interrupts */
> -	dev_priv->rps.rp_up_masked = true;
> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> +	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> +		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
> +	else {
> +		dev_priv->rps.rp_up_masked = true;
> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>  						dev_priv->rps.min_freq_softlimit);
> +	}
>  }
>  
>  void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3619,6 +3623,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>  
>  	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>  
>  	I915_WRITE(GEN6_RP_CONTROL,
>  		   GEN6_RP_MEDIA_TURBO |
> @@ -3639,9 +3644,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  
>  	/* allows RC6 residency counter to work */
>  	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> +		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
> +				      VLV_RENDER_RC0_COUNT_EN |
>  				      VLV_MEDIA_RC6_COUNT_EN |
>  				      VLV_RENDER_RC6_COUNT_EN));
> +
>  	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>  		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-28 12:53                   ` Ville Syrjälä
@ 2014-03-28 13:06                     ` Chris Wilson
  2014-03-30  6:27                       ` Deepak S
  0 siblings, 1 reply; 30+ messages in thread
From: Chris Wilson @ 2014-03-28 13:06 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Fri, Mar 28, 2014 at 02:53:48PM +0200, Ville Syrjälä wrote:
> On Thu, Mar 27, 2014 at 12:05:01PM +0530, deepak.s@linux.intel.com wrote:
> > @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
> >  	/* gen6+ rps state */
> >  	struct intel_gen6_power_mgmt rps;
> >  
> > +	/* rps wa up ei calculation */
> > +	struct intel_rps_ei_calc rps_up_ei;
> > +
> > +	/* rps wa down ei calculation */
> > +	struct intel_rps_ei_calc rps_down_ei;

I could have sworn there was a field for holding all the interesting rps
state together. 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-28 13:06                     ` Chris Wilson
@ 2014-03-30  6:27                       ` Deepak S
  0 siblings, 0 replies; 30+ messages in thread
From: Deepak S @ 2014-03-30  6:27 UTC (permalink / raw)
  To: Chris Wilson, Ville Syrjälä, intel-gfx


On Friday 28 March 2014 06:36 PM, Chris Wilson wrote:
> On Fri, Mar 28, 2014 at 02:53:48PM +0200, Ville Syrjälä wrote:
>> On Thu, Mar 27, 2014 at 12:05:01PM +0530, deepak.s@linux.intel.com wrote:
>>> @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
>>>   	/* gen6+ rps state */
>>>   	struct intel_gen6_power_mgmt rps;
>>>   
>>> +	/* rps wa up ei calculation */
>>> +	struct intel_rps_ei_calc rps_up_ei;
>>> +
>>> +	/* rps wa down ei calculation */
>>> +	struct intel_rps_ei_calc rps_down_ei;
> I could have sworn there was a field for holding all the interesting rps
> state together.
> -Chris
>
Hi Chris,

Earlier I was using the rps structure to hold the WA rps state, but there was a lot of duplicated code; to avoid that I created a separate structure.
We can still re-factor and use the rps structure to hold the WA state. Let me know if we need to create a separate patch for the re-factor or add it to the WA patch itself.

I am thinking of adding a new patch on top of this.

Thanks

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-27  6:35                 ` [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
  2014-03-28 12:53                   ` Ville Syrjälä
@ 2014-03-30  6:28                   ` deepak.s
  2014-05-13 22:12                     ` Jesse Barnes
  1 sibling, 1 reply; 30+ messages in thread
From: deepak.s @ 2014-03-30  6:28 UTC (permalink / raw)
  To: intel-gfx

From: Deepak S <deepak.s@linux.intel.com>

With RC6 enabled, BYT has a HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW based Gfx busyness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and remove kernel-doc
style comments. (Ville)

v6: Updated comment section and Fix w/a comment. (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |  15 +++++
 drivers/gpu/drm/i915/i915_irq.c | 135 +++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h |  12 +++-
 drivers/gpu/drm/i915/intel_pm.c |  13 +++-
 4 files changed, 170 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c212f3..c48ea93 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -816,6 +816,12 @@ struct i915_suspend_saved_registers {
 	u32 savePCH_PORT_HOTPLUG;
 };
 
+struct intel_rps_ei_calc {
+	u32 cz_ts_ei;
+	u32 render_ei_c0;
+	u32 media_ei_c0;
+};
+
 struct intel_gen6_power_mgmt {
 	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
@@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
 	bool rp_up_masked;
 	bool rp_down_masked;
 
+	u32 ei_interrupt_count;
+
 	int last_adj;
 	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 
@@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
+	/* rps wa up ei calculation */
+	struct intel_rps_ei_calc rps_up_ei;
+
+	/* rps wa down ei calculation */
+	struct intel_rps_ei_calc rps_down_ei;
+
+
 	/* ilk-only ips/rps state. Everything in here is protected by the global
 	 * mchdev_lock in intel_pm.c */
 	struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 300f127..341843d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1121,6 +1121,132 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
 	}
 }
 
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+				struct  intel_rps_ei_calc *rps_ei)
+{
+	u32 cz_ts, cz_freq_khz;
+	u32 render_count, media_count;
+	u32 elapsed_render, elapsed_media, elapsed_time;
+	u32 residency = 0;
+
+	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
+	if (rps_ei->cz_ts_ei == 0) {
+		rps_ei->cz_ts_ei = cz_ts;
+		rps_ei->render_ei_c0 = render_count;
+		rps_ei->media_ei_c0 = media_count;
+
+		return dev_priv->rps.cur_freq;
+	}
+
+	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+	rps_ei->cz_ts_ei = cz_ts;
+
+	elapsed_render = render_count - rps_ei->render_ei_c0;
+	rps_ei->render_ei_c0 = render_count;
+
+	elapsed_media = media_count - rps_ei->media_ei_c0;
+	rps_ei->media_ei_c0 = media_count;
+
+	/* Convert all the counters into common unit of milli sec */
+	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+	elapsed_render /=  cz_freq_khz;
+	elapsed_media /= cz_freq_khz;
+
+	/*
+	 * Calculate overall C0 residency percentage
+	 * only if elapsed time is non zero
+	 */
+	if (elapsed_time) {
+		residency =
+			((max(elapsed_render, elapsed_media) * 100)
+				/ elapsed_time);
+	}
+
+	return residency;
+}
+
+
+/**
+ * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
+ * busy-ness calculated from C0 counters of render & media power wells
+ * @dev_priv: DRM device private
+ *
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+	u32 residency_C0_up = 0, residency_C0_down = 0;
+	u8 new_delay, adj;
+
+	dev_priv->rps.ei_interrupt_count++;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+
+	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+		return dev_priv->rps.cur_freq;
+	}
+
+
+	/*
+	 * To down throttle, C0 residency should be less than down threshold
+	 * for continuous EI intervals. So calculate down EI counters
+	 * once in VLV_INT_COUNT_FOR_DOWN_EI
+	 */
+	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+
+		dev_priv->rps.ei_interrupt_count = 0;
+
+		residency_C0_down = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_down_ei);
+	} else {
+		residency_C0_up = vlv_c0_residency(dev_priv,
+						&dev_priv->rps_up_ei);
+	}
+
+	new_delay = dev_priv->rps.cur_freq;
+
+	adj = dev_priv->rps.last_adj;
+	/* C0 residency is greater than UP threshold. Increase Frequency */
+	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else
+			adj = 1;
+
+		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+
+		/*
+		 * For better performance, jump directly
+		 * to RPe if we're below it.
+		 */
+		if (new_delay < dev_priv->rps.efficient_freq)
+			new_delay = dev_priv->rps.efficient_freq;
+
+	} else if (!dev_priv->rps.ei_interrupt_count &&
+			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		if (adj < 0)
+			adj *= 2;
+		else
+			adj = -1;
+		/*
+		 * This means, C0 residency is less than down threshold over
+		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
+		 */
+		if (dev_priv->rps.cur_freq > dev_priv->rps.max_freq_softlimit)
+			new_delay = dev_priv->rps.cur_freq + adj;
+	}
+
+	return new_delay;
+}
+
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1163,6 +1289,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		else
 			new_delay = dev_priv->rps.min_freq_softlimit;
 		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
 	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
 		if (adj < 0)
 			adj *= 2;
@@ -3053,6 +3181,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
 		dev_priv->pm_irq_mask = 0xffffffff;
+
 		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
 		I915_WRITE(GEN6_PMIER, pm_irqs);
@@ -4095,7 +4224,11 @@ void intel_irq_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
 	/* Let's track the enabled rps events */
-	dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
+	if (IS_VALLEYVIEW(dev))
+		/* WaGsvRC0ResidencyMethod:VLV */
+		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+	else
+		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
 
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 927a7c1..e334bf1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -506,6 +506,7 @@ enum punit_power_well {
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
 #define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
+#define PUNIT_REG_CZ_TIMESTAMP			0xce
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
 #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
@@ -521,6 +522,11 @@ enum punit_power_well {
 #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
 #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
 
+#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
+#define VLV_RP_UP_EI_THRESHOLD			90
+#define VLV_RP_DOWN_EI_THRESHOLD		70
+#define VLV_INT_COUNT_FOR_DOWN_EI		5
+
 /* vlv2 north clock has */
 #define CCK_FUSE_REG				0x8
 #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
@@ -4984,6 +4990,7 @@ enum punit_power_well {
 #define  VLV_GTLC_PW_STATUS			0x130094
 #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
 #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
+#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
@@ -5112,12 +5119,15 @@ enum punit_power_well {
 #define GEN6_GT_GFX_RC6_LOCKED			0x138104
 #define VLV_COUNTER_CONTROL			0x138104
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
+#define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
+#define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
 #define GEN6_GT_GFX_RC6				0x138108
 #define GEN6_GT_GFX_RC6p			0x13810C
 #define GEN6_GT_GFX_RC6pp			0x138110
-
+#define VLV_RENDER_C0_COUNT_REG		0x138118
+#define VLV_MEDIA_C0_COUNT_REG			0x13811C
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
 #define   GEN6_READ_OC_PARAMS			0xc
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b66a43b..30730be 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3097,9 +3097,13 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 				~VLV_GFX_CLK_FORCE_ON_BIT);
 
 	/* Unmask Up interrupts */
-	dev_priv->rps.rp_up_masked = true;
-	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+	if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
+		I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
+	else {
+		dev_priv->rps.rp_up_masked = true;
+		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
 						dev_priv->rps.min_freq_softlimit);
+	}
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3619,6 +3623,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
 	I915_WRITE(GEN6_RP_CONTROL,
 		   GEN6_RP_MEDIA_TURBO |
@@ -3639,9 +3644,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
 
 	/* allows RC6 residency counter to work */
 	I915_WRITE(VLV_COUNTER_CONTROL,
-		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+		   _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
+				      VLV_RENDER_RC0_COUNT_EN |
 				      VLV_MEDIA_RC6_COUNT_EN |
 				      VLV_RENDER_RC6_COUNT_EN));
+
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH v6] drm/i915/vlv: WA for Turbo and RC6 to work together.
  2014-03-30  6:28                   ` [PATCH v6] " deepak.s
@ 2014-05-13 22:12                     ` Jesse Barnes
  0 siblings, 0 replies; 30+ messages in thread
From: Jesse Barnes @ 2014-05-13 22:12 UTC (permalink / raw)
  To: deepak.s; +Cc: intel-gfx

On Sun, 30 Mar 2014 11:58:48 +0530
deepak.s@linux.intel.com wrote:

> @@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
>  	bool rp_up_masked;
>  	bool rp_down_masked;
>  
> +	u32 ei_interrupt_count;
> +
>  	int last_adj;
>  	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>  
> @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
>  	/* gen6+ rps state */
>  	struct intel_gen6_power_mgmt rps;
>  
> +	/* rps wa up ei calculation */
> +	struct intel_rps_ei_calc rps_up_ei;
> +
> +	/* rps wa down ei calculation */
> +	struct intel_rps_ei_calc rps_down_ei;
> +

I think Chris meant that these bits belonged in intel_gen6_power_mgmt
too.

Other than that it looks like Ville has given this his r-b so it ought
to be fine.

-- 
Jesse Barnes, Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2014-05-13 22:37 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-03  6:05 [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
2014-03-04 14:20 ` S, Deepak
2014-03-05 12:11 ` Ville Syrjälä
2014-03-05 12:30   ` S, Deepak
2014-03-13 16:00     ` [PATCH v3 0/3] " deepak.s
2014-03-13 16:00       ` [PATCH 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
2014-03-13 18:16         ` Ville Syrjälä
2014-03-13 18:43           ` S, Deepak
2014-03-13 18:59             ` Ville Syrjälä
2014-03-15 14:53               ` [PATCH v4 0/3] WA for Turbo and RC6 to work together deepak.s
2014-03-15 14:53                 ` [PATCH v2 1/3] drm/i915: Track the enabled PM interrupts in dev_priv deepak.s
2014-03-24 19:26                   ` Ville Syrjälä
2014-03-24 20:22                     ` Daniel Vetter
2014-03-15 14:53                 ` [PATCH v4 2/3] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
2014-03-24 19:26                   ` Ville Syrjälä
2014-03-15 14:53                 ` [PATCH v3 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
2014-03-24 19:27                   ` Ville Syrjälä
2014-03-27  6:35                 ` [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together deepak.s
2014-03-28 12:53                   ` Ville Syrjälä
2014-03-28 13:06                     ` Chris Wilson
2014-03-30  6:27                       ` Deepak S
2014-03-30  6:28                   ` [PATCH v6] " deepak.s
2014-05-13 22:12                     ` Jesse Barnes
2014-03-13 16:00       ` [PATCH v3 2/3] " deepak.s
2014-03-13 18:17         ` Ville Syrjälä
2014-03-13 18:40           ` S, Deepak
2014-03-13 18:57             ` Ville Syrjälä
2014-03-13 16:00       ` [PATCH v2 3/3] drm/i915: Add boot paramter to control rps boost at boot time deepak.s
2014-03-13 18:16         ` Ville Syrjälä
2014-03-13 18:46           ` S, Deepak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox