From mboxrd@z Thu Jan 1 00:00:00 1970 From: "S, Deepak" Subject: Re: [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together. Date: Tue, 04 Mar 2014 19:50:02 +0530 Message-ID: <5315E112.8090005@intel.com> References: <1393826750-32167-1-git-send-email-deepak.s@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by gabe.freedesktop.org (Postfix) with ESMTP id 3D71BFC0B7 for ; Tue, 4 Mar 2014 06:20:15 -0800 (PST) In-Reply-To: <1393826750-32167-1-git-send-email-deepak.s@intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces@lists.freedesktop.org Errors-To: intel-gfx-bounces@lists.freedesktop.org To: intel-gfx@lists.freedesktop.org, =?ISO-8859-1?Q?Ville_Syrj=E4l=E4?= List-Id: intel-gfx@lists.freedesktop.org Hi Ville, Please review the patch and share the comments Thanks Deepak On 3/3/2014 11:35 AM, deepak.s@intel.com wrote: > From: Deepak S > > With RC6 enabled, BYT has an HW issue in determining the right > Gfx busyness. > WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide > on increasing/decreasing the freq. This logic will monitor C0 > counters of render/media power-wells over EI period and takes > necessary action based on these values > > v2: Refactor duplicate code. 
(ville) > > Signed-off-by: Deepak S > > --- > drivers/gpu/drm/i915/i915_drv.h | 19 ++++++ > drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++-- > drivers/gpu/drm/i915/i915_reg.h | 15 +++++ > drivers/gpu/drm/i915/intel_pm.c | 50 ++++++++++---- > 4 files changed, 213 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 728b9c3..2baeeef 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -957,6 +957,12 @@ struct i915_suspend_saved_registers { > u32 savePCH_PORT_HOTPLUG; > }; > > +struct intel_rps_ei_calc { > + u32 cz_ts_ei; > + u32 render_ei_c0; > + u32 media_ei_c0; > +}; > + > struct intel_gen6_power_mgmt { > /* work and pm_iir are protected by dev_priv->irq_lock */ > struct work_struct work; > @@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt { > u8 rp1_delay; > u8 rp0_delay; > u8 hw_max; > + u8 hw_min; > > bool rp_up_masked; > bool rp_down_masked; > > + u32 cz_freq; > + u32 ei_interrupt_count; > + > + bool use_RC0_residency_for_turbo; > + > int last_adj; > enum { LOW_POWER, BETWEEN, HIGH_POWER } power; > > @@ -1531,6 +1543,13 @@ typedef struct drm_i915_private { > /* gen6+ rps state */ > struct intel_gen6_power_mgmt rps; > > + /* rps wa up ei calculation */ > + struct intel_rps_ei_calc rps_up_ei; > + > + /* rps wa down ei calculation */ > + struct intel_rps_ei_calc rps_down_ei; > + > + > /* ilk-only ips/rps state. 
Everything in here is protected by the global > * mchdev_lock in intel_pm.c */ > struct intel_ilk_power_mgmt ips; > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 56edff3..93b6ebf 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv, > } > } > > +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv, > + struct intel_rps_ei_calc *rps_ei) > +{ > + u32 cz_ts, cz_freq_khz; > + u32 render_count, media_count; > + u32 elapsed_render, elapsed_media, elapsed_time; > + u32 residency = 0; > + > + cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP); > + cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4); > + > + render_count = I915_READ(VLV_RENDER_C0_COUNT_REG); > + media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG); > + > + if (rps_ei->cz_ts_ei == 0) { > + rps_ei->cz_ts_ei = cz_ts; > + rps_ei->render_ei_c0 = render_count; > + rps_ei->media_ei_c0 = media_count; > + > + return dev_priv->rps.cur_delay; > + } > + > + elapsed_time = cz_ts - rps_ei->cz_ts_ei; > + rps_ei->cz_ts_ei = cz_ts; > + > + elapsed_render = render_count - rps_ei->render_ei_c0; > + rps_ei->render_ei_c0 = render_count; > + > + elapsed_media = media_count - rps_ei->media_ei_c0; > + rps_ei->media_ei_c0 = media_count; > + > + /* Convert all the counters into common unit of milli sec */ > + elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC; > + elapsed_render /= cz_freq_khz; > + elapsed_media /= cz_freq_khz; > + > + /* Calculate overall C0 residency percentage only > + * if elapsed time is non zero > + */ > + if (elapsed_time) { > + residency = > + ((max(elapsed_render, elapsed_media) * 100) > + / elapsed_time); > + } > + > + return residency; > +} > + > + > +/** > + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU > + * busy-ness calculated from C0 counters of render & media power wells > + * @dev_priv: DRM device private > 
+ * > + */ > +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) > +{ > + u32 residency_C0_up = 0, residency_C0_down = 0; > + u8 new_delay; > + > + dev_priv->rps.ei_interrupt_count++; > + > + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); > + > + > + if (dev_priv->rps_up_ei.cz_ts_ei == 0) { > + vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei); > + vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei); > + return dev_priv->rps.cur_delay; > + } > + > + > + /* To down throttle, C0 residency should be less than down threshold > + * for continuous EI intervals. So calculate down EI counters > + * once in VLV_INT_COUNT_FOR_DOWN_EI > + */ > + if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) { > + > + dev_priv->rps.ei_interrupt_count = 0; > + > + residency_C0_down = vlv_c0_residency(dev_priv, > + &dev_priv->rps_down_ei); > + } else { > + residency_C0_up = vlv_c0_residency(dev_priv, > + &dev_priv->rps_up_ei); > + } > + > + new_delay = dev_priv->rps.cur_delay; > + > + /* C0 residency is greater than UP threshold. Increase Frequency */ > + if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) { > + > + if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay) > + new_delay = dev_priv->rps.cur_delay + 1; > + > + /* > + * For better performance, jump directly > + * to RPe if we're below it. > + */ > + if (new_delay < dev_priv->rps.rpe_delay) > + new_delay = dev_priv->rps.rpe_delay; > + > + } else if (!dev_priv->rps.ei_interrupt_count && > + (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) { > + /* This means, C0 residency is less than down threshold over > + * a period of VLV_INT_COUNT_FOR_DOWN_EI. 
So, reduce the freq > + */ > + if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay) > + new_delay = dev_priv->rps.cur_delay - 1; > + } > + > + return new_delay; > +} > + > static void gen6_pm_rps_work(struct work_struct *work) > { > drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, > @@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work) > pm_iir = dev_priv->rps.pm_iir; > dev_priv->rps.pm_iir = 0; > /* Make sure not to corrupt PMIMR state used by ringbuffer code */ > - snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); > + if (dev_priv->rps.use_RC0_residency_for_turbo) > + snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED); > + else > + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); > spin_unlock_irq(&dev_priv->irq_lock); > > /* Make sure we didn't queue anything we're not going to process. */ > - WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS); > + WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)); > > - if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0) > + if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) > return; > > mutex_lock(&dev_priv->rps.hw_lock); > @@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work) > else > new_delay = dev_priv->rps.min_delay; > adj = 0; > + } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { > + new_delay = vlv_calc_delay_from_C0_counters(dev_priv); > } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { > if (adj < 0) > adj *= 2; > @@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) > queue_work(dev_priv->wq, &dev_priv->rps.work); > } > > + if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { > + spin_lock(&dev_priv->irq_lock); > + dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED; > + snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED); > + spin_unlock(&dev_priv->irq_lock); > + DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo"); > + > + queue_work(dev_priv->wq, &dev_priv->rps.work); > + } > + 
> if (HAS_VEBOX(dev_priv->dev)) { > if (pm_iir & PM_VEBOX_USER_INTERRUPT) > notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); > @@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) > if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) > gmbus_irq_handler(dev); > > - if (pm_iir) > + if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) > gen6_rps_irq_handler(dev_priv, pm_iir); > > I915_WRITE(GTIIR, gt_iir); > @@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) > pm_irqs |= PM_VEBOX_USER_INTERRUPT; > > dev_priv->pm_irq_mask = 0xffffffff; > + > + if (dev_priv->rps.use_RC0_residency_for_turbo) { > + dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED; > + pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED; > + } else { > + dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS; > + pm_irqs |= GEN6_PM_RPS_EVENTS; > + } > + > I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR)); > I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask); > I915_WRITE(GEN6_PMIER, pm_irqs); > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index f73a49d..e58b37e 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -391,6 +391,7 @@ > #define PUNIT_REG_GPU_FREQ_STS 0xd8 > #define GENFREQSTATUS (1<<0) > #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc > +#define PUNIT_REG_CZ_TIMESTAMP 0xce > > #define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */ > #define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */ > @@ -406,6 +407,11 @@ > #define FB_FMAX_VMIN_FREQ_LO_SHIFT 27 > #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000 > > +#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000 > +#define VLV_RP_UP_EI_THRESHOLD 90 > +#define VLV_RP_DOWN_EI_THRESHOLD 70 > +#define VLV_INT_COUNT_FOR_DOWN_EI 5 > + > /* vlv2 north clock has */ > #define CCK_FUSE_REG 0x8 > #define CCK_FUSE_HPLL_FREQ_MASK 0x3 > @@ -4857,6 +4863,7 @@ > #define VLV_GTLC_PW_STATUS 0x130094 > #define VLV_GTLC_PW_RENDER_STATUS_MASK 0x80 > #define VLV_GTLC_PW_MEDIA_STATUS_MASK 0x20 > 
+#define VLV_GTLC_SURVIVABILITY_REG 0x130098 > #define FORCEWAKE_MT 0xa188 /* multi-threaded */ > #define FORCEWAKE_KERNEL 0x1 > #define FORCEWAKE_USER 0x2 > @@ -4864,6 +4871,11 @@ > #define ECOBUS 0xa180 > #define FORCEWAKE_MT_ENABLE (1<<5) > > +#define VLV_GFX_CLK_FORCE_ON_BIT (1<<2) > +#define VLV_GFX_CLK_STATUS_BIT (1<<3) > + > +#define VLV_RC_COUNTER_CONTROL 0xFFFF00FF > + > #define GTFIFODBG 0x120000 > #define GT_FIFO_SBDROPERR (1<<6) > #define GT_FIFO_BLOBDROPERR (1<<5) > @@ -4979,6 +4991,9 @@ > #define VLV_GFX_CLK_STATUS_BIT (1<<3) > #define VLV_GFX_CLK_FORCE_ON_BIT (1<<2) > > +#define VLV_RENDER_C0_COUNT_REG 0x138118 > +#define VLV_MEDIA_C0_COUNT_REG 0x13811C > + > #define GEN6_GT_GFX_RC6_LOCKED 0x138104 > #define VLV_COUNTER_CONTROL 0x138104 > #define VLV_COUNT_RANGE_HIGH (1<<15) > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 9ab3883..8002ac7 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) > I915_READ(VLV_GTLC_SURVIVABILITY_REG) & > ~VLV_GFX_CLK_FORCE_ON_BIT); > > - /* Unmask Up interrupts */ > - dev_priv->rps.rp_up_masked = true; > - gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD, > + /* Unmask Turbo interrupts */ > + if (dev_priv->rps.use_RC0_residency_for_turbo) > + I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED); > + else { > + dev_priv->rps.rp_up_masked = true; > + gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD, > dev_priv->rps.min_delay); > + } > } > > void gen6_rps_idle(struct drm_i915_private *dev_priv) > @@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev) > struct drm_i915_private *dev_priv = dev->dev_private; > > I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); > - I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS); > + if (dev_priv->rps.use_RC0_residency_for_turbo) { > + I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & 
> + ~GEN6_PM_RP_UP_EI_EXPIRED); > + } else { > + I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & > + ~GEN6_PM_RPS_EVENTS); > + } > /* Complete PM interrupt masking here doesn't race with the rps work > * item again unmasking PM interrupts because that is using a different > * register (PMIMR) to mask PM interrupts. The only risk is in leaving > @@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev) > dev_priv->rps.pm_iir = 0; > spin_unlock_irq(&dev_priv->irq_lock); > > - I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); > + if (dev_priv->rps.use_RC0_residency_for_turbo) > + I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED); > + else > + I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); > } > > static void gen6_disable_rps(struct drm_device *dev) > @@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev) > struct drm_i915_private *dev_priv = dev->dev_private; > u32 enabled_intrs; > > + /* Clear out any stale interrupts first */ > spin_lock_irq(&dev_priv->irq_lock); > WARN_ON(dev_priv->rps.pm_iir); > - snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); > - I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); > + if (dev_priv->rps.use_RC0_residency_for_turbo) { > + snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED); > + I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED); > + } else { > + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); > + I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); > + } > spin_unlock_irq(&dev_priv->irq_lock); > > /* only unmask PM interrupts we need. Mask all others. */ > - enabled_intrs = GEN6_PM_RPS_EVENTS; > + if (dev_priv->rps.use_RC0_residency_for_turbo) > + enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED; > + else > + enabled_intrs = GEN6_PM_RPS_EVENTS; > > /* IVB and SNB hard hangs on looping batchbuffer > * if GEN6_PM_UP_EI_EXPIRED is masked. 
> */ > - if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) > + if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) && > + !dev_priv->rps.use_RC0_residency_for_turbo) > enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED; > > I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs); > @@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev) > I915_WRITE(GEN6_RP_DOWN_EI, 350000); > > I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); > + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); > > I915_WRITE(GEN6_RP_CONTROL, > GEN6_RP_MEDIA_TURBO | > @@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev) > I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); > > /* allows RC6 residency counter to work */ > - I915_WRITE(VLV_COUNTER_CONTROL, > - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | > - VLV_MEDIA_RC6_COUNT_EN | > - VLV_RENDER_RC6_COUNT_EN)); > + I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL); > if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) > rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; > > @@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev) > dev_priv->rps.rp_up_masked = false; > dev_priv->rps.rp_down_masked = false; > > + /* enable WA for RC6+turbo to work together */ > + dev_priv->rps.use_RC0_residency_for_turbo = true; > + > gen6_enable_rps_interrupts(dev); > > gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); >