All of lore.kernel.org
 help / color / mirror / Atom feed
From: Deepak S <deepak.s@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>
Subject: Re: [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail
Date: Wed, 18 Mar 2015 13:26:02 +0530	[thread overview]
Message-ID: <55092F92.20205@linux.intel.com> (raw)
In-Reply-To: <1425654391-2126-3-git-send-email-chris@chris-wilson.co.uk>



On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
> Author: Deepak S <deepak.s@linux.intel.com>
> Date:   Thu Jul 3 17:33:01 2014 -0400
>
>      drm/i915/vlv: WA for Turbo and RC6 to work together.
>
> Other than code clarity, the major improvement is to disable the extra
> interrupts generated when idle.  However, the reclocking remains rather
> slow under the new manual regime, in particular it fails to downclock as
> quickly as desired. The second major improvement is that for certain
> workloads, like games, we need to combine render+media activity counters
> as the work of displaying the frame is split across the engines and both
> need to be taken into account when deciding the global GPU frequency as
> memory cycles are shared.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Deepak S <deepak.s@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> Conflicts:
> 	drivers/gpu/drm/i915/intel_pm.c
> ---
>   drivers/gpu/drm/i915/i915_irq.c      | 155 +++++++++++++----------------------
>   drivers/gpu/drm/i915/i915_reg.h      |   4 +-
>   drivers/gpu/drm/i915/intel_display.c |   2 +
>   drivers/gpu/drm/i915/intel_drv.h     |   2 +
>   drivers/gpu/drm/i915/intel_pm.c      |  22 ++++-
>   5 files changed, 81 insertions(+), 104 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 1296ce37e435..4b7b86298b37 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
>   	wake_up_all(&ring->irq_queue);
>   }
>   
> -static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> -			    struct intel_rps_ei *rps_ei)
> +static void vlv_c0_read(struct drm_i915_private *dev_priv,
> +			struct intel_rps_ei *ei)
>   {
> -	u32 cz_ts, cz_freq_khz;
> -	u32 render_count, media_count;
> -	u32 elapsed_render, elapsed_media, elapsed_time;
> -	u32 residency = 0;
> -
> -	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> -	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> -
> -	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> -	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> -
> -	if (rps_ei->cz_clock == 0) {
> -		rps_ei->cz_clock = cz_ts;
> -		rps_ei->render_c0 = render_count;
> -		rps_ei->media_c0 = media_count;
> -
> -		return dev_priv->rps.cur_freq;
> -	}
> -
> -	elapsed_time = cz_ts - rps_ei->cz_clock;
> -	rps_ei->cz_clock = cz_ts;
> +	ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
> +	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> +}
>   
> -	elapsed_render = render_count - rps_ei->render_c0;
> -	rps_ei->render_c0 = render_count;
> +static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> +			 const struct intel_rps_ei *old,
> +			 const struct intel_rps_ei *now,
> +			 int threshold)
> +{
> +	u64 time, c0;
>   
> -	elapsed_media = media_count - rps_ei->media_c0;
> -	rps_ei->media_c0 = media_count;
> +	if (old->cz_clock == 0)
> +		return false;
>   
> -	/* Convert all the counters into common unit of milli sec */
> -	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> -	elapsed_render /=  cz_freq_khz;
> -	elapsed_media /= cz_freq_khz;
> +	time = now->cz_clock - old->cz_clock;
> +	time *= threshold * dev_priv->mem_freq;
>   
> -	/*
> -	 * Calculate overall C0 residency percentage
> -	 * only if elapsed time is non zero
> +	/* Workload can be split between render + media, e.g. SwapBuffers
> +	 * being blitted in X after being rendered in mesa. To account for
> +	 * this we need to combine both engines into our activity counter.
>   	 */
> -	if (elapsed_time) {
> -		residency =
> -			((max(elapsed_render, elapsed_media) * 100)
> -				/ elapsed_time);
> -	}
> +	c0 = now->render_c0 - old->render_c0;
> +	c0 += now->media_c0 - old->media_c0;
> +	c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
>   
> -	return residency;
> +	return c0 >= time;
>   }
>   
> -/**
> - * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> - * busy-ness calculated from C0 counters of render & media power wells
> - * @dev_priv: DRM device private
> - *
> - */
> -static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
>   {
> -	u32 residency_C0_up = 0, residency_C0_down = 0;
> -	int new_delay, adj;
> -
> -	dev_priv->rps.ei_interrupt_count++;
> -
> -	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> +	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> +	dev_priv->rps.ei_interrupt_count = 0;
> +}
>   
> +static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> +{
> +	struct intel_rps_ei now;
> +	u32 events = 0;
>   
> -	if (dev_priv->rps.up_ei.cz_clock == 0) {
> -		vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
> -		vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
> -		return dev_priv->rps.cur_freq;
> -	}
> +	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> +		return 0;
>   
> +	vlv_c0_read(dev_priv, &now);
> +	if (now.cz_clock == 0)
> +		return 0;
>   
>   	/*
>   	 * To down throttle, C0 residency should be less than down threshold
>   	 * for continous EI intervals. So calculate down EI counters
>   	 * once in VLV_INT_COUNT_FOR_DOWN_EI
>   	 */
> -	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> -
> +	if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
> +		pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
>   		dev_priv->rps.ei_interrupt_count = 0;
> -
> -		residency_C0_down = vlv_c0_residency(dev_priv,
> -						     &dev_priv->rps.down_ei);
> -	} else {
> -		residency_C0_up = vlv_c0_residency(dev_priv,
> -						   &dev_priv->rps.up_ei);
>   	}
>   
> -	new_delay = dev_priv->rps.cur_freq;
> -
> -	adj = dev_priv->rps.last_adj;
> -	/* C0 residency is greater than UP threshold. Increase Frequency */
> -	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> -		if (adj > 0)
> -			adj *= 2;
> -		else
> -			adj = 1;
> -
> -		if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
> -			new_delay = dev_priv->rps.cur_freq + adj;
> -
> -		/*
> -		 * For better performance, jump directly
> -		 * to RPe if we're below it.
> -		 */
> -		if (new_delay < dev_priv->rps.efficient_freq)
> -			new_delay = dev_priv->rps.efficient_freq;
> +	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> +		if (!vlv_c0_above(dev_priv,
> +				  &dev_priv->rps.down_ei, &now,
> +				  VLV_RP_DOWN_EI_THRESHOLD))
> +			events |= GEN6_PM_RP_DOWN_THRESHOLD;
> +		dev_priv->rps.down_ei = now;
> +	}
>   
> -	} else if (!dev_priv->rps.ei_interrupt_count &&
> -			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> -		if (adj < 0)
> -			adj *= 2;
> -		else
> -			adj = -1;
> -		/*
> -		 * This means, C0 residency is less than down threshold over
> -		 * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> -		 */
> -		if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
> -			new_delay = dev_priv->rps.cur_freq + adj;
> +	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		if (vlv_c0_above(dev_priv,
> +				 &dev_priv->rps.up_ei, &now,
> +				 VLV_RP_UP_EI_THRESHOLD))
> +			events |= GEN6_PM_RP_UP_THRESHOLD;
> +		dev_priv->rps.up_ei = now;
>   	}
>   
> -	return new_delay;
> +	return events;
>   }
>   
>   static void gen6_pm_rps_work(struct work_struct *work)
> @@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>   
>   	mutex_lock(&dev_priv->rps.hw_lock);
>   
> +	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
> +
>   	adj = dev_priv->rps.last_adj;
>   	new_delay = dev_priv->rps.cur_freq;
>   	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> @@ -1170,8 +1127,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
>   		else
>   			new_delay = dev_priv->rps.min_freq_softlimit;
>   		adj = 0;
> -	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> -		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>   	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>   		if (adj < 0)
>   			adj *= 2;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 56b97c41bf17..324922d8f8a1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6190,8 +6190,8 @@ enum skl_disp_power_wells {
>   
>   #define GEN6_GT_GFX_RC6p			0x13810C
>   #define GEN6_GT_GFX_RC6pp			0x138110
> -#define VLV_RENDER_C0_COUNT_REG		0x138118
> -#define VLV_MEDIA_C0_COUNT_REG			0x13811C
> +#define VLV_RENDER_C0_COUNT			0x138118
> +#define VLV_MEDIA_C0_COUNT			0x13811C
>   
>   #define GEN6_PCODE_MAILBOX			0x138124
>   #define   GEN6_PCODE_READY			(1<<31)
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 77113d95fb71..2412002d510b 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9167,6 +9167,8 @@ void intel_mark_busy(struct drm_device *dev)
>   
>   	intel_runtime_pm_get(dev_priv);
>   	i915_update_gfx_val(dev_priv);
> +	if (INTEL_INFO(dev)->gen >= 6)
> +		gen6_rps_busy(dev_priv);
>   	dev_priv->mm.busy = true;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index ff79dca2ff8e..1e564da2fd38 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1238,6 +1238,8 @@ void intel_disable_gt_powersave(struct drm_device *dev);
>   void intel_suspend_gt_powersave(struct drm_device *dev);
>   void intel_reset_gt_powersave(struct drm_device *dev);
>   void gen6_update_ring_freq(struct drm_device *dev);
> +void gen6_rps_busy(struct drm_i915_private *dev_priv);
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
>   void gen6_rps_idle(struct drm_i915_private *dev_priv);
>   void gen6_rps_boost(struct drm_i915_private *dev_priv);
>   void ilk_wm_get_hw_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e53724516852..b37d634bea99 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3898,6 +3898,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
>   }
>   
> +void gen6_rps_busy(struct drm_i915_private *dev_priv)
> +{
> +	mutex_lock(&dev_priv->rps.hw_lock);
> +	if (dev_priv->rps.enabled) {
> +		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
> +			gen6_rps_reset_ei(dev_priv);
> +		I915_WRITE(GEN6_PMINTRMSK,
> +			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> +	}
> +	mutex_unlock(&dev_priv->rps.hw_lock);
> +}
> +
>   void gen6_rps_idle(struct drm_i915_private *dev_priv)
>   {
>   	struct drm_device *dev = dev_priv->dev;
> @@ -3909,15 +3921,21 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
>   		else
>   			gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
>   		dev_priv->rps.last_adj = 0;
> +		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
>   	}
>   	mutex_unlock(&dev_priv->rps.hw_lock);
>   }
>   
>   void gen6_rps_boost(struct drm_i915_private *dev_priv)
>   {
> +	u32 val;
> +
>   	mutex_lock(&dev_priv->rps.hw_lock);
> -	if (dev_priv->rps.enabled) {
> -		intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
> +	val = dev_priv->rps.max_freq_softlimit;
> +	if (dev_priv->rps.enabled &&
> +	    dev_priv->mm.busy &&
> +	    dev_priv->rps.cur_freq < val) {
> +		intel_set_rps(dev_priv->dev, val);
>   		dev_priv->rps.last_adj = 0;
>   	}
>   	mutex_unlock(&dev_priv->rps.hw_lock);

EI Calculation and changes looks fine to me
Reviewed-by: Deepak S<deepak.s@linux.intel.com>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2015-03-18  7:59 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-06 15:06 [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Chris Wilson
2015-03-06 15:06 ` [PATCH 2/7] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
2015-03-06 17:32   ` Daniel Vetter
2015-03-06 21:44     ` Chris Wilson
2015-03-18  6:56   ` Deepak S
2015-03-18  9:05     ` Chris Wilson
2015-03-18  9:20     ` Chris Wilson
2015-03-18 11:09       ` Deepak S
2015-03-06 15:06 ` [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
2015-03-18  7:56   ` Deepak S [this message]
2015-03-06 15:06 ` [PATCH 4/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
2015-03-18  8:04   ` Deepak S
2015-03-06 15:06 ` [PATCH 5/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
2015-03-18  8:12   ` Deepak S
2015-03-18  9:48     ` Daniel Vetter
2015-03-18  9:49       ` Chris Wilson
2015-03-18 11:06       ` Deepak S
2015-03-06 15:06 ` [PATCH 6/7] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
2015-03-18  8:18   ` Deepak S
2015-03-18  8:20     ` Deepak S
2015-03-06 15:06 ` [PATCH 7/7] drm/i915: Deminish contribution of wait-boosting from clients Chris Wilson
2015-03-06 17:58   ` shuang.he
2015-03-18  9:00   ` Deepak S
2015-03-09 15:38 ` [PATCH 1/7] drm/i915: Relax RPS contraints to allows setting minfreq on idle Jesse Barnes
2015-03-18  5:44 ` Deepak S
2015-03-18  9:07   ` Chris Wilson
2015-03-18  5:52 ` Deepak S

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55092F92.20205@linux.intel.com \
    --to=deepak.s@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=daniel.vetter@ffwll.ch \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=rodrigo.vivi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.