All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: "# v4 . 1+" <stable@vger.kernel.org>
Subject: Re: [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
Date: Fri, 10 Mar 2017 16:10:29 +0200	[thread overview]
Message-ID: <87efy52pey.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20170309211232.28878-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Baytrail, we manually calculate busyness over the evaluation interval
> to avoid issues with miscaluations with RC6 enabled. However, it turns
> out that the DOWN_EI interrupt generator is completely bust - it
> operates in two modes, continuous or never. Neither of which are
> conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
> compute everything from the UP_EI which does seem to correspond to the
> desired interval.
>
> v2: Fixup gen6_rps_pm_mask() as well
> v3: Inline vlv_c0_above() to combine the now identical elapsed
> calculation for up/down and simplify the threshold testing
>
> Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v4.1+
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  2 +-
>  drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
>  drivers/gpu/drm/i915/intel_pm.c |  5 +--
>  3 files changed, 32 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c52aee5141ca..b399c8fe5ac1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt {
>  	unsigned boosts;
>  
>  	/* manual wa residency calculations */
> -	struct intel_rps_ei up_ei, down_ei;
> +	struct intel_rps_ei ei;
>  
>  	/*
>  	 * Protects RPS/RC6 register access and PCU communication.
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d91078f8e9d4..b3f9181ef314 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
>  	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
>  }
>  
> -static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> -			 const struct intel_rps_ei *old,
> -			 const struct intel_rps_ei *now,
> -			 int threshold)
> -{
> -	u64 time, c0;
> -	unsigned int mul = 100;
> -
> -	if (old->cz_clock == 0)
> -		return false;
> -
> -	if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> -		mul <<= 8;
> -
> -	time = now->cz_clock - old->cz_clock;
> -	time *= threshold * dev_priv->czclk_freq;
> -
> -	/* Workload can be split between render + media, e.g. SwapBuffers
> -	 * being blitted in X after being rendered in mesa. To account for
> -	 * this we need to combine both engines into our activity counter.
> -	 */
> -	c0 = now->render_c0 - old->render_c0;
> -	c0 += now->media_c0 - old->media_c0;
> -	c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
> -
> -	return c0 >= time;
> -}
> -
>  void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
>  {
> -	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> -	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> +	memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
>  }
>  
>  static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>  {
> +	const struct intel_rps_ei *prev = &dev_priv->rps.ei;
>  	struct intel_rps_ei now;
>  	u32 events = 0;
>  
> -	if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
> +	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
>  		return 0;
>  
>  	vlv_c0_read(dev_priv, &now);
>  	if (now.cz_clock == 0)
>  		return 0;
>  
> -	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> -		if (!vlv_c0_above(dev_priv,
> -				  &dev_priv->rps.down_ei, &now,
> -				  dev_priv->rps.down_threshold))
> -			events |= GEN6_PM_RP_DOWN_THRESHOLD;
> -		dev_priv->rps.down_ei = now;
> -	}
> +	if (prev->cz_clock) {
> +		u64 time, c0;
> +		unsigned int mul;
>  
> -	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> -		if (vlv_c0_above(dev_priv,
> -				 &dev_priv->rps.up_ei, &now,
> -				 dev_priv->rps.up_threshold))
> -			events |= GEN6_PM_RP_UP_THRESHOLD;
> -		dev_priv->rps.up_ei = now;
> +		mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */

scale to % as our thresholds are in %. But tiny bikeshed, so

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>


> +		if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> +			mul <<= 8;
> +
> +		time = now.cz_clock - prev->cz_clock;
> +		time *= dev_priv->czclk_freq;
> +
> +		/* Workload can be split between render + media,
> +		 * e.g. SwapBuffers being blitted in X after being rendered in
> +		 * mesa. To account for this we need to combine both engines
> +		 * into our activity counter.
> +		 */
> +		c0 = now.render_c0 - prev->render_c0;
> +		c0 += now.media_c0 - prev->media_c0;
> +		c0 *= mul;
> +
> +		if (c0 > time * dev_priv->rps.up_threshold)
> +			events = GEN6_PM_RP_UP_THRESHOLD;
> +		else if (c0 < time * dev_priv->rps.down_threshold)
> +			events = GEN6_PM_RP_DOWN_THRESHOLD;
>  	}
>  
> +	dev_priv->rps.ei = now;
>  	return events;
>  }
>  
> @@ -4284,7 +4267,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
>  	/* Let's track the enabled rps events */
>  	if (IS_VALLEYVIEW(dev_priv))
>  		/* WaGsvRC0ResidencyMethod:vlv */
> -		dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
> +		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
>  	else
>  		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 99e09f63d4b3..4db3a792c6c2 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5166,8 +5166,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>  {
>  	u32 mask = 0;
>  
> +	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
>  	if (val > dev_priv->rps.min_freq_softlimit)
> -		mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> +		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
>  	if (val < dev_priv->rps.max_freq_softlimit)
>  		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
>  
> @@ -5277,7 +5278,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
>  	if (dev_priv->rps.enabled) {
>  		u8 freq;
>  
> -		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
> +		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
>  			gen6_rps_reset_ei(dev_priv);
>  		I915_WRITE(GEN6_PMINTRMSK,
>  			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> -- 
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

WARNING: multiple messages have this Message-ID (diff)
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>,
	"# v4 . 1+" <stable@vger.kernel.org>
Subject: Re: [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
Date: Fri, 10 Mar 2017 16:10:29 +0200	[thread overview]
Message-ID: <87efy52pey.fsf@gaia.fi.intel.com> (raw)
In-Reply-To: <20170309211232.28878-1-chris@chris-wilson.co.uk>

Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Baytrail, we manually calculate busyness over the evaluation interval
> to avoid issues with miscaluations with RC6 enabled. However, it turns
> out that the DOWN_EI interrupt generator is completely bust - it
> operates in two modes, continuous or never. Neither of which are
> conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
> compute everything from the UP_EI which does seem to correspond to the
> desired interval.
>
> v2: Fixup gen6_rps_pm_mask() as well
> v3: Inline vlv_c0_above() to combine the now identical elapsed
> calculation for up/down and simplify the threshold testing
>
> Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v4.1+
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  2 +-
>  drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
>  drivers/gpu/drm/i915/intel_pm.c |  5 +--
>  3 files changed, 32 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c52aee5141ca..b399c8fe5ac1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt {
>  	unsigned boosts;
>  
>  	/* manual wa residency calculations */
> -	struct intel_rps_ei up_ei, down_ei;
> +	struct intel_rps_ei ei;
>  
>  	/*
>  	 * Protects RPS/RC6 register access and PCU communication.
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d91078f8e9d4..b3f9181ef314 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
>  	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
>  }
>  
> -static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> -			 const struct intel_rps_ei *old,
> -			 const struct intel_rps_ei *now,
> -			 int threshold)
> -{
> -	u64 time, c0;
> -	unsigned int mul = 100;
> -
> -	if (old->cz_clock == 0)
> -		return false;
> -
> -	if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> -		mul <<= 8;
> -
> -	time = now->cz_clock - old->cz_clock;
> -	time *= threshold * dev_priv->czclk_freq;
> -
> -	/* Workload can be split between render + media, e.g. SwapBuffers
> -	 * being blitted in X after being rendered in mesa. To account for
> -	 * this we need to combine both engines into our activity counter.
> -	 */
> -	c0 = now->render_c0 - old->render_c0;
> -	c0 += now->media_c0 - old->media_c0;
> -	c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
> -
> -	return c0 >= time;
> -}
> -
>  void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
>  {
> -	vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> -	dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> +	memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
>  }
>  
>  static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
>  {
> +	const struct intel_rps_ei *prev = &dev_priv->rps.ei;
>  	struct intel_rps_ei now;
>  	u32 events = 0;
>  
> -	if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
> +	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
>  		return 0;
>  
>  	vlv_c0_read(dev_priv, &now);
>  	if (now.cz_clock == 0)
>  		return 0;
>  
> -	if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> -		if (!vlv_c0_above(dev_priv,
> -				  &dev_priv->rps.down_ei, &now,
> -				  dev_priv->rps.down_threshold))
> -			events |= GEN6_PM_RP_DOWN_THRESHOLD;
> -		dev_priv->rps.down_ei = now;
> -	}
> +	if (prev->cz_clock) {
> +		u64 time, c0;
> +		unsigned int mul;
>  
> -	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> -		if (vlv_c0_above(dev_priv,
> -				 &dev_priv->rps.up_ei, &now,
> -				 dev_priv->rps.up_threshold))
> -			events |= GEN6_PM_RP_UP_THRESHOLD;
> -		dev_priv->rps.up_ei = now;
> +		mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */

scale to % as our thresholds are in %. But tiny bikeshed, so

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>


> +		if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> +			mul <<= 8;
> +
> +		time = now.cz_clock - prev->cz_clock;
> +		time *= dev_priv->czclk_freq;
> +
> +		/* Workload can be split between render + media,
> +		 * e.g. SwapBuffers being blitted in X after being rendered in
> +		 * mesa. To account for this we need to combine both engines
> +		 * into our activity counter.
> +		 */
> +		c0 = now.render_c0 - prev->render_c0;
> +		c0 += now.media_c0 - prev->media_c0;
> +		c0 *= mul;
> +
> +		if (c0 > time * dev_priv->rps.up_threshold)
> +			events = GEN6_PM_RP_UP_THRESHOLD;
> +		else if (c0 < time * dev_priv->rps.down_threshold)
> +			events = GEN6_PM_RP_DOWN_THRESHOLD;
>  	}
>  
> +	dev_priv->rps.ei = now;
>  	return events;
>  }
>  
> @@ -4284,7 +4267,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
>  	/* Let's track the enabled rps events */
>  	if (IS_VALLEYVIEW(dev_priv))
>  		/* WaGsvRC0ResidencyMethod:vlv */
> -		dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
> +		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
>  	else
>  		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 99e09f63d4b3..4db3a792c6c2 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5166,8 +5166,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>  {
>  	u32 mask = 0;
>  
> +	/* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
>  	if (val > dev_priv->rps.min_freq_softlimit)
> -		mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> +		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
>  	if (val < dev_priv->rps.max_freq_softlimit)
>  		mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
>  
> @@ -5277,7 +5278,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
>  	if (dev_priv->rps.enabled) {
>  		u8 freq;
>  
> -		if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
> +		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
>  			gen6_rps_reset_ei(dev_priv);
>  		I915_WRITE(GEN6_PMINTRMSK,
>  			   gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> -- 
> 2.11.0

  parent reply	other threads:[~2017-03-10 14:11 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-09 21:12 [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Chris Wilson
2017-03-09 21:12 ` [PATCH v3 2/3] drm/i915: Use max(render, media) for Baytrail busyness calculation Chris Wilson
2017-03-09 21:12 ` [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments Chris Wilson
2017-03-10 14:13   ` Mika Kuoppala
2017-03-10  9:04 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Patchwork
2017-03-10 14:10 ` Mika Kuoppala [this message]
2017-03-10 14:10   ` [PATCH v3 1/3] " Mika Kuoppala
2017-03-10 14:22   ` Chris Wilson
2017-03-10 14:22     ` Chris Wilson
2017-03-10 15:07   ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87efy52pey.fsf@gaia.fi.intel.com \
    --to=mika.kuoppala@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.