* [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
@ 2017-03-09 21:12 Chris Wilson
2017-03-09 21:12 ` [PATCH v3 2/3] drm/i915: Use max(render, media) for Baytrail busyness calculation Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Chris Wilson @ 2017-03-09 21:12 UTC (permalink / raw)
To: intel-gfx; +Cc: Chris Wilson, Mika Kuoppala, # v4 . 1+
On Baytrail, we manually calculate busyness over the evaluation interval
to avoid issues with miscaluations with RC6 enabled. However, it turns
out that the DOWN_EI interrupt generator is completely bust - it
operates in two modes, continuous or never. Neither of which are
conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
compute everything from the UP_EI which does seem to correspond to the
desired interval.
v2: Fixup gen6_rps_pm_mask() as well
v3: Inline vlv_c0_above() to combine the now identical elapsed
calculation for up/down and simplify the threshold testing
Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.1+
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 2 +-
drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
drivers/gpu/drm/i915/intel_pm.c | 5 +--
3 files changed, 32 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c52aee5141ca..b399c8fe5ac1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt {
unsigned boosts;
/* manual wa residency calculations */
- struct intel_rps_ei up_ei, down_ei;
+ struct intel_rps_ei ei;
/*
* Protects RPS/RC6 register access and PCU communication.
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d91078f8e9d4..b3f9181ef314 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
}
-static bool vlv_c0_above(struct drm_i915_private *dev_priv,
- const struct intel_rps_ei *old,
- const struct intel_rps_ei *now,
- int threshold)
-{
- u64 time, c0;
- unsigned int mul = 100;
-
- if (old->cz_clock == 0)
- return false;
-
- if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
- mul <<= 8;
-
- time = now->cz_clock - old->cz_clock;
- time *= threshold * dev_priv->czclk_freq;
-
- /* Workload can be split between render + media, e.g. SwapBuffers
- * being blitted in X after being rendered in mesa. To account for
- * this we need to combine both engines into our activity counter.
- */
- c0 = now->render_c0 - old->render_c0;
- c0 += now->media_c0 - old->media_c0;
- c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
-
- return c0 >= time;
-}
-
void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
{
- vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
- dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+ memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
}
static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
{
+ const struct intel_rps_ei *prev = &dev_priv->rps.ei;
struct intel_rps_ei now;
u32 events = 0;
- if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
return 0;
vlv_c0_read(dev_priv, &now);
if (now.cz_clock == 0)
return 0;
- if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
- if (!vlv_c0_above(dev_priv,
- &dev_priv->rps.down_ei, &now,
- dev_priv->rps.down_threshold))
- events |= GEN6_PM_RP_DOWN_THRESHOLD;
- dev_priv->rps.down_ei = now;
- }
+ if (prev->cz_clock) {
+ u64 time, c0;
+ unsigned int mul;
- if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
- if (vlv_c0_above(dev_priv,
- &dev_priv->rps.up_ei, &now,
- dev_priv->rps.up_threshold))
- events |= GEN6_PM_RP_UP_THRESHOLD;
- dev_priv->rps.up_ei = now;
+ mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */
+ if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
+ mul <<= 8;
+
+ time = now.cz_clock - prev->cz_clock;
+ time *= dev_priv->czclk_freq;
+
+ /* Workload can be split between render + media,
+ * e.g. SwapBuffers being blitted in X after being rendered in
+ * mesa. To account for this we need to combine both engines
+ * into our activity counter.
+ */
+ c0 = now.render_c0 - prev->render_c0;
+ c0 += now.media_c0 - prev->media_c0;
+ c0 *= mul;
+
+ if (c0 > time * dev_priv->rps.up_threshold)
+ events = GEN6_PM_RP_UP_THRESHOLD;
+ else if (c0 < time * dev_priv->rps.down_threshold)
+ events = GEN6_PM_RP_DOWN_THRESHOLD;
}
+ dev_priv->rps.ei = now;
return events;
}
@@ -4284,7 +4267,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
/* Let's track the enabled rps events */
if (IS_VALLEYVIEW(dev_priv))
/* WaGsvRC0ResidencyMethod:vlv */
- dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
+ dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
else
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 99e09f63d4b3..4db3a792c6c2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5166,8 +5166,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
{
u32 mask = 0;
+ /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
if (val > dev_priv->rps.min_freq_softlimit)
- mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+ mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
if (val < dev_priv->rps.max_freq_softlimit)
mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
@@ -5277,7 +5278,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
if (dev_priv->rps.enabled) {
u8 freq;
- if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+ if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
gen6_rps_reset_ei(dev_priv);
I915_WRITE(GEN6_PMINTRMSK,
gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
--
2.11.0
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 2/3] drm/i915: Use max(render, media) for Baytrail busyness calculation
2017-03-09 21:12 [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Chris Wilson
@ 2017-03-09 21:12 ` Chris Wilson
2017-03-09 21:12 ` [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments Chris Wilson
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-03-09 21:12 UTC (permalink / raw)
To: intel-gfx
Currently, we sum the render and media cycles (on different engines) to
compute a percentage - but we fail to factor in the duplication into the
threshold calculations. This makes us very eager to upclock!
If we just consider the maximum busy cycles of either counter, we should
have an accurate reflection on whether there are cycles to spare to
handle the workload at this frequency.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/i915_irq.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b3f9181ef314..e8f4c78c4bdc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1103,6 +1103,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
if (prev->cz_clock) {
u64 time, c0;
+ u32 render, media;
unsigned int mul;
mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */
@@ -1117,8 +1118,9 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
* mesa. To account for this we need to combine both engines
* into our activity counter.
*/
- c0 = now.render_c0 - prev->render_c0;
- c0 += now.media_c0 - prev->media_c0;
+ render = now.render_c0 - prev->render_c0;
+ media = now.media_c0 - prev->media_c0;
+ c0 = max(render, media);
c0 *= mul;
if (c0 > time * dev_priv->rps.up_threshold)
--
2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments
2017-03-09 21:12 [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Chris Wilson
2017-03-09 21:12 ` [PATCH v3 2/3] drm/i915: Use max(render, media) for Baytrail busyness calculation Chris Wilson
@ 2017-03-09 21:12 ` Chris Wilson
2017-03-10 14:13 ` Mika Kuoppala
2017-03-10 9:04 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Patchwork
2017-03-10 14:10 ` Mika Kuoppala
3 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2017-03-09 21:12 UTC (permalink / raw)
To: intel-gfx
To make our adjustments to RPS requires taking a mutex and potentially
sleeping for an unknown duration - until we have completed our
adjustments further RPS interrupts are immaterial (they are based on
stale thresholds) and we can safely ignore them.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/i915_irq.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e8f4c78c4bdc..ab55a3ddafb3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1149,30 +1149,21 @@ static void gen6_pm_rps_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
container_of(work, struct drm_i915_private, rps.work);
- bool client_boost;
+ bool client_boost = false;
int new_delay, adj, min, max;
- u32 pm_iir;
+ u32 pm_iir = 0;
spin_lock_irq(&dev_priv->irq_lock);
- /* Speed up work cancelation during disabling rps interrupts. */
- if (!dev_priv->rps.interrupts_enabled) {
- spin_unlock_irq(&dev_priv->irq_lock);
- return;
+ if (dev_priv->rps.interrupts_enabled) {
+ pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir);
+ client_boost = fetch_and_zero(&dev_priv->rps.client_boost);
}
-
- pm_iir = dev_priv->rps.pm_iir;
- dev_priv->rps.pm_iir = 0;
- /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
- gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
- client_boost = dev_priv->rps.client_boost;
- dev_priv->rps.client_boost = false;
spin_unlock_irq(&dev_priv->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
-
if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
- return;
+ goto out;
mutex_lock(&dev_priv->rps.hw_lock);
@@ -1229,6 +1220,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
}
mutex_unlock(&dev_priv->rps.hw_lock);
+
+out:
+ /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
+ spin_lock_irq(&dev_priv->irq_lock);
+ if (dev_priv->rps.interrupts_enabled)
+ gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
+ spin_unlock_irq(&dev_priv->irq_lock);
}
--
2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* ✓ Fi.CI.BAT: success for series starting with [v3,1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
2017-03-09 21:12 [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Chris Wilson
2017-03-09 21:12 ` [PATCH v3 2/3] drm/i915: Use max(render, media) for Baytrail busyness calculation Chris Wilson
2017-03-09 21:12 ` [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments Chris Wilson
@ 2017-03-10 9:04 ` Patchwork
2017-03-10 14:10 ` Mika Kuoppala
3 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2017-03-10 9:04 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [v3,1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
URL : https://patchwork.freedesktop.org/series/21014/
State : success
== Summary ==
Series 21014v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/21014/revisions/1/mbox/
Test gem_exec_flush:
Subgroup basic-batch-kernel-default-uc:
incomplete -> PASS (fi-skl-6700hq) fdo#100130
Test gem_exec_suspend:
Subgroup basic-s3:
pass -> FAIL (fi-kbl-7500u) fdo#100124
Subgroup basic-s4-devices:
dmesg-warn -> FAIL (fi-kbl-7500u) fdo#100099
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-a:
pass -> FAIL (fi-kbl-7500u) fdo#100123
Subgroup suspend-read-crc-pipe-b:
pass -> FAIL (fi-kbl-7500u) fdo#100123
Subgroup suspend-read-crc-pipe-c:
pass -> FAIL (fi-kbl-7500u) fdo#100044
fdo#100130 https://bugs.freedesktop.org/show_bug.cgi?id=100130
fdo#100124 https://bugs.freedesktop.org/show_bug.cgi?id=100124
fdo#100099 https://bugs.freedesktop.org/show_bug.cgi?id=100099
fdo#100123 https://bugs.freedesktop.org/show_bug.cgi?id=100123
fdo#100044 https://bugs.freedesktop.org/show_bug.cgi?id=100044
fi-bdw-5557u total:278 pass:267 dwarn:0 dfail:0 fail:0 skip:11 time: 464s
fi-bsw-n3050 total:278 pass:239 dwarn:0 dfail:0 fail:0 skip:39 time: 610s
fi-bxt-j4205 total:278 pass:259 dwarn:0 dfail:0 fail:0 skip:19 time: 545s
fi-bxt-t5700 total:278 pass:258 dwarn:0 dfail:0 fail:0 skip:20 time: 615s
fi-byt-n2820 total:278 pass:247 dwarn:0 dfail:0 fail:0 skip:31 time: 500s
fi-hsw-4770 total:278 pass:262 dwarn:0 dfail:0 fail:0 skip:16 time: 440s
fi-hsw-4770r total:278 pass:262 dwarn:0 dfail:0 fail:0 skip:16 time: 435s
fi-ilk-650 total:278 pass:228 dwarn:0 dfail:0 fail:0 skip:50 time: 438s
fi-ivb-3520m total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time: 505s
fi-ivb-3770 total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time: 487s
fi-kbl-7500u total:278 pass:255 dwarn:0 dfail:0 fail:5 skip:18 time: 497s
fi-skl-6260u total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time: 508s
fi-skl-6700hq total:53 pass:45 dwarn:0 dfail:0 fail:0 skip:7 time: 0s
fi-skl-6770hq total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time: 548s
fi-snb-2520m total:278 pass:250 dwarn:0 dfail:0 fail:0 skip:28 time: 563s
fi-skl-6700k failed to collect. IGT log at Patchwork_4125/fi-skl-6700k/igt.log
fi-snb-2600 failed to collect. IGT log at Patchwork_4125/fi-snb-2600/igt.log
efa9a3649bd94a4f35911aaecc723e041d8b99c5 drm-tip: 2017y-03m-09d-21h-28m-20s UTC integration manifest
c697b52 drm/i915: Defer unmasking RPS interrupts until after making adjustments
8d6c959 drm/i915: Use max(render, media) for Baytrail busyness calculation
661b516 drm/i915: Stop using RP_DOWN_EI on Baytrail
== Logs ==
For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4125/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
2017-03-09 21:12 [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Chris Wilson
@ 2017-03-10 14:10 ` Mika Kuoppala
2017-03-09 21:12 ` [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments Chris Wilson
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Mika Kuoppala @ 2017-03-10 14:10 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: # v4 . 1+
Chris Wilson <chris@chris-wilson.co.uk> writes:
> On Baytrail, we manually calculate busyness over the evaluation interval
> to avoid issues with miscaluations with RC6 enabled. However, it turns
> out that the DOWN_EI interrupt generator is completely bust - it
> operates in two modes, continuous or never. Neither of which are
> conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
> compute everything from the UP_EI which does seem to correspond to the
> desired interval.
>
> v2: Fixup gen6_rps_pm_mask() as well
> v3: Inline vlv_c0_above() to combine the now identical elapsed
> calculation for up/down and simplify the threshold testing
>
> Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v4.1+
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 2 +-
> drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
> drivers/gpu/drm/i915/intel_pm.c | 5 +--
> 3 files changed, 32 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c52aee5141ca..b399c8fe5ac1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt {
> unsigned boosts;
>
> /* manual wa residency calculations */
> - struct intel_rps_ei up_ei, down_ei;
> + struct intel_rps_ei ei;
>
> /*
> * Protects RPS/RC6 register access and PCU communication.
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d91078f8e9d4..b3f9181ef314 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
> ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> }
>
> -static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> - const struct intel_rps_ei *old,
> - const struct intel_rps_ei *now,
> - int threshold)
> -{
> - u64 time, c0;
> - unsigned int mul = 100;
> -
> - if (old->cz_clock == 0)
> - return false;
> -
> - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> - mul <<= 8;
> -
> - time = now->cz_clock - old->cz_clock;
> - time *= threshold * dev_priv->czclk_freq;
> -
> - /* Workload can be split between render + media, e.g. SwapBuffers
> - * being blitted in X after being rendered in mesa. To account for
> - * this we need to combine both engines into our activity counter.
> - */
> - c0 = now->render_c0 - old->render_c0;
> - c0 += now->media_c0 - old->media_c0;
> - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
> -
> - return c0 >= time;
> -}
> -
> void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> {
> - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> - dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
> }
>
> static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> {
> + const struct intel_rps_ei *prev = &dev_priv->rps.ei;
> struct intel_rps_ei now;
> u32 events = 0;
>
> - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
> + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> return 0;
>
> vlv_c0_read(dev_priv, &now);
> if (now.cz_clock == 0)
> return 0;
>
> - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> - if (!vlv_c0_above(dev_priv,
> - &dev_priv->rps.down_ei, &now,
> - dev_priv->rps.down_threshold))
> - events |= GEN6_PM_RP_DOWN_THRESHOLD;
> - dev_priv->rps.down_ei = now;
> - }
> + if (prev->cz_clock) {
> + u64 time, c0;
> + unsigned int mul;
>
> - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> - if (vlv_c0_above(dev_priv,
> - &dev_priv->rps.up_ei, &now,
> - dev_priv->rps.up_threshold))
> - events |= GEN6_PM_RP_UP_THRESHOLD;
> - dev_priv->rps.up_ei = now;
> + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */
scale to % as our thresholds are in %. But tiny bikeshed, so
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> + if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> + mul <<= 8;
> +
> + time = now.cz_clock - prev->cz_clock;
> + time *= dev_priv->czclk_freq;
> +
> + /* Workload can be split between render + media,
> + * e.g. SwapBuffers being blitted in X after being rendered in
> + * mesa. To account for this we need to combine both engines
> + * into our activity counter.
> + */
> + c0 = now.render_c0 - prev->render_c0;
> + c0 += now.media_c0 - prev->media_c0;
> + c0 *= mul;
> +
> + if (c0 > time * dev_priv->rps.up_threshold)
> + events = GEN6_PM_RP_UP_THRESHOLD;
> + else if (c0 < time * dev_priv->rps.down_threshold)
> + events = GEN6_PM_RP_DOWN_THRESHOLD;
> }
>
> + dev_priv->rps.ei = now;
> return events;
> }
>
> @@ -4284,7 +4267,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
> /* Let's track the enabled rps events */
> if (IS_VALLEYVIEW(dev_priv))
> /* WaGsvRC0ResidencyMethod:vlv */
> - dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
> + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> else
> dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 99e09f63d4b3..4db3a792c6c2 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5166,8 +5166,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
> {
> u32 mask = 0;
>
> + /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
> if (val > dev_priv->rps.min_freq_softlimit)
> - mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> if (val < dev_priv->rps.max_freq_softlimit)
> mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
>
> @@ -5277,7 +5278,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
> if (dev_priv->rps.enabled) {
> u8 freq;
>
> - if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
> + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> gen6_rps_reset_ei(dev_priv);
> I915_WRITE(GEN6_PMINTRMSK,
> gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> --
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
@ 2017-03-10 14:10 ` Mika Kuoppala
0 siblings, 0 replies; 10+ messages in thread
From: Mika Kuoppala @ 2017-03-10 14:10 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Chris Wilson, # v4 . 1+
Chris Wilson <chris@chris-wilson.co.uk> writes:
> On Baytrail, we manually calculate busyness over the evaluation interval
> to avoid issues with miscaluations with RC6 enabled. However, it turns
> out that the DOWN_EI interrupt generator is completely bust - it
> operates in two modes, continuous or never. Neither of which are
> conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
> compute everything from the UP_EI which does seem to correspond to the
> desired interval.
>
> v2: Fixup gen6_rps_pm_mask() as well
> v3: Inline vlv_c0_above() to combine the now identical elapsed
> calculation for up/down and simplify the threshold testing
>
> Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v4.1+
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 2 +-
> drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
> drivers/gpu/drm/i915/intel_pm.c | 5 +--
> 3 files changed, 32 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index c52aee5141ca..b399c8fe5ac1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt {
> unsigned boosts;
>
> /* manual wa residency calculations */
> - struct intel_rps_ei up_ei, down_ei;
> + struct intel_rps_ei ei;
>
> /*
> * Protects RPS/RC6 register access and PCU communication.
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d91078f8e9d4..b3f9181ef314 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
> ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> }
>
> -static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> - const struct intel_rps_ei *old,
> - const struct intel_rps_ei *now,
> - int threshold)
> -{
> - u64 time, c0;
> - unsigned int mul = 100;
> -
> - if (old->cz_clock == 0)
> - return false;
> -
> - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> - mul <<= 8;
> -
> - time = now->cz_clock - old->cz_clock;
> - time *= threshold * dev_priv->czclk_freq;
> -
> - /* Workload can be split between render + media, e.g. SwapBuffers
> - * being blitted in X after being rendered in mesa. To account for
> - * this we need to combine both engines into our activity counter.
> - */
> - c0 = now->render_c0 - old->render_c0;
> - c0 += now->media_c0 - old->media_c0;
> - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
> -
> - return c0 >= time;
> -}
> -
> void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> {
> - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> - dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
> }
>
> static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> {
> + const struct intel_rps_ei *prev = &dev_priv->rps.ei;
> struct intel_rps_ei now;
> u32 events = 0;
>
> - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
> + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> return 0;
>
> vlv_c0_read(dev_priv, &now);
> if (now.cz_clock == 0)
> return 0;
>
> - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> - if (!vlv_c0_above(dev_priv,
> - &dev_priv->rps.down_ei, &now,
> - dev_priv->rps.down_threshold))
> - events |= GEN6_PM_RP_DOWN_THRESHOLD;
> - dev_priv->rps.down_ei = now;
> - }
> + if (prev->cz_clock) {
> + u64 time, c0;
> + unsigned int mul;
>
> - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> - if (vlv_c0_above(dev_priv,
> - &dev_priv->rps.up_ei, &now,
> - dev_priv->rps.up_threshold))
> - events |= GEN6_PM_RP_UP_THRESHOLD;
> - dev_priv->rps.up_ei = now;
> + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */
scale to % as our thresholds are in %. But tiny bikeshed, so
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> + if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> + mul <<= 8;
> +
> + time = now.cz_clock - prev->cz_clock;
> + time *= dev_priv->czclk_freq;
> +
> + /* Workload can be split between render + media,
> + * e.g. SwapBuffers being blitted in X after being rendered in
> + * mesa. To account for this we need to combine both engines
> + * into our activity counter.
> + */
> + c0 = now.render_c0 - prev->render_c0;
> + c0 += now.media_c0 - prev->media_c0;
> + c0 *= mul;
> +
> + if (c0 > time * dev_priv->rps.up_threshold)
> + events = GEN6_PM_RP_UP_THRESHOLD;
> + else if (c0 < time * dev_priv->rps.down_threshold)
> + events = GEN6_PM_RP_DOWN_THRESHOLD;
> }
>
> + dev_priv->rps.ei = now;
> return events;
> }
>
> @@ -4284,7 +4267,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
> /* Let's track the enabled rps events */
> if (IS_VALLEYVIEW(dev_priv))
> /* WaGsvRC0ResidencyMethod:vlv */
> - dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
> + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> else
> dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 99e09f63d4b3..4db3a792c6c2 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5166,8 +5166,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
> {
> u32 mask = 0;
>
> + /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */
> if (val > dev_priv->rps.min_freq_softlimit)
> - mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
> if (val < dev_priv->rps.max_freq_softlimit)
> mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
>
> @@ -5277,7 +5278,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
> if (dev_priv->rps.enabled) {
> u8 freq;
>
> - if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
> + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> gen6_rps_reset_ei(dev_priv);
> I915_WRITE(GEN6_PMINTRMSK,
> gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> --
> 2.11.0
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments
2017-03-09 21:12 ` [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments Chris Wilson
@ 2017-03-10 14:13 ` Mika Kuoppala
0 siblings, 0 replies; 10+ messages in thread
From: Mika Kuoppala @ 2017-03-10 14:13 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Chris Wilson <chris@chris-wilson.co.uk> writes:
> To make our adjustments to RPS requires taking a mutex and potentially
> sleeping for an unknown duration - until we have completed our
> adjustments further RPS interrupts are immaterial (they are based on
> stale thresholds) and we can safely ignore them.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
> drivers/gpu/drm/i915/i915_irq.c | 28 +++++++++++++---------------
> 1 file changed, 13 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e8f4c78c4bdc..ab55a3ddafb3 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1149,30 +1149,21 @@ static void gen6_pm_rps_work(struct work_struct *work)
> {
> struct drm_i915_private *dev_priv =
> container_of(work, struct drm_i915_private, rps.work);
> - bool client_boost;
> + bool client_boost = false;
> int new_delay, adj, min, max;
> - u32 pm_iir;
> + u32 pm_iir = 0;
>
> spin_lock_irq(&dev_priv->irq_lock);
> - /* Speed up work cancelation during disabling rps interrupts. */
> - if (!dev_priv->rps.interrupts_enabled) {
> - spin_unlock_irq(&dev_priv->irq_lock);
> - return;
> + if (dev_priv->rps.interrupts_enabled) {
> + pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir);
> + client_boost = fetch_and_zero(&dev_priv->rps.client_boost);
> }
> -
> - pm_iir = dev_priv->rps.pm_iir;
> - dev_priv->rps.pm_iir = 0;
> - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
> - gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
> - client_boost = dev_priv->rps.client_boost;
> - dev_priv->rps.client_boost = false;
> spin_unlock_irq(&dev_priv->irq_lock);
>
> /* Make sure we didn't queue anything we're not going to process. */
> WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
> -
> if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
> - return;
> + goto out;
>
> mutex_lock(&dev_priv->rps.hw_lock);
>
> @@ -1229,6 +1220,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
> }
>
> mutex_unlock(&dev_priv->rps.hw_lock);
> +
> +out:
> + /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
> + spin_lock_irq(&dev_priv->irq_lock);
> + if (dev_priv->rps.interrupts_enabled)
> + gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
> + spin_unlock_irq(&dev_priv->irq_lock);
> }
>
>
> --
> 2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
2017-03-10 14:10 ` Mika Kuoppala
@ 2017-03-10 14:22 ` Chris Wilson
-1 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-03-10 14:22 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx, # v4 . 1+
On Fri, Mar 10, 2017 at 04:10:29PM +0200, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > On Baytrail, we manually calculate busyness over the evaluation interval
> > to avoid issues with miscaluations with RC6 enabled. However, it turns
> > out that the DOWN_EI interrupt generator is completely bust - it
> > operates in two modes, continuous or never. Neither of which are
> > conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
> > compute everything from the UP_EI which does seem to correspond to the
> > desired interval.
> >
> > v2: Fixup gen6_rps_pm_mask() as well
> > v3: Inline vlv_c0_above() to combine the now identical elapsed
> > calculation for up/down and simplify the threshold testing
> >
> > Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: <stable@vger.kernel.org> # v4.1+
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/i915_drv.h | 2 +-
> > drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
> > drivers/gpu/drm/i915/intel_pm.c | 5 +--
> > 3 files changed, 32 insertions(+), 48 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index c52aee5141ca..b399c8fe5ac1 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt {
> > unsigned boosts;
> >
> > /* manual wa residency calculations */
> > - struct intel_rps_ei up_ei, down_ei;
> > + struct intel_rps_ei ei;
> >
> > /*
> > * Protects RPS/RC6 register access and PCU communication.
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index d91078f8e9d4..b3f9181ef314 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
> > ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> > }
> >
> > -static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> > - const struct intel_rps_ei *old,
> > - const struct intel_rps_ei *now,
> > - int threshold)
> > -{
> > - u64 time, c0;
> > - unsigned int mul = 100;
> > -
> > - if (old->cz_clock == 0)
> > - return false;
> > -
> > - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> > - mul <<= 8;
> > -
> > - time = now->cz_clock - old->cz_clock;
> > - time *= threshold * dev_priv->czclk_freq;
> > -
> > - /* Workload can be split between render + media, e.g. SwapBuffers
> > - * being blitted in X after being rendered in mesa. To account for
> > - * this we need to combine both engines into our activity counter.
> > - */
> > - c0 = now->render_c0 - old->render_c0;
> > - c0 += now->media_c0 - old->media_c0;
> > - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
> > -
> > - return c0 >= time;
> > -}
> > -
> > void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> > {
> > - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> > - dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> > + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
> > }
> >
> > static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> > {
> > + const struct intel_rps_ei *prev = &dev_priv->rps.ei;
> > struct intel_rps_ei now;
> > u32 events = 0;
> >
> > - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
> > + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> > return 0;
> >
> > vlv_c0_read(dev_priv, &now);
> > if (now.cz_clock == 0)
> > return 0;
> >
> > - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> > - if (!vlv_c0_above(dev_priv,
> > - &dev_priv->rps.down_ei, &now,
> > - dev_priv->rps.down_threshold))
> > - events |= GEN6_PM_RP_DOWN_THRESHOLD;
> > - dev_priv->rps.down_ei = now;
> > - }
> > + if (prev->cz_clock) {
> > + u64 time, c0;
> > + unsigned int mul;
> >
> > - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> > - if (vlv_c0_above(dev_priv,
> > - &dev_priv->rps.up_ei, &now,
> > - dev_priv->rps.up_threshold))
> > - events |= GEN6_PM_RP_UP_THRESHOLD;
> > - dev_priv->rps.up_ei = now;
> > + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */
>
> scale to % as our thresholds are in %. But tiny bikeshed, so
You can have /* scale to % threshold */ but no more!
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
@ 2017-03-10 14:22 ` Chris Wilson
0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-03-10 14:22 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx, # v4 . 1+
On Fri, Mar 10, 2017 at 04:10:29PM +0200, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > On Baytrail, we manually calculate busyness over the evaluation interval
> > to avoid issues with miscaluations with RC6 enabled. However, it turns
> > out that the DOWN_EI interrupt generator is completely bust - it
> > operates in two modes, continuous or never. Neither of which are
> > conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
> > compute everything from the UP_EI which does seem to correspond to the
> > desired interval.
> >
> > v2: Fixup gen6_rps_pm_mask() as well
> > v3: Inline vlv_c0_above() to combine the now identical elapsed
> > calculation for up/down and simplify the threshold testing
> >
> > Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: <stable@vger.kernel.org> # v4.1+
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/i915_drv.h | 2 +-
> > drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++-------------------------
> > drivers/gpu/drm/i915/intel_pm.c | 5 +--
> > 3 files changed, 32 insertions(+), 48 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index c52aee5141ca..b399c8fe5ac1 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt {
> > unsigned boosts;
> >
> > /* manual wa residency calculations */
> > - struct intel_rps_ei up_ei, down_ei;
> > + struct intel_rps_ei ei;
> >
> > /*
> > * Protects RPS/RC6 register access and PCU communication.
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index d91078f8e9d4..b3f9181ef314 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> > @@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv,
> > ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> > }
> >
> > -static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> > - const struct intel_rps_ei *old,
> > - const struct intel_rps_ei *now,
> > - int threshold)
> > -{
> > - u64 time, c0;
> > - unsigned int mul = 100;
> > -
> > - if (old->cz_clock == 0)
> > - return false;
> > -
> > - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> > - mul <<= 8;
> > -
> > - time = now->cz_clock - old->cz_clock;
> > - time *= threshold * dev_priv->czclk_freq;
> > -
> > - /* Workload can be split between render + media, e.g. SwapBuffers
> > - * being blitted in X after being rendered in mesa. To account for
> > - * this we need to combine both engines into our activity counter.
> > - */
> > - c0 = now->render_c0 - old->render_c0;
> > - c0 += now->media_c0 - old->media_c0;
> > - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC;
> > -
> > - return c0 >= time;
> > -}
> > -
> > void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> > {
> > - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> > - dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> > + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei));
> > }
> >
> > static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> > {
> > + const struct intel_rps_ei *prev = &dev_priv->rps.ei;
> > struct intel_rps_ei now;
> > u32 events = 0;
> >
> > - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
> > + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> > return 0;
> >
> > vlv_c0_read(dev_priv, &now);
> > if (now.cz_clock == 0)
> > return 0;
> >
> > - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> > - if (!vlv_c0_above(dev_priv,
> > - &dev_priv->rps.down_ei, &now,
> > - dev_priv->rps.down_threshold))
> > - events |= GEN6_PM_RP_DOWN_THRESHOLD;
> > - dev_priv->rps.down_ei = now;
> > - }
> > + if (prev->cz_clock) {
> > + u64 time, c0;
> > + unsigned int mul;
> >
> > - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> > - if (vlv_c0_above(dev_priv,
> > - &dev_priv->rps.up_ei, &now,
> > - dev_priv->rps.up_threshold))
> > - events |= GEN6_PM_RP_UP_THRESHOLD;
> > - dev_priv->rps.up_ei = now;
> > + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */
>
> scale to % as our thresholds are in %. But tiny bikeshed, so
You can have /* scale to % threshold */ but no more!
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail
2017-03-10 14:10 ` Mika Kuoppala
(?)
(?)
@ 2017-03-10 15:07 ` Chris Wilson
-1 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2017-03-10 15:07 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx, # v4 . 1+
On Fri, Mar 10, 2017 at 04:10:29PM +0200, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
>
> > On Baytrail, we manually calculate busyness over the evaluation interval
> > to avoid issues with miscaluations with RC6 enabled. However, it turns
> > out that the DOWN_EI interrupt generator is completely bust - it
> > operates in two modes, continuous or never. Neither of which are
> > conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just
> > compute everything from the UP_EI which does seem to correspond to the
> > desired interval.
> >
> > v2: Fixup gen6_rps_pm_mask() as well
> > v3: Inline vlv_c0_above() to combine the now identical elapsed
> > calculation for up/down and simplify the threshold testing
> >
> > Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: <stable@vger.kernel.org> # v4.1+
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Many thanks for the review. Pushed, active rps on byt is going to upset
the apple cart slightly :|
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2017-03-10 15:07 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-03-09 21:12 [PATCH v3 1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Chris Wilson
2017-03-09 21:12 ` [PATCH v3 2/3] drm/i915: Use max(render, media) for Baytrail busyness calculation Chris Wilson
2017-03-09 21:12 ` [PATCH v3 3/3] drm/i915: Defer unmasking RPS interrupts until after making adjustments Chris Wilson
2017-03-10 14:13 ` Mika Kuoppala
2017-03-10 9:04 ` ✓ Fi.CI.BAT: success for series starting with [v3,1/3] drm/i915: Stop using RP_DOWN_EI on Baytrail Patchwork
2017-03-10 14:10 ` [PATCH v3 1/3] " Mika Kuoppala
2017-03-10 14:10 ` Mika Kuoppala
2017-03-10 14:22 ` Chris Wilson
2017-03-10 14:22 ` Chris Wilson
2017-03-10 15:07 ` Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.