* [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps
@ 2014-07-10 19:31 Chris Wilson
2014-07-10 19:31 ` [PATCH 2/7] drm/i915: Make the RPS interrupt generation mask handle the vlv wa Chris Wilson
` (5 more replies)
0 siblings, 6 replies; 10+ messages in thread
From: Chris Wilson @ 2014-07-10 19:31 UTC (permalink / raw)
To: intel-gfx
Place the RPS counters inside the RPS struct.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 18 +++++++-----------
drivers/gpu/drm/i915/i915_irq.c | 32 ++++++++++++++++----------------
2 files changed, 23 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fed405d1a7eb..daee71ef201d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -907,10 +907,10 @@ struct vlv_s0ix_state {
u32 clock_gate_dis2;
};
-struct intel_rps_ei_calc {
- u32 cz_ts_ei;
- u32 render_ei_c0;
- u32 media_ei_c0;
+struct intel_rps_ei {
+ u32 cz_clock;
+ u32 render_c0;
+ u32 media_c0;
};
struct intel_gen6_power_mgmt {
@@ -946,6 +946,9 @@ struct intel_gen6_power_mgmt {
struct delayed_work delayed_resume_work;
struct work_struct boost_work;
+ /* manual wa residency calculations */
+ struct intel_rps_ei up_ei, down_ei;
+
/*
* Protects RPS/RC6 register access and PCU communication.
* Must be taken after struct_mutex if nested.
@@ -1548,13 +1551,6 @@ struct drm_i915_private {
/* gen6+ rps state */
struct intel_gen6_power_mgmt rps;
- /* rps wa up ei calculation */
- struct intel_rps_ei_calc rps_up_ei;
-
- /* rps wa down ei calculation */
- struct intel_rps_ei_calc rps_down_ei;
-
-
/* ilk-only ips/rps state. Everything in here is protected by the global
* mchdev_lock in intel_pm.c */
struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a2d50980b827..8e19d031c05d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1283,7 +1283,7 @@ static void notify_ring(struct drm_device *dev,
}
static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
- struct intel_rps_ei_calc *rps_ei)
+ struct intel_rps_ei *rps_ei)
{
u32 cz_ts, cz_freq_khz;
u32 render_count, media_count;
@@ -1296,22 +1296,22 @@ static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
- if (rps_ei->cz_ts_ei == 0) {
- rps_ei->cz_ts_ei = cz_ts;
- rps_ei->render_ei_c0 = render_count;
- rps_ei->media_ei_c0 = media_count;
+ if (rps_ei->cz_clock == 0) {
+ rps_ei->cz_clock = cz_ts;
+ rps_ei->render_c0 = render_count;
+ rps_ei->media_c0 = media_count;
return dev_priv->rps.cur_freq;
}
- elapsed_time = cz_ts - rps_ei->cz_ts_ei;
- rps_ei->cz_ts_ei = cz_ts;
+ elapsed_time = cz_ts - rps_ei->cz_clock;
+ rps_ei->cz_clock = cz_ts;
- elapsed_render = render_count - rps_ei->render_ei_c0;
- rps_ei->render_ei_c0 = render_count;
+ elapsed_render = render_count - rps_ei->render_c0;
+ rps_ei->render_c0 = render_count;
- elapsed_media = media_count - rps_ei->media_ei_c0;
- rps_ei->media_ei_c0 = media_count;
+ elapsed_media = media_count - rps_ei->media_c0;
+ rps_ei->media_c0 = media_count;
/* Convert all the counters into common unit of milli sec */
elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
@@ -1347,9 +1347,9 @@ static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
- vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
- vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+ if (dev_priv->rps.up_ei.cz_clock == 0) {
+ vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
+ vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
return dev_priv->rps.cur_freq;
}
@@ -1364,10 +1364,10 @@ static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
dev_priv->rps.ei_interrupt_count = 0;
residency_C0_down = vlv_c0_residency(dev_priv,
- &dev_priv->rps_down_ei);
+ &dev_priv->rps.down_ei);
} else {
residency_C0_up = vlv_c0_residency(dev_priv,
- &dev_priv->rps_up_ei);
+ &dev_priv->rps.up_ei);
}
new_delay = dev_priv->rps.cur_freq;
--
2.0.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/7] drm/i915: Make the RPS interrupt generation mask handle the vlv wa
2014-07-10 19:31 [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps Chris Wilson
@ 2014-07-10 19:31 ` Chris Wilson
2014-07-10 20:32 ` Daniel Vetter
2014-07-10 19:31 ` [PATCH 3/7] drm/i915: Include the RPS evalutation metrics in debugfs for Baytrail Chris Wilson
` (4 subsequent siblings)
5 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2014-07-10 19:31 UTC (permalink / raw)
To: intel-gfx
We can eliminate a lot of special case code by making the computation of
the interrupt mask be correct for all callers.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/intel_pm.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 5c27065bac17..1302e1bc9136 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3183,6 +3183,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
if (val < dev_priv->rps.max_freq_softlimit)
mask |= GEN6_PM_RP_UP_THRESHOLD;
+ mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
+ mask &= dev_priv->pm_rps_events;
+
/* IVB and SNB hard hangs on looping batchbuffer
* if GEN6_PM_UP_EI_EXPIRED is masked.
*/
@@ -3274,11 +3277,8 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
vlv_force_gfx_clock(dev_priv, false);
- if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
- I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
- else
- I915_WRITE(GEN6_PMINTRMSK,
- gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ I915_WRITE(GEN6_PMINTRMSK,
+ gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
}
void gen6_rps_idle(struct drm_i915_private *dev_priv)
--
2.0.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/7] drm/i915: Include the RPS evalutation metrics in debugfs for Baytrail
2014-07-10 19:31 [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps Chris Wilson
2014-07-10 19:31 ` [PATCH 2/7] drm/i915: Make the RPS interrupt generation mask handle the vlv wa Chris Wilson
@ 2014-07-10 19:31 ` Chris Wilson
2014-07-10 19:31 ` [PATCH 4/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
` (3 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2014-07-10 19:31 UTC (permalink / raw)
To: intel-gfx
Baytrail, like Sandybridge+, also has the RPS registers which are useful
to monitor. In addition, we were missing the evaluation interval
registers so add those to all.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 186 ++++++++++++++++++------------------
1 file changed, 94 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e8c4ddd9369b..c1741799d673 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1081,13 +1081,13 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- int ret = 0;
intel_runtime_pm_get(dev_priv);
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
- if (IS_GEN5(dev)) {
+ if (INTEL_INFO(dev)->gen < 5) {
+ seq_puts(m, "no P-state info available\n");
+ } else if (INTEL_INFO(dev)->gen < 6) {
u16 rgvswctl = I915_READ16(MEMSWCTL);
u16 rgvstat = I915_READ16(MEMSTAT_ILK);
@@ -1097,124 +1097,126 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
MEMSTAT_VID_SHIFT);
seq_printf(m, "Current P-state: %d\n",
(rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
- } else if (IS_GEN6(dev) || (IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) ||
- IS_BROADWELL(dev)) {
- u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
- u32 rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS);
- u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+ } else {
u32 rpmodectl, rpinclimit, rpdeclimit;
- u32 rpstat, cagf, reqf;
- u32 rpupei, rpcurup, rpprevup;
- u32 rpdownei, rpcurdown, rpprevdown;
- int max_freq;
-
- /* RPSTAT1 is in the GT power well */
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- goto out;
+ u32 rpupei, rpcurupei, rpcurup, rpprevup;
+ u32 rpdownei, rpcurdownei, rpcurdown, rpprevdown;
gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
- reqf = I915_READ(GEN6_RPNSWREQ);
- reqf &= ~GEN6_TURBO_DISABLE;
- if (IS_HASWELL(dev) || IS_BROADWELL(dev))
- reqf >>= 24;
- else
- reqf >>= 25;
- reqf *= GT_FREQUENCY_MULTIPLIER;
+ if (IS_VALLEYVIEW(dev)) {
+ u32 freq_sts, val;
+
+ mutex_lock(&dev_priv->rps.hw_lock);
+ freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+ seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+ seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
+
+ val = valleyview_rps_max_freq(dev_priv);
+ seq_printf(m, "max GPU freq: %d MHz\n",
+ vlv_gpu_freq(dev_priv, val));
+
+ val = valleyview_rps_min_freq(dev_priv);
+ seq_printf(m, "min GPU freq: %d MHz\n",
+ vlv_gpu_freq(dev_priv, val));
+
+ seq_printf(m, "current GPU freq: %d MHz\n",
+ vlv_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
+ mutex_unlock(&dev_priv->rps.hw_lock);
+ } else {
+ u32 gt_perf_status;
+ u32 rp_state_limits;
+ u32 rp_state_cap;
+ u32 cagf, reqf;
+ int max_freq;
+
+ gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
+ rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS);
+ rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+
+ reqf = I915_READ(GEN6_RPNSWREQ);
+ reqf &= ~GEN6_TURBO_DISABLE;
+ if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+ reqf >>= 24;
+ else
+ reqf >>= 25;
+ reqf *= GT_FREQUENCY_MULTIPLIER;
+
+ cagf = I915_READ(GEN6_RPSTAT1);
+ if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+ cagf = (cagf & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+ else
+ cagf = (cagf & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+ cagf *= GT_FREQUENCY_MULTIPLIER;
+
+ seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n",
+ I915_READ(GEN6_PMIER),
+ I915_READ(GEN6_PMIMR),
+ I915_READ(GEN6_PMISR),
+ I915_READ(GEN6_PMIIR),
+ I915_READ(GEN6_PMINTRMSK));
+ seq_printf(m, "GT_PERF_STATUS: 0x%08x (ratio: %d, VID: %d, limit: %d)\n",
+ gt_perf_status,
+ (gt_perf_status & 0xff00) >> 8,
+ gt_perf_status & 0xff,
+ rp_state_limits & 0xff);
+
+ max_freq = (rp_state_cap & 0xff0000) >> 16;
+ seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
+ max_freq * GT_FREQUENCY_MULTIPLIER);
+
+ max_freq = (rp_state_cap & 0xff00) >> 8;
+ seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
+ max_freq * GT_FREQUENCY_MULTIPLIER);
+
+ max_freq = rp_state_cap & 0xff;
+ seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ max_freq * GT_FREQUENCY_MULTIPLIER);
+
+ seq_printf(m, "Max overclocked frequency: %dMHz\n",
+ dev_priv->rps.max_freq * GT_FREQUENCY_MULTIPLIER);
+
+ seq_printf(m, "requested GPU freq: %d MHz\n", cagf);
+ seq_printf(m, "current GPU freq: %d MHz\n", reqf);
+ }
rpmodectl = I915_READ(GEN6_RP_CONTROL);
rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD);
rpdeclimit = I915_READ(GEN6_RP_DOWN_THRESHOLD);
- rpstat = I915_READ(GEN6_RPSTAT1);
- rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
+ rpupei = I915_READ(GEN6_RP_UP_EI);
+ rpcurupei = I915_READ(GEN6_RP_CUR_UP_EI);
rpcurup = I915_READ(GEN6_RP_CUR_UP);
rpprevup = I915_READ(GEN6_RP_PREV_UP);
- rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI);
+ rpdownei = I915_READ(GEN6_RP_DOWN_EI);
+ rpcurdownei = I915_READ(GEN6_RP_CUR_DOWN_EI);
rpcurdown = I915_READ(GEN6_RP_CUR_DOWN);
rpprevdown = I915_READ(GEN6_RP_PREV_DOWN);
- if (IS_HASWELL(dev) || IS_BROADWELL(dev))
- cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
- else
- cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
- cagf *= GT_FREQUENCY_MULTIPLIER;
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
- mutex_unlock(&dev->struct_mutex);
- seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n",
- I915_READ(GEN6_PMIER),
- I915_READ(GEN6_PMIMR),
- I915_READ(GEN6_PMISR),
- I915_READ(GEN6_PMIIR),
- I915_READ(GEN6_PMINTRMSK));
- seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
- seq_printf(m, "Render p-state ratio: %d\n",
- (gt_perf_status & 0xff00) >> 8);
- seq_printf(m, "Render p-state VID: %d\n",
- gt_perf_status & 0xff);
- seq_printf(m, "Render p-state limit: %d\n",
- rp_state_limits & 0xff);
- seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
- seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
- seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
- seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
- seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
- seq_printf(m, "CAGF: %dMHz\n", cagf);
- seq_printf(m, "RP CUR UP EI: %dus\n", rpupei &
+ seq_puts(m, "\n");
+ seq_printf(m, "RP CONTROL: 0x%08x\n", rpmodectl);
+ seq_printf(m, "RP UP EI: 0x%08x\n", rpupei);
+ seq_printf(m, "RP UP THRESHOLD: 0x%08x\n", rpinclimit);
+ seq_printf(m, "RP DOWN EI: 0x%08x\n", rpdownei);
+ seq_printf(m, "RP DOWN THRESHOLD: 0x%08x\n", rpdeclimit);
+ seq_printf(m, "RP CUR UP EI: %dus\n", rpcurupei &
GEN6_CURICONT_MASK);
seq_printf(m, "RP CUR UP: %dus\n", rpcurup &
GEN6_CURBSYTAVG_MASK);
seq_printf(m, "RP PREV UP: %dus\n", rpprevup &
GEN6_CURBSYTAVG_MASK);
- seq_printf(m, "RP CUR DOWN EI: %dus\n", rpdownei &
+ seq_printf(m, "RP CUR DOWN EI: %dus\n", rpcurdownei &
GEN6_CURIAVG_MASK);
seq_printf(m, "RP CUR DOWN: %dus\n", rpcurdown &
GEN6_CURBSYTAVG_MASK);
seq_printf(m, "RP PREV DOWN: %dus\n", rpprevdown &
GEN6_CURBSYTAVG_MASK);
-
- max_freq = (rp_state_cap & 0xff0000) >> 16;
- seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
- max_freq * GT_FREQUENCY_MULTIPLIER);
-
- max_freq = (rp_state_cap & 0xff00) >> 8;
- seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
- max_freq * GT_FREQUENCY_MULTIPLIER);
-
- max_freq = rp_state_cap & 0xff;
- seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
- max_freq * GT_FREQUENCY_MULTIPLIER);
-
- seq_printf(m, "Max overclocked frequency: %dMHz\n",
- dev_priv->rps.max_freq * GT_FREQUENCY_MULTIPLIER);
- } else if (IS_VALLEYVIEW(dev)) {
- u32 freq_sts, val;
-
- mutex_lock(&dev_priv->rps.hw_lock);
- freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
- seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
- seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
-
- val = valleyview_rps_max_freq(dev_priv);
- seq_printf(m, "max GPU freq: %d MHz\n",
- vlv_gpu_freq(dev_priv, val));
-
- val = valleyview_rps_min_freq(dev_priv);
- seq_printf(m, "min GPU freq: %d MHz\n",
- vlv_gpu_freq(dev_priv, val));
-
- seq_printf(m, "current GPU freq: %d MHz\n",
- vlv_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
- mutex_unlock(&dev_priv->rps.hw_lock);
- } else {
- seq_puts(m, "no P-state info available\n");
}
-out:
intel_runtime_pm_put(dev_priv);
- return ret;
+ return 0;
}
static int ironlake_drpc_info(struct seq_file *m)
--
2.0.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/7] drm/i915: Improved w/a for rps on Baytrail
2014-07-10 19:31 [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps Chris Wilson
2014-07-10 19:31 ` [PATCH 2/7] drm/i915: Make the RPS interrupt generation mask handle the vlv wa Chris Wilson
2014-07-10 19:31 ` [PATCH 3/7] drm/i915: Include the RPS evalutation metrics in debugfs for Baytrail Chris Wilson
@ 2014-07-10 19:31 ` Chris Wilson
2014-09-01 8:23 ` Ville Syrjälä
2014-07-10 19:31 ` [PATCH 5/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
` (2 subsequent siblings)
5 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2014-07-10 19:31 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi
Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
Author: Deepak S <deepak.s@linux.intel.com>
Date: Thu Jul 3 17:33:01 2014 -0400
drm/i915/vlv: WA for Turbo and RC6 to work together.
Other than code clarity, the major improvement is to disable the extra
interrupts generated when idle. However, the reclocking remains rather
slow under the new manual regime, in particular it fails to downclock as
quickly as desired.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_irq.c | 166 ++++++++++++-----------------------
drivers/gpu/drm/i915/i915_reg.h | 4 +-
drivers/gpu/drm/i915/intel_display.c | 2 +
drivers/gpu/drm/i915/intel_drv.h | 2 +
drivers/gpu/drm/i915/intel_pm.c | 13 +++
5 files changed, 73 insertions(+), 114 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8e19d031c05d..2db5dbb87ced 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1282,129 +1282,72 @@ static void notify_ring(struct drm_device *dev,
i915_queue_hangcheck(dev);
}
-static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
- struct intel_rps_ei *rps_ei)
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+ struct intel_rps_ei *ei)
{
- u32 cz_ts, cz_freq_khz;
- u32 render_count, media_count;
- u32 elapsed_render, elapsed_media, elapsed_time;
- u32 residency = 0;
-
- cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
- cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
-
- render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
- media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
-
- if (rps_ei->cz_clock == 0) {
- rps_ei->cz_clock = cz_ts;
- rps_ei->render_c0 = render_count;
- rps_ei->media_c0 = media_count;
-
- return dev_priv->rps.cur_freq;
- }
-
- elapsed_time = cz_ts - rps_ei->cz_clock;
- rps_ei->cz_clock = cz_ts;
-
- elapsed_render = render_count - rps_ei->render_c0;
- rps_ei->render_c0 = render_count;
-
- elapsed_media = media_count - rps_ei->media_c0;
- rps_ei->media_c0 = media_count;
-
- /* Convert all the counters into common unit of milli sec */
- elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
- elapsed_render /= cz_freq_khz;
- elapsed_media /= cz_freq_khz;
-
- /*
- * Calculate overall C0 residency percentage
- * only if elapsed time is non zero
- */
- if (elapsed_time) {
- residency =
- ((max(elapsed_render, elapsed_media) * 100)
- / elapsed_time);
- }
-
- return residency;
+ ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+ ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+ ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
}
-/**
- * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
- * busy-ness calculated from C0 counters of render & media power wells
- * @dev_priv: DRM device private
- *
- */
-static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+static bool vlv_c0_above(struct drm_i915_private *dev_priv,
+ const struct intel_rps_ei *old,
+ const struct intel_rps_ei *now,
+ int threshold)
{
- u32 residency_C0_up = 0, residency_C0_down = 0;
- u8 new_delay, adj;
+ u64 time = now->cz_clock - old->cz_clock;
+ u64 c0 = max(now->render_c0 - old->render_c0,
+ now->media_c0 - old->media_c0);
- dev_priv->rps.ei_interrupt_count++;
+ c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
+ time *= threshold * dev_priv->mem_freq;
+ return c0 >= time;
+}
- WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
+{
+ vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
+ dev_priv->rps.up_ei = dev_priv->rps.down_ei;
+ dev_priv->rps.ei_interrupt_count = 0;
+}
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+ struct intel_rps_ei now;
+ u32 events = 0;
- if (dev_priv->rps.up_ei.cz_clock == 0) {
- vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
- vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
- return dev_priv->rps.cur_freq;
- }
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ return 0;
+ vlv_c0_read(dev_priv, &now);
/*
* To down throttle, C0 residency should be less than down threshold
* for continous EI intervals. So calculate down EI counters
* once in VLV_INT_COUNT_FOR_DOWN_EI
*/
- if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
-
+ if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
+ pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
dev_priv->rps.ei_interrupt_count = 0;
-
- residency_C0_down = vlv_c0_residency(dev_priv,
- &dev_priv->rps.down_ei);
- } else {
- residency_C0_up = vlv_c0_residency(dev_priv,
- &dev_priv->rps.up_ei);
}
- new_delay = dev_priv->rps.cur_freq;
-
- adj = dev_priv->rps.last_adj;
- /* C0 residency is greater than UP threshold. Increase Frequency */
- if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
- if (adj > 0)
- adj *= 2;
- else
- adj = 1;
-
- if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
- new_delay = dev_priv->rps.cur_freq + adj;
-
- /*
- * For better performance, jump directly
- * to RPe if we're below it.
- */
- if (new_delay < dev_priv->rps.efficient_freq)
- new_delay = dev_priv->rps.efficient_freq;
+ if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
+ if (!vlv_c0_above(dev_priv,
+ &dev_priv->rps.down_ei, &now,
+ VLV_RP_DOWN_EI_THRESHOLD))
+ events |= GEN6_PM_RP_DOWN_THRESHOLD;
+ dev_priv->rps.down_ei = now;
+ }
- } else if (!dev_priv->rps.ei_interrupt_count &&
- (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
- if (adj < 0)
- adj *= 2;
- else
- adj = -1;
- /*
- * This means, C0 residency is less than down threshold over
- * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
- */
- if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
- new_delay = dev_priv->rps.cur_freq + adj;
+ if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+ if (vlv_c0_above(dev_priv,
+ &dev_priv->rps.up_ei, &now,
+ VLV_RP_UP_EI_THRESHOLD))
+ events |= GEN6_PM_RP_UP_THRESHOLD;
+ dev_priv->rps.up_ei = now;
}
- return new_delay;
+ return events;
}
static void gen6_pm_rps_work(struct work_struct *work)
@@ -1433,14 +1376,14 @@ static void gen6_pm_rps_work(struct work_struct *work)
mutex_lock(&dev_priv->rps.hw_lock);
+ pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
+
adj = dev_priv->rps.last_adj;
if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
if (adj > 0)
adj *= 2;
- else {
- /* CHV needs even encode values */
- adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
- }
+ else
+ adj = 1;
new_delay = dev_priv->rps.cur_freq + adj;
/*
@@ -1455,15 +1398,11 @@ static void gen6_pm_rps_work(struct work_struct *work)
else
new_delay = dev_priv->rps.min_freq_softlimit;
adj = 0;
- } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
- new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
adj *= 2;
- else {
- /* CHV needs even encode values */
- adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
- }
+ else
+ adj = -1;
new_delay = dev_priv->rps.cur_freq + adj;
} else { /* unknown event */
new_delay = dev_priv->rps.cur_freq;
@@ -1475,6 +1414,9 @@ static void gen6_pm_rps_work(struct work_struct *work)
new_delay = clamp_t(int, new_delay,
dev_priv->rps.min_freq_softlimit,
dev_priv->rps.max_freq_softlimit);
+ /* CHV needs even encode values */
+ if (IS_CHERRYVIEW(dev_priv))
+ new_delay = new_delay & ~1;
dev_priv->rps.last_adj = new_delay - dev_priv->rps.cur_freq;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 59eaebce2b02..66de6e3e99bf 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5568,8 +5568,8 @@ enum punit_power_well {
#define GEN6_GT_GFX_RC6p 0x13810C
#define GEN6_GT_GFX_RC6pp 0x138110
-#define VLV_RENDER_C0_COUNT_REG 0x138118
-#define VLV_MEDIA_C0_COUNT_REG 0x13811C
+#define VLV_RENDER_C0_COUNT 0x138118
+#define VLV_MEDIA_C0_COUNT 0x13811C
#define GEN6_PCODE_MAILBOX 0x138124
#define GEN6_PCODE_READY (1<<31)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a17c2e80f9f2..960a3be39ad7 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8850,6 +8850,8 @@ void intel_mark_busy(struct drm_device *dev)
intel_runtime_pm_get(dev_priv);
i915_update_gfx_val(dev_priv);
+ if (INTEL_INFO(dev)->gen >= 6)
+ gen6_rps_busy(dev_priv);
dev_priv->mm.busy = true;
}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 59748497b7ef..857b18df974e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1005,6 +1005,8 @@ void intel_suspend_gt_powersave(struct drm_device *dev);
void intel_reset_gt_powersave(struct drm_device *dev);
void ironlake_teardown_rc6(struct drm_device *dev);
void gen6_update_ring_freq(struct drm_device *dev);
+void gen6_rps_busy(struct drm_i915_private *dev_priv);
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
void gen6_rps_boost(struct drm_i915_private *dev_priv);
void intel_queue_rps_boost(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1302e1bc9136..eb6c54a5b112 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3281,6 +3281,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
}
+void gen6_rps_busy(struct drm_i915_private *dev_priv)
+{
+ mutex_lock(&dev_priv->rps.hw_lock);
+ if (dev_priv->rps.enabled) {
+ if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+ gen6_rps_reset_ei(dev_priv);
+ I915_WRITE(GEN6_PMINTRMSK,
+ gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ }
+ mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
void gen6_rps_idle(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = dev_priv->dev;
@@ -3292,6 +3304,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
else
gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
dev_priv->rps.last_adj = 0;
+ I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
}
mutex_unlock(&dev_priv->rps.hw_lock);
}
--
2.0.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 5/7] drm/i915: Use down ei for manual Baytrail RPS calculations
2014-07-10 19:31 [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps Chris Wilson
` (2 preceding siblings ...)
2014-07-10 19:31 ` [PATCH 4/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
@ 2014-07-10 19:31 ` Chris Wilson
2014-07-10 19:31 ` [PATCH 6/7] drm/i915: Improve code clarity of vlv_set_rps_idle() Chris Wilson
2014-07-10 19:31 ` [PATCH 7/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
5 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2014-07-10 19:31 UTC (permalink / raw)
To: intel-gfx
Use both up/down manual ei calculations for symmetry and greater
flexibility for reclocking, instead of faking the down interrupt based
on a fixed integer number of up interrupts.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 2 --
drivers/gpu/drm/i915/i915_irq.c | 15 ++-------------
drivers/gpu/drm/i915/i915_reg.h | 1 -
drivers/gpu/drm/i915/intel_pm.c | 5 ++---
4 files changed, 4 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index daee71ef201d..149015b5cb24 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -937,8 +937,6 @@ struct intel_gen6_power_mgmt {
u8 rp1_freq; /* "less than" RP0 power/freqency */
u8 rp0_freq; /* Non-overclocked max frequency. */
- u32 ei_interrupt_count;
-
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2db5dbb87ced..43bd40cc75a6 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1308,7 +1308,6 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
{
vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
dev_priv->rps.up_ei = dev_priv->rps.down_ei;
- dev_priv->rps.ei_interrupt_count = 0;
}
static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
@@ -1316,21 +1315,11 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
struct intel_rps_ei now;
u32 events = 0;
- if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
return 0;
vlv_c0_read(dev_priv, &now);
- /*
- * To down throttle, C0 residency should be less than down threshold
- * for continous EI intervals. So calculate down EI counters
- * once in VLV_INT_COUNT_FOR_DOWN_EI
- */
- if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
- pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
- dev_priv->rps.ei_interrupt_count = 0;
- }
-
if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
if (!vlv_c0_above(dev_priv,
&dev_priv->rps.down_ei, &now,
@@ -4551,7 +4540,7 @@ void intel_irq_init(struct drm_device *dev)
/* Let's track the enabled rps events */
if (IS_VALLEYVIEW(dev))
/* WaGsvRC0ResidenncyMethod:VLV */
- dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED;
else
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 66de6e3e99bf..5571f7714f78 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -566,7 +566,6 @@ enum punit_power_well {
#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
#define VLV_RP_UP_EI_THRESHOLD 90
#define VLV_RP_DOWN_EI_THRESHOLD 70
-#define VLV_INT_COUNT_FOR_DOWN_EI 5
/* vlv2 north clock has */
#define CCK_FUSE_REG 0x8
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index eb6c54a5b112..f29c643c9926 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3179,11 +3179,10 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
u32 mask = 0;
if (val > dev_priv->rps.min_freq_softlimit)
- mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+ mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
if (val < dev_priv->rps.max_freq_softlimit)
- mask |= GEN6_PM_RP_UP_THRESHOLD;
+ mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
- mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
mask &= dev_priv->pm_rps_events;
/* IVB and SNB hard hangs on looping batchbuffer
--
2.0.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 6/7] drm/i915: Improve code clarity of vlv_set_rps_idle()
2014-07-10 19:31 [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps Chris Wilson
` (3 preceding siblings ...)
2014-07-10 19:31 ` [PATCH 5/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
@ 2014-07-10 19:31 ` Chris Wilson
2014-07-10 19:31 ` [PATCH 7/7] drm/i915: Agressive downclocking on Baytrail Chris Wilson
5 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2014-07-10 19:31 UTC (permalink / raw)
To: intel-gfx
Use a short local variable to pass around the desired idle frequency.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/intel_pm.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f29c643c9926..1b1713e37d16 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3246,10 +3246,11 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = dev_priv->dev;
+ u32 val = dev_priv->rps.min_freq_softlimit;
/* Latest VLV doesn't need to force the gfx clock */
if (dev->pdev->revision >= 0xd) {
- valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+ valleyview_set_rps(dev_priv->dev, val);
return;
}
@@ -3257,7 +3258,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
* When we are idle. Drop to min voltage state.
*/
- if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
+ if (dev_priv->rps.cur_freq <= val)
return;
/* Mask turbo interrupt so that they will not come in between */
@@ -3265,10 +3266,9 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
vlv_force_gfx_clock(dev_priv, true);
- dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
+ dev_priv->rps.cur_freq = val;
- vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
- dev_priv->rps.min_freq_softlimit);
+ vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
& GENFREQSTATUS) == 0, 5))
@@ -3276,8 +3276,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
vlv_force_gfx_clock(dev_priv, false);
- I915_WRITE(GEN6_PMINTRMSK,
- gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
}
void gen6_rps_busy(struct drm_i915_private *dev_priv)
--
2.0.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 7/7] drm/i915: Aggressive downclocking on Baytrail
2014-07-10 19:31 [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps Chris Wilson
` (4 preceding siblings ...)
2014-07-10 19:31 ` [PATCH 6/7] drm/i915: Improve code clarity of vlv_set_rps_idle() Chris Wilson
@ 2014-07-10 19:31 ` Chris Wilson
5 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2014-07-10 19:31 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Rodrigo Vivi
Reuse the same reclocking strategy for Baytrail as on its bigger brethren,
Sandybridge and Ivybridge. In particular, this makes the device quicker
to reclock (both up and down) though the tendency now is to downclock
more aggressively to compensate for the RPS boosts.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Deepak S <deepak.s@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_debugfs.c | 4 ++--
drivers/gpu/drm/i915/i915_drv.h | 3 +++
drivers/gpu/drm/i915/i915_irq.c | 4 ++--
drivers/gpu/drm/i915/i915_reg.h | 2 --
drivers/gpu/drm/i915/intel_pm.c | 11 ++++++++++-
5 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c1741799d673..0c538bf398e5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1198,9 +1198,9 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
seq_puts(m, "\n");
seq_printf(m, "RP CONTROL: 0x%08x\n", rpmodectl);
seq_printf(m, "RP UP EI: 0x%08x\n", rpupei);
- seq_printf(m, "RP UP THRESHOLD: 0x%08x\n", rpinclimit);
+ seq_printf(m, "RP UP THRESHOLD: 0x%08x [%d%%]\n", rpinclimit, dev_priv->rps.up_threshold);
seq_printf(m, "RP DOWN EI: 0x%08x\n", rpdownei);
- seq_printf(m, "RP DOWN THRESHOLD: 0x%08x\n", rpdeclimit);
+ seq_printf(m, "RP DOWN THRESHOLD: 0x%08x [%d%%]\n", rpdeclimit, dev_priv->rps.down_threshold);
seq_printf(m, "RP CUR UP EI: %dus\n", rpcurupei &
GEN6_CURICONT_MASK);
seq_printf(m, "RP CUR UP: %dus\n", rpcurup &
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 149015b5cb24..65e440a9a086 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -937,6 +937,9 @@ struct intel_gen6_power_mgmt {
u8 rp1_freq; /* "less than" RP0 power/freqency */
u8 rp0_freq; /* Non-overclocked max frequency. */
+ u8 up_threshold; /* Current %busy required to upclock */
+ u8 down_threshold; /* Current %busy required to downclock */
+
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 43bd40cc75a6..041aa0b3d7d8 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1323,7 +1323,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
if (!vlv_c0_above(dev_priv,
&dev_priv->rps.down_ei, &now,
- VLV_RP_DOWN_EI_THRESHOLD))
+ dev_priv->rps.down_threshold))
events |= GEN6_PM_RP_DOWN_THRESHOLD;
dev_priv->rps.down_ei = now;
}
@@ -1331,7 +1331,7 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
if (vlv_c0_above(dev_priv,
&dev_priv->rps.up_ei, &now,
- VLV_RP_UP_EI_THRESHOLD))
+ dev_priv->rps.up_threshold))
events |= GEN6_PM_RP_UP_THRESHOLD;
dev_priv->rps.up_ei = now;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5571f7714f78..dc808b0f6577 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -564,8 +564,6 @@ enum punit_power_well {
#define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
-#define VLV_RP_UP_EI_THRESHOLD 90
-#define VLV_RP_DOWN_EI_THRESHOLD 70
/* vlv2 north clock has */
#define CCK_FUSE_REG 0x8
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1b1713e37d16..20296e3ea650 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3117,10 +3117,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
switch (new_power) {
case LOW_POWER:
/* Upclock if more than 95% busy over 16ms */
+ dev_priv->rps.up_threshold = 95;
I915_WRITE(GEN6_RP_UP_EI, 12500);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
/* Downclock if less than 85% busy over 32ms */
+ dev_priv->rps.down_threshold = 85;
I915_WRITE(GEN6_RP_DOWN_EI, 25000);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
@@ -3135,10 +3137,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
case BETWEEN:
/* Upclock if more than 90% busy over 13ms */
+ dev_priv->rps.up_threshold = 90;
I915_WRITE(GEN6_RP_UP_EI, 10250);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
/* Downclock if less than 75% busy over 32ms */
+ dev_priv->rps.down_threshold = 75;
I915_WRITE(GEN6_RP_DOWN_EI, 25000);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
@@ -3153,10 +3157,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
case HIGH_POWER:
/* Upclock if more than 85% busy over 10ms */
+ dev_priv->rps.up_threshold = 85;
I915_WRITE(GEN6_RP_UP_EI, 8000);
I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
/* Downclock if less than 60% busy over 32ms */
+ dev_priv->rps.down_threshold = 60;
I915_WRITE(GEN6_RP_DOWN_EI, 25000);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
@@ -3274,6 +3280,7 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
& GENFREQSTATUS) == 0, 5))
DRM_ERROR("timed out waiting for Punit\n");
+ gen6_set_rps_thresholds(dev_priv, val);
vlv_force_gfx_clock(dev_priv, false);
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
@@ -3335,8 +3342,10 @@ void valleyview_set_rps(struct drm_device *dev, u8 val)
dev_priv->rps.cur_freq,
vlv_gpu_freq(dev_priv, val), val);
- if (val != dev_priv->rps.cur_freq)
+ if (val != dev_priv->rps.cur_freq) {
vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+ gen6_set_rps_thresholds(dev_priv, val);
+ }
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
--
2.0.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 2/7] drm/i915: Make the RPS interrupt generation mask handle the vlv wa
2014-07-10 19:31 ` [PATCH 2/7] drm/i915: Make the RPS interrupt generation mask handle the vlv wa Chris Wilson
@ 2014-07-10 20:32 ` Daniel Vetter
0 siblings, 0 replies; 10+ messages in thread
From: Daniel Vetter @ 2014-07-10 20:32 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Thu, Jul 10, 2014 at 08:31:19PM +0100, Chris Wilson wrote:
> We can eliminate a lot of special case code by making the computation of
> the interrupt mask be correct for all callers.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/intel_pm.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 5c27065bac17..1302e1bc9136 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3183,6 +3183,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
> if (val < dev_priv->rps.max_freq_softlimit)
> mask |= GEN6_PM_RP_UP_THRESHOLD;
>
> + mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
> + mask &= dev_priv->pm_rps_events;
Might as well move pm_rps_events to dev_priv->rps, too, next to the work
item. Anyway, first 2 patches merged.
-Daniel
> +
> /* IVB and SNB hard hangs on looping batchbuffer
> * if GEN6_PM_UP_EI_EXPIRED is masked.
> */
> @@ -3274,11 +3277,8 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>
> vlv_force_gfx_clock(dev_priv, false);
>
> - if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> - I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
> - else
> - I915_WRITE(GEN6_PMINTRMSK,
> - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> + I915_WRITE(GEN6_PMINTRMSK,
> + gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> }
>
> void gen6_rps_idle(struct drm_i915_private *dev_priv)
> --
> 2.0.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 4/7] drm/i915: Improved w/a for rps on Baytrail
2014-07-10 19:31 ` [PATCH 4/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
@ 2014-09-01 8:23 ` Ville Syrjälä
2014-09-01 8:37 ` Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Ville Syrjälä @ 2014-09-01 8:23 UTC (permalink / raw)
To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx, Rodrigo Vivi
On Thu, Jul 10, 2014 at 08:31:21PM +0100, Chris Wilson wrote:
> Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
> Author: Deepak S <deepak.s@linux.intel.com>
> Date: Thu Jul 3 17:33:01 2014 -0400
>
> drm/i915/vlv: WA for Turbo and RC6 to work together.
>
> Other than code clarity, the major improvement is to disable the extra
> interrupts generated when idle. However, the reclocking remains rather
> slow under the new manual regime, in particular it fails to downclock as
> quickly as desired.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Deepak S <deepak.s@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
> drivers/gpu/drm/i915/i915_irq.c | 166 ++++++++++++-----------------------
> drivers/gpu/drm/i915/i915_reg.h | 4 +-
> drivers/gpu/drm/i915/intel_display.c | 2 +
> drivers/gpu/drm/i915/intel_drv.h | 2 +
> drivers/gpu/drm/i915/intel_pm.c | 13 +++
> 5 files changed, 73 insertions(+), 114 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 8e19d031c05d..2db5dbb87ced 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
<snip>
> @@ -1433,14 +1376,14 @@ static void gen6_pm_rps_work(struct work_struct *work)
>
> mutex_lock(&dev_priv->rps.hw_lock);
>
> + pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
> +
> adj = dev_priv->rps.last_adj;
> if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> if (adj > 0)
> adj *= 2;
> - else {
> - /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
> - }
> + else
> + adj = 1;
> new_delay = dev_priv->rps.cur_freq + adj;
>
> /*
> @@ -1455,15 +1398,11 @@ static void gen6_pm_rps_work(struct work_struct *work)
> else
> new_delay = dev_priv->rps.min_freq_softlimit;
> adj = 0;
> - } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> - new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
> } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> if (adj < 0)
> adj *= 2;
> - else {
> - /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
> - }
> + else
> + adj = -1;
> new_delay = dev_priv->rps.cur_freq + adj;
> } else { /* unknown event */
> new_delay = dev_priv->rps.cur_freq;
> @@ -1475,6 +1414,9 @@ static void gen6_pm_rps_work(struct work_struct *work)
> new_delay = clamp_t(int, new_delay,
> dev_priv->rps.min_freq_softlimit,
> dev_priv->rps.max_freq_softlimit);
> + /* CHV needs even encode values */
> + if (IS_CHERRYVIEW(dev_priv))
> + new_delay = new_delay & ~1;
This will effectively make the first up interrupt a nop. The current
code is the way it is precisely to avoid that. I guess it's not a huge
problem but still seems silly to not satisfy the GPU when it wants moar
speed.
--
Ville Syrjälä
Intel OTC
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 4/7] drm/i915: Improved w/a for rps on Baytrail
2014-09-01 8:23 ` Ville Syrjälä
@ 2014-09-01 8:37 ` Chris Wilson
0 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2014-09-01 8:37 UTC (permalink / raw)
To: Ville Syrjälä; +Cc: Daniel Vetter, intel-gfx, Rodrigo Vivi
On Mon, Sep 01, 2014 at 11:23:20AM +0300, Ville Syrjälä wrote:
> On Thu, Jul 10, 2014 at 08:31:21PM +0100, Chris Wilson wrote:
> > Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
> > Author: Deepak S <deepak.s@linux.intel.com>
> > Date: Thu Jul 3 17:33:01 2014 -0400
> >
> > drm/i915/vlv: WA for Turbo and RC6 to work together.
> >
> > Other than code clarity, the major improvement is to disable the extra
> > interrupts generated when idle. However, the reclocking remains rather
> > slow under the new manual regime, in particular it fails to downclock as
> > quickly as desired.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Deepak S <deepak.s@linux.intel.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > ---
> > drivers/gpu/drm/i915/i915_irq.c | 166 ++++++++++++-----------------------
> > drivers/gpu/drm/i915/i915_reg.h | 4 +-
> > drivers/gpu/drm/i915/intel_display.c | 2 +
> > drivers/gpu/drm/i915/intel_drv.h | 2 +
> > drivers/gpu/drm/i915/intel_pm.c | 13 +++
> > 5 files changed, 73 insertions(+), 114 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> > index 8e19d031c05d..2db5dbb87ced 100644
> > --- a/drivers/gpu/drm/i915/i915_irq.c
> > +++ b/drivers/gpu/drm/i915/i915_irq.c
> <snip>
> > @@ -1433,14 +1376,14 @@ static void gen6_pm_rps_work(struct work_struct *work)
> >
> > mutex_lock(&dev_priv->rps.hw_lock);
> >
> > + pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
> > +
> > adj = dev_priv->rps.last_adj;
> > if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> > if (adj > 0)
> > adj *= 2;
> > - else {
> > - /* CHV needs even encode values */
> > - adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
> > - }
> > + else
> > + adj = 1;
> > new_delay = dev_priv->rps.cur_freq + adj;
> >
> > /*
> > @@ -1455,15 +1398,11 @@ static void gen6_pm_rps_work(struct work_struct *work)
> > else
> > new_delay = dev_priv->rps.min_freq_softlimit;
> > adj = 0;
> > - } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> > - new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
> > } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> > if (adj < 0)
> > adj *= 2;
> > - else {
> > - /* CHV needs even encode values */
> > - adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
> > - }
> > + else
> > + adj = -1;
> > new_delay = dev_priv->rps.cur_freq + adj;
> > } else { /* unknown event */
> > new_delay = dev_priv->rps.cur_freq;
> > @@ -1475,6 +1414,9 @@ static void gen6_pm_rps_work(struct work_struct *work)
> > new_delay = clamp_t(int, new_delay,
> > dev_priv->rps.min_freq_softlimit,
> > dev_priv->rps.max_freq_softlimit);
> > + /* CHV needs even encode values */
> > + if (IS_CHERRYVIEW(dev_priv))
> > + new_delay = new_delay & ~1;
>
> This will effectively make the first up interrupt a nop. The current
> code is the way it is precisely to avoid that. I guess it's not a huge
> problem but still seems silly to not satisfy the GPU when it wants moar
> speed.
Hmm, it's actually worse than that (see the last_adj). Ok, I should
split this out and tidy it up more carefully.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-09-01 8:37 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-07-10 19:31 [PATCH 1/7] drm/i915: Move RPS evaluation interval counters to i915->rps Chris Wilson
2014-07-10 19:31 ` [PATCH 2/7] drm/i915: Make the RPS interrupt generation mask handle the vlv wa Chris Wilson
2014-07-10 20:32 ` Daniel Vetter
2014-07-10 19:31 ` [PATCH 3/7] drm/i915: Include the RPS evaluation metrics in debugfs for Baytrail Chris Wilson
2014-07-10 19:31 ` [PATCH 4/7] drm/i915: Improved w/a for rps on Baytrail Chris Wilson
2014-09-01 8:23 ` Ville Syrjälä
2014-09-01 8:37 ` Chris Wilson
2014-07-10 19:31 ` [PATCH 5/7] drm/i915: Use down ei for manual Baytrail RPS calculations Chris Wilson
2014-07-10 19:31 ` [PATCH 6/7] drm/i915: Improve code clarity of vlv_set_rps_idle() Chris Wilson
2014-07-10 19:31 ` [PATCH 7/7] drm/i915: Aggressive downclocking on Baytrail Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.