linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 1/2] intel_pstate: Use the cpu load to determine the PercentPerformance
@ 2015-11-03  9:27 Philippe Longepe
  2015-11-03  9:27 ` [PATCH v1 2/2] intel_pstate: Change the setpoint for the cores Philippe Longepe
  2015-11-07  1:09 ` [PATCH v1 1/2] intel_pstate: Use the cpu load to determine the PercentPerformance Rafael J. Wysocki
  0 siblings, 2 replies; 10+ messages in thread
From: Philippe Longepe @ 2015-11-03  9:27 UTC (permalink / raw)
  To: linux-pm; +Cc: srinivas.pandruvada, Stephane Gasparini

The APERF and MPERF counters alone are not enough to determine the target
P-state, because they measure performance only while the targeted
processor is in the C0 (active) state.
Because of that, we were computing the average P-state during the last
period, which can be very different from the average frequency
(or percentage of performance).

As defined in the SDM (section 14.2), PercentPerformance is given by:

PercentPerformance = PercentBusy * (delta_aperf / delta_mperf);

PercentBusy (the load) can be estimated as the ratio of the MPERF
counter, which runs at a constant frequency but only during active
periods (C0), to the time stamp counter, which runs at the same
frequency but also during idle.

So, PercentBusy = 100 * (delta_mperf / delta_tsc)

and PercentPerformance = 100 * (delta_mperf / delta_tsc) *
				(delta_aperf / delta_mperf)
which simplifies to:
PercentPerformance = 100 * (delta_aperf / delta_tsc)

Signed-off-by: Philippe Longepe <philippe.longepe@linux.intel.com>
Signed-off-by: Stephane Gasparini <stephane.gasparini@linux.intel.com>
---
 drivers/cpufreq/intel_pstate.c | 87 +++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 56 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 93a3c63..421903f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -69,7 +69,7 @@ static inline int ceiling_fp(int32_t x)
 }
 
 struct sample {
-	int32_t core_pct_busy;
+	int32_t cpu_load;
 	u64 aperf;
 	u64 mperf;
 	u64 tsc;
@@ -993,21 +993,39 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
 }
 
-static inline void intel_pstate_calc_busy(struct cpudata *cpu)
+static inline int32_t intel_pstate_calc_busy(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
-	int64_t core_pct;
+	struct pstate_data *pstate = &cpu->pstate;
+	int64_t core_busy_ratio;
 
-	core_pct = int_tofp(sample->aperf) * int_tofp(100);
-	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
+	/*
+	 * The load can be estimated as the ratio of the mperf counter
+	 * running at a constant frequency only during active periods
+	 * (C0) and the time stamp counter running at the same frequency
+	 * also during C-states.
+	 */
+	sample->cpu_load = div64_u64(100 * sample->mperf, sample->tsc);
+
+	/*
+	 * The target P-state can be estimated with the following formula:
+	 * PercentPerformance = PercentBusy * (delta_aperf/delta_mperf);
+	 * (see Section 14.2 from Intel Software Developer Manual)
+	 * with PercentBusy = 100 * (delta_mperf / delta_tsc) and
+	 * PercentPerformance can be simplified with:
+	 * (delta_mperf * delta_aperf) / (delta_tsc * delta_mperf) =
+	 * delta_aperf / delta_tsc. Finally, we normalize core_busy_ratio,
+	 * which was our actual percent performance to what we requested
+	 * during the last sample period. The result will be a percentage of
+	 * busy at a specified pstate.
+	 */
+	core_busy_ratio = div64_u64(int_tofp(100) * sample->aperf *
+		pstate->max_pstate, sample->tsc * pstate->current_pstate);
 
-	sample->freq = fp_toint(
-		mul_fp(int_tofp(
-			cpu->pstate.max_pstate_physical *
-			cpu->pstate.scaling / 100),
-			core_pct));
+	sample->freq = div64_u64(sample->aperf * pstate->max_pstate *
+		pstate->scaling, sample->mperf);
 
-	sample->core_pct_busy = (int32_t)core_pct;
+	return core_busy_ratio;
 }
 
 static inline void intel_pstate_sample(struct cpudata *cpu)
@@ -1036,8 +1054,6 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
 	cpu->sample.mperf -= cpu->prev_mperf;
 	cpu->sample.tsc -= cpu->prev_tsc;
 
-	intel_pstate_calc_busy(cpu);
-
 	cpu->prev_aperf = aperf;
 	cpu->prev_mperf = mperf;
 	cpu->prev_tsc = tsc;
@@ -1059,47 +1075,6 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
 	mod_timer_pinned(&cpu->timer, jiffies + delay);
 }
 
-static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
-{
-	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
-	s64 duration_us;
-	u32 sample_time;
-
-	/*
-	 * core_busy is the ratio of actual performance to max
-	 * max_pstate is the max non turbo pstate available
-	 * current_pstate was the pstate that was requested during
-	 * 	the last sample period.
-	 *
-	 * We normalize core_busy, which was our actual percent
-	 * performance to what we requested during the last sample
-	 * period. The result will be a percentage of busy at a
-	 * specified pstate.
-	 */
-	core_busy = cpu->sample.core_pct_busy;
-	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
-	current_pstate = int_tofp(cpu->pstate.current_pstate);
-	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
-
-	/*
-	 * Since we have a deferred timer, it will not fire unless
-	 * we are in C0.  So, determine if the actual elapsed time
-	 * is significantly greater (3x) than our sample interval.  If it
-	 * is, then we were idle for a long enough period of time
-	 * to adjust our busyness.
-	 */
-	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
-	duration_us = ktime_us_delta(cpu->sample.time,
-				     cpu->last_sample_time);
-	if (duration_us > sample_time * 3) {
-		sample_ratio = div_fp(int_tofp(sample_time),
-				      int_tofp(duration_us));
-		core_busy = mul_fp(core_busy, sample_ratio);
-	}
-
-	return core_busy;
-}
-
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 {
 	int32_t busy_scaled;
@@ -1111,7 +1086,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 	from = cpu->pstate.current_pstate;
 
 	pid = &cpu->pid;
-	busy_scaled = intel_pstate_get_scaled_busy(cpu);
+	busy_scaled = intel_pstate_calc_busy(cpu);
 
 	ctl = pid_calc(pid, busy_scaled);
 
@@ -1119,7 +1094,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 	intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true);
 
 	sample = &cpu->sample;
-	trace_pstate_sample(fp_toint(sample->core_pct_busy),
+	trace_pstate_sample(fp_toint(busy_scaled),
 		fp_toint(busy_scaled),
 		from,
 		cpu->pstate.current_pstate,
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-11-24  1:45 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-03  9:27 [PATCH v1 1/2] intel_pstate: Use the cpu load to determine the PercentPerformance Philippe Longepe
2015-11-03  9:27 ` [PATCH v1 2/2] intel_pstate: Change the setpoint for the cores Philippe Longepe
2015-11-21 16:22   ` Doug Smythies
2015-11-23 13:45     ` Philippe Longepe
2015-11-07  1:09 ` [PATCH v1 1/2] intel_pstate: Use the cpu load to determine the PercentPerformance Rafael J. Wysocki
2015-11-07  1:14   ` Srinivas Pandruvada
2015-11-21 16:21     ` Doug Smythies
2015-11-23 13:28       ` plongepe
2015-11-24  1:33         ` Doug Smythies
2015-11-24  1:44           ` Srinivas Pandruvada

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).