From: Peter Zijlstra <peterz@infradead.org>
To: Yuyang Du <yuyang.du@intel.com>
Cc: mingo@kernel.org, linux-kernel@vger.kernel.org, pjt@google.com,
bsegall@google.com, morten.rasmussen@arm.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
matt@codeblueprint.co.uk, umgwanakikbuti@gmail.com
Subject: Re: [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg()
Date: Tue, 28 Mar 2017 16:46:25 +0200 [thread overview]
Message-ID: <20170328144625.GX3093@worktop> (raw)
In-Reply-To: <1486935863-25251-3-git-send-email-yuyang.du@intel.com>
On Mon, Feb 13, 2017 at 05:44:23AM +0800, Yuyang Du wrote:
> __update_load_avg() has the following steps:
>
> 1. add the remainder of the last incomplete period
> 2. decay old sum
> 3. accumulate new sum in full periods since last_update_time
> 4. accumulate the current incomplete period
> 5. update averages
>
> However, there is no need to separately compute steps 1, 3, and 4.
>
> Illustation:
>
> c1 c3 c4
> ^ ^ ^
> | | |
> |<->|<----------------->|<--->|
> ... |---x---|------| ... |------|-----x (now)
>
> c1, c3, and c4 are the accumulated (meanwhile decayed) contributions
> in timing in steps 1, 3, and 4 respectively.
>
> With them, the accumulated contribution to load_sum, for example, is:
>
> contrib = c1 * weight * freq_scaled;
> contrib += c3 * weight * freq_scaled;
> contrib += c4 * weight * freq_scaled;
>
> Obviously, we can optimize the above and they equate to:
>
> contrib = c1 + c3 + c4;
> contrib *= weight * freq_scaled;
>
So I figured out what it is you're doing, how's this? I still need to
rewrite the Changelog to make this cleared, but I think the code now has
understandable comments.
---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2767,7 +2767,7 @@ static const u32 __accumulated_sum_N32[]
* Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
*/
-static __always_inline u64 decay_load(u64 val, u64 n)
+static u64 decay_load(u64 val, u64 n)
{
unsigned int local_n;
@@ -2795,32 +2795,111 @@ static __always_inline u64 decay_load(u6
return val;
}
-/*
- * For updates fully spanning n periods, the contribution to runnable
- * average will be: \Sum 1024*y^n
- *
- * We can compute this reasonably efficiently by combining:
- * y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for n <PERIOD}
- */
-static u32 __compute_runnable_contrib(u64 n)
+static u32 __accumulate_sum(u64 periods, u32 period_contrib, u32 remainder)
{
- u32 contrib = 0;
+ u32 contrib;
+
+ if (!periods)
+ return remainder - period_contrib;
- if (likely(n <= LOAD_AVG_PERIOD))
- return runnable_avg_yN_sum[n];
- else if (unlikely(n >= LOAD_AVG_MAX_N))
+ if (unlikely(periods >= LOAD_AVG_MAX_N))
return LOAD_AVG_MAX;
- /* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
- contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
- n %= LOAD_AVG_PERIOD;
- contrib = decay_load(contrib, n);
- return contrib + runnable_avg_yN_sum[n];
+ /*
+ * c1 y^(p+1) + c3 y^0
+ */
+ remainder += decay_load((u64)(1024 - period_contrib), periods);
+
+ periods -= 1;
+ /*
+ * For updates fully spanning n periods, the contribution to runnable
+ * average will be: 1024 \Sum y^n
+ *
+ * We can compute this reasonably efficiently by combining:
+ *
+ * y^PERIOD = 1/2 with precomputed 1024 \Sum y^n {for: n < PERIOD}
+ */
+ if (likely(periods <= LOAD_AVG_PERIOD)) {
+ contrib = runnable_avg_yN_sum[periods];
+ } else {
+ contrib = __accumulated_sum_N32[periods/LOAD_AVG_PERIOD];
+ periods %= LOAD_AVG_PERIOD;
+ contrib = decay_load(contrib, periods);
+ contrib += runnable_avg_yN_sum[periods];
+ }
+
+ return contrib + remainder;
}
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
/*
+ * Accumulate the three separate parts of the sum; c1 the remainder
+ * of the last (incomplete) period, c2 the span of full periods and c3
+ * the remainder of the (incomplete) current period.
+ *
+ * c1 c2 c3
+ * ^ ^ ^
+ * | | |
+ * |<->|<----------------->|<--->|
+ * ... |---x---|------| ... |------|-----x (now)
+ *
+ * p
+ * u' = (u + c1) y^(p+1) + 1024 \Sum y^n + c3 y^0
+ * n=1
+ *
+ * = u y^(p+1) + (Step 1)
+ *
+ * p
+ * c1 y^(p+1) + 1024 \Sum y^n + c3 y^0 (Step 2)
+ * n=1
+ */
+static __always_inline u32
+accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
+ unsigned long weight, int running, struct cfs_rq *cfs_rq)
+{
+ unsigned long scale_freq, scale_cpu;
+ u64 periods;
+ u32 contrib;
+
+ scale_freq = arch_scale_freq_capacity(NULL, cpu);
+ scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+
+ delta += sa->period_contrib;
+ periods = delta / 1024; /* A period is 1024us (~1ms) */
+
+ /*
+ * Step 1: decay old *_sum if we crossed period boundaries.
+ */
+ if (periods) {
+ sa->load_sum = decay_load(sa->load_sum, periods);
+ if (cfs_rq) {
+ cfs_rq->runnable_load_sum =
+ decay_load(cfs_rq->runnable_load_sum, periods);
+ }
+ sa->util_sum = decay_load((u64)(sa->util_sum), periods);
+ }
+
+ /*
+ * Step 2
+ */
+ delta %= 1024;
+ contrib = __accumulate_sum(periods, sa->period_contrib, delta);
+ sa->period_contrib = delta;
+
+ contrib = cap_scale(contrib, scale_freq);
+ if (weight) {
+ sa->load_sum += weight * contrib;
+ if (cfs_rq)
+ cfs_rq->runnable_load_sum += weight * contrib;
+ }
+ if (running)
+ sa->util_sum += contrib * scale_cpu;
+
+ return periods;
+}
+
+/*
* We can represent the historical contribution to runnable average as the
* coefficients of a geometric series. To do this we sub-divide our runnable
* history into segments of approximately 1ms (1024us); label the segment that
@@ -2852,10 +2931,7 @@ static __always_inline int
___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
unsigned long weight, int running, struct cfs_rq *cfs_rq)
{
- u64 delta, scaled_delta, periods;
- u32 contrib;
- unsigned int delta_w, scaled_delta_w, decayed = 0;
- unsigned long scale_freq, scale_cpu;
+ u64 delta;
delta = now - sa->last_update_time;
/*
@@ -2876,81 +2952,27 @@ ___update_load_avg(u64 now, int cpu, str
return 0;
sa->last_update_time = now;
- scale_freq = arch_scale_freq_capacity(NULL, cpu);
- scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
-
- /* delta_w is the amount already accumulated against our next period */
- delta_w = sa->period_contrib;
- if (delta + delta_w >= 1024) {
- decayed = 1;
-
- /* how much left for next period will start over, we don't know yet */
- sa->period_contrib = 0;
-
- /*
- * Now that we know we're crossing a period boundary, figure
- * out how much from delta we need to complete the current
- * period and accrue it.
- */
- delta_w = 1024 - delta_w;
- scaled_delta_w = cap_scale(delta_w, scale_freq);
- if (weight) {
- sa->load_sum += weight * scaled_delta_w;
- if (cfs_rq) {
- cfs_rq->runnable_load_sum +=
- weight * scaled_delta_w;
- }
- }
- if (running)
- sa->util_sum += scaled_delta_w * scale_cpu;
-
- delta -= delta_w;
-
- /* Figure out how many additional periods this update spans */
- periods = delta / 1024;
- delta %= 1024;
-
- sa->load_sum = decay_load(sa->load_sum, periods + 1);
- if (cfs_rq) {
- cfs_rq->runnable_load_sum =
- decay_load(cfs_rq->runnable_load_sum, periods + 1);
- }
- sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
-
- /* Efficiently calculate \sum (1..n_period) 1024*y^i */
- contrib = __compute_runnable_contrib(periods);
- contrib = cap_scale(contrib, scale_freq);
- if (weight) {
- sa->load_sum += weight * contrib;
- if (cfs_rq)
- cfs_rq->runnable_load_sum += weight * contrib;
- }
- if (running)
- sa->util_sum += contrib * scale_cpu;
- }
-
- /* Remainder of delta accrued against u_0` */
- scaled_delta = cap_scale(delta, scale_freq);
- if (weight) {
- sa->load_sum += weight * scaled_delta;
- if (cfs_rq)
- cfs_rq->runnable_load_sum += weight * scaled_delta;
- }
- if (running)
- sa->util_sum += scaled_delta * scale_cpu;
-
- sa->period_contrib += delta;
+ /*
+ * Now we know we crossed measurement unit boundaries. The *_avg
+ * accrues by two steps:
+ *
+ * Step 1: accumulate *_sum since last_update_time. If we haven't
+ * crossed period boundaries, finish.
+ */
+ if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
+ return 0;
- if (decayed) {
- sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
- if (cfs_rq) {
- cfs_rq->runnable_load_avg =
- div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
- }
- sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
+ /*
+ * Step 2: update *_avg.
+ */
+ sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+ if (cfs_rq) {
+ cfs_rq->runnable_load_avg =
+ div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
}
+ sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
- return decayed;
+ return 1;
}
static int
next prev parent reply other threads:[~2017-03-28 14:46 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-02-12 21:44 [RESEND PATCH 0/2] sched/fair: Add documentation and optimize __update_sched_avg() Yuyang Du
2017-02-12 21:44 ` [RESEND PATCH 1/2] documentation: Add scheduler/sched-avg.txt Yuyang Du
2017-04-14 9:28 ` [tip:sched/core] sched/Documentation: Add 'sched-pelt' tool tip-bot for Yuyang Du
2017-02-12 21:44 ` [RESEND PATCH 2/2] sched/fair: Optimize __update_sched_avg() Yuyang Du
2017-03-28 12:50 ` Peter Zijlstra
2017-03-28 23:57 ` Yuyang Du
2017-03-28 14:46 ` Peter Zijlstra [this message]
2017-03-29 0:04 ` Yuyang Du
2017-03-29 10:41 ` Peter Zijlstra
2017-03-29 18:41 ` Yuyang Du
2017-03-30 11:21 ` Paul Turner
2017-03-30 12:16 ` Peter Zijlstra
2017-03-30 13:46 ` Peter Zijlstra
2017-03-30 18:50 ` Yuyang Du
2017-03-30 14:14 ` Peter Zijlstra
2017-03-30 19:13 ` Yuyang Du
2017-03-30 19:41 ` Yuyang Du
2017-03-31 7:13 ` Peter Zijlstra
2017-03-30 22:02 ` Paul Turner
2017-03-31 7:01 ` Peter Zijlstra
2017-03-31 9:58 ` Paul Turner
2017-03-31 11:23 ` Peter Zijlstra
2017-04-10 7:39 ` Dietmar Eggemann
2017-04-10 8:40 ` Peter Zijlstra
2017-03-31 11:30 ` Peter Zijlstra
2017-03-31 10:55 ` Paul Turner
2017-03-31 11:38 ` Peter Zijlstra
2017-04-10 10:47 ` Peter Zijlstra
2017-03-30 18:39 ` Yuyang Du
2017-03-30 8:32 ` [tip:sched/core] sched/fair: Optimize ___update_sched_avg() tip-bot for Yuyang Du
2017-02-28 0:59 ` [RESEND PATCH 0/2] sched/fair: Add documentation and optimize __update_sched_avg() Yuyang Du
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170328144625.GX3093@worktop \
--to=peterz@infradead.org \
--cc=bsegall@google.com \
--cc=dietmar.eggemann@arm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=matt@codeblueprint.co.uk \
--cc=mingo@kernel.org \
--cc=morten.rasmussen@arm.com \
--cc=pjt@google.com \
--cc=umgwanakikbuti@gmail.com \
--cc=vincent.guittot@linaro.org \
--cc=yuyang.du@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox