From: Peter Zijlstra <peterz@infradead.org>
To: tmhikaru@gmail.com
Cc: Damien Wyart <damien.wyart@free.fr>,
Venkatesh Pallipadi <venki@google.com>,
Chase Douglas <chase.douglas@canonical.com>,
Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
linux-kernel@vger.kernel.org, Kyle McMartin <kyle@mcmartin.ca>
Subject: Re: High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later)
Date: Tue, 30 Nov 2010 15:59:05 +0100 [thread overview]
Message-ID: <1291129145.32004.874.camel@laptop> (raw)
In-Reply-To: <1291071677.32004.527.camel@laptop>
On Tue, 2010-11-30 at 00:01 +0100, Peter Zijlstra wrote:
>
> Ok, that's good testing.. so its still not quite the same as NO_HZ=n,
> how about this one?
>
> (it seems to drop down to 0.00 if I wait a few minutes with top -d5)
OK, so here's a less crufty patch that gets the same result on my
machine, load drops down to 0.00 after a while.
It seems a bit slower to reach 0.00, but that could be because I
actually changed the load computation for NO_HZ=n as well, I added a
rounding factor in calc_load(), we no longer truncate the division.
If people want to compare, simply remove the third line from
calc_load(): load += 1UL << (FSHIFT - 1), to restore the old behaviour.
---
include/linux/sched.h | 2 +-
kernel/sched.c | 127 ++++++++++++++++++++++++++++++++++++++++++++----
kernel/timer.c | 2 +-
3 files changed, 118 insertions(+), 13 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index de4d68f..a6e0c62 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
extern unsigned long this_cpu_load(void);
-extern void calc_global_load(void);
+extern void calc_global_load(unsigned long ticks);
extern unsigned long get_parent_ip(unsigned long addr);
diff --git a/kernel/sched.c b/kernel/sched.c
index 864040c..7868a18 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2978,6 +2978,15 @@ static long calc_load_fold_active(struct rq *this_rq)
return delta;
}
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+ load *= exp;
+ load += active * (FIXED_1 - exp);
+ load += 1UL << (FSHIFT - 1);
+ return load >> FSHIFT;
+}
+
#ifdef CONFIG_NO_HZ
/*
* For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3007,6 +3016,105 @@ static long calc_load_fold_idle(void)
return delta;
}
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x: base of the power
+ * @frac_bits: fractional bits of @x
+ * @n: power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+ unsigned long result = 1UL << frac_bits;
+
+ if (n) for (;;) {
+ if (n & 1) {
+ result *= x;
+ result += 1UL << (frac_bits - 1);
+ result >>= frac_bits;
+ }
+ n >>= 1;
+ if (!n)
+ break;
+ x *= x;
+ x += 1UL << (frac_bits - 1);
+ x >>= frac_bits;
+ }
+
+ return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ * = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ * ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ * = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ * n 1 - x^(n+1)
+ * S_n := \Sum x^i = -------------
+ * i=0 1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+ unsigned long active, unsigned int n)
+{
+
+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+static void calc_global_nohz(unsigned long ticks)
+{
+ long delta, active, n;
+
+ if (time_before(jiffies, calc_load_update))
+ return;
+
+ /*
+ * If we crossed a calc_load_update boundary, make sure to fold
+ * any pending idle changes, the respective CPUs might have
+ * missed the tick driven calc_load_account_active() update
+ * due to NO_HZ
+ */
+ delta = calc_load_fold_idle();
+ if (delta)
+ atomic_long_add(delta, &calc_load_tasks);
+
+ if (ticks >= LOAD_FREQ) {
+ n = ticks / LOAD_FREQ;
+
+ active = atomic_long_read(&calc_load_tasks);
+ active = active > 0 ? active * FIXED_1 : 0;
+
+ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+ calc_load_update += n * LOAD_FREQ;
+ }
+}
#else
static void calc_load_account_idle(struct rq *this_rq)
{
@@ -3016,6 +3124,10 @@ static inline long calc_load_fold_idle(void)
{
return 0;
}
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
#endif
/**
@@ -3033,24 +3145,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
loads[2] = (avenrun[2] + offset) << shift;
}
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
- load *= exp;
- load += active * (FIXED_1 - exp);
- return load >> FSHIFT;
-}
-
/*
* calc_load - update the avenrun load estimates 10 ticks after the
* CPUs have updated calc_load_tasks.
*/
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
{
- unsigned long upd = calc_load_update + 10;
long active;
- if (time_before(jiffies, upd))
+ calc_global_nohz(ticks);
+
+ if (time_before(jiffies, calc_load_update + 10))
return;
active = atomic_long_read(&calc_load_tasks);
diff --git a/kernel/timer.c b/kernel/timer.c
index d6ccb90..9f82b2a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1297,7 +1297,7 @@ void do_timer(unsigned long ticks)
{
jiffies_64 += ticks;
update_wall_time();
- calc_global_load();
+ calc_global_load(ticks);
}
#ifdef __ARCH_WANT_SYS_ALARM
next prev parent reply other threads:[~2010-11-30 14:58 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-29 7:01 High CPU load when machine is idle Damien Wyart
2010-10-14 14:58 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) Damien Wyart
2010-10-14 15:29 ` Chase Douglas
2010-10-14 15:56 ` Damien Wyart
2010-10-15 11:08 ` Peter Zijlstra
2010-10-18 12:32 ` Peter Zijlstra
2010-10-20 13:27 ` Damien Wyart
2010-10-20 13:30 ` Peter Zijlstra
2010-10-20 13:43 ` Peter Zijlstra
2010-10-20 14:14 ` Peter Zijlstra
2010-10-20 14:25 ` Peter Zijlstra
2010-10-20 17:26 ` Peter Zijlstra
2010-10-20 20:24 ` Damien Wyart
2010-10-21 1:48 ` tmhikaru
2010-10-21 1:53 ` tmhikaru
2010-10-21 8:22 ` Ingo Molnar
2010-10-21 8:57 ` tmhikaru
2010-10-21 18:36 ` tmhikaru
2010-10-22 1:37 ` tmhikaru
2010-10-21 12:09 ` Peter Zijlstra
2010-10-21 17:18 ` Venkatesh Pallipadi
2010-10-22 21:03 ` Venkatesh Pallipadi
2010-10-22 23:03 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) Venkatesh Pallipadi
2010-10-23 2:13 ` tmhikaru
2010-10-25 10:12 ` Peter Zijlstra
2010-10-25 16:29 ` Venkatesh Pallipadi
2010-10-26 12:44 ` Peter Zijlstra
2010-10-26 14:05 ` Peter Zijlstra
2010-10-29 19:42 ` Peter Zijlstra
2010-11-09 18:55 ` Kyle McMartin
2010-11-09 19:02 ` Peter Zijlstra
2010-11-10 2:37 ` tmhikaru
2010-11-10 12:01 ` Peter Zijlstra
2010-11-10 3:45 ` Kyle McMartin
2010-11-10 12:00 ` Peter Zijlstra
2010-11-14 5:14 ` tmhikaru
2010-11-25 13:31 ` Damien Wyart
2010-11-25 14:03 ` Peter Zijlstra
2010-11-27 20:15 ` Peter Zijlstra
2010-11-28 4:26 ` Kyle McMartin
2010-11-28 11:40 ` Damien Wyart
2010-11-28 18:07 ` Valdis.Kletnieks
2010-11-29 11:38 ` Peter Zijlstra
2010-11-29 19:40 ` tmhikaru
2010-11-29 23:01 ` Peter Zijlstra
2010-11-30 14:59 ` Peter Zijlstra [this message]
2010-11-30 15:39 ` Kyle McMartin
2010-11-30 20:04 ` Kyle McMartin
2010-11-30 16:53 ` Damien Wyart
2010-11-30 17:29 ` Peter Zijlstra
2010-12-01 21:27 ` tmhikaru
2010-12-02 10:16 ` tmhikaru
2010-12-08 20:40 ` [tip:sched/urgent] sched: Cure more NO_HZ load average woes tip-bot for Peter Zijlstra
2010-11-30 20:01 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) tmhikaru
2010-11-30 16:49 ` Damien Wyart
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1291129145.32004.874.camel@laptop \
--to=peterz@infradead.org \
--cc=chase.douglas@canonical.com \
--cc=damien.wyart@free.fr \
--cc=kyle@mcmartin.ca \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=tglx@linutronix.de \
--cc=tmhikaru@gmail.com \
--cc=venki@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox