From: Peter Zijlstra <peterz@infradead.org>
To: tmhikaru@gmail.com
Cc: Damien Wyart <damien.wyart@free.fr>,
Venkatesh Pallipadi <venki@google.com>,
Chase Douglas <chase.douglas@canonical.com>,
Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
linux-kernel@vger.kernel.org, Kyle McMartin <kyle@mcmartin.ca>
Subject: Re: High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later)
Date: Tue, 30 Nov 2010 15:59:05 +0100 [thread overview]
Message-ID: <1291129145.32004.874.camel@laptop> (raw)
In-Reply-To: <1291071677.32004.527.camel@laptop>
On Tue, 2010-11-30 at 00:01 +0100, Peter Zijlstra wrote:
>
> Ok, that's good testing.. so its still not quite the same as NO_HZ=n,
> how about this one?
>
> (it seems to drop down to 0.00 if I wait a few minutes with top -d5)
OK, so here's a less crufty patch that gets the same result on my
machine, load drops down to 0.00 after a while.
It seems a bit slower to reach 0.00, but that could be because I
actually changed the load computation for NO_HZ=n as well, I added a
rounding factor in calc_load(), we no longer truncate the division.
If people want to compare, simply remove the third line from
calc_load(): load += 1UL << (FSHIFT - 1), to restore the old behaviour.
---
include/linux/sched.h | 2 +-
kernel/sched.c | 127 ++++++++++++++++++++++++++++++++++++++++++++----
kernel/timer.c | 2 +-
3 files changed, 118 insertions(+), 13 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index de4d68f..a6e0c62 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -143,7 +143,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
extern unsigned long this_cpu_load(void);
-extern void calc_global_load(void);
+extern void calc_global_load(unsigned long ticks);
extern unsigned long get_parent_ip(unsigned long addr);
diff --git a/kernel/sched.c b/kernel/sched.c
index 864040c..7868a18 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2978,6 +2978,15 @@ static long calc_load_fold_active(struct rq *this_rq)
return delta;
}
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+ load *= exp;
+ load += active * (FIXED_1 - exp);
+ load += 1UL << (FSHIFT - 1);
+ return load >> FSHIFT;
+}
+
#ifdef CONFIG_NO_HZ
/*
* For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3007,6 +3016,105 @@ static long calc_load_fold_idle(void)
return delta;
}
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x: base of the power
+ * @frac_bits: fractional bits of @x
+ * @n: power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+ unsigned long result = 1UL << frac_bits;
+
+ if (n) for (;;) {
+ if (n & 1) {
+ result *= x;
+ result += 1UL << (frac_bits - 1);
+ result >>= frac_bits;
+ }
+ n >>= 1;
+ if (!n)
+ break;
+ x *= x;
+ x += 1UL << (frac_bits - 1);
+ x >>= frac_bits;
+ }
+
+ return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ * = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ * ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ * = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ * n 1 - x^(n+1)
+ * S_n := \Sum x^i = -------------
+ * i=0 1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+ unsigned long active, unsigned int n)
+{
+
+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+static void calc_global_nohz(unsigned long ticks)
+{
+ long delta, active, n;
+
+ if (time_before(jiffies, calc_load_update))
+ return;
+
+ /*
+ * If we crossed a calc_load_update boundary, make sure to fold
+ * any pending idle changes, the respective CPUs might have
+ * missed the tick driven calc_load_account_active() update
+ * due to NO_HZ
+ */
+ delta = calc_load_fold_idle();
+ if (delta)
+ atomic_long_add(delta, &calc_load_tasks);
+
+ if (ticks >= LOAD_FREQ) {
+ n = ticks / LOAD_FREQ;
+
+ active = atomic_long_read(&calc_load_tasks);
+ active = active > 0 ? active * FIXED_1 : 0;
+
+ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+ calc_load_update += n * LOAD_FREQ;
+ }
+}
#else
static void calc_load_account_idle(struct rq *this_rq)
{
@@ -3016,6 +3124,10 @@ static inline long calc_load_fold_idle(void)
{
return 0;
}
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
#endif
/**
@@ -3033,24 +3145,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
loads[2] = (avenrun[2] + offset) << shift;
}
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
- load *= exp;
- load += active * (FIXED_1 - exp);
- return load >> FSHIFT;
-}
-
/*
* calc_load - update the avenrun load estimates 10 ticks after the
* CPUs have updated calc_load_tasks.
*/
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
{
- unsigned long upd = calc_load_update + 10;
long active;
- if (time_before(jiffies, upd))
+ calc_global_nohz(ticks);
+
+ if (time_before(jiffies, calc_load_update + 10))
return;
active = atomic_long_read(&calc_load_tasks);
diff --git a/kernel/timer.c b/kernel/timer.c
index d6ccb90..9f82b2a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1297,7 +1297,7 @@ void do_timer(unsigned long ticks)
{
jiffies_64 += ticks;
update_wall_time();
- calc_global_load();
+ calc_global_load(ticks);
}
#ifdef __ARCH_WANT_SYS_ALARM
next prev parent reply other threads:[~2010-11-30 14:58 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-29 7:01 High CPU load when machine is idle Damien Wyart
2010-10-14 14:58 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) Damien Wyart
2010-10-14 15:29 ` Chase Douglas
2010-10-14 15:56 ` Damien Wyart
2010-10-15 11:08 ` Peter Zijlstra
2010-10-18 12:32 ` Peter Zijlstra
2010-10-20 13:27 ` Damien Wyart
2010-10-20 13:30 ` Peter Zijlstra
2010-10-20 13:43 ` Peter Zijlstra
2010-10-20 14:14 ` Peter Zijlstra
2010-10-20 14:25 ` Peter Zijlstra
2010-10-20 17:26 ` Peter Zijlstra
2010-10-20 20:24 ` Damien Wyart
2010-10-21 1:48 ` tmhikaru
2010-10-21 1:53 ` tmhikaru
2010-10-21 8:22 ` Ingo Molnar
2010-10-21 8:57 ` tmhikaru
2010-10-21 18:36 ` tmhikaru
2010-10-22 1:37 ` tmhikaru
2010-10-21 12:09 ` Peter Zijlstra
2010-10-21 17:18 ` Venkatesh Pallipadi
2010-10-22 21:03 ` Venkatesh Pallipadi
2010-10-22 23:03 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) Venkatesh Pallipadi
2010-10-23 2:13 ` tmhikaru
2010-10-25 10:12 ` Peter Zijlstra
2010-10-25 16:29 ` Venkatesh Pallipadi
2010-10-26 12:44 ` Peter Zijlstra
2010-10-26 14:05 ` Peter Zijlstra
2010-10-29 19:42 ` Peter Zijlstra
2010-11-09 18:55 ` Kyle McMartin
2010-11-09 19:02 ` Peter Zijlstra
2010-11-10 2:37 ` tmhikaru
2010-11-10 12:01 ` Peter Zijlstra
2010-11-10 3:45 ` Kyle McMartin
2010-11-10 12:00 ` Peter Zijlstra
2010-11-14 5:14 ` tmhikaru
2010-11-25 13:31 ` Damien Wyart
2010-11-25 14:03 ` Peter Zijlstra
2010-11-27 20:15 ` Peter Zijlstra
2010-11-28 4:26 ` Kyle McMartin
2010-11-28 11:40 ` Damien Wyart
2010-11-28 18:07 ` Valdis.Kletnieks
2010-11-29 11:38 ` Peter Zijlstra
2010-11-29 19:40 ` tmhikaru
2010-11-29 23:01 ` Peter Zijlstra
2010-11-30 14:59 ` Peter Zijlstra [this message]
2010-11-30 15:39 ` Kyle McMartin
2010-11-30 20:04 ` Kyle McMartin
2010-11-30 16:53 ` Damien Wyart
2010-11-30 17:29 ` Peter Zijlstra
2010-12-01 21:27 ` tmhikaru
2010-12-02 10:16 ` tmhikaru
2010-12-08 20:40 ` [tip:sched/urgent] sched: Cure more NO_HZ load average woes tip-bot for Peter Zijlstra
2010-11-30 20:01 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) tmhikaru
2010-11-30 16:49 ` Damien Wyart
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1291129145.32004.874.camel@laptop \
--to=peterz@infradead.org \
--cc=chase.douglas@canonical.com \
--cc=damien.wyart@free.fr \
--cc=kyle@mcmartin.ca \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=tglx@linutronix.de \
--cc=tmhikaru@gmail.com \
--cc=venki@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.