From: Peter Zijlstra <peterz@infradead.org>
To: Damien Wyart <damien.wyart@free.fr>
Cc: Chase Douglas <chase.douglas@canonical.com>,
Ingo Molnar <mingo@elte.hu>,
tmhikaru@gmail.com, Thomas Gleixner <tglx@linutronix.de>,
linux-kernel@vger.kernel.org
Subject: Re: High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later)
Date: Fri, 15 Oct 2010 13:08:22 +0200 [thread overview]
Message-ID: <1287140902.29097.1455.camel@twins> (raw)
In-Reply-To: <20101014145813.GA2185@brouette>
On Thu, 2010-10-14 at 16:58 +0200, Damien Wyart wrote:
> - the commit 74f5187ac873042f502227701ed1727e7c5fbfa9 isolated by Tim
> seems to be the culprit;
Right, so I think I figured out what's happening.
We're folding successive idles of the same cpu into the total idle
number, which is inflating things.
+/*
+ * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
+ *
+ * When making the ILB scale, we should try to pull this in as well.
+ */
+static atomic_long_t calc_load_tasks_idle;
+
+static void calc_load_account_idle(struct rq *this_rq)
+{
+ long delta;
+
+ delta = calc_load_fold_active(this_rq);
+ if (delta)
+ atomic_long_add(delta, &calc_load_tasks_idle);
+}
+
+static long calc_load_fold_idle(void)
+{
+ long delta = 0;
+
+ /*
+ * Its got a race, we don't care...
+ */
+ if (atomic_long_read(&calc_load_tasks_idle))
+ delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
+
+ return delta;
+}
If you look at that and imagine CPU1 going idle with 1 task blocked,
then waking up due to unblocking, then going idle with that same task
blocked, etc.. all before we fold_idle on an active cpu, then we can count
that one task many times over.
I haven't come up with a sane patch yet, hackery below, but that does
let my 24-cpu system idle into load 0.0x instead of the constant 1.x it
had before.
Beware, utter hackery below.. lots of races not sure it matters but it
ain't pretty..
Anybody got a bright idea here?
---
kernel/sched.c | 32 ++++++++++++++++++++++++++++++--
kernel/sched_idletask.c | 1 +
2 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 91c19db..ac4512d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -521,6 +521,9 @@ struct rq {
/* calc_load related fields */
unsigned long calc_load_update;
long calc_load_active;
+#ifdef CONFIG_NO_HZ
+ long calc_load_inactive;
+#endif
#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
@@ -1817,6 +1820,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
#endif
static void calc_load_account_idle(struct rq *this_rq);
+static void calc_load_account_nonidle(struct rq *this_rq);
static void update_sysctl(void);
static int get_update_sysctl_factor(void);
static void update_cpu_load(struct rq *this_rq);
@@ -2978,14 +2982,33 @@ static long calc_load_fold_active(struct rq *this_rq)
* When making the ILB scale, we should try to pull this in as well.
*/
static atomic_long_t calc_load_tasks_idle;
+static cpumask_var_t calc_load_mask;
static void calc_load_account_idle(struct rq *this_rq)
{
long delta;
delta = calc_load_fold_active(this_rq);
- if (delta)
+ this_rq->calc_load_inactive = delta;
+
+ if (delta) {
atomic_long_add(delta, &calc_load_tasks_idle);
+ cpumask_set_cpu(cpu_of(this_rq), calc_load_mask);
+ }
+
+ trace_printk("idle start: %d %Ld %Ld\n", cpu_of(this_rq), delta,
+ atomic_long_read(&calc_load_tasks_idle));
+}
+
+static void calc_load_account_nonidle(struct rq *this_rq)
+{
+ if (cpumask_test_and_clear_cpu(cpu_of(this_rq), calc_load_mask)) {
+ atomic_long_sub(this_rq->calc_load_inactive, &calc_load_tasks_idle);
+ trace_printk("idle end: %d %Ld %Ld\n", cpu_of(this_rq),
+ this_rq->calc_load_inactive,
+ atomic_long_read(&calc_load_tasks_idle));
+ } else
+ trace_printk("idle end: %d\n", cpu_of(this_rq));
}
static long calc_load_fold_idle(void)
@@ -2995,8 +3018,12 @@ static long calc_load_fold_idle(void)
/*
* Its got a race, we don't care...
*/
- if (atomic_long_read(&calc_load_tasks_idle))
+ if (atomic_long_read(&calc_load_tasks_idle)) {
delta = atomic_long_xchg(&calc_load_tasks_idle, 0);
+ cpumask_clear(calc_load_mask);
+ }
+
+ trace_printk("idle fold: %d %Ld\n", smp_processor_id(), delta);
return delta;
}
@@ -7935,6 +7962,7 @@ void __init sched_init(void)
atomic_set(&nohz.load_balancer, nr_cpu_ids);
atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
+ zalloc_cpumask_var(&calc_load_mask, GFP_NOWAIT);
#endif
/* May be allocated at isolcpus cmdline parse time */
if (cpu_isolated_map == NULL)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402..a7fa1aa 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -42,6 +42,7 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
+ calc_load_account_nonidle(rq);
}
static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
next prev parent reply other threads:[~2010-10-15 11:08 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-29 7:01 High CPU load when machine is idle Damien Wyart
2010-10-14 14:58 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) Damien Wyart
2010-10-14 15:29 ` Chase Douglas
2010-10-14 15:56 ` Damien Wyart
2010-10-15 11:08 ` Peter Zijlstra [this message]
2010-10-18 12:32 ` Peter Zijlstra
2010-10-20 13:27 ` Damien Wyart
2010-10-20 13:30 ` Peter Zijlstra
2010-10-20 13:43 ` Peter Zijlstra
2010-10-20 14:14 ` Peter Zijlstra
2010-10-20 14:25 ` Peter Zijlstra
2010-10-20 17:26 ` Peter Zijlstra
2010-10-20 20:24 ` Damien Wyart
2010-10-21 1:48 ` tmhikaru
2010-10-21 1:53 ` tmhikaru
2010-10-21 8:22 ` Ingo Molnar
2010-10-21 8:57 ` tmhikaru
2010-10-21 18:36 ` tmhikaru
2010-10-22 1:37 ` tmhikaru
2010-10-21 12:09 ` Peter Zijlstra
2010-10-21 17:18 ` Venkatesh Pallipadi
2010-10-22 21:03 ` Venkatesh Pallipadi
2010-10-22 23:03 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) Venkatesh Pallipadi
2010-10-23 2:13 ` tmhikaru
2010-10-25 10:12 ` Peter Zijlstra
2010-10-25 16:29 ` Venkatesh Pallipadi
2010-10-26 12:44 ` Peter Zijlstra
2010-10-26 14:05 ` Peter Zijlstra
2010-10-29 19:42 ` Peter Zijlstra
2010-11-09 18:55 ` Kyle McMartin
2010-11-09 19:02 ` Peter Zijlstra
2010-11-10 2:37 ` tmhikaru
2010-11-10 12:01 ` Peter Zijlstra
2010-11-10 3:45 ` Kyle McMartin
2010-11-10 12:00 ` Peter Zijlstra
2010-11-14 5:14 ` tmhikaru
2010-11-25 13:31 ` Damien Wyart
2010-11-25 14:03 ` Peter Zijlstra
2010-11-27 20:15 ` Peter Zijlstra
2010-11-28 4:26 ` Kyle McMartin
2010-11-28 11:40 ` Damien Wyart
2010-11-28 18:07 ` Valdis.Kletnieks
2010-11-29 11:38 ` Peter Zijlstra
2010-11-29 19:40 ` tmhikaru
2010-11-29 23:01 ` Peter Zijlstra
2010-11-30 14:59 ` Peter Zijlstra
2010-11-30 15:39 ` Kyle McMartin
2010-11-30 20:04 ` Kyle McMartin
2010-11-30 16:53 ` Damien Wyart
2010-11-30 17:29 ` Peter Zijlstra
2010-12-01 21:27 ` tmhikaru
2010-12-02 10:16 ` tmhikaru
2010-12-08 20:40 ` [tip:sched/urgent] sched: Cure more NO_HZ load average woes tip-bot for Peter Zijlstra
2010-11-30 20:01 ` High CPU load when machine is idle (related to PROBLEM: Unusually high load average when idle in 2.6.35, 2.6.35.1 and later) tmhikaru
2010-11-30 16:49 ` Damien Wyart
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1287140902.29097.1455.camel@twins \
--to=peterz@infradead.org \
--cc=chase.douglas@canonical.com \
--cc=damien.wyart@free.fr \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=tglx@linutronix.de \
--cc=tmhikaru@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox