* [patch 2/2] sched: Fix and rate-limit newidle
@ 2009-11-10 2:50 Mike Galbraith
2009-11-10 4:21 ` [tip:sched/core] sched: Fix and clean up rate-limit newidle code tip-bot for Mike Galbraith
0 siblings, 1 reply; 2+ messages in thread
From: Mike Galbraith @ 2009-11-10 2:50 UTC (permalink / raw)
To: Ingo Molnar, Peter Zijlstra; +Cc: LKML, alex.shi, Zhang, Yanmin
sched: Fix and rate-limit newidle
Commit 1b9508f, "Rate-limit newidle", has been confirmed to fix the netperf
UDP loopback regression reported by Alex Shi. This is a replacement, moved
to a more out-of-the-way spot, and with a fix to ensure that balancing
doesn't try to balance runqueues which haven't gone online yet, which can
mess up CPU enumeration during boot.
Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reported-by: Alex Shi <alex.shi@intel.com>
Reported-by: Zhang, Yanmin <yanmin_zhang@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/sched.c | 28 +++++++++++++++++++++++++---
kernel/sched_debug.c | 2 ++
2 files changed, 27 insertions(+), 3 deletions(-)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -589,6 +589,8 @@ struct rq {
u64 rt_avg;
u64 age_stamp;
+ u64 idle_stamp;
+ u64 avg_idle;
#endif
/* calc_load related fields */
@@ -2437,6 +2439,17 @@ out_running:
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
p->sched_class->task_wake_up(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+ u64 max = 2*sysctl_sched_migration_cost;
+
+ if (delta > max)
+ rq->avg_idle = max;
+ else
+ update_avg(&rq->avg_idle, delta);
+ rq->idle_stamp = 0;
+ }
#endif
out:
task_rq_unlock(rq, &flags);
@@ -4114,7 +4127,7 @@ static int load_balance(int this_cpu, st
unsigned long flags;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
- cpumask_setall(cpus);
+ cpumask_copy(cpus, cpu_online_mask);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -4277,7 +4290,7 @@ load_balance_newidle(int this_cpu, struc
int all_pinned = 0;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
- cpumask_setall(cpus);
+ cpumask_copy(cpus, cpu_online_mask);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -4417,6 +4430,11 @@ static void idle_balance(int this_cpu, s
int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;
+ this_rq->idle_stamp = this_rq->clock;
+
+ if (this_rq->avg_idle < sysctl_sched_migration_cost)
+ return;
+
for_each_domain(this_cpu, sd) {
unsigned long interval;
@@ -4431,8 +4449,10 @@ static void idle_balance(int this_cpu, s
interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval))
next_balance = sd->last_balance + interval;
- if (pulled_task)
+ if (pulled_task) {
+ this_rq->idle_stamp = 0;
break;
+ }
}
if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
/*
@@ -9535,6 +9555,8 @@ void __init sched_init(void)
rq->cpu = i;
rq->online = 0;
rq->migration_thread = NULL;
+ rq->idle_stamp = 0;
+ rq->avg_idle = 2*sysctl_sched_migration_cost;
INIT_LIST_HEAD(&rq->migration_queue);
rq_attach_root(rq, &def_root_domain);
#endif
Index: linux-2.6/kernel/sched_debug.c
===================================================================
--- linux-2.6.orig/kernel/sched_debug.c
+++ linux-2.6/kernel/sched_debug.c
@@ -285,12 +285,14 @@ static void print_cpu(struct seq_file *m
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
+#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
P(yld_count);
P(sched_switch);
P(sched_count);
P(sched_goidle);
+ P64(avg_idle);
P(ttwu_count);
P(ttwu_local);
^ permalink raw reply [flat|nested] 2+ messages in thread
* [tip:sched/core] sched: Fix and clean up rate-limit newidle code
2009-11-10 2:50 [patch 2/2] sched: Fix and rate-limit newidle Mike Galbraith
@ 2009-11-10 4:21 ` tip-bot for Mike Galbraith
0 siblings, 0 replies; 2+ messages in thread
From: tip-bot for Mike Galbraith @ 2009-11-10 4:21 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, hpa, mingo, a.p.zijlstra, yanmin_zhang, efault,
alex.shi, tglx, mingo
Commit-ID: eae0c9dfb534cb3449888b9601228efa6480fdb5
Gitweb: http://git.kernel.org/tip/eae0c9dfb534cb3449888b9601228efa6480fdb5
Author: Mike Galbraith <efault@gmx.de>
AuthorDate: Tue, 10 Nov 2009 03:50:02 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 10 Nov 2009 04:25:58 +0100
sched: Fix and clean up rate-limit newidle code
Commit 1b9508f, "Rate-limit newidle", has been confirmed to fix
the netperf UDP loopback regression reported by Alex Shi.
This is a cleanup and a fix:
- moved to a more out of the way spot
- fix to ensure that balancing doesn't try to balance
runqueues which haven't gone online yet, which can
mess up CPU enumeration during boot.
Reported-by: Alex Shi <alex.shi@intel.com>
Reported-by: Zhang, Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org> # .32.x: a1f84a3: sched: Check for an idle shared cache
Cc: <stable@kernel.org> # .32.x: 1b9508f: sched: Rate-limit newidle
Cc: <stable@kernel.org> # .32.x: fd21073: sched: Fix affinity logic
Cc: <stable@kernel.org> # .32.x
LKML-Reference: <1257821402.5648.17.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/sched.c | 28 +++++++++++++++-------------
1 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 23e3535..ad37776 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2354,17 +2354,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
if (rq != orig_rq)
update_rq_clock(rq);
- if (rq->idle_stamp) {
- u64 delta = rq->clock - rq->idle_stamp;
- u64 max = 2*sysctl_sched_migration_cost;
-
- if (delta > max)
- rq->avg_idle = max;
- else
- update_avg(&rq->avg_idle, delta);
- rq->idle_stamp = 0;
- }
-
WARN_ON(p->state != TASK_WAKING);
cpu = task_cpu(p);
@@ -2421,6 +2410,17 @@ out_running:
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
p->sched_class->task_wake_up(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+ u64 max = 2*sysctl_sched_migration_cost;
+
+ if (delta > max)
+ rq->avg_idle = max;
+ else
+ update_avg(&rq->avg_idle, delta);
+ rq->idle_stamp = 0;
+ }
#endif
out:
task_rq_unlock(rq, &flags);
@@ -4098,7 +4098,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
unsigned long flags;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
- cpumask_setall(cpus);
+ cpumask_copy(cpus, cpu_online_mask);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -4261,7 +4261,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
int all_pinned = 0;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
- cpumask_setall(cpus);
+ cpumask_copy(cpus, cpu_online_mask);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -9522,6 +9522,8 @@ void __init sched_init(void)
rq->cpu = i;
rq->online = 0;
rq->migration_thread = NULL;
+ rq->idle_stamp = 0;
+ rq->avg_idle = 2*sysctl_sched_migration_cost;
INIT_LIST_HEAD(&rq->migration_queue);
rq_attach_root(rq, &def_root_domain);
#endif
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2009-11-10 4:22 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-10 2:50 [patch 2/2] sched: Fix and rate-limit newidle Mike Galbraith
2009-11-10 4:21 ` [tip:sched/core] sched: Fix and clean up rate-limit newidle code tip-bot for Mike Galbraith
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.