All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 2/2] sched: Fix and rate-limit newidle
@ 2009-11-10  2:50 Mike Galbraith
  2009-11-10  4:21 ` [tip:sched/core] sched: Fix and clean up rate-limit newidle code tip-bot for Mike Galbraith
  0 siblings, 1 reply; 2+ messages in thread
From: Mike Galbraith @ 2009-11-10  2:50 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra; +Cc: LKML, alex.shi, Zhang, Yanmin


sched: Fix and rate-limit newidle

Commit 1b9508f, "Rate-limit newidle" has been confirmed to fix the netperf
UDP loopback regression reported by Alex Shi.  This is a replacement, moved
to a more out of the way spot, and with a fix to ensure that balancing
doesn't try to balance runqueues which haven't gone online yet, which can
mess up CPU enumeration during boot.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reported-by: Alex Shi <alex.shi@intel.com>
Reported-by: Zhang, Yanmin <yanmin_zhang@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/sched.c       |   28 +++++++++++++++++++++++++---
 kernel/sched_debug.c |    2 ++
 2 files changed, 27 insertions(+), 3 deletions(-)

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -589,6 +589,8 @@ struct rq {
 
 	u64 rt_avg;
 	u64 age_stamp;
+	u64 idle_stamp;
+	u64 avg_idle;
 #endif
 
 	/* calc_load related fields */
@@ -2437,6 +2439,17 @@ out_running:
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
 		p->sched_class->task_wake_up(rq, p);
+
+	if (unlikely(rq->idle_stamp)) {
+		u64 delta = rq->clock - rq->idle_stamp;
+		u64 max = 2*sysctl_sched_migration_cost;
+
+		if (delta > max)
+			rq->avg_idle = max;
+		else
+			update_avg(&rq->avg_idle, delta);
+		rq->idle_stamp = 0;
+	}
 #endif
 out:
 	task_rq_unlock(rq, &flags);
@@ -4114,7 +4127,7 @@ static int load_balance(int this_cpu, st
 	unsigned long flags;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_setall(cpus);
+	cpumask_copy(cpus, cpu_online_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -4277,7 +4290,7 @@ load_balance_newidle(int this_cpu, struc
 	int all_pinned = 0;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_setall(cpus);
+	cpumask_copy(cpus, cpu_online_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -4417,6 +4430,11 @@ static void idle_balance(int this_cpu, s
 	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
 
+	this_rq->idle_stamp = this_rq->clock;
+
+	if (this_rq->avg_idle < sysctl_sched_migration_cost)
+		return;
+
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
 
@@ -4431,8 +4449,10 @@ static void idle_balance(int this_cpu, s
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
 			next_balance = sd->last_balance + interval;
-		if (pulled_task)
+		if (pulled_task) {
+			this_rq->idle_stamp = 0;
 			break;
+		}
 	}
 	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
 		/*
@@ -9535,6 +9555,8 @@ void __init sched_init(void)
 		rq->cpu = i;
 		rq->online = 0;
 		rq->migration_thread = NULL;
+		rq->idle_stamp = 0;
+		rq->avg_idle = 2*sysctl_sched_migration_cost;
 		INIT_LIST_HEAD(&rq->migration_queue);
 		rq_attach_root(rq, &def_root_domain);
 #endif
Index: linux-2.6/kernel/sched_debug.c
===================================================================
--- linux-2.6.orig/kernel/sched_debug.c
+++ linux-2.6/kernel/sched_debug.c
@@ -285,12 +285,14 @@ static void print_cpu(struct seq_file *m
 
 #ifdef CONFIG_SCHEDSTATS
 #define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 
 	P(yld_count);
 
 	P(sched_switch);
 	P(sched_count);
 	P(sched_goidle);
+	P64(avg_idle);
 
 	P(ttwu_count);
 	P(ttwu_local);



^ permalink raw reply	[flat|nested] 2+ messages in thread

* [tip:sched/core] sched: Fix and clean up rate-limit newidle code
  2009-11-10  2:50 [patch 2/2] sched: Fix and rate-limit newidle Mike Galbraith
@ 2009-11-10  4:21 ` tip-bot for Mike Galbraith
  0 siblings, 0 replies; 2+ messages in thread
From: tip-bot for Mike Galbraith @ 2009-11-10  4:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, yanmin_zhang, efault,
	alex.shi, tglx, mingo

Commit-ID:  eae0c9dfb534cb3449888b9601228efa6480fdb5
Gitweb:     http://git.kernel.org/tip/eae0c9dfb534cb3449888b9601228efa6480fdb5
Author:     Mike Galbraith <efault@gmx.de>
AuthorDate: Tue, 10 Nov 2009 03:50:02 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Tue, 10 Nov 2009 04:25:58 +0100

sched: Fix and clean up rate-limit newidle code

Commit 1b9508f, "Rate-limit newidle" has been confirmed to fix
the netperf UDP loopback regression reported by Alex Shi.

This is a cleanup and a fix:

 - moved to a more out of the way spot

 - fix to ensure that balancing doesn't try to balance
   runqueues which haven't gone online yet, which can
   mess up CPU enumeration during boot.

Reported-by: Alex Shi <alex.shi@intel.com>
Reported-by: Zhang, Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org> # .32.x: a1f84a3: sched: Check for an idle shared cache
Cc: <stable@kernel.org> # .32.x: 1b9508f: sched: Rate-limit newidle
Cc: <stable@kernel.org> # .32.x: fd21073: sched: Fix affinity logic
Cc: <stable@kernel.org> # .32.x
LKML-Reference: <1257821402.5648.17.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c |   28 +++++++++++++++-------------
 1 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 23e3535..ad37776 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2354,17 +2354,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	if (rq != orig_rq)
 		update_rq_clock(rq);
 
-	if (rq->idle_stamp) {
-		u64 delta = rq->clock - rq->idle_stamp;
-		u64 max = 2*sysctl_sched_migration_cost;
-
-		if (delta > max)
-			rq->avg_idle = max;
-		else
-			update_avg(&rq->avg_idle, delta);
-		rq->idle_stamp = 0;
-	}
-
 	WARN_ON(p->state != TASK_WAKING);
 	cpu = task_cpu(p);
 
@@ -2421,6 +2410,17 @@ out_running:
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
 		p->sched_class->task_wake_up(rq, p);
+
+	if (unlikely(rq->idle_stamp)) {
+		u64 delta = rq->clock - rq->idle_stamp;
+		u64 max = 2*sysctl_sched_migration_cost;
+
+		if (delta > max)
+			rq->avg_idle = max;
+		else
+			update_avg(&rq->avg_idle, delta);
+		rq->idle_stamp = 0;
+	}
 #endif
 out:
 	task_rq_unlock(rq, &flags);
@@ -4098,7 +4098,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	unsigned long flags;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_setall(cpus);
+	cpumask_copy(cpus, cpu_online_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -4261,7 +4261,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	int all_pinned = 0;
 	struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
 
-	cpumask_setall(cpus);
+	cpumask_copy(cpus, cpu_online_mask);
 
 	/*
 	 * When power savings policy is enabled for the parent domain, idle
@@ -9522,6 +9522,8 @@ void __init sched_init(void)
 		rq->cpu = i;
 		rq->online = 0;
 		rq->migration_thread = NULL;
+		rq->idle_stamp = 0;
+		rq->avg_idle = 2*sysctl_sched_migration_cost;
 		INIT_LIST_HEAD(&rq->migration_queue);
 		rq_attach_root(rq, &def_root_domain);
 #endif

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2009-11-10  4:22 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-11-10  2:50 [patch 2/2] sched: Fix and rate-limit newidle Mike Galbraith
2009-11-10  4:21 ` [tip:sched/core] sched: Fix and clean up rate-limit newidle code tip-bot for Mike Galbraith

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.