public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Mike Galbraith <efault@gmx.de>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>, Ingo Molnar <mingo@elte.hu>
Cc: LKML <linux-kernel@vger.kernel.org>
Subject: [patch] sched: fix b5d9d734 blunder in task_new_fair()
Date: Sun, 22 Nov 2009 13:07:44 +0100	[thread overview]
Message-ID: <1258891664.14325.30.camel@marge.simson.net> (raw)


sched: fix b5d9d734 blunder in task_new_fair()

b5d9d734 fixed the problem of a forking task's child gaining vruntime..
IFF the child/parent shared a runqueue.  In the other case, it broke
fairness all to pieces by setting the child's vruntime to whatever task
happened to be current on the child's runqueue at wakeup time.  Fix this
by adding a sched_class::task_new parameter to give the class a chance to
prepare the child for wakeup.

At child wakeup time, call task_new() with the parent's rq locked as the
comment in task_new() states, update the parent's stats (which must be done
with the rq locked), call task_new() to prepare the child, unlock parent rq,
select a runqueue for the child, _then_ set_task_cpu() with the
child's vruntime set properly and both runqueue clocks updated to get the
current offset.  Lock child's rq and proceed with wakeup.

Also, since setting scheduling policy requires the tasklist lock, move
sched_fork() under the tasklist lock in copy_process().

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>

---
 include/linux/sched.h |    2 +-
 kernel/fork.c         |    6 +++---
 kernel/sched.c        |   43 ++++++++++++++++++++++++++++---------------
 kernel/sched_fair.c   |   17 +++++++++++------
 4 files changed, 43 insertions(+), 25 deletions(-)

Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -1925,20 +1925,23 @@ static void task_tick_fair(struct rq *rq
  * Share the fairness runtime between parent and child, thus the
  * total amount of pressure for CPU stays equal - new tasks
  * get a chance to run but frequent forkers are not allowed to
- * monopolize the CPU. Note: the parent runqueue is locked,
- * the child is not running yet.
+ * monopolize the CPU. Note: the parent runqueue is locked at
+ * prep time, the child is not running yet.  At wakeup time,
+ * the child's runqueue is locked.
  */
-static void task_new_fair(struct rq *rq, struct task_struct *p)
+static void task_new_fair(struct rq *rq, struct task_struct *p, int prep)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
 	int this_cpu = smp_processor_id();
 
-	sched_info_queued(p);
-
 	update_curr(cfs_rq);
-	if (curr)
+
+	if (prep && curr) {
 		se->vruntime = curr->vruntime;
+		return;
+	}
+
 	place_entity(cfs_rq, se, 1);
 
 	/* 'curr' will be NULL if the child belongs to a different group */
@@ -1953,6 +1956,8 @@ static void task_new_fair(struct rq *rq,
 	}
 
 	enqueue_task_fair(rq, p, 0);
+
+	sched_info_queued(p);
 }
 
 /*
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1099,7 +1099,7 @@ struct sched_class {
 
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
-	void (*task_new) (struct rq *rq, struct task_struct *p);
+	void (*task_new) (struct rq *rq, struct task_struct *p, int prep);
 
 	void (*switched_from) (struct rq *this_rq, struct task_struct *task,
 			       int running);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -2558,7 +2558,6 @@ static void __sched_fork(struct task_str
 void sched_fork(struct task_struct *p, int clone_flags)
 {
 	int cpu = get_cpu();
-	unsigned long flags;
 
 	__sched_fork(p);
 
@@ -2566,7 +2565,7 @@ void sched_fork(struct task_struct *p, i
 	 * Revert to default priority/policy on fork if requested.
 	 */
 	if (unlikely(p->sched_reset_on_fork)) {
-		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
+		if (task_has_rt_policy(p)) {
 			p->policy = SCHED_NORMAL;
 			p->normal_prio = p->static_prio;
 		}
@@ -2589,16 +2588,10 @@ void sched_fork(struct task_struct *p, i
 	 */
 	p->prio = current->normal_prio;
 
-	if (!rt_prio(p->prio))
+	if (!task_has_rt_policy(p))
 		p->sched_class = &fair_sched_class;
 
-#ifdef CONFIG_SMP
-	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
-#endif
-	local_irq_save(flags);
-	update_rq_clock(cpu_rq(cpu));
-	set_task_cpu(p, cpu);
-	local_irq_restore(flags);
+	__set_task_cpu(p, cpu);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -2625,21 +2618,40 @@ void sched_fork(struct task_struct *p, i
  */
 void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
+	int cpu = get_cpu();
 	unsigned long flags;
-	struct rq *rq;
+	struct task_struct *parent = current;
+	struct rq *rq, *orig_rq;
 
-	rq = task_rq_lock(p, &flags);
+	smp_wmb();
+	rq = orig_rq = task_rq_lock(parent, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
-	update_rq_clock(rq);
+	update_rq_clock(orig_rq);
 
-	if (!p->sched_class->task_new || !current->se.on_rq) {
+	if (p->sched_class->task_new)
+		p->sched_class->task_new(orig_rq, p, 1);
+#ifdef CONFIG_SMP
+	p->state = TASK_WAKING;
+	__task_rq_unlock(orig_rq);
+	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
+	rq = cpu_rq(cpu);
+	if (rq != orig_rq) {
+		update_rq_clock(rq);
+		set_task_cpu(p, cpu);
+	}
+	__task_rq_lock(p);
+	WARN_ON(p->state != TASK_WAKING);
+	p->state = TASK_RUNNING;
+#endif
+
+	if (!p->sched_class->task_new || !parent->se.on_rq) {
 		activate_task(rq, p, 0);
 	} else {
 		/*
 		 * Let the scheduling class do new task startup
 		 * management (if any):
 		 */
-		p->sched_class->task_new(rq, p);
+		p->sched_class->task_new(rq, p, 0);
 		inc_nr_running(rq);
 	}
 	trace_sched_wakeup_new(rq, p, 1);
@@ -2649,6 +2661,7 @@ void wake_up_new_task(struct task_struct
 		p->sched_class->task_wake_up(rq, p);
 #endif
 	task_rq_unlock(rq, &flags);
+	put_cpu();
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -1125,9 +1125,6 @@ static struct task_struct *copy_process(
 
 	p->stack_start = stack_start;
 
-	/* Perform scheduler related setup. Assign this task to a CPU. */
-	sched_fork(p, clone_flags);
-
 	retval = perf_event_init_task(p);
 	if (retval)
 		goto bad_fork_cleanup_policy;
@@ -1229,6 +1226,9 @@ static struct task_struct *copy_process(
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
+	/* Perform scheduler related setup. Assign this task to a CPU. */
+	sched_fork(p, clone_flags);
+
 	/*
 	 * The task hasn't been attached yet, so its cpus_allowed mask will
 	 * not be changed, nor will its assigned CPU.



             reply	other threads:[~2009-11-22 12:07 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-22 12:07 Mike Galbraith [this message]
2009-11-24 13:51 ` [patch] sched: fix b5d9d734 blunder in task_new_fair() Peter Zijlstra
2009-11-24 17:07   ` Mike Galbraith
2009-11-24 17:35     ` Peter Zijlstra
2009-11-24 17:54       ` Peter Zijlstra
2009-11-24 18:21         ` Mike Galbraith
2009-11-24 18:27           ` Peter Zijlstra
2009-11-24 18:36             ` Mike Galbraith
2009-11-25  6:57             ` Mike Galbraith
2009-11-25  9:51               ` Peter Zijlstra
2009-11-25 13:09                 ` Mike Galbraith
2009-11-26 16:26 ` Peter Zijlstra
2009-11-27  8:45   ` Mike Galbraith
2009-11-27  8:57     ` Mike Galbraith
2009-11-27 11:55   ` Mike Galbraith
2009-11-27 12:21     ` Peter Zijlstra
2009-11-27 12:38       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1258891664.14325.30.camel@marge.simson.net \
    --to=efault@gmx.de \
    --cc=a.p.zijlstra@chello.nl \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox