All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Mike Galbraith <efault@gmx.de>
Cc: Ingo Molnar <mingo@elte.hu>, LKML <linux-kernel@vger.kernel.org>
Subject: Re: [patch] sched: fix b5d9d734 blunder in task_new_fair()
Date: Fri, 27 Nov 2009 13:21:39 +0100	[thread overview]
Message-ID: <1259324499.6483.257.camel@laptop> (raw)
In-Reply-To: <1259322921.3878.28.camel@marge.simson.net>

I'm poking at it like so...

the below hasn't yet seen a compiler and I know there to still be a few
races in the ttwu() path.


---
 include/linux/sched.h |    6 +-
 kernel/fork.c         |    2 +-
 kernel/sched.c        |  170 +++++++++++++++++++++++++++++++------------------
 kernel/sched_fair.c   |   26 +++----
 4 files changed, 124 insertions(+), 80 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c46023..ff5ec5a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1099,7 +1099,7 @@ struct sched_class {
 
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
-	void (*task_new) (struct rq *rq, struct task_struct *p);
+	void (*fork) (struct task_struct *p);
 
 	void (*switched_from) (struct rq *this_rq, struct task_struct *task,
 			       int running);
@@ -2456,7 +2456,7 @@ static inline unsigned int task_cpu(const struct task_struct *p)
 	return task_thread_info(p)->cpu;
 }
 
-extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
+extern void set_task_cpu_locked(struct task_struct *p, int cpu);
 
 #else
 
@@ -2465,7 +2465,7 @@ static inline unsigned int task_cpu(const struct task_struct *p)
 	return 0;
 }
 
-static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+static inline void set_task_cpu_locked(struct task_struct *p, int cpu)
 {
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 166b8c4..29c4aec 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1242,7 +1242,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
 			!cpu_online(task_cpu(p))))
-		set_task_cpu(p, smp_processor_id());
+		set_task_cpu_locked(p, smp_processor_id());
 
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
diff --git a/kernel/sched.c b/kernel/sched.c
index 0cbf2ef..dced5af 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1967,7 +1967,7 @@ inline int task_curr(const struct task_struct *p)
 	return cpu_curr(task_cpu(p)) == p;
 }
 
-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+static inline void raw_set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
 	set_task_rq(p, cpu);
 #ifdef CONFIG_SMP
@@ -2008,6 +2008,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 void kthread_bind(struct task_struct *p, unsigned int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
+	struct rq *old_rq = task_rq(p);
 	unsigned long flags;
 
 	/* Must have done schedule() in kthread() before we set_task_cpu */
@@ -2016,13 +2017,14 @@ void kthread_bind(struct task_struct *p, unsigned int cpu)
 		return;
 	}
 
-	spin_lock_irqsave(&rq->lock, flags);
-	update_rq_clock(rq);
+	local_irq_save(flags);
+	double_rq_lock(old_rq, rq);
 	set_task_cpu(p, cpu);
 	p->cpus_allowed = cpumask_of_cpu(cpu);
 	p->rt.nr_cpus_allowed = 1;
 	p->flags |= PF_THREAD_BOUND;
-	spin_unlock_irqrestore(&rq->lock, flags);
+	double_rq_unlock(old_rq, rq);
+	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kthread_bind);
 
@@ -2056,40 +2058,101 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	return delta < (s64)sysctl_sched_migration_cost;
 }
 
+static void set_task_cpu_all(struct task_struct *p, int old_cpu, int new_cpu)
+{
+	trace_sched_migrate_task(p, new_cpu);
+
+	p->se.nr_migrations++;
+#ifdef CONFIG_SCHEDSTATS
+	if (task_hot(p, old_rq->clock, NULL))
+		schedstat_inc(p, se.nr_forced2_migrations);
+#endif
+	perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
+
+	raw_set_task_cpu(p, new_cpu);
+}
 
-void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+static void set_task_cpu(struct task_struct *p, int new_cpu)
 {
 	int old_cpu = task_cpu(p);
-	struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
-	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
-		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
-	u64 clock_offset;
 
-	clock_offset = old_rq->clock - new_rq->clock;
+	if (old_cpu == new_cpu)
+		return;
 
-	trace_sched_migrate_task(p, new_cpu);
+	if (p->sched_class == &fair_sched_class) {
+		struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+			      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
 
-#ifdef CONFIG_SCHEDSTATS
-	if (p->se.wait_start)
-		p->se.wait_start -= clock_offset;
-	if (p->se.sleep_start)
-		p->se.sleep_start -= clock_offset;
-	if (p->se.block_start)
-		p->se.block_start -= clock_offset;
-#endif
-	if (old_cpu != new_cpu) {
-		p->se.nr_migrations++;
-#ifdef CONFIG_SCHEDSTATS
-		if (task_hot(p, old_rq->clock, NULL))
-			schedstat_inc(p, se.nr_forced2_migrations);
-#endif
-		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
-				     1, 1, NULL, 0);
+		p->se.vruntime -= old_cfsrq->min_vruntime -
+			new_cfsrq->min_vruntime;
+	}
+
+	set_task_cpu_all(p, old_cpu, new_cpu);
+}
+
+static inline
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+{
+	return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+}
+
+/*
+ * self balance a task without holding both rq locks
+ *
+ * used by fork and wakeup, since in neither case the task was present
+ * on a rq the load-balancer wouldn't encounter it and move it away
+ * underneath us.
+ *
+ * and since we have IRQs disabled we keep off hot-unplug.
+ */
+static struct rq *
+balance_task(struct task_struct *p, int sd_flags, int wake_flags)
+{
+	struct rq *rq, *old_rq;
+	u64 vdelta;
+
+       	rq = old_rq = task_rq(p);
+
+	if (p->sched_class == &fair_sched_class)
+		vdelta = task_cfs_rq(p)->min_vruntime;
+
+	__task_rq_unlock(old_rq);
+
+	cpu = select_task_rq(p, sd_flags, wake_flags);
+
+	rq = cpu_rq(cpu);
+	spin_lock(&rq->lock);
+	if (rq == old_rq)
+		return rq;
+
+	update_rq_clock(rq);
+
+	set_task_cpu_all(p, task_cpu(p), cpu);
+
+	if (p->sched_class == &fair_sched_class) {
+		vdelta -= task_cfs_rq(p)->min_vruntime;
+		p->se.vruntime -= vdelta;
 	}
-	p->se.vruntime -= old_cfsrq->min_vruntime -
-					 new_cfsrq->min_vruntime;
 
-	__set_task_cpu(p, new_cpu);
+	return rq;
+}
+
+void set_task_cpu_locked(struct task_struct *p, int new_cpu)
+{
+	struct rq *old_rq = task_rq(p);
+	struct rq *new_rq = cpu_rq(new_cpu);
+	unsigned long flags;
+
+	local_irq_save(flags);
+	double_rq_lock(old_rq, new_rq);
+	/*
+	 * called from fork() on the child task before it gets
+	 * put on the tasklist, no way we could've been migrated
+	 */
+	WARN_ON_ONCE(old_rq != task_rq(p));
+	set_task_cpu(p, new_cpu);
+	double_rq_unlock(old_rq, new_rq);
+	local_irq_restore(flags);
 }
 
 struct migration_req {
@@ -2373,19 +2436,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	 */
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
+
 	p->state = TASK_WAKING;
+	rq = balance_task(p, SD_BALANCE_WAKE, wake_flags);
 	task_rq_unlock(rq, &flags);
 
-	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu) {
-		local_irq_save(flags);
-		rq = cpu_rq(cpu);
-		update_rq_clock(rq);
-		set_task_cpu(p, cpu);
-		local_irq_restore(flags);
-	}
-	rq = task_rq_lock(p, &flags);
-
 	WARN_ON(p->state != TASK_WAKING);
 	cpu = task_cpu(p);
 
@@ -2557,8 +2612,9 @@ static void __sched_fork(struct task_struct *p)
  */
 void sched_fork(struct task_struct *p, int clone_flags)
 {
-	int cpu = get_cpu();
 	unsigned long flags;
+	struct *rq;
+	int cpu;
 
 	__sched_fork(p);
 
@@ -2592,13 +2648,15 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	if (!rt_prio(p->prio))
 		p->sched_class = &fair_sched_class;
 
-#ifdef CONFIG_SMP
-	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
-#endif
-	local_irq_save(flags);
-	update_rq_clock(cpu_rq(cpu));
-	set_task_cpu(p, cpu);
-	local_irq_restore(flags);
+	rq = task_rq_lock(current, &flags);
+	update_rq_clock(rq);
+
+	if (p->sched_class->fork)
+		p->sched_class->fork(p);
+
+	rq = balance_task(p, SD_BALANCE_FORK, 0);
+	task_rq_unlock(rq, &flags);
+
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
@@ -2631,17 +2689,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
 	update_rq_clock(rq);
-
-	if (!p->sched_class->task_new || !current->se.on_rq) {
-		activate_task(rq, p, 0);
-	} else {
-		/*
-		 * Let the scheduling class do new task startup
-		 * management (if any):
-		 */
-		p->sched_class->task_new(rq, p);
-		inc_nr_running(rq);
-	}
+	activate_task(rq, p, 0);
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
@@ -3156,7 +3204,7 @@ out:
 void sched_exec(void)
 {
 	int new_cpu, this_cpu = get_cpu();
-	new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
+	new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
 	put_cpu();
 	if (new_cpu != this_cpu)
 		sched_migrate_task(current, new_cpu);
@@ -6980,7 +7028,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 
 	idle->prio = idle->normal_prio = MAX_PRIO;
 	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
-	__set_task_cpu(idle, cpu);
+	raw_set_task_cpu(idle, cpu);
 
 	rq->curr = rq->idle = idle;
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0ff21af..7c27d2f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1922,28 +1922,26 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 }
 
 /*
- * Share the fairness runtime between parent and child, thus the
- * total amount of pressure for CPU stays equal - new tasks
- * get a chance to run but frequent forkers are not allowed to
- * monopolize the CPU. Note: the parent runqueue is locked,
- * the child is not running yet.
+ * Called at fork, before the child is on the tasklist and before
+ * self-balance, current (parent) his rq is locked.
  */
-static void task_new_fair(struct rq *rq, struct task_struct *p)
+static void fork_fair(struct task_struct *p)
 {
-	struct cfs_rq *cfs_rq = task_cfs_rq(p);
-	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+	struct rq *rq = this_rq();
+	struct cfs_rq *cfs_rq = task_cfs_rq(current);
+	struct sched_entity *se = &p->se, *curr = cfs_rq-curr;
 	int this_cpu = smp_processor_id();
 
+	if (unlikely(task_cpu(p) != this_cpu))
+		raw_set_task_cpu(p, this_cpu);
+	
 	sched_info_queued(p);
-
 	update_curr(cfs_rq);
 	if (curr)
 		se->vruntime = curr->vruntime;
 	place_entity(cfs_rq, se, 1);
 
-	/* 'curr' will be NULL if the child belongs to a different group */
-	if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
-			curr && entity_before(curr, se)) {
+	if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
 		/*
 		 * Upon rescheduling, sched_class::put_prev_task() will place
 		 * 'current' within the tree based on its new key value.
@@ -1951,8 +1949,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		swap(curr->vruntime, se->vruntime);
 		resched_task(rq->curr);
 	}
-
-	enqueue_task_fair(rq, p, 0);
 }
 
 /*
@@ -2056,7 +2052,7 @@ static const struct sched_class fair_sched_class = {
 
 	.set_curr_task          = set_curr_task_fair,
 	.task_tick		= task_tick_fair,
-	.task_new		= task_new_fair,
+	.fork			= fork_fair,
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,



  reply	other threads:[~2009-11-27 12:21 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-22 12:07 [patch] sched: fix b5d9d734 blunder in task_new_fair() Mike Galbraith
2009-11-24 13:51 ` Peter Zijlstra
2009-11-24 17:07   ` Mike Galbraith
2009-11-24 17:35     ` Peter Zijlstra
2009-11-24 17:54       ` Peter Zijlstra
2009-11-24 18:21         ` Mike Galbraith
2009-11-24 18:27           ` Peter Zijlstra
2009-11-24 18:36             ` Mike Galbraith
2009-11-25  6:57             ` Mike Galbraith
2009-11-25  9:51               ` Peter Zijlstra
2009-11-25 13:09                 ` Mike Galbraith
2009-11-26 16:26 ` Peter Zijlstra
2009-11-27  8:45   ` Mike Galbraith
2009-11-27  8:57     ` Mike Galbraith
2009-11-27 11:55   ` Mike Galbraith
2009-11-27 12:21     ` Peter Zijlstra [this message]
2009-11-27 12:38       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1259324499.6483.257.camel@laptop \
    --to=peterz@infradead.org \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.