Re: sched_setscheduler() vs idle_balance() race

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: LKML <linux-kernel@vger.kernel.org>, Ingo Molnar <mingo@elte.hu>,
	ktkhai@parallels.com
Subject: Re: sched_setscheduler() vs idle_balance() race
Date: Thu, 28 May 2015 15:53:55 +0200	[thread overview]
Message-ID: <20150528135355.GK3644@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <1432799032.3237.119.camel@gmail.com>

On Thu, May 28, 2015 at 09:43:52AM +0200, Mike Galbraith wrote:
> Hi Peter,
> 
> I'm not seeing what prevents pull_task() from yanking a task out from
> under __sched_setscheduler().  A box sprinkling smoldering 3.0 kernel
> wreckage all over my bugzilla mbox isn't seeing it either ;-)

Say, how easy can that thing be reproduced?

The below is compile tested only, but it might just work if I didn't
miss anything :-)


---
 kernel/sched/core.c | 137 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 77 insertions(+), 60 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4eec60757b16..28f1ddc0bef2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1000,22 +1000,6 @@ inline int task_curr(const struct task_struct *p)
 	return cpu_curr(task_cpu(p)) == p;
 }
 
-/*
- * Can drop rq->lock because from sched_class::switched_from() methods drop it.
- */
-static inline void check_class_changed(struct rq *rq, struct task_struct *p,
-				       const struct sched_class *prev_class,
-				       int oldprio)
-{
-	if (prev_class != p->sched_class) {
-		if (prev_class->switched_from)
-			prev_class->switched_from(rq, p);
-		/* Possble rq->lock 'hole'.  */
-		p->sched_class->switched_to(rq, p);
-	} else if (oldprio != p->prio || dl_task(p))
-		p->sched_class->prio_changed(rq, p, oldprio);
-}
-
 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
 	const struct sched_class *class;
@@ -3075,12 +3059,38 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	p->prio = prio;
 
+	if (prev_class != p->sched_class) {
+		prev_class->switched_from(rq, p);
+		/*
+		 * switched_from() is allowed to drop @rq->lock; which opens a
+		 * race against load-balancing, however since @p is not
+		 * currently enqueued it is invisible to the load-balancer.
+		 *
+		 * double check @p is still where we thought it was.
+		 */
+		WARN_ON_ONCE(task_rq(p) != rq);
+	}
+
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, enqueue_flag);
 
-	check_class_changed(rq, p, prev_class, oldprio);
+	/*
+	 * Both switched_to() and prio_changed() are allowed to drop @rq->lock;
+	 * which opens a race against load-balancing, and since @p is now
+	 * enqueued it can indeed be subject to this.
+	 *
+	 * This means that any balancing done by these functions must double
+	 * check a task's rq.
+	 */
+	if (prev_class != p->sched_class)
+		p->sched_class->switched_to(rq, p);
+	else if (oldprio != p->prio || dl_task(p))
+		p->sched_class->prio_changed(rq, p, oldprio);
+	/*
+	 * It further means we should not rely on @p's rq from here on.
+	 */
 out_unlock:
 	__task_rq_unlock(rq);
 }
@@ -3420,7 +3430,7 @@ static bool dl_param_changed(struct task_struct *p,
 
 static int __sched_setscheduler(struct task_struct *p,
 				const struct sched_attr *attr,
-				bool user)
+				bool user, bool pi)
 {
 	int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
 		      MAX_RT_PRIO - 1 - attr->sched_priority;
@@ -3606,18 +3616,20 @@ static int __sched_setscheduler(struct task_struct *p,
 	p->sched_reset_on_fork = reset_on_fork;
 	oldprio = p->prio;
 
-	/*
-	 * Take priority boosted tasks into account. If the new
-	 * effective priority is unchanged, we just store the new
-	 * normal parameters and do not touch the scheduler class and
-	 * the runqueue. This will be done when the task deboost
-	 * itself.
-	 */
-	new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
-	if (new_effective_prio == oldprio) {
-		__setscheduler_params(p, attr);
-		task_rq_unlock(rq, p, &flags);
-		return 0;
+	if (pi) {
+		/*
+		 * Take priority boosted tasks into account. If the new
+		 * effective priority is unchanged, we just store the new
+		 * normal parameters and do not touch the scheduler class and
+		 * the runqueue. This will be done when the task deboost
+		 * itself.
+		 */
+		new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
+		if (new_effective_prio == oldprio) {
+			__setscheduler_params(p, attr);
+			task_rq_unlock(rq, p, &flags);
+			return 0;
+		}
 	}
 
 	queued = task_on_rq_queued(p);
@@ -3628,7 +3640,19 @@ static int __sched_setscheduler(struct task_struct *p,
 		put_prev_task(rq, p);
 
 	prev_class = p->sched_class;
-	__setscheduler(rq, p, attr, true);
+	__setscheduler(rq, p, attr, pi);
+
+	if (prev_class != p->sched_class) {
+		prev_class->switched_from(rq, p);
+		/*
+		 * switched_from() is allowed to drop @rq->lock; which opens a
+		 * race against load-balancing, however since @p is not
+		 * currently enqueued it is invisible to the load-balancer.
+		 *
+		 * double check @p is still where we thought it was.
+		 */
+		WARN_ON_ONCE(task_rq(p) != rq);
+	}
 
 	if (running)
 		p->sched_class->set_curr_task(rq);
@@ -3640,10 +3664,25 @@ static int __sched_setscheduler(struct task_struct *p,
 		enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
 	}
 
-	check_class_changed(rq, p, prev_class, oldprio);
+	/*
+	 * Both switched_to() and prio_changed() are allowed to drop @rq->lock;
+	 * which opens a race against load-balancing, and since @p is now
+	 * enqueued it can indeed be subject to this.
+	 *
+	 * This means that any balancing done by these functions must double
+	 * check a task's rq.
+	 */
+	if (prev_class != p->sched_class)
+		p->sched_class->switched_to(rq, p);
+	else if (oldprio != p->prio || dl_task(p))
+		p->sched_class->prio_changed(rq, p, oldprio);
+	/*
+	 * It further means we should not rely on @p's rq from here on.
+	 */
 	task_rq_unlock(rq, p, &flags);
 
-	rt_mutex_adjust_pi(p);
+	if (pi)
+		rt_mutex_adjust_pi(p);
 
 	return 0;
 }
@@ -3664,7 +3703,7 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
 		attr.sched_policy = policy;
 	}
 
-	return __sched_setscheduler(p, &attr, check);
+	return __sched_setscheduler(p, &attr, check, true);
 }
 /**
  * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
@@ -3685,7 +3724,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 
 int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 {
-	return __sched_setscheduler(p, attr, true);
+	return __sched_setscheduler(p, attr, true, true);
 }
 EXPORT_SYMBOL_GPL(sched_setattr);
 
@@ -7346,32 +7385,12 @@ EXPORT_SYMBOL(___might_sleep);
 #endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void normalize_task(struct rq *rq, struct task_struct *p)
+void normalize_rt_tasks(void)
 {
-	const struct sched_class *prev_class = p->sched_class;
+	struct task_struct *g, *p;
 	struct sched_attr attr = {
 		.sched_policy = SCHED_NORMAL,
 	};
-	int old_prio = p->prio;
-	int queued;
-
-	queued = task_on_rq_queued(p);
-	if (queued)
-		dequeue_task(rq, p, 0);
-	__setscheduler(rq, p, &attr, false);
-	if (queued) {
-		enqueue_task(rq, p, 0);
-		resched_curr(rq);
-	}
-
-	check_class_changed(rq, p, prev_class, old_prio);
-}
-
-void normalize_rt_tasks(void)
-{
-	struct task_struct *g, *p;
-	unsigned long flags;
-	struct rq *rq;
 
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, p) {
@@ -7398,9 +7417,7 @@ void normalize_rt_tasks(void)
 			continue;
 		}
 
-		rq = task_rq_lock(p, &flags);
-		normalize_task(rq, p);
-		task_rq_unlock(rq, p, &flags);
+		__sched_setscheduler(p, &attr, false, false);
 	}
 	read_unlock(&tasklist_lock);
 }

next prev parent reply	other threads:[~2015-05-28 13:54 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-28  7:43 sched_setscheduler() vs idle_balance() race Mike Galbraith
2015-05-28 11:51 ` Peter Zijlstra
2015-05-28 12:04   ` Mike Galbraith
2015-05-28 12:06     ` Peter Zijlstra
2015-05-28 12:32       ` Mike Galbraith
2015-05-28 13:53 ` Peter Zijlstra [this message]
2015-05-28 14:54   ` Mike Galbraith
2015-05-28 15:24     ` Peter Zijlstra
2015-05-29 18:30       ` Mike Galbraith
2015-05-29 18:48         ` Mike Galbraith
2015-06-01  8:14           ` Peter Zijlstra
2015-06-01  8:16         ` Peter Zijlstra
2015-06-01 10:00           ` Mike Galbraith
2015-05-28 16:59   ` Kirill Tkhai
2015-05-30 13:08   ` Mike Galbraith
2015-05-31  6:39     ` Mike Galbraith
2015-06-01  8:24       ` Peter Zijlstra
2015-06-01  8:19     ` Peter Zijlstra

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4eec60757b1 dfblob:28f1ddc0bef )
 OR (
bs:"Re: sched_setscheduler() vs idle_balance() race" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150528135355.GK3644@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=ktkhai@parallels.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.