From: Mike Galbraith <umgwanakikbuti@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: LKML <linux-kernel@vger.kernel.org>, Ingo Molnar <mingo@elte.hu>,
ktkhai@parallels.com
Subject: Re: sched_setscheduler() vs idle_balance() race
Date: Sun, 31 May 2015 08:39:04 +0200 [thread overview]
Message-ID: <1433054344.6545.20.camel@gmail.com> (raw)
In-Reply-To: <1432991306.3395.35.camel@gmail.com>
On Sat, 2015-05-30 at 15:08 +0200, Mike Galbraith wrote:
> On Thu, 2015-05-28 at 15:53 +0200, Peter Zijlstra wrote:
> > On Thu, May 28, 2015 at 09:43:52AM +0200, Mike Galbraith wrote:
> > > Hi Peter,
> > >
> > > I'm not seeing what prevents pull_task() from yanking a task out from
> > > under __sched_setscheduler(). A box sprinkling smoldering 3.0 kernel
> > > wreckage all over my bugzilla mbox isn't seeing it either ;-)
> >
> > Say, how easy can that thing be reproduced?
> >
> > The below is compile tested only, but it might just work if I didn't
> > miss anything :-)
>
> Seems trying to make the target invisible to balancing created a new
> race: dequeue target, do stuff that may drop rq->lock while it's
> dequeued, target sneaks into schedule(), dequeues itself (#2), boom.
Well, the below (ick) plugged it up, but...
I don't see why we can't just say no in can_migrate_task() if ->pi_lock
is held. It plugged the original hole in a lot less lines.
Hohum, time to go pretend I have something better to do on a sunny
Sunday morning ;-)
massive_intr_x-6213 [007] d... 170.579339: pull_rt_task: yup, pulled
massive_intr_x-6213 [002] d... 170.580114: pull_rt_task: yup, pulled
massive_intr_x-6213 [006] d... 170.586083: pull_rt_task: yup, pulled
<...>-6237 [006] d... 170.593878: __schedule: saving the day
---
kernel/sched/core.c | 43 +++++++++++++++++++++++++++++++++++--------
kernel/sched/deadline.c | 6 +++---
kernel/sched/rt.c | 11 +++++++++--
kernel/sched/sched.h | 10 +++++++++-
4 files changed, 56 insertions(+), 14 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2745,9 +2745,18 @@ static void __sched __schedule(void)
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up().
*/
+dequeued:
smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);
+ if (unlikely(!task_on_rq_queued(prev))) {
+ trace_printk("saving the day\n");
+ tracing_off();
+ raw_spin_unlock_irq(&rq->lock);
+ cpu_relax();
+ goto dequeued;
+ }
+
rq->clock_skip_update <<= 1; /* promote REQ to ACT */
switch_count = &prev->nivcsw;
@@ -3013,8 +3022,10 @@ void rt_mutex_setprio(struct task_struct
prev_class = p->sched_class;
queued = task_on_rq_queued(p);
running = task_current(rq, p);
- if (queued)
+ if (queued) {
dequeue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_DEQUEUED;
+ }
if (running)
put_prev_task(rq, p);
@@ -3067,8 +3078,10 @@ void rt_mutex_setprio(struct task_struct
if (running)
p->sched_class->set_curr_task(rq);
- if (queued)
+ if (queued) {
enqueue_task(rq, p, enqueue_flag);
+ p->on_rq = TASK_ON_RQ_QUEUED;
+ }
/*
* Both switched_to() and prio_changed() are allowed to drop @rq->lock;
@@ -3114,8 +3127,10 @@ void set_user_nice(struct task_struct *p
goto out_unlock;
}
queued = task_on_rq_queued(p);
- if (queued)
+ if (queued) {
dequeue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_DEQUEUED;
+ }
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p);
@@ -3125,6 +3140,7 @@ void set_user_nice(struct task_struct *p
if (queued) {
enqueue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
@@ -3628,8 +3644,10 @@ static int __sched_setscheduler(struct t
queued = task_on_rq_queued(p);
running = task_current(rq, p);
- if (queued)
+ if (queued) {
dequeue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_DEQUEUED;
+ }
if (running)
put_prev_task(rq, p);
@@ -3656,6 +3674,7 @@ static int __sched_setscheduler(struct t
* increased (user space view).
*/
enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
}
/*
@@ -4943,8 +4962,10 @@ void sched_setnuma(struct task_struct *p
queued = task_on_rq_queued(p);
running = task_current(rq, p);
- if (queued)
+ if (queued) {
dequeue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_DEQUEUED;
+ }
if (running)
put_prev_task(rq, p);
@@ -4952,8 +4973,10 @@ void sched_setnuma(struct task_struct *p
if (running)
p->sched_class->set_curr_task(rq);
- if (queued)
+ if (queued) {
enqueue_task(rq, p, 0);
+ p->on_rq = TASK_ON_RQ_QUEUED;
+ }
task_rq_unlock(rq, p, &flags);
}
#endif
@@ -7587,8 +7610,10 @@ void sched_move_task(struct task_struct
running = task_current(rq, tsk);
queued = task_on_rq_queued(tsk);
- if (queued)
+ if (queued) {
dequeue_task(rq, tsk, 0);
+ tsk->on_rq = TASK_ON_RQ_DEQUEUED;
+ }
if (unlikely(running))
put_prev_task(rq, tsk);
@@ -7611,8 +7636,10 @@ void sched_move_task(struct task_struct
if (unlikely(running))
tsk->sched_class->set_curr_task(rq);
- if (queued)
+ if (queued) {
enqueue_task(rq, tsk, 0);
+ tsk->on_rq = TASK_ON_RQ_QUEUED;
+ }
task_rq_unlock(rq, tsk, &flags);
}
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -607,7 +607,7 @@ static enum hrtimer_restart dl_task_time
* We can be both throttled and !queued. Replenish the counter
* but do not enqueue -- wait for our wakeup to do that.
*/
- if (!task_on_rq_queued(p)) {
+ if (!task_on_rq_queued_or_dequeued(p)) {
replenish_dl_entity(dl_se, dl_se);
goto unlock;
}
@@ -1526,7 +1526,7 @@ static int pull_dl_task(struct rq *this_
dl_time_before(p->dl.deadline,
this_rq->dl.earliest_dl.curr))) {
WARN_ON(p == src_rq->curr);
- WARN_ON(!task_on_rq_queued(p));
+ WARN_ON(!task_on_rq_queued_or_dequeued(p));
/*
* Then we pull iff p has actually an earlier
@@ -1707,7 +1707,7 @@ static void switched_from_dl(struct rq *
* this is the right place to try to pull some other one
* from an overloaded cpu, if any.
*/
- if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
+ if (!task_on_rq_queued_or_dequeued(p) || rq->dl.dl_nr_running)
return;
if (pull_dl_task(rq))
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1656,7 +1656,7 @@ static struct rq *find_lock_lowest_rq(st
!cpumask_test_cpu(lowest_rq->cpu,
tsk_cpus_allowed(task)) ||
task_running(rq, task) ||
- !task_on_rq_queued(task))) {
+ !task_on_rq_queued_or_dequeued(task))) {
double_unlock_balance(rq, lowest_rq);
lowest_rq = NULL;
@@ -1953,10 +1953,14 @@ static int pull_rt_task(struct rq *this_
int this_cpu = this_rq->cpu, ret = 0, cpu;
struct task_struct *p;
struct rq *src_rq;
+ int task_dequeued = 0;
if (likely(!rt_overloaded(this_rq)))
return 0;
+ if (this_rq->curr->on_rq == TASK_ON_RQ_DEQUEUED)
+ task_dequeued = 1;
+
/*
* Match the barrier from rt_set_overloaded; this guarantees that if we
* see overloaded we must also see the rto_mask bit.
@@ -2035,6 +2039,9 @@ static int pull_rt_task(struct rq *this_
double_unlock_balance(this_rq, src_rq);
}
+ if (ret && task_dequeued)
+ trace_printk("yup, pulled\n");
+
return ret;
}
@@ -2133,7 +2140,7 @@ static void switched_from_rt(struct rq *
* we may need to handle the pulling of RT tasks
* now.
*/
- if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
+ if (!task_on_rq_queued_or_dequeued(p) || rq->rt.rt_nr_running)
return;
if (pull_rt_task(rq))
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -19,7 +19,8 @@ struct cpuidle_state;
/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED 1
-#define TASK_ON_RQ_MIGRATING 2
+#define TASK_ON_RQ_DEQUEUED 2
+#define TASK_ON_RQ_MIGRATING 3
extern __read_mostly int scheduler_running;
@@ -1034,6 +1035,13 @@ static inline int task_on_rq_queued(stru
return p->on_rq == TASK_ON_RQ_QUEUED;
}
+static inline int task_on_rq_queued_or_dequeued(struct task_struct *p)
+{
+ if (p->on_rq == TASK_ON_RQ_QUEUED)
+ return 1;
+ return p->on_rq == TASK_ON_RQ_DEQUEUED;
+}
+
static inline int task_on_rq_migrating(struct task_struct *p)
{
return p->on_rq == TASK_ON_RQ_MIGRATING;
next prev parent reply other threads:[~2015-05-31 6:39 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-05-28 7:43 sched_setscheduler() vs idle_balance() race Mike Galbraith
2015-05-28 11:51 ` Peter Zijlstra
2015-05-28 12:04 ` Mike Galbraith
2015-05-28 12:06 ` Peter Zijlstra
2015-05-28 12:32 ` Mike Galbraith
2015-05-28 13:53 ` Peter Zijlstra
2015-05-28 14:54 ` Mike Galbraith
2015-05-28 15:24 ` Peter Zijlstra
2015-05-29 18:30 ` Mike Galbraith
2015-05-29 18:48 ` Mike Galbraith
2015-06-01 8:14 ` Peter Zijlstra
2015-06-01 8:16 ` Peter Zijlstra
2015-06-01 10:00 ` Mike Galbraith
2015-05-28 16:59 ` Kirill Tkhai
2015-05-30 13:08 ` Mike Galbraith
2015-05-31 6:39 ` Mike Galbraith [this message]
2015-06-01 8:24 ` Peter Zijlstra
2015-06-01 8:19 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1433054344.6545.20.camel@gmail.com \
--to=umgwanakikbuti@gmail.com \
--cc=ktkhai@parallels.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.