From: Peter Zijlstra <peterz@infradead.org>
To: umgwanakikbuti@gmail.com, mingo@elte.hu
Cc: ktkhai@parallels.com, rostedt@goodmis.org, tglx@linutronix.de,
juri.lelli@gmail.com, pang.xunlei@linaro.org, oleg@redhat.com,
wanpeng.li@linux.intel.com, linux-kernel@vger.kernel.org,
peterz@infradead.org
Subject: [PATCH 18/18] sched,lockdep: Employ lock pinning
Date: Thu, 11 Jun 2015 14:46:54 +0200 [thread overview]
Message-ID: <20150611124744.003233193@infradead.org> (raw)
In-Reply-To: 20150611124636.448700267@infradead.org
[-- Attachment #1: peter_zijlstra-schedlockdep-employ_lock_pinning.patch --]
[-- Type: text/plain, Size: 8209 bytes --]
Employ the new lockdep lock pinning annotation to ensure no
'accidental' lock-breaks happen with rq->lock.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++++++++---
kernel/sched/deadline.c | 8 ++++++++
kernel/sched/fair.c | 11 ++++++++---
kernel/sched/rt.c | 8 ++++++++
kernel/sched/sched.h | 10 ++++++++--
5 files changed, 71 insertions(+), 8 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1201,8 +1201,15 @@ int set_cpus_allowed_ptr(struct task_str
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
return 0;
- } else if (task_on_rq_queued(p))
+ } else if (task_on_rq_queued(p)) {
+ /*
+ * OK, since we're going to drop the lock immediately
+ * afterwards anyway.
+ */
+ lockdep_unpin_lock(&rq->lock);
rq = move_queued_task(rq, p, dest_cpu);
+ lockdep_pin_lock(&rq->lock);
+ }
out:
task_rq_unlock(rq, p, &flags);
@@ -1562,6 +1569,8 @@ static int select_fallback_rq(int cpu, s
static inline
int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
{
+ lockdep_assert_held(&p->pi_lock);
+
if (p->nr_cpus_allowed > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
@@ -1652,9 +1661,12 @@ ttwu_do_wakeup(struct rq *rq, struct tas
#ifdef CONFIG_SMP
if (p->sched_class->task_woken) {
/*
- * XXX can drop rq->lock; most likely ok.
+ * Our task @p is fully woken up and running; so its safe to
+ * drop the rq->lock, hereafter rq is only used for statistics.
*/
+ lockdep_unpin_lock(&rq->lock);
p->sched_class->task_woken(rq, p);
+ lockdep_pin_lock(&rq->lock);
}
if (rq->idle_stamp) {
@@ -1674,6 +1686,8 @@ ttwu_do_wakeup(struct rq *rq, struct tas
static void
ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
{
+ lockdep_assert_held(&rq->lock);
+
#ifdef CONFIG_SMP
if (p->sched_contributes_to_load)
rq->nr_uninterruptible--;
@@ -1718,6 +1732,7 @@ void sched_ttwu_pending(void)
return;
raw_spin_lock_irqsave(&rq->lock, flags);
+ lockdep_pin_lock(&rq->lock);
while (llist) {
p = llist_entry(llist, struct task_struct, wake_entry);
@@ -1725,6 +1740,7 @@ void sched_ttwu_pending(void)
ttwu_do_activate(rq, p, 0);
}
+ lockdep_unpin_lock(&rq->lock);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
@@ -1821,7 +1837,9 @@ static void ttwu_queue(struct task_struc
#endif
raw_spin_lock(&rq->lock);
+ lockdep_pin_lock(&rq->lock);
ttwu_do_activate(rq, p, 0);
+ lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
}
@@ -1916,9 +1934,17 @@ static void try_to_wake_up_local(struct
lockdep_assert_held(&rq->lock);
if (!raw_spin_trylock(&p->pi_lock)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we've
+ * not yet picked a replacement task.
+ */
+ lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
raw_spin_lock(&p->pi_lock);
raw_spin_lock(&rq->lock);
+ lockdep_pin_lock(&rq->lock);
}
if (!(p->state & TASK_NORMAL))
@@ -2538,6 +2564,7 @@ context_switch(struct rq *rq, struct tas
* of the scheduler it's an obvious special-case), so we
* do an early lockdep release here:
*/
+ lockdep_unpin_lock(&rq->lock);
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
/* Here we just switch the register state and the stack. */
@@ -2960,6 +2987,7 @@ static void __sched __schedule(void)
*/
smp_mb__before_spinlock();
raw_spin_lock_irq(&rq->lock);
+ lockdep_pin_lock(&rq->lock);
rq->clock_skip_update <<= 1; /* promote REQ to ACT */
@@ -3002,8 +3030,10 @@ static void __sched __schedule(void)
rq = context_switch(rq, prev, next); /* unlocks the rq */
cpu = cpu_of(rq);
- } else
+ } else {
+ lockdep_unpin_lock(&rq->lock);
raw_spin_unlock_irq(&rq->lock);
+ }
balance_callback(rq);
}
@@ -5071,6 +5101,11 @@ static void migrate_tasks(struct rq *dea
if (rq->nr_running == 1)
break;
+ /*
+ * Ensure rq->lock covers the entire task selection
+ * until the migration.
+ */
+ lockdep_pin_lock(&rq->lock);
next = pick_next_task(rq, &fake_task);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
@@ -5078,6 +5113,7 @@ static void migrate_tasks(struct rq *dea
/* Find suitable destination for @next, with force if needed. */
dest_cpu = select_fallback_rq(dead_rq->cpu, next);
+ lockdep_unpin_lock(&rq->lock);
rq = __migrate_task(rq, next, dest_cpu);
if (rq != dead_rq) {
raw_spin_unlock(&rq->lock);
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1153,7 +1153,15 @@ struct task_struct *pick_next_task_dl(st
dl_rq = &rq->dl;
if (need_pull_dl_task(rq, prev)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we're
+ * being very careful to re-start the picking loop.
+ */
+ lockdep_unpin_lock(&rq->lock);
pull_dl_task(rq);
+ lockdep_pin_lock(&rq->lock);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a stop task can slip in, in which case we need to
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5392,7 +5392,15 @@ pick_next_task_fair(struct rq *rq, struc
return p;
idle:
+ /*
+ * This is OK, because current is on_cpu, which avoids it being picked
+ * for load-balance and preemption/IRQs are still disabled avoiding
+ * further scheduler activity on it and we're being very careful to
+ * re-start the picking loop.
+ */
+ lockdep_unpin_lock(&rq->lock);
new_tasks = idle_balance(rq);
+ lockdep_pin_lock(&rq->lock);
/*
* Because idle_balance() releases (and re-acquires) rq->lock, it is
* possible for any higher priority task to appear. In that case we
@@ -7426,9 +7434,6 @@ static int idle_balance(struct rq *this_
goto out;
}
- /*
- * Drop the rq->lock, but keep IRQ/preempt disabled.
- */
raw_spin_unlock(&this_rq->lock);
update_blocked_averages(this_cpu);
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1478,7 +1478,15 @@ pick_next_task_rt(struct rq *rq, struct
struct rt_rq *rt_rq = &rq->rt;
if (need_pull_rt_task(rq, prev)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we're
+ * being very careful to re-start the picking loop.
+ */
+ lockdep_unpin_lock(&rq->lock);
pull_rt_task(rq);
+ lockdep_pin_lock(&rq->lock);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a dl or stop task can slip in, in which case we need
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1438,8 +1438,10 @@ static inline struct rq *__task_rq_lock(
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+ lockdep_pin_lock(&rq->lock);
return rq;
+ }
raw_spin_unlock(&rq->lock);
while (unlikely(task_on_rq_migrating(p)))
@@ -1476,8 +1478,10 @@ static inline struct rq *task_rq_lock(st
* If we observe the new cpu in task_rq_lock, the acquire will
* pair with the WMB to ensure we must then also see migrating.
*/
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+ lockdep_pin_lock(&rq->lock);
return rq;
+ }
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
@@ -1489,6 +1493,7 @@ static inline struct rq *task_rq_lock(st
static inline void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
+ lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
}
@@ -1497,6 +1502,7 @@ task_rq_unlock(struct rq *rq, struct tas
__releases(rq->lock)
__releases(p->pi_lock)
{
+ lockdep_unpin_lock(&rq->lock);
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
next prev parent reply other threads:[~2015-06-11 12:56 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-11 12:46 [PATCH 00/18] sched: balance callbacks v4 Peter Zijlstra
2015-06-11 12:46 ` [PATCH 01/18] sched: Replace post_schedule with a balance callback list Peter Zijlstra
2015-06-11 15:32 ` Kirill Tkhai
2015-06-18 23:00 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 02/18] sched: Use replace normalize_task() with __sched_setscheduler() Peter Zijlstra
2015-06-18 23:00 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 03/18] sched: Allow balance callbacks for check_class_changed() Peter Zijlstra
2015-06-18 23:01 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 04/18] sched,rt: Remove return value from pull_rt_task() Peter Zijlstra
2015-06-18 23:01 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 05/18] sched,rt: Convert switched_{from,to}_rt() / prio_changed_rt() to balance callbacks Peter Zijlstra
2015-06-18 23:01 ` [tip:sched/hrtimers] sched, rt: Convert switched_{from, to}_rt() " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 06/18] sched,dl: Remove return value from pull_dl_task() Peter Zijlstra
2015-06-18 23:02 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 07/18] sched,dl: Convert switched_{from,to}_dl() / prio_changed_dl() to balance callbacks Peter Zijlstra
2015-06-18 23:02 ` [tip:sched/hrtimers] sched, dl: Convert switched_{from, to}_dl() " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 08/18] hrtimer: Remove HRTIMER_STATE_MIGRATE Peter Zijlstra
2015-06-18 22:18 ` [tip:timers/core] " tip-bot for Oleg Nesterov
2015-06-11 12:46 ` [PATCH 09/18] hrtimer: Fix hrtimer_is_queued() hole Peter Zijlstra
2015-06-18 22:18 ` [tip:timers/core] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 10/18] seqcount: Rename write_seqcount_barrier() Peter Zijlstra
2015-06-18 22:19 ` [tip:timers/core] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 11/18] seqcount: Introduce raw_write_seqcount_barrier() Peter Zijlstra
2015-06-11 15:33 ` Paul E. McKenney
2015-06-11 21:45 ` Paul E. McKenney
2015-06-12 7:08 ` Peter Zijlstra
2015-06-12 18:59 ` Oleg Nesterov
2015-06-17 12:29 ` Peter Zijlstra
2015-06-17 14:57 ` Paul E. McKenney
2015-06-17 15:11 ` Peter Zijlstra
2015-06-17 15:42 ` Paul E. McKenney
2015-06-17 16:58 ` Peter Zijlstra
2015-06-17 15:49 ` Peter Zijlstra
2015-06-17 16:37 ` Paul E. McKenney
2015-06-17 17:11 ` Peter Zijlstra
2015-06-17 18:02 ` Paul E. McKenney
2015-06-18 9:15 ` Peter Zijlstra
2015-06-18 9:40 ` Ingo Molnar
2015-06-18 10:40 ` Peter Zijlstra
2015-06-18 16:54 ` Paul E. McKenney
2015-06-18 17:10 ` Steven Rostedt
2015-06-18 17:51 ` Paul E. McKenney
2015-06-18 22:19 ` [tip:timers/core] seqcount: Introduce raw_write_seqcount_barrier( ) tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 12/18] hrtimer: Allow hrtimer::function() to free the timer Peter Zijlstra
2015-06-18 22:19 ` [tip:timers/core] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 13/18] sched,dl: Fix sched class hopping CBS hole Peter Zijlstra
2015-06-18 23:02 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 14/18] sched: Move code around Peter Zijlstra
2015-06-18 23:02 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 15/18] sched: Streamline the task migration locking a little Peter Zijlstra
2015-06-18 23:03 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 16/18] lockdep: Simplify lock_release() Peter Zijlstra
2015-06-18 23:03 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` [PATCH 17/18] lockdep: Implement lock pinning Peter Zijlstra
2015-06-18 23:03 ` [tip:sched/hrtimers] " tip-bot for Peter Zijlstra
2015-06-11 12:46 ` Peter Zijlstra [this message]
2015-06-18 23:04 ` [tip:sched/hrtimers] sched,lockdep: Employ " tip-bot for Peter Zijlstra
2015-12-29 5:41 ` [PATCH 00/18] sched: balance callbacks v4 Byungchul Park
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150611124744.003233193@infradead.org \
--to=peterz@infradead.org \
--cc=juri.lelli@gmail.com \
--cc=ktkhai@parallels.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=oleg@redhat.com \
--cc=pang.xunlei@linaro.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=umgwanakikbuti@gmail.com \
--cc=wanpeng.li@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox