All of lore.kernel.org
 help / color / mirror / Atom feed
From: tip-bot for Peter Zijlstra <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: mingo@kernel.org, mhocko@suse.com, umgwanakikbuti@gmail.com,
	hpa@zytor.com, torvalds@linux-foundation.org,
	linux-kernel@vger.kernel.org, peterz@infradead.org,
	tglx@linutronix.de, matt@codeblueprint.co.uk
Subject: [tip:sched/urgent] stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock
Date: Thu, 3 May 2018 02:24:55 -0700	[thread overview]
Message-ID: <tip-0b26351b910fb8fe6a056f8a1bbccabe50c0e19f@git.kernel.org> (raw)
In-Reply-To: <20180420095005.GH4064@hirez.programming.kicks-ass.net>

Commit-ID:  0b26351b910fb8fe6a056f8a1bbccabe50c0e19f
Gitweb:     https://git.kernel.org/tip/0b26351b910fb8fe6a056f8a1bbccabe50c0e19f
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Fri, 20 Apr 2018 11:50:05 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 3 May 2018 07:38:03 +0200

stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock

Matt reported the following deadlock:

CPU0					CPU1

schedule(.prev=migrate/0)		<fault>
  pick_next_task()			  ...
    idle_balance()			    migrate_swap()
      active_balance()			      stop_two_cpus()
						spin_lock(stopper0->lock)
						spin_lock(stopper1->lock)
						ttwu(migrate/0)
						  smp_cond_load_acquire() -- waits for schedule()
        stop_one_cpu(1)
	  spin_lock(stopper1->lock) -- waits for stopper lock

Fix this deadlock by taking the wakeups out from under stopper->lock.
This allows the active_balance() to queue the stop work and finish the
context switch, which in turn allows the wakeup from migrate_swap() to
observe the context and complete the wakeup.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reported-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180420095005.GH4064@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/stop_machine.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index b7591261652d..64c0291b579c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -21,6 +21,7 @@
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
 #include <linux/nmi.h>
+#include <linux/sched/wake_q.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
 }
 
 static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
-					struct cpu_stop_work *work)
+					struct cpu_stop_work *work,
+					struct wake_q_head *wakeq)
 {
 	list_add_tail(&work->list, &stopper->works);
-	wake_up_process(stopper->thread);
+	wake_q_add(wakeq, stopper->thread);
 }
 
 /* queue @work to @stopper.  if offline, @work is completed immediately */
 static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 {
 	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	DEFINE_WAKE_Q(wakeq);
 	unsigned long flags;
 	bool enabled;
 
 	spin_lock_irqsave(&stopper->lock, flags);
 	enabled = stopper->enabled;
 	if (enabled)
-		__cpu_stop_queue_work(stopper, work);
+		__cpu_stop_queue_work(stopper, work, &wakeq);
 	else if (work->done)
 		cpu_stop_signal_done(work->done);
 	spin_unlock_irqrestore(&stopper->lock, flags);
 
+	wake_up_q(&wakeq);
+
 	return enabled;
 }
 
@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 {
 	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
 	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
+	DEFINE_WAKE_Q(wakeq);
 	int err;
 retry:
 	spin_lock_irq(&stopper1->lock);
@@ -252,8 +258,8 @@ retry:
 			goto unlock;
 
 	err = 0;
-	__cpu_stop_queue_work(stopper1, work1);
-	__cpu_stop_queue_work(stopper2, work2);
+	__cpu_stop_queue_work(stopper1, work1, &wakeq);
+	__cpu_stop_queue_work(stopper2, work2, &wakeq);
 unlock:
 	spin_unlock(&stopper2->lock);
 	spin_unlock_irq(&stopper1->lock);
@@ -263,6 +269,9 @@ unlock:
 			cpu_relax();
 		goto retry;
 	}
+
+	wake_up_q(&wakeq);
+
 	return err;
 }
 /**

      parent reply	other threads:[~2018-05-03  9:25 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-17 14:21 cpu stopper threads and load balancing leads to deadlock Matt Fleming
2018-04-18  5:47 ` Mike Galbraith
2018-04-19  5:38   ` Mike Galbraith
2018-04-20  9:50 ` Peter Zijlstra
2018-04-24 13:33   ` Matt Fleming
2018-05-03 12:12     ` Mike Galbraith
2018-05-03 12:28       ` Peter Zijlstra
2018-05-03 12:40         ` Mike Galbraith
2018-05-03 12:49           ` Peter Zijlstra
2018-05-03 13:32             ` Mike Galbraith
2018-05-03 13:56               ` Peter Zijlstra
2018-05-03 14:16                 ` Mike Galbraith
2018-05-03 14:44                   ` Peter Zijlstra
2018-05-03 16:12                     ` Paul E. McKenney
2018-05-03 16:45                       ` Peter Zijlstra
2018-05-03 17:18                         ` Paul E. McKenney
2018-05-03 17:54                           ` Peter Zijlstra
2018-05-03 18:24                             ` Paul E. McKenney
2018-05-04  3:38                         ` Mike Galbraith
2018-05-15  4:30                         ` Mike Galbraith
2018-05-17 14:03                           ` Paul E. McKenney
2018-05-17 14:10                             ` Mike Galbraith
2018-05-17 14:23                             ` Peter Zijlstra
2018-05-17 14:56                               ` Paul E. McKenney
2018-05-22 17:05                                 ` Paul E. McKenney
2018-05-03 14:39                 ` Paul E. McKenney
2018-05-03 14:52                   ` Peter Zijlstra
2018-05-03  9:24   ` tip-bot for Peter Zijlstra [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tip-0b26351b910fb8fe6a056f8a1bbccabe50c0e19f@git.kernel.org \
    --to=tipbot@zytor.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tip-commits@vger.kernel.org \
    --cc=matt@codeblueprint.co.uk \
    --cc=mhocko@suse.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.