All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Oleg Nesterov <oleg@redhat.com>
Cc: "Kohli, Gaurav" <gkohli@codeaurora.org>,
	tglx@linutronix.de, mpe@ellerman.id.au, mingo@kernel.org,
	bigeasy@linutronix.de, linux-kernel@vger.kernel.org,
	linux-arm-msm@vger.kernel.org,
	Neeraj Upadhyay <neeraju@codeaurora.org>,
	Will Deacon <will.deacon@arm.com>
Subject: Re: [PATCH v1] kthread/smpboot: Serialize kthread parking against wakeup
Date: Tue, 5 Jun 2018 22:13:16 +0200	[thread overview]
Message-ID: <20180605201316.GZ12198@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20180605163515.GB24053@redhat.com>

On Tue, Jun 05, 2018 at 06:35:16PM +0200, Oleg Nesterov wrote:
> Yes, but this won't fix the race decribed by Kohli...

Clearly I'm not going strong today... and yes, looking at that again I
didn't address that.

> Plus this complicates the schedule() paths for the very special case

I checked, we already spilled @preempt onto the stack, context_switch()
is inlined so the extra argument is a no-op, if finish_task_switch()
also gets inlined (possible, could force it) the only additional cost is
the @preempt load from stack in the slow path. If it doesn't get inlined
we get the additional function call overhead, which should be minimal.

But yes, I wasn't a fan either.

> and to me it seems that all this kthread_park/unpark logic needs some
> serious cleanups...

I really hated how even with TASK_PARKED special the smpboot thread
could still get migrated, which is why I moved the completion. Do you
have any saner suggestions?

Humm, I suppose we could do wait_task_inactive() after
wait_for_completion().  I absolutely abhor wait_task_inactive(), but I
remember us both failing to fix that at least twice :/

Also, I think we still need TASK_PARKED as a special state for that.

How's the below?

---
 include/linux/kthread.h |  1 -
 include/linux/sched.h   |  2 +-
 kernel/kthread.c        | 32 +++++++++++++++++++++++++-------
 kernel/sched/core.c     | 31 +++++++++++--------------------
 4 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2803264c512f..c1961761311d 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 14e4f9c12337..4e32c1cc7794 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -117,7 +117,7 @@ struct task_group;
  * the comment with set_special_state().
  */
 #define is_special_task_state(state)				\
-	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
 
 #define __set_current_state(state_value)			\
 	do {							\
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 481951bf091d..8f66a3dc767a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -177,12 +177,24 @@ void *kthread_probe_data(struct task_struct *task)
 static void __kthread_parkme(struct kthread *self)
 {
 	for (;;) {
-		set_current_state(TASK_PARKED);
+		/*
+		 * TASK_PARKED is a special state; we must serialize against
+		 * possible pending wakeups to avoid store-store collisions on
+		 * task->state.
+		 *
+		 * Such a collision might possibly result in the task state
+		 * changin from TASK_PARKED and us failing the
+		 * wait_task_inactive() in kthread_park().
+		 */
+		set_special_state(TASK_PARKED);
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
+
+		complete_all(&self->parked);
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
+	reinit_completion(&self->parked);
 }
 
 void kthread_parkme(void)
@@ -191,11 +203,6 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-void kthread_park_complete(struct task_struct *k)
-{
-	complete_all(&to_kthread(k)->parked);
-}
-
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -459,8 +466,10 @@ void kthread_unpark(struct task_struct *k)
 	if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
 		__kthread_bind(k, kthread->cpu, TASK_PARKED);
 
-	reinit_completion(&kthread->parked);
 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
+	 */
 	wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k)
 	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	if (k != current) {
 		wake_up_process(k);
+		/*
+		 * Wait for __kthread_parkme() to complete(), this means we
+		 * _will_ have TASK_PARKED and are about to call schedule().
+		 */
 		wait_for_completion(&kthread->parked);
+		/*
+		 * Now wait for that schedule() to complete and the task to
+		 * get scheduled out.
+		 */
+		WARN_ON_ONCE(!wait_task_inactive(p, TASK_PARKED));
 	}
 
 	return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8d59b259af4a..cf72c4eed7da 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,7 +7,6 @@
  */
 #include "sched.h"
 
-#include <linux/kthread.h>
 #include <linux/nospec.h>
 
 #include <asm/switch_to.h>
@@ -2701,28 +2700,20 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
 	}
-	if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
-		switch (prev_state) {
-		case TASK_DEAD:
-			if (prev->sched_class->task_dead)
-				prev->sched_class->task_dead(prev);
+	if (unlikely(prev_state == TASK_DEAD)) {
+		if (prev->sched_class->task_dead)
+			prev->sched_class->task_dead(prev);
 
-			/*
-			 * Remove function-return probe instances associated with this
-			 * task and put them back on the free list.
-			 */
-			kprobe_flush_task(prev);
-
-			/* Task is done with its stack. */
-			put_task_stack(prev);
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
 
-			put_task_struct(prev);
-			break;
+		/* Task is done with its stack. */
+		put_task_stack(prev);
 
-		case TASK_PARKED:
-			kthread_park_complete(prev);
-			break;
-		}
+		put_task_struct(prev);
 	}
 
 	tick_nohz_task_switch();

  parent reply	other threads:[~2018-06-05 20:13 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-25  8:33 [PATCH v1] kthread/smpboot: Serialize kthread parking against wakeup Gaurav Kohli
2018-04-25 20:09 ` Peter Zijlstra
2018-04-26  4:04   ` Kohli, Gaurav
2018-04-26  9:14     ` Peter Zijlstra
2018-04-26  8:41   ` Peter Zijlstra
2018-04-26  8:57     ` Peter Zijlstra
2018-04-26 15:53       ` Kohli, Gaurav
2018-04-30 11:17         ` Peter Zijlstra
2018-05-01  7:50           ` Kohli, Gaurav
2018-05-01 10:18             ` Peter Zijlstra
2018-05-01 10:40               ` Peter Zijlstra
2018-05-01 10:40               ` Kohli, Gaurav
2018-05-01 11:31                 ` Peter Zijlstra
2018-05-01 11:46                   ` Kohli, Gaurav
2018-05-01 13:19                     ` Peter Zijlstra
2018-05-02  5:15                       ` Kohli, Gaurav
2018-05-02  8:20                         ` Peter Zijlstra
2018-05-02 10:13                           ` Kohli, Gaurav
2018-05-07 11:09                             ` Kohli, Gaurav
2018-05-07 11:23                               ` Kohli, Gaurav
2018-06-05 11:13                                 ` Kohli, Gaurav
2018-06-05 15:08                                   ` Oleg Nesterov
2018-06-05 15:22                                     ` Peter Zijlstra
2018-06-05 15:40                                       ` Peter Zijlstra
2018-06-05 16:35                                         ` Oleg Nesterov
2018-06-05 18:21                                           ` Kohli, Gaurav
2018-06-05 20:13                                           ` Peter Zijlstra [this message]
2018-06-06 13:51                                             ` Oleg Nesterov
2018-06-06 15:03                                               ` Peter Zijlstra
2018-06-06 15:04                                               ` Peter Zijlstra
2018-06-06 15:22                                               ` Peter Zijlstra
2018-06-06 18:59                                               ` Peter Zijlstra
2018-06-07  8:30                                                 ` Kohli, Gaurav
2018-05-01 10:44               ` Peter Zijlstra
2018-04-26 16:02     ` Andrea Parri
2018-04-26 16:18     ` Oleg Nesterov
2018-04-30 11:20       ` Peter Zijlstra
2018-04-30 11:56         ` Peter Zijlstra
2018-04-28  6:43 ` [lkp-robot] [kthread/smpboot] cad8e99675: inconsistent{IN-HARDIRQ-W}->{HARDIRQ-ON-W}usage kernel test robot
2018-04-28  6:43   ` kernel test robot
2018-04-28  6:43   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180605201316.GZ12198@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bigeasy@linutronix.de \
    --cc=gkohli@codeaurora.org \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=neeraju@codeaurora.org \
    --cc=oleg@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.