All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Oleg Nesterov <oleg@redhat.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	linux-kernel@vger.kernel.org, Ben Segall <bsegall@google.com>,
	Daniel Bristot de Oliveira <bristot@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Ingo Molnar <mingo@redhat.com>,
	Juri Lelli <juri.lelli@redhat.com>, Mel Gorman <mgorman@suse.de>,
	Steven Rostedt <rostedt@goodmis.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	ebiederm@xmission.com
Subject: Re: [PATCH v2] ptrace: fix ptrace vs tasklist_lock race on PREEMPT_RT.
Date: Thu, 7 Apr 2022 14:13:40 +0200	[thread overview]
Message-ID: <20220407121340.GA2762@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <Ykwn0MpcrZ/N+LOG@hirez.programming.kicks-ass.net>

On Tue, Apr 05, 2022 at 01:28:16PM +0200, Peter Zijlstra wrote:
> On Tue, Apr 05, 2022 at 12:29:03PM +0200, Oleg Nesterov wrote:
> > On 04/05, Peter Zijlstra wrote:
> > >
> > > As is, I think we can write task_is_stopped() like:
> > >
> > > #define task_is_stopped(task)	((task)->jobctl & JOBCTL_STOP_PENDING)
> > >
> > > Because jobctl is in fact the canonical state.
> > 
> > Not really. JOBCTL_STOP_PENDING means that the task should stop.
> > 
> > And this flag is cleared right before set_special_state(TASK_STOPPED)
> > in do_signal_stop(), see task_participate_group_stop().
> 
> Argh! More work then :-( Still, I really want to not have p->__state be
> actual unique state.

How insane is this?

In addition to solving the whole saved_state issue, it also allows my
freezer rewrite to reconstruct __state. If you don't find the below
fundamentally broken I can respin that freezer rewrite on top of it.

---
 include/linux/sched.h        |  8 +++-----
 include/linux/sched/jobctl.h |  8 ++++++++
 include/linux/sched/signal.h | 15 ++++++++++++++-
 kernel/ptrace.c              | 18 ++++++++++++++----
 kernel/signal.c              |  9 ++++++---
 5 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 67f06f72c50e..6698b97b8e3b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,11 +118,9 @@ struct task_group;
 
 #define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)
 
-#define task_is_traced(task)		((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
-#define task_is_stopped(task)		((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
-
-#define task_is_stopped_or_traced(task)	((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+#define task_is_traced(task)		((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
+#define task_is_stopped(task)		((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0)
+#define task_is_stopped_or_traced(task)	((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0)
 
 /*
  * Special states are those that do not use the normal wait-loop pattern. See
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index fa067de9f1a9..ec8b312f7506 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -20,6 +20,10 @@ struct task_struct;
 #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
 #define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */
 
+#define JOBCTL_STOPPED_BIT	24
+#define JOBCTL_TRACED_BIT	25
+#define JOBCTL_TRACED_FROZEN_BIT 26
+
 #define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
 #define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
 #define JOBCTL_STOP_CONSUME	(1UL << JOBCTL_STOP_CONSUME_BIT)
@@ -29,6 +33,10 @@ struct task_struct;
 #define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
 #define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)
 
+#define JOBCTL_STOPPED		(1UL << JOBCTL_STOPPED_BIT)
+#define JOBCTL_TRACED		(1UL << JOBCTL_TRACED_BIT)
+#define JOBCTL_TRACED_FROZEN	(1UL << JOBCTL_TRACED_FROZEN_BIT)
+
 #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
 
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 3c8b34876744..3e4a14ed402e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -294,8 +294,10 @@ static inline int kernel_dequeue_signal(void)
 static inline void kernel_signal_stop(void)
 {
 	spin_lock_irq(&current->sighand->siglock);
-	if (current->jobctl & JOBCTL_STOP_DEQUEUED)
+	if (current->jobctl & JOBCTL_STOP_DEQUEUED) {
+		current->jobctl |= JOBCTL_STOPPED;
 		set_special_state(TASK_STOPPED);
+	}
 	spin_unlock_irq(&current->sighand->siglock);
 
 	schedule();
@@ -437,10 +439,21 @@ extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
 
 static inline void signal_wake_up(struct task_struct *t, bool resume)
 {
+	lockdep_assert_held(t->sighand->siglock);
+
+	if (resume && !(t->jobctl & JOBCTL_TRACED_FROZEN))
+		t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED);
+
 	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
 }
+
 static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
 {
+	lockdep_assert_held(t->sighand->siglock);
+
+	if (resume)
+		t->jobctl &= ~JOBCTL_TRACED;
+
 	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
 }
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index ccc4b465775b..626f96d275c7 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -185,7 +185,12 @@ static bool looks_like_a_spurious_pid(struct task_struct *task)
 	return true;
 }
 
-/* Ensure that nothing can wake it up, even SIGKILL */
+/*
+ * Ensure that nothing can wake it up, even SIGKILL
+ *
+ * A task is switched to this state while a ptrace operation is in progress;
+ * such that the ptrace operation is uninterruptible.
+ */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
 	bool ret = false;
@@ -197,6 +202,7 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
 	    !__fatal_signal_pending(task)) {
+		task->jobctl |= JOBCTL_TRACED_FROZEN;
 		WRITE_ONCE(task->__state, __TASK_TRACED);
 		ret = true;
 	}
@@ -218,9 +224,11 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
 	 */
 	spin_lock_irq(&task->sighand->siglock);
 	if (READ_ONCE(task->__state) == __TASK_TRACED) {
-		if (__fatal_signal_pending(task))
+		task->jobctl &= ~JOBCTL_TRACED_FROZEN;
+		if (__fatal_signal_pending(task)) {
+			task->jobctl &= ~JOBCTL_TRACED;
 			wake_up_state(task, __TASK_TRACED);
-		else
+		} else
 			WRITE_ONCE(task->__state, TASK_TRACED);
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@@ -475,8 +483,10 @@ static int ptrace_attach(struct task_struct *task, long request,
 	 * in and out of STOPPED are protected by siglock.
 	 */
 	if (task_is_stopped(task) &&
-	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
+	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
+		task->jobctl &= ~JOBCTL_STOPPED;
 		signal_wake_up_state(task, __TASK_STOPPED);
+	}
 
 	spin_unlock(&task->sighand->siglock);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 30cd1ca43bcd..0aea3f0a8002 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -884,7 +884,7 @@ static int check_kill_permission(int sig, struct kernel_siginfo *info,
 static void ptrace_trap_notify(struct task_struct *t)
 {
 	WARN_ON_ONCE(!(t->ptrace & PT_SEIZED));
-	assert_spin_locked(&t->sighand->siglock);
+	lockdep_assert_held(&t->sighand->siglock);
 
 	task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
 	ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
@@ -930,9 +930,10 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
 		for_each_thread(p, t) {
 			flush_sigqueue_mask(&flush, &t->pending);
 			task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING);
-			if (likely(!(t->ptrace & PT_SEIZED)))
+			if (likely(!(t->ptrace & PT_SEIZED))) {
+				t->jobctl &= ~JOBCTL_STOPPED;
 				wake_up_state(t, __TASK_STOPPED);
-			else
+			} else
 				ptrace_trap_notify(t);
 		}
 
@@ -2219,6 +2220,7 @@ static int ptrace_stop(int exit_code, int why, int clear_code,
 	 * schedule() will not sleep if there is a pending signal that
 	 * can awaken the task.
 	 */
+	current->jobctl |= JOBCTL_TRACED;
 	set_special_state(TASK_TRACED);
 
 	/*
@@ -2460,6 +2462,7 @@ static bool do_signal_stop(int signr)
 		if (task_participate_group_stop(current))
 			notify = CLD_STOPPED;
 
+		current->jobctl |= JOBCTL_STOPPED;
 		set_special_state(TASK_STOPPED);
 		spin_unlock_irq(&current->sighand->siglock);
 

  reply	other threads:[~2022-04-07 12:14 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-02 21:04 [PATCH] ptrace: fix ptrace vs tasklist_lock race on PREEMPT_RT Sebastian Andrzej Siewior
2022-03-14  9:27 ` Sebastian Andrzej Siewior
2022-03-14 18:54 ` Oleg Nesterov
2022-03-15  8:31   ` Sebastian Andrzej Siewior
2022-03-15 14:29     ` Oleg Nesterov
2022-03-16  8:23       ` Sebastian Andrzej Siewior
2022-03-31 14:25       ` [PATCH v2] " Sebastian Andrzej Siewior
2022-04-04 16:13         ` Oleg Nesterov
2022-04-05  8:34           ` Oleg Nesterov
2022-04-05 10:10         ` Peter Zijlstra
2022-04-05 10:29           ` Oleg Nesterov
2022-04-05 11:28             ` Peter Zijlstra
2022-04-07 12:13               ` Peter Zijlstra [this message]
2022-04-07 17:51                 ` Eric W. Biederman
2022-04-07 22:50                 ` Eric W. Biederman
2022-04-08  9:09                   ` Peter Zijlstra
2022-04-08 19:40                     ` Eric W. Biederman
2022-04-08 20:06                       ` Peter Zijlstra
2022-04-11 11:35                         ` Peter Zijlstra
2022-04-11 13:44                           ` Eric W. Biederman
2022-04-11 17:07                             ` Peter Zijlstra
2022-04-12 11:59                               ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220407121340.GA2762@worktop.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bigeasy@linutronix.de \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=ebiederm@xmission.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.