All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kirill Tkhai <ktkhai@odin.com>
To: <linux-kernel@vger.kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Ingo Molnar <mingo@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	Michal Hocko <mhocko@suse.cz>, "Rik van Riel" <riel@redhat.com>,
	Ionut Alexa <ionut.m.alexa@gmail.com>,
	Peter Hurley <peter@hurleysoftware.com>,
	Kirill Tkhai <tkhai@yandex.ru>
Subject: [PATCH RFC 11/13] exit: Syncronize on kin_lock while do_notify_parent()
Date: Mon, 25 May 2015 20:46:02 +0300	[thread overview]
Message-ID: <1432575962.6866.44.camel@odin.com> (raw)
In-Reply-To: <20150525162722.5171.15901.stgit@pro>

Note: do_wait() (working on tsk) may catch its child, which is being traced
by a thread from tsk's thread group. For proper synchronization, we must
hold both parent and real_parent locks for calling do_notify_parent().

Also delete tasklist_lock dependence in ptrace_attach() etc, because everything
is already synchronized on kin_lock (Due to previous patches).

Signed-off-by: Kirill Tkhai <ktkhai@odin.com>
---
 kernel/exit.c   |   42 ++++++++++++++++++++++++++++--------------
 kernel/ptrace.c |   20 ++++++--------------
 kernel/signal.c |   11 ++---------
 3 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index bb9d165..aeded00 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -183,6 +183,7 @@ void release_task(struct task_struct *p)
 	write_parent_and_real_parent_lock(p);
 	ptrace_release_task(p);
 	__exit_signal(p);
+	write_unlock(&tasklist_lock);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -203,8 +204,7 @@ void release_task(struct task_struct *p)
 			leader->exit_state = EXIT_DEAD;
 	}
 
-	write_parent_and_real_parent_unlock(p);
-	write_unlock_irq(&tasklist_lock);
+	write_parent_and_real_parent_unlock_irq(p);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 
@@ -1016,7 +1016,7 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
 
 /*
  * Handle sys_wait4 work for one task in state EXIT_ZOMBIE.  We hold
- * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
+ * read_lock(wo->held_lock) on entry.  If we return zero, we still hold
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
@@ -1036,6 +1036,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 
 		get_task_struct(p);
 		read_unlock(wo->held_lock);
+		rcu_read_unlock();
 		sched_annotate_sleep();
 
 		if ((exit_code & 0x7f) == 0) {
@@ -1058,6 +1059,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	 * We own this thread, nobody else can reap it.
 	 */
 	read_unlock(wo->held_lock);
+	rcu_read_unlock();
 	sched_annotate_sleep();
 
 	/*
@@ -1152,16 +1154,21 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		retval = pid;
 
 	if (state == EXIT_TRACE) {
-		write_lock_irq(&tasklist_lock);
+		struct task_struct *old_parent;
+		write_parent_and_real_parent_lock_irq(p);
+		old_parent = p->parent;
 		/* We dropped tasklist, ptracer could die and untrace */
 		ptrace_unlink(p);
 
+		if (p->parent != old_parent)
+			write_unlock(&old_parent->kin_lock);
+
 		/* If parent wants a zombie, don't release it now */
 		state = EXIT_ZOMBIE;
 		if (do_notify_parent(p, p->exit_signal))
 			state = EXIT_DEAD;
 		p->exit_state = state;
-		write_unlock_irq(&tasklist_lock);
+		write_parent_unlock_irq(p);
 	}
 	if (state == EXIT_DEAD)
 		release_task(p);
@@ -1191,13 +1198,13 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace)
  * Handle sys_wait4() work for %p in state %TASK_STOPPED or %TASK_TRACED.
  *
  * CONTEXT:
- * read_lock(&tasklist_lock), which is released if return value is
+ * read_lock(wo->held_lock), which is released if return value is
  * non-zero.  Also, grabs and releases @p->sighand->siglock.
  *
  * RETURNS:
  * 0 if wait condition didn't exist and search for other wait conditions
  * should continue.  Non-zero return, -errno on failure and @p's pid on
- * success, implies that tasklist_lock is released and wait condition
+ * success, implies that wo->held_lock is released and wait condition
  * search should terminate.
  */
 static int wait_task_stopped(struct wait_opts *wo,
@@ -1241,13 +1248,14 @@ static int wait_task_stopped(struct wait_opts *wo,
 	 * Now we are pretty sure this task is interesting.
 	 * Make sure it doesn't get reaped out from under us while we
 	 * give up the lock and then examine it below.  We don't want to
-	 * keep holding onto the tasklist_lock while we call getrusage and
+	 * keep holding onto wo->held_lock while we call getrusage and
 	 * possibly take page faults for user memory.
 	 */
 	get_task_struct(p);
 	pid = task_pid_vnr(p);
 	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
 	read_unlock(wo->held_lock);
+	rcu_read_unlock();
 	sched_annotate_sleep();
 
 	if (unlikely(wo->wo_flags & WNOWAIT))
@@ -1281,7 +1289,7 @@ static int wait_task_stopped(struct wait_opts *wo,
 
 /*
  * Handle do_wait work for one task in a live, non-stopped state.
- * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
+ * read_lock(wo->held_lock) on entry.  If we return zero, we still hold
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
@@ -1311,6 +1319,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
 	pid = task_pid_vnr(p);
 	get_task_struct(p);
 	read_unlock(wo->held_lock);
+	rcu_read_unlock();
 	sched_annotate_sleep();
 
 	if (!wo->wo_info) {
@@ -1334,7 +1343,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
  * Consider @p for a wait by @parent.
  *
  * -ECHILD should be in ->notask_error before the first call.
- * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns nonzero for a final return, when we have unlocked wo->held_lock.
  * Returns zero if the search for a child should continue;
  * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
@@ -1392,6 +1401,9 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
 		 * If a ptracer wants to distinguish these two events for its
 		 * own children it should create a separate process which takes
 		 * the role of real parent.
+		 *
+		 * Since we use call do_notify_parent() under both parent's and
+		 * real_parent's kin_locks, we are protected from it.
 		 */
 		if (!ptrace_reparented(p))
 			ptrace = 1;
@@ -1460,7 +1472,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
  * Do the work of do_wait() for one thread in the group, @tsk.
  *
  * -ECHILD should be in ->notask_error before the first call.
- * Returns nonzero for a final return, when we have unlocked tasklist_lock.
+ * Returns nonzero for a final return, when we have unlocked wo->held_lock.
  * Returns zero if the search for a child should continue; then
  * ->notask_error is 0 if there were any eligible children,
  * or another error from security_task_wait(), or still -ECHILD.
@@ -1538,9 +1550,10 @@ static long do_wait(struct wait_opts *wo)
 		goto notask;
 
 	set_current_state(TASK_INTERRUPTIBLE);
-	read_lock(&tasklist_lock);
-	wo->held_lock = &tasklist_lock;
+	rcu_read_lock();
 	for_each_thread(current, tsk) {
+		read_lock(&tsk->kin_lock);
+		wo->held_lock = &tsk->kin_lock;
 		retval = do_wait_thread(wo, tsk);
 		if (retval)
 			goto end;
@@ -1548,11 +1561,12 @@ static long do_wait(struct wait_opts *wo)
 		retval = ptrace_do_wait(wo, tsk);
 		if (retval)
 			goto end;
+		read_unlock(&tsk->kin_lock);
 
 		if (wo->wo_flags & __WNOTHREAD)
 			break;
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 notask:
 	retval = wo->notask_error;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 817288d..6785f66 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -180,7 +180,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	 * we are sure that this is our traced child and that can only
 	 * be changed by us so it's not changing right after this.
 	 */
-	read_lock(&tasklist_lock);
 	read_parent_lock(child);
 	if (child->ptrace && child->parent == current) {
 		WARN_ON(child->state == __TASK_TRACED);
@@ -192,7 +191,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 			ret = 0;
 	}
 	read_parent_unlock(child);
-	read_unlock(&tasklist_lock);
 
 	if (!ret && !ignore_state) {
 		if (!wait_task_inactive(child, __TASK_TRACED)) {
@@ -314,8 +312,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	if (retval)
 		goto unlock_creds;
 
-	write_lock_irq(&tasklist_lock);
-	write_parent_and_given_lock(task, current);
+	write_parent_and_given_lock_irq(task, current);
 	old_parent = task->parent;
 	retval = -EPERM;
 	if (unlikely(task->exit_state) || task->ptrace) {
@@ -365,8 +362,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 
 	retval = 0;
 unlock_current:
-	write_unlock(&current->kin_lock);
-	write_unlock_irq(&tasklist_lock);
+	write_unlock_irq(&current->kin_lock);
 unlock_creds:
 	mutex_unlock(&task->signal->cred_guard_mutex);
 out:
@@ -389,8 +385,7 @@ static int ptrace_traceme(void)
 {
 	int ret = -EPERM;
 
-	write_lock_irq(&tasklist_lock);
-	write_parent_lock(current);
+	write_parent_lock_irq(current);
 	/* Are we already being traced? */
 	if (!current->ptrace) {
 		ret = security_ptrace_traceme(current->parent);
@@ -405,8 +400,7 @@ static int ptrace_traceme(void)
 			__ptrace_link(current, current->real_parent);
 		}
 	}
-	write_parent_unlock(current);
-	write_unlock_irq(&tasklist_lock);
+	write_parent_unlock_irq(current);
 
 	return ret;
 }
@@ -474,8 +468,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
 	ptrace_disable(child);
 	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 
-	write_lock_irq(&tasklist_lock);
-	write_parent_and_real_parent_lock(child);
+	write_parent_and_real_parent_lock_irq(child);
 	old_parent = child->parent;
 	/*
 	 * We rely on ptrace_freeze_traced(). It can't be killed and
@@ -490,8 +483,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
 	__ptrace_detach(current, child);
 	if (old_parent != child->real_parent)
 		write_unlock(&old_parent->kin_lock);
-	write_real_parent_unlock(child);
-	write_unlock_irq(&tasklist_lock);
+	write_real_parent_unlock_irq(child);
 
 	proc_ptrace_connector(child, PTRACE_DETACH);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 8288019..2e7b622 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1732,6 +1732,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	struct sighand_struct *sighand;
 	cputime_t utime, stime;
 
+	BUG_ON(!same_thread_group(tsk, current));
+
 	if (for_ptracer) {
 		parent = tsk->parent;
 	} else {
@@ -1881,7 +1883,6 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 	task_clear_jobctl_trapping(current);
 
 	spin_unlock_irq(&current->sighand->siglock);
-	read_lock(&tasklist_lock);
 	read_parent_and_real_parent_lock(current);
 	if (may_ptrace_stop()) {
 		/*
@@ -1906,7 +1907,6 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 		 */
 		preempt_disable();
 		read_parent_and_real_parent_unlock(current);
-		read_unlock(&tasklist_lock);
 		preempt_enable_no_resched();
 		freezable_schedule();
 	} else {
@@ -1928,7 +1928,6 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 		if (clear_code)
 			current->exit_code = 0;
 		read_parent_and_real_parent_unlock(current);
-		read_unlock(&tasklist_lock);
 	}
 
 	/*
@@ -2081,11 +2080,9 @@ static bool do_signal_stop(int signr)
 		 * TASK_TRACED.
 		 */
 		if (notify) {
-			read_lock(&tasklist_lock);
 			read_parent_and_real_parent_lock(current);
 			do_notify_parent_cldstop(current, false, notify);
 			read_parent_and_real_parent_unlock(current);
-			read_unlock(&tasklist_lock);
 		}
 
 		/* Now we don't run again until woken by SIGCONT or SIGKILL */
@@ -2229,7 +2226,6 @@ int get_signal(struct ksignal *ksig)
 		 * the ptracer of the group leader too unless it's gonna be
 		 * a duplicate.
 		 */
-		read_lock(&tasklist_lock);
 		read_parent_and_real_parent_lock(current->group_leader);
 		do_notify_parent_cldstop(current, false, why);
 
@@ -2237,7 +2233,6 @@ int get_signal(struct ksignal *ksig)
 			do_notify_parent_cldstop(current->group_leader,
 						true, why);
 		read_parent_and_real_parent_unlock(current->group_leader);
-		read_unlock(&tasklist_lock);
 
 		goto relock;
 	}
@@ -2482,11 +2477,9 @@ void exit_signals(struct task_struct *tsk)
 	 * should always go to the real parent of the group leader.
 	 */
 	if (unlikely(group_stop)) {
-		read_lock(&tasklist_lock);
 		read_parent_and_real_parent_lock(tsk);
 		do_notify_parent_cldstop(tsk, false, group_stop);
 		read_parent_and_real_parent_unlock(tsk);
-		read_unlock(&tasklist_lock);
 	}
 }
 




  parent reply	other threads:[~2015-05-25 17:46 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20150525162722.5171.15901.stgit@pro>
2015-05-25 17:44 ` [PATCH RFC 01/13] exit: Clarify choice of new parent in forget_original_parent() Kirill Tkhai
2015-05-25 17:44 ` [PATCH RFC 02/13] rwlock_t: Implement double_write_{,un}lock() Kirill Tkhai
2015-05-25 17:44 ` [PATCH RFC 03/13] pid_ns: Implement rwlock_t pid_ns::cr_lock for locking child_reaper Kirill Tkhai
2015-05-25 17:44 ` [PATCH RFC 04/13] exit: Small refactoring mm_update_next_owner() Kirill Tkhai
2015-05-25 17:45 ` [PATCH RFC 05/13] fs: Refactoring in get_children_pid() Kirill Tkhai
2015-05-25 17:45 ` [PATCH RFC 06/13] core: Add rwlock_t task_list::kin_lock Kirill Tkhai
2015-05-25 17:45 ` [PATCH RFC 07/13] kin_lock: Implement helpers for kin_lock locking Kirill Tkhai
2015-05-25 17:45 ` [PATCH RFC 08/13] core: Use kin_lock synchronizations between parent and child and for thread group Kirill Tkhai
2015-05-25 17:45 ` [PATCH RFC 09/13] exit: Use for_each_thread() in do_wait() Kirill Tkhai
2015-05-26 19:46   ` Oleg Nesterov
2015-05-27  9:33     ` Kirill Tkhai
2015-05-27  9:42       ` Kirill Tkhai
2015-05-25 17:45 ` [PATCH RFC 10/13] exit: Add struct wait_opts's member held_lock and use it for tasklist_lock Kirill Tkhai
2015-05-25 17:46 ` Kirill Tkhai [this message]
2015-05-25 17:46 ` [PATCH RFC 12/13] exit: Delete write dependence on tasklist_lock in exit_notify() Kirill Tkhai
2015-05-25 17:46 ` [PATCH RFC 13/13] core: Nest tasklist_lock into task_struct::kin_lock Kirill Tkhai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1432575962.6866.44.camel@odin.com \
    --to=ktkhai@odin.com \
    --cc=akpm@linux-foundation.org \
    --cc=ionut.m.alexa@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhocko@suse.cz \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=peter@hurleysoftware.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tkhai@yandex.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.