public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/4] coredump: kill ptrace related stuff
@ 2006-04-06 22:06 Oleg Nesterov
  2006-04-10  4:54 ` Roland McGrath
  0 siblings, 1 reply; 5+ messages in thread
From: Oleg Nesterov @ 2006-04-06 22:06 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Eric W. Biederman, Ingo Molnar, Paul E. McKenney, Roland McGrath,
	linux-kernel

With this patch zap_process() sets SIGNAL_GROUP_EXIT while sending
SIGKILL to the thread group. This means that a TASK_TRACED task

	1. Will be awakened by signal_wake_up(1)

	2. Can't sleep again via ptrace_notify()

	3. Can't go to do_signal_stop() after return
	   from ptrace_stop() in get_signal_to_deliver()

So we can remove all ptrace related stuff from coredump path.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>

--- rc1/fs/exec.c~3_PTRACE	2006-04-06 15:19:33.000000000 +0400
+++ rc1/fs/exec.c	2006-04-06 19:56:36.000000000 +0400
@@ -1384,12 +1384,14 @@ static void format_corename(char *corena
 	*out_ptr = 0;
 }
 
-static void zap_process(struct task_struct *start, int *ptraced)
+static void zap_process(struct task_struct *start)
 {
 	struct task_struct *t;
 	unsigned long flags;
 
 	spin_lock_irqsave(&start->sighand->siglock, flags);
+	start->signal->flags = SIGNAL_GROUP_EXIT;
+	start->signal->group_stop_count = 0;
 
 	t = start;
 	do {
@@ -1397,22 +1399,17 @@ static void zap_process(struct task_stru
 			t->mm->core_waiters++;
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
-
-			if (unlikely(t->ptrace) &&
-			    unlikely(t->parent->mm == t->mm))
-				*ptraced = 1;
 		}
 	} while ((t = next_thread(t)) != start);
 
 	spin_unlock_irqrestore(&start->sighand->siglock, flags);
 }
 
-static void zap_threads (struct mm_struct *mm)
+static void zap_threads(struct mm_struct *mm)
 {
 	struct task_struct *g, *p;
 	struct task_struct *tsk = current;
 	struct completion *vfork_done = tsk->vfork_done;
-	int traced = 0;
 
 	/*
 	 * Make sure nobody is waiting for us to release the VM,
@@ -1429,29 +1426,12 @@ static void zap_threads (struct mm_struc
 		do {
 			if (p->mm) {
 				if (p->mm == mm)
-					zap_process(p, &traced);
+					zap_process(p);
 				break;
 			}
 		} while ((p = next_thread(p)) != g);
 	}
 	read_unlock(&tasklist_lock);
-
-	if (unlikely(traced)) {
-		/*
-		 * We are zapping a thread and the thread it ptraces.
-		 * If the tracee went into a ptrace stop for exit tracing,
-		 * we could deadlock since the tracer is waiting for this
-		 * coredump to finish.  Detach them so they can both die.
-		 */
-		write_lock_irq(&tasklist_lock);
-		do_each_thread(g,p) {
-			if (mm == p->mm && p != tsk &&
-			    p->ptrace && p->parent->mm == mm) {
-				__ptrace_detach(p, 0);
-			}
-		} while_each_thread(g,p);
-		write_unlock_irq(&tasklist_lock);
-	}
 }
 
 static void coredump_wait(struct mm_struct *mm)
--- rc1/include/linux/ptrace.h~3_PTRACE	2006-03-01 21:57:22.000000000 +0300
+++ rc1/include/linux/ptrace.h	2006-04-06 16:37:54.000000000 +0400
@@ -84,7 +84,6 @@ extern int ptrace_readdata(struct task_s
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern int ptrace_attach(struct task_struct *tsk);
 extern int ptrace_detach(struct task_struct *, unsigned int);
-extern void __ptrace_detach(struct task_struct *, unsigned int);
 extern void ptrace_disable(struct task_struct *);
 extern int ptrace_check_attach(struct task_struct *task, int kill);
 extern int ptrace_request(struct task_struct *child, long request, long addr, long data);
--- rc1/kernel/ptrace.c~3_PTRACE	2006-04-03 16:21:26.000000000 +0400
+++ rc1/kernel/ptrace.c	2006-04-06 16:41:41.000000000 +0400
@@ -183,7 +183,7 @@ bad:
 	return retval;
 }
 
-void __ptrace_detach(struct task_struct *child, unsigned int data)
+static inline void __ptrace_detach(struct task_struct *child, unsigned int data)
 {
 	child->exit_code = data;
 	/* .. re-parent .. */
@@ -202,8 +202,7 @@ int ptrace_detach(struct task_struct *ch
 	ptrace_disable(child);
 
 	write_lock_irq(&tasklist_lock);
-	if (child->ptrace)
-		__ptrace_detach(child, data);
+	__ptrace_detach(child, data);
 	write_unlock_irq(&tasklist_lock);
 
 	return 0;


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/4] coredump: kill ptrace related stuff
  2006-04-06 22:06 [PATCH 3/4] coredump: kill ptrace related stuff Oleg Nesterov
@ 2006-04-10  4:54 ` Roland McGrath
  2006-04-10 13:35   ` Oleg Nesterov
  0 siblings, 1 reply; 5+ messages in thread
From: Roland McGrath @ 2006-04-10  4:54 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Eric W. Biederman, Ingo Molnar, Paul E. McKenney,
	linux-kernel

> With this patch zap_process() sets SIGNAL_GROUP_EXIT while sending
> SIGKILL to the thread group. 

do_coredump has already done this.  So you are addressing the case of other
thread groups sharing the mm, right?

> This means that a TASK_TRACED task
> 
> 	1. Will be awakened by signal_wake_up(1)

That should always happen regardless of signal->flags, so yes.

> 	2. Can't sleep again via ptrace_notify()

What makes this be so?  What if it's entering a notification event now?
What about exit tracing?

> 	3. Can't go to do_signal_stop() after return
> 	   from ptrace_stop() in get_signal_to_deliver()

This is only true because of the check in get_signal_to_deliver,
which I've said I think should be taken out for other reasons.

I guess I'm missing something here.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/4] coredump: kill ptrace related stuff
  2006-04-10  4:54 ` Roland McGrath
@ 2006-04-10 13:35   ` Oleg Nesterov
  2006-04-11  1:21     ` Roland McGrath
  0 siblings, 1 reply; 5+ messages in thread
From: Oleg Nesterov @ 2006-04-10 13:35 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Morton, Eric W. Biederman, Ingo Molnar, Paul E. McKenney,
	linux-kernel

On 04/09, Roland McGrath wrote:
>
> > With this patch zap_process() sets SIGNAL_GROUP_EXIT while sending
> > SIGKILL to the thread group.
>
> do_coredump has already done this.  So you are addressing the case of other
> thread groups sharing the mm, right?

Yes,

> > This means that a TASK_TRACED task
> >
> > 	1. Will be awakened by signal_wake_up(1)
>
> That should always happen regardless of signal->flags, so yes.
>
> > 	2. Can't sleep again via ptrace_notify()
>
> What makes this be so?  What if it's entering a notification event now?
> What about exit tracing?

It turns out I misread SIGNAL_GROUP_EXIT check in ptrace_stop(),
didn't notice '(->parent->signal != current->signal) ||' before
it.

You are right, this is a problem, I need to think about it.

Do you see any solution which doesn't need tasklist_lock to be
held while traversing global process list?

> > 	3. Can't go to do_signal_stop() after return
> > 	   from ptrace_stop() in get_signal_to_deliver()
>
> This is only true because of the check in get_signal_to_deliver,
> which I've said I think should be taken out for other reasons.

Yes, changelog refers to SIGNAL_GROUP_EXIT check in get_signal_to_deliver.
However, do_signal_stop() returns 0 when it doesn't see SIGNAL_STOP_DEQUEUED,
(which was cleared by SIGNAL_GROUP_EXIT), so I think we don't depend on
SIGNAL_GROUP_EXIT check in get_signal_to_deliver. No?

Btw, I don't understand Andrea's patch (and changelog) too.

Oleg.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/4] coredump: kill ptrace related stuff
  2006-04-10 13:35   ` Oleg Nesterov
@ 2006-04-11  1:21     ` Roland McGrath
  2006-04-13 12:58       ` Oleg Nesterov
  0 siblings, 1 reply; 5+ messages in thread
From: Roland McGrath @ 2006-04-11  1:21 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, Eric W. Biederman, Ingo Molnar, Paul E. McKenney,
	linux-kernel

> It turns out I misread SIGNAL_GROUP_EXIT check in ptrace_stop(),
> didn't notice '(->parent->signal != current->signal) ||' before
> it.

I thought that might have been it.

> Do you see any solution which doesn't need tasklist_lock to be
> held while traversing global process list?

Eh, kind of, but I'm not sure I want to get into it.  This only comes up in
a pathological case and we don't actually take the lock unless the weird
case really happened.  My inclination is to get the rest of the cleanups
and optimizations ironed out and merged in first.  Then we can revisit this
oddball case later on.

> > > 	3. Can't go to do_signal_stop() after return
> > > 	   from ptrace_stop() in get_signal_to_deliver()
> >
> > This is only true because of the check in get_signal_to_deliver,
> > which I've said I think should be taken out for other reasons.
>
> Yes, changelog refers to SIGNAL_GROUP_EXIT check in get_signal_to_deliver.
> However, do_signal_stop() returns 0 when it doesn't see SIGNAL_STOP_DEQUEUED,
> (which was cleared by SIGNAL_GROUP_EXIT), so I think we don't depend on
> SIGNAL_GROUP_EXIT check in get_signal_to_deliver. No?

Ah yes, you are right.  So there is no conflict with removing the check as
I want to do.


Thanks,
Roland

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/4] coredump: kill ptrace related stuff
  2006-04-11  1:21     ` Roland McGrath
@ 2006-04-13 12:58       ` Oleg Nesterov
  0 siblings, 0 replies; 5+ messages in thread
From: Oleg Nesterov @ 2006-04-13 12:58 UTC (permalink / raw)
  To: Roland McGrath
  Cc: Andrew Morton, Eric W. Biederman, Ingo Molnar, Paul E. McKenney,
	linux-kernel

On 04/10, Roland McGrath wrote:
>
> > It turns out I misread SIGNAL_GROUP_EXIT check in ptrace_stop(),
> > didn't notice '(->parent->signal != current->signal) ||' before
> > it.
> 
> I thought that might have been it.
> 
> > Do you see any solution which doesn't need tasklist_lock to be
> > held while traversing global process list?
> 
> Eh, kind of, but I'm not sure I want to get into it.  This only comes up in
> a pathological case and we don't actually take the lock unless the weird
> case really happened.  My inclination is to get the rest of the cleanups
> and optimizations ironed out and merged in first.  Then we can revisit this
> oddball case later on.

The main optimization is avoiding tasklist_lock. But we can't reintroduce
'if (p->ptrace && p->parent->mm == mm)' check without tasklist_lock.

Roland, could you please look at the patch below and ack/nack it ?
This patch is soooooooo ugly, but:

	It is very simple.

	It (as I hope) fixes all coredump vs ptrace problems, those
	which current code has and those which were added by me.

	It allows us to avoid tasklist_lock.

	Since the locking in ptrace_attch() likely to be changed soon,
	it is unclear to me what could be done as "right thing" now.

Oleg.

--- MM/include/linux/sched.h~1_PTFIX	2006-04-13 16:06:19.000000000 +0400
+++ MM/include/linux/sched.h	2006-04-13 16:06:32.000000000 +0400
@@ -466,6 +466,7 @@ struct signal_struct {
 #define SIGNAL_STOP_DEQUEUED	0x00000002 /* stop signal dequeued */
 #define SIGNAL_STOP_CONTINUED	0x00000004 /* SIGCONT since WCONTINUED reap */
 #define SIGNAL_GROUP_EXIT	0x00000008 /* group exit in progress */
+#define SIGNAL_GROUP_COREDUMP	0x00000010 /* coredump in progress */
 
 
 /*
--- MM/fs/exec.c~1_PTFIX	2006-04-09 03:52:03.000000000 +0400
+++ MM/fs/exec.c	2006-04-13 16:16:27.000000000 +0400
@@ -1384,7 +1384,7 @@ static void zap_process(struct task_stru
 {
 	struct task_struct *t;
 
-	start->signal->flags = SIGNAL_GROUP_EXIT;
+	start->signal->flags = SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP;
 	start->signal->group_stop_count = 0;
 
 	t = start;
--- MM/kernel/signal.c~1_PTFIX	2006-03-25 20:18:38.000000000 +0300
+++ MM/kernel/signal.c	2006-04-13 16:40:49.000000000 +0400
@@ -1545,6 +1545,34 @@ static void do_notify_parent_cldstop(str
  * If we actually decide not to stop at all because the tracer is gone,
  * we leave nostop_code in current->exit_code.
  */
+static inline int may_ptrace_stop(void)
+{
+	if (!likely(current->ptrace & PT_PTRACED))
+		return 0;
+
+	if (unlikely(current->parent == current->real_parent &&
+		    (current->ptrace & PT_ATTACHED)))
+		return 0;
+
+	// Copied from ptrace_stop(), seems to be unneeded
+
+	if ((unlikely(current->parent->signal == current->signal) &&
+	     unlikely(current->signal->flags & SIGNAL_GROUP_EXIT)))
+		return 0;
+
+	// ... Fat comment is needed here ...
+
+	// This check '->flags & SIGNAL_GROUP_COREDUMP' is racy.
+	// But if this flag was set after spin_unlock(->siglock)
+	// zap_process() will wake up this task anyway.
+
+	if ((unlikely(current->parent->mm == current->mm) &&
+	     unlikely(current->signal->flags & SIGNAL_GROUP_COREDUMP)))
+		return 0;
+
+	return 1;
+}
+
 static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
 {
 	/*
@@ -1561,11 +1589,7 @@ static void ptrace_stop(int exit_code, i
 	set_current_state(TASK_TRACED);
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
-	if (likely(current->ptrace & PT_PTRACED) &&
-	    likely(current->parent != current->real_parent ||
-		   !(current->ptrace & PT_ATTACHED)) &&
-	    (likely(current->parent->signal != current->signal) ||
-	     !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
+	if (may_ptrace_stop()) {
 		do_notify_parent_cldstop(current, CLD_TRAPPED);
 		read_unlock(&tasklist_lock);
 		schedule();


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-04-13  9:01 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-04-06 22:06 [PATCH 3/4] coredump: kill ptrace related stuff Oleg Nesterov
2006-04-10  4:54 ` Roland McGrath
2006-04-10 13:35   ` Oleg Nesterov
2006-04-11  1:21     ` Roland McGrath
2006-04-13 12:58       ` Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox