From: Oleg Nesterov <oleg@redhat.com>
To: Roland McGrath <roland@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Stanislaw Gruszka <sgruszka@redhat.com>,
Vitaly Mayatskikh <vmayatsk@redhat.com>,
linux-kernel@vger.kernel.org
Subject: [rfc] ptrace: wait_task_zombie: fix the racy EXIT_ZOMBIE setting
Date: Wed, 17 Jun 2009 21:48:13 +0200 [thread overview]
Message-ID: <20090617194813.GA26428@redhat.com> (raw)
In-Reply-To: <20090616004510.2885FFC3D3@magilla.sf.frob.com>
I didn't try to test this patch, just for early review. I think we need
a helper or two for "if (noreap)" branches.
>From now EXIT_DEAD really means the task is dead and should be ignored.
Fixes the race with ->real_parent doing do_wait().
Problem: if we untrace but do not set EXIT_DEAD, ->real_parent can release
the task before getrusage(). In that case k_getrusage() silently fails, and
we fill ->wo_rusage with zeros.
Do you see any solution? Or can we ignore this minor (I think) problem?
On 06/15, Roland McGrath wrote:
>
> ACK, but I think it warrants a comment explaining that task_detached() here
> always means "ptrace'd but not reparented".
Yes. And the name of "int traced" is very bad and confusing.
But I am wondering, shouldn't we always untrace the traced chils, regardless
of ptrace_reparented() ? This looks more clean to me.
Oleg.
--- PTRACE/kernel/exit.c~3_ZOMBIE_DEAD 2009-06-15 23:06:45.000000000 +0200
+++ PTRACE/kernel/exit.c 2009-06-17 21:24:23.000000000 +0200
@@ -1153,7 +1153,7 @@ static int wait_noreap_copyout(struct wa
static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
{
unsigned long state;
- int retval, status, traced;
+ int retval, status, noreap;
pid_t pid = task_pid_vnr(p);
uid_t uid = __task_cred(p)->uid;
struct siginfo __user *infop;
@@ -1161,11 +1161,12 @@ static int wait_task_zombie(struct wait_
if (!likely(wo->wo_flags & WEXITED))
return 0;
+ get_task_struct(p);
+
if (unlikely(wo->wo_flags & WNOWAIT)) {
int exit_code = p->exit_code;
int why, status;
- get_task_struct(p);
read_unlock(&tasklist_lock);
if ((exit_code & 0x7f) == 0) {
why = CLD_EXITED;
@@ -1177,84 +1178,105 @@ static int wait_task_zombie(struct wait_
return wait_noreap_copyout(wo, p, pid, uid, why, status);
}
- /*
- * Try to move the task's state to DEAD
- * only one thread is allowed to do this:
- */
- state = xchg(&p->exit_state, EXIT_DEAD);
- if (state != EXIT_ZOMBIE) {
- BUG_ON(state != EXIT_DEAD);
- return 0;
- }
-
- traced = ptrace_reparented(p);
+ status = (p->signal->flags & SIGNAL_GROUP_EXIT)
+ ? p->signal->group_exit_code : p->exit_code;
- if (likely(!traced) && likely(!task_detached(p))) {
- struct signal_struct *psig;
- struct signal_struct *sig;
+ noreap = ptrace_reparented(p);
+ if (unlikely(noreap)) {
+ read_unlock(&tasklist_lock);
+ retval = -EAGAIN;
+ write_lock_irq(&tasklist_lock);
+ if (task_ptrace(p)) {
+ BUG_ON(p->exit_state != EXIT_ZOMBIE);
+ __ptrace_unlink(p);
+ /*
+ * If this is not a detached task, notify the parent.
+ * If it's still not detached after that, don't release
+ * it now.
+ */
+ if (!task_detached(p))
+ do_notify_parent(p, p->exit_signal);
+ if (task_detached(p)) {
+ p->exit_state = EXIT_DEAD;
+ noreap = 0;
+ }
+ retval = 0;
+ }
+ write_unlock_irq(&tasklist_lock);
+ if (unlikely(retval))
+ goto out;
+ } else {
/*
- * The resource counters for the group leader are in its
- * own task_struct. Those for dead threads in the group
- * are in its signal_struct, as are those for the child
- * processes it has previously reaped. All these
- * accumulate in the parent's signal_struct c* fields.
- *
- * We don't bother to take a lock here to protect these
- * p->signal fields, because they are only touched by
- * __exit_signal, which runs with tasklist_lock
- * write-locked anyway, and so is excluded here. We do
- * need to protect the access to parent->signal fields,
- * as other threads in the parent group can be right
- * here reaping other children at the same time.
+ * Try to move the task's state to DEAD
+ * only one thread is allowed to do this:
*/
- spin_lock_irq(&p->real_parent->sighand->siglock);
- psig = p->real_parent->signal;
- sig = p->signal;
- psig->cutime =
- cputime_add(psig->cutime,
- cputime_add(p->utime,
- cputime_add(sig->utime,
- sig->cutime)));
- psig->cstime =
- cputime_add(psig->cstime,
- cputime_add(p->stime,
- cputime_add(sig->stime,
- sig->cstime)));
- psig->cgtime =
- cputime_add(psig->cgtime,
- cputime_add(p->gtime,
- cputime_add(sig->gtime,
- sig->cgtime)));
- psig->cmin_flt +=
- p->min_flt + sig->min_flt + sig->cmin_flt;
- psig->cmaj_flt +=
- p->maj_flt + sig->maj_flt + sig->cmaj_flt;
- psig->cnvcsw +=
- p->nvcsw + sig->nvcsw + sig->cnvcsw;
- psig->cnivcsw +=
- p->nivcsw + sig->nivcsw + sig->cnivcsw;
- psig->cinblock +=
- task_io_get_inblock(p) +
- sig->inblock + sig->cinblock;
- psig->coublock +=
- task_io_get_oublock(p) +
- sig->oublock + sig->coublock;
- task_io_accounting_add(&psig->ioac, &p->ioac);
- task_io_accounting_add(&psig->ioac, &sig->ioac);
- spin_unlock_irq(&p->real_parent->sighand->siglock);
- }
+ state = xchg(&p->exit_state, EXIT_DEAD);
+ if (unlikely(state != EXIT_ZOMBIE)) {
+ BUG_ON(state != EXIT_DEAD);
+ goto out;
+ }
- /*
- * Now we are sure this task is interesting, and no other
- * thread can reap it because we set its state to EXIT_DEAD.
- */
- read_unlock(&tasklist_lock);
+ if (likely(!task_detached(p))) {
+ struct signal_struct *psig;
+ struct signal_struct *sig;
+
+ /*
+ * The resource counters for the group leader are in its
+ * own task_struct. Those for dead threads in the group
+ * are in its signal_struct, as are those for the child
+ * processes it has previously reaped. All these
+ * accumulate in the parent's signal_struct c* fields.
+ *
+ * We don't bother to take a lock here to protect these
+ * p->signal fields, because they are only touched by
+ * __exit_signal, which runs with tasklist_lock
+ * write-locked anyway, and so is excluded here. We do
+ * need to protect the access to parent->signal fields,
+ * as other threads in the parent group can be right
+ * here reaping other children at the same time.
+ */
+ spin_lock_irq(&p->real_parent->sighand->siglock);
+ psig = p->real_parent->signal;
+ sig = p->signal;
+ psig->cutime =
+ cputime_add(psig->cutime,
+ cputime_add(p->utime,
+ cputime_add(sig->utime,
+ sig->cutime)));
+ psig->cstime =
+ cputime_add(psig->cstime,
+ cputime_add(p->stime,
+ cputime_add(sig->stime,
+ sig->cstime)));
+ psig->cgtime =
+ cputime_add(psig->cgtime,
+ cputime_add(p->gtime,
+ cputime_add(sig->gtime,
+ sig->cgtime)));
+ psig->cmin_flt +=
+ p->min_flt + sig->min_flt + sig->cmin_flt;
+ psig->cmaj_flt +=
+ p->maj_flt + sig->maj_flt + sig->cmaj_flt;
+ psig->cnvcsw +=
+ p->nvcsw + sig->nvcsw + sig->cnvcsw;
+ psig->cnivcsw +=
+ p->nivcsw + sig->nivcsw + sig->cnivcsw;
+ psig->cinblock +=
+ task_io_get_inblock(p) +
+ sig->inblock + sig->cinblock;
+ psig->coublock +=
+ task_io_get_oublock(p) +
+ sig->oublock + sig->coublock;
+ task_io_accounting_add(&psig->ioac, &p->ioac);
+ task_io_accounting_add(&psig->ioac, &sig->ioac);
+ spin_unlock_irq(&p->real_parent->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
+ }
retval = wo->wo_rusage
? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
- status = (p->signal->flags & SIGNAL_GROUP_EXIT)
- ? p->signal->group_exit_code : p->exit_code;
if (!retval && wo->wo_stat)
retval = put_user(status, wo->wo_stat);
@@ -1284,27 +1306,10 @@ static int wait_task_zombie(struct wait_
if (!retval)
retval = pid;
- if (traced) {
- write_lock_irq(&tasklist_lock);
- /* We dropped tasklist, ptracer could die and untrace */
- ptrace_unlink(p);
- /*
- * If this is not a detached task, notify the parent.
- * If it's still not detached after that, don't release
- * it now.
- */
- if (!task_detached(p)) {
- do_notify_parent(p, p->exit_signal);
- if (!task_detached(p)) {
- p->exit_state = EXIT_ZOMBIE;
- p = NULL;
- }
- }
- write_unlock_irq(&tasklist_lock);
- }
- if (p != NULL)
+ if (likely(!noreap))
release_task(p);
-
+out:
+ put_task_struct(p);
return retval;
}
@@ -1587,8 +1592,11 @@ repeat:
goto end;
retval = ptrace_do_wait(wo, tsk);
- if (retval)
+ if (retval) {
+ if (unlikely(retval == -EAGAIN))
+ goto repeat;
goto end;
+ }
if (wo->wo_flags & __WNOTHREAD)
break;
next prev parent reply other threads:[~2009-06-17 19:53 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-15 21:26 [PATCH 1/2] ptrace: wait_task_zombie: do not account traced sub-threads Oleg Nesterov
2009-06-16 0:45 ` Roland McGrath
2009-06-17 19:48 ` Oleg Nesterov [this message]
2009-06-18 18:47 ` [PATCH] ptrace-wait_task_zombie-do-not-account-traced-sub-threads-fix Oleg Nesterov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090617194813.GA26428@redhat.com \
--to=oleg@redhat.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=roland@redhat.com \
--cc=sgruszka@redhat.com \
--cc=vmayatsk@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.