From: Oleg Nesterov <oleg@redhat.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
Roland McGrath <roland@hack.frob.com>, Tejun Heo <tj@kernel.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>,
KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
Matt Fleming <matt.fleming@linux.intel.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH 6/8] vfork: do not setup child->vfork_done beforehand
Date: Wed, 27 Jul 2011 18:33:45 +0200 [thread overview]
Message-ID: <20110727163345.GF23793@redhat.com> (raw)
In-Reply-To: <20110727163159.GA23785@redhat.com>
do_fork() allocates/initializes "struct completion" on stack and
sets p->vfork_done in advance, before the task becomes runnable.
This way clone_vfork_finish(p) can always trust ->vfork_done.
But, to make this restartable, we need to re-assign ->vfork_done
and wait in sys_restart_syscall(), and we must not do this if the
child has already passed complete_vfork_done().
With this patch we allocate/initialize and set child->vfork_done in
clone_vfork_finish() when we are going to actually wait. To prevent
the race with the child we use the fake VFORK_DONE_NOP marker set by
do_fork().
If complete_vfork_done() sees VFORK_DONE_NOP it does not call complete();
it only resets ->vfork_done to NULL.
This way clone_vfork_finish() can do xchg(), and if it returns !NULL we can
be sure the child has not passed complete_vfork_done() yet, so we can safely wait.
IOW, the logic is:
- do_fork(CLONE_VFORK)
child->vfork_done = VFORK_DONE_NOP; // != NULL
- complete_vfork_done() // child
vfork = xchg(&tsk->vfork_done, NULL);
- clone_vfork_finish() // parent
struct completion vfork_done;
if (xchg(&child->vfork_done, &vfork_done) != NULL)
wait_for_completion(&vfork_done);
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
kernel/fork.c | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)
--- 3.1/kernel/fork.c~6_no_vfork_done 2011-07-26 20:28:24.000000000 +0200
+++ 3.1/kernel/fork.c 2011-07-26 21:13:56.000000000 +0200
@@ -1443,28 +1443,40 @@ struct task_struct * __cpuinit fork_idle
return task;
}
+#define VFORK_DONE_NOP ((struct completion *) 1)
+#define vfork_done_valid(vf) ((unsigned long)(vf) > 1)
+
static void complete_vfork_done(struct task_struct *tsk)
{
struct completion *vfork_done = xchg(&tsk->vfork_done, NULL);
- if (vfork_done)
+ if (vfork_done_valid(vfork_done))
complete(vfork_done);
}
-static long clone_vfork_finish(struct task_struct *child,
- struct completion *vfork_done, long pid)
+static long clone_vfork_finish(struct task_struct *child, long pid)
{
- int killed = wait_for_completion_killable(vfork_done);
+ struct completion vfork_done;
+ int killed;
+
+ init_completion(&vfork_done);
+ /* complete_vfork_done() was already called? */
+ if (xchg(&child->vfork_done, &vfork_done) == NULL)
+ goto done;
+
+ killed = wait_for_completion_killable(&vfork_done);
if (killed) {
- struct completion *steal = xchg(&child->vfork_done, NULL);
+ struct completion *steal = xchg(&child->vfork_done,
+ VFORK_DONE_NOP);
/* if we race with complete_vfork_done() we have to wait */
if (unlikely(!steal))
- wait_for_completion(vfork_done);
+ wait_for_completion(&vfork_done);
return -EINTR;
}
+done:
ptrace_event(PTRACE_EVENT_VFORK_DONE, pid);
return pid;
}
@@ -1526,8 +1538,6 @@ long do_fork(unsigned long clone_flags,
* might get invalid after that point, if the thread exits quickly.
*/
if (!IS_ERR(p)) {
- struct completion vfork;
-
trace_sched_process_fork(current, p);
nr = task_pid_vnr(p);
@@ -1536,9 +1546,8 @@ long do_fork(unsigned long clone_flags,
put_user(nr, parent_tidptr);
if (clone_flags & CLONE_VFORK) {
+ p->vfork_done = VFORK_DONE_NOP;
get_task_struct(p);
- p->vfork_done = &vfork;
- init_completion(&vfork);
}
audit_finish_fork(p);
@@ -1558,7 +1567,7 @@ long do_fork(unsigned long clone_flags,
ptrace_event(trace, nr);
if (clone_flags & CLONE_VFORK) {
- nr = clone_vfork_finish(p, &vfork, nr);
+ nr = clone_vfork_finish(p, nr);
put_task_struct(p);
}
} else {
next prev parent reply other threads:[~2011-07-27 16:36 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-07-27 16:31 [PATCH 0/8] make vfork killable/restartable/traceable Oleg Nesterov
2011-07-27 16:32 ` [PATCH 1/8] vfork: introduce complete_vfork_done() Oleg Nesterov
2011-07-27 16:32 ` [PATCH 2/8] vfork: introduce clone_vfork_finish() Oleg Nesterov
2011-07-27 16:32 ` [PATCH 3/8] vfork: make it killable Oleg Nesterov
2011-07-29 13:02 ` Matt Fleming
2011-07-29 14:32 ` Oleg Nesterov
2011-07-29 15:32 ` Matt Fleming
2011-07-27 16:33 ` [PATCH 4/8] coredump_wait: don't call complete_vfork_done() Oleg Nesterov
2011-07-29 13:02 ` Matt Fleming
2011-07-29 14:25 ` Oleg Nesterov
2011-07-29 15:26 ` Matt Fleming
2011-07-27 16:33 ` [PATCH 5/8] introduce find_get_task_by_vpid() Oleg Nesterov
2011-07-27 16:33 ` Oleg Nesterov [this message]
2011-07-27 16:34 ` [PATCH 7/8] vfork: make it stoppable/traceable Oleg Nesterov
2011-07-27 16:34 ` [PATCH 8/8] vfork: do not block SIG_DFL/SIG_IGN signals is single-threaded Oleg Nesterov
2011-07-27 16:34 ` [PATCH 9/8] kill PF_STARTING Oleg Nesterov
2011-07-27 19:39 ` [PATCH 0/8] make vfork killable/restartable/traceable Linus Torvalds
2011-07-28 13:59 ` Oleg Nesterov
2011-07-28 14:58 ` Oleg Nesterov
2011-07-27 22:38 ` Pedro Alves
2011-07-29 19:23 ` Tejun Heo
2011-08-12 17:55 ` [PATCH v2 0/3] make vfork killable Oleg Nesterov
2011-08-12 17:56 ` [PATCH 1/3] vfork: introduce complete_vfork_done() Oleg Nesterov
2011-08-12 17:56 ` [PATCH 2/3] vfork: make it killable Oleg Nesterov
2011-08-19 20:33 ` Matt Fleming
2011-08-22 13:35 ` Oleg Nesterov
2011-08-12 17:56 ` [PATCH 3/3] coredump_wait: don't call complete_vfork_done() Oleg Nesterov
2011-08-17 7:50 ` Tejun Heo
2011-08-17 15:11 ` Oleg Nesterov
2011-08-12 17:57 ` [PATCH 4/3] kill PF_STARTING Oleg Nesterov
2011-08-17 7:51 ` Tejun Heo
2011-08-13 16:18 ` [PATCH v2 0/3] make vfork killable Tejun Heo
2011-08-15 19:42 ` Oleg Nesterov
2011-08-16 19:42 ` Tejun Heo
2011-08-23 22:01 ` Matt Helsley
2011-08-23 22:12 ` Tejun Heo
[not found] ` <20110727163610.GJ23793@redhat.com>
[not found] ` <20110727175624.GA3950@redhat.com>
[not found] ` <20110728154324.GA22864@redhat.com>
[not found] ` <alpine.DEB.2.00.1107281341060.16093@chino.kir.corp.google.com>
[not found] ` <20110729141431.GA3501@redhat.com>
[not found] ` <20110730143426.GA6061@redhat.com>
2011-07-30 15:22 ` mm->oom_disable_count is broken Oleg Nesterov
2011-08-01 11:52 ` KOSAKI Motohiro
2011-08-29 18:37 ` Oleg Nesterov
2011-08-29 23:17 ` David Rientjes
2011-08-30 7:43 ` [patch 1/2] oom: remove oom_disable_count David Rientjes
2011-08-30 7:43 ` David Rientjes
2011-08-30 7:43 ` [patch 2/2] oom: fix race while temporarily setting current's oom_score_adj David Rientjes
2011-08-30 7:43 ` David Rientjes
2011-08-30 15:57 ` Oleg Nesterov
2011-08-30 15:57 ` Oleg Nesterov
2011-08-30 15:28 ` [patch 1/2] oom: remove oom_disable_count Oleg Nesterov
2011-08-30 15:28 ` Oleg Nesterov
2011-08-30 22:06 ` David Rientjes
2011-08-30 22:06 ` David Rientjes
2011-08-30 16:17 ` mm->oom_disable_count is broken Oleg Nesterov
2011-08-10 21:44 ` [PATCH 0/8] make vfork killable/restartable/traceable Pavel Machek
2011-08-11 16:09 ` Oleg Nesterov
2011-08-11 16:22 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20110727163345.GF23793@redhat.com \
--to=oleg@redhat.com \
--cc=dvlasenk@redhat.com \
--cc=kosaki.motohiro@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=matt.fleming@linux.intel.com \
--cc=roland@hack.frob.com \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.