From: David Howells <dhowells@redhat.com>
To: torvalds@transmeta.com
Cc: linux-kernel@vger.kernel.org, mingo@redhat.com
Subject: [PATCH] wait4() WIFSTOPPED starvation fix #1/2
Date: Fri, 15 Mar 2002 21:09:20 +0000 [thread overview]
Message-ID: <17396.1016226560@warthog.cambridge.redhat.com> (raw)
Hi Linus,
There appears to be a starvation bug in sys_wait4().
The case runs like this:
(1) If a process has a lot of threads, and a debugger like strace is running
over all the threads, then the threads will be sat in the stopped more
often than they're running.
(2) the children being traced are "reparented" and added to strace's children
list.
(4) strace uses wait4() to find an _unspecified_ stopped (T) or zombie (Z)
state process for it to deal with.
(5) sys_wait4() always walks the list of children in the same order
(effectively fork/clone order).
(6) children at the end of the list starve for attention because sys_wait4()
always takes the first process on the list that needs attention, and the
list isn't ever rearranged.
This patch (#1) just converts the task_struct to use struct list_head rather
than direct pointers for maintaining the children list.
David
diff -uNr linux-2.5.6-pre1/arch/i386/kernel/signal.c linux-wait-256p1/arch/i386/kernel/signal.c
--- linux-2.5.6-pre1/arch/i386/kernel/signal.c Wed Feb 27 08:27:56 2002
+++ linux-wait-256p1/arch/i386/kernel/signal.c Fri Mar 15 19:10:03 2002
@@ -630,8 +630,8 @@
info.si_signo = signr;
info.si_errno = 0;
info.si_code = SI_USER;
- info.si_pid = current->p_pptr->pid;
- info.si_uid = current->p_pptr->uid;
+ info.si_pid = current->parent->pid;
+ info.si_uid = current->parent->uid;
}
/* If the (new) signal is now blocked, requeue it. */
@@ -670,7 +670,7 @@
case SIGSTOP: {
struct signal_struct *sig;
current->exit_code = signr;
- sig = current->p_pptr->sig;
+ sig = current->parent->sig;
preempt_disable();
current->state = TASK_STOPPED;
if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
diff -uNr linux-2.5.6-pre1/fs/binfmt_elf.c linux-wait-256p1/fs/binfmt_elf.c
--- linux-2.5.6-pre1/fs/binfmt_elf.c Wed Feb 27 08:26:56 2002
+++ linux-wait-256p1/fs/binfmt_elf.c Fri Mar 15 19:11:37 2002
@@ -1094,7 +1094,7 @@
prstatus.pr_sigpend = current->pending.signal.sig[0];
prstatus.pr_sighold = current->blocked.sig[0];
psinfo.pr_pid = prstatus.pr_pid = current->pid;
- psinfo.pr_ppid = prstatus.pr_ppid = current->p_pptr->pid;
+ psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
psinfo.pr_pgrp = prstatus.pr_pgrp = current->pgrp;
psinfo.pr_sid = prstatus.pr_sid = current->session;
prstatus.pr_utime.tv_sec = CT_TO_SECS(current->times.tms_utime);
diff -uNr linux-2.5.6-pre1/fs/proc/array.c linux-wait-256p1/fs/proc/array.c
--- linux-2.5.6-pre1/fs/proc/array.c Wed Feb 27 08:26:55 2002
+++ linux-wait-256p1/fs/proc/array.c Fri Mar 15 19:09:08 2002
@@ -159,7 +159,7 @@
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n",
get_task_state(p), p->tgid,
- p->pid, p->pid ? p->p_opptr->pid : 0, 0,
+ p->pid, p->pid ? p->real_parent->pid : 0, 0,
p->uid, p->euid, p->suid, p->fsuid,
p->gid, p->egid, p->sgid, p->fsgid);
read_unlock(&tasklist_lock);
@@ -340,7 +340,7 @@
nice = task_nice(task);
read_lock(&tasklist_lock);
- ppid = task->pid ? task->p_opptr->pid : 0;
+ ppid = task->pid ? task->real_parent->pid : 0;
read_unlock(&tasklist_lock);
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
diff -uNr linux-2.5.6-pre1/fs/proc/base.c linux-wait-256p1/fs/proc/base.c
--- linux-2.5.6-pre1/fs/proc/base.c Wed Feb 27 08:26:55 2002
+++ linux-wait-256p1/fs/proc/base.c Fri Mar 15 19:09:19 2002
@@ -336,7 +336,7 @@
};
#define MAY_PTRACE(p) \
-(p==current||(p->p_pptr==current&&(p->ptrace & PT_PTRACED)&&p->state==TASK_STOPPED))
+(p==current||(p->parent==current&&(p->ptrace & PT_PTRACED)&&p->state==TASK_STOPPED))
static int mem_open(struct inode* inode, struct file* file)
diff -uNr linux-2.5.6-pre1/include/linux/init_task.h linux-wait-256p1/include/linux/init_task.h
--- linux-2.5.6-pre1/include/linux/init_task.h Wed Feb 27 08:27:00 2002
+++ linux-wait-256p1/include/linux/init_task.h Fri Mar 15 18:58:27 2002
@@ -55,8 +55,10 @@
time_slice: HZ, \
next_task: &tsk, \
prev_task: &tsk, \
- p_opptr: &tsk, \
- p_pptr: &tsk, \
+ real_parent: &tsk, \
+ parent: &tsk, \
+ children: LIST_HEAD_INIT(tsk.children), \
+ sibling: LIST_HEAD_INIT(tsk.sibling), \
thread_group: LIST_HEAD_INIT(tsk.thread_group), \
wait_chldexit: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\
real_timer: { \
diff -uNr linux-2.5.6-pre1/include/linux/sched.h linux-wait-256p1/include/linux/sched.h
--- linux-2.5.6-pre1/include/linux/sched.h Wed Feb 27 08:29:20 2002
+++ linux-wait-256p1/include/linux/sched.h Fri Mar 15 18:52:29 2002
@@ -274,9 +274,12 @@
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
- * p->p_pptr->pid)
+ * p->parent->pid)
*/
- struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
+ struct task_struct *real_parent; /* real parent process (when being debugged) */
+ struct task_struct *parent; /* parent process */
+ struct list_head children; /* list of my children */
+ struct list_head sibling; /* linkage in my parent's children list */
struct list_head thread_group;
/* PID hash table linkage. */
@@ -715,28 +718,44 @@
__ret; \
})
-#define REMOVE_LINKS(p) do { \
- (p)->next_task->prev_task = (p)->prev_task; \
- (p)->prev_task->next_task = (p)->next_task; \
- if ((p)->p_osptr) \
- (p)->p_osptr->p_ysptr = (p)->p_ysptr; \
- if ((p)->p_ysptr) \
- (p)->p_ysptr->p_osptr = (p)->p_osptr; \
- else \
- (p)->p_pptr->p_cptr = (p)->p_osptr; \
+#define REMOVE_LINKS(p) do { \
+ (p)->next_task->prev_task = (p)->prev_task; \
+ (p)->prev_task->next_task = (p)->next_task; \
+ list_del_init(&(p)->sibling); \
} while (0)
-#define SET_LINKS(p) do { \
- (p)->next_task = &init_task; \
- (p)->prev_task = init_task.prev_task; \
- init_task.prev_task->next_task = (p); \
- init_task.prev_task = (p); \
- (p)->p_ysptr = NULL; \
- if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \
- (p)->p_osptr->p_ysptr = p; \
- (p)->p_pptr->p_cptr = p; \
+#define SET_LINKS(p) do { \
+ (p)->next_task = &init_task; \
+ (p)->prev_task = init_task.prev_task; \
+ init_task.prev_task->next_task = (p); \
+ init_task.prev_task = (p); \
+ list_add_tail(&(p)->sibling,&(p)->parent->children); \
} while (0)
+static inline struct task_struct *eldest_child(struct task_struct *p)
+{
+ if (list_empty(&p->children)) return NULL;
+ return list_entry(p->children.next,struct task_struct,sibling);
+}
+
+static inline struct task_struct *youngest_child(struct task_struct *p)
+{
+ if (list_empty(&p->children)) return NULL;
+ return list_entry(p->children.prev,struct task_struct,sibling);
+}
+
+static inline struct task_struct *older_sibling(struct task_struct *p)
+{
+ if (p->sibling.prev==&p->parent->children) return NULL;
+ return list_entry(p->sibling.prev,struct task_struct,sibling);
+}
+
+static inline struct task_struct *younger_sibling(struct task_struct *p)
+{
+ if (p->sibling.next==&p->parent->children) return NULL;
+ return list_entry(p->sibling.next,struct task_struct,sibling);
+}
+
#define for_each_task(p) \
for (p = &init_task ; (p = p->next_task) != &init_task ; )
diff -uNr linux-2.5.6-pre1/kernel/exit.c linux-wait-256p1/kernel/exit.c
--- linux-2.5.6-pre1/kernel/exit.c Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/exit.c Fri Mar 15 19:05:47 2002
@@ -91,10 +91,10 @@
for_each_task(p) {
if ((p == ignored_task) || (p->pgrp != pgrp) ||
(p->state == TASK_ZOMBIE) ||
- (p->p_pptr->pid == 1))
+ (p->parent->pid == 1))
continue;
- if ((p->p_pptr->pgrp != pgrp) &&
- (p->p_pptr->session == p->session)) {
+ if ((p->parent->pgrp != pgrp) &&
+ (p->parent->session == p->session)) {
read_unlock(&tasklist_lock);
return 0;
}
@@ -144,8 +144,8 @@
/* Reparent to init */
REMOVE_LINKS(current);
- current->p_pptr = child_reaper;
- current->p_opptr = child_reaper;
+ current->parent = child_reaper;
+ current->real_parent = child_reaper;
SET_LINKS(current);
/* Set the exit signal to SIGCHLD so we signal init on exit */
@@ -217,16 +217,16 @@
reaper = child_reaper;
for_each_task(p) {
- if (p->p_opptr == father) {
+ if (p->real_parent == father) {
/* We dont want people slaying init */
p->exit_signal = SIGCHLD;
p->self_exec_id++;
/* Make sure we're not reparenting to ourselves */
if (p == reaper)
- p->p_opptr = child_reaper;
+ p->real_parent = child_reaper;
else
- p->p_opptr = reaper;
+ p->real_parent = reaper;
if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
}
@@ -400,7 +400,7 @@
* is about to become orphaned.
*/
- t = current->p_pptr;
+ t = current->parent;
if ((t->pgrp != current->pgrp) &&
(t->session == current->session) &&
@@ -445,17 +445,12 @@
write_lock_irq(&tasklist_lock);
current->state = TASK_ZOMBIE;
do_notify_parent(current, current->exit_signal);
- while (current->p_cptr != NULL) {
- p = current->p_cptr;
- current->p_cptr = p->p_osptr;
- p->p_ysptr = NULL;
+ while ((p = eldest_child(current))) {
+ list_del_init(&p->sibling);
p->ptrace = 0;
- p->p_pptr = p->p_opptr;
- p->p_osptr = p->p_pptr->p_cptr;
- if (p->p_osptr)
- p->p_osptr->p_ysptr = p;
- p->p_pptr->p_cptr = p;
+ p->parent = p->real_parent;
+ list_add_tail(&p->sibling,&p->parent->children);
if (p->state == TASK_ZOMBIE)
do_notify_parent(p, p->exit_signal);
/*
@@ -568,7 +563,9 @@
tsk = current;
do {
struct task_struct *p;
- for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
+ struct list_head *_p;
+ list_for_each(_p,&tsk->children) {
+ p = list_entry(_p,struct task_struct,sibling);
if (pid>0) {
if (p->pid != pid)
continue;
@@ -613,10 +610,10 @@
if (retval)
goto end_wait4;
retval = p->pid;
- if (p->p_opptr != p->p_pptr) {
+ if (p->real_parent != p->parent) {
write_lock_irq(&tasklist_lock);
REMOVE_LINKS(p);
- p->p_pptr = p->p_opptr;
+ p->parent = p->real_parent;
SET_LINKS(p);
do_notify_parent(p, SIGCHLD);
write_unlock_irq(&tasklist_lock);
diff -uNr linux-2.5.6-pre1/kernel/fork.c linux-wait-256p1/kernel/fork.c
--- linux-2.5.6-pre1/kernel/fork.c Wed Feb 27 08:29:20 2002
+++ linux-wait-256p1/kernel/fork.c Fri Mar 15 18:53:10 2002
@@ -666,7 +666,8 @@
INIT_LIST_HEAD(&p->run_list);
- p->p_cptr = NULL;
+ INIT_LIST_HEAD(&p->children);
+ INIT_LIST_HEAD(&p->sibling);
init_waitqueue_head(&p->wait_chldexit);
p->vfork_done = NULL;
if (clone_flags & CLONE_VFORK) {
@@ -766,12 +767,12 @@
write_lock_irq(&tasklist_lock);
/* CLONE_PARENT re-uses the old parent */
- p->p_opptr = current->p_opptr;
- p->p_pptr = current->p_pptr;
+ p->real_parent = current->real_parent;
+ p->parent = current->parent;
if (!(clone_flags & CLONE_PARENT)) {
- p->p_opptr = current;
+ p->real_parent = current;
if (!(p->ptrace & PT_PTRACED))
- p->p_pptr = current;
+ p->parent = current;
}
if (clone_flags & CLONE_THREAD) {
diff -uNr linux-2.5.6-pre1/kernel/ptrace.c linux-wait-256p1/kernel/ptrace.c
--- linux-2.5.6-pre1/kernel/ptrace.c Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/ptrace.c Fri Mar 15 19:06:07 2002
@@ -24,7 +24,7 @@
if (!(child->ptrace & PT_PTRACED))
return -ESRCH;
- if (child->p_pptr != current)
+ if (child->parent != current)
return -ESRCH;
if (!kill) {
@@ -70,9 +70,9 @@
task_unlock(task);
write_lock_irq(&tasklist_lock);
- if (task->p_pptr != current) {
+ if (task->parent != current) {
REMOVE_LINKS(task);
- task->p_pptr = current;
+ task->parent = current;
SET_LINKS(task);
}
write_unlock_irq(&tasklist_lock);
@@ -98,7 +98,7 @@
child->exit_code = data;
write_lock_irq(&tasklist_lock);
REMOVE_LINKS(child);
- child->p_pptr = child->p_opptr;
+ child->parent = child->real_parent;
SET_LINKS(child);
write_unlock_irq(&tasklist_lock);
diff -uNr linux-2.5.6-pre1/kernel/sched.c linux-wait-256p1/kernel/sched.c
--- linux-2.5.6-pre1/kernel/sched.c Wed Feb 27 08:29:20 2002
+++ linux-wait-256p1/kernel/sched.c Fri Mar 15 18:40:27 2002
@@ -1311,6 +1311,7 @@
static void show_task(task_t * p)
{
unsigned long free = 0;
+ task_t *relative;
int state;
static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
@@ -1337,17 +1338,17 @@
n++;
free = (unsigned long) n - (unsigned long)(p+1);
}
- printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid);
- if (p->p_cptr)
- printk("%5d ", p->p_cptr->pid);
+ printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
+ if ((relative = eldest_child(p)))
+ printk("%5d ", relative->pid);
else
printk(" ");
- if (p->p_ysptr)
- printk("%7d", p->p_ysptr->pid);
+ if ((relative = younger_sibling(p)))
+ printk("%7d", relative->pid);
else
printk(" ");
- if (p->p_osptr)
- printk(" %5d", p->p_osptr->pid);
+ if ((relative = older_sibling(p)))
+ printk(" %5d", relative->pid);
else
printk(" ");
if (!p->mm)
diff -uNr linux-2.5.6-pre1/kernel/signal.c linux-wait-256p1/kernel/signal.c
--- linux-2.5.6-pre1/kernel/signal.c Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/signal.c Fri Mar 15 19:06:50 2002
@@ -804,8 +804,8 @@
info.si_code = why;
info.si_status = status;
- send_sig_info(sig, &info, tsk->p_pptr);
- wake_up_parent(tsk->p_pptr);
+ send_sig_info(sig, &info, tsk->parent);
+ wake_up_parent(tsk->parent);
}
diff -uNr linux-2.5.6-pre1/kernel/sys.c linux-wait-256p1/kernel/sys.c
--- linux-2.5.6-pre1/kernel/sys.c Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/sys.c Fri Mar 15 19:07:03 2002
@@ -839,7 +839,7 @@
if (!p)
goto out;
- if (p->p_pptr == current || p->p_opptr == current) {
+ if (p->parent == current || p->real_parent == current) {
err = -EPERM;
if (p->session != current->session)
goto out;
diff -uNr linux-2.5.6-pre1/kernel/timer.c linux-wait-256p1/kernel/timer.c
--- linux-2.5.6-pre1/kernel/timer.c Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/timer.c Fri Mar 15 19:06:19 2002
@@ -742,14 +742,14 @@
struct task_struct * me = current;
struct task_struct * parent;
- parent = me->p_opptr;
+ parent = me->real_parent;
for (;;) {
pid = parent->pid;
#if CONFIG_SMP
{
struct task_struct *old = parent;
mb();
- parent = me->p_opptr;
+ parent = me->real_parent;
if (old != parent)
continue;
}
reply other threads:[~2002-03-15 21:10 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=17396.1016226560@warthog.cambridge.redhat.com \
--to=dhowells@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=torvalds@transmeta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox