public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: torvalds@transmeta.com
Cc: linux-kernel@vger.kernel.org, mingo@redhat.com
Subject: [PATCH] wait4() WIFSTOPPED starvation fix #1/2
Date: Fri, 15 Mar 2002 21:09:20 +0000	[thread overview]
Message-ID: <17396.1016226560@warthog.cambridge.redhat.com> (raw)


Hi Linus,

There appears to be a starvation bug in sys_wait4().

The case runs like this:

 (1) If a process has a lot of threads, and a debugger like strace is running
     over all the threads, then the threads will be sat in the stopped more
     often than they're running.

 (2) the children being traced are "reparented" and added to strace's children
     list.

 (4) strace uses wait4() to find an _unspecified_ stopped (T) or zombie (Z)
     state process for it to deal with.

 (5) sys_wait4() always walks the list of children in the same order
     (effectively fork/clone order).

 (6) children at the end of the list starve for attention because sys_wait4()
     always takes the first process on the list that needs attention, and the
     list isn't ever rearranged.

This patch (#1) just converts the task_struct to use struct list_head rather
than direct pointers for maintaining the children list.

David

diff -uNr linux-2.5.6-pre1/arch/i386/kernel/signal.c linux-wait-256p1/arch/i386/kernel/signal.c
--- linux-2.5.6-pre1/arch/i386/kernel/signal.c	Wed Feb 27 08:27:56 2002
+++ linux-wait-256p1/arch/i386/kernel/signal.c	Fri Mar 15 19:10:03 2002
@@ -630,8 +630,8 @@
 				info.si_signo = signr;
 				info.si_errno = 0;
 				info.si_code = SI_USER;
-				info.si_pid = current->p_pptr->pid;
-				info.si_uid = current->p_pptr->uid;
+				info.si_pid = current->parent->pid;
+				info.si_uid = current->parent->uid;
 			}
 
 			/* If the (new) signal is now blocked, requeue it.  */
@@ -670,7 +670,7 @@
 			case SIGSTOP: {
 				struct signal_struct *sig;
 				current->exit_code = signr;
-				sig = current->p_pptr->sig;
+				sig = current->parent->sig;
 				preempt_disable();
 				current->state = TASK_STOPPED;
 				if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
diff -uNr linux-2.5.6-pre1/fs/binfmt_elf.c linux-wait-256p1/fs/binfmt_elf.c
--- linux-2.5.6-pre1/fs/binfmt_elf.c	Wed Feb 27 08:26:56 2002
+++ linux-wait-256p1/fs/binfmt_elf.c	Fri Mar 15 19:11:37 2002
@@ -1094,7 +1094,7 @@
 	prstatus.pr_sigpend = current->pending.signal.sig[0];
 	prstatus.pr_sighold = current->blocked.sig[0];
 	psinfo.pr_pid = prstatus.pr_pid = current->pid;
-	psinfo.pr_ppid = prstatus.pr_ppid = current->p_pptr->pid;
+	psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
 	psinfo.pr_pgrp = prstatus.pr_pgrp = current->pgrp;
 	psinfo.pr_sid = prstatus.pr_sid = current->session;
 	prstatus.pr_utime.tv_sec = CT_TO_SECS(current->times.tms_utime);
diff -uNr linux-2.5.6-pre1/fs/proc/array.c linux-wait-256p1/fs/proc/array.c
--- linux-2.5.6-pre1/fs/proc/array.c	Wed Feb 27 08:26:55 2002
+++ linux-wait-256p1/fs/proc/array.c	Fri Mar 15 19:09:08 2002
@@ -159,7 +159,7 @@
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p), p->tgid,
-		p->pid, p->pid ? p->p_opptr->pid : 0, 0,
+		p->pid, p->pid ? p->real_parent->pid : 0, 0,
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
 	read_unlock(&tasklist_lock);	
@@ -340,7 +340,7 @@
 	nice = task_nice(task);
 
 	read_lock(&tasklist_lock);
-	ppid = task->pid ? task->p_opptr->pid : 0;
+	ppid = task->pid ? task->real_parent->pid : 0;
 	read_unlock(&tasklist_lock);
 	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
diff -uNr linux-2.5.6-pre1/fs/proc/base.c linux-wait-256p1/fs/proc/base.c
--- linux-2.5.6-pre1/fs/proc/base.c	Wed Feb 27 08:26:55 2002
+++ linux-wait-256p1/fs/proc/base.c	Fri Mar 15 19:09:19 2002
@@ -336,7 +336,7 @@
 };
 
 #define MAY_PTRACE(p) \
-(p==current||(p->p_pptr==current&&(p->ptrace & PT_PTRACED)&&p->state==TASK_STOPPED))
+(p==current||(p->parent==current&&(p->ptrace & PT_PTRACED)&&p->state==TASK_STOPPED))
 
 
 static int mem_open(struct inode* inode, struct file* file)
diff -uNr linux-2.5.6-pre1/include/linux/init_task.h linux-wait-256p1/include/linux/init_task.h
--- linux-2.5.6-pre1/include/linux/init_task.h	Wed Feb 27 08:27:00 2002
+++ linux-wait-256p1/include/linux/init_task.h	Fri Mar 15 18:58:27 2002
@@ -55,8 +55,10 @@
     time_slice:		HZ,						\
     next_task:		&tsk,						\
     prev_task:		&tsk,						\
-    p_opptr:		&tsk,						\
-    p_pptr:		&tsk,						\
+    real_parent:	&tsk,						\
+    parent:		&tsk,						\
+    children:		LIST_HEAD_INIT(tsk.children),			\
+    sibling:		LIST_HEAD_INIT(tsk.sibling),			\
     thread_group:	LIST_HEAD_INIT(tsk.thread_group),		\
     wait_chldexit:	__WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\
     real_timer:		{						\
diff -uNr linux-2.5.6-pre1/include/linux/sched.h linux-wait-256p1/include/linux/sched.h
--- linux-2.5.6-pre1/include/linux/sched.h	Wed Feb 27 08:29:20 2002
+++ linux-wait-256p1/include/linux/sched.h	Fri Mar 15 18:52:29 2002
@@ -274,9 +274,12 @@
 	/* 
 	 * pointers to (original) parent process, youngest child, younger sibling,
 	 * older sibling, respectively.  (p->father can be replaced with 
-	 * p->p_pptr->pid)
+	 * p->parent->pid)
 	 */
-	struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr;
+	struct task_struct *real_parent; /* real parent process (when being debugged) */
+	struct task_struct *parent;	/* parent process */
+	struct list_head children;	/* list of my children */
+	struct list_head sibling;	/* linkage in my parent's children list */
 	struct list_head thread_group;
 
 	/* PID hash table linkage. */
@@ -715,28 +718,44 @@
 	__ret;								\
 })
 
-#define REMOVE_LINKS(p) do { \
-	(p)->next_task->prev_task = (p)->prev_task; \
-	(p)->prev_task->next_task = (p)->next_task; \
-	if ((p)->p_osptr) \
-		(p)->p_osptr->p_ysptr = (p)->p_ysptr; \
-	if ((p)->p_ysptr) \
-		(p)->p_ysptr->p_osptr = (p)->p_osptr; \
-	else \
-		(p)->p_pptr->p_cptr = (p)->p_osptr; \
+#define REMOVE_LINKS(p) do {				\
+	(p)->next_task->prev_task = (p)->prev_task;	\
+	(p)->prev_task->next_task = (p)->next_task;	\
+	list_del_init(&(p)->sibling);			\
 	} while (0)
 
-#define SET_LINKS(p) do { \
-	(p)->next_task = &init_task; \
-	(p)->prev_task = init_task.prev_task; \
-	init_task.prev_task->next_task = (p); \
-	init_task.prev_task = (p); \
-	(p)->p_ysptr = NULL; \
-	if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \
-		(p)->p_osptr->p_ysptr = p; \
-	(p)->p_pptr->p_cptr = p; \
+#define SET_LINKS(p) do {					\
+	(p)->next_task = &init_task;				\
+	(p)->prev_task = init_task.prev_task;			\
+	init_task.prev_task->next_task = (p);			\
+	init_task.prev_task = (p);				\
+	list_add_tail(&(p)->sibling,&(p)->parent->children);	\
 	} while (0)
 
+static inline struct task_struct *eldest_child(struct task_struct *p)
+{
+	if (list_empty(&p->children)) return NULL;
+	return list_entry(p->children.next,struct task_struct,sibling);
+}
+
+static inline struct task_struct *youngest_child(struct task_struct *p)
+{
+	if (list_empty(&p->children)) return NULL;
+	return list_entry(p->children.prev,struct task_struct,sibling);
+}
+
+static inline struct task_struct *older_sibling(struct task_struct *p)
+{
+	if (p->sibling.prev==&p->parent->children) return NULL;
+	return list_entry(p->sibling.prev,struct task_struct,sibling);
+}
+
+static inline struct task_struct *younger_sibling(struct task_struct *p)
+{
+	if (p->sibling.next==&p->parent->children) return NULL;
+	return list_entry(p->sibling.next,struct task_struct,sibling);
+}
+
 #define for_each_task(p) \
 	for (p = &init_task ; (p = p->next_task) != &init_task ; )
 
diff -uNr linux-2.5.6-pre1/kernel/exit.c linux-wait-256p1/kernel/exit.c
--- linux-2.5.6-pre1/kernel/exit.c	Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/exit.c	Fri Mar 15 19:05:47 2002
@@ -91,10 +91,10 @@
 	for_each_task(p) {
 		if ((p == ignored_task) || (p->pgrp != pgrp) ||
 		    (p->state == TASK_ZOMBIE) ||
-		    (p->p_pptr->pid == 1))
+		    (p->parent->pid == 1))
 			continue;
-		if ((p->p_pptr->pgrp != pgrp) &&
-		    (p->p_pptr->session == p->session)) {
+		if ((p->parent->pgrp != pgrp) &&
+		    (p->parent->session == p->session)) {
 			read_unlock(&tasklist_lock);
  			return 0;
 		}
@@ -144,8 +144,8 @@
 
 	/* Reparent to init */
 	REMOVE_LINKS(current);
-	current->p_pptr = child_reaper;
-	current->p_opptr = child_reaper;
+	current->parent = child_reaper;
+	current->real_parent = child_reaper;
 	SET_LINKS(current);
 
 	/* Set the exit signal to SIGCHLD so we signal init on exit */
@@ -217,16 +217,16 @@
 		reaper = child_reaper;
 
 	for_each_task(p) {
-		if (p->p_opptr == father) {
+		if (p->real_parent == father) {
 			/* We dont want people slaying init */
 			p->exit_signal = SIGCHLD;
 			p->self_exec_id++;
 
 			/* Make sure we're not reparenting to ourselves */
 			if (p == reaper)
-				p->p_opptr = child_reaper;
+				p->real_parent = child_reaper;
 			else
-				p->p_opptr = reaper;
+				p->real_parent = reaper;
 
 			if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
 		}
@@ -400,7 +400,7 @@
 	 * is about to become orphaned.
 	 */
 	 
-	t = current->p_pptr;
+	t = current->parent;
 	
 	if ((t->pgrp != current->pgrp) &&
 	    (t->session == current->session) &&
@@ -445,17 +445,12 @@
 	write_lock_irq(&tasklist_lock);
 	current->state = TASK_ZOMBIE;
 	do_notify_parent(current, current->exit_signal);
-	while (current->p_cptr != NULL) {
-		p = current->p_cptr;
-		current->p_cptr = p->p_osptr;
-		p->p_ysptr = NULL;
+	while ((p = eldest_child(current))) {
+		list_del_init(&p->sibling);
 		p->ptrace = 0;
 
-		p->p_pptr = p->p_opptr;
-		p->p_osptr = p->p_pptr->p_cptr;
-		if (p->p_osptr)
-			p->p_osptr->p_ysptr = p;
-		p->p_pptr->p_cptr = p;
+		p->parent = p->real_parent;
+		list_add_tail(&p->sibling,&p->parent->children);
 		if (p->state == TASK_ZOMBIE)
 			do_notify_parent(p, p->exit_signal);
 		/*
@@ -568,7 +563,9 @@
 	tsk = current;
 	do {
 		struct task_struct *p;
-	 	for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
+		struct list_head *_p;
+		list_for_each(_p,&tsk->children) {
+			p = list_entry(_p,struct task_struct,sibling);
 			if (pid>0) {
 				if (p->pid != pid)
 					continue;
@@ -613,10 +610,10 @@
 				if (retval)
 					goto end_wait4; 
 				retval = p->pid;
-				if (p->p_opptr != p->p_pptr) {
+				if (p->real_parent != p->parent) {
 					write_lock_irq(&tasklist_lock);
 					REMOVE_LINKS(p);
-					p->p_pptr = p->p_opptr;
+					p->parent = p->real_parent;
 					SET_LINKS(p);
 					do_notify_parent(p, SIGCHLD);
 					write_unlock_irq(&tasklist_lock);
diff -uNr linux-2.5.6-pre1/kernel/fork.c linux-wait-256p1/kernel/fork.c
--- linux-2.5.6-pre1/kernel/fork.c	Wed Feb 27 08:29:20 2002
+++ linux-wait-256p1/kernel/fork.c	Fri Mar 15 18:53:10 2002
@@ -666,7 +666,8 @@
 
 	INIT_LIST_HEAD(&p->run_list);
 
-	p->p_cptr = NULL;
+	INIT_LIST_HEAD(&p->children);
+	INIT_LIST_HEAD(&p->sibling);
 	init_waitqueue_head(&p->wait_chldexit);
 	p->vfork_done = NULL;
 	if (clone_flags & CLONE_VFORK) {
@@ -766,12 +767,12 @@
 	write_lock_irq(&tasklist_lock);
 
 	/* CLONE_PARENT re-uses the old parent */
-	p->p_opptr = current->p_opptr;
-	p->p_pptr = current->p_pptr;
+	p->real_parent = current->real_parent;
+	p->parent = current->parent;
 	if (!(clone_flags & CLONE_PARENT)) {
-		p->p_opptr = current;
+		p->real_parent = current;
 		if (!(p->ptrace & PT_PTRACED))
-			p->p_pptr = current;
+			p->parent = current;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
diff -uNr linux-2.5.6-pre1/kernel/ptrace.c linux-wait-256p1/kernel/ptrace.c
--- linux-2.5.6-pre1/kernel/ptrace.c	Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/ptrace.c	Fri Mar 15 19:06:07 2002
@@ -24,7 +24,7 @@
 	if (!(child->ptrace & PT_PTRACED))
 		return -ESRCH;
 
-	if (child->p_pptr != current)
+	if (child->parent != current)
 		return -ESRCH;
 
 	if (!kill) {
@@ -70,9 +70,9 @@
 	task_unlock(task);
 
 	write_lock_irq(&tasklist_lock);
-	if (task->p_pptr != current) {
+	if (task->parent != current) {
 		REMOVE_LINKS(task);
-		task->p_pptr = current;
+		task->parent = current;
 		SET_LINKS(task);
 	}
 	write_unlock_irq(&tasklist_lock);
@@ -98,7 +98,7 @@
 	child->exit_code = data;
 	write_lock_irq(&tasklist_lock);
 	REMOVE_LINKS(child);
-	child->p_pptr = child->p_opptr;
+	child->parent = child->real_parent;
 	SET_LINKS(child);
 	write_unlock_irq(&tasklist_lock);
 
diff -uNr linux-2.5.6-pre1/kernel/sched.c linux-wait-256p1/kernel/sched.c
--- linux-2.5.6-pre1/kernel/sched.c	Wed Feb 27 08:29:20 2002
+++ linux-wait-256p1/kernel/sched.c	Fri Mar 15 18:40:27 2002
@@ -1311,6 +1311,7 @@
 static void show_task(task_t * p)
 {
 	unsigned long free = 0;
+	task_t *relative;
 	int state;
 	static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
 
@@ -1337,17 +1338,17 @@
 			n++;
 		free = (unsigned long) n - (unsigned long)(p+1);
 	}
-	printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid);
-	if (p->p_cptr)
-		printk("%5d ", p->p_cptr->pid);
+	printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
+	if ((relative = eldest_child(p)))
+		printk("%5d ", relative->pid);
 	else
 		printk("      ");
-	if (p->p_ysptr)
-		printk("%7d", p->p_ysptr->pid);
+	if ((relative = younger_sibling(p)))
+		printk("%7d", relative->pid);
 	else
 		printk("       ");
-	if (p->p_osptr)
-		printk(" %5d", p->p_osptr->pid);
+	if ((relative = older_sibling(p)))
+		printk(" %5d", relative->pid);
 	else
 		printk("      ");
 	if (!p->mm)
diff -uNr linux-2.5.6-pre1/kernel/signal.c linux-wait-256p1/kernel/signal.c
--- linux-2.5.6-pre1/kernel/signal.c	Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/signal.c	Fri Mar 15 19:06:50 2002
@@ -804,8 +804,8 @@
 	info.si_code = why;
 	info.si_status = status;
 
-	send_sig_info(sig, &info, tsk->p_pptr);
-	wake_up_parent(tsk->p_pptr);
+	send_sig_info(sig, &info, tsk->parent);
+	wake_up_parent(tsk->parent);
 }
 
 
diff -uNr linux-2.5.6-pre1/kernel/sys.c linux-wait-256p1/kernel/sys.c
--- linux-2.5.6-pre1/kernel/sys.c	Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/sys.c	Fri Mar 15 19:07:03 2002
@@ -839,7 +839,7 @@
 	if (!p)
 		goto out;
 
-	if (p->p_pptr == current || p->p_opptr == current) {
+	if (p->parent == current || p->real_parent == current) {
 		err = -EPERM;
 		if (p->session != current->session)
 			goto out;
diff -uNr linux-2.5.6-pre1/kernel/timer.c linux-wait-256p1/kernel/timer.c
--- linux-2.5.6-pre1/kernel/timer.c	Wed Feb 27 08:26:59 2002
+++ linux-wait-256p1/kernel/timer.c	Fri Mar 15 19:06:19 2002
@@ -742,14 +742,14 @@
 	struct task_struct * me = current;
 	struct task_struct * parent;
 
-	parent = me->p_opptr;
+	parent = me->real_parent;
 	for (;;) {
 		pid = parent->pid;
 #if CONFIG_SMP
 {
 		struct task_struct *old = parent;
 		mb();
-		parent = me->p_opptr;
+		parent = me->real_parent;
 		if (old != parent)
 			continue;
 }

                 reply	other threads:[~2002-03-15 21:10 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=17396.1016226560@warthog.cambridge.redhat.com \
    --to=dhowells@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox