All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rusty Russell <rusty@rustcorp.com.au>
To: Nathan Lynch <nathanl@austin.ibm.com>
Cc: Andrew Morton <akpm@osdl.org>,
	lkml - Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Nick Piggin <nickpiggin@yahoo.com.au>,
	Ingo Molnar <mingo@elte.hu>,
	Zwane Mwaikambo <zwane@linuxpower.ca>
Subject: [PATCH 2/2] Hotplug CPU vs TASK_ZOMBIEs: The Sequel to Hotplug CPU vs TASK_DEAD
Date: Thu, 26 Aug 2004 17:58:17 +1000	[thread overview]
Message-ID: <1093507097.29319.2510.camel@bach> (raw)
In-Reply-To: <1093475339.7056.6.camel@pants.austin.ibm.com>

Name: Hotplug CPU vs TASK_ZOMBIEs: The Sequel to Hotplug CPU vs TASK_DEAD
Status: Tested on 2.6.8.1-mm4
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Depends: Misc/stop_machine-nicksched-yield.patch.gz
Depends: Hotcpu/neaten-migrate_all_tasks.patch.gz
Version: -mm

To recap: release_task can now sleep.  Sleeping allows a CPU to go
down underneath you.  release_task removes you from the tasklist, so
you don't get migrated off the CPU: BUG() in sched.c.

In last week's episode, our dashing hero (Ingo Molnar) solved this for
self-reaping tasks by holding the hotplug cpu lock across release_task,
so the CPU cannot go down while the task is being released.
However, in an unexpected twist, the problem remains for tasks whose
parents call release_task on them: the zombies are off the task list,
and lurk on the dead CPU.

Fortunately, the comedic sidekick (Rusty Russell) has an answer: let's
make the hotplug callback walk the runqueue of the dead CPU as well,
taking care of the zombies.

1) Restore exit.c to its former form.  The comment removed from
   exit_notify() is incorrect: sched.c checks PF_DEAD, not the state,
   to decide to do the final put_task_struct(), and it does it for
   all tasks, self-reaping or not.

2) Implement migrate_dead_tasks() in the sched.c hotplug CPU callback.

3) Rename migrate_all_tasks() to migrate_live_tasks().

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .29162-linux-2.6.8.1-mm4/kernel/exit.c .29162-linux-2.6.8.1-mm4.updated/kernel/exit.c
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32142-linux-2.6.8.1-mm4/kernel/exit.c .32142-linux-2.6.8.1-mm4.updated/kernel/exit.c
--- .32142-linux-2.6.8.1-mm4/kernel/exit.c	2004-08-26 13:21:45.000000000 +1000
+++ .32142-linux-2.6.8.1-mm4.updated/kernel/exit.c	2004-08-26 17:25:33.000000000 +1000
@@ -754,8 +754,8 @@ static void exit_notify(struct task_stru
 	state = TASK_ZOMBIE;
 	if (tsk->exit_signal == -1 && tsk->ptrace == 0)
 		state = TASK_DEAD;
-	else
-		tsk->state = state;
+	tsk->state = state;
+
 	/*
 	 * Clear these here so that update_process_times() won't try to deliver
 	 * itimer, profile or rlimit signals to this task while it is in late exit.
@@ -764,14 +764,6 @@ static void exit_notify(struct task_stru
 	tsk->it_prof_value = 0;
 	tsk->rlim[RLIMIT_CPU].rlim_cur = RLIM_INFINITY;
 
-	/*
-	 * Get a reference to it so that we can set the state
-	 * as the last step. The state-setting only matters if the
-	 * current task is releasing itself, to trigger the final
-	 * put_task_struct() in finish_task_switch(). (thread self-reap)
-	 */
-	get_task_struct(tsk);
-
 	write_unlock_irq(&tasklist_lock);
 
 	list_for_each_safe(_p, _n, &ptrace_dead) {
@@ -781,23 +773,12 @@ static void exit_notify(struct task_stru
 	}
 
 	/* If the process is dead, release it - nobody will wait for it */
-	if (state == TASK_DEAD) {
-		lock_cpu_hotplug();
+	if (state == TASK_DEAD)
 		release_task(tsk);
-		write_lock_irq(&tasklist_lock);
-		/*
-		 * No preemption may happen from this point on,
-		 * or CPU hotplug (and task exit) breaks:
-		 */
-		unlock_cpu_hotplug();
-		tsk->state = state;
-		_raw_write_unlock(&tasklist_lock);
-		local_irq_enable();
-	} else
-		preempt_disable();
 
+	/* PF_DEAD causes final put_task_struct after we schedule. */
+	preempt_disable();
 	tsk->flags |= PF_DEAD;
-	put_task_struct(tsk);
 }
 
 asmlinkage NORET_TYPE void do_exit(long code)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32142-linux-2.6.8.1-mm4/kernel/sched.c .32142-linux-2.6.8.1-mm4.updated/kernel/sched.c
--- .32142-linux-2.6.8.1-mm4/kernel/sched.c	2004-08-26 17:25:32.000000000 +1000
+++ .32142-linux-2.6.8.1-mm4.updated/kernel/sched.c	2004-08-26 17:25:54.000000000 +1000
@@ -3820,8 +3820,8 @@ static void move_task_off_dead_cpu(int d
 	__migrate_task(tsk, dead_cpu, dest_cpu);
 }
 
-/* migrate_all_tasks - function to migrate all tasks from the dead cpu. */
-static void migrate_all_tasks(int src_cpu)
+/* Run through task list and migrate tasks from the dead cpu. */
+static void migrate_live_tasks(int src_cpu)
 {
 	struct task_struct *tsk, *t;
 
@@ -3863,6 +3863,47 @@ void sched_idle_next(void)
 
 	spin_unlock_irqrestore(&rq->lock, flags);
 }
+
+static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
+{
+	struct runqueue *rq = cpu_rq(dead_cpu);
+
+	/* Must be exiting, otherwise would be on tasklist. */
+	BUG_ON(tsk->state != TASK_ZOMBIE && tsk->state != TASK_DEAD);
+
+	/* Cannot have done final schedule yet: would have vanished. */
+	BUG_ON(tsk->flags & PF_DEAD);
+
+	get_task_struct(tsk);
+
+	/* 
+	 * Drop lock around migration; if someone else moves it,
+	 * that's OK.  No task can be added to this CPU, so iteration is
+	 * fine.
+	 */
+	spin_unlock_irq(&rq->lock);
+	move_task_off_dead_cpu(dead_cpu, tsk);
+	spin_lock_irq(&rq->lock);
+
+	put_task_struct(tsk);
+}
+
+/* release_task() removes task from tasklist, so we won't find dead tasks. */
+static void migrate_dead_tasks(unsigned int dead_cpu)
+{
+	unsigned arr, i;
+	struct runqueue *rq = cpu_rq(dead_cpu);
+
+	for (arr = 0; arr < 2; arr++) {
+		for (i = 0; i < MAX_PRIO; i++) {
+			struct list_head *list = &rq->arrays[arr].queue[i];
+			while (!list_empty(list))
+				migrate_dead(dead_cpu,
+					     list_entry(list->next, task_t,
+							run_list));
+		}
+	}
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /*
@@ -3902,7 +3943,7 @@ static int migration_call(struct notifie
 		cpu_rq(cpu)->migration_thread = NULL;
 		break;
 	case CPU_DEAD:
-		migrate_all_tasks(cpu);
+		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
 		kthread_stop(rq->migration_thread);
 		rq->migration_thread = NULL;
@@ -3911,6 +3952,7 @@ static int migration_call(struct notifie
 		deactivate_task(rq->idle, rq);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
+		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
 		BUG_ON(rq->nr_running != 0);
 

-- 
Anyone who quotes me in their signature is an idiot -- Rusty Russell


  parent reply	other threads:[~2004-08-26  8:03 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-08-22  8:34 2.6.8.1-mm4 Andrew Morton
2004-08-22 14:20 ` 2.6.8.1-mm4 (strange behavior on dual Opteron w/ NUMA) R. J. Wysocki
2004-08-23 15:29   ` David Howells
2004-08-23 15:46     ` Randy.Dunlap
2004-08-23 18:27       ` Andrew Morton
2004-08-23 18:57         ` Randy.Dunlap
2004-08-23  2:18 ` 2.6.8.1-mm4 - failed opcode was: 0xe7 Ed Tomlinson
2004-08-23  5:00 ` 2.6.8.1-mm4 Eric W. Biederman
2004-08-23 12:00   ` 2.6.8.1-mm4 Alan Cox
2004-08-23 14:24     ` 2.6.8.1-mm4 Eric W. Biederman
2004-08-23 14:11 ` 2.6.8.1-mm4 (compile stats) John Cherry
2004-08-23 18:21 ` 2.6.8.1-mm4 Tomasz Torcz
2004-08-23 18:31   ` 2.6.8.1-mm4 Alan Cox
2004-08-23 19:40     ` 2.6.8.1-mm4 Andrew Morton
2004-08-23 19:15       ` 2.6.8.1-mm4 Alan Cox
2004-08-23 21:19         ` 2.6.8.1-mm4 David S. Miller
2004-08-23 20:21 ` 2.6.8.1-mm4 wli
2004-08-24  6:14   ` 2.6.8.1-mm4 Andrew Morton
2004-08-24  7:55     ` O(1) proc_pid_statm() William Lee Irwin III
2004-08-24 17:05       ` fix text reporting in " William Lee Irwin III
2004-08-25  0:06     ` [PATCH] advice to use good patch subject, for SubmittingPatches Tim Bird
2004-08-23 22:18 ` 2.6.8.1-mm4 - more cpu hotplug breakage Nathan Lynch
2004-08-25  3:57   ` Nathan Lynch
2004-08-25 23:09   ` Nathan Lynch
2004-08-26  2:54     ` Rusty Russell
2004-08-26  7:57     ` [PATCH 1/2] Neaten migrate_all_tasks Rusty Russell
2004-08-26  7:58     ` Rusty Russell [this message]
2004-08-26 15:29       ` [PATCH 2/2] Hotplug CPU vs TASK_ZOMBIEs: The Sequel to Hotplug CPU vs TASK_DEAD Nathan Lynch
2004-08-27  1:38         ` Rusty Russell
2004-08-24 20:56 ` 2.6.8.1-mm4 William Lee Irwin III
2004-08-24 20:57   ` 2.6.8.1-mm4 William Lee Irwin III
2004-08-24 21:23   ` 2.6.8.1-mm4 William Lee Irwin III
2004-08-24 21:26     ` 2.6.8.1-mm4 William Lee Irwin III
2004-08-24 21:37     ` 2.6.8.1-mm4 William Lee Irwin III
2004-08-24 21:48   ` 2.6.8.1-mm4 William Lee Irwin III
2004-08-24 21:06 ` WAITQUEUE_DEBUG crapectomy William Lee Irwin III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1093507097.29319.2510.camel@bach \
    --to=rusty@rustcorp.com.au \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nathanl@austin.ibm.com \
    --cc=nickpiggin@yahoo.com.au \
    --cc=zwane@linuxpower.ca \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.