All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Fix CPU Hotplug: neaten migrate_all_tasks.
@ 2004-08-27  1:39 Rusty Russell
  2004-08-27  1:39 ` [PATCH 2/2] Fix CPU Hotplug: Handle dying tasks on dead CPU Rusty Russell
  0 siblings, 1 reply; 2+ messages in thread
From: Rusty Russell @ 2004-08-27  1:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: lkml - Kernel Mailing List, Andrew Morton, Ingo Molnar,
	Nathan Lynch

Against Linus tree.

Name: Neaten migrate_all_tasks
Status: Tested on 2.6.8.1-mm4
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

A followup patch wants to do forced migration, so separate that part
of the code out of migrate_all_tasks().

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .18890-linux-2.6.9-rc1-bk2/kernel/sched.c .18890-linux-2.6.9-rc1-bk2.updated/kernel/sched.c
--- .18890-linux-2.6.9-rc1-bk2/kernel/sched.c	2004-08-27 09:47:55.000000000 +1000
+++ .18890-linux-2.6.9-rc1-bk2.updated/kernel/sched.c	2004-08-27 11:18:40.000000000 +1000
@@ -3957,50 +3957,52 @@ wait_to_die:
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+/* Figure out where task on dead CPU should go, use force if necessary. */
+static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
+{
+	int dest_cpu;
+	cpumask_t mask;
+
+	/* On same node? */
+	mask = node_to_cpumask(cpu_to_node(dead_cpu));
+	cpus_and(mask, mask, tsk->cpus_allowed);
+	dest_cpu = any_online_cpu(mask);
+
+	/* On any allowed CPU? */
+	if (dest_cpu == NR_CPUS)
+		dest_cpu = any_online_cpu(tsk->cpus_allowed);
+
+	/* No more Mr. Nice Guy. */
+	if (dest_cpu == NR_CPUS) {
+		cpus_setall(tsk->cpus_allowed);
+		dest_cpu = any_online_cpu(tsk->cpus_allowed);
+
+		/*
+		 * Don't tell them about moving exiting tasks or
+		 * kernel threads (both mm NULL), since they never
+		 * leave kernel.
+		 */
+		if (tsk->mm && printk_ratelimit())
+			printk(KERN_INFO "process %d (%s) no "
+			       "longer affine to cpu%d\n",
+			       tsk->pid, tsk->comm, dead_cpu);
+	}
+	__migrate_task(tsk, dead_cpu, dest_cpu);
+}
+
 /* migrate_all_tasks - function to migrate all tasks from the dead cpu. */
 static void migrate_all_tasks(int src_cpu)
 {
 	struct task_struct *tsk, *t;
-	int dest_cpu;
-	unsigned int node;
 
 	write_lock_irq(&tasklist_lock);
 
-	/* watch out for per node tasks, let's stay on this node */
-	node = cpu_to_node(src_cpu);
-
 	do_each_thread(t, tsk) {
-		cpumask_t mask;
 		if (tsk == current)
 			continue;
 
-		if (task_cpu(tsk) != src_cpu)
-			continue;
-
-		/* Figure out where this task should go (attempting to
-		 * keep it on-node), and check if it can be migrated
-		 * as-is.  NOTE that kernel threads bound to more than
-		 * one online cpu will be migrated. */
-		mask = node_to_cpumask(node);
-		cpus_and(mask, mask, tsk->cpus_allowed);
-		dest_cpu = any_online_cpu(mask);
-		if (dest_cpu == NR_CPUS)
-			dest_cpu = any_online_cpu(tsk->cpus_allowed);
-		if (dest_cpu == NR_CPUS) {
-			cpus_setall(tsk->cpus_allowed);
-			dest_cpu = any_online_cpu(tsk->cpus_allowed);
-
-			/*
-			 * Don't tell them about moving exiting tasks
-			 * or kernel threads (both mm NULL), since
-			 * they never leave kernel.
-			 */
-			if (tsk->mm && printk_ratelimit())
-				printk(KERN_INFO "process %d (%s) no "
-				       "longer affine to cpu%d\n",
-				       tsk->pid, tsk->comm, src_cpu);
-		}
-		__migrate_task(tsk, src_cpu, dest_cpu);
+		if (task_cpu(tsk) == src_cpu)
+			move_task_off_dead_cpu(src_cpu, tsk);
 	} while_each_thread(t, tsk);
 
 	write_unlock_irq(&tasklist_lock);

-- 
Anyone who quotes me in their signature is an idiot -- Rusty Russell


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH 2/2] Fix CPU Hotplug: Handle dying tasks on dead CPU
  2004-08-27  1:39 [PATCH 1/2] Fix CPU Hotplug: neaten migrate_all_tasks Rusty Russell
@ 2004-08-27  1:39 ` Rusty Russell
  0 siblings, 0 replies; 2+ messages in thread
From: Rusty Russell @ 2004-08-27  1:39 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: lkml - Kernel Mailing List, Andrew Morton, Ingo Molnar,
	Nathan Lynch

Name: Hotplug CPU vs TASK_ZOMBIEs: The Sequel to Hotplug CPU vs TASK_DEAD
Status: Tested on 2.6.8.1-mm4
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Depends: Hotcpu/neaten-migrate_all_tasks.patch.gz

To recap: release_task can now sleep.  Sleeping allows a CPU to go
down underneath you.  release_task removes you from the tasklist, so
you don't get migrated off the CPU: BUG() in sched.c.

In last week's episode, our dashing hero (Ingo Molnar) solved this for
self-reaping tasks by grabbing the hotplug cpu lock to prevent this.
However, in an unexpected twist, the problem remains for tasks whose
parents call release_task on them: the zombies are off the task list,
and lurk on the dead CPU.

Fortunately, the comedic sidekick (Rusty Russell) has an answer: let's
make the hotplug callback walk the runqueue of the dead CPU as well,
taking care of the zombies.

1) Restore exit.c to its former form.  The comment is incorrect:
   sched.c checks PF_DEAD, not the state, to decide to do the final
   put_task_struct(), and it does it for all tasks, self-reaping or
   no.

2) Implement migrate_dead_tasks() in the sched.c hotplug CPU callback.

3) Rename migrate_all_tasks() to migrate_live_tasks().

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .29162-linux-2.6.8.1-mm4/kernel/exit.c .29162-linux-2.6.8.1-mm4.updated/kernel/exit.c
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32142-linux-2.6.8.1-mm4/kernel/exit.c .32142-linux-2.6.8.1-mm4.updated/kernel/exit.c
--- .32142-linux-2.6.8.1-mm4/kernel/exit.c	2004-08-26 13:21:45.000000000 +1000
+++ .32142-linux-2.6.8.1-mm4.updated/kernel/exit.c	2004-08-26 17:25:33.000000000 +1000
@@ -754,8 +754,8 @@ static void exit_notify(struct task_stru
 	state = TASK_ZOMBIE;
 	if (tsk->exit_signal == -1 && tsk->ptrace == 0)
 		state = TASK_DEAD;
-	else
-		tsk->state = state;
+	tsk->state = state;
+
 	/*
 	 * Clear these here so that update_process_times() won't try to deliver
 	 * itimer, profile or rlimit signals to this task while it is in late exit.
@@ -764,14 +764,6 @@ static void exit_notify(struct task_stru
 	tsk->it_prof_value = 0;
 	tsk->rlim[RLIMIT_CPU].rlim_cur = RLIM_INFINITY;
 
-	/*
-	 * Get a reference to it so that we can set the state
-	 * as the last step. The state-setting only matters if the
-	 * current task is releasing itself, to trigger the final
-	 * put_task_struct() in finish_task_switch(). (thread self-reap)
-	 */
-	get_task_struct(tsk);
-
 	write_unlock_irq(&tasklist_lock);
 
 	list_for_each_safe(_p, _n, &ptrace_dead) {
@@ -781,23 +773,12 @@ static void exit_notify(struct task_stru
 	}
 
 	/* If the process is dead, release it - nobody will wait for it */
-	if (state == TASK_DEAD) {
-		lock_cpu_hotplug();
+	if (state == TASK_DEAD)
 		release_task(tsk);
-		write_lock_irq(&tasklist_lock);
-		/*
-		 * No preemption may happen from this point on,
-		 * or CPU hotplug (and task exit) breaks:
-		 */
-		unlock_cpu_hotplug();
-		tsk->state = state;
-		_raw_write_unlock(&tasklist_lock);
-		local_irq_enable();
-	} else
-		preempt_disable();
 
+	/* PF_DEAD causes final put_task_struct after we schedule. */
+	preempt_disable();
 	tsk->flags |= PF_DEAD;
-	put_task_struct(tsk);
 }
 
 asmlinkage NORET_TYPE void do_exit(long code)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32142-linux-2.6.8.1-mm4/kernel/sched.c .32142-linux-2.6.8.1-mm4.updated/kernel/sched.c
--- .32142-linux-2.6.8.1-mm4/kernel/sched.c	2004-08-26 17:25:32.000000000 +1000
+++ .32142-linux-2.6.8.1-mm4.updated/kernel/sched.c	2004-08-26 17:25:54.000000000 +1000
@@ -3820,8 +3820,8 @@ static void move_task_off_dead_cpu(int d
 	__migrate_task(tsk, dead_cpu, dest_cpu);
 }
 
-/* migrate_all_tasks - function to migrate all tasks from the dead cpu. */
-static void migrate_all_tasks(int src_cpu)
+/* Run through task list and migrate tasks from the dead cpu. */
+static void migrate_live_tasks(int src_cpu)
 {
 	struct task_struct *tsk, *t;
 
@@ -3863,6 +3863,47 @@ void sched_idle_next(void)
 
 	spin_unlock_irqrestore(&rq->lock, flags);
 }
+
+static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
+{
+	struct runqueue *rq = cpu_rq(dead_cpu);
+
+	/* Must be exiting, otherwise would be on tasklist. */
+	BUG_ON(tsk->state != TASK_ZOMBIE && tsk->state != TASK_DEAD);
+
+	/* Cannot have done final schedule yet: would have vanished. */
+	BUG_ON(tsk->flags & PF_DEAD);
+
+	get_task_struct(tsk);
+
+	/* 
+	 * Drop lock around migration; if someone else moves it,
+	 * that's OK.  No task can be added to this CPU, so iteration is
+	 * fine.
+	 */
+	spin_unlock_irq(&rq->lock);
+	move_task_off_dead_cpu(dead_cpu, tsk);
+	spin_lock_irq(&rq->lock);
+
+	put_task_struct(tsk);
+}
+
+/* release_task() removes task from tasklist, so we won't find dead tasks. */
+static void migrate_dead_tasks(unsigned int dead_cpu)
+{
+	unsigned arr, i;
+	struct runqueue *rq = cpu_rq(dead_cpu);
+
+	for (arr = 0; arr < 2; arr++) {
+		for (i = 0; i < MAX_PRIO; i++) {
+			struct list_head *list = &rq->arrays[arr].queue[i];
+			while (!list_empty(list))
+				migrate_dead(dead_cpu,
+					     list_entry(list->next, task_t,
+							run_list));
+		}
+	}
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /*
@@ -3902,7 +3943,7 @@ static int migration_call(struct notifie
 		cpu_rq(cpu)->migration_thread = NULL;
 		break;
 	case CPU_DEAD:
-		migrate_all_tasks(cpu);
+		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
 		kthread_stop(rq->migration_thread);
 		rq->migration_thread = NULL;
@@ -3911,6 +3952,7 @@ static int migration_call(struct notifie
 		deactivate_task(rq->idle, rq);
 		rq->idle->static_prio = MAX_PRIO;
 		__setscheduler(rq->idle, SCHED_NORMAL, 0);
+		migrate_dead_tasks(cpu);
 		task_rq_unlock(rq, &flags);
 		BUG_ON(rq->nr_running != 0);
 

-- 
Anyone who quotes me in their signature is an idiot -- Rusty Russell


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2004-08-27  1:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2004-08-27  1:39 [PATCH 1/2] Fix CPU Hotplug: neaten migrate_all_tasks Rusty Russell
2004-08-27  1:39 ` [PATCH 2/2] Fix CPU Hotplug: Handle dying tasks on dead CPU Rusty Russell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.