public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] migration_init() synchronization fixes
@ 2002-04-16  0:03 William Lee Irwin III
  2002-04-18 23:15 ` Patricia Gaughen
  0 siblings, 1 reply; 2+ messages in thread
From: William Lee Irwin III @ 2002-04-16  0:03 UTC (permalink / raw)
  To: linux-kernel; +Cc: Martin.Bligh, pdorwin, gone, rml, mingo

This patch has helped me and some others having migration_init() troubles.
The migration_mask's semantics are altered for use as a lock word, and
some of its other functionality is deferred to a new counter and struct
completion to provide protection against pathological cases encountered
in practice.


Cheers,
Bill



diff -Nru a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c	Fri Apr 12 04:16:07 2002
+++ b/kernel/sched.c	Fri Apr 12 04:16:07 2002
@@ -1669,7 +1669,16 @@
 	down(&req.sem);
 }
 
+/*
+ * Treat the bits of migration_mask as lock bits.
+ * If the bit corresponding to the cpu a migration_thread is
+ * running on is already set, then we have failed to claim our
+ * cpu and must yield in order to find another.
+ */
 static volatile unsigned long migration_mask;
+static atomic_t migration_threads_seeking_cpu;
+static struct completion migration_complete
+			= COMPLETION_INITIALIZER(migration_complete);
 
 static int migration_thread(void * unused)
 {
@@ -1693,26 +1702,54 @@
 	 * task binds itself to the current CPU.
 	 */
 
-	/* wait for all migration threads to start up. */
-	while (!migration_mask)
-		yield();
+	preempt_disable();
 
-	for (;;) {
-		preempt_disable();
-		if (test_and_clear_bit(smp_processor_id(), &migration_mask))
-			current->cpus_allowed = 1 << smp_processor_id();
-		if (test_thread_flag(TIF_NEED_RESCHED))
-			schedule();
-		if (!migration_mask)
-			break;
+	/*
+	 * Enter the loop with preemption disabled so that
+	 * smp_processor_id() remains valid through the check. The
+	 * interior of the wait loop re-enables preemption in an
+	 * attempt to get scheduled off the current cpu. When the
+	 * loop is exited the lock bit in migration_mask is acquired
+	 * and preemption is disabled on the way out. This way the
+	 * cpu acquired remains valid when ->cpus_allowed is set.
+	 */
+	while (test_and_set_bit(smp_processor_id(), &migration_mask)) {
 		preempt_enable();
+		yield();
+		preempt_disable();
 	}
+
+	current->cpus_allowed = 1 << smp_processor_id();
 	rq = this_rq();
 	rq->migration_thread = current;
+
+	/*
+	 * Now that we've bound ourselves to a cpu, post to
+	 * migration_threads_seeking_cpu and wait for everyone else.
+	 * Preemption should remain disabled and the cpu should remain
+	 * in busywait. Yielding the cpu will allow the livelock
+	 * where a timing pattern causes an idle task seeking a
+	 * migration_thread to always find the unbound migration_thread
+	 * running on the cpus it tries to steal tasks from.
+	 */
+	atomic_dec(&migration_threads_seeking_cpu);
+	while (atomic_read(&migration_threads_seeking_cpu))
+		cpu_relax();
+
 	preempt_enable();
 
 	sprintf(current->comm, "migration_CPU%d", smp_processor_id());
 
+	/*
+	 * Everyone's found their cpu, so now wake migration_init().
+	 * Multiple wakeups are harmless; removal from the waitqueue
+	 * has locking built-in, and waking an empty queue is valid.
+	 */
+	complete(&migration_complete);
+
+	/*
+	 * Initiate the event loop.
+	 */
 	for (;;) {
 		runqueue_t *rq_src, *rq_dest;
 		struct list_head *head;
@@ -1760,33 +1797,31 @@
 
 void __init migration_init(void)
 {
-	unsigned long tmp, orig_cache_decay_ticks;
+	unsigned long orig_cache_decay_ticks;
 	int cpu;
 
-	tmp = 0;
-	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
-		if (kernel_thread(migration_thread, NULL,
-				CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
-			BUG();
-		tmp |= (1UL << cpu_logical_map(cpu));
-	}
+	atomic_set(&migration_threads_seeking_cpu, smp_num_cpus);
 
-	migration_mask = tmp;
+	preempt_disable();
 
 	orig_cache_decay_ticks = cache_decay_ticks;
 	cache_decay_ticks = 0;
 
-	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
-		int logical = cpu_logical_map(cpu);
+	for (cpu = 0; cpu < smp_num_cpus; cpu++)
+		if (kernel_thread(migration_thread, NULL,
+				CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
+			BUG();
 
-		while (!cpu_rq(logical)->migration_thread) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout(2);
-		}
-	}
-	if (migration_mask)
-		BUG();
+	/*
+	 * We cannot have missed the wakeup, because the
+	 * migration_thread bound to the cpu migration_init() is
+	 * running on cannot acquire this cpu until migration_init()
+	 * has yielded it by means of wait_for_completion().
+	 */
+	wait_for_completion(&migration_complete);
 
 	cache_decay_ticks = orig_cache_decay_ticks;
+
+	preempt_enable();
 }
 #endif

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] migration_init() synchronization fixes
  2002-04-16  0:03 [PATCH] migration_init() synchronization fixes William Lee Irwin III
@ 2002-04-18 23:15 ` Patricia Gaughen
  0 siblings, 0 replies; 2+ messages in thread
From: Patricia Gaughen @ 2002-04-18 23:15 UTC (permalink / raw)
  To: William Lee Irwin III, linux-kernel, Martin.Bligh, pdorwin, rml,
	mingo


  > This patch has helped me and some others having migration_init() troubles.
  > The migration_mask's semantics are altered for use as a lock word, and
  > some of its other functionality is deferred to a new counter and struct
  > completion to provide protection against pathological cases encountered
  > in practice.

Without this patch the 16 proc NUMA-Q box won't boot on 2.5.7/2.5.8.  With the 
patch the system boots. yeah!!!! :-)

Thanks,
Pat

-- 
Patricia Gaughen (gone@us.ibm.com)
IBM Linux Technology Center
http://www.ibm.com/linux/ltc/



^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2002-04-18 23:18 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-04-16  0:03 [PATCH] migration_init() synchronization fixes William Lee Irwin III
2002-04-18 23:15 ` Patricia Gaughen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox