public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] O(1) updates for 2.4.19-pre8-ac1
@ 2002-05-08 21:28 Robert Love
  0 siblings, 0 replies; only message in thread
From: Robert Love @ 2002-05-08 21:28 UTC (permalink / raw)
  To: alan; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 593 bytes --]

Alan,

Attached are the updates for O(1) resynced to 2.4.19-pre8-ac1.  The
previous patches still apply cleanly.  If you want the chunks, they are
at:

	http://www.kernel.org/pub/linux/kernel/people/rml/sched/for_alan/

Specifically, these patches:

	- final yield() abstractions (found some more in arch-dependent
	  code)
	- updated migration_init code, from 2.5, based on Erich Focht's
	  work.  Cleaner and simpler and works.  wli (author of current
	  implementation) signs off.
	- separate the notion of the kernel's max RT priority
	  (MAX_RT_PRIO) from the max RT priority exported to
	  user-space (MAX_USER_RT_PRIO).

Enjoy,

	Robert Love


[-- Attachment #2: sched-updates-rml-2.4.19-pre8-ac1-1.patch --]
[-- Type: text/x-patch, Size: 11630 bytes --]

diff -urN linux-2.4.19-pre8-ac1/arch/alpha/mm/fault.c linux/arch/alpha/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/alpha/mm/fault.c	Wed May  8 12:03:59 2002
+++ linux/arch/alpha/mm/fault.c	Wed May  8 13:40:13 2002
@@ -196,8 +196,7 @@
  */
 out_of_memory:
 	if (current->pid == 1) {
-		current->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/arm/mm/fault-common.c linux/arch/arm/mm/fault-common.c
--- linux-2.4.19-pre8-ac1/arch/arm/mm/fault-common.c	Wed May  8 12:04:09 2002
+++ linux/arch/arm/mm/fault-common.c	Wed May  8 13:40:13 2002
@@ -225,8 +225,7 @@
 	 * If we are out of memory for pid1,
 	 * sleep for a while and retry
 	 */
-	tsk->policy |= SCHED_YIELD;
-	schedule();
+	yield();
 	goto survive;
 
 check_stack:
diff -urN linux-2.4.19-pre8-ac1/arch/ia64/mm/fault.c linux/arch/ia64/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/ia64/mm/fault.c	Wed May  8 12:04:11 2002
+++ linux/arch/ia64/mm/fault.c	Wed May  8 13:40:13 2002
@@ -196,8 +196,7 @@
   out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (current->pid == 1) {
-		current->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/m68k/mm/fault.c linux/arch/m68k/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/m68k/mm/fault.c	Wed May  8 12:04:07 2002
+++ linux/arch/m68k/mm/fault.c	Wed May  8 13:40:13 2002
@@ -181,8 +181,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (current->pid == 1) {
-		current->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/mips/mm/fault.c linux/arch/mips/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/mips/mm/fault.c	Wed May  8 12:04:01 2002
+++ linux/arch/mips/mm/fault.c	Wed May  8 13:40:13 2002
@@ -211,8 +211,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (tsk->pid == 1) {
-		tsk->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/mips64/mm/fault.c linux/arch/mips64/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/mips64/mm/fault.c	Wed May  8 12:04:13 2002
+++ linux/arch/mips64/mm/fault.c	Wed May  8 13:40:13 2002
@@ -240,8 +240,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (tsk->pid == 1) {
-		tsk->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/ppc/mm/fault.c linux/arch/ppc/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/ppc/mm/fault.c	Wed May  8 12:04:04 2002
+++ linux/arch/ppc/mm/fault.c	Wed May  8 13:40:13 2002
@@ -197,8 +197,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (current->pid == 1) {
-		current->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/s390/mm/fault.c linux/arch/s390/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/s390/mm/fault.c	Wed May  8 12:04:13 2002
+++ linux/arch/s390/mm/fault.c	Wed May  8 13:40:13 2002
@@ -290,8 +290,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (tsk->pid == 1) {
-		tsk->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/s390x/mm/fault.c linux/arch/s390x/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/s390x/mm/fault.c	Wed May  8 12:04:14 2002
+++ linux/arch/s390x/mm/fault.c	Wed May  8 13:40:13 2002
@@ -290,8 +290,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (tsk->pid == 1) {
-		tsk->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/arch/sh/mm/fault.c linux/arch/sh/mm/fault.c
--- linux-2.4.19-pre8-ac1/arch/sh/mm/fault.c	Wed May  8 12:04:10 2002
+++ linux/arch/sh/mm/fault.c	Wed May  8 13:40:13 2002
@@ -207,8 +207,7 @@
 out_of_memory:
 	up_read(&mm->mmap_sem);
 	if (current->pid == 1) {
-		current->policy |= SCHED_YIELD;
-		schedule();
+		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
diff -urN linux-2.4.19-pre8-ac1/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.19-pre8-ac1/kernel/sched.c	Wed May  8 12:03:14 2002
+++ linux/kernel/sched.c	Wed May  8 13:40:18 2002
@@ -22,13 +22,17 @@
 #include <linux/kernel_stat.h>
 
 /*
- * Priority of a process goes from 0 to MAX_PRIO-1.  The
- * 0 to MAX_RT_PRIO-1 priority range is allocated to RT tasks,
- * the MAX_RT_PRIO to MAX_PRIO range is for SCHED_OTHER tasks.
- * Priority values are inverted: lower p->prio value means higher
- * priority.
+ * Priority of a process goes from 0 to 139. The 0-99
+ * priority range is allocated to RT tasks, the 100-139
+ * range is for SCHED_OTHER tasks. Priority values are
+ * inverted: lower p->prio value means higher priority.
+ * 
+ * MAX_USER_RT_PRIO allows the actual maximum RT priority
+ * to be separate from the value exported to user-space.
+ * NOTE: MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
  */
 #define MAX_RT_PRIO		100
+#define MAX_USER_RT_PRIO	100
 #define MAX_PRIO		(MAX_RT_PRIO + 40)
 
 /*
@@ -1025,7 +1029,7 @@
  */
 int task_prio(task_t *p)
 {
-	return p->prio - MAX_RT_PRIO;
+	return p->prio - MAX_USER_RT_PRIO;
 }
 
 int task_nice(task_t *p)
@@ -1082,11 +1086,11 @@
 	}
 
 	/*
-	 * Valid priorities for SCHED_FIFO and SCHED_RR are 1..MAX_RT_PRIO-1,
-	 * valid priority for SCHED_OTHER is 0.
+	 * Valid priorities for SCHED_FIFO and SCHED_RR are
+	 * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_OTHER is 0.
 	 */
 	retval = -EINVAL;
-	if (lp.sched_priority < 0 || lp.sched_priority > MAX_RT_PRIO - 1)
+	if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
 		goto out_unlock;
 	if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
 		goto out_unlock;
@@ -1106,7 +1110,7 @@
 	p->policy = policy;
 	p->rt_priority = lp.sched_priority;
 	if (policy != SCHED_OTHER)
-		p->prio = (MAX_RT_PRIO - 1) - p->rt_priority;
+		p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
 	else
 		p->prio = p->static_prio;
 	if (array)
@@ -1229,7 +1233,7 @@
 	switch (policy) {
 	case SCHED_FIFO:
 	case SCHED_RR:
-		ret = MAX_RT_PRIO - 1;
+		ret = MAX_USER_RT_PRIO-1;
 		break;
 	case SCHED_OTHER:
 		ret = 0;
@@ -1536,80 +1540,34 @@
 	down(&req.sem);
 }
 
-/*
- * Treat the bits of migration_mask as lock bits.
- * If the bit corresponding to the cpu a migration_thread is
- * running on then we have failed to claim our cpu and must
- * yield in order to find another.
- */
-static volatile unsigned long migration_mask;
-static atomic_t migration_threads_seeking_cpu;
-static struct completion migration_complete
-			= COMPLETION_INITIALIZER(migration_complete);
-
-static int migration_thread(void * unused)
+static int migration_thread(void * bind_cpu)
 {
-	struct sched_param param = { sched_priority: MAX_RT_PRIO - 1 };
+	int cpu = cpu_logical_map((int) (long) bind_cpu);
+	struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
 	runqueue_t *rq;
 	int ret;
 
 	daemonize();
 	sigfillset(&current->blocked);
 	set_fs(KERNEL_DS);
-	ret = setscheduler(0, SCHED_FIFO, &param);
-
-	/*
-	 * We have to migrate manually - there is no migration thread
-	 * to do this for us yet :-)
-	 *
-	 * We use the following property of the Linux scheduler. At
-	 * this point no other task is running, so by keeping all
-	 * migration threads running, the load-balancer will distribute
-	 * them between all CPUs equally. At that point every migration
-	 * task binds itself to the current CPU.
-	 */
 
 	/*
-	 * Enter the loop with preemption disabled so that
-	 * smp_processor_id() remains valid through the check. The
-	 * interior of the wait loop re-enables preemption in an
-	 * attempt to get scheduled off the current cpu. When the
-	 * loop is exited the lock bit in migration_mask is acquired
-	 * and preemption is disabled on the way out. This way the
-	 * cpu acquired remains valid when ->cpus_allowed is set.
+	 * The first migration thread is started on CPU #0. This one can
+	 * migrate the other migration threads to their destination CPUs.
 	 */
-	while (test_and_set_bit(smp_processor_id(), &migration_mask))
-		yield();
+	if (cpu != 0) {
+		while (!cpu_rq(cpu_logical_map(0))->migration_thread)
+			yield();
+		set_cpus_allowed(current, 1UL << cpu);
+	}
+	printk("migration_task %d on cpu=%d\n", cpu, smp_processor_id());
+	ret = setscheduler(0, SCHED_FIFO, &param);
 
-	current->cpus_allowed = 1 << smp_processor_id();
 	rq = this_rq();
 	rq->migration_thread = current;
 
-	/*
-	 * Now that we've bound ourselves to a cpu, post to
-	 * migration_threads_seeking_cpu and wait for everyone else.
-	 * Preemption should remain disabled and the cpu should remain
-	 * in busywait. Yielding the cpu will allow the livelock
-	 * where where a timing pattern causes an idle task seeking a
-	 * migration_thread to always find the unbound migration_thread 
-	 * running on the cpu's it tries to steal tasks from.
-	 */
-	atomic_dec(&migration_threads_seeking_cpu);
-	while (atomic_read(&migration_threads_seeking_cpu))
-		cpu_relax();
-
 	sprintf(current->comm, "migration_CPU%d", smp_processor_id());
 
-	/*
-	 * Everyone's found their cpu, so now wake migration_init().
-	 * Multiple wakeups are harmless; removal from the waitqueue
-	 * has locking built-in, and waking an empty queue is valid.
-	 */
-	complete(&migration_complete);
-
-	/*
-	 * Initiate the event loop.
-	 */
 	for (;;) {
 		runqueue_t *rq_src, *rq_dest;
 		struct list_head *head;
@@ -1634,25 +1592,25 @@
 		cpu_dest = __ffs(p->cpus_allowed);
 		rq_dest = cpu_rq(cpu_dest);
 repeat:
-		cpu_src = p->cpu;
+		cpu_src = p->thread_info->cpu;
 		rq_src = cpu_rq(cpu_src);
 
 		local_irq_save(flags);
 		double_rq_lock(rq_src, rq_dest);
-		if (p->cpu != cpu_src) {
-			local_irq_restore(flags);
+		if (p->thread_info->cpu != cpu_src) {
 			double_rq_unlock(rq_src, rq_dest);
+			local_irq_restore(flags);
 			goto repeat;
 		}
 		if (rq_src == rq) {
-			p->cpu = cpu_dest;
+			p->thread_info->cpu = cpu_dest;
 			if (p->array) {
 				deactivate_task(p, rq_src);
 				activate_task(p, rq_dest);
 			}
 		}
-		local_irq_restore(flags);
 		double_rq_unlock(rq_src, rq_dest);
+		local_irq_restore(flags);
 
 		up(&req->sem);
 	}
@@ -1660,28 +1618,19 @@
 
 void __init migration_init(void)
 {
-	unsigned long orig_cache_decay_ticks;
 	int cpu;
 
-	atomic_set(&migration_threads_seeking_cpu, smp_num_cpus);
-
-	orig_cache_decay_ticks = cache_decay_ticks;
-	cache_decay_ticks = 0;
-
-	for (cpu = 0; cpu < smp_num_cpus; cpu++)
-		if (kernel_thread(migration_thread, NULL,
+	current->cpus_allowed = 1UL << cpu_logical_map(0);
+	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
+		if (kernel_thread(migration_thread, (void *) (long) cpu,
 				CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
 			BUG();
+	}
+	current->cpus_allowed = -1L;
 
-	/*
-	 * We cannot have missed the wakeup for the migration_thread
-	 * bound for the cpu migration_init() is running on cannot
-	 * acquire this cpu until migration_init() has yielded it by
-	 * means of wait_for_completion().
-	 */
-	wait_for_completion(&migration_complete);
-
-	cache_decay_ticks = orig_cache_decay_ticks;
+	for (cpu = 0; cpu < smp_num_cpus; cpu++)
+		while (!cpu_rq(cpu_logical_map(cpu))->migration_thread)
+			schedule_timeout(2);
 }
 
 #endif /* CONFIG_SMP */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2002-05-08 21:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-05-08 21:28 [PATCH] O(1) updates for 2.4.19-pre8-ac1 Robert Love

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox