public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sched: staircase deadline misc fixes
@ 2007-03-28 16:37 Con Kolivas
  2007-03-28 17:34 ` [ck] " Prakash Punnoor
                   ` (3 more replies)
  0 siblings, 4 replies; 92+ messages in thread
From: Con Kolivas @ 2007-03-28 16:37 UTC (permalink / raw)
  To: linux list, Andrew Morton, Ingo Molnar, Andy Whitcroft, ck list

test.kernel.org found some idle time regressions in the latest update to the
staircase deadline scheduler and Andy Whitcroft helped me track down the 
offending problem which was present in all previous RSDL schedulers but
previously wouldn't be manifest without changes in nice. So here is a bugfix
for the set_load_weight being incorrectly set and a few other minor 
improvements. Thanks Andy!

I'm cautiously optimistic that we're at the thin edge of the bugfix wedge now.

---
set_load_weight() should be performed after p->quota is set. This fixes a
large SMP performance regression.

Make sure rr_interval is never set to less than one jiffy.

Some sanity checking in update_cpu_clock will prevent bogus sched_clock
values.

SCHED_BATCH tasks should not set the rq->best_static_prio field.

Correct sysctl rr_interval description to describe the value in milliseconds.

Style fixes.

Signed-off-by: Con Kolivas <kernel@kolivas.org>

---
 Documentation/sysctl/kernel.txt |    8 ++--
 kernel/sched.c                  |   73 +++++++++++++++++++++++++++++-----------
 2 files changed, 58 insertions(+), 23 deletions(-)

Index: linux-2.6.21-rc5-mm2/kernel/sched.c
===================================================================
--- linux-2.6.21-rc5-mm2.orig/kernel/sched.c	2007-03-28 09:01:03.000000000 +1000
+++ linux-2.6.21-rc5-mm2/kernel/sched.c	2007-03-29 00:02:33.000000000 +1000
@@ -88,10 +88,13 @@ unsigned long long __attribute__((weak))
 #define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
 #define SCHED_PRIO(p)		((p)+MAX_RT_PRIO)
 
-/* Some helpers for converting to/from nanosecond timing */
+/* Some helpers for converting to/from various scales.*/
 #define NS_TO_JIFFIES(TIME)	((TIME) / (1000000000 / HZ))
-#define NS_TO_MS(TIME)		((TIME) / 1000000)
+#define JIFFIES_TO_NS(TIME)	((TIME) * (1000000000 / HZ))
 #define MS_TO_NS(TIME)		((TIME) * 1000000)
+/* Can return 0 */
+#define MS_TO_JIFFIES(TIME)	((TIME) * HZ / 1000)
+#define JIFFIES_TO_MS(TIME)	((TIME) * 1000 / HZ)
 
 #define TASK_PREEMPTS_CURR(p, curr)	((p)->prio < (curr)->prio)
 
@@ -852,16 +855,15 @@ static void requeue_task(struct task_str
 
 /*
  * task_timeslice - the total duration a task can run during one major
- * rotation.
+ * rotation. Returns value in jiffies.
  */
 static inline int task_timeslice(struct task_struct *p)
 {
-	int slice, rr;
+	int slice;
 
-	slice = rr = p->quota;
+	slice = NS_TO_JIFFIES(p->quota);
 	if (!rt_task(p))
-		slice += (PRIO_RANGE - 1 - TASK_USER_PRIO(p)) * rr;
-	slice = NS_TO_JIFFIES(slice) ? : 1;
+		slice += (PRIO_RANGE - 1 - TASK_USER_PRIO(p)) * slice;
 	return slice;
 }
 
@@ -875,7 +877,7 @@ static inline int task_timeslice(struct 
 	(((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
 #define TASK_LOAD_WEIGHT(p)	LOAD_WEIGHT(task_timeslice(p))
 #define RTPRIO_TO_LOAD_WEIGHT(rp)	\
-	(LOAD_WEIGHT((rr_interval + 20 + (rp))))
+	(LOAD_WEIGHT((MS_TO_JIFFIES(rr_interval) + 20 + (rp))))
 
 static void set_load_weight(struct task_struct *p)
 {
@@ -973,11 +975,15 @@ static int effective_prio(struct task_st
  * tick still. Below nice 0 they get progressively larger.
  * ie nice -6..0 = rr_interval. nice -10 = 2.5 * rr_interval
  * nice -20 = 10 * rr_interval. nice 1-19 = rr_interval / 2.
+ * Value returned is in nanoseconds.
  */
 static unsigned int rr_quota(struct task_struct *p)
 {
 	int nice = TASK_NICE(p), rr = rr_interval;
 
+	/* Ensure that rr_interval is at least 1 tick */
+	if (unlikely(!MS_TO_JIFFIES(rr)))
+		rr = rr_interval = JIFFIES_TO_MS(1) ? : 1;
 	if (!rt_task(p)) {
 		if (nice < -6) {
 			rr *= nice * nice;
@@ -3198,13 +3204,34 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 /*
  * This is called on clock ticks and on context switches.
  * Bank in p->sched_time the ns elapsed since the last tick or switch.
+ * CPU scheduler quota accounting is also performed here.
+ * The value returned from sched_clock() occasionally gives bogus values so
+ * some sanity checking is required.
  */
 static inline void
-update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
+update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now,
+		 int tick)
 {
 	cputime64_t time_diff = now - p->last_ran;
+	unsigned int min_diff = 1000;
 
-	/* cpu scheduler quota accounting is performed here */
+	if (tick) {
+		/*
+		 * Called from scheduler_tick() there should be less than two
+		 * jiffies worth, and not negative/overflow.
+		 */
+		if (time_diff > JIFFIES_TO_NS(2) || time_diff < min_diff)
+			time_diff = JIFFIES_TO_NS(1);
+	} else {
+		/*
+		 * Called from context_switch there should be less than one
+		 * jiffy worth, and not negative/overflowed. In the case when
+		 * sched_clock fails to return high resolution values this
+		 * also ensures at least 1 min_diff gets banked.
+		 */
+		if (time_diff > JIFFIES_TO_NS(1) || time_diff < min_diff)
+			time_diff = min_diff;
+	}
 	if (p != rq->idle && p->policy != SCHED_FIFO)
 		p->time_slice -= time_diff;
 	p->sched_time += time_diff;
@@ -3353,7 +3380,7 @@ void scheduler_tick(void)
 	int idle_at_tick = idle_cpu(cpu);
 	struct rq *rq = cpu_rq(cpu);
 
-	update_cpu_clock(p, rq, now);
+	update_cpu_clock(p, rq, now, 1);
 
 	if (!idle_at_tick)
 		task_running_tick(rq, p);
@@ -3425,7 +3452,7 @@ retry:
 	}
 	queue = array->queue + idx;
 	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next->time_slice < 0)) {
+	if (unlikely(next->time_slice <= 0)) {
 		/*
 		 * Unlucky enough that this task ran out of time_slice
 		 * before it hit a scheduler_tick so it should have its
@@ -3438,7 +3465,8 @@ retry:
 	}
 	rq->prio_level = idx;
 	next->rotation = rq->prio_rotation;
-	if (next->static_prio < rq->best_static_prio)
+	if (next->static_prio < rq->best_static_prio &&
+	    next->policy != SCHED_BATCH)
 		rq->best_static_prio = next->static_prio;
 	return next;
 }
@@ -3533,7 +3561,7 @@ switch_tasks:
 	clear_tsk_need_resched(prev);
 	rcu_qsctr_inc(task_cpu(prev));
 
-	update_cpu_clock(prev, rq, now);
+	update_cpu_clock(prev, rq, now, 0);
 	prev->timestamp = prev->last_ran = now;
 
 	sched_info_switch(prev, next);
@@ -3978,7 +4006,8 @@ void rt_mutex_setprio(struct task_struct
 	rq = task_rq_lock(p, &flags);
 
 	oldprio = p->prio;
-	if ((queued = task_queued(p)))
+	queued = task_queued(p);
+	if (queued)
 		dequeue_task(p, rq);
 	p->prio = prio;
 
@@ -4023,15 +4052,17 @@ void set_user_nice(struct task_struct *p
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
-	if ((queued = task_queued(p))) {
+	queued = task_queued(p);
+	if (queued) {
 		dequeue_task(p, rq);
 		dec_raw_weighted_load(rq, p);
 	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
-	set_load_weight(p);
 	old_prio = p->prio;
 	p->prio = effective_prio(p);
+	p->quota = rr_quota(p);
+	set_load_weight(p);
 	delta = p->prio - old_prio;
 
 	if (queued) {
@@ -4045,7 +4076,6 @@ void set_user_nice(struct task_struct *p
 			resched_task(rq->curr);
 	}
 out_unlock:
-	p->quota = rr_quota(p);
 	task_rq_unlock(rq, &flags);
 }
 EXPORT_SYMBOL(set_user_nice);
@@ -4166,6 +4196,7 @@ static void __setscheduler(struct task_s
 	p->normal_prio = normal_prio(p);
 	/* we are holding p->pi_lock already */
 	p->prio = rt_mutex_getprio(p);
+	p->quota = rr_quota(p);
 	set_load_weight(p);
 }
 
@@ -4254,7 +4285,8 @@ recheck:
 		spin_unlock_irqrestore(&p->pi_lock, flags);
 		goto recheck;
 	}
-	if ((queued = task_queued(p)))
+	queued = task_queued(p);
+	if (queued)
 		deactivate_task(p, rq);
 	oldprio = p->prio;
 	__setscheduler(p, policy, param->sched_priority);
@@ -7088,7 +7120,8 @@ void normalize_rt_tasks(void)
 		spin_lock_irqsave(&p->pi_lock, flags);
 		rq = __task_rq_lock(p);
 
-		if ((queued = task_queued(p)))
+		queued = task_queued(p);
+		if (queued)
 			deactivate_task(p, task_rq(p));
 		__setscheduler(p, SCHED_NORMAL, 0);
 		if (queued) {
Index: linux-2.6.21-rc5-mm2/Documentation/sysctl/kernel.txt
===================================================================
--- linux-2.6.21-rc5-mm2.orig/Documentation/sysctl/kernel.txt	2007-03-28 09:01:03.000000000 +1000
+++ linux-2.6.21-rc5-mm2/Documentation/sysctl/kernel.txt	2007-03-28 09:01:04.000000000 +1000
@@ -294,9 +294,11 @@ rr_interval:
 This is the smallest duration that any cpu process scheduling unit
 will run for. Increasing this value can increase throughput of cpu
 bound tasks substantially but at the expense of increased latencies
-overall. This value is in _ticks_ and the default value chosen depends
-on the number of cpus available at scheduler initialisation. Valid
-values are from 1-100.
+overall. This value is in milliseconds and the default value chosen
+depends on the number of cpus available at scheduler initialisation
+with a minimum of 8.
+
+Valid values are from 1-100.
 
 ==============================================================
 

-- 
-ck

^ permalink raw reply	[flat|nested] 92+ messages in thread

end of thread, other threads:[~2007-04-23  8:59 UTC | newest]

Thread overview: 92+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-28 16:37 [PATCH] sched: staircase deadline misc fixes Con Kolivas
2007-03-28 17:34 ` [ck] " Prakash Punnoor
2007-04-01  6:40   ` Prakash Punnoor
     [not found]     ` <b14e81f00704010724i3155a16en91074ab789416f3d@mail.gmail.com>
2007-04-01 20:03       ` Prakash Punnoor
2007-03-28 18:48 ` Ingo Molnar
2007-03-28 23:44   ` Con Kolivas
2007-03-29  5:50     ` Mike Galbraith
2007-03-29  6:29       ` Mike Galbraith
2007-03-29  6:54         ` Mike Galbraith
2007-03-29  8:18       ` Mike Galbraith
2007-03-29 12:55         ` [ck] " michael chang
2007-04-03  2:35         ` Con Kolivas
2007-04-03  2:37       ` Con Kolivas
2007-04-03  5:31         ` Mike Galbraith
2007-04-03  6:00           ` Mike Galbraith
2007-04-03  6:01           ` Ingo Molnar
2007-04-03  6:11             ` Mike Galbraith
2007-04-05 11:02             ` Mike Galbraith
2007-04-05 11:09               ` Ingo Molnar
2007-04-05 11:12                 ` Mike Galbraith
2007-04-05 11:15                   ` Ingo Molnar
2007-04-05 13:18                   ` Johannes Stezenbach
2007-04-05 15:28                     ` Mike Galbraith
2007-04-05 11:54               ` [test] sched: SD-latest versus Mike's latest Ingo Molnar
2007-04-05 12:10                 ` Mike Galbraith
2007-04-05 12:12                   ` Ingo Molnar
2007-04-05 12:24                     ` Mike Galbraith
2007-04-05 16:08                 ` Con Kolivas
2007-04-05 19:05                   ` Ingo Molnar
2007-04-05 20:29                   ` Mike Galbraith
2007-04-06  1:03                 ` Ten percent test Con Kolivas
2007-04-06  9:07                   ` Mike Galbraith
2007-04-06  9:28                     ` Con Kolivas
2007-04-06 10:03                       ` Ingo Molnar
2007-04-06 10:40                         ` Mike Galbraith
2007-04-07  6:50                         ` Con Kolivas
2007-04-07 16:12                           ` Gene Heskett
2007-04-07 18:08                             ` Ingo Molnar
2007-04-07 18:23                               ` Gene Heskett
2007-04-07 18:52                                 ` Ingo Molnar
2007-04-07 20:30                                   ` Gene Heskett
2007-04-08 10:41                                     ` Ingo Molnar
2007-04-08 10:58                                       ` Ingo Molnar
2007-04-08 17:04                                         ` Gene Heskett
2007-04-09  4:03                                           ` Mike Galbraith
2007-04-09  4:08                                             ` Gene Heskett
2007-04-09  5:59                                               ` Mike Galbraith
2007-04-09 13:01                                                 ` Gene Heskett
2007-04-08 11:33                                       ` Gene Heskett
2007-04-08 11:40                                         ` Mike Galbraith
2007-04-08 12:02                                           ` Mike Galbraith
2007-04-08 17:57                                             ` Gene Heskett
2007-04-09  4:19                                               ` Mike Galbraith
2007-04-09  5:23                                                 ` Gene Heskett
2007-04-09  6:09                                                   ` Mike Galbraith
2007-04-08 17:56                                           ` Gene Heskett
2007-04-09  4:17                                             ` Mike Galbraith
2007-04-09  5:16                                               ` Gene Heskett
2007-04-09  6:06                                                 ` Mike Galbraith
2007-04-09  8:24                                                 ` Mike Galbraith
2007-04-08 18:51                                       ` Rene Herman
2007-04-09  4:23                                         ` Mike Galbraith
2007-04-09 12:14                                           ` Rene Herman
2007-04-09 13:27                                             ` Andreas Mohr
2007-04-09 19:54                                               ` Rene Herman
2007-04-09 14:15                                             ` Ingo Molnar
2007-04-09 17:05                                               ` Rene Herman
2007-04-09 17:48                                                 ` Ingo Molnar
2007-04-09 19:09                                                   ` Rene Herman
2007-04-09 19:56                                                   ` Gene Heskett
2007-04-09 17:10                                             ` Mike Galbraith
2007-04-09 13:53                                         ` Ingo Molnar
2007-04-09 15:37                                           ` Rene Herman
2007-04-07 19:14                               ` Mike Galbraith
2007-04-07 20:31                                 ` Gene Heskett
2007-04-09 17:51                                 ` William Lee Irwin III
2007-04-09 18:03                                   ` Ingo Molnar
2007-04-09 18:44                                     ` William Lee Irwin III
2007-04-07 16:32                           ` Mike Galbraith
2007-04-08 13:08                           ` Ed Tomlinson
2007-04-09  5:38                             ` Mike Galbraith
2007-04-09 11:26                               ` Ed Tomlinson
2007-04-09 16:50                                 ` Mike Galbraith
2007-04-22 10:48                                   ` [ck] " Martin Steigerwald
2007-04-22 11:15                                     ` Con Kolivas
2007-04-10  2:39                               ` Mike Galbraith
2007-04-10 11:23                                 ` Ed Tomlinson
2007-04-10 12:04                                   ` Mike Galbraith
2007-04-06 10:48                       ` Mike Galbraith
2007-04-03 10:57           ` [PATCH] sched: staircase deadline misc fixes Mike Galbraith
2007-03-29  6:36 ` Con Kolivas
2007-04-23  8:58 ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox