Re: [git pull request] scheduler updates

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Ingo Molnar <mingo@elte.hu>
To: Mike Galbraith <efault@gmx.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: Re: [git pull request] scheduler updates
Date: Tue, 28 Aug 2007 16:46:04 +0200	[thread overview]
Message-ID: <20070828144604.GA13875@elte.hu> (raw)
In-Reply-To: <1188310265.6336.5.camel@Homer.simpson.net>


* Mike Galbraith <efault@gmx.de> wrote:

> On Tue, 2007-08-28 at 13:32 +0200, Ingo Molnar wrote:
> > Linus, please pull the latest scheduler git tree from:
> > 
> >   git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched.git
> > 
> > no big changes - 5 small fixes and 1 small cleanup:
> 
> FWIW, I spent a few hours testing these patches with various loads, 
> and all was peachy here.  No multimedia or interactivity aberrations 
> noted.

great! Btw., there's another refinement Peter and I are working on (see 
the patch below): to place new tasks into the existing 'scheduling flow' 
in a more seamless way. In practice this should mean fewer firefox spikes 
during a kbuild workload. If you have some time to try it, could you add 
the patch below to your tree too, and see what happens during fork-happy 
workloads? It does not seem to be overly urgent to apply at the moment, 
but it is a nice touch I think.

	Ingo

------------------------>
Subject: sched: place new tasks in the middle of the task pool
From: Peter Zijlstra <a.p.zijlstra@chello.nl>

Place new tasks in the middle of the wait_runtime average. This smooths 
out latency spikes caused by freshly started tasks, without being unfair 
to those tasks. Basically new tasks start right in the 'flow' of 
wait_runtime that exists in the system at that moment.

[ mingo@elte.hu: changed it to use cfs_rq->wait_runtime ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c      |    1 
 kernel/sched_fair.c |   59 +++++++++++++++++++++++++++++-----------------------
 2 files changed, 33 insertions(+), 27 deletions(-)

Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -858,7 +858,6 @@ static void dec_nr_running(struct task_s
 
 static void set_load_weight(struct task_struct *p)
 {
-	task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
 	p->se.wait_runtime = 0;
 
 	if (task_has_rt_policy(p)) {
Index: linux/kernel/sched_fair.c
===================================================================
--- linux.orig/kernel/sched_fair.c
+++ linux/kernel/sched_fair.c
@@ -86,8 +86,8 @@ unsigned int sysctl_sched_features __rea
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
 		SCHED_FEAT_PRECISE_CPU_LOAD	*1 |
-		SCHED_FEAT_START_DEBIT		*1 |
-		SCHED_FEAT_SKIP_INITIAL		*0;
+		SCHED_FEAT_START_DEBIT		*0 |
+		SCHED_FEAT_SKIP_INITIAL		*1;
 
 extern struct sched_class fair_sched_class;
 
@@ -194,6 +194,8 @@ __enqueue_entity(struct cfs_rq *cfs_rq, 
 	update_load_add(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
+
+	cfs_rq->wait_runtime += se->wait_runtime;
 }
 
 static inline void
@@ -205,6 +207,8 @@ __dequeue_entity(struct cfs_rq *cfs_rq, 
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
+
+	cfs_rq->wait_runtime -= se->wait_runtime;
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -326,9 +330,9 @@ __add_wait_runtime(struct cfs_rq *cfs_rq
 static void
 add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
 {
-	schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
+	cfs_rq->wait_runtime -= se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta);
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+	cfs_rq->wait_runtime += se->wait_runtime;
 }
 
 /*
@@ -574,7 +578,6 @@ static void __enqueue_sleeper(struct cfs
 
 	prev_runtime = se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta_fair);
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 	delta_fair = se->wait_runtime - prev_runtime;
 
 	/*
@@ -662,7 +665,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
 			if (tsk->state & TASK_UNINTERRUPTIBLE)
 				se->block_start = rq_of(cfs_rq)->clock;
 		}
-		cfs_rq->wait_runtime -= se->wait_runtime;
 #endif
 	}
 	__dequeue_entity(cfs_rq, se);
@@ -671,7 +673,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static int
+static void
 __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			  struct sched_entity *curr, unsigned long granularity)
 {
@@ -684,9 +686,8 @@ __check_preempt_curr_fair(struct cfs_rq 
 	 */
 	if (__delta > niced_granularity(curr, granularity)) {
 		resched_task(rq_of(cfs_rq)->curr);
-		return 1;
+		curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
 	}
-	return 0;
 }
 
 static inline void
@@ -762,8 +763,7 @@ static void entity_tick(struct cfs_rq *c
 	if (delta_exec > ideal_runtime)
 		gran = 0;
 
-	if (__check_preempt_curr_fair(cfs_rq, next, curr, gran))
-		curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
+	__check_preempt_curr_fair(cfs_rq, next, curr, gran);
 }
 
 /**************************************************
@@ -1087,6 +1087,8 @@ static void task_tick_fair(struct rq *rq
 	}
 }
 
+#define swap(a,b) do { __typeof__(a) tmp = (a); (a) = (b); (b)=tmp; } while (0)
+
 /*
  * Share the fairness runtime between parent and child, thus the
  * total amount of pressure for CPU stays equal - new tasks
@@ -1102,14 +1104,27 @@ static void task_new_fair(struct rq *rq,
 	sched_info_queued(p);
 
 	update_curr(cfs_rq);
-	update_stats_enqueue(cfs_rq, se);
+	if ((long)cfs_rq->wait_runtime < 0)
+		se->wait_runtime = (long)cfs_rq->wait_runtime /
+				(long)cfs_rq->nr_running;
 	/*
-	 * Child runs first: we let it run before the parent
-	 * until it reschedules once. We set up the key so that
-	 * it will preempt the parent:
+	 * The statistical average of wait_runtime is about
+	 * -granularity/2, so initialize the task with that:
 	 */
-	se->fair_key = curr->fair_key -
-		niced_granularity(curr, sched_granularity(cfs_rq)) - 1;
+	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
+		__add_wait_runtime(cfs_rq, se,
+			-niced_granularity(se, sched_granularity(cfs_rq))/2);
+	}
+
+	update_stats_enqueue(cfs_rq, se);
+
+	if (sysctl_sched_child_runs_first && (se->fair_key > curr->fair_key)) {
+		dequeue_entity(cfs_rq, curr, 0);
+		swap(se->wait_runtime, curr->wait_runtime);
+		update_stats_enqueue(cfs_rq, se);
+		enqueue_entity(cfs_rq, curr, 0);
+	}
+
 	/*
 	 * The first wait is dominated by the child-runs-first logic,
 	 * so do not credit it with that waiting time yet:
@@ -1117,16 +1132,8 @@ static void task_new_fair(struct rq *rq,
 	if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL)
 		se->wait_start_fair = 0;
 
-	/*
-	 * The statistical average of wait_runtime is about
-	 * -granularity/2, so initialize the task with that:
-	 */
-	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
-		se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
-		schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
-	}
-
 	__enqueue_entity(cfs_rq, se);
+	__check_preempt_curr_fair(cfs_rq, __pick_next_entity(cfs_rq), curr, 0);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED

next prev parent reply	other threads:[~2007-08-28 14:46 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-08-28 11:32 [git pull request] scheduler updates Ingo Molnar
2007-08-28 14:11 ` Mike Galbraith
2007-08-28 14:46   ` Ingo Molnar [this message]
2007-08-28 14:55     ` Mike Galbraith
  -- strict thread matches above, loose matches on Subject: below --
2007-08-24 14:12 Ingo Molnar
2007-08-24 18:09 ` Linus Torvalds
2007-08-24 19:37   ` Ingo Molnar
2007-08-25 17:23     ` Ingo Molnar
2007-08-25 20:43       ` Ingo Molnar
2007-08-25 21:20       ` Peter Zijlstra
2007-08-31  1:58   ` Roman Zippel
2007-08-23 16:07 Ingo Molnar
2007-08-12 16:32 Ingo Molnar
2007-08-10 21:22 Ingo Molnar
2007-08-08 20:30 Ingo Molnar
2007-08-02 16:08 Ingo Molnar
2007-07-26 12:08 Ingo Molnar
2007-07-19 16:50 Ingo Molnar
2007-07-16  7:53 Ingo Molnar
2007-07-11 19:38 Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070828144604.GA13875@elte.hu \
    --to=mingo@elte.hu \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.