From: Ingo Molnar <mingo@elte.hu>
To: Mike Galbraith <efault@gmx.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
linux-kernel@vger.kernel.org,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: Re: [git pull request] scheduler updates
Date: Tue, 28 Aug 2007 16:46:04 +0200 [thread overview]
Message-ID: <20070828144604.GA13875@elte.hu> (raw)
In-Reply-To: <1188310265.6336.5.camel@Homer.simpson.net>
* Mike Galbraith <efault@gmx.de> wrote:
> On Tue, 2007-08-28 at 13:32 +0200, Ingo Molnar wrote:
> > Linus, please pull the latest scheduler git tree from:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched.git
> >
> > no big changes - 5 small fixes and 1 small cleanup:
>
> FWIW, I spent a few hours testing these patches with various loads,
> and all was peachy here. No multimedia or interactivity aberrations
> noted.
great! Btw., there's another refinement Peter and I are working on (see
the patch below): to place new tasks into the existing 'scheduling flow'
in a more seamless way. In practice this should mean fewer firefox spikes
during a kbuild workload. If you have some time to try it, could you add
the patch below to your tree too, and see what happens during fork-happy
workloads? It does not seem to be overly urgent to apply at the moment,
but it is a nice touch I think.
Ingo
------------------------>
Subject: sched: place new tasks in the middle of the task pool
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Place new tasks in the middle of the wait_runtime average. This smooths
out latency spikes caused by freshly started tasks, without being unfair
to those tasks. Basically new tasks start right into the 'flow' of
wait_runtime that exists in the system at that moment.
[ mingo@elte.hu: changed it to use cfs_rq->wait_runtime ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/sched.c | 1
kernel/sched_fair.c | 59 +++++++++++++++++++++++++++++-----------------------
2 files changed, 33 insertions(+), 27 deletions(-)
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -858,7 +858,6 @@ static void dec_nr_running(struct task_s
static void set_load_weight(struct task_struct *p)
{
- task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
p->se.wait_runtime = 0;
if (task_has_rt_policy(p)) {
Index: linux/kernel/sched_fair.c
===================================================================
--- linux.orig/kernel/sched_fair.c
+++ linux/kernel/sched_fair.c
@@ -86,8 +86,8 @@ unsigned int sysctl_sched_features __rea
SCHED_FEAT_SLEEPER_AVG *0 |
SCHED_FEAT_SLEEPER_LOAD_AVG *1 |
SCHED_FEAT_PRECISE_CPU_LOAD *1 |
- SCHED_FEAT_START_DEBIT *1 |
- SCHED_FEAT_SKIP_INITIAL *0;
+ SCHED_FEAT_START_DEBIT *0 |
+ SCHED_FEAT_SKIP_INITIAL *1;
extern struct sched_class fair_sched_class;
@@ -194,6 +194,8 @@ __enqueue_entity(struct cfs_rq *cfs_rq,
update_load_add(&cfs_rq->load, se->load.weight);
cfs_rq->nr_running++;
se->on_rq = 1;
+
+ cfs_rq->wait_runtime += se->wait_runtime;
}
static inline void
@@ -205,6 +207,8 @@ __dequeue_entity(struct cfs_rq *cfs_rq,
update_load_sub(&cfs_rq->load, se->load.weight);
cfs_rq->nr_running--;
se->on_rq = 0;
+
+ cfs_rq->wait_runtime -= se->wait_runtime;
}
static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -326,9 +330,9 @@ __add_wait_runtime(struct cfs_rq *cfs_rq
static void
add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
{
- schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
+ cfs_rq->wait_runtime -= se->wait_runtime;
__add_wait_runtime(cfs_rq, se, delta);
- schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+ cfs_rq->wait_runtime += se->wait_runtime;
}
/*
@@ -574,7 +578,6 @@ static void __enqueue_sleeper(struct cfs
prev_runtime = se->wait_runtime;
__add_wait_runtime(cfs_rq, se, delta_fair);
- schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
delta_fair = se->wait_runtime - prev_runtime;
/*
@@ -662,7 +665,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
if (tsk->state & TASK_UNINTERRUPTIBLE)
se->block_start = rq_of(cfs_rq)->clock;
}
- cfs_rq->wait_runtime -= se->wait_runtime;
#endif
}
__dequeue_entity(cfs_rq, se);
@@ -671,7 +673,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
/*
* Preempt the current task with a newly woken task if needed:
*/
-static int
+static void
__check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
struct sched_entity *curr, unsigned long granularity)
{
@@ -684,9 +686,8 @@ __check_preempt_curr_fair(struct cfs_rq
*/
if (__delta > niced_granularity(curr, granularity)) {
resched_task(rq_of(cfs_rq)->curr);
- return 1;
+ curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
}
- return 0;
}
static inline void
@@ -762,8 +763,7 @@ static void entity_tick(struct cfs_rq *c
if (delta_exec > ideal_runtime)
gran = 0;
- if (__check_preempt_curr_fair(cfs_rq, next, curr, gran))
- curr->prev_sum_exec_runtime = curr->sum_exec_runtime;
+ __check_preempt_curr_fair(cfs_rq, next, curr, gran);
}
/**************************************************
@@ -1087,6 +1087,8 @@ static void task_tick_fair(struct rq *rq
}
}
+#define swap(a,b) do { __typeof__(a) tmp = (a); (a) = (b); (b)=tmp; } while (0)
+
/*
* Share the fairness runtime between parent and child, thus the
* total amount of pressure for CPU stays equal - new tasks
@@ -1102,14 +1104,27 @@ static void task_new_fair(struct rq *rq,
sched_info_queued(p);
update_curr(cfs_rq);
- update_stats_enqueue(cfs_rq, se);
+ if ((long)cfs_rq->wait_runtime < 0)
+ se->wait_runtime = (long)cfs_rq->wait_runtime /
+ (long)cfs_rq->nr_running;
/*
- * Child runs first: we let it run before the parent
- * until it reschedules once. We set up the key so that
- * it will preempt the parent:
+ * The statistical average of wait_runtime is about
+ * -granularity/2, so initialize the task with that:
*/
- se->fair_key = curr->fair_key -
- niced_granularity(curr, sched_granularity(cfs_rq)) - 1;
+ if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
+ __add_wait_runtime(cfs_rq, se,
+ -niced_granularity(se, sched_granularity(cfs_rq))/2);
+ }
+
+ update_stats_enqueue(cfs_rq, se);
+
+ if (sysctl_sched_child_runs_first && (se->fair_key > curr->fair_key)) {
+ dequeue_entity(cfs_rq, curr, 0);
+ swap(se->wait_runtime, curr->wait_runtime);
+ update_stats_enqueue(cfs_rq, se);
+ enqueue_entity(cfs_rq, curr, 0);
+ }
+
/*
* The first wait is dominated by the child-runs-first logic,
* so do not credit it with that waiting time yet:
@@ -1117,16 +1132,8 @@ static void task_new_fair(struct rq *rq,
if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL)
se->wait_start_fair = 0;
- /*
- * The statistical average of wait_runtime is about
- * -granularity/2, so initialize the task with that:
- */
- if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) {
- se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
- schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
- }
-
__enqueue_entity(cfs_rq, se);
+ __check_preempt_curr_fair(cfs_rq, __pick_next_entity(cfs_rq), curr, 0);
}
#ifdef CONFIG_FAIR_GROUP_SCHED
next prev parent reply other threads:[~2007-08-28 14:46 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-08-28 11:32 [git pull request] scheduler updates Ingo Molnar
2007-08-28 14:11 ` Mike Galbraith
2007-08-28 14:46 ` Ingo Molnar [this message]
2007-08-28 14:55 ` Mike Galbraith
-- strict thread matches above, loose matches on Subject: below --
2007-08-24 14:12 Ingo Molnar
2007-08-24 18:09 ` Linus Torvalds
2007-08-24 19:37 ` Ingo Molnar
2007-08-25 17:23 ` Ingo Molnar
2007-08-25 20:43 ` Ingo Molnar
2007-08-25 21:20 ` Peter Zijlstra
2007-08-31 1:58 ` Roman Zippel
2007-08-23 16:07 Ingo Molnar
2007-08-12 16:32 Ingo Molnar
2007-08-10 21:22 Ingo Molnar
2007-08-08 20:30 Ingo Molnar
2007-08-02 16:08 Ingo Molnar
2007-07-26 12:08 Ingo Molnar
2007-07-19 16:50 Ingo Molnar
2007-07-16 7:53 Ingo Molnar
2007-07-11 19:38 Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070828144604.GA13875@elte.hu \
--to=mingo@elte.hu \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=efault@gmx.de \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.