From: Peter Zijlstra <peterz@infradead.org>
To: Mike Galbraith <efault@gmx.de>
Cc: Greg Smith <gsmith@gregsmith.com>, Ingo Molnar <mingo@elte.hu>,
Dhaval Giani <dhaval@linux.vnet.ibm.com>,
lkml <linux-kernel@vger.kernel.org>,
Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Subject: Re: [patch] Re: PostgreSQL pgbench performance regression in 2.6.23+
Date: Sat, 07 Jun 2008 15:08:04 +0200 [thread overview]
Message-ID: <1212844084.19205.85.camel@lappy.programming.kicks-ass.net> (raw)
In-Reply-To: <1212838682.5571.6.camel@marge.simson.net>
On Sat, 2008-06-07 at 13:38 +0200, Mike Galbraith wrote:
Interesting.. Looks good.
> Index: linux-2.6.26.git/kernel/sched_fair.c
> ===================================================================
> --- linux-2.6.26.git.orig/kernel/sched_fair.c
> +++ linux-2.6.26.git/kernel/sched_fair.c
> @@ -664,6 +664,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, st
>
> update_stats_dequeue(cfs_rq, se);
> if (sleep) {
> + se->last_preempter = NULL;
> update_avg_stats(cfs_rq, se);
> #ifdef CONFIG_SCHEDSTATS
> if (entity_is_task(se)) {
> @@ -692,8 +693,10 @@ check_preempt_tick(struct cfs_rq *cfs_rq
>
> ideal_runtime = sched_slice(cfs_rq, curr);
> delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
> - if (delta_exec > ideal_runtime)
> + if (delta_exec > ideal_runtime) {
> + curr->last_preempter = NULL;
> resched_task(rq_of(cfs_rq)->curr);
> + }
> }
>
> static void
> @@ -994,6 +997,7 @@ wake_affine(struct rq *rq, struct sched_
> unsigned int imbalance)
> {
> struct task_struct *curr = this_rq->curr;
> + struct sched_entity *se = &curr->se, *pse = &p->se;
> unsigned long tl = this_load;
> unsigned long tl_per_task;
> int balanced;
> @@ -1002,14 +1006,26 @@ wake_affine(struct rq *rq, struct sched_
> return 0;
>
> /*
> + * If the current task is being wakeup preempted by multiple tasks
> + * that it awakened, such that it can't get significant work done
> + * between preemptions, try to spread these preemption sources.
> + */
> + if (sync && se->last_preempter && se->last_preempter != pse) {
> + u64 se_last_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> +
> + if (se_last_exec < sysctl_sched_migration_cost)
> + return 0;
> + }
> +
> + /*
> * If sync wakeup then subtract the (maximum possible)
> * effect of the currently running task from the load
> * of the current CPU:
> */
> if (sync)
> - tl -= current->se.load.weight;
> + tl -= se->load.weight;
>
> - balanced = 100*(tl + p->se.load.weight) <= imbalance*load;
> + balanced = 100*(tl + pse->load.weight) <= imbalance*load;
>
> /*
> * If the currently running task will sleep within
> @@ -1017,8 +1033,8 @@ wake_affine(struct rq *rq, struct sched_
> * woken task:
> */
> if (sync && balanced && curr->sched_class == &fair_sched_class) {
> - if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
> - p->se.avg_overlap < sysctl_sched_migration_cost)
> + if (se->avg_overlap < sysctl_sched_migration_cost &&
> + pse->avg_overlap < sysctl_sched_migration_cost)
> return 1;
> }
>
> @@ -1219,8 +1235,27 @@ static void check_preempt_wakeup(struct
> pse = parent_entity(pse);
> }
>
> - if (wakeup_preempt_entity(se, pse) == 1)
> - resched_task(curr);
> + if (wakeup_preempt_entity(se, pse) == 1) {
> + int preempt = 1;
> +
> + /*
> + * If current task is being preempted by multiple wakees,
> + * tag it for 1:N affine wakeup preemption avoidance.
> + */
> + if (se->last_preempter && se->last_preempter != pse &&
> + se->load.weight >= pse->load.weight) {
> + u64 exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> +
> + if (exec < sysctl_sched_migration_cost)
> + preempt = 0;
> + }
> +
> + if (se == &current->se)
> + se->last_preempter = pse;
> +
> + if (preempt)
> + resched_task(curr);
> + }
> }
>
> static struct task_struct *pick_next_task_fair(struct rq *rq)
> Index: linux-2.6.26.git/include/linux/sched.h
> ===================================================================
> --- linux-2.6.26.git.orig/include/linux/sched.h
> +++ linux-2.6.26.git/include/linux/sched.h
> @@ -963,6 +963,7 @@ struct sched_entity {
>
> u64 last_wakeup;
> u64 avg_overlap;
> + struct sched_entity *last_preempter;
>
> #ifdef CONFIG_SCHEDSTATS
> u64 wait_start;
> Index: linux-2.6.26.git/kernel/sched.c
> ===================================================================
> --- linux-2.6.26.git.orig/kernel/sched.c
> +++ linux-2.6.26.git/kernel/sched.c
> @@ -2176,6 +2176,7 @@ static void __sched_fork(struct task_str
> p->se.prev_sum_exec_runtime = 0;
> p->se.last_wakeup = 0;
> p->se.avg_overlap = 0;
> + p->se.last_preempter = NULL;
>
> #ifdef CONFIG_SCHEDSTATS
> p->se.wait_start = 0;
>
>
next prev parent reply other threads:[~2008-06-07 13:09 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-05-21 17:34 PostgreSQL pgbench performance regression in 2.6.23+ Greg Smith
2008-05-22 7:10 ` Mike Galbraith
2008-05-22 8:28 ` Dhaval Giani
2008-05-22 9:05 ` Mike Galbraith
2008-05-22 10:34 ` Mike Galbraith
2008-05-22 11:25 ` Mike Galbraith
2008-05-22 11:44 ` Peter Zijlstra
2008-05-22 12:09 ` Mike Galbraith
2008-05-22 12:24 ` Peter Zijlstra
2008-05-22 13:16 ` Mike Galbraith
2008-05-23 7:13 ` Greg Smith
2008-05-23 10:00 ` Mike Galbraith
2008-05-23 10:10 ` Ingo Molnar
2008-05-23 10:15 ` Mike Galbraith
2008-05-23 23:18 ` Greg Smith
2008-05-23 23:46 ` Mike Galbraith
2008-05-24 8:08 ` Mike Galbraith
2008-05-27 0:28 ` Greg Smith
2008-05-27 5:59 ` [patch] " Mike Galbraith
2008-05-27 8:20 ` Mike Galbraith
2008-05-27 8:35 ` Mike Galbraith
2008-06-06 5:03 ` Greg Smith
2008-06-06 6:13 ` Mike Galbraith
2008-06-07 11:38 ` Mike Galbraith
2008-06-07 12:50 ` Mike Galbraith
2008-06-07 13:07 ` Peter Zijlstra
2008-06-07 14:16 ` Mike Galbraith
2008-06-07 16:16 ` Peter Zijlstra
2008-06-07 17:56 ` Mike Galbraith
2008-06-07 13:08 ` Peter Zijlstra [this message]
2008-06-07 14:54 ` [patch part 2] " Mike Galbraith
2008-06-07 16:12 ` Peter Zijlstra
2008-06-07 17:53 ` Mike Galbraith
2008-06-07 18:19 ` Mike Galbraith
2008-05-23 13:05 ` Mike Galbraith
2008-05-23 13:35 ` Mike Galbraith
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1212844084.19205.85.camel@lappy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=dhaval@linux.vnet.ibm.com \
--cc=efault@gmx.de \
--cc=gsmith@gregsmith.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=vatsa@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox