From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>, LKML <linux-kernel@vger.kernel.org>,
Mike Galbraith <efault@gmx.de>,
Srivatsa Vaddagiri <vatsa@in.ibm.com>,
Chris Friesen <cfriesen@nortel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 1/4] sched: optimize group load balancer
Date: Fri, 17 Oct 2008 19:27:02 +0200 [thread overview]
Message-ID: <20081017172829.568667401@chello.nl> (raw)
In-Reply-To: 20081017172701.047939625@chello.nl
[-- Attachment #1: sched-opt-group-balance.patch --]
[-- Type: text/plain, Size: 4441 bytes --]
I noticed that tg_shares_up() unconditionally takes rq-locks for all cpus
in the sched_domain. This hurts.
We need the rq-locks whenever we change the weight of the per-cpu group sched
entities. To allevate this a little, only change the weight when the new
weight is at least shares_thresh away from the old value.
This avoids the rq-lock for the top level entries, since those will never
be re-weighted, and fuzzes the lower level entries a little to gain performance
in semi-stable situations.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Chris Friesen <cfriesen@nortel.com>
---
include/linux/sched.h | 1 +
kernel/sched.c | 45 +++++++++++++++++++++++++--------------------
kernel/sysctl.c | 10 ++++++++++
3 files changed, 36 insertions(+), 20 deletions(-)
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1660,6 +1660,7 @@ extern unsigned int sysctl_sched_feature
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -818,6 +818,13 @@ const_debug unsigned int sysctl_sched_nr
unsigned int sysctl_sched_shares_ratelimit = 250000;
/*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
+/*
* period over which we measure -rt task cpu usage in us.
* default: 1s
*/
@@ -1453,8 +1460,8 @@ static void __set_se_shares(struct sched
* Calculate and set the cpu's group shares.
*/
static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
- unsigned long sd_shares, unsigned long sd_rq_weight)
+update_group_shares_cpu(struct task_group *tg, int cpu,
+ unsigned long sd_shares, unsigned long sd_rq_weight)
{
int boost = 0;
unsigned long shares;
@@ -1485,19 +1492,23 @@ __update_group_shares_cpu(struct task_gr
*
*/
shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+ shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
- /*
- * record the actual number of shares, not the boosted amount.
- */
- tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
- tg->cfs_rq[cpu]->rq_weight = rq_weight;
+ if (abs(shares - tg->se[cpu]->load.weight) >
+ sysctl_sched_shares_thresh) {
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
- if (shares < MIN_SHARES)
- shares = MIN_SHARES;
- else if (shares > MAX_SHARES)
- shares = MAX_SHARES;
+ spin_lock_irqsave(&rq->lock, flags);
+ /*
+ * record the actual number of shares, not the boosted amount.
+ */
+ tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+ tg->cfs_rq[cpu]->rq_weight = rq_weight;
- __set_se_shares(tg->se[cpu], shares);
+ __set_se_shares(tg->se[cpu], shares);
+ spin_unlock_irqrestore(&rq->lock, flags);
+ }
}
/*
@@ -1526,14 +1537,8 @@ static int tg_shares_up(struct task_grou
if (!rq_weight)
rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
- for_each_cpu_mask(i, sd->span) {
- struct rq *rq = cpu_rq(i);
- unsigned long flags;
-
- spin_lock_irqsave(&rq->lock, flags);
- __update_group_shares_cpu(tg, i, shares, rq_weight);
- spin_unlock_irqrestore(&rq->lock, flags);
- }
+ for_each_cpu_mask(i, sd->span)
+ update_group_shares_cpu(tg, i, shares, rq_weight);
return 0;
}
Index: linux-2.6/kernel/sysctl.c
===================================================================
--- linux-2.6.orig/kernel/sysctl.c
+++ linux-2.6/kernel/sysctl.c
@@ -277,6 +277,16 @@ static struct ctl_table kern_table[] = {
},
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_shares_thresh",
+ .data = &sysctl_sched_shares_thresh,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "sched_child_runs_first",
.data = &sysctl_sched_child_runs_first,
.maxlen = sizeof(unsigned int),
--
next prev parent reply other threads:[~2008-10-17 17:30 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-17 17:27 [PATCH 0/4] pending scheduler updates Peter Zijlstra
2008-10-17 17:27 ` Peter Zijlstra [this message]
2008-10-17 17:27 ` [PATCH 2/4] sched: fair scheduler should not resched rt tasks Peter Zijlstra
2008-10-17 17:27 ` [PATCH 3/4] sched: revert back to per-rq vruntime Peter Zijlstra
2008-10-17 17:27 ` [PATCH 4/4] sched: fix wakeup preemption Peter Zijlstra
2008-10-20 21:57 ` Chris Friesen
2008-10-20 12:05 ` [PATCH 0/4] pending scheduler updates Ingo Molnar
2008-10-21 17:35 ` Srivatsa Vaddagiri
2008-10-22 9:40 ` Ingo Molnar
2008-10-22 10:03 ` Mike Galbraith
2008-10-22 10:32 ` Mike Galbraith
2008-10-22 12:10 ` Ingo Molnar
2008-10-22 12:38 ` Mike Galbraith
2008-10-22 12:42 ` Ingo Molnar
2008-10-22 13:05 ` Mike Galbraith
2008-10-22 17:38 ` Peter Zijlstra
2008-10-22 17:56 ` Mike Galbraith
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081017172829.568667401@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=cfriesen@nortel.com \
--cc=efault@gmx.de \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=vatsa@in.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.