From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>,
Dhaval Giani <dhaval@linux.vnet.ibm.com>,
Srivatsa Vaddagiri <vatsa@in.ibm.com>,
Dmitry Adamushko <dmitry.adamushko@gmail.com>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
David Miller <davem@davemloft.net>,
Mike Galbraith <efault@gmx.de>
Subject: [RFC][PATCH 1/2] sched: higher granularity load on 64bit systems
Date: Thu, 24 Apr 2008 00:07:56 +0200 [thread overview]
Message-ID: <1208988476.2849.8.camel@lappy> (raw)
Hi
The below is an RFC because for some reason it regresses kbuild by 5% on
my machine (and by more on the LargeSMP machines that are the reason for this patch).
I'm failing to see how adding a few shifts can cause this.
---
Subject: sched: higher granularity load on 64bit systems
Group scheduling stretches the 10 bit fixed point arithmetic in two ways:
1) shares - fraction of a groups weight
2) group load - recursive fraction of load
Especially on LargeSMP, 1) is a large problem, as a group with load 1024 can easily
run into numerical trouble on a 128-CPU machine.
Increase the fixed point fraction to 20 bits on 64-bit machines (as LargeSMP
is hardly available on 32 bit).
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 5 +++++
kernel/sched.c | 28 ++++++++++++++++++++--------
kernel/sched_fair.c | 2 +-
3 files changed, 26 insertions(+), 9 deletions(-)
Index: linux-2.6-2/include/linux/sched.h
===================================================================
--- linux-2.6-2.orig/include/linux/sched.h
+++ linux-2.6-2/include/linux/sched.h
@@ -686,7 +686,12 @@ enum cpu_idle_type {
/*
* Increase resolution of nice-level calculations:
*/
+#if BITS_PER_LONG == 64
+#define SCHED_LOAD_SHIFT 20
+#else
#define SCHED_LOAD_SHIFT 10
+#endif
+
#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT)
#define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE
Index: linux-2.6-2/kernel/sched.c
===================================================================
--- linux-2.6-2.orig/kernel/sched.c
+++ linux-2.6-2/kernel/sched.c
@@ -1416,6 +1416,15 @@ static void __resched_task(struct task_s
}
#endif
+/*
+ * We keep the prio_to_weight and its inverse in base WEIGHT_SHIFT
+ */
+#define WEIGHT_SHIFT 10
+#define WEIGHT_LOAD_SHIFT (SCHED_LOAD_SHIFT - WEIGHT_SHIFT)
+
+#define WLS(x) ((x) << WEIGHT_LOAD_SHIFT)
+#define inv_WLS(x) ((x) >> WEIGHT_LOAD_SHIFT)
+
#if BITS_PER_LONG == 32
# define WMULT_CONST (~0UL)
#else
@@ -1438,10 +1447,13 @@ calc_delta_mine(unsigned long delta_exec
{
u64 tmp;
- if (unlikely(!lw->inv_weight))
- lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1);
+ if (unlikely(!lw->inv_weight)) {
+ unsigned long inv_wls = inv_WLS(lw->weight);
+
+ lw->inv_weight = 1 + (WMULT_CONST-inv_wls/2) / (inv_wls+1);
+ }
- tmp = (u64)delta_exec * weight;
+ tmp = inv_WLS((u64)delta_exec * weight);
/*
* Check whether we'd overflow the 64-bit multiplication:
*/
@@ -1960,7 +1972,7 @@ static void dec_nr_running(struct rq *rq
static void set_load_weight(struct task_struct *p)
{
if (task_has_rt_policy(p)) {
- p->se.load.weight = prio_to_weight[0] * 2;
+ p->se.load.weight = WLS(prio_to_weight[0] * 2);
p->se.load.inv_weight = prio_to_wmult[0] >> 1;
return;
}
@@ -1969,12 +1981,12 @@ static void set_load_weight(struct task_
* SCHED_IDLE tasks get minimal weight:
*/
if (p->policy == SCHED_IDLE) {
- p->se.load.weight = WEIGHT_IDLEPRIO;
+ p->se.load.weight = WLS(WEIGHT_IDLEPRIO);
p->se.load.inv_weight = WMULT_IDLEPRIO;
return;
}
- p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
+ p->se.load.weight = WLS(prio_to_weight[p->static_prio - MAX_RT_PRIO]);
p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
}
@@ -8072,7 +8084,7 @@ static void init_tg_cfs_entry(struct tas
se->my_q = cfs_rq;
se->load.weight = tg->shares;
- se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
+ se->load.inv_weight = 0;
se->parent = parent;
}
#endif
@@ -8739,7 +8751,7 @@ static void __set_se_shares(struct sched
dequeue_entity(cfs_rq, se, 0);
se->load.weight = shares;
- se->load.inv_weight = div64_64((1ULL<<32), shares);
+ se->load.inv_weight = 0;
if (on_rq)
enqueue_entity(cfs_rq, se, 0);
Index: linux-2.6-2/kernel/sched_fair.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_fair.c
+++ linux-2.6-2/kernel/sched_fair.c
@@ -424,7 +424,7 @@ calc_delta_asym(unsigned long delta, str
{
struct load_weight lw = {
.weight = NICE_0_LOAD,
- .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
+ .inv_weight = 1UL << (WMULT_SHIFT-WEIGHT_SHIFT),
};
for_each_sched_entity(se) {
next reply other threads:[~2008-04-23 22:08 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-04-23 22:07 Peter Zijlstra [this message]
2008-04-23 22:09 ` [RFC][PATCH 2/2] sched: aggregate_group_shares no loop Peter Zijlstra
2008-04-24 0:27 ` [RFC][PATCH 1/2] sched: higher granularity load on 64bit systems David Miller
2008-04-24 1:58 ` Dhaval Giani
2008-04-24 2:13 ` David Miller
2008-04-24 6:47 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1208988476.2849.8.camel@lappy \
--to=a.p.zijlstra@chello.nl \
--cc=davem@davemloft.net \
--cc=dhaval@linux.vnet.ibm.com \
--cc=dmitry.adamushko@gmail.com \
--cc=efault@gmx.de \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=vatsa@in.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox