[RFC][PATCH 1/2] sched: higher granularity load on 64bit systems

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Ingo Molnar <mingo@elte.hu>,
	Dhaval Giani <dhaval@linux.vnet.ibm.com>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Dmitry Adamushko <dmitry.adamushko@gmail.com>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	David Miller <davem@davemloft.net>,
	Mike Galbraith <efault@gmx.de>
Subject: [RFC][PATCH 1/2] sched: higher granularity load on 64bit systems
Date: Thu, 24 Apr 2008 00:07:56 +0200	[thread overview]
Message-ID: <1208988476.2849.8.camel@lappy> (raw)

Hi

The below is an RFC because for some reason it regresses kbuild by 5% on
my machine (and by more on the large-SMP machines that are the reason for it).

I'm failing to see how adding a few shifts can cause this.

---

Subject: sched: higher granularity load on 64bit systems

Group scheduling stretches the 10-bit fixed-point arithmetic in two ways:
 1) shares - fraction of a group's weight
 2) group load - recursive fraction of load

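Especially on large-SMP systems, 1) is a big problem: a group with load 1024
can easily run into numerical trouble on a 128-CPU machine, because its weight
is split into per-CPU fractions (1024/128 = 8 if spread evenly), which leaves
very little precision.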

Increase the fixed-point fraction to 20 bits on 64-bit machines (as large-SMP
is hardly available on 32-bit).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/sched.h |    5 +++++
 kernel/sched.c        |   28 ++++++++++++++++++++--------
 kernel/sched_fair.c   |    2 +-
 3 files changed, 26 insertions(+), 9 deletions(-)

Index: linux-2.6-2/include/linux/sched.h
===================================================================
--- linux-2.6-2.orig/include/linux/sched.h
+++ linux-2.6-2/include/linux/sched.h
@@ -686,7 +686,12 @@ enum cpu_idle_type {
 /*
  * Increase resolution of nice-level calculations:
  */
+#if BITS_PER_LONG == 64
+#define SCHED_LOAD_SHIFT	20
+#else
 #define SCHED_LOAD_SHIFT	10
+#endif
+
 #define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
 
 #define SCHED_LOAD_SCALE_FUZZ	SCHED_LOAD_SCALE
Index: linux-2.6-2/kernel/sched.c
===================================================================
--- linux-2.6-2.orig/kernel/sched.c
+++ linux-2.6-2/kernel/sched.c
@@ -1416,6 +1416,15 @@ static void __resched_task(struct task_s
 }
 #endif
 
+/*
+ * We keep the prio_to_weight and its inverse in base WEIGHT_SHIFT
+ */
+#define WEIGHT_SHIFT 		10
+#define WEIGHT_LOAD_SHIFT	(SCHED_LOAD_SHIFT - WEIGHT_SHIFT)
+
+#define WLS(x)		((x) << WEIGHT_LOAD_SHIFT)
+#define inv_WLS(x)	((x) >> WEIGHT_LOAD_SHIFT)
+
 #if BITS_PER_LONG == 32
 # define WMULT_CONST	(~0UL)
 #else
@@ -1438,10 +1447,13 @@ calc_delta_mine(unsigned long delta_exec
 {
 	u64 tmp;
 
-	if (unlikely(!lw->inv_weight))
-		lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1);
+	if (unlikely(!lw->inv_weight)) {
+		unsigned long inv_wls = inv_WLS(lw->weight);
+
+		lw->inv_weight = 1 + (WMULT_CONST-inv_wls/2) / (inv_wls+1);
+	}
 
-	tmp = (u64)delta_exec * weight;
+	tmp = inv_WLS((u64)delta_exec * weight);
 	/*
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
@@ -1960,7 +1972,7 @@ static void dec_nr_running(struct rq *rq
 static void set_load_weight(struct task_struct *p)
 {
 	if (task_has_rt_policy(p)) {
-		p->se.load.weight = prio_to_weight[0] * 2;
+		p->se.load.weight = WLS(prio_to_weight[0] * 2);
 		p->se.load.inv_weight = prio_to_wmult[0] >> 1;
 		return;
 	}
@@ -1969,12 +1981,12 @@ static void set_load_weight(struct task_
 	 * SCHED_IDLE tasks get minimal weight:
 	 */
 	if (p->policy == SCHED_IDLE) {
-		p->se.load.weight = WEIGHT_IDLEPRIO;
+		p->se.load.weight = WLS(WEIGHT_IDLEPRIO);
 		p->se.load.inv_weight = WMULT_IDLEPRIO;
 		return;
 	}
 
-	p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
+	p->se.load.weight = WLS(prio_to_weight[p->static_prio - MAX_RT_PRIO]);
 	p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
 }
 
@@ -8072,7 +8084,7 @@ static void init_tg_cfs_entry(struct tas
 
 	se->my_q = cfs_rq;
 	se->load.weight = tg->shares;
-	se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
+	se->load.inv_weight = 0;
 	se->parent = parent;
 }
 #endif
@@ -8739,7 +8751,7 @@ static void __set_se_shares(struct sched
 		dequeue_entity(cfs_rq, se, 0);
 
 	se->load.weight = shares;
-	se->load.inv_weight = div64_64((1ULL<<32), shares);
+	se->load.inv_weight = 0;
 
 	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
Index: linux-2.6-2/kernel/sched_fair.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_fair.c
+++ linux-2.6-2/kernel/sched_fair.c
@@ -424,7 +424,7 @@ calc_delta_asym(unsigned long delta, str
 {
 	struct load_weight lw = {
 		.weight = NICE_0_LOAD,
-		.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
+		.inv_weight = 1UL << (WMULT_SHIFT-WEIGHT_SHIFT),
 	};
 
 	for_each_sched_entity(se) {

next             reply	other threads:[~2008-04-23 22:08 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-23 22:07 Peter Zijlstra [this message]
2008-04-23 22:09 ` [RFC][PATCH 2/2] sched: aggregate_group_shares no loop Peter Zijlstra
2008-04-24  0:27 ` [RFC][PATCH 1/2] sched: higher granularity load on 64bit systems David Miller
2008-04-24  1:58   ` Dhaval Giani
2008-04-24  2:13     ` David Miller
2008-04-24  6:47   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1208988476.2849.8.camel@lappy \
    --to=a.p.zijlstra@chello.nl \
    --cc=davem@davemloft.net \
    --cc=dhaval@linux.vnet.ibm.com \
    --cc=dmitry.adamushko@gmail.com \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.