From: Paul Turner <pjt@google.com>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Ingo Molnar <mingo@elte.hu>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Chris Friesen <cfriesen@nortel.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Pierre Bourdon <pbourdon@excellency.fr>,
	Paul Turner <pjt@google.com>,
	Bharata B Rao <bharata@linux.vnet.ibm.com>,
	Karl Rister <kmr@us.ibm.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	David Miller <davem@davemloft.net>
Subject: [tg_shares_up rewrite v3 03/11] sched: make tg_shares_up() walk on-demand
Date: Thu, 11 Nov 2010 19:24:08 -0800	[thread overview]
Message-ID: <20101112032701.552656833@google.com> (raw)
In-Reply-To: 20101112032405.657789056@google.com

[-- Attachment #1: sched-tg-more-ondemand.patch --]
[-- Type: text/plain, Size: 4421 bytes --]

From: Peter Zijlstra <a.p.zijlstra@chello.nl>

Make tg_shares_up() use the active cgroup list. This means we can no
longer do a strict bottom-up walk of the hierarchy, but assuming it is a
very wide tree with only a small number of active groups, it should be a win.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Paul Turner <pjt@google.com>

---
 kernel/sched.c      |   67 ----------------------------------------------------
 kernel/sched_fair.c |   58 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 67 deletions(-)
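
[ Illustration only -- the note and sketch below are not part of the patch
  and would be discarded by git-am.  The win comes from replacing the full
  walk_tg_tree() traversal with a walk that starts at the per-rq list of
  active leaf cfs_rqs and follows ->parent upwards.  A minimal user-space
  model of that shape, with every name (struct group, full_walk, leaf_walk)
  invented for the example: ]

/*
 * Illustration only -- NOT part of the patch.  User-space model of the
 * two walk shapes; all names below are invented for the example.
 */
#include <stdio.h>
#include <stdlib.h>

struct group {
	struct group *parent;
	int active;			/* has runnable entities on this cpu */
};

/* old scheme: walk_tg_tree() visits every group, active or not */
static int full_walk(struct group *groups, int nr)
{
	int visited = 0, i;

	for (i = 0; i < nr; i++) {
		struct group *g = &groups[i];

		(void)g;		/* tg_shares_up(g) would run here */
		visited++;
	}
	return visited;
}

/* new scheme: start from the active leaves only, then follow ->parent up */
static int leaf_walk(struct group *groups, int nr)
{
	int visited = 0, i;

	for (i = 0; i < nr; i++) {
		struct group *g = &groups[i];

		if (!g->active)		/* not on the per-rq leaf list */
			continue;
		do {
			visited++;	/* tg_shares_up() on g and its ancestors */
			g = g->parent;
		} while (g);
	}
	return visited;
}

int main(void)
{
	/* one root with 10000 children, only 3 of them active */
	int nr = 10001, i;
	struct group *groups = calloc(nr, sizeof(*groups));

	for (i = 1; i < nr; i++)
		groups[i].parent = &groups[0];
	groups[1].active = groups[2].active = groups[3].active = 1;

	printf("full walk visits %d groups\n", full_walk(groups, nr));
	printf("leaf walk visits %d groups\n", leaf_walk(groups, nr));
	free(groups);
	return 0;
}

[ With only three active leaves the full walk still visits all 10001 groups,
  while the leaf walk visits six: each active leaf plus the root, which is
  revisited once per leaf, just as update_shares() below revisits shared
  ancestors. ]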

Index: kernel/sched.c
===================================================================
--- kernel/sched.c.orig
+++ kernel/sched.c
@@ -281,13 +281,6 @@ static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-	return list_empty(&root_task_group.children);
-}
-#endif
-
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 
 /*
@@ -1531,48 +1524,6 @@ static unsigned long cpu_avg_load_per_ta
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static void update_cfs_load(struct cfs_rq *cfs_rq, int lb);
-static void update_cfs_shares(struct cfs_rq *cfs_rq);
-
-/*
- * update tg->load_weight by folding this cpu's load_avg
- */
-static int tg_shares_up(struct task_group *tg, void *data)
-{
-	long load_avg;
-	struct cfs_rq *cfs_rq;
-	unsigned long flags;
-	int cpu = (long)data;
-	struct rq *rq;
-
-	if (!tg->se[cpu])
-		return 0;
-
-	rq = cpu_rq(cpu);
-	cfs_rq = tg->cfs_rq[cpu];
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-
-	update_rq_clock(rq);
-	update_cfs_load(cfs_rq, 1);
-
-	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
-	load_avg -= cfs_rq->load_contribution;
-
-	atomic_add(load_avg, &tg->load_weight);
-	cfs_rq->load_contribution += load_avg;
-
-	/*
-	 * We need to update shares after updating tg->load_weight in
-	 * order to adjust the weight of groups with long running tasks.
-	 */
-	update_cfs_shares(cfs_rq);
-
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-
-	return 0;
-}
-
 /*
  * Compute the cpu's hierarchical load factor for each task group.
  * This needs to be done in a top-down fashion because the load of a child
@@ -1596,29 +1547,11 @@ static int tg_load_down(struct task_grou
 	return 0;
 }
 
-static void update_shares(long cpu)
-{
-	if (root_task_group_empty())
-		return;
-
-	/*
-	 * XXX: replace with an on-demand list
-	 */
-
-	walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
-}
-
 static void update_h_load(long cpu)
 {
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
-#else
-
-static inline void update_shares(int cpu)
-{
-}
-
 #endif
 
 #ifdef CONFIG_PREEMPT
Index: kernel/sched_fair.c
===================================================================
--- kernel/sched_fair.c.orig
+++ kernel/sched_fair.c
@@ -2000,6 +2000,60 @@ out:
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * update tg->load_weight by folding this cpu's load_avg
+ */
+static int tg_shares_up(struct task_group *tg, int cpu)
+{
+	struct cfs_rq *cfs_rq;
+	unsigned long flags;
+	struct rq *rq;
+	long load_avg;
+
+	if (!tg->se[cpu])
+		return 0;
+
+	rq = cpu_rq(cpu);
+	cfs_rq = tg->cfs_rq[cpu];
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
+
+	update_rq_clock(rq);
+	update_cfs_load(cfs_rq, 1);
+
+	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
+	load_avg -= cfs_rq->load_contribution;
+	atomic_add(load_avg, &tg->load_weight);
+	cfs_rq->load_contribution += load_avg;
+
+	/*
+	 * We need to update shares after updating tg->load_weight in
+	 * order to adjust the weight of groups with long running tasks.
+	 */
+	update_cfs_shares(cfs_rq);
+
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	return 0;
+}
+
+static void update_shares(int cpu)
+{
+	struct cfs_rq *cfs_rq;
+	struct rq *rq = cpu_rq(cpu);
+
+	rcu_read_lock();
+	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		struct task_group *tg = cfs_rq->tg;
+
+		do {
+			tg_shares_up(tg, cpu);
+			tg = tg->parent;
+		} while (tg);
+	}
+	rcu_read_unlock();
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2047,6 +2101,10 @@ load_balance_fair(struct rq *this_rq, in
 	return max_load_move - rem_load_move;
 }
 #else
+static inline void update_shares(int cpu)
+{
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
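
[ A note on the arithmetic in tg_shares_up() above -- again illustration
  only, not part of the patch.  The per-cpu cfs_rq remembers what it last
  folded into tg->load_weight in load_contribution, and each update
  publishes only the difference between its current average and that cached
  value, so the group-wide sum tracks the sum of per-cpu averages without
  ever being recomputed from scratch.  A small user-space model with
  invented names, assuming load_avg accumulates load*time over load_period: ]

/*
 * Illustration only -- NOT part of the patch.  User-space model of the
 * delta folding done by tg_shares_up(); all names are invented.
 */
#include <stdio.h>

static long global_load;		/* models tg->load_weight */

struct per_cpu {
	long load_avg;			/* accumulated load * time */
	long load_period;		/* time the above was accumulated over */
	long contribution;		/* what this cpu folded in last time */
};

static void fold(struct per_cpu *pc)
{
	/* the +1 guards against a zero-length period */
	long avg = pc->load_avg / (pc->load_period + 1);
	long delta = avg - pc->contribution;

	global_load += delta;		/* atomic_add() in the real code */
	pc->contribution += delta;
}

int main(void)
{
	struct per_cpu cpu0 = { .load_avg = 2048 * 10, .load_period = 9 };

	fold(&cpu0);
	printf("after first fold:  global=%ld contrib=%ld\n",
	       global_load, cpu0.contribution);

	/* more load accrues; only the difference gets published */
	cpu0.load_avg += 1024 * 10;
	cpu0.load_period += 10;

	fold(&cpu0);
	printf("after second fold: global=%ld contrib=%ld\n",
	       global_load, cpu0.contribution);
	return 0;
}

[ The second fold publishes a negative delta once this cpu's average drops,
  which is why the real code adds a signed value via atomic_add(). ]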

-- 


Thread overview: 18+ messages
2010-11-12  3:24 [tg_shares_up rewrite v3 00/11] reduce overhead for tg->shares distribution Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 01/11] sched: rewrite tg_shares_up Paul Turner
2010-11-12 11:10   ` Peter Zijlstra
2010-11-12  3:24 ` [tg_shares_up rewrite v3 02/11] sched: on-demand (active) cfs_rq list Paul Turner
2010-11-12  3:24 ` Paul Turner [this message]
2010-11-12  3:24 ` [tg_shares_up rewrite v3 04/11] sched: fix load corruption from update_cfs_shares Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 05/11] sched: fix update_cfs_load synchronization Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 06/11] sched: hierarchal order on shares update list Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 07/11] sched: add sysctl_sched_shares_window Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 08/11] sched: update shares on idle_balance Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 09/11] sched: demand based update_cfs_load() Paul Turner
2010-11-12 10:53   ` Peter Zijlstra
2010-11-12 12:12     ` Peter Zijlstra
2010-11-13  1:01     ` Paul Turner
2010-11-13  2:42       ` Paul Turner
2010-11-13 22:45         ` Peter Zijlstra
2010-11-12  3:24 ` [tg_shares_up rewrite v3 10/11] sched: allow update_cfs_load to update global load Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 11/11] sched: update tg->shares after cpu.shares write Paul Turner
