All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul Turner <pjt@google.com>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Ingo Molnar <mingo@elte.hu>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Chris Friesen <cfriesen@nortel.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Pierre Bourdon <pbourdon@excellency.fr>,
	Paul Turner <pjt@google.com>,
	Bharata B Rao <bharata@linux.vnet.ibm.com>,
	Karl Rister <kmr@us.ibm.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	David Miller <davem@davemloft.net>
Subject: [tg_shares_up rewrite v3 03/11] sched: make tg_shares_up() walk on-demand
Date: Thu, 11 Nov 2010 19:24:08 -0800	[thread overview]
Message-ID: <20101112032701.552656833@google.com> (raw)
In-Reply-To: 20101112032405.657789056@google.com

[-- Attachment #1: sched-tg-more-ondemand.patch --]
[-- Type: text/plain, Size: 4421 bytes --]

From: Peter Zijlstra <a.p.zijlstra@chello.nl>

Make tg_shares_up() use the active cgroup list, this means we cannot
do a strict bottom-up walk of the hierarchy, but assuming its a very
wide tree with a small number of active groups it should be a win.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Paul Turner <pjt@google.com>

---
 kernel/sched.c      |   67 ----------------------------------------------------
 kernel/sched_fair.c |   58 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 67 deletions(-)

Index: kernel/sched.c
===================================================================
--- kernel/sched.c.orig
+++ kernel/sched.c
@@ -281,13 +281,6 @@ static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-	return list_empty(&root_task_group.children);
-}
-#endif
-
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 
 /*
@@ -1531,48 +1524,6 @@ static unsigned long cpu_avg_load_per_ta
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static void update_cfs_load(struct cfs_rq *cfs_rq, int lb);
-static void update_cfs_shares(struct cfs_rq *cfs_rq);
-
-/*
- * update tg->load_weight by folding this cpu's load_avg
- */
-static int tg_shares_up(struct task_group *tg, void *data)
-{
-	long load_avg;
-	struct cfs_rq *cfs_rq;
-	unsigned long flags;
-	int cpu = (long)data;
-	struct rq *rq;
-
-	if (!tg->se[cpu])
-		return 0;
-
-	rq = cpu_rq(cpu);
-	cfs_rq = tg->cfs_rq[cpu];
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-
-	update_rq_clock(rq);
-	update_cfs_load(cfs_rq, 1);
-
-	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
-	load_avg -= cfs_rq->load_contribution;
-
-	atomic_add(load_avg, &tg->load_weight);
-	cfs_rq->load_contribution += load_avg;
-
-	/*
-	 * We need to update shares after updating tg->load_weight in
-	 * order to adjust the weight of groups with long running tasks.
-	 */
-	update_cfs_shares(cfs_rq);
-
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-
-	return 0;
-}
-
 /*
  * Compute the cpu's hierarchical load factor for each task group.
  * This needs to be done in a top-down fashion because the load of a child
@@ -1596,29 +1547,11 @@ static int tg_load_down(struct task_grou
 	return 0;
 }
 
-static void update_shares(long cpu)
-{
-	if (root_task_group_empty())
-		return;
-
-	/*
-	 * XXX: replace with an on-demand list
-	 */
-
-	walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
-}
-
 static void update_h_load(long cpu)
 {
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
-#else
-
-static inline void update_shares(int cpu)
-{
-}
-
 #endif
 
 #ifdef CONFIG_PREEMPT
Index: kernel/sched_fair.c
===================================================================
--- kernel/sched_fair.c.orig
+++ kernel/sched_fair.c
@@ -2000,6 +2000,60 @@ out:
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * update tg->load_weight by folding this cpu's load_avg
+ */
+static int tg_shares_up(struct task_group *tg, int cpu)
+{
+	struct cfs_rq *cfs_rq;
+	unsigned long flags;
+	struct rq *rq;
+	long load_avg;
+
+	if (!tg->se[cpu])
+		return 0;
+
+	rq = cpu_rq(cpu);
+	cfs_rq = tg->cfs_rq[cpu];
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
+
+	update_rq_clock(rq);
+	update_cfs_load(cfs_rq, 1);
+
+	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
+	load_avg -= cfs_rq->load_contribution;
+	atomic_add(load_avg, &tg->load_weight);
+	cfs_rq->load_contribution += load_avg;
+
+	/*
+	 * We need to update shares after updating tg->load_weight in
+	 * order to adjust the weight of groups with long running tasks.
+	 */
+	update_cfs_shares(cfs_rq);
+
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	return 0;
+}
+
+static void update_shares(int cpu)
+{
+	struct cfs_rq *cfs_rq;
+	struct rq *rq = cpu_rq(cpu);
+
+	rcu_read_lock();
+	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		struct task_group *tg = cfs_rq->tg;
+
+		do {
+			tg_shares_up(tg, cpu);
+			tg = tg->parent;
+		} while (tg);
+	}
+	rcu_read_unlock();
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2047,6 +2101,10 @@ load_balance_fair(struct rq *this_rq, in
 	return max_load_move - rem_load_move;
 }
 #else
+static inline void update_shares(int cpu)
+{
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,

-- 


  parent reply	other threads:[~2010-11-12  3:34 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-12  3:24 [tg_shares_up rewrite v3 00/11] reduce overhead for tg->shares distribution Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 01/11] sched: rewrite tg_shares_up Paul Turner
2010-11-12 11:10   ` Peter Zijlstra
2010-11-12  3:24 ` [tg_shares_up rewrite v3 02/11] sched: on-demand (active) cfs_rq list Paul Turner
2010-11-12  3:24 ` Paul Turner [this message]
2010-11-12  3:24 ` [tg_shares_up rewrite v3 04/11] sched: fix load corruption from update_cfs_shares Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 05/11] sched: fix update_cfs_load synchronization Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 06/11] sched: hierarchal order on shares update list Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 07/11] sched: add sysctl_sched_shares_window Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 08/11] sched: update shares on idle_balance Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 09/11] sched: demand based update_cfs_load() Paul Turner
2010-11-12 10:53   ` Peter Zijlstra
2010-11-12 12:12     ` Peter Zijlstra
2010-11-13  1:01     ` Paul Turner
2010-11-13  2:42       ` Paul Turner
2010-11-13 22:45         ` Peter Zijlstra
2010-11-12  3:24 ` [tg_shares_up rewrite v3 10/11] sched: allow update_cfs_load to update global load Paul Turner
2010-11-12  3:24 ` [tg_shares_up rewrite v3 11/11] sched: update tg->shares after cpu.shares write Paul Turner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101112032701.552656833@google.com \
    --to=pjt@google.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=cfriesen@nortel.com \
    --cc=davem@davemloft.net \
    --cc=kmr@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=pbourdon@excellency.fr \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.