Message-Id: <20101115234937.754159484@google.com>
User-Agent: quilt/0.48-1
Date: Mon, 15 Nov 2010 15:47:02 -0800
From: Paul Turner
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra, Ingo Molnar, Srivatsa Vaddagiri, Chris Friesen,
	Vaidyanathan Srinivasan, Pierre Bourdon, Paul Turner, Bharata B Rao,
	Karl Rister, Balbir Singh, David Miller
Subject: [tg_shares_up rewrite v4 03/11] sched: make tg_shares_up() walk on-demand
References: <20101115234659.610333554@google.com>
Content-Disposition: inline; filename=sched-tg-more-ondemand.patch

From: Peter Zijlstra

Make tg_shares_up() use the active cgroup list; this means we cannot do a
strict bottom-up walk of the hierarchy, but assuming it's a very wide tree
with a small number of active groups it should be a win.

Signed-off-by: Peter Zijlstra
Signed-off-by: Paul Turner

---
 kernel/sched.c      |   67 ----------------------------------------------------
 kernel/sched_fair.c |   58 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 67 deletions(-)

Index: tip/kernel/sched.c
===================================================================
--- tip.orig/kernel/sched.c
+++ tip/kernel/sched.c
@@ -279,13 +279,6 @@ static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-	return list_empty(&root_task_group.children);
-}
-#endif
-
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 
 /*
@@ -1536,48 +1529,6 @@ static unsigned long cpu_avg_load_per_ta
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static void update_cfs_load(struct cfs_rq *cfs_rq, int lb);
-static void update_cfs_shares(struct cfs_rq *cfs_rq);
-
-/*
- * update tg->load_weight by folding this cpu's load_avg
- */
-static int tg_shares_up(struct task_group *tg, void *data)
-{
-	long load_avg;
-	struct cfs_rq *cfs_rq;
-	unsigned long flags;
-	int cpu = (long)data;
-	struct rq *rq;
-
-	if (!tg->se[cpu])
-		return 0;
-
-	rq = cpu_rq(cpu);
-	cfs_rq = tg->cfs_rq[cpu];
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-
-	update_rq_clock(rq);
-	update_cfs_load(cfs_rq, 1);
-
-	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
-	load_avg -= cfs_rq->load_contribution;
-
-	atomic_add(load_avg, &tg->load_weight);
-	cfs_rq->load_contribution += load_avg;
-
-	/*
-	 * We need to update shares after updating tg->load_weight in
-	 * order to adjust the weight of groups with long running tasks.
-	 */
-	update_cfs_shares(cfs_rq);
-
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-
-	return 0;
-}
-
 /*
  * Compute the cpu's hierarchical load factor for each task group.
 * This needs to be done in a top-down fashion because the load of a child
@@ -1601,29 +1552,11 @@ static int tg_load_down(struct task_grou
 	return 0;
 }
 
-static void update_shares(long cpu)
-{
-	if (root_task_group_empty())
-		return;
-
-	/*
-	 * XXX: replace with an on-demand list
-	 */
-
-	walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
-}
-
 static void update_h_load(long cpu)
 {
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
-#else
-
-static inline void update_shares(int cpu)
-{
-}
-
 #endif
 
 #ifdef CONFIG_PREEMPT
Index: tip/kernel/sched_fair.c
===================================================================
--- tip.orig/kernel/sched_fair.c
+++ tip/kernel/sched_fair.c
@@ -1998,6 +1998,60 @@ out:
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * update tg->load_weight by folding this cpu's load_avg
+ */
+static int tg_shares_up(struct task_group *tg, int cpu)
+{
+	struct cfs_rq *cfs_rq;
+	unsigned long flags;
+	struct rq *rq;
+	long load_avg;
+
+	if (!tg->se[cpu])
+		return 0;
+
+	rq = cpu_rq(cpu);
+	cfs_rq = tg->cfs_rq[cpu];
+
+	raw_spin_lock_irqsave(&rq->lock, flags);
+
+	update_rq_clock(rq);
+	update_cfs_load(cfs_rq, 1);
+
+	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
+	load_avg -= cfs_rq->load_contribution;
+	atomic_add(load_avg, &tg->load_weight);
+	cfs_rq->load_contribution += load_avg;
+
+	/*
+	 * We need to update shares after updating tg->load_weight in
+	 * order to adjust the weight of groups with long running tasks.
+	 */
+	update_cfs_shares(cfs_rq);
+
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	return 0;
+}
+
+static void update_shares(int cpu)
+{
+	struct cfs_rq *cfs_rq;
+	struct rq *rq = cpu_rq(cpu);
+
+	rcu_read_lock();
+	for_each_leaf_cfs_rq(rq, cfs_rq) {
+		struct task_group *tg = cfs_rq->tg;
+
+		do {
+			tg_shares_up(tg, cpu);
+			tg = tg->parent;
+		} while (tg);
+	}
+	rcu_read_unlock();
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -2045,6 +2099,10 @@ load_balance_fair(struct rq *this_rq, in
 	return max_load_move - rem_load_move;
 }
 #else
+static inline void update_shares(int cpu)
+{
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
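
The walk that the new update_shares() performs can be illustrated outside the
kernel. The sketch below is a minimal, self-contained user-space model, not
kernel code: "struct group", "fold_group()" and the "active" array are
hypothetical stand-ins for struct task_group, tg_shares_up() and the per-rq
leaf cfs_rq list. Instead of visiting every group in the hierarchy, only the
groups that currently have load on this cpu are visited, and each one is
folded upward through its parents to the root.

/*
 * Minimal user-space sketch of the on-demand walk; the types and helpers
 * are simplified stand-ins, not the scheduler's real data structures.
 */
#include <stddef.h>
#include <stdio.h>

struct group {
	const char *name;
	struct group *parent;		/* NULL for the root group */
};

/* Stand-in for tg_shares_up(): fold one group's load for this cpu. */
static void fold_group(struct group *g, int cpu)
{
	printf("cpu%d: fold %s\n", cpu, g->name);
}

/*
 * Stand-in for the new update_shares(): visit only the active groups and
 * walk each one up to the root, instead of walking the whole hierarchy.
 */
static void update_shares_sketch(struct group **active, size_t n, int cpu)
{
	for (size_t i = 0; i < n; i++) {
		struct group *g = active[i];

		do {
			fold_group(g, cpu);
			g = g->parent;
		} while (g);
	}
}

int main(void)
{
	struct group root = { "root", NULL };
	struct group a    = { "a",    &root };
	struct group a_b  = { "a/b",  &a };
	/* Only a/b is active on cpu 0; root and a are reached as parents. */
	struct group *active[] = { &a_b };

	update_shares_sketch(active, 1, 0);
	return 0;
}

As in the update_shares() added by the patch, an ancestor reachable from
several active entries gets folded once per entry; the commit message's
assumption is that the tree is wide with few active groups, so this
duplication is cheaper than unconditionally walking every group.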