Re: [PATCH 09/16] sched: normalize tg load contributions against runnable time

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Namhyung Kim <namhyung@kernel.org>
To: Paul Turner <pjt@google.com>
Cc: linux-kernel@vger.kernel.org, Venki Pallipadi <venki@google.com>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>,
	Mike Galbraith <efault@gmx.de>,
	Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>,
	Ben Segall <bsegall@google.com>, Ingo Molnar <mingo@elte.hu>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Morten Rasmussen <Morten.Rasmussen@arm.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Subject: Re: [PATCH 09/16] sched: normalize tg load contributions against runnable time
Date: Fri, 29 Jun 2012 16:26:05 +0900	[thread overview]
Message-ID: <87txxuwpde.fsf@sejong.aot.lge.com> (raw)
In-Reply-To: <20120628022414.30496.11931.stgit@kitami.mtv.corp.google.com> (Paul Turner's message of "Wed, 27 Jun 2012 19:24:14 -0700")

On Wed, 27 Jun 2012 19:24:14 -0700, Paul Turner wrote:
> Entities of equal weight should receive equitable distribution of cpu time.
> This is challenging in the case of a task_group's shares as execution may be
> occurring on multiple cpus simultaneously.
>
> To handle this we divide up the shares into weights proportionate with the load
> on each cfs_rq.  This does not, however, account for the fact that the sum of
> the parts may be less than one cpu and so we need to normalize:
>   load(tg) = min(runnable_avg(tg), 1) * tg->shares
> Where runnable_avg is the aggregate time in which the task_group had runnable
> children.
>
> Signed-off-by: Paul Turner <pjt@google.com>
> Signed-off-by: Ben Segall <bsegall@google.com>.
> ---
>  kernel/sched/debug.c |    4 ++++
>  kernel/sched/fair.c  |   39 +++++++++++++++++++++++++++++++++++++++
>  kernel/sched/sched.h |    2 ++
>  3 files changed, 45 insertions(+), 0 deletions(-)
>
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 9268fb7..9334c68 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -237,6 +237,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
>  			atomic64_read(&cfs_rq->tg->load_avg));
>  	SEQ_printf(m, "  .%-30s: %lld\n", "tg_load_contrib",
>  			cfs_rq->tg_load_contrib);
> +	SEQ_printf(m, "  .%-30s: %d\n", "tg_runnable_contrib",
> +			cfs_rq->tg_runnable_contrib);
> +	SEQ_printf(m, "  .%-30s: %d\n", "tg->runnable_avg",
> +			atomic_read(&cfs_rq->tg->runnable_avg));
>  #endif
>  
>  	print_cfs_group_stats(m, cpu, cfs_rq->tg);
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index a416296..91d0b21 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1117,19 +1117,56 @@ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
>  	}
>  }
>  
> +/*
> + * Aggregate cfs_rq runnable averages into an equivalent task_group
> + * representation for computing load contributions.
> + */
> +static inline void __update_tg_runnable_avg(struct sched_avg *sa,
> +						  struct cfs_rq *cfs_rq)
> +{
> +	struct task_group *tg = cfs_rq->tg;
> +	long contrib;
> +
> +	contrib = div_u64(sa->runnable_avg_sum << 12,
> +			  sa->runnable_avg_period + 1);
> +	contrib -= cfs_rq->tg_runnable_contrib;
> +
> +	if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
> +		atomic_add(contrib, &tg->runnable_avg);
> +		cfs_rq->tg_runnable_contrib += contrib;
> +	}
> +}
> +
>  static inline void __update_group_entity_contrib(struct sched_entity *se)
>  {
>  	struct cfs_rq *cfs_rq = group_cfs_rq(se);
>  	struct task_group *tg = cfs_rq->tg;
> +	int runnable_avg;
> +
>  	u64 contrib;
>  
>  	contrib = cfs_rq->tg_load_contrib * tg->shares;
>  	se->avg.load_avg_contrib = div64_u64(contrib,
>  					     atomic64_read(&tg->load_avg) + 1);
> +
> +	/*
> +	 * Unlike a task-entity, a group entity may be using >=1 cpu globally.
> +	 * However, in the case that it's using <1 cpu we need to form a
> +	 * correction term so that we contribute the same load as a task of
> +	 * equal weight. (Global runnable time is taken as a fraction over
> +	 * 2^12.)

Wouldn't it be better to use a symbolic name rather than the magic number?


> +	 */
> +	runnable_avg = atomic_read(&tg->runnable_avg);
> +	if (runnable_avg < (1<<12)) {
> +		se->avg.load_avg_contrib *= runnable_avg;
> +		se->avg.load_avg_contrib /= (1<<12);

Ditto.

Thanks,
Namhyung


> +	}
>  }
>  #else
>  static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
>  						 int force_update) {}
> +static inline void __update_tg_runnable_avg(struct sched_avg *sa,
> +						  struct cfs_rq *cfs_rq) {}
>  static inline void __update_group_entity_contrib(struct sched_entity *se) {}
>  #endif
>  
> @@ -1151,6 +1188,7 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
>  	if (entity_is_task(se)) {
>  		__update_task_entity_contrib(se);
>  	} else {
> +		__update_tg_runnable_avg(&se->avg, group_cfs_rq(se));
>  		__update_group_entity_contrib(se);
>  	}
>  
> @@ -1219,6 +1257,7 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
>  static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
>  {
>  	__update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
> +	__update_tg_runnable_avg(&rq->avg, &rq->cfs);
>  }
>  
>  /* Add the load generated by se into cfs_rq's child load-average */
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 4d3b3ad..b48bbd7 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -113,6 +113,7 @@ struct task_group {
>  
>  	atomic_t load_weight;
>  	atomic64_t load_avg;
> +	atomic_t runnable_avg;
>  #endif
>  
>  #ifdef CONFIG_RT_GROUP_SCHED
> @@ -234,6 +235,7 @@ struct cfs_rq {
>  	atomic64_t decay_counter, removed_load;
>  	u64 last_decay;
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> +	u32 tg_runnable_contrib;
>  	u64 tg_load_contrib;
>  #endif
>  #endif

next prev parent reply	other threads:[~2012-06-29  7:30 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-28  2:24 [PATCH 00/16] Series short description Paul Turner
2012-06-28  2:24 ` [PATCH 01/16] sched: track the runnable average on a per-task entitiy basis Paul Turner
2012-06-28  6:06   ` Namhyung Kim
2012-07-12  0:14     ` Paul Turner
2012-07-04 15:32   ` Peter Zijlstra
2012-07-12  0:12     ` Paul Turner
2012-06-28  2:24 ` [PATCH 09/16] sched: normalize tg load contributions against runnable time Paul Turner
2012-06-29  7:26   ` Namhyung Kim [this message]
2012-07-04 19:48   ` Peter Zijlstra
2012-07-06 11:52     ` Peter Zijlstra
2012-07-12  1:08       ` Andre Noll
2012-07-12  0:02     ` Paul Turner
2012-07-06 12:23   ` Peter Zijlstra
2012-06-28  2:24 ` [PATCH 05/16] sched: add an rq migration call-back to sched_class Paul Turner
2012-06-29  1:32   ` Namhyung Kim
2012-06-28  2:24 ` [PATCH 06/16] sched: account for blocked load waking back up Paul Turner
2012-06-28  2:24 ` [PATCH 08/16] sched: compute load contribution by a group entity Paul Turner
2012-06-28  2:24 ` [PATCH 02/16] sched: maintain per-rq runnable averages Paul Turner
2012-06-28  2:24 ` [PATCH 04/16] sched: maintain the load contribution of blocked entities Paul Turner
2012-06-29  1:27   ` Namhyung Kim
2012-06-28  2:24 ` [PATCH 03/16] sched: aggregate load contributed by task entities on parenting cfs_rq Paul Turner
2012-06-28  6:33   ` Namhyung Kim
2012-07-04 15:28   ` Peter Zijlstra
2012-07-06 14:53     ` Peter Zijlstra
2012-07-09  9:15       ` Ingo Molnar
2012-06-28  2:24 ` [PATCH 07/16] sched: aggregate total task_group load Paul Turner
2012-06-28  2:24 ` [PATCH 16/16] sched: introduce temporary FAIR_GROUP_SCHED dependency for load-tracking Paul Turner
2012-06-28  2:24 ` [PATCH 10/16] sched: maintain runnable averages across throttled periods Paul Turner
2012-06-28  2:24 ` [PATCH 14/16] sched: make __update_entity_runnable_avg() fast Paul Turner
2012-07-04 15:41   ` Peter Zijlstra
2012-07-04 17:20     ` Peter Zijlstra
2012-07-09 20:18       ` Benjamin Segall
2012-07-10 10:51         ` Peter Zijlstra
2012-07-12  0:15           ` Paul Turner
2012-07-12 14:30             ` Peter Zijlstra
2012-07-04 16:51   ` Peter Zijlstra
2012-06-28  2:24 ` [PATCH 11/16] sched: replace update_shares weight distribution with per-entity computation Paul Turner
2012-06-28  2:24 ` [PATCH 12/16] sched: refactor update_shares_cpu() -> update_blocked_avgs() Paul Turner
2012-06-29  7:28   ` Namhyung Kim
2012-07-12  0:03     ` Paul Turner
2012-07-05 11:58   ` Peter Zijlstra
2012-07-12  0:11     ` Paul Turner
2012-07-12 14:40       ` Peter Zijlstra
2012-06-28  2:24 ` [PATCH 13/16] sched: update_cfs_shares at period edge Paul Turner
2012-06-28  2:24 ` [PATCH 15/16] sched: implement usage tracking Paul Turner
  -- strict thread matches above, loose matches on Subject: below --
2012-08-23 14:14 [patch 00/16] sched: per-entity load-tracking pjt
2012-08-23 14:14 ` [patch 09/16] sched: normalize tg load contributions against runnable time pjt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87txxuwpde.fsf@sejong.aot.lge.com \
    --to=namhyung@kernel.org \
    --cc=Morten.Rasmussen@arm.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=bsegall@google.com \
    --cc=efault@gmx.de \
    --cc=kamalesh@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nikunj@linux.vnet.ibm.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=pjt@google.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=vatsa@in.ibm.com \
    --cc=venki@google.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.