linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Bharata B Rao <bharata@linux.vnet.ibm.com>
To: Paul Turner <pjt@google.com>
Cc: linux-kernel@vger.kernel.org,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Dhaval Giani <dhaval.giani@gmail.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@elte.hu>, Pavel Emelyanov <xemul@openvz.org>,
	Nikhil Rao <ncrao@google.com>
Subject: Re: [patch 04/15] sched: throttle cfs_rq entities which exceed their local quota
Date: Thu, 24 Mar 2011 12:06:43 +0530	[thread overview]
Message-ID: <20110324063642.GB2721@in.ibm.com> (raw)
In-Reply-To: <20110323030449.047028257@google.com>

On Tue, Mar 22, 2011 at 08:03:30PM -0700, Paul Turner wrote:
> In account_cfs_rq_quota() (via update_curr()) we track consumption versus a
> cfs_rq's locally assigned quota and whether there is global quota available
> to provide a refill when it runs out.
> 
> In the case that there is no quota remaining it's necessary to throttle so
> that execution ceases until the subsequent period.  While it is at this
> boundary that we detect (and signal for, via resched_task) that a throttle is
> required, the actual operation is deferred until put_prev_entity().
> 
> At this point the cfs_rq is marked as throttled and not re-enqueued; this
> avoids potential interactions with throttled runqueues in the event that we
> are not immediately able to evict the running task.
> 
> Signed-off-by: Paul Turner <pjt@google.com>
> Signed-off-by: Nikhil Rao <ncrao@google.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ---
>  kernel/sched.c      |    2 
>  kernel/sched_fair.c |  117 +++++++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 113 insertions(+), 6 deletions(-)
> 
> Index: tip/kernel/sched.c
> ===================================================================
> --- tip.orig/kernel/sched.c
> +++ tip/kernel/sched.c
> @@ -386,7 +386,7 @@ struct cfs_rq {
>  	unsigned long load_contribution;
>  #endif
>  #ifdef CONFIG_CFS_BANDWIDTH
> -	int quota_enabled;
> +	int quota_enabled, throttled;
>  	s64 quota_remaining;
>  #endif
>  #endif
> Index: tip/kernel/sched_fair.c
> ===================================================================
> --- tip.orig/kernel/sched_fair.c
> +++ tip/kernel/sched_fair.c
> @@ -321,9 +321,6 @@ find_matching_se(struct sched_entity **s
> 
>  #endif	/* CONFIG_FAIR_GROUP_SCHED */
> 
> -static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
> -		                 unsigned long delta_exec);
> -
>  /**************************************************************
>   * Scheduling class tree data structure manipulation methods:
>   */
> @@ -588,6 +585,9 @@ __update_curr(struct cfs_rq *cfs_rq, str
>  #endif
>  }
> 
> +static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
> +		unsigned long delta_exec);
> +
>  static void update_curr(struct cfs_rq *cfs_rq)
>  {
>  	struct sched_entity *curr = cfs_rq->curr;
> @@ -1221,6 +1221,9 @@ static struct sched_entity *pick_next_en
>  	return se;
>  }
> 
> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq);
> +static inline int within_bandwidth(struct cfs_rq *cfs_rq);
> +
>  static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
>  {
>  	/*
> @@ -1230,6 +1233,9 @@ static void put_prev_entity(struct cfs_r
>  	if (prev->on_rq)
>  		update_curr(cfs_rq);
> 
> +	if (!within_bandwidth(cfs_rq))
> +		throttle_cfs_rq(cfs_rq);
> +
>  	check_spread(cfs_rq, prev);
>  	if (prev->on_rq) {
>  		update_stats_wait_start(cfs_rq, prev);
> @@ -1241,6 +1247,8 @@ static void put_prev_entity(struct cfs_r
>  	cfs_rq->curr = NULL;
>  }
> 
> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq);
> +
>  static void
>  entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>  {
> @@ -1249,6 +1257,9 @@ entity_tick(struct cfs_rq *cfs_rq, struc
>  	 */
>  	update_curr(cfs_rq);
> 
> +	/* check that entity's usage is still within quota (if enabled) */
> +	check_cfs_rq_quota(cfs_rq);
> +
>  	/*
>  	 * Update share accounting for long-running entities.
>  	 */
> @@ -1294,6 +1305,46 @@ static inline u64 sched_cfs_bandwidth_sl
>         return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
>  }
> 
> +static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
> +{
> +	return cfs_rq->throttled;
> +}
> +
> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
> +{
> +	struct task_group *tg;
> +	struct sched_entity *se;
> +
> +	if (cfs_rq_throttled(cfs_rq))
> +		return 1;
> +
> +	tg = cfs_rq->tg;
> +	se = tg->se[cpu_of(rq_of(cfs_rq))];
> +	if (!se)
> +		return 0;
> +
> +	for_each_sched_entity(se) {
> +		if (cfs_rq_throttled(cfs_rq_of(se)))
> +			return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static inline int within_bandwidth(struct cfs_rq *cfs_rq)
> +{
> +	return !cfs_rq->quota_enabled || cfs_rq->quota_remaining > 0;
> +}
> +
> +static void check_cfs_rq_quota(struct cfs_rq *cfs_rq)
> +{
> +	if (within_bandwidth(cfs_rq))
> +		return;
> +
> +
> +	resched_task(rq_of(cfs_rq)->curr);
> +}
> +
>  static void request_cfs_rq_quota(struct cfs_rq *cfs_rq)
>  {
>  	struct task_group *tg = cfs_rq->tg;
> @@ -1330,6 +1381,29 @@ static void account_cfs_rq_quota(struct 
>  	request_cfs_rq_quota(cfs_rq);
>  }
> 
> +static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
> +{
> +	struct sched_entity *se;
> +
> +	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
> +
> +	/* account load preceding throttle */
> +	update_cfs_load(cfs_rq, 0);
> +
> +	for_each_sched_entity(se) {
> +		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
> +		/* throttled entity or throttle-on-deactivate */
> +		if (!se->on_rq)
> +			break;
> +
> +		dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
> +		if (qcfs_rq->load.weight)
> +			break;
> +	}
> +
> +	cfs_rq->throttled = 1;
> +}

Since throttling is done from put_prev_entity(), IIUC, you will be
doing the 'put' for current entities, which are not on the tree. Can you
avoid the dequeue_entity() call here? I think it will bail out of the
actual dequeueing anyway (the se != cfs_rq->curr check in dequeue_entity()).

Regards,
Bharata.

  parent reply	other threads:[~2011-03-24  6:36 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-03-23  3:03 [patch 00/15] CFS Bandwidth Control V5 Paul Turner
2011-03-23  3:03 ` [patch 01/15] sched: introduce primitives to account for CFS bandwidth tracking Paul Turner
2011-03-24 12:38   ` Kamalesh Babulal
2011-04-05 13:28   ` Peter Zijlstra
2011-03-23  3:03 ` [patch 02/15] sched: validate CFS quota hierarchies Paul Turner
2011-03-23 10:39   ` torbenh
2011-03-23 20:49     ` Paul Turner
2011-03-24  6:31   ` Bharata B Rao
2011-04-08 17:01     ` Peter Zijlstra
2011-03-29  6:57   ` Hidetoshi Seto
2011-04-04 23:10     ` Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-03-23  3:03 ` [patch 03/15] sched: accumulate per-cfs_rq cpu usage Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-04-06 20:44     ` Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-04-06 20:47     ` Paul Turner
2011-03-23  3:03 ` [patch 04/15] sched: throttle cfs_rq entities which exceed their local quota Paul Turner
2011-03-23  5:09   ` Mike Galbraith
2011-03-23 20:53     ` Paul Turner
2011-03-24  6:36   ` Bharata B Rao [this message]
2011-03-24  7:40     ` Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-04-05 23:15     ` Paul Turner
2011-03-23  3:03 ` [patch 05/15] sched: unthrottle cfs_rq(s) who ran out of quota at period refresh Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-04-05 13:33     ` Peter Zijlstra
2011-04-05 13:28   ` Peter Zijlstra
2011-04-05 13:28   ` Peter Zijlstra
2011-03-23  3:03 ` [patch 06/15] sched: allow for positional tg_tree walks Paul Turner
2011-03-23  3:03 ` [patch 07/15] sched: prevent interactions between throttled entities and load-balance Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-03-23  3:03 ` [patch 08/15] sched: migrate throttled tasks on HOTPLUG Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-04-06  2:31     ` Paul Turner
2011-03-23  3:03 ` [patch 09/15] sched: add exports tracking cfs bandwidth control statistics Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-03-23  3:03 ` [patch 10/15] sched: (fixlet) dont update shares twice on on_rq parent Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-03-23  3:03 ` [patch 11/15] sched: hierarchical task accounting for SCHED_OTHER Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-03-23  3:03 ` [patch 12/15] sched: maintain throttled rqs as a list Paul Turner
2011-04-22  2:50   ` Hidetoshi Seto
2011-04-24 21:23     ` Paul Turner
2011-03-23  3:03 ` [patch 13/15] sched: expire slack quota using generation counters Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-04-06  7:22     ` Paul Turner
2011-04-06  8:15       ` Peter Zijlstra
2011-04-06 11:26       ` Peter Zijlstra
2011-03-23  3:03 ` [patch 14/15] sched: return unused quota on voluntary sleep Paul Turner
2011-04-05 13:28   ` Peter Zijlstra
2011-04-06  2:25     ` Paul Turner
2011-03-23  3:03 ` [patch 15/15] sched: add documentation for bandwidth control Paul Turner
2011-03-24  6:38   ` Bharata B Rao
2011-03-24 16:12 ` [patch 00/15] CFS Bandwidth Control V5 Bharata B Rao
2011-03-31  7:57 ` Xiao Guangrong
2011-04-04 23:10   ` Paul Turner
2011-04-05 13:28 ` Peter Zijlstra
2011-05-20  2:12 ` Test for CFS Bandwidth Control V6 Xiao Guangrong
2011-05-24  0:53   ` Hidetoshi Seto
2011-05-24  7:56     ` Xiao Guangrong
2011-06-08  2:54     ` Paul Turner
2011-06-08  5:55       ` Hidetoshi Seto

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110324063642.GB2721@in.ibm.com \
    --to=bharata@linux.vnet.ibm.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=dhaval.giani@gmail.com \
    --cc=kamalesh@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=ncrao@google.com \
    --cc=pjt@google.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=vatsa@in.ibm.com \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).