public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Paul Turner <pjt@google.com>
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Bharata B Rao <bharata@linux.vnet.ibm.com>,
	Dhaval Giani <dhaval.giani@gmail.com>,
	Balbir Singh <bsingharora@gmail.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>,
	Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>,
	Ingo Molnar <mingo@elte.hu>, Pavel Emelyanov <xemul@openvz.org>,
	Jason Baron <jbaron@redhat.com>
Subject: [patch 06/18] sched: add a timer to handle CFS bandwidth refresh
Date: Thu, 21 Jul 2011 09:43:31 -0700	[thread overview]
Message-ID: <20110721184757.277271273@google.com> (raw)
In-Reply-To: 20110721164325.231521704@google.com

[-- Attachment #1: sched-bwc-bandwidth_timers.patch --]
[-- Type: text/plain, Size: 7522 bytes --]

This patch adds a per-task_group timer which handles the refresh of the global
CFS bandwidth pool.

Since the RT pool is using a similar timer there's some small refactoring to
share this support.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>

---
 kernel/sched.c      |  107 +++++++++++++++++++++++++++++++++++++++++-----------
 kernel/sched_fair.c |   40 +++++++++++++++++--
 2 files changed, 123 insertions(+), 24 deletions(-)

Index: tip/kernel/sched.c
===================================================================
--- tip.orig/kernel/sched.c
+++ tip/kernel/sched.c
@@ -193,10 +193,28 @@ static inline int rt_bandwidth_enabled(v
 	return sysctl_sched_rt_runtime >= 0;
 }
 
-static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
 {
-	ktime_t now;
+	unsigned long delta;
+	ktime_t soft, hard, now;
+
+	for (;;) {
+		if (hrtimer_active(period_timer))
+			break;
+
+		now = hrtimer_cb_get_time(period_timer);
+		hrtimer_forward(period_timer, now, period);
 
+		soft = hrtimer_get_softexpires(period_timer);
+		hard = hrtimer_get_expires(period_timer);
+		delta = ktime_to_ns(ktime_sub(hard, soft));
+		__hrtimer_start_range_ns(period_timer, soft, delta,
+					 HRTIMER_MODE_ABS_PINNED, 0);
+	}
+}
+
+static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
+{
 	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
@@ -204,22 +222,7 @@ static void start_rt_bandwidth(struct rt
 		return;
 
 	raw_spin_lock(&rt_b->rt_runtime_lock);
-	for (;;) {
-		unsigned long delta;
-		ktime_t soft, hard;
-
-		if (hrtimer_active(&rt_b->rt_period_timer))
-			break;
-
-		now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
-		hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-
-		soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
-		hard = hrtimer_get_expires(&rt_b->rt_period_timer);
-		delta = ktime_to_ns(ktime_sub(hard, soft));
-		__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
-				HRTIMER_MODE_ABS_PINNED, 0);
-	}
+	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
@@ -250,6 +253,9 @@ struct cfs_bandwidth {
 	ktime_t period;
 	u64 quota, runtime;
 	s64 hierarchal_quota;
+
+	int idle, timer_active;
+	struct hrtimer period_timer;
 #endif
 };
 
@@ -399,6 +405,28 @@ static inline struct cfs_bandwidth *tg_c
 }
 
 static inline u64 default_cfs_period(void);
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+
+static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+{
+	struct cfs_bandwidth *cfs_b =
+		container_of(timer, struct cfs_bandwidth, period_timer);
+	ktime_t now;
+	int overrun;
+	int idle = 0;
+
+	for (;;) {
+		now = hrtimer_cb_get_time(timer);
+		overrun = hrtimer_forward(timer, now, cfs_b->period);
+
+		if (!overrun)
+			break;
+
+		idle = do_sched_cfs_period_timer(cfs_b, overrun);
+	}
+
+	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
 
 static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
@@ -406,6 +434,9 @@ static void init_cfs_bandwidth(struct cf
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
+
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	cfs_b->period_timer.function = sched_cfs_period_timer;
 }
 
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -413,8 +444,34 @@ static void init_cfs_rq_runtime(struct c
 	cfs_rq->runtime_enabled = 0;
 }
 
+/* requires cfs_b->lock, may release to reprogram timer */
+static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	/*
+	 * The timer may be active because we're trying to set a new bandwidth
+	 * period or because we're racing with the tear-down path
+	 * (timer_active==0 becomes visible before the hrtimer call-back
+	 * terminates).  In either case we ensure that it's re-programmed
+	 */
+	while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
+		raw_spin_unlock(&cfs_b->lock);
+		/* ensure cfs_b->lock is available while we wait */
+		hrtimer_cancel(&cfs_b->period_timer);
+
+		raw_spin_lock(&cfs_b->lock);
+		/* if someone else restarted the timer then we're done */
+		if (cfs_b->timer_active)
+			return;
+	}
+
+	cfs_b->timer_active = 1;
+	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+}
+
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
-{}
+{
+	hrtimer_cancel(&cfs_b->period_timer);
+}
 #else
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
@@ -8892,7 +8949,7 @@ static int __cfs_schedulable(struct task
 
 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 {
-	int i, ret = 0;
+	int i, ret = 0, runtime_enabled;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
 
 	if (tg == &root_task_group)
@@ -8919,10 +8976,18 @@ static int tg_set_cfs_bandwidth(struct t
 	if (ret)
 		goto out_unlock;
 
+	runtime_enabled = quota != RUNTIME_INF;
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
 	cfs_b->runtime = quota;
+
+	/* restart the period timer (if active) to handle new period expiry */
+	if (runtime_enabled && cfs_b->timer_active) {
+		/* force a reprogram */
+		cfs_b->timer_active = 0;
+		__start_cfs_bandwidth(cfs_b);
+	}
 	raw_spin_unlock_irq(&cfs_b->lock);
 
 	for_each_possible_cpu(i) {
@@ -8930,7 +8995,7 @@ static int tg_set_cfs_bandwidth(struct t
 		struct rq *rq = rq_of(cfs_rq);
 
 		raw_spin_lock_irq(&rq->lock);
-		cfs_rq->runtime_enabled = quota != RUNTIME_INF;
+		cfs_rq->runtime_enabled = runtime_enabled;
 		cfs_rq->runtime_remaining = 0;
 		raw_spin_unlock_irq(&rq->lock);
 	}
Index: tip/kernel/sched_fair.c
===================================================================
--- tip.orig/kernel/sched_fair.c
+++ tip/kernel/sched_fair.c
@@ -1306,9 +1306,16 @@ static void assign_cfs_rq_runtime(struct
 	raw_spin_lock(&cfs_b->lock);
 	if (cfs_b->quota == RUNTIME_INF)
 		amount = min_amount;
-	else if (cfs_b->runtime > 0) {
-		amount = min(cfs_b->runtime, min_amount);
-		cfs_b->runtime -= amount;
+	else {
+		/* ensure bandwidth timer remains active under consumption */
+		if (!cfs_b->timer_active)
+			__start_cfs_bandwidth(cfs_b);
+
+		if (cfs_b->runtime > 0) {
+			amount = min(cfs_b->runtime, min_amount);
+			cfs_b->runtime -= amount;
+			cfs_b->idle = 0;
+		}
 	}
 	raw_spin_unlock(&cfs_b->lock);
 
@@ -1337,6 +1344,33 @@ static __always_inline void account_cfs_
 	__account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
+/*
+ * Responsible for refilling a task_group's bandwidth and unthrottling its
+ * cfs_rqs as appropriate. If there has been no activity within the last
+ * period the timer is deactivated until scheduling resumes; cfs_b->idle is
+ * used to track this state.
+ */
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
+{
+	int idle = 1;
+
+	raw_spin_lock(&cfs_b->lock);
+	/* no need to continue the timer with no bandwidth constraint */
+	if (cfs_b->quota == RUNTIME_INF)
+		goto out_unlock;
+
+	idle = cfs_b->idle;
+	cfs_b->runtime = cfs_b->quota;
+
+	/* mark as potentially idle for the upcoming period */
+	cfs_b->idle = 1;
+out_unlock:
+	if (idle)
+		cfs_b->timer_active = 0;
+	raw_spin_unlock(&cfs_b->lock);
+
+	return idle;
+}
 #else
 static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 				     unsigned long delta_exec) {}



  parent reply	other threads:[~2011-07-21 23:01 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-21 16:43 [patch 00/18] CFS Bandwidth Control v7.2 Paul Turner
2011-07-21 16:43 ` [patch 01/18] sched: (fixlet) dont update shares twice on on_rq parent Paul Turner
2011-07-22 11:06   ` Kamalesh Babulal
2011-07-21 16:43 ` [patch 02/18] sched: hierarchical task accounting for SCHED_OTHER Paul Turner
2011-08-14 16:15   ` [tip:sched/core] sched: Implement " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 03/18] sched: introduce primitives to account for CFS bandwidth tracking Paul Turner
2011-07-22 11:14   ` Kamalesh Babulal
2011-08-14 16:17   ` [tip:sched/core] sched: Introduce " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 04/18] sched: validate CFS quota hierarchies Paul Turner
2011-08-14 16:19   ` [tip:sched/core] sched: Validate " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 05/18] sched: accumulate per-cfs_rq cpu usage and charge against bandwidth Paul Turner
2011-08-14 16:21   ` [tip:sched/core] sched: Accumulate " tip-bot for Paul Turner
2011-07-21 16:43 ` Paul Turner [this message]
2011-08-14 16:23   ` [tip:sched/core] sched: Add a timer to handle CFS bandwidth refresh tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 07/18] sched: expire invalid runtime Paul Turner
2011-08-14 16:24   ` [tip:sched/core] sched: Expire " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 08/18] sched: add support for throttling group entities Paul Turner
2011-08-08 15:46   ` Lin Ming
2011-08-08 16:00     ` Peter Zijlstra
2011-08-08 16:16       ` Paul Turner
2011-08-14 16:26   ` [tip:sched/core] sched: Add " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 09/18] sched: add support for unthrottling " Paul Turner
2011-08-14 16:27   ` [tip:sched/core] sched: Add " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 10/18] sched: allow for positional tg_tree walks Paul Turner
2011-08-14 16:29   ` [tip:sched/core] sched: Allow " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 11/18] sched: prevent interactions with throttled entities Paul Turner
2011-07-22 11:26   ` Kamalesh Babulal
2011-07-22 11:37     ` Peter Zijlstra
2011-07-22 11:41   ` Kamalesh Babulal
2011-07-22 11:43     ` Peter Zijlstra
2011-07-22 18:16       ` Kamalesh Babulal
2011-08-14 16:30   ` [tip:sched/core] sched: Prevent " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 12/18] sched: prevent buddy " Paul Turner
2011-08-14 16:32   ` [tip:sched/core] sched: Prevent " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 13/18] sched: migrate throttled tasks on HOTPLUG Paul Turner
2011-08-14 16:34   ` [tip:sched/core] sched: Migrate " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 14/18] sched: throttle entities exceeding their allowed bandwidth Paul Turner
2011-08-14 16:35   ` [tip:sched/core] sched: Throttle " tip-bot for Paul Turner
2011-07-21 16:43 ` [patch 15/18] sched: add exports tracking cfs bandwidth control statistics Paul Turner
2011-08-14 16:37   ` [tip:sched/core] sched: Add " tip-bot for Nikhil Rao
2011-07-21 16:43 ` [patch 16/18] sched: return unused runtime on group dequeue Paul Turner
2011-08-14 16:39   ` [tip:sched/core] sched: Return " tip-bot for Paul Turner
2011-07-21 16:43 ` [RFT][patch 17/18] sched: use jump labels to reduce overhead when bandwidth control is inactive Paul Turner
2011-07-21 16:43 ` [patch 18/18] sched: add documentation for bandwidth control Paul Turner
2011-08-14 16:41   ` [tip:sched/core] sched: Add " tip-bot for Bharata B Rao
2011-07-21 23:01 ` [patch 00/18] CFS Bandwidth Control v7.2 Paul Turner
2011-07-25 14:58 ` Peter Zijlstra
2011-07-25 15:00   ` Peter Zijlstra
2011-07-25 16:21     ` Paul E. McKenney
2011-07-25 16:28       ` Peter Zijlstra
2011-07-25 16:46         ` Paul E. McKenney
2011-07-25 17:08           ` Peter Zijlstra
2011-07-25 17:11             ` Dhaval Giani
2011-07-25 17:35               ` Peter Zijlstra
2011-07-28  2:59     ` Paul Turner
2011-09-13 12:10 ` Vladimir Davydov
2011-09-13 14:00   ` Peter Zijlstra
2011-09-16  8:06   ` Paul Turner
2011-09-19  8:22     ` Vladimir Davydov
2011-09-19  8:33       ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110721184757.277271273@google.com \
    --to=pjt@google.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=bsingharora@gmail.com \
    --cc=dhaval.giani@gmail.com \
    --cc=jbaron@redhat.com \
    --cc=kamalesh@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=seto.hidetoshi@jp.fujitsu.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=vatsa@in.ibm.com \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox