[PATCH v2 2/6] sched: accumulate per-cfs_rq cpu usage

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Paul Turner <pjt@google.com>
To: linux-kernel@vger.kernel.org
Cc: Paul Menage <menage@google.com>,
	Srivatsa Vaddagiri <vatsa@in.ibm.com>,
	Dhaval Giani <dhaval@linux.vnet.ibm.com>,
	Gautham R Shenoy <ego@in.ibm.com>,
	Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>,
	Herbert Poetzl <herbert@13thfloor.at>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	Chris Friesen <cfriesen@nortel.com>, Avi Kivity <avi@redhat.com>,
	Bharata B Rao <bharata@linux.vnet.ibm.com>,
	Nikhil Rao <ncrao@google.com>, Ingo Molnar <mingo@elte.hu>,
	Pavel Emelyanov <xemul@openvz.org>,
	Mike Waychison <mikew@google.com>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH v2 2/6] sched: accumulate per-cfs_rq cpu usage
Date: Wed, 28 Apr 2010 04:16:56 -0700	[thread overview]
Message-ID: <20100428111656.7954.19756.stgit@kitami.corp.google.com> (raw)
In-Reply-To: <20100428110720.7954.53537.stgit@kitami.corp.google.com>

Introduce account_cfs_rq_quota() to account bandwidth usage at the cfs_rq
level against task_groups for which bandwidth has been assigned.  Whether a
cfs_rq is subject to this accounting is determined by whether its local
cfs_rq->quota_assigned is finite or infinite (RUNTIME_INF).

For cfs_rq's that belong to a bandwidth-constrained task_group we introduce
tg_request_cfs_quota(), which attempts to allocate quota from the global pool
for use locally.  Updates involving the global pool are currently protected
by cfs_bandwidth->lock; local pools are protected by rq->lock.

This patch only attempts to assign and track quota; no action is taken in the
case that cfs_rq->quota_used exceeds cfs_rq->quota_assigned.

Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Nikhil Rao <ncrao@google.com>
---
 include/linux/sched.h |    4 ++++
 kernel/sched.c        |   13 +++++++++++++
 kernel/sched_fair.c   |   50 +++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sysctl.c       |   10 ++++++++++
 4 files changed, 77 insertions(+), 0 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index dad7f66..8603645 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1903,6 +1903,10 @@ int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
 
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
 extern unsigned int sysctl_sched_compat_yield;
 
 #ifdef CONFIG_RT_MUTEXES
diff --git a/kernel/sched.c b/kernel/sched.c
index 96db602..3b53695 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1912,6 +1912,19 @@ static const struct sched_class rt_sched_class;
  * default: 0.5s
  */
 static u64 sched_cfs_bandwidth_period = 500000000ULL;
+
+/*
+ * Upper bound on the quota moved from the global tg pool to a local cfs_rq
+ * pool per request, in microseconds; sched_cfs_bandwidth_slice() gives ns.
+ * default: 10ms
+ */
+unsigned int sysctl_sched_cfs_bandwidth_slice = 10000UL;
+
+static inline u64 sched_cfs_bandwidth_slice(void)
+{
+	return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
+}
+
 #endif
 
 #define sched_class_highest (&rt_sched_class)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a61bc24..1db1991 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -267,6 +267,16 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
+#ifdef CONFIG_CFS_BANDWIDTH
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+	return &tg->cfs_bandwidth;
+}
+
+static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
+		unsigned long delta_exec);
+#endif /* CONFIG_CFS_BANDWIDTH */
+
 
 /**************************************************************
  * Scheduling class tree data structure manipulation methods:
@@ -546,6 +556,9 @@ static void update_curr(struct cfs_rq *cfs_rq)
 		cpuacct_charge(curtask, delta_exec);
 		account_group_exec_runtime(curtask, delta_exec);
 	}
+#ifdef CONFIG_CFS_BANDWIDTH
+	account_cfs_rq_quota(cfs_rq, delta_exec);
+#endif
 }
 
 static inline void
@@ -1148,6 +1161,43 @@ static void yield_task_fair(struct rq *rq)
 }
 
 #ifdef CONFIG_CFS_BANDWIDTH
+/* Take up to one slice from tg's global pool; returns the amount granted. */
+static u64 tg_request_cfs_quota(struct task_group *tg)
+{
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	u64 delta = sched_cfs_bandwidth_slice();
+
+	/* lockless peek: an empty, finite pool has nothing to hand out */
+	if (cfs_b->runtime == 0 && cfs_b->quota != RUNTIME_INF)
+		return 0;
+
+	raw_spin_lock(&cfs_b->lock);
+	/*
+	 * Re-test under the lock: a bandwidth update may have changed the
+	 * pool.  An unlimited (RUNTIME_INF) group gets a full slice for free.
+	 */
+	if (cfs_b->quota != RUNTIME_INF) {
+		delta = min(cfs_b->runtime, delta);
+		cfs_b->runtime -= delta;
+	}
+	raw_spin_unlock(&cfs_b->lock);
+	return delta;
+}
+
+/* Charge delta_exec to cfs_rq, asking tg for more quota once it runs out. */
+static void account_cfs_rq_quota(struct cfs_rq *cfs_rq,
+		unsigned long delta_exec)
+{
+	/* an infinite local assignment means usage is not tracked */
+	if (cfs_rq->quota_assigned == RUNTIME_INF)
+		return;
+
+	cfs_rq->quota_used += delta_exec;
+	/* local quota consumed: try to pull more from the tg's global pool */
+	if (cfs_rq->quota_used >= cfs_rq->quota_assigned)
+		cfs_rq->quota_assigned += tg_request_cfs_quota(cfs_rq->tg);
+}
+
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
 	return 1;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8686b0f..d0e17ca 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -354,6 +354,16 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.procname	= "sched_cfs_bandwidth_slice_us",
+		.data		= &sysctl_sched_cfs_bandwidth_slice,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",

next prev parent reply	other threads:[~2010-04-28 11:18 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-28 11:16 [PATCH v2 0/6] CFS Bandwidth Control Paul Turner
2010-04-28 11:16 ` [PATCH v2 1/6] sched: introduce primitives to account for CFS bandwidth tracking Paul Turner
2010-04-28 11:16 ` Paul Turner [this message]
2010-04-28 11:17 ` [PATCH v2 3/6] sched: throttle cfs_rq entities which exceed their local quota Paul Turner
2010-04-28 11:17 ` [PATCH v2 4/6] sched: unthrottle cfs_rq(s) who ran out of quota at period refresh Paul Turner
2010-04-28 11:17 ` [PATCH v2 5/6] sched: add exports tracking cfs bandwidth control statistics Paul Turner
2010-04-28 11:17 ` [PATCH v2 6/6] sched: hierarchical task accounting for FAIR_GROUP_SCHED Paul Turner

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:dad7f66 dfblob:8603645 dfblob:96db602 dfblob:3b53695
dfblob:a61bc24 dfblob:1db1991 dfblob:8686b0f dfblob:d0e17ca )
 OR (
bs:"[PATCH v2 2/6] sched: accumulate per-cfs_rq cpu usage" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100428111656.7954.19756.stgit@kitami.corp.google.com \
    --to=pjt@google.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=avi@redhat.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=cfriesen@nortel.com \
    --cc=dhaval@linux.vnet.ibm.com \
    --cc=ego@in.ibm.com \
    --cc=herbert@13thfloor.at \
    --cc=kamalesh@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=mikew@google.com \
    --cc=mingo@elte.hu \
    --cc=ncrao@google.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=vatsa@in.ibm.com \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.