All of lore.kernel.org
 help / color / mirror / Atom feed
From: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: dhaval@linux.vnet.ibm.com, containers@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, skumar@linux.vnet.ibm.com,
	menage@google.com, torvalds@linux-foundation.org, mingo@elte.hu,
	balbir@linux.vnet.ibm.com
Subject: Re: [PATCH] sched: cpu accounting controller
Date: Fri, 30 Nov 2007 01:48:33 +0530	[thread overview]
Message-ID: <20071129201833.GA18023@linux.vnet.ibm.com> (raw)
In-Reply-To: <20071129113035.bbdf35db.akpm@linux-foundation.org>

On Thu, Nov 29, 2007 at 11:30:35AM -0800, Andrew Morton wrote:
> > 	- Make the accounting scalable on SMP systems (perhaps
> >  	  for 2.6.25)
> 
> That sounds like a rather important todo.  How bad is it now?

It is indeed an important todo. Right now we take a per-group global
lock on every accounting update (which can be very frequent) and hence
it is pretty bad.

Ingo had expressed the need to reintroduce this patch asap and hence I resent 
it w/o attacking the scalability part. I will take a shot at scalability
enhancements tomorrow and send it as a separate patch.

> > +struct cpuacct {
> > +     struct cgroup_subsys_state css;
> > +     spinlock_t lock;
> > +     /* total time used by this class (in nanoseconds) */
> > +     u64 time;
> > +};
> > +
> > +struct cgroup_subsys cpuacct_subsys;
> 
> This can be made static.

This symbol is needed in kernel/cgroup.c file, where it does this:

static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};

and hence it cant be static. Thanks for the rest of your comments. I
have fixed them in this version below:

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>

---
 include/linux/cgroup_subsys.h |    6 ++
 include/linux/cpu_acct.h      |   14 +++++
 init/Kconfig                  |    7 ++
 kernel/Makefile               |    1 
 kernel/cpu_acct.c             |  101 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c                |   27 -----------
 kernel/sched_fair.c           |    5 ++
 kernel/sched_rt.c             |    1 
 8 files changed, 136 insertions(+), 26 deletions(-)

Index: current/include/linux/cgroup_subsys.h
===================================================================
--- current.orig/include/linux/cgroup_subsys.h
+++ current/include/linux/cgroup_subsys.h
@@ -11,6 +11,12 @@
 SUBSYS(cpuset)
 #endif
 
+#ifdef CONFIG_CGROUP_CPUACCT
+SUBSYS(cpuacct)
+#endif
+
+/* */
+
 /* */
 
 #ifdef CONFIG_CGROUP_DEBUG
Index: current/include/linux/cpu_acct.h
===================================================================
--- /dev/null
+++ current/include/linux/cpu_acct.h
@@ -0,0 +1,14 @@
+
+#ifndef _LINUX_CPU_ACCT_H
+#define _LINUX_CPU_ACCT_H
+
+#include <linux/cgroup.h>
+#include <asm/cputime.h>
+
+#ifdef CONFIG_CGROUP_CPUACCT
+extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+#else
+static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+#endif
+
+#endif
Index: current/init/Kconfig
===================================================================
--- current.orig/init/Kconfig
+++ current/init/Kconfig
@@ -354,6 +354,13 @@ config FAIR_CGROUP_SCHED
 
 endchoice
 
+config CGROUP_CPUACCT
+	bool "Simple CPU accounting cgroup subsystem"
+	depends on CGROUPS
+	help
+	  Provides a simple Resource Controller for monitoring the
+	  total CPU consumed by the tasks in a cgroup
+
 config SYSFS_DEPRECATED
 	bool "Create deprecated sysfs files"
 	default y
Index: current/kernel/Makefile
===================================================================
--- current.orig/kernel/Makefile
+++ current/kernel/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
+obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
Index: current/kernel/cpu_acct.c
===================================================================
--- /dev/null
+++ current/kernel/cpu_acct.c
@@ -0,0 +1,101 @@
+/*
+ * kernel/cpu_acct.c - CPU accounting cgroup subsystem
+ *
+ * Copyright (C) Google Inc, 2006
+ *
+ * Developed by Paul Menage (menage@google.com) and Balbir Singh
+ * (balbir@in.ibm.com)
+ *
+ */
+
+/*
+ * Example cgroup subsystem for reporting total CPU usage of tasks in a
+ * cgroup.
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/rcupdate.h>
+
+struct cpuacct {
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+	/* total time used by this class (in nanoseconds) */
+	u64 time;
+};
+
+struct cgroup_subsys cpuacct_subsys;
+
+static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+static inline struct cpuacct *task_ca(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+static struct cgroup_subsys_state *cpuacct_create(
+	struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
+	if (!ca)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&ca->lock);
+	return &ca->css;
+}
+
+static void cpuacct_destroy(struct cgroup_subsys *ss,
+			    struct cgroup *cont)
+{
+	kfree(cgroup_ca(cont));
+}
+
+static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuacct *ca = cgroup_ca(cont);
+
+	return ca->time;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "usage",
+		.read_uint = cpuusage_read,
+	},
+};
+
+static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+void cpuacct_charge(struct task_struct *task, u64 cputime)
+{
+	struct cpuacct *ca;
+	unsigned long flags;
+
+	if (!cpuacct_subsys.active)
+		return;
+	rcu_read_lock();
+	ca = task_ca(task);
+	if (ca) {
+		spin_lock_irqsave(&ca->lock, flags);
+		ca->time += cputime;
+		spin_unlock_irqrestore(&ca->lock, flags);
+	}
+	rcu_read_unlock();
+}
+
+struct cgroup_subsys cpuacct_subsys = {
+	.name = "cpuacct",
+	.create = cpuacct_create,
+	.destroy = cpuacct_destroy,
+	.populate = cpuacct_populate,
+	.subsys_id = cpuacct_subsys_id,
+};
Index: current/kernel/sched.c
===================================================================
--- current.orig/kernel/sched.c
+++ current/kernel/sched.c
@@ -52,6 +52,7 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
+#include <linux/cpu_acct.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
 #include <linux/sysctl.h>
@@ -7221,38 +7222,12 @@ static u64 cpu_shares_read_uint(struct c
 	return (u64) tg->shares;
 }
 
-static u64 cpu_usage_read(struct cgroup *cgrp, struct cftype *cft)
-{
-	struct task_group *tg = cgroup_tg(cgrp);
-	unsigned long flags;
-	u64 res = 0;
-	int i;
-
-	for_each_possible_cpu(i) {
-		/*
-		 * Lock to prevent races with updating 64-bit counters
-		 * on 32-bit arches.
-		 */
-		spin_lock_irqsave(&cpu_rq(i)->lock, flags);
-		res += tg->se[i]->sum_exec_runtime;
-		spin_unlock_irqrestore(&cpu_rq(i)->lock, flags);
-	}
-	/* Convert from ns to ms */
-	do_div(res, NSEC_PER_MSEC);
-
-	return res;
-}
-
 static struct cftype cpu_files[] = {
 	{
 		.name = "shares",
 		.read_uint = cpu_shares_read_uint,
 		.write_uint = cpu_shares_write_uint,
 	},
-	{
-		.name = "usage",
-		.read_uint = cpu_usage_read,
-	},
 };
 
 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
Index: current/kernel/sched_fair.c
===================================================================
--- current.orig/kernel/sched_fair.c
+++ current/kernel/sched_fair.c
@@ -351,6 +351,11 @@ static void update_curr(struct cfs_rq *c
 
 	__update_curr(cfs_rq, curr, delta_exec);
 	curr->exec_start = now;
+	if (entity_is_task(curr)) {
+		struct task_struct *curtask = task_of(curr);
+
+		cpuacct_charge(curtask, delta_exec);
+	}
 }
 
 static inline void
Index: current/kernel/sched_rt.c
===================================================================
--- current.orig/kernel/sched_rt.c
+++ current/kernel/sched_rt.c
@@ -23,6 +23,7 @@ static void update_curr_rt(struct rq *rq
 
 	curr->se.sum_exec_runtime += delta_exec;
 	curr->se.exec_start = rq->clock;
+	cpuacct_charge(curr, delta_exec);
 }
 
 static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)

-- 
Regards,
vatsa

  reply	other threads:[~2007-11-29 20:06 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-13  5:25 Revert for cgroups CPU accounting subsystem patch Paul Menage
     [not found] ` <6599ad830711122125u576e85a6w428466a0ab46dbc6-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-11-13  6:00   ` Srivatsa Vaddagiri
2007-11-13  6:00 ` Srivatsa Vaddagiri
2007-11-13  6:05   ` Paul Menage
2007-11-13  7:00     ` Balbir Singh
2007-11-13  7:10       ` Paul Menage
     [not found]         ` <6599ad830711122310nf7530cfs5ef1fea061b1252c-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-11-13  7:29           ` Balbir Singh
2007-11-13  7:29         ` Balbir Singh
     [not found]           ` <47395277.1060006-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-13  7:34             ` Paul Menage
2007-11-13  7:59             ` Srivatsa Vaddagiri
2007-11-13  7:34           ` Paul Menage
2007-11-13  7:59           ` Srivatsa Vaddagiri
     [not found]             ` <20071113075946.GA14731-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-13  7:59               ` Paul Menage
2007-11-13  7:59             ` Paul Menage
     [not found]       ` <47394B84.8030008-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-13  7:10         ` Paul Menage
2007-11-13  7:48     ` Srivatsa Vaddagiri
     [not found]       ` <20071113074805.GA13499-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-13  7:57         ` Paul Menage
2007-11-13  7:57       ` Paul Menage
2007-11-29 19:17         ` [PATCH] sched: cpu accounting controller Srivatsa Vaddagiri
2007-11-29 19:20           ` Ingo Molnar
     [not found]             ` <20071129192058.GC5238-X9Un+BFzKDI@public.gmane.org>
2007-11-29 19:39               ` Srivatsa Vaddagiri
2007-11-29 19:39                 ` Srivatsa Vaddagiri
     [not found]           ` <20071129191737.GH5681-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-29 19:20             ` Ingo Molnar
2007-11-29 19:30             ` Andrew Morton
2007-11-29 19:30               ` Andrew Morton
2007-11-29 20:18               ` Srivatsa Vaddagiri [this message]
     [not found]                 ` <20071129201833.GA18023-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-30 12:42                   ` [PATCH] sched: cpu accounting controller (V2) Srivatsa Vaddagiri
2007-11-30 12:42                 ` Srivatsa Vaddagiri
2007-11-30 12:35                   ` Ingo Molnar
     [not found]                     ` <20071130123513.GA16690-X9Un+BFzKDI@public.gmane.org>
2007-11-30 13:09                       ` Srivatsa Vaddagiri
2007-11-30 13:09                     ` Srivatsa Vaddagiri
2007-11-30 13:34                       ` Ingo Molnar
     [not found]                       ` <20071130130903.GA30772-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-30 13:34                         ` Ingo Molnar
2007-11-30 12:45                   ` Balbir Singh
     [not found]                     ` <475005F7.7080702-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-30 13:53                       ` Ingo Molnar
2007-11-30 13:53                     ` Ingo Molnar
2007-11-30 14:00                       ` Balbir Singh
     [not found]                       ` <20071130135331.GA7145-X9Un+BFzKDI@public.gmane.org>
2007-11-30 14:00                         ` Balbir Singh
2007-11-30 18:45                       ` Balbir Singh
     [not found]                         ` <47505A53.8080906-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-30 19:46                           ` Ingo Molnar
2007-11-30 19:46                             ` Ingo Molnar
     [not found]                   ` <20071130124225.GM5681-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-30 12:35                     ` Ingo Molnar
2007-11-30 12:45                     ` Balbir Singh
2007-12-01  7:48                     ` Paul Menage
2007-12-01  7:48                   ` Paul Menage
2007-12-01  9:51                     ` Balbir Singh
     [not found]                     ` <6599ad830711302348n49900ca8jfa66fa71fde76a67-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-12-01  9:51                       ` Balbir Singh
     [not found]               ` <20071129113035.bbdf35db.akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
2007-11-29 20:18                 ` [PATCH] sched: cpu accounting controller Srivatsa Vaddagiri
     [not found]         ` <6599ad830711122357i60482475o10c0e0935a9e00c0-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-11-29 19:17           ` Srivatsa Vaddagiri
     [not found]     ` <6599ad830711122205g88aae4fua8dd76cf6e8ab84d-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2007-11-13  7:00       ` Revert for cgroups CPU accounting subsystem patch Balbir Singh
2007-11-13  7:48       ` Srivatsa Vaddagiri
     [not found]   ` <20071113060038.GC3359-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2007-11-13  6:05     ` Paul Menage

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071129201833.GA18023@linux.vnet.ibm.com \
    --to=vatsa@linux.vnet.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=dhaval@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=mingo@elte.hu \
    --cc=skumar@linux.vnet.ibm.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.