All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH -tip] cpuacct: per-cgroup utime/stime statistics - v5
@ 2009-03-24 12:57 Bharata B Rao
  2009-03-25  0:48 ` Li Zefan
  0 siblings, 1 reply; 3+ messages in thread
From: Bharata B Rao @ 2009-03-24 12:57 UTC (permalink / raw)
  To: linux-kernel
  Cc: Balaji Rao, Dhaval Giani, Balbir Singh, Li Zefan, Paul Menage,
	Andrew Morton, Ingo Molnar, Peter Zijlstra, KAMEZAWA Hiroyuki

Hi,

Here is the next and hopefully final version of the cpuacct
statistics patch to obtain per-cgroup system and user times.

Thanks for reviewing and tolerating so many iterations :)

Regards,
Bharata.

cpuacct: Add stime and utime statistics

Add per-cgroup cpuacct controller statistics like the system and user
time consumed by the group of tasks.

Changelog:

v5
- In cpuacct_stats_show(), use cputime64_to_clock_t() since we are
  operating on a 64bit variable here.

v4
- Remove comments in cpuacct_update_stats() which explained why rcu_read_lock()
  was needed (as per Peter Zijlstra's review comments).
- Don't say that percpu_counter_read() is broken in Documentation/cpuacct.txt
  as per KAMEZAWA Hiroyuki's review comments.

v3
- Fix a small race in the cpuacct hierarchy walk.

v2
- stime and utime now exported in clock_t units instead of msecs.
- Addressed the code review comments from Balbir and Li Zefan.
- Moved to -tip tree.

v1
- Moved the stime/utime accounting to cpuacct controller.

Earlier versions
- http://lkml.org/lkml/2009/2/25/129

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Balaji Rao <balajirrao@gmail.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
 Documentation/cgroups/cpuacct.txt |   18 +++++++
 kernel/sched.c                    |   88 +++++++++++++++++++++++++++++++++++---
 2 files changed, 100 insertions(+), 6 deletions(-)

--- a/Documentation/cgroups/cpuacct.txt
+++ b/Documentation/cgroups/cpuacct.txt
@@ -30,3 +30,21 @@ The above steps create a new group g1 an
 process (bash) into it. CPU time consumed by this bash and its children
 can be obtained from g1/cpuacct.usage and the same is accumulated in
 /cgroups/cpuacct.usage also.
+
+The cpuacct.stat file lists a few statistics which further divide the
+CPU time obtained by the cgroup into user and system times. Currently
+the following statistics are supported:
+
+utime: Time spent by tasks of the cgroup in user mode.
+stime: Time spent by tasks of the cgroup in kernel mode.
+
+utime and stime are reported in USER_HZ units.
+
+The cpuacct controller uses the percpu_counter interface to collect utime
+and stime. This has two side effects:
+
+- It is theoretically possible to see wrong values for stime and utime.
+  This is because percpu_counter_read() on 32bit systems isn't safe
+  against concurrent writes.
+- It is possible to see slightly outdated values for stime and utime
+  due to the batch processing nature of percpu_counter.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1442,10 +1442,22 @@ iter_move_one_task(struct rq *this_rq, i
 		   struct rq_iterator *iterator);
 #endif
 
+/* Time spent by the tasks of the cpu accounting group executing in ... */
+enum cpuacct_stat_index {
+	CPUACCT_STAT_UTIME,	/* ... user mode */
+	CPUACCT_STAT_STIME,	/* ... kernel mode */
+
+	CPUACCT_STAT_NSTATS,
+};
+
 #ifdef CONFIG_CGROUP_CPUACCT
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+static void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val);
 #else
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val) {}
 #endif
 
 static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4375,6 +4387,8 @@ void account_user_time(struct task_struc
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+
+	cpuacct_update_stats(p, CPUACCT_STAT_UTIME, cputime);
 	/* Account for user time used */
 	acct_update_integrals(p);
 }
@@ -4436,6 +4450,8 @@ void account_system_time(struct task_str
 	else
 		cpustat->system = cputime64_add(cpustat->system, tmp);
 
+	cpuacct_update_stats(p, CPUACCT_STAT_STIME, cputime);
+
 	/* Account for system time used */
 	acct_update_integrals(p);
 }
@@ -9724,6 +9740,7 @@ struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
 	struct cpuacct *parent;
 };
 
@@ -9748,20 +9765,33 @@ static struct cgroup_subsys_state *cpuac
 	struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
 	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+	int i;
 
 	if (!ca)
-		return ERR_PTR(-ENOMEM);
+		goto out;
 
 	ca->cpuusage = alloc_percpu(u64);
-	if (!ca->cpuusage) {
-		kfree(ca);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!ca->cpuusage)
+		goto out_free_ca;
+
+	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+		if (percpu_counter_init(&ca->cpustat[i], 0))
+			goto out_free_counters;
 
 	if (cgrp->parent)
 		ca->parent = cgroup_ca(cgrp->parent);
 
 	return &ca->css;
+
+out_free_counters:
+	i--;
+	while (i-- >= 0)
+		percpu_counter_destroy(&ca->cpustat[i]);
+	free_percpu(ca->cpuusage);
+out_free_ca:
+	kfree(ca);
+out:
+	return ERR_PTR(-ENOMEM);
 }
 
 /* destroy an existing cpu accounting group */
@@ -9769,7 +9799,10 @@ static void
 cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
+	int i;
 
+	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+		percpu_counter_destroy(&ca->cpustat[i]);
 	free_percpu(ca->cpuusage);
 	kfree(ca);
 }
@@ -9856,6 +9889,25 @@ static int cpuacct_percpu_seq_read(struc
 	return 0;
 }
 
+static const char *cpuacct_stat_desc[] = {
+	[CPUACCT_STAT_UTIME] = "utime",
+	[CPUACCT_STAT_STIME] = "stime",
+};
+
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+		struct cgroup_map_cb *cb)
+{
+	struct cpuacct *ca = cgroup_ca(cgrp);
+	int i;
+
+	for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+		s64 val = percpu_counter_read(&ca->cpustat[i]);
+		val = cputime64_to_clock_t(val);
+		cb->fill(cb, cpuacct_stat_desc[i], val);
+	}
+	return 0;
+}
+
 static struct cftype files[] = {
 	{
 		.name = "usage",
@@ -9866,7 +9918,10 @@ static struct cftype files[] = {
 		.name = "usage_percpu",
 		.read_seq_string = cpuacct_percpu_seq_read,
 	},
-
+	{
+		.name = "stat",
+		.read_map = cpuacct_stats_show,
+	},
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9900,6 +9955,27 @@ static void cpuacct_charge(struct task_s
 	rcu_read_unlock();
 }
 
+/*
+ * Charge the system/user time to the task's accounting group.
+ */
+static void cpuacct_update_stats(struct task_struct *tsk,
+		enum cpuacct_stat_index idx, cputime_t val)
+{
+	struct cpuacct *ca;
+
+	if (unlikely(!cpuacct_subsys.active))
+		return;
+
+	rcu_read_lock();
+	ca = task_ca(tsk);
+
+	do {
+		percpu_counter_add(&ca->cpustat[idx], val);
+		ca = ca->parent;
+	} while (ca);
+	rcu_read_unlock();
+}
+
 struct cgroup_subsys cpuacct_subsys = {
 	.name = "cpuacct",
 	.create = cpuacct_create,

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH -tip] cpuacct: per-cgroup utime/stime statistics - v5
  2009-03-24 12:57 [PATCH -tip] cpuacct: per-cgroup utime/stime statistics - v5 Bharata B Rao
@ 2009-03-25  0:48 ` Li Zefan
  2009-03-25  4:40   ` Bharata B Rao
  0 siblings, 1 reply; 3+ messages in thread
From: Li Zefan @ 2009-03-25  0:48 UTC (permalink / raw)
  To: bharata
  Cc: linux-kernel, Balaji Rao, Dhaval Giani, Balbir Singh, Paul Menage,
	Andrew Morton, Ingo Molnar, Peter Zijlstra, KAMEZAWA Hiroyuki

> cpuacct: Add stime and utime statistics
> 
> Add per-cgroup cpuacct controller statistics like the system and user
> time consumed by the group of tasks.

Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>

except a bug below.

> +	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
> +		if (percpu_counter_init(&ca->cpustat[i], 0))
> +			goto out_free_counters;
>  
>  	if (cgrp->parent)
>  		ca->parent = cgroup_ca(cgrp->parent);
>  
>  	return &ca->css;
> +
> +out_free_counters:
> +	i--;
> +	while (i-- >= 0)
> +		percpu_counter_destroy(&ca->cpustat[i]);

replace "i--; while (i-- >= 0)" with "while (--i >= 0)", otherwise
you'll be accessing ca->cpustat[-1].

> +	free_percpu(ca->cpuusage);
> +out_free_ca:
> +	kfree(ca);
> +out:
> +	return ERR_PTR(-ENOMEM);
>  }

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH -tip] cpuacct: per-cgroup utime/stime statistics - v5
  2009-03-25  0:48 ` Li Zefan
@ 2009-03-25  4:40   ` Bharata B Rao
  0 siblings, 0 replies; 3+ messages in thread
From: Bharata B Rao @ 2009-03-25  4:40 UTC (permalink / raw)
  To: Li Zefan
  Cc: linux-kernel, Balaji Rao, Dhaval Giani, Balbir Singh, Paul Menage,
	Andrew Morton, Ingo Molnar, Peter Zijlstra, KAMEZAWA Hiroyuki

On Wed, Mar 25, 2009 at 08:48:03AM +0800, Li Zefan wrote:
> > cpuacct: Add stime and utime statistics
> > 
> > Add per-cgroup cpuacct controller statistics like the system and user
> > time consumed by the group of tasks.
> 
> Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>

Thanks.

> 
> except a bug below.
> 
> > +	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
> > +		if (percpu_counter_init(&ca->cpustat[i], 0))
> > +			goto out_free_counters;
> >  
> >  	if (cgrp->parent)
> >  		ca->parent = cgroup_ca(cgrp->parent);
> >  
> >  	return &ca->css;
> > +
> > +out_free_counters:
> > +	i--;
> > +	while (i-- >= 0)
> > +		percpu_counter_destroy(&ca->cpustat[i]);
> 
> replace "i--; while (i-- >=0)" to "while (--i >=0)", otherwise
> you'll be accessing ca->cpustat[-1].

Sorry about this. Posted the fixed version.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-03-25  4:40 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-24 12:57 [PATCH -tip] cpuacct: per-cgroup utime/stime statistics - v5 Bharata B Rao
2009-03-25  0:48 ` Li Zefan
2009-03-25  4:40   ` Bharata B Rao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.