All of lore.kernel.org
 help / color / mirror / Atom feed
From: Glauber Costa <glommer@parallels.com>
To: linux-kernel@vger.kernel.org
Cc: paul@paulmenage.org, lizf@cn.fujitsu.com, daniel.lezcano@free.fr,
	a.p.zijlstra@chello.nl, jbottomley@parallels.com,
	Glauber Costa <glommer@parallels.com>
Subject: [RFD 3/9] Display /proc/stat information per cgroup
Date: Fri, 23 Sep 2011 19:20:26 -0300	[thread overview]
Message-ID: <1316816432-9237-4-git-send-email-glommer@parallels.com> (raw)
In-Reply-To: <1316816432-9237-1-git-send-email-glommer@parallels.com>

Each cgroup has its own file, cpu.proc.stat that will
display the exact same format as /proc/stat. Users
that want to have access to a per-cgroup version of
this information, can query it for that purpose.

Signed-off-by: Glauber Costa <glommer@parallels.com>
---
 include/linux/kernel_stat.h |    2 +
 kernel/sched.c              |   91 +++++++++++++++++++++++++++++++++---------
 2 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 93f64f3..e82c937 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -27,6 +27,8 @@ enum cpu_usage_stat {
 	STEAL,
 	GUEST,
 	GUEST_NICE,
+	IDLE_BASE,
+	IOWAIT_BASE,
 	NR_STATS,
 };
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 4d831fe..a272257 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3756,6 +3756,20 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
 	return ns;
 }
 
+static inline void task_cgroup_account_field(struct task_struct *p,
+					     cputime64_t tmp, int index)
+{
+	struct kernel_stat *kstat;
+	struct task_group *tg = task_group(p);
+
+	do {
+		kstat = this_cpu_ptr(tg->cpustat);
+		kstat->cpustat[index] = cputime64_add(kstat->cpustat[index],
+						      tmp);
+		tg = tg->parent;
+	} while (tg);
+}
+
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -3763,9 +3777,8 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
  * @cputime_scaled: cputime scaled by cpu frequency
  */
 void account_user_time(struct task_struct *p, cputime_t cputime,
-		       cputime_t cputime_scaled)
+		cputime_t cputime_scaled)
 {
-	cputime64_t *cpustat = kstat_this_cpu->cpustat;
 	cputime64_t tmp;
 
 	/* Add user time to process. */
@@ -3775,10 +3788,11 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
+
 	if (TASK_NICE(p) > 0)
-		cpustat[NICE] = cputime64_add(cpustat[NICE], tmp);
+		task_cgroup_account_field(p, tmp, NICE);
 	else
-		cpustat[USER] = cputime64_add(cpustat[USER], tmp);
+		task_cgroup_account_field(p, tmp, USER);
 
 	cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
 	/* Account for user time used */
@@ -3824,7 +3838,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
  */
 static inline
 void __account_system_time(struct task_struct *p, cputime_t cputime,
-			cputime_t cputime_scaled, cputime64_t *target_cputime64)
+			cputime_t cputime_scaled, int index)
 {
 	cputime64_t tmp = cputime_to_cputime64(cputime);
 
@@ -3834,7 +3848,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
-	*target_cputime64 = cputime64_add(*target_cputime64, tmp);
+	task_cgroup_account_field(p, tmp, index);
 	cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
 
 	/* Account for system time used */
@@ -3851,8 +3865,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 void account_system_time(struct task_struct *p, int hardirq_offset,
 			 cputime_t cputime, cputime_t cputime_scaled)
 {
-	cputime64_t *cpustat = kstat_this_cpu->cpustat;
-	cputime64_t *target_cputime64;
+	int index;
 
 	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
 		account_guest_time(p, cputime, cputime_scaled);
@@ -3860,13 +3873,13 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	}
 
 	if (hardirq_count() - hardirq_offset)
-		target_cputime64 = &cpustat[IRQ];
+		index = IRQ;
 	else if (in_serving_softirq())
-		target_cputime64 = &cpustat[SOFTIRQ];
+		index = SOFTIRQ;
 	else
-		target_cputime64 = &cpustat[SYSTEM];
+		index = SYSTEM;
 
-	__account_system_time(p, cputime, cputime_scaled, target_cputime64);
+	__account_system_time(p, cputime, cputime_scaled, index);
 }
 
 /*
@@ -3941,27 +3954,29 @@ static __always_inline bool steal_account_process_tick(void)
  * softirq as those do not count in task exec_runtime any more.
  */
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-						struct rq *rq)
+					 struct rq *rq)
 {
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
-	cputime64_t *cpustat = kstat_this_cpu->cpustat;
+	struct task_group *tg;
 
 	if (steal_account_process_tick())
 		return;
 
+	tg = task_group(p);
+
 	if (irqtime_account_hi_update()) {
-		cpustat[IRQ] = cputime64_add(cpustat[IRQ], tmp);
+		task_cgroup_account_field(p, tmp, IRQ);
 	} else if (irqtime_account_si_update()) {
-		cpustat[SOFTIRQ] = cputime64_add(cpustat[SOFTIRQ], tmp);
+		task_cgroup_account_field(p, tmp, SOFTIRQ);
 	} else if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
 		 * So, we have to handle it separately here.
 		 * Also, p->stime needs to be updated for ksoftirqd.
 		 */
-		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-					&cpustat[SOFTIRQ]);
+		__account_system_time(p, cputime_one_jiffy,
+				      one_jiffy_scaled, SOFTIRQ);
 	} else if (user_tick) {
 		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
 	} else if (p == rq->idle) {
@@ -3969,8 +3984,8 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 	} else if (p->flags & PF_VCPU) { /* System time or guest time */
 		account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
 	} else {
-		__account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
-					&cpustat[SYSTEM]);
+		__account_system_time(p, cputime_one_jiffy,
+				      one_jiffy_scaled, SYSTEM);
 	}
 }
 
@@ -8038,6 +8053,7 @@ void __init sched_init(void)
 {
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
+	struct kernel_stat *kstat;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	alloc_size += 2 * nr_cpu_ids * sizeof(void **);
@@ -8092,6 +8108,18 @@ void __init sched_init(void)
 	INIT_LIST_HEAD(&root_task_group.children);
 	INIT_LIST_HEAD(&root_task_group.siblings);
 	autogroup_init(&init_task);
+
+	root_task_group.cpustat = alloc_percpu(struct kernel_stat);
+	/* Failing that early an allocation means we're screwed anyway */
+	BUG_ON(!root_task_group.cpustat);
+	for_each_possible_cpu(i) {
+		kstat = per_cpu_ptr(root_task_group.cpustat, i);
+		kstat->cpustat[IDLE] = 0;
+		kstat->cpustat[IDLE_BASE] = 0;
+		kstat->cpustat[IOWAIT_BASE] = 0;
+		kstat->cpustat[IOWAIT] = 0;
+	}
+
 #endif /* CONFIG_CGROUP_SCHED */
 
 	for_each_possible_cpu(i) {
@@ -8526,6 +8554,7 @@ static void free_sched_group(struct task_group *tg)
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
 	autogroup_free(tg);
+	free_percpu(tg->cpustat);
 	kfree(tg);
 }
 
@@ -8534,6 +8563,7 @@ struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
 	unsigned long flags;
+	int i;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -8545,6 +8575,19 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
+	tg->cpustat = alloc_percpu(struct kernel_stat);
+	if (!tg->cpustat)
+		goto err;
+
+	for_each_possible_cpu(i) {
+		struct kernel_stat *kstat, *root_kstat;
+
+		kstat = per_cpu_ptr(tg->cpustat, i);
+		root_kstat = per_cpu_ptr(root_task_group.cpustat, i);
+		kstat->cpustat[IDLE_BASE]  = root_kstat->cpustat[IDLE];
+		kstat->cpustat[IOWAIT_BASE] = root_kstat->cpustat[IOWAIT];
+	}
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_add_rcu(&tg->list, &task_groups);
 
@@ -9092,7 +9135,9 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_fil
 		system = cputime64_add(system, kstat->cpustat[SYSTEM]);
 		idle = cputime64_add(idle, root_kstat->cpustat[IDLE]);
 		idle = cputime64_add(idle, arch_idle_time(i));
+		idle = cputime64_sub(idle, kstat->cpustat[IDLE_BASE]);
 		iowait = cputime64_add(iowait, root_kstat->cpustat[IOWAIT]);
+		iowait = cputime64_sub(iowait, kstat->cpustat[IOWAIT_BASE]);
 		irq = cputime64_add(irq, kstat->cpustat[IRQ]);
 		softirq = cputime64_add(softirq, kstat->cpustat[SOFTIRQ]);
 		steal = cputime64_add(steal, kstat->cpustat[STEAL]);
@@ -9134,7 +9179,9 @@ int cpu_cgroup_proc_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_fil
 		system = kstat->cpustat[SYSTEM];
 		idle = root_kstat->cpustat[IDLE];
 		idle = cputime64_add(idle, arch_idle_time(i));
+		idle = cputime64_sub(idle, kstat->cpustat[IDLE_BASE]);
 		iowait = root_kstat->cpustat[IOWAIT];
+		iowait = cputime64_sub(iowait, kstat->cpustat[IOWAIT_BASE]);
 		irq = kstat->cpustat[IRQ];
 		softirq = kstat->cpustat[SOFTIRQ];
 		steal = kstat->cpustat[STEAL];
@@ -9205,6 +9252,10 @@ static struct cftype cpu_files[] = {
 		.write_u64 = cpu_rt_period_write_uint,
 	},
 #endif
+	{
+		.name = "proc.stat",
+		.read_seq_string = cpu_cgroup_proc_stat,
+	},
 };
 
 static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-- 
1.7.6


  parent reply	other threads:[~2011-09-23 22:23 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-23 22:20 [RFD 0/9] per-cgroup /proc/stat statistics Glauber Costa
2011-09-23 22:20 ` [RFD 1/9] Change cpustat fields to an array Glauber Costa
2011-09-27 21:00   ` Peter Zijlstra
2011-09-28 15:13     ` Glauber Costa
2011-09-28 15:23       ` Peter Zijlstra
2011-09-28 18:19     ` Glauber Costa
2011-09-28 19:09       ` Peter Zijlstra
2011-09-28 20:04         ` Glauber Costa
2011-10-01 17:47         ` Glauber Costa
2011-09-27 21:03   ` Peter Zijlstra
2011-09-28 15:14     ` Glauber Costa
2011-09-23 22:20 ` [RFD 2/9] Move /proc/stat logic inside sched.c Glauber Costa
2011-09-23 22:20 ` Glauber Costa [this message]
2011-09-27 17:01   ` [RFD 3/9] Display /proc/stat information per cgroup Balbir Singh
2011-09-27 18:42     ` Glauber Costa
2011-09-27 22:21       ` Peter Zijlstra
2011-09-28 15:22         ` Glauber Costa
2011-09-28 15:23         ` Glauber Costa
2011-09-27 21:48   ` Peter Zijlstra
2011-09-28 15:14     ` Glauber Costa
2011-09-27 21:52   ` Peter Zijlstra
2011-09-28 15:15     ` Glauber Costa
2011-09-23 22:20 ` [RFD 4/9] Make total_forks per-cgroup Glauber Costa
2011-09-27 22:00   ` Peter Zijlstra
2011-09-28  8:13     ` Martin Schwidefsky
2011-09-28 10:35       ` Peter Zijlstra
2011-09-28 12:42         ` Martin Schwidefsky
2011-09-28 12:53           ` Peter Zijlstra
2011-09-28 15:29             ` Glauber Costa
2011-09-28 15:33               ` Peter Zijlstra
2011-09-28 15:35                 ` Glauber Costa
2011-09-28 15:37                   ` Peter Zijlstra
2011-09-28 15:39                     ` Glauber Costa
2011-09-28 15:28           ` Glauber Costa
2011-09-28 15:27         ` Glauber Costa
2011-09-28 15:26       ` Glauber Costa
2011-09-23 22:20 ` [RFD 5/9] per-cgroup boot time Glauber Costa
2011-09-23 22:20 ` [RFD 6/9] Report steal time for cgroup Glauber Costa
2011-09-23 22:20 ` [RFD 7/9] provide a version of cpuacct statistics inside cpu cgroup Glauber Costa
2011-09-23 22:20 ` [RFD 8/9] provide a version of cpuusage " Glauber Costa
2011-09-23 22:20 ` [RFD 9/9] Change CPUACCT to default n Glauber Costa
2011-09-27 22:11 ` [RFD 0/9] per-cgroup /proc/stat statistics Peter Zijlstra
2011-09-28 15:21   ` Glauber Costa
2011-09-28 15:27     ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1316816432-9237-4-git-send-email-glommer@parallels.com \
    --to=glommer@parallels.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=daniel.lezcano@free.fr \
    --cc=jbottomley@parallels.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=paul@paulmenage.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.