public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
To: Shailabh Nagar <nagar1234@in.ibm.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Venkatesh Pallipadi <venki@google.com>,
	Suresh Siddha <suresh.b.siddha@intel.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Ingo Molnar <mingo@elte.hu>, Oleg Nesterov <oleg@redhat.com>,
	John stultz <johnstul@us.ibm.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	Martin Schwidefsky <schwidefsky@de.ibm.com>,
	Heiko Carstens <heiko.carstens@de.ibm.com>,
	Roland McGrath <roland@redhat.com>
Cc: linux-kernel@vger.kernel.org, linux-s390@vger.kernel.org
Subject: [RFC][PATCH v2 4/7] taskstats: Add per task steal time accounting
Date: Thu, 11 Nov 2010 18:03:56 +0100	[thread overview]
Message-ID: <20101111170815.024542355@linux.vnet.ibm.com> (raw)
In-Reply-To: 20101111170352.732381138@linux.vnet.ibm.com

[-- Attachment #1: 04-taskstats-top-add-sttime.patch --]
[-- Type: text/plain, Size: 13511 bytes --]

From: Michael Holzheu <holzheu@linux.vnet.ibm.com>

Currently, steal time is accounted only for the system as a whole. This
patch adds steal time to the per-task CPU time accounting.
The triplet "user time", "system time" and "steal time" represents
all CPU time consumed on hypervisor-based systems.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
---
 arch/s390/kernel/vtime.c    |   19 +++++++++++--------
 fs/proc/array.c             |    6 +++---
 include/linux/kernel_stat.h |    2 +-
 include/linux/sched.h       |   14 ++++++++------
 include/linux/taskstats.h   |    1 +
 kernel/exit.c               |    9 +++++++--
 kernel/fork.c               |    1 +
 kernel/posix-cpu-timers.c   |    3 +++
 kernel/sched.c              |   26 ++++++++++++++++++++------
 kernel/sys.c                |   10 +++++-----
 kernel/tsacct.c             |    1 +
 11 files changed, 61 insertions(+), 31 deletions(-)

--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -56,31 +56,34 @@ static void do_account_vtime(struct task
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	__u64 timer, clock, user, system, steal;
+	unsigned char clk[16];
 
 	timer = S390_lowcore.last_update_timer;
 	clock = S390_lowcore.last_update_clock;
 	asm volatile ("  STPT %0\n"    /* Store current cpu timer value */
-		      "  STCK %1"      /* Store current tod clock value */
+		      "  STCKE 0(%2)"  /* Store current tod clock value */
 		      : "=m" (S390_lowcore.last_update_timer),
-		        "=m" (S390_lowcore.last_update_clock) );
+		        "=m" (clk) : "a" (clk));
+	S390_lowcore.last_update_clock = *(__u64 *) &clk[1];
+	tsk->acct_time = ((clock - sched_clock_base_cc) * 125) >> 9;
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
 	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
 	user = S390_lowcore.user_timer - ti->user_timer;
-	S390_lowcore.steal_timer -= user;
 	ti->user_timer = S390_lowcore.user_timer;
 	account_user_time(tsk, user, user);
 
 	system = S390_lowcore.system_timer - ti->system_timer;
-	S390_lowcore.steal_timer -= system;
 	ti->system_timer = S390_lowcore.system_timer;
 	account_system_time(tsk, hardirq_offset, system, system);
 
 	steal = S390_lowcore.steal_timer;
-	if ((s64) steal > 0) {
-		S390_lowcore.steal_timer = 0;
-		account_steal_time(steal);
-	}
+	S390_lowcore.steal_timer = 0;
+	if (steal >= user + system)
+		steal -= user + system;
+	else
+		steal = 0;
+	account_steal_time(tsk, steal);
 }
 
 void account_vtime(struct task_struct *prev, struct task_struct *next)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -375,7 +375,7 @@ static int do_task_stat(struct seq_file
 	unsigned long long start_time;
 	unsigned long cmin_flt = 0, cmaj_flt = 0;
 	unsigned long  min_flt = 0,  maj_flt = 0;
-	cputime_t cutime, cstime, utime, stime;
+	cputime_t cutime, cstime, utime, stime, sttime;
 	cputime_t cgtime, gtime;
 	unsigned long rsslim = 0;
 	char tcomm[sizeof(task->comm)];
@@ -432,7 +432,7 @@ static int do_task_stat(struct seq_file
 
 			min_flt += sig->min_flt;
 			maj_flt += sig->maj_flt;
-			thread_group_times(task, &utime, &stime);
+			thread_group_times(task, &utime, &stime, &sttime);
 			gtime = cputime_add(gtime, sig->gtime);
 		}
 
@@ -448,7 +448,7 @@ static int do_task_stat(struct seq_file
 	if (!whole) {
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
-		task_times(task, &utime, &stime);
+		task_times(task, &utime, &stime, &sttime);
 		gtime = task->gtime;
 	}
 
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -116,7 +116,7 @@ extern unsigned long long task_delta_exe
 
 extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
 extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
-extern void account_steal_time(cputime_t);
+extern void account_steal_time(struct task_struct *, cputime_t);
 extern void account_idle_time(cputime_t);
 
 extern void account_process_tick(struct task_struct *, int user);
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -470,6 +470,7 @@ struct cpu_itimer {
 struct task_cputime {
 	cputime_t utime;
 	cputime_t stime;
+	cputime_t sttime;
 	unsigned long long sum_exec_runtime;
 };
 /* Alternate field names when used to cache expirations. */
@@ -481,6 +482,7 @@ struct task_cputime {
 	(struct task_cputime) {					\
 		.utime = cputime_zero,				\
 		.stime = cputime_zero,				\
+		.sttime = cputime_zero,				\
 		.sum_exec_runtime = 0,				\
 	}
 
@@ -582,11 +584,11 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
-	cputime_t utime, stime, cutime, cstime;
+	cputime_t utime, stime, sttime, cutime, cstime, csttime;
 	cputime_t gtime;
 	cputime_t cgtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	cputime_t prev_utime, prev_stime;
+	cputime_t prev_utime, prev_stime, prev_sttime;
 #endif
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
@@ -1294,10 +1296,10 @@ struct task_struct {
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
-	cputime_t utime, stime, utimescaled, stimescaled;
+	cputime_t utime, stime, sttime, utimescaled, stimescaled;
 	cputime_t gtime;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-	cputime_t prev_utime, prev_stime;
+	cputime_t prev_utime, prev_stime, prev_sttime;
 #endif
 	unsigned long long acct_time;		/* Time for last accounting */
 	unsigned long nvcsw, nivcsw; /* context switch counts */
@@ -1694,8 +1696,8 @@ static inline void put_task_struct(struc
 		__put_task_struct(t);
 }
 
-extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
-extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st, cputime_t *stt);
+extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st, cputime_t *stt);
 
 /*
  * Per process flags
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -168,6 +168,7 @@ struct taskstats {
 	/* Timestamp where data has been collected in ns since boot time */
 	__u64	time_ns;
 	__u32	ac_tgid;		/* Thread group ID */
+	__u64	ac_sttime;		/* Steal CPU time [usec] */
 };
 
 
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -124,6 +124,7 @@ static void __exit_signal(struct task_st
 		 */
 		sig->utime = cputime_add(sig->utime, tsk->utime);
 		sig->stime = cputime_add(sig->stime, tsk->stime);
+		sig->sttime = cputime_add(sig->sttime, tsk->sttime);
 		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -1228,7 +1229,7 @@ static int wait_task_zombie(struct wait_
 		struct signal_struct *psig;
 		struct signal_struct *sig;
 		unsigned long maxrss;
-		cputime_t tgutime, tgstime;
+		cputime_t tgutime, tgstime, tgsttime;
 
 		/*
 		 * The resource counters for the group leader are in its
@@ -1249,7 +1250,7 @@ static int wait_task_zombie(struct wait_
 		 * group, which consolidates times for all threads in the
 		 * group including the group leader.
 		 */
-		thread_group_times(p, &tgutime, &tgstime);
+		thread_group_times(p, &tgutime, &tgstime, &tgsttime);
 		spin_lock_irq(&p->real_parent->sighand->siglock);
 		psig = p->real_parent->signal;
 		sig = p->signal;
@@ -1261,6 +1262,10 @@ static int wait_task_zombie(struct wait_
 			cputime_add(psig->cstime,
 			cputime_add(tgstime,
 				    sig->cstime));
+		psig->csttime =
+			cputime_add(psig->csttime,
+			cputime_add(tgsttime,
+				    sig->csttime));
 		psig->cgtime =
 			cputime_add(psig->cgtime,
 			cputime_add(p->gtime,
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1062,6 +1062,7 @@ static struct task_struct *copy_process(
 
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
+	p->sttime = cputime_zero;
 	p->gtime = cputime_zero;
 	p->utimescaled = cputime_zero;
 	p->stimescaled = cputime_zero;
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -237,6 +237,7 @@ void thread_group_cputime(struct task_st
 
 	times->utime = sig->utime;
 	times->stime = sig->stime;
+	times->sttime = sig->sttime;
 	times->sum_exec_runtime = sig->sum_sched_runtime;
 
 	rcu_read_lock();
@@ -248,6 +249,7 @@ void thread_group_cputime(struct task_st
 	do {
 		times->utime = cputime_add(times->utime, t->utime);
 		times->stime = cputime_add(times->stime, t->stime);
+		times->sttime = cputime_add(times->sttime, t->sttime);
 		times->sum_exec_runtime += t->se.sum_exec_runtime;
 	} while_each_thread(tsk, t);
 out:
@@ -1276,6 +1278,7 @@ static inline int fastpath_timer_check(s
 		struct task_cputime task_sample = {
 			.utime = tsk->utime,
 			.stime = tsk->stime,
+			.sttime = tsk->sttime,
 			.sum_exec_runtime = tsk->se.sum_exec_runtime
 		};
 
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3542,11 +3542,15 @@ void account_system_time(struct task_str
  * Account for involuntary wait time.
  * @steal: the cpu time spent in involuntary wait
  */
-void account_steal_time(cputime_t cputime)
+void account_steal_time(struct task_struct *p, cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t cputime64 = cputime_to_cputime64(cputime);
 
+	/* Add steal time to process. */
+	p->sttime = cputime_add(p->sttime, cputime);
+
+	/* Add steal time to cpustat. */
 	cpustat->steal = cputime64_add(cpustat->steal, cputime64);
 }
 
@@ -3594,7 +3598,7 @@ void account_process_tick(struct task_st
  */
 void account_steal_ticks(unsigned long ticks)
 {
-	account_steal_time(jiffies_to_cputime(ticks));
+	account_steal_time(current, jiffies_to_cputime(ticks));
 }
 
 /*
@@ -3612,13 +3616,16 @@ void account_idle_ticks(unsigned long ti
  * Use precise platform statistics if available:
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+		cputime_t *stt)
 {
 	*ut = p->utime;
 	*st = p->stime;
+	*stt = p->sttime;
 }
 
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+			cputime_t *stt)
 {
 	struct task_cputime cputime;
 
@@ -3626,6 +3633,7 @@ void thread_group_times(struct task_stru
 
 	*ut = cputime.utime;
 	*st = cputime.stime;
+	*stt = cputime.sttime;
 }
 #else
 
@@ -3633,7 +3641,8 @@ void thread_group_times(struct task_stru
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+		cputime_t *stt)
 {
 	cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
 
@@ -3656,15 +3665,18 @@ void task_times(struct task_struct *p, c
 	 */
 	p->prev_utime = max(p->prev_utime, utime);
 	p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
+	p->prev_sttime = cputime_zero;
 
 	*ut = p->prev_utime;
 	*st = p->prev_stime;
+	*stt = p->prev_sttime;
 }
 
 /*
  * Must be called with siglock held.
  */
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+			cputime_t *stt)
 {
 	struct signal_struct *sig = p->signal;
 	struct task_cputime cputime;
@@ -3687,9 +3699,11 @@ void thread_group_times(struct task_stru
 	sig->prev_utime = max(sig->prev_utime, utime);
 	sig->prev_stime = max(sig->prev_stime,
 			      cputime_sub(rtime, sig->prev_utime));
+	sig->prev_sttime = cputime_zero;
 
 	*ut = sig->prev_utime;
 	*st = sig->prev_stime;
+	*stt = sig->prev_sttime;
 }
 #endif
 
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -880,10 +880,10 @@ change_okay:
 
 void do_sys_times(struct tms *tms)
 {
-	cputime_t tgutime, tgstime, cutime, cstime;
+	cputime_t tgutime, tgstime, tgsttime, cutime, cstime;
 
 	spin_lock_irq(&current->sighand->siglock);
-	thread_group_times(current, &tgutime, &tgstime);
+	thread_group_times(current, &tgutime, &tgstime, &tgsttime);
 	cutime = current->signal->cutime;
 	cstime = current->signal->cstime;
 	spin_unlock_irq(&current->sighand->siglock);
@@ -1488,14 +1488,14 @@ static void k_getrusage(struct task_stru
 {
 	struct task_struct *t;
 	unsigned long flags;
-	cputime_t tgutime, tgstime, utime, stime;
+	cputime_t tgutime, tgstime, tgsttime, utime, stime, sttime;
 	unsigned long maxrss = 0;
 
 	memset((char *) r, 0, sizeof *r);
 	utime = stime = cputime_zero;
 
 	if (who == RUSAGE_THREAD) {
-		task_times(current, &utime, &stime);
+		task_times(current, &utime, &stime, &sttime);
 		accumulate_thread_rusage(p, r);
 		maxrss = p->signal->maxrss;
 		goto out;
@@ -1521,7 +1521,7 @@ static void k_getrusage(struct task_stru
 				break;
 
 		case RUSAGE_SELF:
-			thread_group_times(p, &tgutime, &tgstime);
+			thread_group_times(p, &tgutime, &tgstime, &tgsttime);
 			utime = cputime_add(utime, tgutime);
 			stime = cputime_add(stime, tgstime);
 			r->ru_nvcsw += p->signal->nvcsw;
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -66,6 +66,7 @@ void bacct_add_tsk(struct taskstats *sta
 	rcu_read_unlock();
 	stats->ac_utime = cputime_to_usecs(tsk->utime);
 	stats->ac_stime = cputime_to_usecs(tsk->stime);
+	stats->ac_sttime = cputime_to_usecs(tsk->sttime);
 	stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
 	stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
 	stats->ac_minflt = tsk->min_flt;


  parent reply	other threads:[~2010-11-11 17:08 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-11 17:03 [RFC][PATCH v2 0/7] taskstats: Enhancements for precise process accounting (version 2) Michael Holzheu
2010-11-11 17:03 ` [RFC][PATCH v2 1/7] taskstats: Add new taskstats command TASKSTATS_CMD_ATTR_PIDS Michael Holzheu
2010-11-13 19:20   ` Peter Zijlstra
2010-11-15 15:53     ` Michael Holzheu
2010-11-15 16:06       ` Peter Zijlstra
2010-11-15 17:09         ` Michael Holzheu
2010-11-15 17:21           ` Peter Zijlstra
2010-11-16 12:16             ` Michael Holzheu
2010-11-16 12:36               ` Peter Zijlstra
2010-11-13 19:39   ` Peter Zijlstra
2010-11-13 20:00     ` Balbir Singh
2010-11-15 14:50     ` Michael Holzheu
2010-11-11 17:03 ` [RFC][PATCH v2 2/7] taskstats: Add "/proc/taskstats" Michael Holzheu
2010-11-11 17:03 ` [RFC][PATCH v2 3/7] taskstats: Add thread group ID to taskstats structure Michael Holzheu
2010-11-11 17:03 ` Michael Holzheu [this message]
2010-11-13 19:38   ` [RFC][PATCH v2 4/7] taskstats: Add per task steal time accounting Peter Zijlstra
2010-11-15 14:50     ` Martin Schwidefsky
2010-11-15 15:11       ` Peter Zijlstra
2010-11-15 17:42         ` Martin Schwidefsky
2010-11-15 17:45           ` Peter Zijlstra
2010-11-15 17:47           ` Peter Zijlstra
2010-11-15 17:48           ` Peter Zijlstra
2010-11-15 17:50           ` Peter Zijlstra
2010-11-15 17:59             ` Martin Schwidefsky
2010-11-15 18:08               ` Peter Zijlstra
2010-11-16  8:51                 ` Martin Schwidefsky
2010-11-16 12:16                   ` Peter Zijlstra
2010-11-16 15:33                     ` Martin Schwidefsky
2010-11-16 15:45                       ` Peter Zijlstra
2010-11-16 16:05                         ` Martin Schwidefsky
2010-11-16 18:39                           ` Jeremy Fitzhardinge
2010-11-16 16:38                         ` Avi Kivity
2010-11-16 16:43                           ` Peter Zijlstra
2010-11-16 16:56                             ` Avi Kivity
2010-11-16 17:06                               ` Avi Kivity
2010-11-11 17:03 ` [RFC][PATCH v2 5/7] taskstats: Improve cumulative CPU " Michael Holzheu
2010-11-13 18:38   ` Oleg Nesterov
2010-11-15 15:55     ` Martin Schwidefsky
2010-11-15 16:03       ` Peter Zijlstra
2010-11-15 17:49         ` Martin Schwidefsky
2010-11-15 17:51           ` Peter Zijlstra
2010-11-15 18:00             ` Martin Schwidefsky
2010-11-15 18:10               ` Peter Zijlstra
2010-11-16  8:54                 ` Martin Schwidefsky
2010-11-16 16:57     ` Michael Holzheu
2010-11-18 17:10       ` Oleg Nesterov
2010-11-19 19:46         ` Michael Holzheu
2010-11-16 17:34     ` Michael Holzheu
2010-11-16 17:50       ` Oleg Nesterov
2010-11-18 16:34       ` Oleg Nesterov
2010-11-11 17:03 ` [RFC][PATCH v2 6/7] taskstats: Fix accounting for non-leader thread exec Michael Holzheu
2010-11-11 17:11 ` [RFC][PATCH v2 7/7] taskstats: Precise process accounting user space Michael Holzheu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101111170815.024542355@linux.vnet.ibm.com \
    --to=holzheu@linux.vnet.ibm.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=heiko.carstens@de.ibm.com \
    --cc=johnstul@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nagar1234@in.ibm.com \
    --cc=oleg@redhat.com \
    --cc=roland@redhat.com \
    --cc=schwidefsky@de.ibm.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=tglx@linutronix.de \
    --cc=venki@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox