From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
To: Shailabh Nagar <nagar1234@in.ibm.com>,
Andrew Morton <akpm@linux-foundation.org>,
Venkatesh Pallipadi <venki@google.com>,
Suresh Siddha <suresh.b.siddha@intel.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Ingo Molnar <mingo@elte.hu>, Oleg Nesterov <oleg@redhat.com>,
John stultz <johnstul@us.ibm.com>,
Thomas Gleixner <tglx@linutronix.de>,
Balbir Singh <balbir@linux.vnet.ibm.com>,
Martin Schwidefsky <schwidefsky@de.ibm.com>,
Heiko Carstens <heiko.carstens@de.ibm.com>,
Roland McGrath <roland@redhat.com>
Cc: linux-kernel@vger.kernel.org, linux-s390@vger.kernel.org
Subject: [RFC][PATCH v2 4/7] taskstats: Add per task steal time accounting
Date: Thu, 11 Nov 2010 18:03:56 +0100 [thread overview]
Message-ID: <20101111170815.024542355@linux.vnet.ibm.com> (raw)
In-Reply-To: 20101111170352.732381138@linux.vnet.ibm.com
[-- Attachment #1: 04-taskstats-top-add-sttime.patch --]
[-- Type: text/plain, Size: 13511 bytes --]
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Currently steal time is only accounted for the whole system. With this
patch we add steal time to the per task CPU time accounting.
The triplet "user time", "system time" and "steal time" represents
all consumed CPU time on hypervisor based systems.
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
---
arch/s390/kernel/vtime.c | 19 +++++++++++--------
fs/proc/array.c | 6 +++---
include/linux/kernel_stat.h | 2 +-
include/linux/sched.h | 14 ++++++++------
include/linux/taskstats.h | 1 +
kernel/exit.c | 9 +++++++--
kernel/fork.c | 1 +
kernel/posix-cpu-timers.c | 3 +++
kernel/sched.c | 26 ++++++++++++++++++++------
kernel/sys.c | 10 +++++-----
kernel/tsacct.c | 1 +
11 files changed, 61 insertions(+), 31 deletions(-)
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -56,31 +56,34 @@ static void do_account_vtime(struct task
{
struct thread_info *ti = task_thread_info(tsk);
__u64 timer, clock, user, system, steal;
+ unsigned char clk[16];
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
asm volatile (" STPT %0\n" /* Store current cpu timer value */
- " STCK %1" /* Store current tod clock value */
+ " STCKE 0(%2)" /* Store current tod clock value */
: "=m" (S390_lowcore.last_update_timer),
- "=m" (S390_lowcore.last_update_clock) );
+ "=m" (clk) : "a" (clk));
+ S390_lowcore.last_update_clock = *(__u64 *) &clk[1];
+ tsk->acct_time = ((clock - sched_clock_base_cc) * 125) >> 9;
S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
user = S390_lowcore.user_timer - ti->user_timer;
- S390_lowcore.steal_timer -= user;
ti->user_timer = S390_lowcore.user_timer;
account_user_time(tsk, user, user);
system = S390_lowcore.system_timer - ti->system_timer;
- S390_lowcore.steal_timer -= system;
ti->system_timer = S390_lowcore.system_timer;
account_system_time(tsk, hardirq_offset, system, system);
steal = S390_lowcore.steal_timer;
- if ((s64) steal > 0) {
- S390_lowcore.steal_timer = 0;
- account_steal_time(steal);
- }
+ S390_lowcore.steal_timer = 0;
+ if (steal >= user + system)
+ steal -= user + system;
+ else
+ steal = 0;
+ account_steal_time(tsk, steal);
}
void account_vtime(struct task_struct *prev, struct task_struct *next)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -375,7 +375,7 @@ static int do_task_stat(struct seq_file
unsigned long long start_time;
unsigned long cmin_flt = 0, cmaj_flt = 0;
unsigned long min_flt = 0, maj_flt = 0;
- cputime_t cutime, cstime, utime, stime;
+ cputime_t cutime, cstime, utime, stime, sttime;
cputime_t cgtime, gtime;
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
@@ -432,7 +432,7 @@ static int do_task_stat(struct seq_file
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
- thread_group_times(task, &utime, &stime);
+ thread_group_times(task, &utime, &stime, &sttime);
gtime = cputime_add(gtime, sig->gtime);
}
@@ -448,7 +448,7 @@ static int do_task_stat(struct seq_file
if (!whole) {
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- task_times(task, &utime, &stime);
+ task_times(task, &utime, &stime, &sttime);
gtime = task->gtime;
}
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -116,7 +116,7 @@ extern unsigned long long task_delta_exe
extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
-extern void account_steal_time(cputime_t);
+extern void account_steal_time(struct task_struct *, cputime_t);
extern void account_idle_time(cputime_t);
extern void account_process_tick(struct task_struct *, int user);
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -470,6 +470,7 @@ struct cpu_itimer {
struct task_cputime {
cputime_t utime;
cputime_t stime;
+ cputime_t sttime;
unsigned long long sum_exec_runtime;
};
/* Alternate field names when used to cache expirations. */
@@ -481,6 +482,7 @@ struct task_cputime {
(struct task_cputime) { \
.utime = cputime_zero, \
.stime = cputime_zero, \
+ .sttime = cputime_zero, \
.sum_exec_runtime = 0, \
}
@@ -582,11 +584,11 @@ struct signal_struct {
* Live threads maintain their own counters and add to these
* in __exit_signal, except for the group leader.
*/
- cputime_t utime, stime, cutime, cstime;
+ cputime_t utime, stime, sttime, cutime, cstime, csttime;
cputime_t gtime;
cputime_t cgtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
- cputime_t prev_utime, prev_stime;
+ cputime_t prev_utime, prev_stime, prev_sttime;
#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
@@ -1294,10 +1296,10 @@ struct task_struct {
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
- cputime_t utime, stime, utimescaled, stimescaled;
+ cputime_t utime, stime, sttime, utimescaled, stimescaled;
cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
- cputime_t prev_utime, prev_stime;
+ cputime_t prev_utime, prev_stime, prev_sttime;
#endif
unsigned long long acct_time; /* Time for last accounting */
unsigned long nvcsw, nivcsw; /* context switch counts */
@@ -1694,8 +1696,8 @@ static inline void put_task_struct(struc
__put_task_struct(t);
}
-extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
-extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st, cputime_t *stt);
+extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st, cputime_t *stt);
/*
* Per process flags
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -168,6 +168,7 @@ struct taskstats {
/* Timestamp where data has been collected in ns since boot time */
__u64 time_ns;
__u32 ac_tgid; /* Thread group ID */
+ __u64 ac_sttime; /* Steal CPU time [usec] */
};
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -124,6 +124,7 @@ static void __exit_signal(struct task_st
*/
sig->utime = cputime_add(sig->utime, tsk->utime);
sig->stime = cputime_add(sig->stime, tsk->stime);
+ sig->sttime = cputime_add(sig->sttime, tsk->sttime);
sig->gtime = cputime_add(sig->gtime, tsk->gtime);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
@@ -1228,7 +1229,7 @@ static int wait_task_zombie(struct wait_
struct signal_struct *psig;
struct signal_struct *sig;
unsigned long maxrss;
- cputime_t tgutime, tgstime;
+ cputime_t tgutime, tgstime, tgsttime;
/*
* The resource counters for the group leader are in its
@@ -1249,7 +1250,7 @@ static int wait_task_zombie(struct wait_
* group, which consolidates times for all threads in the
* group including the group leader.
*/
- thread_group_times(p, &tgutime, &tgstime);
+ thread_group_times(p, &tgutime, &tgstime, &tgsttime);
spin_lock_irq(&p->real_parent->sighand->siglock);
psig = p->real_parent->signal;
sig = p->signal;
@@ -1261,6 +1262,10 @@ static int wait_task_zombie(struct wait_
cputime_add(psig->cstime,
cputime_add(tgstime,
sig->cstime));
+ psig->csttime =
+ cputime_add(psig->csttime,
+ cputime_add(tgsttime,
+ sig->csttime));
psig->cgtime =
cputime_add(psig->cgtime,
cputime_add(p->gtime,
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1062,6 +1062,7 @@ static struct task_struct *copy_process(
p->utime = cputime_zero;
p->stime = cputime_zero;
+ p->sttime = cputime_zero;
p->gtime = cputime_zero;
p->utimescaled = cputime_zero;
p->stimescaled = cputime_zero;
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -237,6 +237,7 @@ void thread_group_cputime(struct task_st
times->utime = sig->utime;
times->stime = sig->stime;
+ times->sttime = sig->sttime;
times->sum_exec_runtime = sig->sum_sched_runtime;
rcu_read_lock();
@@ -248,6 +249,7 @@ void thread_group_cputime(struct task_st
do {
times->utime = cputime_add(times->utime, t->utime);
times->stime = cputime_add(times->stime, t->stime);
+ times->sttime = cputime_add(times->sttime, t->sttime);
times->sum_exec_runtime += t->se.sum_exec_runtime;
} while_each_thread(tsk, t);
out:
@@ -1276,6 +1278,7 @@ static inline int fastpath_timer_check(s
struct task_cputime task_sample = {
.utime = tsk->utime,
.stime = tsk->stime,
+ .sttime = tsk->sttime,
.sum_exec_runtime = tsk->se.sum_exec_runtime
};
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3542,11 +3542,15 @@ void account_system_time(struct task_str
* Account for involuntary wait time.
* @steal: the cpu time spent in involuntary wait
*/
-void account_steal_time(cputime_t cputime)
+void account_steal_time(struct task_struct *p, cputime_t cputime)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
cputime64_t cputime64 = cputime_to_cputime64(cputime);
+ /* Add steal time to process. */
+ p->sttime = cputime_add(p->sttime, cputime);
+
+ /* Add steal time to cpustat. */
cpustat->steal = cputime64_add(cpustat->steal, cputime64);
}
@@ -3594,7 +3598,7 @@ void account_process_tick(struct task_st
*/
void account_steal_ticks(unsigned long ticks)
{
- account_steal_time(jiffies_to_cputime(ticks));
+ account_steal_time(current, jiffies_to_cputime(ticks));
}
/*
@@ -3612,13 +3616,16 @@ void account_idle_ticks(unsigned long ti
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+ cputime_t *stt)
{
*ut = p->utime;
*st = p->stime;
+ *stt = p->sttime;
}
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+ cputime_t *stt)
{
struct task_cputime cputime;
@@ -3626,6 +3633,7 @@ void thread_group_times(struct task_stru
*ut = cputime.utime;
*st = cputime.stime;
+ *stt = cputime.sttime;
}
#else
@@ -3633,7 +3641,8 @@ void thread_group_times(struct task_stru
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
#endif
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+ cputime_t *stt)
{
cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
@@ -3656,15 +3665,18 @@ void task_times(struct task_struct *p, c
*/
p->prev_utime = max(p->prev_utime, utime);
p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
+ p->prev_sttime = cputime_zero;
*ut = p->prev_utime;
*st = p->prev_stime;
+ *stt = p->prev_sttime;
}
/*
* Must be called with siglock held.
*/
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st,
+ cputime_t *stt)
{
struct signal_struct *sig = p->signal;
struct task_cputime cputime;
@@ -3687,9 +3699,11 @@ void thread_group_times(struct task_stru
sig->prev_utime = max(sig->prev_utime, utime);
sig->prev_stime = max(sig->prev_stime,
cputime_sub(rtime, sig->prev_utime));
+ sig->prev_sttime = cputime_zero;
*ut = sig->prev_utime;
*st = sig->prev_stime;
+ *stt = sig->prev_sttime;
}
#endif
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -880,10 +880,10 @@ change_okay:
void do_sys_times(struct tms *tms)
{
- cputime_t tgutime, tgstime, cutime, cstime;
+ cputime_t tgutime, tgstime, tgsttime, cutime, cstime;
spin_lock_irq(&current->sighand->siglock);
- thread_group_times(current, &tgutime, &tgstime);
+ thread_group_times(current, &tgutime, &tgstime, &tgsttime);
cutime = current->signal->cutime;
cstime = current->signal->cstime;
spin_unlock_irq(&current->sighand->siglock);
@@ -1488,14 +1488,14 @@ static void k_getrusage(struct task_stru
{
struct task_struct *t;
unsigned long flags;
- cputime_t tgutime, tgstime, utime, stime;
+ cputime_t tgutime, tgstime, tgsttime, utime, stime, sttime;
unsigned long maxrss = 0;
memset((char *) r, 0, sizeof *r);
utime = stime = cputime_zero;
if (who == RUSAGE_THREAD) {
- task_times(current, &utime, &stime);
+ task_times(current, &utime, &stime, &sttime);
accumulate_thread_rusage(p, r);
maxrss = p->signal->maxrss;
goto out;
@@ -1521,7 +1521,7 @@ static void k_getrusage(struct task_stru
break;
case RUSAGE_SELF:
- thread_group_times(p, &tgutime, &tgstime);
+ thread_group_times(p, &tgutime, &tgstime, &tgsttime);
utime = cputime_add(utime, tgutime);
stime = cputime_add(stime, tgstime);
r->ru_nvcsw += p->signal->nvcsw;
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -66,6 +66,7 @@ void bacct_add_tsk(struct taskstats *sta
rcu_read_unlock();
stats->ac_utime = cputime_to_usecs(tsk->utime);
stats->ac_stime = cputime_to_usecs(tsk->stime);
+ stats->ac_sttime = cputime_to_usecs(tsk->sttime);
stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
stats->ac_minflt = tsk->min_flt;
next prev parent reply other threads:[~2010-11-11 17:08 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-11 17:03 [RFC][PATCH v2 0/7] taskstats: Enhancements for precise process accounting (version 2) Michael Holzheu
2010-11-11 17:03 ` [RFC][PATCH v2 1/7] taskstats: Add new taskstats command TASKSTATS_CMD_ATTR_PIDS Michael Holzheu
2010-11-13 19:20 ` Peter Zijlstra
2010-11-15 15:53 ` Michael Holzheu
2010-11-15 16:06 ` Peter Zijlstra
2010-11-15 17:09 ` Michael Holzheu
2010-11-15 17:21 ` Peter Zijlstra
2010-11-16 12:16 ` Michael Holzheu
2010-11-16 12:36 ` Peter Zijlstra
2010-11-13 19:39 ` Peter Zijlstra
2010-11-13 20:00 ` Balbir Singh
2010-11-15 14:50 ` Michael Holzheu
2010-11-11 17:03 ` [RFC][PATCH v2 2/7] taskstats: Add "/proc/taskstats" Michael Holzheu
2010-11-11 17:03 ` [RFC][PATCH v2 3/7] taskstats: Add thread group ID to taskstats structure Michael Holzheu
2010-11-11 17:03 ` Michael Holzheu [this message]
2010-11-13 19:38 ` [RFC][PATCH v2 4/7] taskstats: Add per task steal time accounting Peter Zijlstra
2010-11-15 14:50 ` Martin Schwidefsky
2010-11-15 15:11 ` Peter Zijlstra
2010-11-15 17:42 ` Martin Schwidefsky
2010-11-15 17:45 ` Peter Zijlstra
2010-11-15 17:47 ` Peter Zijlstra
2010-11-15 17:48 ` Peter Zijlstra
2010-11-15 17:50 ` Peter Zijlstra
2010-11-15 17:59 ` Martin Schwidefsky
2010-11-15 18:08 ` Peter Zijlstra
2010-11-16 8:51 ` Martin Schwidefsky
2010-11-16 12:16 ` Peter Zijlstra
2010-11-16 15:33 ` Martin Schwidefsky
2010-11-16 15:45 ` Peter Zijlstra
2010-11-16 16:05 ` Martin Schwidefsky
2010-11-16 18:39 ` Jeremy Fitzhardinge
2010-11-16 16:38 ` Avi Kivity
2010-11-16 16:43 ` Peter Zijlstra
2010-11-16 16:56 ` Avi Kivity
2010-11-16 17:06 ` Avi Kivity
2010-11-11 17:03 ` [RFC][PATCH v2 5/7] taskstats: Improve cumulative CPU " Michael Holzheu
2010-11-13 18:38 ` Oleg Nesterov
2010-11-15 15:55 ` Martin Schwidefsky
2010-11-15 16:03 ` Peter Zijlstra
2010-11-15 17:49 ` Martin Schwidefsky
2010-11-15 17:51 ` Peter Zijlstra
2010-11-15 18:00 ` Martin Schwidefsky
2010-11-15 18:10 ` Peter Zijlstra
2010-11-16 8:54 ` Martin Schwidefsky
2010-11-16 16:57 ` Michael Holzheu
2010-11-18 17:10 ` Oleg Nesterov
2010-11-19 19:46 ` Michael Holzheu
2010-11-16 17:34 ` Michael Holzheu
2010-11-16 17:50 ` Oleg Nesterov
2010-11-18 16:34 ` Oleg Nesterov
2010-11-11 17:03 ` [RFC][PATCH v2 6/7] taskstats: Fix accounting for non-leader thread exec Michael Holzheu
2010-11-11 17:11 ` [RFC][PATCH v2 7/7] taskstats: Precise process accounting user space Michael Holzheu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101111170815.024542355@linux.vnet.ibm.com \
--to=holzheu@linux.vnet.ibm.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=balbir@linux.vnet.ibm.com \
--cc=heiko.carstens@de.ibm.com \
--cc=johnstul@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=nagar1234@in.ibm.com \
--cc=oleg@redhat.com \
--cc=roland@redhat.com \
--cc=schwidefsky@de.ibm.com \
--cc=suresh.b.siddha@intel.com \
--cc=tglx@linutronix.de \
--cc=venki@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox