public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 1/3] basic accounting over taskstats
@ 2006-08-03  4:20 Jay Lan
  2006-08-03  6:52 ` Andrew Morton
  0 siblings, 1 reply; 5+ messages in thread
From: Jay Lan @ 2006-08-03  4:20 UTC (permalink / raw)
  To: Andrew Morton
  Cc: LKML, Shailabh Nagar, Balbir Singh, Jes Sorensen,
	Chris Sturtivant, Tony Ernst, Guillaume Thouvenin

[-- Attachment #1: Type: text/plain, Size: 370 bytes --]

This patch is to replace the "[patch 1/3] add basic accounting
fields to taskstats" posted on 7/31.

This patch adds some basic accounting fields to the taskstats
struct, add a new kernel/tsacct.c to handle basic accounting
data handling upon exit. A handle is added to taskstats.c
to invoke the basic accounting data handling.


Signed-off-by: Jay Lan <jlan@sgi.com>



[-- Attachment #2: taskstats-rev2.patch --]
[-- Type: text/plain, Size: 6461 bytes --]

Index: linux/include/linux/taskstats.h
===================================================================
--- linux.orig/include/linux/taskstats.h	2006-07-31 11:38:54.132326042 -0700
+++ linux/include/linux/taskstats.h	2006-08-02 19:17:02.155010656 -0700
@@ -2,6 +2,7 @@
  *
  * Copyright (C) Shailabh Nagar, IBM Corp. 2006
  *           (C) Balbir Singh,   IBM Corp. 2006
+ *           (C) Jay Lan,        SGI, 2006
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2.1 of the GNU Lesser General Public License
@@ -29,16 +30,18 @@
  *	c) add new fields after version comment; maintain 64-bit alignment
  */
 
-#define TASKSTATS_VERSION	1
+
+#define TASKSTATS_VERSION	2
+#define TS_COMM_LEN		16	/* should sync up with TASK_COMM_LEN
+					 * in linux/sched.h */
 
 struct taskstats {
 
 	/* Version 1 */
 	__u16	version;
-	__u16	padding[3];	/* Userspace should not interpret the padding
-				 * field which can be replaced by useful
-				 * fields if struct taskstats is extended.
-				 */
+	__u32	ac_exitcode;	/* Exit status */
+	__u8	ac_flag;		/* Record flags */
+	__u8	ac_nice;		/* task_nice */
 
 	/* Delay accounting fields start
 	 *
@@ -88,6 +91,22 @@ struct taskstats {
 	__u64	cpu_run_virtual_total;
 	/* Delay accounting fields end */
 	/* version 1 ends here */
+
+	/* Basic Accounting Fields start */
+	char	ac_comm[TS_COMM_LEN];	/* Command name */
+	__u8	ac_sched;		/* Scheduling discipline */
+	__u8	ac_pad[3];
+	__u32	ac_uid;			/* User ID */
+	__u32	ac_gid;			/* Group ID */
+	__u32	ac_pid;			/* Process ID */
+	__u32	ac_ppid;		/* Parent process ID */
+	__u32	ac_btime;		/* Begin time [sec since 1970] */
+	__u64	ac_etime;		/* Elapsed time [usec] */
+	__u64	ac_utime;		/* User CPU time [usec] */
+	__u64	ac_stime;		/* SYstem CPU time [usec] */
+	__u64	ac_minflt;		/* Minor Page Fault */
+	__u64	ac_majflt;		/* Major Page Fault */
+	/* Basic Accounting Fields end */
 };
 
 
Index: linux/kernel/taskstats.c
===================================================================
--- linux.orig/kernel/taskstats.c	2006-07-31 11:38:54.160326367 -0700
+++ linux/kernel/taskstats.c	2006-08-02 19:17:02.155010656 -0700
@@ -18,6 +18,7 @@
 
 #include <linux/kernel.h>
 #include <linux/taskstats_kern.h>
+#include <linux/tsacct_kern.h>
 #include <linux/delayacct.h>
 #include <linux/cpumask.h>
 #include <linux/percpu.h>
@@ -198,7 +199,10 @@ static int fill_pid(pid_t pid, struct ta
 	 */
 
 	delayacct_add_tsk(stats, tsk);
+
+	/* fill in basic acct fields */
 	stats->version = TASKSTATS_VERSION;
+	bacct_add_tsk(stats, tsk);
 
 	/* Define err: label here if needed */
 	put_task_struct(tsk);
Index: linux/kernel/Makefile
===================================================================
--- linux.orig/kernel/Makefile	2006-08-02 14:52:08.000000000 -0700
+++ linux/kernel/Makefile	2006-08-02 16:56:23.029588787 -0700
@@ -49,7 +49,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
-obj-$(CONFIG_TASKSTATS) += taskstats.o
+obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
Index: linux/kernel/tsacct.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/kernel/tsacct.c	2006-08-02 19:17:20.859257731 -0700
@@ -0,0 +1,69 @@
+/*
+ * tsacct.c - System accounting over taskstats interface
+ *
+ * Copyright (C) Jay Lan,	<jlan@sgi.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/tsacct_kern.h>
+#include <linux/acct.h>
+
+
+#define USEC_PER_TICK	(USEC_PER_SEC/HZ)
+/*
+ * fill in basic accounting fields
+ */
+void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+{
+	struct timespec uptime, ts;
+
+	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
+
+	/* calculate task elapsed time in timespec */
+	do_posix_clock_monotonic_gettime(&uptime);
+	ts = timespec_sub(uptime, current->group_leader->start_time);
+	/* rebase elapsed time to usec */
+	stats->ac_etime = (timespec_to_ns(&ts))/NSEC_PER_USEC;
+	stats->ac_btime = xtime.tv_sec - ts.tv_sec;
+	if (thread_group_leader(tsk)) {
+		stats->ac_exitcode = tsk->exit_code;
+		if (tsk->flags & PF_FORKNOEXEC)
+			stats->ac_flag |= AFORK;
+	}
+	if (tsk->flags & PF_SUPERPRIV)
+		stats->ac_flag |= ASU;
+	if (tsk->flags & PF_DUMPCORE)
+		stats->ac_flag |= ACORE;
+	if (tsk->flags & PF_SIGNALED)
+		stats->ac_flag |= AXSIG;
+	stats->ac_nice	 = task_nice(tsk);
+	stats->ac_sched	 = tsk->policy;
+	stats->ac_uid	 = tsk->uid;
+	stats->ac_gid	 = tsk->gid;
+	stats->ac_pid	 = tsk->pid;
+	stats->ac_ppid	 = (tsk->parent) ? tsk->parent->pid : 0;
+	stats->ac_utime	 = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
+	stats->ac_stime	 = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
+	stats->ac_minflt = tsk->min_flt;
+	stats->ac_majflt = tsk->maj_flt;
+	/* Each process gets a minimum of one usec cpu time */
+	if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) {
+		stats->ac_stime = 1;
+	}
+
+	strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
+}
+
Index: linux/include/linux/tsacct_kern.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/include/linux/tsacct_kern.h	2006-08-02 19:17:02.155010656 -0700
@@ -0,0 +1,19 @@
+/*
+ * tsacct_kern.h - kernel header for system accounting over taskstats interface
+ *
+ * Copyright (C) Jay Lan	SGI
+ */
+
+#ifndef _LINUX_TSACCT_KERN_H
+#define _LINUX_TSACCT_KERN_H
+
+#include <linux/taskstats.h>
+
+#ifdef CONFIG_TASKSTATS
+extern void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk);
+#else
+static inline void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
+{}
+#endif /* CONFIG_TASKSTATS */
+
+#endif


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 1/3] basic accounting over taskstats
  2006-08-03  4:20 [patch 1/3] basic accounting over taskstats Jay Lan
@ 2006-08-03  6:52 ` Andrew Morton
  2006-08-03 13:56   ` Shailabh Nagar
  0 siblings, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2006-08-03  6:52 UTC (permalink / raw)
  To: Jay Lan
  Cc: linux-kernel, nagar, balbir, jes, csturtiv, tee,
	guillaume.thouvenin

On Wed, 02 Aug 2006 21:20:53 -0700
Jay Lan <jlan@engr.sgi.com> wrote:

> This patch is to replace the "[patch 1/3] add basic accounting
> fields to taskstats" posted on 7/31.
> 
> This patch adds some basic accounting fields to the taskstats
> struct, add a new kernel/tsacct.c to handle basic accounting
> data handling upon exit. A handle is added to taskstats.c
> to invoke the basic accounting data handling.
> 

> +#define TS_COMM_LEN		16	/* should sync up with TASK_COMM_LEN
> +					 * in linux/sched.h */

There was a proposal recently to increase TASK_COMM_LENGTH from 16 to 20 so
that it was long enough to hold an entire IEEE(?) UUID so that the
operator can easily match up a kernel thread with the storage device which
it manages.

I don't know if/when that change will happen, but the message is that
TASK_COMM_LENGTH may increase.

> +	BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);

And if it does, we'll need to increase TS_COMM_LEN as well.  That will
amount to an non-compatible change to the interface which you are
proposing.   We want to avoid that.

Hence I'd propose that you increase TS_COMM_LEN to 32 or something and if
TASK_COMM_LEN later becomes really big, we might just choose to truncate
it.

Or we remove this field altogether, perhaps.  The same info is available
from /proc/pid/stat anyway.  Is it really needed?


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 1/3] basic accounting over taskstats
  2006-08-03  6:52 ` Andrew Morton
@ 2006-08-03 13:56   ` Shailabh Nagar
  2006-08-03 18:48     ` Jay Lan
  0 siblings, 1 reply; 5+ messages in thread
From: Shailabh Nagar @ 2006-08-03 13:56 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jay Lan, linux-kernel, balbir, jes, csturtiv, tee,
	guillaume.thouvenin

Andrew Morton wrote:

> 
> Or we remove this field altogether, perhaps.  The same info is available
> from /proc/pid/stat anyway.  Is it really needed?
> 

Gathering this data in userspace from /proc might be
difficult esp. for short-lived tasks.

Also, /proc may not be mounted ? I'd heard somewhere that
some sysadmins don't install /proc for security reasons.
Don't know how far thats true.

Several other fields, totalling 58 bytes, added by the CSA
patches are also duplicated in /proc/pid/stat. But all of them
could change in value during the lifetime of a task so I'm
guessing its not useful to get them from /proc
even if some kind of userspace polling of the value was
possible.

But if there is a way, it would sure save a lot of payload
sent over taskstats !

"duplicate" fields from CSA:
+	__u8	ac_nice;		/* task_nice */
+	char	ac_comm[TS_COMM_LEN];	/* Command name */
+	__u8	ac_sched;		/* Scheduling discipline */
+	__u32	ac_pid;			/* Process ID */
+	__u32	ac_ppid;		/* Parent process ID */
+	__u64	ac_utime;		/* User CPU time [usec] */
+	__u64	ac_stime;		/* SYstem CPU time [usec] */
+	__u64	ac_minflt;		/* Minor Page Fault */
+	__u64	ac_majflt;		/* Major Page Fault */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 1/3] basic accounting over taskstats
  2006-08-03 13:56   ` Shailabh Nagar
@ 2006-08-03 18:48     ` Jay Lan
  2006-08-03 19:29       ` Shailabh Nagar
  0 siblings, 1 reply; 5+ messages in thread
From: Jay Lan @ 2006-08-03 18:48 UTC (permalink / raw)
  To: Shailabh Nagar
  Cc: Andrew Morton, Jay Lan, linux-kernel, balbir, jes, csturtiv, tee,
	guillaume.thouvenin

Shailabh Nagar wrote:
> Andrew Morton wrote:
> 
>>
>> Or we remove this field altogether, perhaps.  The same info is available
>> from /proc/pid/stat anyway.  Is it really needed?
>>
> 
> Gathering this data in userspace from /proc might be
> difficult esp. for short-lived tasks.

This is a serious concern. I think increasing TS_COMM_LEN
to 32 would be a good solution.

> 
> Also, /proc may not be mounted ? I'd heard somewhere that
> some sysadmins don't install /proc for security reasons.
> Don't know how far thats true.
> 
> Several other fields, totalling 58 bytes, added by the CSA
> patches are also duplicated in /proc/pid/stat. But all of them
> could change in value during the lifetime of a task so I'm
> guessing its not useful to get them from /proc
> even if some kind of userspace polling of the value was
> possible.

The same concern above applies to here, doesn't it?

Regards,
  - jay


> 
> But if there is a way, it would sure save a lot of payload
> sent over taskstats !
> 
> "duplicate" fields from CSA:
> +    __u8    ac_nice;        /* task_nice */
> +    char    ac_comm[TS_COMM_LEN];    /* Command name */
> +    __u8    ac_sched;        /* Scheduling discipline */
> +    __u32    ac_pid;            /* Process ID */
> +    __u32    ac_ppid;        /* Parent process ID */
> +    __u64    ac_utime;        /* User CPU time [usec] */
> +    __u64    ac_stime;        /* SYstem CPU time [usec] */
> +    __u64    ac_minflt;        /* Minor Page Fault */
> +    __u64    ac_majflt;        /* Major Page Fault */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 1/3] basic accounting over taskstats
  2006-08-03 18:48     ` Jay Lan
@ 2006-08-03 19:29       ` Shailabh Nagar
  0 siblings, 0 replies; 5+ messages in thread
From: Shailabh Nagar @ 2006-08-03 19:29 UTC (permalink / raw)
  To: Jay Lan
  Cc: Andrew Morton, Jay Lan, linux-kernel, balbir, jes, csturtiv, tee,
	guillaume.thouvenin

Jay Lan wrote:
> Shailabh Nagar wrote:
> 
>> Andrew Morton wrote:
>>
>>>
>>> Or we remove this field altogether, perhaps.  The same info is available
>>> from /proc/pid/stat anyway.  Is it really needed?
>>>
>>
>> Gathering this data in userspace from /proc might be
>> difficult esp. for short-lived tasks.
> 
> 
> This is a serious concern. I think increasing TS_COMM_LEN
> to 32 would be a good solution.
> 
>>
>> Also, /proc may not be mounted ? I'd heard somewhere that
>> some sysadmins don't install /proc for security reasons.
>> Don't know how far thats true.
>>
>> Several other fields, totalling 58 bytes, added by the CSA
>> patches are also duplicated in /proc/pid/stat. But all of them
>> could change in value during the lifetime of a task so I'm
>> guessing its not useful to get them from /proc
>> even if some kind of userspace polling of the value was
>> possible.
> 
> 
> The same concern above applies to here, doesn't it?

Yes. For some of the fields, pid/ppid/nice/sched you may not
really care about whether you get the value present sometime
during the lifetime vs. value at the time task exited, but for
others like utime/stime/minflt/majflt, getting the last value
is likely to matter. Either way, since there's no guarantee
that you can poll a short-lived task fast enough to get any value,
exporting the value from the kernel at exit seems to be the only safe
way.

--Shailabh

> 
> Regards,
>  - jay
> 
> 
>>
>> But if there is a way, it would sure save a lot of payload
>> sent over taskstats !
>>
>> "duplicate" fields from CSA:
>> +    __u8    ac_nice;        /* task_nice */
>> +    char    ac_comm[TS_COMM_LEN];    /* Command name */
>> +    __u8    ac_sched;        /* Scheduling discipline */
>> +    __u32    ac_pid;            /* Process ID */
>> +    __u32    ac_ppid;        /* Parent process ID */
>> +    __u64    ac_utime;        /* User CPU time [usec] */
>> +    __u64    ac_stime;        /* SYstem CPU time [usec] */
>> +    __u64    ac_minflt;        /* Minor Page Fault */
>> +    __u64    ac_majflt;        /* Major Page Fault */
> 
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-08-03 19:30 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-03  4:20 [patch 1/3] basic accounting over taskstats Jay Lan
2006-08-03  6:52 ` Andrew Morton
2006-08-03 13:56   ` Shailabh Nagar
2006-08-03 18:48     ` Jay Lan
2006-08-03 19:29       ` Shailabh Nagar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox