All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jay Lan <jlan@sgi.com>
To: Andrew Morton <akpm@osdl.org>
Cc: lkml <linux-kernel@vger.kernel.org>,
	Shailabh Nagar <nagar@watson.ibm.com>,
	Balbir Singh <balbir@in.ibm.com>, Jes Sorensen <jes@sgi.com>,
	Chris Sturtivant <csturtiv@sgi.com>, Tony Ernst <tee@sgi.com>,
	Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Subject: [PATCH] csa accounting taskstats update
Date: Tue, 08 Aug 2006 12:33:29 -0700	[thread overview]
Message-ID: <44D8E709.2040705@sgi.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 940 bytes --]

Hi Andrew,

This patch addresses the feedbacks from you and Balbir.

This patch is generated against 2.6.18-rc3 plus your patch
+ csa-basic-accounting-over-taskstats-fix.patch

This patch is to be applied after your patch which you
already added to -mm tree.



Signed-off-by: Jay Lan <jlan@sgi.com>


I test built on ia64, i386 and x86_64 build servers.

ChangeLog:
   Feedbacks from Andrew Morton:
   - define TS_COMM_LEN to 32
   - change acct_stimexpd field of task_struct to be of
     cputime_t, which is to be used to save the tsk->stime
     of last timer interrupt update.
   - a new Documentation/accounting/taskstats-struct.txt
     to describe fields of taskstats struct.

   Feedback from Balbir Singh:
   - keep the stime of a task to be zero when both stime
     and utime are zero as recoreded in task_struct.

   Misc:
   - convert accumulated RSS/VM from platform dependent
     pages-ticks to MBytes-usecs in the kernel


[-- Attachment #2: 2.6.18-rc2-taskstats-acct-update --]
[-- Type: text/plain, Size: 11244 bytes --]

Index: linux/include/linux/taskstats.h
===================================================================
--- linux.orig/include/linux/taskstats.h	2006-08-08 11:14:05.180663670 -0700
+++ linux/include/linux/taskstats.h	2006-08-08 11:44:27.934463619 -0700
@@ -32,14 +32,21 @@
 
 
 #define TASKSTATS_VERSION	2
-#define TS_COMM_LEN		16	/* should sync up with TASK_COMM_LEN
+#define TS_COMM_LEN		32	/* should sync up with TASK_COMM_LEN
 					 * in linux/sched.h */
 
 struct taskstats {
 
-	/* Version 1 */
+	/* The version number of this struct. This field is always set to
+	 * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
+	 * Each time the struct is changed, the value should be incremented.
+	 */
 	__u16	version;
-	__u32	ac_exitcode;	/* Exit status */
+	__u32	ac_exitcode;		/* Exit status */
+
+	/* The accounting flags of a task as defined in <linux/acct.h>
+	 * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
+	 */
 	__u8	ac_flag;		/* Record flags */
 	__u8	ac_nice;		/* task_nice */
 
@@ -104,15 +111,30 @@ struct taskstats {
 	__u64	ac_etime;		/* Elapsed time [usec] */
 	__u64	ac_utime;		/* User CPU time [usec] */
 	__u64	ac_stime;		/* SYstem CPU time [usec] */
-	__u64	ac_minflt;		/* Minor Page Fault */
-	__u64	ac_majflt;		/* Major Page Fault */
+	__u64	ac_minflt;		/* Minor Page Fault Count */
+	__u64	ac_majflt;		/* Major Page Fault Count */
 	/* Basic Accounting Fields end */
 
  	/* Extended accounting fields start */
- 	__u64	acct_rss_mem1;		/* accumulated rss usage */
- 	__u64	acct_vm_mem1;		/* accumulated virtual memory usage */
- 	__u64	hiwater_rss;		/* High-watermark of RSS usage */
- 	__u64	hiwater_vm;		/* High-water virtual memory usage */
+	/* Accumulated RSS usage in duration of a task, in MBytes-usecs.
+	 * The current rss usage is added to this counter every time
+	 * a tick is charged to a task's system time. So, at the end we
+	 * will have memory usage multiplied by system time. Thus an
+	 * average usage per system time unit can be calculated.
+	 */
+ 	__u64	coremem;		/* accumulated RSS usage in MB-usec */
+	/* Accumulated virtual memory usage in duration of a task.
+	 * Same as acct_rss_mem1 above except that we keep track of VM usage.
+	 */
+ 	__u64	virtmem;		/* accumulated VM  usage in MB-usec */
+
+	/* High watermark of RSS and virtual memory usage in duration of
+	 * a task, in KBytes.
+	 */
+ 	__u64	hiwater_rss;		/* High-watermark of RSS usage, in KB */
+ 	__u64	hiwater_vm;		/* High-water VM usage, in KB */
+
+	/* The following four fields are I/O statistics of a task. */
  	__u64	read_char;		/* bytes read */
  	__u64	write_char;		/* bytes written */
  	__u64	read_syscalls;		/* read syscalls */
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h	2006-08-08 11:14:06.468678357 -0700
+++ linux/include/linux/sched.h	2006-08-08 11:14:14.964775244 -0700
@@ -967,7 +967,7 @@ struct task_struct {
 #if defined(CONFIG_TASK_XACCT)
 	u64 acct_rss_mem1;	/* accumulated rss usage */
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
-	clock_t acct_stimexpd;	/* clock_t-converted stime since last update */
+	cputime_t acct_stimexpd;/* stime since last update */
 #endif
 #ifdef CONFIG_NUMA
   	struct mempolicy *mempolicy;
Index: linux/kernel/tsacct.c
===================================================================
--- linux.orig/kernel/tsacct.c	2006-08-08 11:14:12.132742947 -0700
+++ linux/kernel/tsacct.c	2006-08-08 11:14:14.968775290 -0700
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/tsacct_kern.h>
 #include <linux/acct.h>
+#include <linux/jiffies.h>
 
 
 #define USEC_PER_TICK	(USEC_PER_SEC/HZ)
@@ -62,33 +63,35 @@ void bacct_add_tsk(struct taskstats *sta
 	stats->ac_stime	 = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
 	stats->ac_minflt = tsk->min_flt;
 	stats->ac_majflt = tsk->maj_flt;
-	/* Each process gets a minimum of one usec cpu time */
-	if ((stats->ac_utime == 0) && (stats->ac_stime == 0)) {
-		stats->ac_stime = 1;
-	}
 
 	strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
 }
 
 
 #ifdef CONFIG_TASK_XACCT
+
+#define KB 1024
+#define MB (1024*KB)
 /*
  * fill in extended accounting fields
  */
 void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {
-	stats->acct_rss_mem1 = p->acct_rss_mem1;
-	stats->acct_vm_mem1  = p->acct_vm_mem1;
+	/* convert pages-jiffies to Mbyte-usec */
+	stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
+	stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
 	if (p->mm) {
-		stats->hiwater_rss   = p->mm->hiwater_rss;
-		stats->hiwater_vm    = p->mm->hiwater_vm;
+		/* adjust to KB unit */
+		stats->hiwater_rss   = p->mm->hiwater_rss * PAGE_SIZE / KB;
+		stats->hiwater_vm    = p->mm->hiwater_vm * PAGE_SIZE / KB;
 	}
 	stats->read_char	= p->rchar;
 	stats->write_char	= p->wchar;
 	stats->read_syscalls	= p->syscr;
 	stats->write_syscalls	= p->syscw;
 }
-
+#undef KB
+#undef MB
 
 /**
  * acct_update_integrals - update mm integral fields in task_struct
@@ -97,8 +100,8 @@ void xacct_add_tsk(struct taskstats *sta
 void acct_update_integrals(struct task_struct *tsk)
 {
 	if (likely(tsk->mm)) {
-		long delta =
-			cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
+		long delta = cputime_to_jiffies(
+			cputime_sub(tsk->stime, tsk->acct_stimexpd));
 
 		if (delta == 0)
 			return;
Index: linux/Documentation/accounting/taskstats-struct.txt
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/Documentation/accounting/taskstats-struct.txt	2006-08-08 11:35:56.724246832 -0700
@@ -0,0 +1,161 @@
+The struct taskstats
+--------------------
+
+This document contains an explanation of the struct taskstats fields.
+
+There are three different groups of fields in the struct taskstats:
+
+1) Common and basic accounting fields
+    If CONFIG_TASKSTATS is set, the taskstats inteface is enabled and
+    the common fields and basic accounting fields are collected for
+    delivery at do_exit() of a task.
+2) Delay accounting fields
+    These fields are placed between
+    /* Delay accounting fields start */
+    and
+    /* Delay accounting fields end */
+    Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
+3) Extended accounting fields
+    These fields are placed between
+    /* Extended accounting fields start */
+    and
+    /* Extended accounting fields end */
+    Their values are collected if CONFIG_TASK_XACCT is set.
+
+Future extension should add fields to the end of the taskstats struct, and
+should not change the relative position of each field within the struct.
+
+
+struct taskstats {
+
+1) Common and basic accounting fields:
+	/* The version number of this struct. This field is always set to
+	 * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
+	 * Each time the struct is changed, the value should be incremented.
+	 */
+	__u16	version;
+
+  	/* The exit code of a task. */
+	__u32	ac_exitcode;		/* Exit status */
+
+  	/* The accounting flags of a task as defined in <linux/acct.h>
+	 * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
+	 */
+	__u8	ac_flag;		/* Record flags */
+
+  	/* The value of task_nice() of a task. */
+	__u8	ac_nice;		/* task_nice */
+
+  	/* The name of the command that started this task. */
+	char	ac_comm[TS_COMM_LEN];	/* Command name */
+
+  	/* The scheduling discipline as set in task->policy field. */
+	__u8	ac_sched;		/* Scheduling discipline */
+
+	__u8	ac_pad[3];
+	__u32	ac_uid;			/* User ID */
+	__u32	ac_gid;			/* Group ID */
+	__u32	ac_pid;			/* Process ID */
+	__u32	ac_ppid;		/* Parent process ID */
+
+  	/* The time when a task begins, in [secs] since 1970. */
+	__u32	ac_btime;		/* Begin time [sec since 1970] */
+
+  	/* The elapsed time of a task, in [usec]. */
+	__u64	ac_etime;		/* Elapsed time [usec] */
+
+  	/* The user CPU time of a task, in [usec]. */
+	__u64	ac_utime;		/* User CPU time [usec] */
+
+  	/* The system CPU time of a task, in [usec]. */
+	__u64	ac_stime;		/* System CPU time [usec] */
+
+  	/* The minor page fault count of a task, as set in task->min_flt. */
+	__u64	ac_minflt;		/* Minor Page Fault Count */
+
+	/* The major page fault count of a task, as set in task->maj_flt. */
+	__u64	ac_majflt;		/* Major Page Fault Count */
+
+
+2) Delay accounting fields:
+	/* Delay accounting fields start
+	 *
+	 * All values, until the comment "Delay accounting fields end" are
+	 * available only if delay accounting is enabled, even though the last
+	 * few fields are not delays
+	 *
+	 * xxx_count is the number of delay values recorded
+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+	 *
+	 * xxx_delay_total wraps around to zero on overflow
+	 * xxx_count incremented regardless of overflow
+	 */
+
+	/* Delay waiting for cpu, while runnable
+	 * count, delay_total NOT updated atomically
+	 */
+	__u64	cpu_count;
+	__u64	cpu_delay_total;
+
+	/* Following four fields atomically updated using task->delays->lock */
+
+	/* Delay waiting for synchronous block I/O to complete
+	 * does not account for delays in I/O submission
+	 */
+	__u64	blkio_count;
+	__u64	blkio_delay_total;
+
+	/* Delay waiting for page fault I/O (swap in only) */
+	__u64	swapin_count;
+	__u64	swapin_delay_total;
+
+	/* cpu "wall-clock" running time
+	 * On some architectures, value will adjust for cpu time stolen
+	 * from the kernel in involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_real_total;
+
+	/* cpu "virtual" running time
+	 * Uses time intervals seen by the kernel i.e. no adjustment
+	 * for kernel's involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_virtual_total;
+	/* Delay accounting fields end */
+	/* version 1 ends here */
+
+
+3) Extended accounting fields
+	/* Extended accounting fields start */
+
+	/* Accumulated RSS usage in duration of a task, in MBytes-usecs.
+	 * The current rss usage is added to this counter every time
+	 * a tick is charged to a task's system time. So, at the end we
+	 * will have memory usage multiplied by system time. Thus an
+	 * average usage per system time unit can be calculated.
+	 */
+	__u64	coremem;		/* accumulated RSS usage in MB-usec */
+
+  	/* Accumulated virtual memory usage in duration of a task.
+	 * Same as acct_rss_mem1 above except that we keep track of VM usage.
+	 */
+	__u64	virtmem;		/* accumulated VM usage in MB-usec */
+
+  	/* High watermark of RSS usage in duration of a task, in KBytes. */
+	__u64	hiwater_rss;		/* High-watermark of RSS usage */
+
+  	/* High watermark of VM  usage in duration of a task, in KBytes. */
+	__u64	hiwater_vm;		/* High-water virtual memory usage */
+
+	/* The following four fields are I/O statistics of a task. */
+	__u64	read_char;		/* bytes read */
+	__u64	write_char;		/* bytes written */
+	__u64	read_syscalls;		/* read syscalls */
+	__u64	write_syscalls;		/* write syscalls */
+
+	/* Extended accounting fields end */
+
+}

             reply	other threads:[~2006-08-08 19:33 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-08-08 19:33 Jay Lan [this message]
2006-08-09  6:43 ` [PATCH] csa accounting taskstats update Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44D8E709.2040705@sgi.com \
    --to=jlan@sgi.com \
    --cc=akpm@osdl.org \
    --cc=balbir@in.ibm.com \
    --cc=csturtiv@sgi.com \
    --cc=guillaume.thouvenin@bull.net \
    --cc=jes@sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nagar@watson.ibm.com \
    --cc=tee@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.