public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 3/3] convert CONFIG tag for extended accounting routines
@ 2006-08-03  4:23 Jay Lan
  2006-08-03  7:03 ` Andrew Morton
  0 siblings, 1 reply; 4+ messages in thread
From: Jay Lan @ 2006-08-03  4:23 UTC (permalink / raw)
  To: Andrew Morton
  Cc: LKML, Shailabh Nagar, Balbir Singh, Jes Sorensen,
	Chris Sturtivant, Tony Ernst, Guillaume Thouvenin

[-- Attachment #1: Type: text/plain, Size: 480 bytes --]

This patch is to replace the "[patch 3/3] convert CONFIG
tag for a few accounting data used by CSA" posted on 7/31.

There were a few accounting data/macros that are used in CSA
but are #ifdef'ed inside CONFIG_BSD_PROCESS_ACCT. This patch is
to change those ifdef's from CONFIG_BSD_PROCESS_ACCT to
CONFIG_TASK_XACCT. A few defines are moved from kernel/acct.c and
include/linux/acct.h to kernel/tsacct.c and
include/linux/tsacct_kern.h.


Signed-off-by:  Jay Lan <jlan@sgi.com>



[-- Attachment #2: bsd-to-xacct.patch --]
[-- Type: text/plain, Size: 6741 bytes --]

Index: linux/include/linux/acct.h
===================================================================
--- linux.orig/include/linux/acct.h	2006-08-02 18:39:58.000000000 -0700
+++ linux/include/linux/acct.h	2006-08-02 18:42:33.257363761 -0700
@@ -124,16 +124,12 @@ extern void acct_auto_close(struct super
 extern void acct_init_pacct(struct pacct_struct *pacct);
 extern void acct_collect(long exitcode, int group_dead);
 extern void acct_process(void);
-extern void acct_update_integrals(struct task_struct *tsk);
-extern void acct_clear_integrals(struct task_struct *tsk);
 #else
 #define acct_auto_close_mnt(x)	do { } while (0)
 #define acct_auto_close(x)	do { } while (0)
 #define acct_init_pacct(x)	do { } while (0)
 #define acct_collect(x,y)	do { } while (0)
 #define acct_process()		do { } while (0)
-#define acct_update_integrals(x)		do { } while (0)
-#define acct_clear_integrals(task)	do { } while (0)
 #endif
 
 /*
Index: linux/kernel/acct.c
===================================================================
--- linux.orig/kernel/acct.c	2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/acct.c	2006-08-02 18:42:33.261363809 -0700
@@ -598,33 +598,3 @@ void acct_process(void)
 	do_acct_process(file);
 	fput(file);
 }
-
-
-/**
- * acct_update_integrals - update mm integral fields in task_struct
- * @tsk: task_struct for accounting
- */
-void acct_update_integrals(struct task_struct *tsk)
-{
-	if (likely(tsk->mm)) {
-		long delta =
-			cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
-
-		if (delta == 0)
-			return;
-		tsk->acct_stimexpd = tsk->stime;
-		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
-		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
-	}
-}
-
-/**
- * acct_clear_integrals - clear the mm integral fields in task_struct
- * @tsk: task_struct whose accounting fields are cleared
- */
-void acct_clear_integrals(struct task_struct *tsk)
-{
-	tsk->acct_stimexpd = 0;
-	tsk->acct_rss_mem1 = 0;
-	tsk->acct_vm_mem1 = 0;
-}
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h	2006-08-02 18:39:58.000000000 -0700
+++ linux/include/linux/sched.h	2006-08-02 18:42:33.261363809 -0700
@@ -964,7 +964,7 @@ struct task_struct {
 	wait_queue_t *io_wait;
 /* i/o counters(bytes read/written, #syscalls */
 	u64 rchar, wchar, syscr, syscw;
-#if defined(CONFIG_BSD_PROCESS_ACCT)
+#if defined(CONFIG_TASK_XACCT)
 	u64 acct_rss_mem1;	/* accumulated rss usage */
 	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
 	clock_t acct_stimexpd;	/* clock_t-converted stime since last update */
Index: linux/fs/compat.c
===================================================================
--- linux.orig/fs/compat.c	2006-08-02 18:39:58.000000000 -0700
+++ linux/fs/compat.c	2006-08-02 18:42:33.265363858 -0700
@@ -44,7 +44,7 @@
 #include <linux/nfsd/syscall.h>
 #include <linux/personality.h>
 #include <linux/rwsem.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
 #include <linux/mm.h>
 
 #include <net/sock.h>		/* siocdevprivate_ioctl */
Index: linux/fs/exec.c
===================================================================
--- linux.orig/fs/exec.c	2006-08-02 18:39:58.000000000 -0700
+++ linux/fs/exec.c	2006-08-02 18:42:33.265363858 -0700
@@ -46,7 +46,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/rmap.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/audit.h>
 
Index: linux/kernel/exit.c
===================================================================
--- linux.orig/kernel/exit.c	2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/exit.c	2006-08-02 18:42:33.269363906 -0700
@@ -18,6 +18,7 @@
 #include <linux/security.h>
 #include <linux/cpu.h>
 #include <linux/acct.h>
+#include <linux/tsacct_kern.h>
 #include <linux/file.h>
 #include <linux/binfmts.h>
 #include <linux/ptrace.h>
Index: linux/kernel/fork.c
===================================================================
--- linux.orig/kernel/fork.c	2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/fork.c	2006-08-02 18:42:33.269363906 -0700
@@ -42,6 +42,7 @@
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/acct.h>
+#include <linux/tsacct_kern.h>
 #include <linux/cn_proc.h>
 #include <linux/delayacct.h>
 #include <linux/taskstats_kern.h>
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c	2006-08-02 18:39:58.000000000 -0700
+++ linux/kernel/sched.c	2006-08-02 18:42:33.273363954 -0700
@@ -49,7 +49,7 @@
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/times.h>
-#include <linux/acct.h>
+#include <linux/tsacct_kern.h>
 #include <linux/kprobes.h>
 #include <linux/delayacct.h>
 #include <asm/tlb.h>
Index: linux/include/linux/tsacct_kern.h
===================================================================
--- linux.orig/include/linux/tsacct_kern.h	2006-08-02 18:39:58.000000000 -0700
+++ linux/include/linux/tsacct_kern.h	2006-08-02 18:42:33.273363954 -0700
@@ -18,9 +18,15 @@ static inline void bacct_add_tsk(struct 
 
 #ifdef CONFIG_TASK_XACCT
 extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p);
+extern void acct_update_integrals(struct task_struct *tsk);
+extern void acct_clear_integrals(struct task_struct *tsk);
 #else
 static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
 {}
+static inline void acct_update_integrals(struct task_struct *tsk)
+{}
+static inline void acct_clear_integrals(struct task_struct *tsk)
+{}
 #endif /* CONFIG_TASK_XACCT */
 
 #endif
Index: linux/kernel/tsacct.c
===================================================================
--- linux.orig/kernel/tsacct.c	2006-08-02 18:42:06.000000000 -0700
+++ linux/kernel/tsacct.c	2006-08-02 18:46:45.232406831 -0700
@@ -85,4 +85,34 @@ void xacct_add_tsk(struct taskstats *sta
 	stats->read_syscalls	= p->syscr;
 	stats->write_syscalls	= p->syscw;
 }
+
+
+/**
+ * acct_update_integrals - update mm integral fields in task_struct
+ * @tsk: task_struct for accounting
+ */
+void acct_update_integrals(struct task_struct *tsk)
+{
+	if (likely(tsk->mm)) {
+		long delta =
+			cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
+
+		if (delta == 0)
+			return;
+		tsk->acct_stimexpd = tsk->stime;
+		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
+		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
+	}
+}
+
+/**
+ * acct_clear_integrals - clear the mm integral fields in task_struct
+ * @tsk: task_struct whose accounting fields are cleared
+ */
+void acct_clear_integrals(struct task_struct *tsk)
+{
+	tsk->acct_stimexpd = 0;
+	tsk->acct_rss_mem1 = 0;
+	tsk->acct_vm_mem1 = 0;
+}
 #endif



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch 3/3] convert CONFIG tag for extended accounting routines
  2006-08-03  4:23 [patch 3/3] convert CONFIG tag for extended accounting routines Jay Lan
@ 2006-08-03  7:03 ` Andrew Morton
  2006-08-03 21:15   ` Jay Lan
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew Morton @ 2006-08-03  7:03 UTC (permalink / raw)
  To: Jay Lan
  Cc: linux-kernel, nagar, balbir, jes, csturtiv, tee,
	guillaume.thouvenin

On Wed, 02 Aug 2006 21:23:35 -0700
Jay Lan <jlan@engr.sgi.com> wrote:

> +/**
> + * acct_update_integrals - update mm integral fields in task_struct
> + * @tsk: task_struct for accounting
> + */
> +void acct_update_integrals(struct task_struct *tsk)
> +{
> +	if (likely(tsk->mm)) {
> +		long delta =
> +			cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;

If a 32 architecture chooses to implement a 64-bit cputime_t, this
expression might go wrong for very long-running tasks and high HZ.

Perhaps we should do all this in terms of cputime_t and export everything
to userspace as u64?

> +		if (delta == 0)
> +			return;
> +		tsk->acct_stimexpd = tsk->stime;
> +		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
> +		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;

It's a bit weird to be multiplying RSS by time.  What unit is a "byte
second"?

If this is not a bug then I guess this is an intermediate term for
additional downstream processing.  There is information loss here and I'd
have thought that it would be better to simply send `delta' and the rss
straight to userspace, let userspace work out what math it wants to perform
on it.  If that makes sense?

I see that the code has been like this for a long time, so treat this as a
"please educate me about BSD accounting" email ;)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch 3/3] convert CONFIG tag for extended accounting routines
  2006-08-03  7:03 ` Andrew Morton
@ 2006-08-03 21:15   ` Jay Lan
  2006-08-08  1:49     ` Jay Lan
  0 siblings, 1 reply; 4+ messages in thread
From: Jay Lan @ 2006-08-03 21:15 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jay Lan, linux-kernel, nagar, balbir, jes, csturtiv, tee,
	guillaume.thouvenin

Andrew Morton wrote:
> On Wed, 02 Aug 2006 21:23:35 -0700
> Jay Lan <jlan@engr.sgi.com> wrote:
> 
> 
>>+/**
>>+ * acct_update_integrals - update mm integral fields in task_struct
>>+ * @tsk: task_struct for accounting
>>+ */
>>+void acct_update_integrals(struct task_struct *tsk)
>>+{
>>+	if (likely(tsk->mm)) {
>>+		long delta =
>>+			cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
> 
> 
> If a 32 architecture chooses to implement a 64-bit cputime_t, this
> expression might go wrong for very long-running tasks and high HZ.
> 
> Perhaps we should do all this in terms of cputime_t and export everything
> to userspace as u64?

Andrew,

We export to userspace the acct_rss_mem1 and acct_vm_mem1, both as u64.
The above logic is to calculate stime delta since last update. Note that
acc_update_integrals() is invoked at do_execve, do_exit, _AND_ at
account_system_time, which is called every jiffy by timer interrupt
handler.

The tsk->acct_stimexpd is used to save the tsk->stime of last update.
It should be changed to cputime_t as well. I will include the fix in
my next fix patch.

> 
> 
>>+		if (delta == 0)
>>+			return;
>>+		tsk->acct_stimexpd = tsk->stime;
>>+		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
>>+		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
> 
> 
> It's a bit weird to be multiplying RSS by time.  What unit is a "byte
> second"?
> 
> If this is not a bug then I guess this is an intermediate term for
> additional downstream processing.  There is information loss here and I'd
> have thought that it would be better to simply send `delta' and the rss
> straight to userspace, let userspace work out what math it wants to perform
> on it.  If that makes sense?
> 
> I see that the code has been like this for a long time, so treat this as a
> "please educate me about BSD accounting" email ;)

This is not a BSD accounting thing. It came from UNICOS and IRIX.
I am pinging the person who knows how the real world users use these
two fields...

Regards,
  - jay

> 



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch 3/3] convert CONFIG tag for extended accounting routines
  2006-08-03 21:15   ` Jay Lan
@ 2006-08-08  1:49     ` Jay Lan
  0 siblings, 0 replies; 4+ messages in thread
From: Jay Lan @ 2006-08-08  1:49 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jay Lan, linux-kernel, nagar, balbir, jes, csturtiv, tee,
	guillaume.thouvenin

Jay Lan wrote:
> Andrew Morton wrote:
> 

[snip]

>>
>>> +        if (delta == 0)
>>> +            return;
>>> +        tsk->acct_stimexpd = tsk->stime;
>>> +        tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
>>> +        tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
>>
>>
>>
>> It's a bit weird to be multiplying RSS by time.  What unit is a "byte
>> second"?
>>
>> If this is not a bug then I guess this is an intermediate term for
>> additional downstream processing.  There is information loss here and I'd
>> have thought that it would be better to simply send `delta' and the rss
>> straight to userspace, let userspace work out what math it wants to 
>> perform
>> on it.  If that makes sense?
>>
>> I see that the code has been like this for a long time, so treat this 
>> as a
>> "please educate me about BSD accounting" email ;)
> 
> 
> This is not a BSD accounting thing. It came from UNICOS and IRIX.
> I am pinging the person who knows how the real world users use these
> two fields...

Andrew,

Here is the explanation i owe you.

We accumulated the RSS/VM value at each timer interrupt update in terms
of pages-tick. At userland, the value is divided by tsk->stime (in usec)
to gain average usage of RSS/VM.

I need to do a little bit more processing in the kernel to convert
the pages-tick values to Mbytes-usec unit before delivery to userland
since the calculation are platform dependent. I will include the
change in the upcoming update patch.

Regards,
  - jay


> 
> Regards,
>  - jay
> 
>>
> 
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2006-08-08  1:50 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-03  4:23 [patch 3/3] convert CONFIG tag for extended accounting routines Jay Lan
2006-08-03  7:03 ` Andrew Morton
2006-08-03 21:15   ` Jay Lan
2006-08-08  1:49     ` Jay Lan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox