public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] new CSA patchset for 2.6.8
@ 2004-08-26  0:25 Jay Lan
  2004-08-26  5:18 ` Andrew Morton
  0 siblings, 1 reply; 27+ messages in thread
From: Jay Lan @ 2004-08-26  0:25 UTC (permalink / raw)
  To: LKML; +Cc: Andrew Morton, Erik Jacobson, Limin Gu

[-- Attachment #1: Type: text/plain, Size: 1365 bytes --]

Hi,

I have broken up one big CSA kernel patch into four smaller ones
as attached:

     csa_io     - collects io accounting data
     csa_mm     - collects mm accounting data
     csa_eop    - provides a hook to perform end-of-process accounting
     csa_module - builds csa loadable module


There are no functional changes in this set of csa patches compared
to the 2.6.7 patch linux-2.6.7.csa.patch.

Patches csa_io, csa_mm, and csa_eop are independent of each other.
You may apply any one, any two or all three and you will be able to
build a functional kernel. However, data collected needs an agent
to use it. The csa_module is one agent that takes advantage of
the feature and it works with csa-2.0.0 (or later) to report system
accounting data of the host system. The csa-2.0.0 rpm can be
downloaded from ftp://oss.sgi.com/projects/csa/download

The csa_module patch requires all three accounting data patches to
be fully functional.

This set of csa patches has been tested with the pagg and job
kernel patches on the linux 2.6.8 kernel. Information about the pagg
and job projects can be found at http://oss.sgi.com/projects/pagg/

The csa_module requires the pagg and job kernel patches.

Feedback, bug reports, and comments are very welcome!

Signed-off-by: Jay Lan <jlan@sgi.com>

---
Jay Lan - Linux System Software
Silicon Graphics Inc., Mountain View, CA

[-- Attachment #2: linux-2.6.8.csa_io.patch --]
[-- Type: text/plain, Size: 3394 bytes --]

Index: linux/drivers/block/ll_rw_blk.c
===================================================================
--- linux.orig/drivers/block/ll_rw_blk.c	2004-08-13 22:36:16.000000000 -0700
+++ linux/drivers/block/ll_rw_blk.c	2004-08-18 12:07:10.000000000 -0700
@@ -1674,6 +1674,7 @@
 {
 	DEFINE_WAIT(wait);
 	struct request *rq;
+	unsigned long start_wait = jiffies;
 
 	generic_unplug_device(q);
 	do {
@@ -1702,6 +1703,7 @@
 		finish_wait(&rl->wait[rw], &wait);
 	} while (!rq);
 
+	current->bwtime += (unsigned long) jiffies - start_wait;
 	return rq;
 }
 
@@ -1948,10 +1950,12 @@
 
 	if (rw == READ) {
 		disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
+		current->rblk += nr_sectors;
 		if (!new_io)
 			disk_stat_inc(rq->rq_disk, read_merges);
 	} else if (rw == WRITE) {
 		disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
+		current->wblk += nr_sectors;
 		if (!new_io)
 			disk_stat_inc(rq->rq_disk, write_merges);
 	}
Index: linux/fs/read_write.c
===================================================================
--- linux.orig/fs/read_write.c	2004-08-13 22:37:15.000000000 -0700
+++ linux/fs/read_write.c	2004-08-18 12:07:10.000000000 -0700
@@ -216,8 +216,11 @@
 				ret = file->f_op->read(file, buf, count, pos);
 			else
 				ret = do_sync_read(file, buf, count, pos);
-			if (ret > 0)
+			if (ret > 0) {
 				dnotify_parent(file->f_dentry, DN_ACCESS);
+				current->rchar += ret;
+			}
+			current->syscr++;
 		}
 	}
 
@@ -260,8 +263,11 @@
 				ret = file->f_op->write(file, buf, count, pos);
 			else
 				ret = do_sync_write(file, buf, count, pos);
-			if (ret > 0)
+			if (ret > 0) {
 				dnotify_parent(file->f_dentry, DN_MODIFY);
+				current->wchar += ret;
+			}
+			current->syscw++;
 		}
 	}
 
@@ -540,6 +546,10 @@
 		fput_light(file, fput_needed);
 	}
 
+	if (ret > 0) {
+		current->rchar += ret;
+	}
+	current->syscr++;
 	return ret;
 }
 
@@ -558,6 +568,10 @@
 		fput_light(file, fput_needed);
 	}
 
+	if (ret > 0) {
+		current->wchar += ret;
+	}
+	current->syscw++;
 	return ret;
 }
 
@@ -636,6 +650,13 @@
 
 	retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
 
+	if (retval > 0) {
+		current->rchar += retval;
+		current->wchar += retval;
+	}
+	current->syscr++;
+	current->syscw++;
+
 	if (*ppos > max)
 		retval = -EOVERFLOW;
 
Index: linux/kernel/fork.c
===================================================================
--- linux.orig/kernel/fork.c	2004-08-18 12:02:19.000000000 -0700
+++ linux/kernel/fork.c	2004-08-18 12:07:10.000000000 -0700
@@ -966,6 +966,8 @@
 
 	p->utime = p->stime = 0;
 	p->cutime = p->cstime = 0;
+	p->rchar = p->wchar = p->rblk = p->wblk = p->syscr = p->syscw = 0;
+	p->bwtime = 0;
 	p->lock_depth = -1;		/* -1 = no lock */
 	p->start_time = get_jiffies_64();
 	p->security = NULL;
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h	2004-08-18 12:02:19.000000000 -0700
+++ linux/include/linux/sched.h	2004-08-18 12:07:10.000000000 -0700
@@ -523,6 +523,9 @@
 	unsigned long ptrace_message;
 	siginfo_t *last_siginfo; /* For ptrace use.  */
 
+/* i/o counters(bytes read/written, blocks read/written, #syscalls, waittime */
+        unsigned long rchar, wchar, rblk, wblk, syscr, syscw, bwtime;
+
 #ifdef CONFIG_NUMA
   	struct mempolicy *mempolicy;
   	short il_next;		/* could be shared with used_math */

[-- Attachment #3: linux-2.6.8.csa_mm.patch --]
[-- Type: text/plain, Size: 11254 bytes --]

Index: linux/fs/exec.c
===================================================================
--- linux.orig/fs/exec.c	2004-08-19 15:17:52.000000000 -0700
+++ linux/fs/exec.c	2004-08-19 15:27:54.000000000 -0700
@@ -47,6 +47,7 @@
 #include <linux/syscalls.h>
 #include <linux/rmap.h>
 #include <linux/pagg.h>
+#include <linux/csa_internal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1145,6 +1146,9 @@
 
 		/* execve success */
 		security_bprm_free(&bprm);
+		/* no-op if CONFIG_CSA not set */
+                csa_update_integrals();
+                update_mem_hiwater();
 		return retval;
 	}
 
Index: linux/include/linux/csa_internal.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/include/linux/csa_internal.h	2004-08-19 15:19:05.000000000 -0700
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc and LANL  All Rights Reserved.
+ * Copyright (c) 2004 Silicon Graphics, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information:  Silicon Graphics, Inc., 1500 Crittenden Lane,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ */
+
+/*
+ *  CSA (Comprehensive System Accounting)
+ *  Job Accounting for Linux
+ *
+ *  This header file contains the definitions needed for communication
+ *  between the kernel and the CSA module.
+ */
+
+#ifndef _LINUX_CSA_INTERNAL_H
+#define _LINUX_CSA_INTERNAL_H
+
+#include <linux/config.h>
+
+#if defined (CONFIG_CSA) || defined (CONFIG_CSA_MODULE)
+
+#include <linux/linkage.h>
+#include <linux/ptrace.h>
+/* Add (stime since last update) * rss and * total_vm of current->mm to current's integrals. */
+static inline void csa_update_integrals(void)
+{
+	long delta;
+
+	if (current->mm) {
+		delta = current->stime - current->csa_stimexpd;
+		current->csa_stimexpd = current->stime;
+		current->csa_rss_mem1 += delta * current->mm->rss;
+		current->csa_vm_mem1 += delta * current->mm->total_vm;
+	}
+}
+/* Zero tsk's stime checkpoint and both memory integrals; a NULL tsk is ignored. */
+static inline void csa_clear_integrals(struct task_struct *tsk)
+{
+	if (tsk) {
+		tsk->csa_stimexpd = 0;
+		tsk->csa_rss_mem1 = 0;
+		tsk->csa_vm_mem1 = 0;
+	}	
+}
+
+#else	/* CONFIG_CSA || CONFIG_CSA_MODULE */
+/* No-op stubs: no trailing ';', so the call site's own ';' ends the statement. */
+#define csa_update_integrals()		do { } while (0)
+#define csa_clear_integrals(task)	do { } while (0)
+#endif	/* CONFIG_CSA || CONFIG_CSA_MODULE */
+
+#endif	/* _LINUX_CSA_INTERNAL_H */
Index: linux/include/linux/sched.h
===================================================================
--- linux.orig/include/linux/sched.h	2004-08-19 15:17:52.000000000 -0700
+++ linux/include/linux/sched.h	2004-08-19 15:19:05.000000000 -0700
@@ -229,6 +229,8 @@
 	struct kioctx		*ioctx_list;
 
 	struct kioctx		default_kioctx;
+
+	unsigned long hiwater_rss, hiwater_vm;
 };
 
 extern int mmlist_nr;
@@ -525,6 +527,10 @@
 
 /* i/o counters(bytes read/written, blocks read/written, #syscalls, waittime */
         unsigned long rchar, wchar, rblk, wblk, syscr, syscw, bwtime;
+#if defined(CONFIG_CSA) || defined(CONFIG_CSA_MODULE)
+	unsigned long csa_rss_mem1, csa_vm_mem1;
+	clock_t csa_stimexpd;
+#endif
 
 #ifdef CONFIG_NUMA
   	struct mempolicy *mempolicy;
@@ -873,6 +879,19 @@
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
 
+/* Raise current->mm's hiwater_rss/hiwater_vm when rss/total_vm exceed them */
+static inline void update_mem_hiwater(void)
+{
+	if (current->mm) {
+		if (current->mm->hiwater_rss < current->mm->rss) {
+			current->mm->hiwater_rss = current->mm->rss;
+		}
+		if (current->mm->hiwater_vm < current->mm->total_vm) {
+			current->mm->hiwater_vm = current->mm->total_vm;
+		}
+	}
+}
+
 extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
 extern void flush_thread(void);
 extern void exit_thread(void);
Index: linux/kernel/exit.c
===================================================================
--- linux.orig/kernel/exit.c	2004-08-13 22:37:40.000000000 -0700
+++ linux/kernel/exit.c	2004-08-19 15:19:05.000000000 -0700
@@ -23,6 +23,7 @@
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/mempolicy.h>
+#include <linux/csa_internal.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -820,6 +821,9 @@
 		ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
 	}
 
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
+	update_mem_hiwater();
 	acct_process(code);
 	__exit_mm(tsk);
 
Index: linux/kernel/fork.c
===================================================================
--- linux.orig/kernel/fork.c	2004-08-19 15:17:52.000000000 -0700
+++ linux/kernel/fork.c	2004-08-19 15:19:05.000000000 -0700
@@ -37,7 +37,7 @@
 #include <linux/audit.h>
 #include <linux/rmap.h>
 #include <linux/pagg.h>
-
+#include <linux/csa_internal.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -578,6 +578,9 @@
 	if (retval)
 		goto free_pt;
 
+	mm->hiwater_rss = mm->rss;
+	mm->hiwater_vm = mm->total_vm;	
+
 good_mm:
 	tsk->mm = mm;
 	tsk->active_mm = mm;
@@ -968,6 +971,8 @@
 	p->cutime = p->cstime = 0;
 	p->rchar = p->wchar = p->rblk = p->wblk = p->syscr = p->syscw = 0;
 	p->bwtime = 0;
+	/* no-op if CONFIG_CSA not set */
+	csa_clear_integrals(p);
 	p->lock_depth = -1;		/* -1 = no lock */
 	p->start_time = get_jiffies_64();
 	p->security = NULL;
Index: linux/mm/memory.c
===================================================================
--- linux.orig/mm/memory.c	2004-08-13 22:36:57.000000000 -0700
+++ linux/mm/memory.c	2004-08-19 15:19:05.000000000 -0700
@@ -44,6 +44,7 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
+#include <linux/csa_internal.h>
 #include <linux/module.h>
 #include <linux/init.h>
 
@@ -605,6 +606,8 @@
 	tlb = tlb_gather_mmu(mm, 0);
 	unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
 	tlb_finish_mmu(tlb, address, end);
+	/* no-op unless CONFIG_CSA is set */
+        csa_update_integrals();
 	spin_unlock(&mm->page_table_lock);
 }
 
@@ -1095,9 +1098,12 @@
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, pte))) {
-		if (PageReserved(old_page))
+		if (PageReserved(old_page)) {
 			++mm->rss;
-		else
+			/* no-op if CONFIG_CSA not set */
+			csa_update_integrals();
+			update_mem_hiwater();
+		} else
 			page_remove_rmap(old_page);
 		break_cow(vma, new_page, address, page_table);
 		lru_cache_add_active(new_page);
@@ -1378,6 +1384,10 @@
 		remove_exclusive_swap_page(page);
 
 	mm->rss++;
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
+	update_mem_hiwater();
+  
 	pte = mk_pte(page, vma->vm_page_prot);
 	if (write_access && can_share_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -1443,6 +1453,9 @@
 			goto out;
 		}
 		mm->rss++;
+		/* no-op if CONFIG_CSA not set */
+		csa_update_integrals();
+		update_mem_hiwater();
 		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
 							 vma->vm_page_prot)),
 				      vma);
@@ -1552,6 +1565,10 @@
 	if (pte_none(*page_table)) {
 		if (!PageReserved(new_page))
 			++mm->rss;
+		/* no-op if CONFIG_CSA not set */
+		csa_update_integrals();
+		update_mem_hiwater();
+
 		flush_icache_page(vma, new_page);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		if (write_access)
Index: linux/mm/mmap.c
===================================================================
--- linux.orig/mm/mmap.c	2004-08-13 22:37:15.000000000 -0700
+++ linux/mm/mmap.c	2004-08-19 15:19:05.000000000 -0700
@@ -20,6 +20,7 @@
 #include <linux/hugetlb.h>
 #include <linux/profile.h>
 #include <linux/module.h>
+#include <linux/csa_internal.h>
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
@@ -988,6 +989,9 @@
 					pgoff, flags & MAP_NONBLOCK);
 		down_write(&mm->mmap_sem);
 	}
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
+	update_mem_hiwater();
 	return addr;
 
 unmap_and_free_vma:
@@ -1227,6 +1231,9 @@
 	vma->vm_mm->total_vm += grow;
 	if (vma->vm_flags & VM_LOCKED)
 		vma->vm_mm->locked_vm += grow;
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
+	update_mem_hiwater();
 	anon_vma_unlock(vma);
 	return 0;
 }
@@ -1688,6 +1695,9 @@
 		mm->locked_vm += len >> PAGE_SHIFT;
 		make_pages_present(addr, addr + len);
 	}
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
+	update_mem_hiwater();
 	return addr;
 }
 
Index: linux/mm/mremap.c
===================================================================
--- linux.orig/mm/mremap.c	2004-08-13 22:36:59.000000000 -0700
+++ linux/mm/mremap.c	2004-08-19 15:19:05.000000000 -0700
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/highmem.h>
 #include <linux/security.h>
+#include <linux/csa_internal.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -231,6 +232,10 @@
 					   new_addr + new_len);
 	}
 
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
+	update_mem_hiwater();
+
 	return new_addr;
 }
 
@@ -363,6 +368,9 @@
 				make_pages_present(addr + old_len,
 						   addr + new_len);
 			}
+			/* no-op if CONFIG_CSA not set */
+			csa_update_integrals();
+			update_mem_hiwater();
 			ret = addr;
 			goto out;
 		}
Index: linux/mm/rmap.c
===================================================================
--- linux.orig/mm/rmap.c	2004-08-13 22:37:42.000000000 -0700
+++ linux/mm/rmap.c	2004-08-19 15:19:05.000000000 -0700
@@ -29,6 +29,7 @@
 #include <linux/swapops.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/csa_internal.h>
 #include <linux/rmap.h>
 
 #include <asm/tlbflush.h>
@@ -515,6 +516,8 @@
 	mm->rss--;
 	BUG_ON(!page->mapcount);
 	page->mapcount--;
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
 	page_cache_release(page);
 
 out_unmap:
@@ -614,6 +617,8 @@
 
 		page_remove_rmap(page);
 		page_cache_release(page);
+		/* no-op if CONFIG_CSA not set */
+		csa_update_integrals();
 		mm->rss--;
 		(*mapcount)--;
 	}
Index: linux/mm/swapfile.c
===================================================================
--- linux.orig/mm/swapfile.c	2004-08-13 22:36:32.000000000 -0700
+++ linux/mm/swapfile.c	2004-08-19 15:19:05.000000000 -0700
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
+#include <linux/csa_internal.h>
 #include <linux/backing-dev.h>
 
 #include <asm/pgtable.h>
@@ -435,6 +436,9 @@
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, address);
 	swap_free(entry);
+	/* no-op if CONFIG_CSA not set */
+	csa_update_integrals();
+	update_mem_hiwater();
 }
 
 /* vma->vm_mm->page_table_lock is held */

[-- Attachment #4: linux-2.6.8.csa_eop.patch --]
[-- Type: text/plain, Size: 2199 bytes --]

Index: linux/kernel/exit.c
===================================================================
--- linux.orig/kernel/exit.c	2004-08-19 18:12:13.000000000 -0700
+++ linux/kernel/exit.c	2004-08-19 18:57:03.000000000 -0700
@@ -32,6 +32,8 @@
 
 extern void sem_exit (void);
 extern struct task_struct *child_reaper;
+void (*do_eop_acct) (int, struct task_struct *) = NULL;
+EXPORT_SYMBOL(do_eop_acct);
 
 int getrusage(struct task_struct *, int, struct rusage __user *);
 
@@ -825,6 +827,9 @@
 	csa_update_integrals();
 	update_mem_hiwater();
 	acct_process(code);
+	/* Handle end-of-process accounting */
+	if (do_eop_acct != NULL)
+		do_eop_acct(code, tsk);
 	__exit_mm(tsk);
 
 	exit_sem(tsk);
Index: linux/include/linux/acct_eop.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/include/linux/acct_eop.h	2004-08-19 18:48:44.000000000 -0700
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2004 Silicon Graphics, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information:  Silicon Graphics, Inc., 1500 Crittenden Lane,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ */
+
+/*
+ *  End-of-process Accounting for Linux
+ *
+ *  This header file contains the hook needed for processing of 
+ *  end-of-process accounting record
+ *
+ */
+
+#ifndef _LINUX_ACCT_EOP_H
+#define _LINUX_ACCT_EOP_H
+
+/* End-of-process accounting hook: takes the exit code and the exiting task. */
+extern void (*do_eop_acct) (int, struct task_struct *);
+
+#endif	/* _LINUX_ACCT_EOP_H */

[-- Attachment #5: linux-2.6.8.csa_module.patch --]
[-- Type: text/plain, Size: 63111 bytes --]

Index: linux/include/linux/csa.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/include/linux/csa.h	2004-08-23 15:28:23.000000000 -0700
@@ -0,0 +1,526 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc and LANL  All Rights Reserved.
+ * Copyright (c) 2004 Silicon Graphics, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as 
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it would be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License along 
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information:  Silicon Graphics, Inc., 1500 Crittenden Lane,
+ * Mountain View, CA  94043, or:
+ * 
+ * http://www.sgi.com 
+ */
+/*
+ *  CSA (Comprehensive System Accounting)
+ *  Job Accounting for Linux
+ *
+ *  This header file contains the definitions needed for job
+ *  accounting. The kernel CSA accounting module code and all
+ *  user-level programs that try to write or process the binary job 
+ *  accounting data must include this file.
+ *
+ *
+ */
+
+#ifndef _LINUX_CSA_H
+#define _LINUX_CSA_H
+
+#ifndef __KERNEL__
+#include <stdint.h>
+#include <sys/types.h>
+#endif
+
+/*
+ *  accounting flags per-process
+ */
+#define AFORK		0x01	/* fork, but did not exec */
+#define ASU		0x02	/* super-user privileges */
+#define ACKPT   	0x04	/* process has been checkpointed */
+#define ACORE		0x08	/* produced corefile */
+#define AXSIG		0x10	/* killed by a signal */
+#define AMORE		0x20	/* more CSA acct records for this process */
+#define AINC		0x40	/* incremental accounting record */
+
+#define AHZ		100
+
+/*
+ * Magic number - for achead.ah_magic in the 1st header.  The magic number
+ *                in the 2nd header is the inverse of this.
+ */
+#define ACCT_MAGIC_BIG          030510  /* big-endian */
+#define ACCT_MAGIC_LITTLE       030512  /* little-endian */
+#ifdef __LITTLE_ENDIAN
+#define ACCT_MAGIC ACCT_MAGIC_LITTLE
+#else
+#define ACCT_MAGIC ACCT_MAGIC_BIG
+#endif
+
+/*
+ * Record types - for achead.ah_type in the 1st header.
+ */
+#define	ACCT_KERNEL_CSA		0001	/* Kernel: CSA base record */
+#define	ACCT_KERNEL_MEM		0002	/* Kernel: memory record */
+#define	ACCT_KERNEL_IO		0004	/* Kernel: input/output record */
+#define	ACCT_KERNEL_MT 		0006	/* Kernel: multi-tasking record */
+#define	ACCT_KERNEL_MPP		0010	/* Kernel: multi-PE appl record */
+#define	ACCT_KERNEL_SOJ		0012	/* Kernel: start-of-job record */
+#define	ACCT_KERNEL_EOJ		0014	/* Kernel: end-of-job record */
+#define	ACCT_KERNEL_CFG		0020	/* Kernel: configuration record */
+
+#define	ACCT_KERNEL_SITE0	0100	/* Kernel: reserved for site */
+#define	ACCT_KERNEL_SITE1	0101	/* Kernel: reserved for site */
+
+#define	ACCT_DAEMON_NQS		0120	/* Daemon: NQS record */
+#define	ACCT_DAEMON_WKMG      	0122	/* Daemon: workload management record,
+					           i.e., LSF */
+#define	ACCT_DAEMON_TAPE	0124	/* Daemon: tape record */
+#define	ACCT_DAEMON_DMIG	0126	/* Daemon: data migration record */
+#define	ACCT_DAEMON_SOCKET	0130	/* Daemon: socket record */
+
+#define	ACCT_DAEMON_SITE0	0200	/* Daemon: reserved for site */
+#define	ACCT_DAEMON_SITE1	0201	/* Daemon: reserved for site */
+
+#define	ACCT_JOB_HEADER		0220	/* csabuild: job header record */
+#define	ACCT_CACCT		0222	/* cacct:    consolidated data */
+#define	ACCT_CMS		0224	/* cms:      command summary data */
+
+/* Record types - for achead.ah_type in the 2nd header. */
+#define	ACCT_MEM	(1<<0)	/* Process generated memory record */
+#define	ACCT_IO		(1<<1)	/* Process generated I/O record */
+#define	ACCT_MT		(1<<2)	/* Process used multi-tasking */
+#define	ACCT_MPP	(1<<3)	/* Process used multi-PE */
+
+/*
+ * Record revision levels.
+ *
+ * These are incremented to indicate that a record's format has changed since
+ * a previous release.
+ */
+#define	REV_CSA		02400	/* Kernel: CSA base record */
+#define	REV_MEM		02400	/* Kernel: memory record */
+#define	REV_IO		02400	/* Kernel: I/O record */
+#define	REV_MT 		02400	/* Kernel: multi-tasking record */
+#define	REV_MPP		02400	/* Kernel: multi-PE appl record */
+#define	REV_SOJ		02400	/* Kernel: start-of-job record */
+#define	REV_EOJ		02400	/* Kernel: end-of-job record */
+#define	REV_CFG		02400	/* Kernel: configuration record */
+
+#define REV_NQS		02400 	/* Daemon: NQS record */
+#define REV_WKMG	02400 	/* Daemon: workload management (i.e., LSF)
+				           record */
+#define REV_TAPE	02400	/* Daemon: tape record */
+#define REV_DMIG	02400	/* Daemon: data migration record */
+#define REV_SOCKET	02400	/* Daemon: socket record */
+
+#define REV_JOB		02400	/* csabuild: job header record */
+#define REV_CACCT	02400	/* cacct:    consolidated data */
+#define REV_CMS		02400	/* cms:      command summary data */
+
+/*
+ * Record header
+ */
+struct achead
+{
+	unsigned int	ah_magic:17;	/* Magic */
+	unsigned int	ah_revision:15;	/* Revision */
+	unsigned int	ah_type:8;	/* Record type */
+	unsigned int	ah_flag:8;	/* Record flags */
+	unsigned int	ah_size:16;	/* Size of record */
+};
+
+/*
+ *  In order to keep the accounting records the same size across different
+ *  machine types, record fields will be defined to types that won't
+ *  vary (i.e. uint32_t instead of uid_t).
+*/
+
+/*
+ * Per process base accounting record.
+ */
+struct acctcsa
+{
+	struct achead	ac_hdr1;	/* Header */
+	struct achead	ac_hdr2;	/* 2nd header for continued records */ 
+	double 		ac_sbu;		/* System billing units */
+	unsigned int	ac_stat:8;	/* Exit status */
+	unsigned int	ac_nice:8;	/* Nice value */
+	unsigned char	ac_sched;	/* Scheduling discipline */
+	unsigned int	:8;		/* Unused */
+	uint32_t	ac_uid;		/* User ID */
+	uint32_t	ac_gid;		/* Group ID */
+	uint64_t	ac_ash;		/* Array session handle */
+	uint64_t	ac_jid;		/* Job ID */
+	uint64_t	ac_prid;	/* Project ID -> account ID */
+	uint32_t	ac_pid;		/* Process ID */
+	uint32_t	ac_ppid;	/* Parent process ID */
+	time_t		ac_btime;	/* Beginning time [sec since 1970] */
+	char		ac_comm[16];	/* Command name */
+/*	CPU resource usage information. */
+	uint64_t	ac_etime;	/* Elapsed time [usecs] */
+	uint64_t	ac_utime;	/* User CPU time [usec] */
+	uint64_t	ac_stime;	/* System CPU time [usec] */
+	uint64_t	ac_spare;	/* Spare field */
+	uint64_t        ac_spare1;	/* Spare field */
+};
+
+/*
+ * Memory accounting structure
+ * This structure is part of the acctmem record.
+ */
+struct memint
+{
+	uint64_t	himem;	/* Hiwater memory usage [Kbytes] */
+	uint64_t	mem1;	/* Memory integral 1 [Mbytes/uSec] */
+	uint64_t	mem2;	/* Memory integral 2 - not used */
+	uint64_t	mem3;	/* Memory integral 3 - not used */
+};
+
+/*
+ * Memory accounting record
+ */
+struct acctmem
+{
+	struct achead	ac_hdr;		/* Header */
+	double 		ac_sbu;		/* System billing units */
+	struct memint	ac_core;	/* Core memory integrals */
+	struct memint	ac_virt;	/* Virtual memory integrals */
+	uint64_t	ac_pgswap;	/* # of pages swapped  */
+	uint64_t	ac_minflt;	/* # of minor page faults */
+	uint64_t	ac_majflt;	/* # of major page faults */
+	uint64_t	ac_spare;	/* Spare field */
+};
+
+/*
+ * Input/Output accounting record
+ */
+struct acctio
+{
+	struct achead		ac_hdr;	   /* Header */
+	double 			ac_sbu;	   /* System billing units */
+	uint64_t	ac_bwtime; /* Block I/O wait time [usecs] */
+	uint64_t	ac_rwtime; /* Raw I/O wait time [usecs] */
+	uint64_t	ac_chr;    /* Number of chars (bytes) read */
+	uint64_t	ac_chw;	   /* Number of chars (bytes) written */
+	uint64_t	ac_bkr;	   /* Number of blocks read */
+	uint64_t	ac_bkw;	   /* Number of blocks written */
+	uint64_t	ac_scr;	   /* Number of read system calls */
+	uint64_t	ac_scw;	   /* Number of write system calls */
+	uint64_t	ac_spare;  /* Spare field */
+};
+
+/*
+ * Multi-tasking accounting structure
+ * This structure is part of the acctmt record.
+ */
+struct mtask
+{
+	uint64_t	mt;		/* CPU+1 connect time [usecs] */
+	uint64_t	spare1;		/* Spare field */
+	uint64_t	spare2;		/* Spare field */
+};
+
+/*
+ * Multi-tasking accounting record - currently not used, adapted from UNICOS.
+ */
+#define	ACCT_MAXCPUS	512	/* Maximum number of CPUs supported */
+
+struct acctmt
+{
+	struct achead	ac_hdr;		/* Header */
+	double 		ac_sbu;		/* System billing units */
+	unsigned int	ac_numcpu:16;	/* Max number of CPUs used */
+	unsigned int	ac_maxcpu:16;	/* Max number of CPUs available */
+	unsigned int	:32;		/* Unused */
+	int64_t		ac_smwtime;	/* Semaphore wait time [usec] */
+	struct mtask	ac_mttime[ACCT_MAXCPUS]; /* Time connected to (i+1)
+						    CPUs [usec] */
+};
+
+/*
+ * MPP PE accounting structure - MPP hardware specific.
+ * This structure is part of the acctmpp record.
+ */
+struct acctpe
+{
+	uint64_t	utime;	 /* User CPU time [usecs] */
+	uint64_t	srtime;	 /* System & remote CPU time [usecs] */
+	uint64_t	io;	 /* Number of chars transferred */
+};
+
+/*
+ * MPP accounting record - MPP hardware specific; currently not used.
+ */
+#define	ACCT_MAXPES	1024	/* Maximum number of PEs */
+
+struct acctmpp
+{
+	struct achead 	ac_hdr;		/* Header */
+	double 		ac_sbu;		/* System billing units */
+	unsigned int	ac_mpbesu:8;	/* Number of BESUs used	*/
+	unsigned int	ac_mppe:24;	/* Number of PEs used */
+	uint64_t	ac_himem; /* Maximum memory hiwater [Mbytes] */
+
+	struct acctpe	ac_mpp[ACCT_MAXPES];	/* Per PE information */
+};
+
+/*
+ * MPP Detailed PE accounting structure - currently not used
+ */
+struct acctdpe
+{
+	struct achead 	ac_hdr;		/* Header */
+
+	uint64_t	utime;		/* User CPU time [usecs] */
+	uint64_t	stime;		/* System CPU time [usecs] */
+	uint64_t	rtime;		/* Remote CPU time [usecs] */
+
+	uint64_t	ctime;		/* Connect CPU time [usecs] */
+	uint64_t	io;		/* Number of chars transferred */
+	uint64_t	spare;		/* Spare field */
+};
+
+/*
+ * Start-of-job record
+ * Written when a job is created.
+ */
+
+typedef enum
+{
+        AC_INIT_LOGIN,          /* Initiated by login */
+        AC_INIT_NQS,            /* Initiated by NQS */
+        AC_INIT_LSF,            /* Initiated by LSF */
+        AC_INIT_CROND,          /* Initiated by crond */
+        AC_INIT_FTPD,           /* Initiated by ftpd */
+        AC_INIT_INETD,          /* Initiated by inetd */
+        AC_INIT_TELNETD,        /* Initiated by telnetd */
+        AC_INIT_MAX
+} ac_inittype;
+
+
+#define AC_SOJ	1	/* Start-of-job record type */
+#define AC_ROJ	2	/* Restart-of-job record type */
+
+struct acctsoj
+{
+	struct achead 	ac_hdr;		/* Header */
+	unsigned int	ac_type:8;	/* Record type (AC_SOJ, AC_ROJ) */
+	ac_inittype	ac_init:8;	/* Initiator - currently not used */
+	unsigned int	:16;		/* Unused */
+	uint32_t	ac_uid;		/* User ID */
+	uint64_t	ac_jid;		/* Job ID */
+	time_t	 	ac_btime;	/* Start time [secs since 1970] */
+	time_t	 	ac_rstime;	/* Restart time [secs since 1970] */
+};
+
+/*
+ * End-of-job record
+ * Written when the last process of a job exits.
+ */
+struct accteoj
+{
+	struct achead	ac_hdr1;	/* Header */ 
+	struct achead	ac_hdr2;	/* 2nd header for continued records */ 
+	double 		ac_sbu;		/* System billing units */
+	ac_inittype	ac_init:8;	/* Initiator - currently not used */
+	unsigned int	ac_nice:8;	/* Nice value */
+	unsigned int	:16;		/* Unused */
+	uint32_t	ac_uid;		/* User ID */
+	uint32_t	ac_gid;		/* Group ID */
+	uint64_t	ac_ash;		/* Array session handle; not used */
+	uint64_t	ac_jid;		/* Job ID */
+	uint64_t	ac_prid;	/* Project ID; not used */
+	time_t	 	ac_btime;	/* Job start time [secs since 1970] */
+	time_t  	ac_etime;	/* Job end time   [secs since 1970] */
+	uint64_t	ac_corehimem;	/* Hiwater core mem [Kbytes] */
+	uint64_t	ac_virthimem;	/* Hiwater virt mem [Kbytes] */
+/*	CPU resource usage information. */
+	uint64_t	ac_utime;  /* User CPU time [usec]  */
+	uint64_t	ac_stime; /* System CPU time [usec] */
+	uint32_t	ac_spare;	
+};
+
+/*
+ * Accounting configuration uname structure
+ * This structure is part of the acctcfg record.
+ */
+struct ac_utsname
+{
+	char	 sysname[26];
+	char	 nodename[26];
+	char	 release[42];
+	char	 version[41];
+	char	 machine[26];
+};
+
+/*
+ * Accounting configuration record
+ * Written for accounting configuration changes.
+ */
+typedef enum
+{
+        AC_CONFCHG_BOOT,        /* Boot time (always first) */
+        AC_CONFCHG_FILE,        /* Reporting pacct file change */
+        AC_CONFCHG_ON,          /* Reporting xxx ON */
+        AC_CONFCHG_OFF,         /* Reporting xxx OFF */
+        AC_CONFCHG_INC_DELTA,   /* Report incremental acct clock delta change */        AC_CONFCHG_INC_EVENT,   /* Report incremental accounting event */
+        AC_CONFCHG_MAX
+} ac_eventtype;
+
+struct acctcfg
+{
+	struct achead	ac_hdr;		/* Header */
+	unsigned int	ac_kdmask;	/* Kernel and daemon config mask */
+	unsigned int	ac_rmask;	/* Record configuration mask */
+	int64_t		ac_uptimelen;	/* Bytes from the end of the boot
+					   record to the next boot record */
+	ac_eventtype	ac_event:8;	/* Accounting configuration event */
+	unsigned int	:24;		/* Unused */
+	time_t		ac_boottime;	/* System boot time [secs since 1970]*/
+	time_t		ac_curtime;	/* Current time [secs since 1970] */
+	struct ac_utsname  ac_uname;	/* Condensed uname information */
+};
+
+
+/*
+ * Accounting control status values.
+ */
+typedef	enum
+{
+	ACS_OFF,	/* Accounting stopped for this entry */
+	ACS_ERROFF,	/* Accounting turned off by kernel */
+	ACS_ON		/* Accounting started for this entry */
+} ac_status;
+
+/*
+ * Function codes for CSA library interface
+ */
+typedef	enum
+{
+	AC_START,	/* Start kernel, daemon, or record accounting */
+	AC_STOP,	/* Stop kernel, daemon, or record accounting */
+	AC_HALT,	/* Stop all kernel, daemon, and record accounting */
+	AC_CHECK,	/* Check a kernel, daemon, or record accounting state*/
+	AC_KDSTAT,	/* Check all kernel & daemon accounting states */
+	AC_RCDSTAT,	/* Check all record accounting states */
+	AC_JASTART,	/* Start user job accounting  */
+	AC_JASTOP,	/* Stop user job accounting */
+	AC_WRACCT,	/* Write accounting record for daemon */
+	AC_AUTH,	/* Verify executing user is authorized */
+	AC_INCACCT,	/* Control incremental accounting */
+	AC_MREQ
+} ac_request;
+
+/*
+ * CSA accounting type indices: kernel/daemon types, then record types.
+ */
+typedef	enum
+{
+	ACCT_KERN_CSA,		/* Kernel CSA accounting */
+	ACCT_KERN_JOB_PROC,	/* Kernel job process summary accounting */
+	ACCT_KERN_ASH,		/* Kernel array session summary accounting */
+	ACCT_DMD_NQS, 		/* Daemon NQS accounting */
+	ACCT_DMD_WKMG, 		/* Daemon workload management (i.e. LSF) acct */
+	ACCT_DMD_TAPE,		/* Daemon tape accounting */
+	ACCT_DMD_DMIG,		/* Daemon data migration accounting */
+	ACCT_DMD_SOCKET,	/* Daemon socket accounting */
+	ACCT_DMD_SITE1,		/* Site reserved daemon acct */
+	ACCT_DMD_SITE2,		/* Site reserved daemon acct */
+	ACCT_MAXKDS,		/* Max # kernel and daemon entries */
+
+	ACCT_RCD_MPPDET,	/* Record acct for MPP detail exit info */
+	ACCT_RCD_MEM,		/* Record acct for memory */
+	ACCT_RCD_IO,		/* Record acct for input/output */
+	ACCT_RCD_MT,		/* Record acct for multi-tasking */
+	ACCT_RCD_MPP,		/* Record acct for MPP accumulated info */
+	ACCT_THD_MEM,		/* Record acct for memory size threshold */
+	ACCT_THD_TIME,		/* Record acct for CPU time threshold */
+	ACCT_RCD_INCACCT,	/* Record acct for incremental accounting */
+	ACCT_RCD_APPACCT,	/* Record acct for application accounting */
+	ACCT_RCD_SITE1,		/* Site reserved record acct */
+	ACCT_RCD_SITE2,		/* Site reserved record acct */
+	ACCT_MAXRCDS		/* Max # record entries */
+} ac_kdrcd;
+
+#define	ACCT_RCDS	ACCT_RCD_MPPDET /* Record acct low range definition */
+#define	NUM_KDS		(ACCT_MAXKDS - ACCT_KERN_CSA)
+#define	NUM_RCDS	(ACCT_MAXRCDS - ACCT_RCDS)
+#define	NUM_KDRCDS	(NUM_KDS + NUM_RCDS)
+
+
+/*
+ * The following structures are used to get status of a CSA accounting type.
+ */
+
+/*
+ * Accounting entry status structure
+ */
+struct actstat
+{
+	ac_kdrcd	ac_ind;		/* Entry index */
+	ac_status	ac_state;	/* Entry status */
+	int64_t		ac_param;	/* Entry parameter */
+};
+
+/*
+ * Accounting control and status structure
+ */
+#define	ACCT_PATH	128	/* Max path length for accounting file */
+
+struct actctl
+{
+	int	ac_sttnum;		/* Number of status array entries */
+	char	ac_path[ACCT_PATH];	/* Path name for accounting file */
+	struct actstat	ac_stat[NUM_KDRCDS];	/* Entry status array */
+};
+
+/*
+ * Function codes for incremental accounting; currently not used
+ */
+typedef	enum
+{
+	IA_NONE,	/* Zero entry place holder */
+	IA_DELTA,	/* Change clock delta for incremental accounting */
+	IA_EVENT,	/* Cause incremental accounting event now */
+	IA_MAX
+} ac_iafnc;
+
+/*
+ * Incremental accounting structure; currently not used
+ */
+struct actinc
+{
+	int		ac_ind;		/* Entry index */
+	ac_iafnc	ac_fnc;		/* Entry function */
+	int64_t		ac_param;	/* Entry parameter */
+};
+
+/*
+ * Daemon write accounting structure
+ */
+#define	MAX_WRACCT	1024	/* Maximum buffer size of wracct() */
+
+struct actwra
+{
+	int	 ac_did;		/* Daemon index */
+	int	 ac_len;		/* Length of buffer (bytes) */
+	uint64_t ac_jid;		/* Job ID */
+	char	*ac_buf;		/* Daemon accounting buffer */
+};
+
+/* These definitions are used with the CSA /proc IOCTL interface */
+#define CSA_PROC	"csa"
+#define CSA_IOCTL_NUM	'A'
+
+
+#endif	/* _LINUX_CSA_H */
Index: linux/kernel/Makefile
===================================================================
--- linux.orig/kernel/Makefile	2004-08-19 18:12:13.000000000 -0700
+++ linux/kernel/Makefile	2004-08-23 15:28:23.000000000 -0700
@@ -20,6 +20,7 @@
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_PAGG) += pagg.o
 obj-$(CONFIG_PAGG_JOB) += job.o
+obj-$(CONFIG_CSA) += csa.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_IKCONFIG_PROC) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
Index: linux/kernel/csa.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux/kernel/csa.c	2004-08-24 18:32:24.000000000 -0700
@@ -0,0 +1,1665 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc and LANL  All Rights Reserved.
+ * Copyright (c) 2004 Silicon Graphics, Inc All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information:  Silicon Graphics, Inc., 1500 Crittenden Lane,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ */
+
+/*
+ *  Description:
+ *	This file, csa.c, contains the procedures that handle kernel CSA
+ *	job accounting. It configures CSA, writes CSA accounting
+ *	records, and processes the acctctl /proc ioctl.  This code can
+ *	either be compiled directly into the kernel or compiled as
+ *	a loadable module.
+ *
+ *	During initialization, this code registers procedure callbacks
+ *	with the PAGG job code.
+ *
+ *  Author:
+ *	Marlys Kohnke (kohnke@sgi.com)
+ *
+ *  Contributors:
+ *
+ *  Changes:
+ *	January 31, 2001  (kohnke)  Changed to use semaphores rather than
+ *	spinlocks.  Was seeing a spinlock deadlock sometimes when an accounting
+ *	record was being written to disk with 2.4.0 (didn't happen with 
+ *	2.4.0-test7).
+ *
+ *	February 2, 2001  (kohnke)  Changed to handle being compiled directly
+ *	into the kernel, not just compiled as a loadable module. Renamed
+ *	init_module() as init_csa() and cleanup_module() as cleanup_csa().
+ *	Added calls to module_init() and module_exit().
+ *
+ *	January 21, 2003 (jlan)  Changed to provide /proc ioctl interface.
+ *	Also, provided MODULE_* clause.
+ */
+
+
+#include <linux/config.h>
+
+#if defined(CONFIG_CSA) || defined(CONFIG_CSA_MODULE)
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+
+#include <linux/csa_internal.h>
+#include <linux/acct_eop.h>
+#include <linux/csa.h>
+#include <linux/job.h>
+
+
+static int csa_registered = 0;
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("CSA Kernel Module");
+MODULE_LICENSE("GPL");
+
+static int	csa_jstart(int, void *);
+static int	csa_jexit(int, void *);
+static void	csa_acct_eop(int, struct task_struct *);
+static int	csa_modify_buf(char *, struct acctcsa *, struct acctmem *,
+			struct acctio *, int, int);
+static int	csa_write(char *, int, int, uint64_t, int, struct job_csa *);
+static void	csa_config_make(ac_eventtype, struct acctcfg *);
+static int	csa_config_write(ac_eventtype,struct file *);
+static void	csa_header(struct achead *, int, int, int);
+static long int sc_CLK(long int);
+
+#define JID_ERR1 "do_eop_acct:  No job table entry for jid 0x%llx.\n"
+#define JID_ERR2 "csa user job accounting write error %d, jid 0x%llx\n"
+#define JID_ERR3 "Can't disable csa user job accounting jid 0x%llx\n"
+#define JID_ERR4 "csa user job accounting disabled, jid 0x%llx\n"
+
+/* #define CSA_DEBUG 0 */
+
+#ifdef CSA_DEBUG
+#define PRINTK(args...) printk(args)
+#else
+#define PRINTK(args...)
+#endif /* CSA_DEBUG */
+
+/* these defines can be removed once they're available in kernel header files */
+/* #define USEC_PER_SEC	1000000L */	/* number of usecs for 1 second */
+#define USEC_PER_TICK	(USEC_PER_SEC/HZ)
+#define NBPC		PAGE_SIZE 	/* Number of bytes per click */
+#define ctob(x) ((uint64_t)(x)*NBPC)
+
+
+static struct file	*csa_acctvp = (struct file *)NULL;
+static time_t boottime = 0;
+
+struct  timeval acct_now;               /* present time (sec, usec) */
+
+static DECLARE_MUTEX(csa_sem);
+static DECLARE_MUTEX(csa_write_sem);
+
+static int     csa_flag = 0;		/* accounting start state flag */
+char    csa_path[ACCT_PATH] = "";	/* current accounting file path name */
+char    new_path[ACCT_PATH] = "";	/* new accounting file path name */
+
+
+static struct job_acctmod csa_job_callbacks = {
+	.type =		JOB_ACCT_CSA,
+	.jobstart =	csa_jstart,
+	.jobend =	csa_jexit,
+	.module =	THIS_MODULE
+};
+
+
+/* modify this when changes are made to ac_kdrcd in csa.h */ 
+char *acct_dmd_name[ACCT_MAXKDS] = 
+		{"CSA",
+		 "JOB",
+		 "ASH",
+		 "NQS",
+		 "WORKLOAD MGMT",
+		 "TAPE",
+		 "DATA MIGRATION",
+		 "SOCKET",
+		 "SITE1",
+		 "SITE2" };
+
+typedef enum {
+        A_SYS,          /* system accounting action     (0) */
+        A_CJA,          /* Job accounting action        (1) */
+        A_DMD,          /* daemon accounting action     (2) */
+        A_MAX} a_fnc;
+
+struct  actstat acct_dmd[ACCT_MAXKDS][A_MAX];
+struct  actstat acct_rcd[ACCT_MAXRCDS-ACCT_RCDS][A_MAX];
+
+/*  Initialize the CSA accounting state information. */
+#define INIT_DMD(t, i, s, p)    acct_dmd[i][t].ac_ind = i;              \
+                                acct_dmd[i][t].ac_state = s;            \
+                                acct_dmd[i][t].ac_param = p;
+#define INIT_RCD(t, i, s, p)    acct_rcd[i-ACCT_RCDS][t].ac_ind = i;    \
+                                acct_rcd[i-ACCT_RCDS][t].ac_state = s;  \
+                                acct_rcd[i-ACCT_RCDS][t].ac_param = p;
+
+static int csa_ioctl( struct inode *, struct file *, unsigned int,
+		unsigned long);
+/* proc dir entry */
+struct proc_dir_entry *csa_proc_entry;
+
+/* file operations for proc file */
+static struct file_operations csa_file_ops = {
+	owner: THIS_MODULE,
+	ioctl: csa_ioctl
+};
+
+#ifdef DEBUG
+/* Trace + rwsem helpers.  Each ends in a bare while(0); callers add ';' */
+#define DBG_PRINTINIT(s)	\
+	char *dbg_fname = s
+
+#define DBG_PRINTENTRY()					\
+do {								\
+	printk(KERN_DEBUG __FILE__ ": %s: entry\n", dbg_fname);	\
+} while(0)
+
+#define DBG_PRINTEXIT(c)				 		\
+do {							 		\
+	printk(KERN_DEBUG __FILE__ ": %s: exit, code = %d\n", dbg_fname, c);	\
+} while(0)
+
+/* write lock semaphore (logs the lock address first) */
+#define JOB_WLOCK(l)					\
+do {							\
+	printk(KERN_DEBUG __FILE__ ": wlock = %p\n", l);	\
+	down_write(l);					\
+} while(0)
+
+/* write unlock semaphore (logs the lock address first) */
+#define JOB_WUNLOCK(l)					\
+do {							\
+	printk(KERN_DEBUG __FILE__ ": wunlock = %p\n", l);	\
+	up_write(l);					\
+} while(0)
+
+/* read lock semaphore (logs the lock address first) */
+#define JOB_RLOCK(l)					\
+do {							\
+	printk(KERN_DEBUG __FILE__ ": rlock = %p\n", l);	\
+	down_read(l);					\
+} while(0)
+
+/* read unlock semaphore (logs the lock address first) */
+#define JOB_RUNLOCK(l)					\
+do {							\
+	printk(KERN_DEBUG __FILE__ ": runlock = %p\n", l);	\
+	up_read(l);					\
+} while(0)
+
+
+#else /* #ifdef DEBUG */
+
+#define DBG_PRINTINIT(s)
+
+#define DBG_PRINTENTRY() 	\
+do {				\
+} while(0)
+
+#define DBG_PRINTEXIT(c)	\
+do {				\
+} while(0)
+
+/* write lock semaphore */
+#define JOB_WLOCK(l)	\
+do {			\
+	down_write(l);	\
+} while(0)
+
+/* write unlock semaphore */
+#define JOB_WUNLOCK(l)	\
+do {			\
+	up_write(l);	\
+} while(0)
+
+/* read lock semaphore */
+#define JOB_RLOCK(l)	\
+do {			\
+	down_read(l);	\
+} while(0)
+
+/* read unlock semaphore */
+#define JOB_RUNLOCK(l)	\
+do {			\
+	up_read(l);	\
+} while(0)
+
+
+#endif /* #ifdef DEBUG */
+
+
+
+/*
+ *	Module/boot-time init: register the job start/end callbacks with the
+ *	PAGG job code, create the /proc entry and install the end-of-process
+ *	hook.  Returns 0 or a negative errno; failures undo earlier steps.
+ */
+static int __init
+init_csa(void)
+{
+	int retval = 0;
+
+	if (csa_registered) {
+		/*
+		 * incorrectly using csa_job_acct.c as a loadable module and
+		 * compiled into the kernel??
+		 */
+		printk(KERN_WARNING "init_csa: %s\n",
+			"Multiple attempts to register CSA support");
+		return -EBUSY;
+	}
+	csa_registered = 1;
+
+	/*
+	 * register callbacks with the PAGG job code to process
+	 * start-of-job and end-of-job accounting records.  If this is a
+	 * module, this registration will also increment the job module
+	 * use count so the job module won't be unloaded out from under
+	 * the CSA module.
+	 */
+	retval = job_register_acct(&csa_job_callbacks);
+	if (retval != 0) {
+		printk(KERN_INFO "CSA: failed to register job\n");
+		csa_registered = 0;	/* nothing is registered: allow a retry */
+		return retval;
+	}
+
+	/* setup our /proc entry file */
+	csa_proc_entry = create_proc_entry(CSA_PROC, S_IFREG|S_IRUGO,
+				&proc_root);
+	if (!csa_proc_entry) {
+		csa_registered = 0;
+		job_unregister_acct(&csa_job_callbacks);
+		return -ENOMEM;	/* a bare -1 would be reported as -EPERM */
+	}
+
+	csa_proc_entry->proc_fops = &csa_file_ops;
+	csa_proc_entry->proc_iops = NULL;
+
+	do_eop_acct = csa_acct_eop;
+
+	printk(KERN_INFO "CSA: initialized\n");
+
+	return retval;
+}
+
+
+/*
+ *	Module exit: remove the end-of-process hook, unregister the job
+ *	callbacks from the PAGG job module (which decrements the job module
+ *	use count) and remove the /proc entry.
+ */
+static void __exit
+cleanup_csa(void)
+{
+	int retval = 0;
+
+	csa_registered = 0;
+	do_eop_acct = NULL;
+
+	retval = job_unregister_acct(&csa_job_callbacks);
+	if (retval < 0) {
+		/* not fatal: the remaining teardown is still carried out */
+		printk(KERN_ERR "CSA module can't unregister with job module. "
+		       "Continuing with CSA module cleanup.\n");
+	}
+	remove_proc_entry(CSA_PROC, &proc_root);
+	printk(KERN_INFO "CSA removed\n");
+}
+
+/*
+ *	Record 'flag' in csa_flag, compute boottime, and reset the A_SYS
+ *	slot of every acct_dmd[] and acct_rcd[] entry to ACS_OFF, param 0.
+ *	Modify this when changes are made to ac_kdrcd in csa.h
+ */
+static void
+csa_init_acct(int flag)
+{
+	csa_flag = flag;
+	/* presumably wall-clock seconds minus uptime -- TODO confirm */
+	boottime = xtime.tv_sec - (jiffies / HZ);
+
+	/*  Initialize system accounting states. */
+	INIT_DMD(A_SYS, ACCT_KERN_CSA,		ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_KERN_JOB_PROC,	ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_KERN_ASH,		ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_DMD_NQS,		ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_DMD_WKMG,		ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_DMD_TAPE,		ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_DMD_SOCKET,	ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_DMD_DMIG,		ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_DMD_SITE1,		ACS_OFF, 0);
+	INIT_DMD(A_SYS, ACCT_DMD_SITE2,		ACS_OFF, 0);
+	/*  Same for the record-type entries, stored in acct_rcd[]. */
+	INIT_RCD(A_SYS, ACCT_RCD_MPPDET,	ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_MEM,		ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_IO,		ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_MT,		ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_MPP,		ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_THD_MEM,		ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_THD_TIME,		ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_INCACCT,	ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_APPACCT,	ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_SITE1,		ACS_OFF, 0);
+	INIT_RCD(A_SYS, ACCT_RCD_SITE2,		ACS_OFF, 0);
+
+	return;
+}
+
+/*
+ *	Tick count -> microseconds using long arithmetic only (no 64-bit
+ *	divide on 32-bit kernels): whole seconds plus the sub-second rest.
+ */
+static long int
+sc_CLK(long int clock)
+{
+	const long int whole_secs = clock / HZ;
+	const long int rest_ticks = clock % HZ;
+	long int usecs = whole_secs * 1000000;
+
+	usecs += rest_ticks * 1000000 / HZ;
+	return usecs;
+}
+
+/*  Fill in a CSA record header: magic, revision, type, size; flag cleared. */
+static void
+csa_header(struct achead *head, int revision, int type, int size)
+{
+	/* caller-supplied identification of the record */
+	head->ah_revision = revision;
+	head->ah_type = type;
+	head->ah_size = size;
+	/* fixed values */
+	head->ah_magic = ACCT_MAGIC;
+	head->ah_flag = 0;
+}
+
+/*
+ *  Create a CSA end-of-process accounting record for exiting task p and
+ *  write it to the job accounting file and/or the system pacct file.
+ */
+void
+csa_acct_eop(int exitcode, struct task_struct *p)
+{
+	char	acctent[sizeof(struct acctcsa) +
+			sizeof(struct acctmem) +
+			sizeof(struct acctio) ];
+	char	modacctent[sizeof(struct acctcsa) +
+			   sizeof(struct acctmem) +
+			   sizeof(struct acctio) ];
+	struct	acctcsa	*csa = NULL;
+	struct  acctmem *mem = NULL;
+	struct  acctio  *io = NULL;
+	struct	achead	*hdr1, *hdr2;
+	char	*cb = acctent;
+	struct job_csa job_acctbuf;
+	uint64_t jid = 0;
+	int	len = 0;
+	int	csa_enabled = 0;
+	int	ja_enabled = 0;
+	int	io_enabled = 0;
+	int	mem_enabled = 0;
+	int	retval = 0;
+	uint64_t memtime, start_secs;
+
+	if (p == NULL) {
+		printk(KERN_ERR "do_eop_acct: CSA null task pointer\n");
+		return;
+	}
+	jid = job_getjid(p);
+	if (jid <= 0) {
+		/* no job table entry; not all processes are part of a job */
+		return;
+	}
+	memset(&job_acctbuf, 0, sizeof(job_acctbuf));
+	retval = job_getacct(jid, JOB_ACCT_CSA, &job_acctbuf);
+	if (retval != 0) {
+		/* couldn't get accounting info stored in the job table entry */
+		printk(KERN_WARNING JID_ERR1, (unsigned long long) jid);
+		return;
+	}
+
+	down(&csa_sem);
+	/*
+	 * figure out what's turned on, which determines which record types
+	 * need to be written.  All records are written to a user job
+	 * accounting file.  Only those record types configured on are
+	 * written to the system pacct file
+	 */
+	if (job_acctbuf.job_acctfile != (struct file *)NULL) {
+		ja_enabled = 1;
+	}
+        if (acct_dmd[ACCT_KERN_CSA][A_SYS].ac_state == ACS_ON) {
+		csa_enabled = 1;
+	}
+        if (acct_rcd[ACCT_RCD_IO-ACCT_RCDS][A_SYS].ac_state == ACS_ON) {
+		io_enabled = 1;
+	}
+        if (acct_rcd[ACCT_RCD_MEM-ACCT_RCDS][A_SYS].ac_state == ACS_ON) {
+		mem_enabled = 1;
+	}
+
+	if (!ja_enabled && !csa_enabled) {
+		/* nothing to do */
+		up(&csa_sem);
+		return;
+	}
+	up(&csa_sem);
+
+	csa = (struct acctcsa *)acctent;
+	memset(csa, 0, sizeof(struct acctcsa));
+	hdr1 = &csa->ac_hdr1;
+	csa_header(hdr1, REV_CSA, ACCT_KERNEL_CSA, sizeof(struct acctcsa) );
+	hdr2 = &csa->ac_hdr2;
+	csa_header(hdr2, REV_CSA, ACCT_KERNEL_CSA, 0 );
+	hdr2->ah_magic = ~ACCT_MAGIC;
+
+	csa->ac_stat = exitcode;
+	csa->ac_uid  = p->uid;
+	csa->ac_gid  = p->gid;
+
+	/* XXX change this when array session handle info available */
+	csa->ac_ash  = 0;
+	csa->ac_jid  = job_acctbuf.job_id;
+	/* XXX change this when project ids are available */
+	csa->ac_prid = 0;
+	csa->ac_nice = task_nice(p);
+	csa->ac_sched = p->policy;
+
+	csa->ac_pid  = p->pid;
+	csa->ac_ppid = (p->parent) ? p->parent->pid : 0;
+	if (p->flags & PF_FORKNOEXEC) {
+		csa->ac_hdr1.ah_flag |= AFORK;
+	}
+	if (p->flags & PF_SUPERPRIV) {
+		csa->ac_hdr1.ah_flag |= ASU;
+	}
+	if (p->flags & PF_DUMPCORE) {
+		csa->ac_hdr1.ah_flag |= ACORE;
+	}
+	if (p->flags & PF_SIGNALED) {
+		csa->ac_hdr1.ah_flag |= AXSIG;
+	}
+	csa->ac_hdr1.ah_flag &= ~ACKPT;
+
+	strncpy(csa->ac_comm, p->comm, sizeof(csa->ac_comm));
+	start_secs = p->start_time;	/* do_div() divides in place: use a copy */
+	do_div(start_secs, HZ);		/* quotient stays in start_secs */
+	csa->ac_btime = start_secs + (xtime.tv_sec - (jiffies / HZ));
+
+	/*
+	 * cpu usage is accumulated by the kernel in ticks.
+	 * convert from clock ticks to microseconds; each process gets
+	 * a minimum of a tick for elapsed time.  If the granularity
+	 * changes to something finer than a tick in the future,
+	 * then these zero cpu and elapsed time modifications should be
+	 * looked at again.
+	 */
+	csa->ac_etime = (jiffies - p->start_time == 0) ? (USEC_PER_TICK) :
+		((uint64_t)(jiffies - p->start_time) * USEC_PER_TICK);
+
+	cb += sizeof(struct acctcsa);
+	len += sizeof(struct acctcsa);
+
+	/* convert from ticks to microseconds */
+	csa->ac_utime = p->utime * USEC_PER_TICK;
+	csa->ac_stime = p->stime * USEC_PER_TICK;
+	/* Each process gets a minimum of a half tick cpu time */
+	if ((csa->ac_utime == 0) && (csa->ac_stime == 0)) {
+		csa->ac_stime = USEC_PER_TICK/2;
+	}
+
+	/*   Create the memory record if needed */
+	if (ja_enabled || mem_enabled) {
+		mem = (struct acctmem *)cb;
+		memset(mem, 0, sizeof(struct acctmem));
+		hdr1->ah_flag |= AMORE;
+		hdr2->ah_type |= ACCT_MEM;
+		hdr1 = &mem->ac_hdr;
+		csa_header(hdr1, REV_MEM, ACCT_KERNEL_MEM,
+			sizeof(struct acctmem) );
+
+		/* adjust from pages/ticks to Mb/usec */
+		memtime = sc_CLK((long int)p->csa_rss_mem1);
+		mem->ac_core.mem1 = ctob(memtime) / (1024 * 1024);
+		memtime = sc_CLK((long int)p->csa_vm_mem1);
+		mem->ac_virt.mem1 = ctob(memtime) / (1024 * 1024);
+
+		/* adjust page size to 1K units */
+		if (p->mm) {
+		    mem->ac_virt.himem = p->mm->hiwater_vm * (PAGE_SIZE / 1024);
+		    mem->ac_core.himem = p->mm->hiwater_rss * (PAGE_SIZE/1024);
+		    /*
+		     * For processes with zero systime, set the integral
+		     * to the highwater mark rather than leave at zero
+		     */
+		    if (mem->ac_core.mem1 == 0) {
+			mem->ac_core.mem1 = mem->ac_core.himem / 1024;
+		    }
+		    if (mem->ac_virt.mem1 == 0) {
+			mem->ac_virt.mem1 = mem->ac_virt.himem / 1024;
+		    }
+		}
+
+		mem->ac_minflt = p->min_flt;
+		mem->ac_majflt = p->maj_flt;
+
+		cb += sizeof(struct acctmem);
+		hdr2->ah_size += sizeof(struct acctmem);
+		len += sizeof(struct acctmem);
+	}
+	/*  Create the I/O record */
+	if (ja_enabled || io_enabled) {
+		io = (struct acctio *)cb;
+		memset(io, 0, sizeof(struct acctio));
+		hdr1->ah_flag |= AMORE;
+		hdr2->ah_type |= ACCT_IO;
+		hdr1 = &io->ac_hdr;
+		csa_header(hdr1, REV_IO, ACCT_KERNEL_IO,
+			sizeof(struct acctio) );
+
+		/* convert from ticks to microseconds */
+		/* XXX when able to do kernel 64 bit divide, change type */
+		PRINTK(KERN_INFO "CSA: block wait time %lu\n",(unsigned long int)p->bwtime);
+		io->ac_bwtime = CT_TO_USECS((unsigned long int)p->bwtime);
+		PRINTK(KERN_INFO "CSA: converted bwtime %lu\n",io->ac_bwtime);
+
+		io->ac_bkr = p->rblk;
+		io->ac_bkw = p->wblk;
+
+		/* raw wait time; currently not used */
+		io->ac_rwtime = 0;
+
+		io->ac_chr = p->rchar;
+		io->ac_chw = p->wchar;
+		io->ac_scr  = p->syscr;
+		io->ac_scw  = p->syscw;
+
+		cb += sizeof(struct acctio);
+		hdr2->ah_size += sizeof(struct acctio);
+		len += sizeof(struct acctio);
+	}
+
+	/* record always written to a user job accounting file */
+	if ((len > 0) && (job_acctbuf.job_acctfile != (struct file *)NULL) ) {
+		csa_write((caddr_t)&acctent, ACCT_KERN_CSA,
+			len, jid, A_CJA, &job_acctbuf);
+	}
+	/*
+	 * check the cpu time and virtual memory thresholds before writing
+	 * this record to the system pacct file
+	 */
+	if ((acct_rcd[ACCT_THD_MEM-ACCT_RCDS][A_SYS].ac_state == ACS_ON) &&
+	    (ja_enabled || mem_enabled)) {
+		if (mem->ac_virt.himem <
+	            acct_rcd[ACCT_THD_MEM-ACCT_RCDS][A_SYS].ac_param) {
+			/* don't write record to pacct */
+			return;
+		}
+	}
+	if ((acct_rcd[ACCT_THD_TIME-ACCT_RCDS][A_SYS].ac_state == ACS_ON)) {
+	     if ((csa->ac_utime + csa->ac_stime) <
+	          acct_rcd[ACCT_THD_TIME-ACCT_RCDS][A_SYS].ac_param) {
+			/* don't write record to pacct */
+			return;
+	     }
+	}
+
+	if ((len > 0) && (csa_acctvp != (struct file *)NULL) && csa_enabled ) {
+		if (io_enabled && mem_enabled) {
+			/* write out buffer as is to system pacct file */
+			csa_write((caddr_t)&acctent, ACCT_KERN_CSA,
+				len, jid, A_SYS, &job_acctbuf);
+		} else {
+			/* only write out record types turned on */
+			len = csa_modify_buf(modacctent, csa, mem, io,
+				io_enabled, mem_enabled);
+			csa_write((caddr_t)&modacctent, ACCT_KERN_CSA,
+				len, jid, A_SYS, &job_acctbuf);
+		}
+	}
+	return;
+}
+
+/*
+ *	Build in modacctent the record that goes to the system pacct file:
+ *	the base csa record followed only by the continuation records
+ *	(memory, io) that are enabled.  The headers are first patched in
+ *	place (AMORE chain, second-header type mask and size) so that they
+ *	describe exactly what is copied.  Returns the bytes put in buffer.
+ */
+static int
+csa_modify_buf(char *modacctent, struct acctcsa *csa, struct acctmem *mem,
+	       struct acctio *io, int io_enabled, int mem_enabled)
+{
+	struct achead *last = &csa->ac_hdr1;	/* header of last record kept */
+	struct achead *cont = &csa->ac_hdr2;	/* continuation summary header */
+	int used = 0;				/* bytes copied so far */
+
+	/* clear the whole worst-case buffer, not just the part used */
+	memset(modacctent, 0, sizeof(struct acctcsa) +
+		sizeof(struct acctmem) + sizeof(struct acctio));
+
+	/*
+	 * restart the header bookkeeping from "base record only" and
+	 * re-add each continuation record that will really be written
+	 */
+	last->ah_flag &= ~AMORE;
+	cont->ah_type = ACCT_KERNEL_CSA;
+	cont->ah_size = 0;
+	if (mem_enabled) {
+		last->ah_flag |= AMORE;
+		cont->ah_type |= ACCT_MEM;
+		cont->ah_size += sizeof(struct acctmem);
+		last = &mem->ac_hdr;
+		last->ah_flag &= ~AMORE;
+	}
+	if (io_enabled) {
+		last->ah_flag |= AMORE;
+		cont->ah_type |= ACCT_IO;
+		cont->ah_size += sizeof(struct acctio);
+		last = &io->ac_hdr;
+		last->ah_flag &= ~AMORE;
+	}
+
+	/* base record always comes first */
+	memcpy(modacctent + used, csa, sizeof(struct acctcsa));
+	used += sizeof(struct acctcsa);
+
+	/* then the memory record, if enabled */
+	if (mem_enabled) {
+		memcpy(modacctent + used, mem, sizeof(struct acctmem));
+		used += sizeof(struct acctmem);
+	}
+
+	/* and finally the io record, if enabled */
+	if (io_enabled) {
+		memcpy(modacctent + used, io, sizeof(struct acctio));
+		used += sizeof(struct acctio);
+	}
+
+	return used;
+}
+
+
+/*
+ * csa_ioctl
+ *
+ */
+static int
+csa_ioctl(
+	struct inode *inode,
+	struct file *file,
+	unsigned int req,
+	unsigned long data)
+{
+	struct	actctl	actctl;
+	struct	actstat	actstat;
+
+	int	daemon = 0;
+	int	error = 0;
+	int	err = 0;
+	static	int	flag = 010000;
+	int	ind;
+	int	id;
+	int	len;
+	int	num;
+
+	PRINTK(KERN_INFO "CSA: csa_ioctl\n");
+	down(&csa_sem);
+	if (!csa_flag) {
+		csa_init_acct(flag++);
+	}
+	up(&csa_sem);
+
+	if ((req < 0) || (req >= AC_MREQ) ) {
+		return -EINVAL;
+	}
+
+	memset(&actctl, 0, sizeof(struct actctl));
+	memset(&actstat, 0, sizeof(struct actstat));
+
+	switch (req) {
+	/*
+	 *  Start specified types of accounting.
+	 */
+	case AC_START:
+	    {
+		int id, ind;
+		struct file *newvp;
+
+		PRINTK(KERN_INFO "CSA: AC_START\n");
+		if (!capable(CAP_SYS_PACCT) ) {
+			error = -EPERM;
+			break;
+		}
+
+		if (copy_from_user(&actctl, (void*)data, sizeof(int)) ) {
+			error = -EFAULT;
+			break;
+		}
+
+		num = (actctl.ac_sttnum == 0) ? 1 : actctl.ac_sttnum;
+		if ((num < 0) || (num > NUM_KDRCDS) ) {
+			error = -EINVAL;
+			break;
+
+		}
+
+		len = sizeof(struct actctl) -
+		    sizeof(struct actstat) * NUM_KDRCDS + 
+		    sizeof(struct actstat) * num;
+		if (copy_from_user(&actctl, (void*)data, len)) {
+			error = -EFAULT;
+			break;
+		}
+		/*
+		 *	Verify all indexes in actstat structures specified.
+	 	 */
+		for(ind = 0; ind < num; ind++) {
+			id = actctl.ac_stat[ind].ac_ind;
+			if ((id < 0) || (id >= ACCT_MAXRCDS) ) {
+				error = -EINVAL;
+				break;
+			}
+
+			if (id == ACCT_MAXKDS) {
+				error = -EINVAL;
+				break;
+			}
+		}
+		down(&csa_sem);
+		/*
+		 *	If an accounting file was specified, make sure
+		 *	that we can access it.
+		 */
+		if (strlen(actctl.ac_path) ) {
+			strncpy(new_path, actctl.ac_path, ACCT_PATH);
+			newvp = filp_open(new_path,O_WRONLY|O_APPEND, 0);
+			if (IS_ERR(newvp)) {
+				error = PTR_ERR(newvp);
+				up(&csa_sem);
+				break;
+			} else if (!S_ISREG(newvp->f_dentry->d_inode->i_mode)) {
+				error = -EACCES;
+				filp_close(newvp, NULL);
+				up(&csa_sem);
+				break;
+			} else if (!newvp->f_op->write) {
+				error = -EIO;
+				filp_close(newvp, NULL);
+				up(&csa_sem);
+				break;
+			}
+			if ((csa_acctvp != (struct file *)NULL) &&
+					csa_acctvp == newvp) {
+				/*
+				 * this file already being used, so ignore
+				 * request to use this file; just continue on
+				 */
+				filp_close(newvp, NULL);
+				newvp = (struct file *)NULL;
+			}
+
+		} else {
+			newvp = (struct file *)NULL;
+		}
+		/*
+		 *	If a new accounting file was specified and there's
+		 *	an old accounting file, stop writing to it.
+		 */
+		if (newvp != (struct file *)NULL) {
+			if (csa_acctvp != (struct file *)NULL) {
+				error = csa_config_write(AC_CONFCHG_FILE,NULL);
+				filp_close(csa_acctvp, NULL);
+			} else if (!csa_flag) {
+				csa_init_acct(flag++);
+			}
+
+			strncpy(csa_path, new_path, ACCT_PATH);
+			down(&csa_write_sem);
+			csa_acctvp = newvp;
+			up(&csa_write_sem);
+
+		} else {
+			if (csa_acctvp == (struct file *)NULL) {
+				error = -EINVAL;
+				up(&csa_sem);
+				break;
+			}
+		}
+
+		/*
+		 *  Loop through each actstat block and turn ON that accounting.
+		 */
+		for(ind = 0; ind < num; ind++) {
+			struct	actstat	*stat;
+
+			id = actctl.ac_stat[ind].ac_ind;
+			stat = &actctl.ac_stat[ind];
+			if (id < ACCT_RCDS)  {
+				acct_dmd[id][A_SYS].ac_state = ACS_ON;
+				acct_dmd[id][A_SYS].ac_param = stat->ac_param;
+
+				stat->ac_state = acct_dmd[id][A_SYS].ac_state;
+				stat->ac_param = acct_dmd[id][A_SYS].ac_param;
+			} else {
+				int	tid = id -ACCT_RCDS;
+
+				acct_rcd[tid][A_SYS].ac_state = ACS_ON;
+				acct_rcd[tid][A_SYS].ac_param = stat->ac_param;
+
+				stat->ac_state = acct_rcd[tid][A_SYS].ac_state;
+				stat->ac_param = acct_rcd[tid][A_SYS].ac_param;
+			}
+		}
+
+		up(&csa_sem);
+		error = csa_config_write(AC_CONFCHG_ON, NULL);
+		/*
+		 *  Return the accounting states to the user.
+	 	 */
+		if (copy_to_user((void*)data, &actctl, len)) {
+			error = -EFAULT;
+			break;
+		}
+	    }
+	    break;
+
+	/*
+	 *  Stop specified types of accounting.
+	 */
+	case AC_STOP:
+	    {
+		int	id, ind;
+
+		PRINTK(KERN_INFO "CSA: AC_STOP\n");
+		if (!capable(CAP_SYS_PACCT) ) {
+			error = -EPERM;
+			break;
+		}
+
+		if (copy_from_user(&actctl, (void*)data, sizeof(int)) ) {
+			error = -EFAULT;
+			break;
+		}
+
+		num = (actctl.ac_sttnum == 0) ? 1 : actctl.ac_sttnum;
+		if ((num <= 0) || (num > NUM_KDRCDS) ) {
+			error = -EINVAL;
+			break;
+		}
+
+		len = sizeof(struct actctl) -
+		    sizeof(struct actstat) * NUM_KDRCDS + 
+		    sizeof(struct actstat) * num;
+		if (copy_from_user(&actctl, (void*)data, len)) {
+			error = -EFAULT;
+			break;
+		}
+
+		/*
+		 *  Verify all of the indexes in actstat structures specified.
+	 	 */
+		for(ind = 0; ind < num; ind++) {
+			id = actctl.ac_stat[ind].ac_ind;
+			if ((id < 0) || (id >= NUM_KDRCDS) ) {
+				error = -EINVAL;
+				break;
+			}
+		}
+
+		/*
+		 * Loop through each actstat block and turn off that accounting.
+		 */
+		down(&csa_sem);
+		/*
+		 *	Disable accounting for this entry.
+		 */
+		for(ind = 0; ind < num; ind++) {
+			id = actctl.ac_stat[ind].ac_ind;
+			if (id < ACCT_RCDS) {
+				acct_dmd[id][A_SYS].ac_state = ACS_OFF;
+				acct_dmd[id][A_SYS].ac_param = 0;
+
+				actctl.ac_stat[ind].ac_state =
+					acct_dmd[id][A_SYS].ac_state;
+				actctl.ac_stat[ind].ac_param = 0;
+			} else {
+				int	tid = id -ACCT_RCDS;
+
+				acct_rcd[tid][A_SYS].ac_state = ACS_OFF;
+				acct_rcd[tid][A_SYS].ac_param = 0;
+				actctl.ac_stat[ind].ac_state =
+					acct_rcd[tid][A_SYS].ac_state;
+				actctl.ac_stat[ind].ac_param = 
+					acct_rcd[tid][A_SYS].ac_param;
+			}
+		}		/* end of for(ind) */
+		/*
+		 *  Check the daemons to see if any are still on.
+	 	 */
+		for(ind = 0; ind < ACCT_MAXKDS; ind++) {
+			if (acct_dmd[ind][A_SYS].ac_state == ACS_ON) {
+				daemon += 1<<ind;
+			}
+		}
+		up(&csa_sem);
+		/*
+		 *  If all daemons are off and there's an old accounting file,
+		 *	stop writing to it.
+	 	*/
+		if (!daemon && (csa_acctvp != (struct file *)NULL) ) {
+			error = csa_config_write(AC_CONFCHG_OFF,NULL);
+			filp_close(csa_acctvp, NULL);
+			down(&csa_write_sem);
+			csa_acctvp = (struct file *)NULL;
+			up(&csa_write_sem);
+		} else {
+			error = csa_config_write(AC_CONFCHG_OFF, NULL);
+		}
+		/*
+		 *  Return the accounting states to the user.
+	 	*/
+		if (copy_to_user((void*)data, &actctl, len)) {
+			error = -EFAULT;
+			break;
+		}
+	    }
+	    break;
+
+	/*
+	 *  Halt all accounting.
+	 */
+	case AC_HALT:
+	    {
+		int	ind;
+
+		PRINTK(KERN_INFO "CSA: AC_HALT\n");
+		if (!capable(CAP_SYS_PACCT) ) {
+			error = -EPERM;
+			break;
+		}
+		down(&csa_sem);
+	 	/*  Turn off all accounting if any is on. */
+		for(ind = 0; ind <ACCT_MAXKDS; ind++) {
+			acct_dmd[ind][A_SYS].ac_state = ACS_OFF;
+			acct_dmd[ind][A_SYS].ac_param = 0;
+		}
+
+		for(ind = ACCT_RCDS; ind < ACCT_MAXRCDS; ind++) {
+			int	tid = ind -ACCT_RCDS;
+
+			acct_rcd[tid][A_SYS].ac_state = ACS_OFF;
+			acct_rcd[tid][A_SYS].ac_param = 0;
+		}
+ 
+		up(&csa_sem);
+	 	/*  If there's an old accounting file, stop writing to it. */
+		if (csa_acctvp != (struct file *)NULL) {
+			error = csa_config_write(AC_CONFCHG_OFF,NULL);
+			filp_close(csa_acctvp, NULL);
+			down(&csa_write_sem);
+			csa_acctvp = (struct file *)NULL;
+			up(&csa_write_sem);
+		}
+	    }
+	    break;
+
+	/*
+	 * Process daemon/record status function.
+	 */
+	case AC_CHECK:
+	    {
+		PRINTK(KERN_INFO "CSA: AC_CHECK\n");
+		if (copy_from_user(&actstat, (void*)data, sizeof(struct actstat)) ) {
+			error = -EFAULT;
+			break;
+		}
+		id = actstat.ac_ind;
+		if ((id >= 0) && (id < ACCT_MAXKDS) ) {
+			actstat.ac_state = acct_dmd[id][A_SYS].ac_state;
+			actstat.ac_param = acct_dmd[id][A_SYS].ac_param;
+
+		} else if ((id >= ACCT_RCDS) && (id < ACCT_MAXRCDS) ) {
+			int	tid = id-ACCT_RCDS;
+
+			actstat.ac_state = acct_rcd[tid][A_SYS].ac_state;
+			actstat.ac_param = acct_rcd[tid][A_SYS].ac_param;
+
+		} else {
+			error = -EINVAL;
+			break;
+		}
+		if (copy_to_user((void*)data, &actstat, sizeof(struct actstat)) ) {
+			error = -EFAULT;
+		}
+	    }
+		break;
+
+	/*
+	 *  Process daemon status function.
+	 */
+	case AC_KDSTAT:
+	    {
+		PRINTK(KERN_INFO "CSA: AC_KDSTAT\n");
+		if (copy_from_user(&actctl, (void*)data, sizeof(int)) ) {
+			error = -EFAULT;
+			break;
+		}
+
+		num = actctl.ac_sttnum;
+
+		if (num <= 0) {
+			error = EINVAL;
+			break;
+		} else if (num > NUM_KDS) {
+			num = NUM_KDS;
+		}
+		for(ind = 0; ind < num; ind++) {
+			actctl.ac_stat[ind].ac_ind   =
+				acct_dmd[ind][A_SYS].ac_ind;
+			actctl.ac_stat[ind].ac_state =
+				acct_dmd[ind][A_SYS].ac_state;
+			actctl.ac_stat[ind].ac_param =
+				acct_dmd[ind][A_SYS].ac_param;
+		}		/* end of for(ind) */
+		actctl.ac_sttnum = num;
+		strncpy(actctl.ac_path, csa_path, ACCT_PATH);
+
+		len = sizeof(struct actctl) -
+		    sizeof(struct actstat) * NUM_KDRCDS + 
+		    sizeof(struct actstat) * num;
+		if (copy_to_user((void*)data, &actctl, len)) {
+			error = -EFAULT;
+			break;
+		}
+	    }
+	    break;
+
+	/*
+	 *  Process record status function.
+	 */
+	case AC_RCDSTAT:
+	    {
+		PRINTK(KERN_INFO "CSA: AC_RCDSTAT\n");
+		if (copy_from_user(&actctl, (void*)data, sizeof(int)) ) {
+			error = -EFAULT;
+			break;
+		}
+		num = actctl.ac_sttnum;
+
+		if (num <= 0) {
+			error = -EINVAL;
+			break;
+		} else if (num > NUM_RCDS) {
+			num = NUM_RCDS;
+		}
+		for(ind = 0; ind < num; ind++) {
+			actctl.ac_stat[ind].ac_ind =
+				acct_rcd[ind][A_SYS].ac_ind;
+			actctl.ac_stat[ind].ac_state =
+				acct_rcd[ind][A_SYS].ac_state;
+			actctl.ac_stat[ind].ac_param =
+				acct_rcd[ind][A_SYS].ac_param;
+		}
+		actctl.ac_sttnum = num;
+		strncpy(actctl.ac_path, csa_path, ACCT_PATH);
+		len = sizeof(struct actctl) -
+		    sizeof(struct actstat) * NUM_KDRCDS + 
+		    sizeof(struct actstat) * num;
+		if (copy_to_user((void*)data, &actctl, len)) {
+			error = -EFAULT;
+			break;
+		}
+	    }
+	    break;
+
+	/*
+	 *  Turn user job accounting ON or OFF.
+	 */
+	case AC_JASTART:
+	case AC_JASTOP:	
+	    {
+		char	localpath[ACCT_PATH];
+		struct	file	*newvp = NULL;
+		struct	file	*oldvp;
+		uint64_t	jid;
+		struct job_csa job_acctbuf;
+		int retval = 0;
+
+		if (req == AC_JASTART)
+			PRINTK(KERN_INFO "CSA: AC_JASTART\n");
+		else
+			PRINTK(KERN_INFO "CSA: AC_JASTOP\n");
+		len = sizeof(struct actctl) -
+		    sizeof(struct actstat) * (NUM_KDRCDS -1);
+		if (copy_from_user(&actctl, (void*)data, len)) {
+			error = -EFAULT;
+			break;
+		}
+		/*
+		 * If an accounting file was specified, make sure
+		 * that we can access it.
+		 */
+		if (strlen(actctl.ac_path)) {
+			strncpy(localpath, actctl.ac_path, ACCT_PATH);
+			newvp = filp_open(localpath,O_WRONLY|O_APPEND,0);
+			if (IS_ERR(newvp)) {
+				error = PTR_ERR(newvp);
+				break;
+			} else if (!S_ISREG(newvp->f_dentry->d_inode->i_mode)) {
+				error = -EACCES;
+				filp_close(newvp, NULL);
+				break;
+			} else if (!newvp->f_op->write) {
+				error = -EIO;
+				filp_close(newvp, NULL);
+				break;
+			}
+		} else if (req == AC_JASTART) {
+			error = -EINVAL;
+			break;
+		}
+		if (req == AC_JASTOP) {
+			newvp = (struct file *)NULL;
+		}
+		jid = job_getjid(current);
+		if (jid <= 0) {
+			/* no job table entry */
+			error = -ENOENT;
+			break;
+		}
+		memset(&job_acctbuf, 0, sizeof(job_acctbuf));
+		retval = job_getacct(jid, JOB_ACCT_CSA, &job_acctbuf);
+		if (retval != 0) {
+			/* couldn't get csa info in the job table entry */
+			error = retval;
+			break;
+		}
+		/* Use this semaphore since csa_write() can also change this
+		 * file pointer.
+		 */
+		down(&csa_write_sem);
+		if ((oldvp = job_acctbuf.job_acctfile) != (struct file *)NULL) {
+			/* Stop writing to the old job accounting file */
+			filp_close(oldvp, NULL);
+		}
+
+	 	/* Establish new job accounting file or stop job accounting */
+		job_acctbuf.job_acctfile = newvp;
+
+		retval = job_setacct(jid, JOB_ACCT_CSA, JOB_CSA_ACCTFILE,
+			&job_acctbuf);
+		if (retval != 0) {
+			/* couldn't set the new file name in the job entry */
+			error = retval;
+			up(&csa_write_sem);
+			break;
+		}
+		up(&csa_write_sem);
+		/* Write a config record so ja has uname info */
+		if (req == AC_JASTART) {
+			error = csa_config_write(AC_CONFCHG_ON,
+				 job_acctbuf.job_acctfile);
+		}
+	    }
+	    break;
+
+	/*
+	 *  Write an accounting record for a system daemon.
+	 */
+	case AC_WRACCT:
+	    {
+		int	len;
+		int retval = 0;
+		uint64_t	jid;
+		struct job_csa job_acctbuf;
+		struct	actwra	actwra;
+
+		PRINTK(KERN_INFO "CSA: AC_WRACCT\n");
+		if (!capable(CAP_SYS_PACCT) ) {
+			error = -EPERM;
+			break;
+		}
+		if (copy_from_user(&actwra, (void*)data, sizeof(struct actwra))) {
+			error = -EFAULT;
+			break;
+		}
+	 	/*  Verify the parameters. */
+		jid = actwra.ac_jid;
+		if (jid < 0) {
+			error = -EINVAL;
+			break;
+		}
+
+		id = actwra.ac_did;
+		if ((id < 0) || (id >= ACCT_MAXKDS) ) {
+			error = -EINVAL;
+			break;
+		}
+
+		len = actwra.ac_len;
+		if ((len <= 0) || (len > MAX_WRACCT) ) {
+			error = -EINVAL;
+			break;
+		}
+
+		if (actwra.ac_buf == (char *)NULL) {
+			error = -EINVAL;
+			break;
+		}
+
+		/*  If the daemon type is on, write out the daemon buffer. */
+		if ((acct_dmd[id][A_SYS].ac_state == ACS_ON) &&
+				(csa_acctvp != (struct file *)NULL) ) {
+			error = csa_write(actwra.ac_buf, id, len,
+				jid, A_DMD, NULL);
+		}
+
+		/* get the job table entry for this jid */
+		memset(&job_acctbuf, 0, sizeof(job_acctbuf));
+		retval = job_getacct(jid, JOB_ACCT_CSA, &job_acctbuf);
+		if (retval != 0) {
+			/* couldn't get accounting info stored in job table */
+			error = retval;
+			break;
+		}
+
+		/* maybe write out daemon record to ja user accounting file */
+		if (job_acctbuf.job_acctfile != NULL) {
+			error = csa_write(actwra.ac_buf, id, len, jid, A_CJA,
+					&job_acctbuf);
+		}
+	    }
+	    break;
+
+	/*
+	 *  Return authorized state information.
+	 */
+	case AC_AUTH:
+	    {
+		PRINTK(KERN_INFO "CSA: AC_AUTH\n");
+		if (!capable(CAP_SYS_PACCT) ) {
+			error = -EPERM;
+			break;
+		}
+		/*
+		 *  Process user authorization request...If we get to this spot,
+		 *  the user is authorized.
+		 */
+	    }
+	    break;
+
+	/*
+	 *  Process the incremental accounting request.
+	 */
+	case AC_INCACCT:
+		PRINTK(KERN_INFO "CSA: AC_INCACCT\n");
+                error = -EINVAL;
+		break;
+
+	default:
+		PRINTK(KERN_INFO "CSA: Unknown request %d\n", req);
+		error = -EINVAL;
+
+	}  /* end of switch(req) */
+
+	return(error ? error : err);
+}
+
+
+/*
+ *	Build a configuration change accounting record in *cfg.
+ *
+ *	The record gets a fresh header, the event code, boot and current
+ *	time, the masks of daemon and record types whose A_SYS state is
+ *	ACS_ON, and a copy of the system utsname strings.
+ */
+static void
+csa_config_make(ac_eventtype event, struct acctcfg *cfg)
+{
+	int	kd_bits = 0;
+	int	rcd_bits = 0;
+	int	i;
+	int	last;
+
+	/*  Start from an all-zero record, then stamp the header. */
+	memset(cfg, 0, sizeof(*cfg));
+	csa_header(&cfg->ac_hdr, REV_CFG, ACCT_KERNEL_CFG, sizeof(*cfg));
+	cfg->ac_event = event;
+
+	/*  boottime is derived on first use and reused afterwards. */
+	if (boottime == 0)
+		boottime = xtime.tv_sec - (jiffies / HZ);
+	cfg->ac_boottime = boottime;
+	cfg->ac_curtime  = xtime.tv_sec;
+
+	/*  One mask bit per daemon type that is currently on. */
+	for (i = 0; i < ACCT_MAXKDS; i++)
+		if (acct_dmd[i][A_SYS].ac_state == ACS_ON)
+			kd_bits += 1 << i;
+	cfg->ac_kdmask = kd_bits;
+
+	/*  One mask bit per record type, numbered from ACCT_RCDS. */
+	for (i = ACCT_RCDS; i < ACCT_MAXRCDS; i++)
+		if (acct_rcd[i - ACCT_RCDS][A_SYS].ac_state == ACS_ON)
+			rcd_bits += 1 << (i - ACCT_RCDS);
+	cfg->ac_rmask = rcd_bits;
+
+	/*
+	 *  Copy all but the last byte of each utsname field, then force
+	 *  that last byte of the destination to NUL.
+	 */
+	last = sizeof(cfg->ac_uname.sysname) - 1;
+	memcpy(cfg->ac_uname.sysname, system_utsname.sysname, last);
+	cfg->ac_uname.sysname[last] = '\0';
+	last = sizeof(cfg->ac_uname.nodename) - 1;
+	memcpy(cfg->ac_uname.nodename, system_utsname.nodename, last);
+	cfg->ac_uname.nodename[last] = '\0';
+	last = sizeof(cfg->ac_uname.release) - 1;
+	memcpy(cfg->ac_uname.release, system_utsname.release, last);
+	cfg->ac_uname.release[last] = '\0';
+	last = sizeof(cfg->ac_uname.version) - 1;
+	memcpy(cfg->ac_uname.version, system_utsname.version, last);
+	cfg->ac_uname.version[last] = '\0';
+	last = sizeof(cfg->ac_uname.machine) - 1;
+	memcpy(cfg->ac_uname.machine, system_utsname.machine, last);
+	cfg->ac_uname.machine[last] = '\0';
+}
+
+
+/*
+ *	Create a configuration change record and write it to the system
+ *	accounting file (if open) and to job_acctfile (if non-NULL).
+ *	Returns 0, or the negative result of the first write that failed.
+ */
+static int
+csa_config_write(ac_eventtype event, struct file *job_acctfile)
+{
+	struct file	*targets[2];
+	struct acctcfg	acctcfg;
+	mm_segment_t	fs;
+	int	error = 0;	/* first negative write result, if any */
+	int	ret, i;
+
+	csa_config_make(event, &acctcfg);
+
+	down(&csa_write_sem);	/* csa_acctvp is changed under this semaphore */
+	targets[0] = csa_acctvp;
+	targets[1] = job_acctfile;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);	/* the record lives in kernel memory */
+	for (i = 0; i < 2; i++) {
+		if (targets[i] == NULL)
+			continue;
+		ret = targets[i]->f_op->write(targets[i], (char *)&acctcfg,
+			sizeof(struct acctcfg), &targets[i]->f_pos);
+		if (ret < 0 && error == 0)
+			error = ret;	/* keep it even if a later write works */
+	}
+	set_fs(fs);
+	up(&csa_write_sem);
+	return error;
+}
+
+
+
+/*
+ *	Job start/restart hook: write a start-of-job (JOB_EVENT_START) or
+ *	restart-of-job (JOB_EVENT_RESTART) record for the job described by
+ *	data (a struct job_csa *) to the system accounting file.
+ *
+ *	Returns 0 if no accounting file is open or once the record has been
+ *	passed to csa_write() (whose result is ignored); returns -1 if data
+ *	is NULL or event is neither of the two values above.
+ */
+int
+csa_jstart(int event, void *data)
+{
+	struct job_csa *job_sojbuf = (struct job_csa *)data;
+	struct acctsoj	acctsoj;	/* start of job record */
+	DBG_PRINTINIT(__FUNCTION__);
+
+	DBG_PRINTENTRY();
+
+	/*  Are we doing any accounting?  */
+	if (csa_acctvp == (struct file *)NULL) {
+		DBG_PRINTEXIT(0);
+		return 0;
+	}
+
+	if (!job_sojbuf) {
+		/* bad pointer */
+		printk(KERN_ERR
+		    "csa_jstart: Received bad soj pointer, pid %d.\n",
+		     current->pid);
+		DBG_PRINTEXIT(-1);
+		return -1;
+	}
+
+	/*  Build the record: zeroed body, header, then job identity. */
+	memset(&acctsoj, 0, sizeof(struct acctsoj));
+	csa_header(&acctsoj.ac_hdr, REV_SOJ, ACCT_KERNEL_SOJ,
+		sizeof(struct acctsoj));
+	acctsoj.ac_jid = job_sojbuf->job_id;
+	acctsoj.ac_uid = job_sojbuf->job_uid;
+
+	/*
+	 *  The event selects both the record type and which timestamp
+	 *  field receives the job start time.
+	 */
+	if (event == JOB_EVENT_START) {
+		acctsoj.ac_type = AC_SOJ;
+		acctsoj.ac_btime = CT_TO_SECS(job_sojbuf->job_start) +
+			(xtime.tv_sec - (jiffies / HZ) );
+	} else if (event == JOB_EVENT_RESTART) {
+		acctsoj.ac_type = AC_ROJ;
+		acctsoj.ac_rstime = CT_TO_SECS(job_sojbuf->job_start) +
+			(xtime.tv_sec - (jiffies / HZ) );
+	} else {
+		DBG_PRINTEXIT(-1);
+		return -1;
+	}
+
+	/*
+	 *  Hand the record to csa_write(), which re-checks csa_acctvp
+	 *  under csa_write_sem and writes nothing if accounting stopped.
+	 */
+	(void)csa_write((caddr_t)&acctsoj, ACCT_KERN_CSA,
+		sizeof(acctsoj), job_sojbuf->job_id, A_SYS, job_sojbuf);
+
+	DBG_PRINTEXIT(0);
+	return 0;
+}
+
+/*
+ *	End-of-job hook, called when the last process in a job is done:
+ *	build an EOJ record from the job's struct job_csa (passed in data)
+ *	and hand it to csa_write() for the system accounting file.  Returns
+ *	0, or -1 if data is NULL; event and csa_write()'s result are unused.
+ */
+int
+csa_jexit(int event, void *data)
+{
+	struct job_csa *job_eojbuf = (struct job_csa *)data;
+	struct accteoj	eoj;	/* end of job record */
+
+	/*  Nothing to do unless a system accounting file is open. */
+	if (csa_acctvp == (struct file *)NULL)
+		return 0;
+
+	if (job_eojbuf == NULL) {
+		/* bad pointer */
+		printk(KERN_ERR
+		    "csa_jexit: Received bad eoj pointer, pid %d.\n",
+		    current->pid);
+		return -1;
+	}
+
+	memset(&eoj, 0, sizeof(eoj));
+
+	/*
+	 *  The record carries two headers.  The second one is given a
+	 *  zero size and the bitwise complement of ACCT_MAGIC.
+	 */
+	csa_header(&eoj.ac_hdr1, REV_EOJ, ACCT_KERNEL_EOJ, sizeof(eoj));
+	csa_header(&eoj.ac_hdr2, REV_EOJ, ACCT_KERNEL_EOJ, 0);
+	eoj.ac_hdr2.ah_magic = ~ACCT_MAGIC;
+
+	/*  Job id and uid come from the job entry... */
+	eoj.ac_jid = job_eojbuf->job_id;
+	eoj.ac_uid = job_eojbuf->job_uid;
+	/*  ...while gid and nice value are those of the current task. */
+	eoj.ac_gid = current->gid;
+	eoj.ac_nice = task_nice(current);
+
+	/*  Begin time is derived from the job start; end time is now. */
+	eoj.ac_btime = CT_TO_SECS(job_eojbuf->job_start) +
+		(xtime.tv_sec - (jiffies / HZ) );
+	eoj.ac_etime = xtime.tv_sec;
+
+	/*
+	 * XXX Once we have real values in these two fields, convert them
+	 * to Kbytes.
+	 */
+	eoj.ac_corehimem = job_eojbuf->job_corehimem;
+	eoj.ac_virthimem = job_eojbuf->job_virthimem;
+
+	/*  Accounting may have been turned off since the check above. */
+	if (csa_acctvp == (struct file *)NULL)
+		return 0;
+
+	(void) csa_write((caddr_t)&eoj, ACCT_KERN_CSA, sizeof(eoj),
+		job_eojbuf->job_id, A_SYS, job_eojbuf);
+	return 0;
+}
+
+/*
+ *	Write nbyte bytes from buf to an accounting file.
+ *
+ *	type selects the file: A_SYS and A_DMD use the system file
+ *	csa_acctvp, A_CJA uses jp->job_acctfile.  did indexes the daemon
+ *	entry that is marked ACS_ERROFF when a system-file write fails;
+ *	jid names the job whose accounting file is cleared when a job-file
+ *	write fails.
+ *
+ *	Returns 0 on success or when the selected file is not open, -EINVAL
+ *	for an unknown type, otherwise the negative result of the file's
+ *	write method.  After a failed write the file is closed.
+ *
+ *	NOTE(review): buf is handed to the write method under KERNEL_DS, so
+ *	callers presumably must not pass an unchecked user-space pointer.
+ */
+int
+csa_write(char *buf, int did, int nbyte, uint64_t jid, int type,
+	struct job_csa *jp)
+{
+	struct file	*vp = (struct file *)NULL;	/* acct file */
+	mm_segment_t	saved_fs;
+	unsigned long	saved_limit;
+	int	error = 0;	/* errno */
+	int	retval = 0;
+
+	down(&csa_write_sem);
+
+	/*  Locate the accounting file for this type. */
+	if (type == A_SYS || type == A_DMD) {
+		vp = csa_acctvp;
+	} else if (type == A_CJA) {
+		if (jp != (struct job_csa *)NULL)
+			vp = jp->job_acctfile;
+	} else {
+		error = -EINVAL;
+		goto out;
+	}
+
+	/*  Check if this type of accounting is turned on. */
+	if (vp == (struct file *)NULL)
+		goto out;
+
+	saved_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	/* make sure we don't get hit by a process file size limit */
+	saved_limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+	error = vp->f_op->write(vp, buf, nbyte, &vp->f_pos);
+	current->rlim[RLIMIT_FSIZE].rlim_cur = saved_limit;
+
+	set_fs(saved_fs);
+
+	/*  Only a negative result counts as an error. */
+	if (error >= 0) {
+		error = 0;
+		goto out;
+	}
+
+	/*  The write failed: disable this type of accounting. */
+	if (type == A_CJA) {
+		jp->job_acctfile = (struct file *)NULL;
+		retval = job_setacct(jid, JOB_ACCT_CSA,
+			JOB_CSA_ACCTFILE, jp);
+		printk(KERN_WARNING JID_ERR2, error,
+		       (unsigned long long) jid);
+		if (retval != 0) {
+			printk(KERN_WARNING JID_ERR3,
+			       (unsigned long long) jid);
+		} else {
+			printk(KERN_WARNING JID_ERR4,
+			       (unsigned long long) jid);
+		}
+	} else {
+		/* A_SYS or A_DMD: the system accounting file */
+		csa_acctvp = (struct file *)NULL;
+		acct_dmd[did][A_SYS].ac_state = ACS_ERROFF;
+		acct_dmd[ACCT_KERN_CSA][A_SYS].ac_state = ACS_ERROFF;
+		printk(KERN_ALERT
+		   "csa accounting pacct write error %d; %s disabled\n",
+		    error, acct_dmd_name[did]);
+	}
+	/*  Close the file last, once the state above has been updated. */
+	filp_close(vp, NULL);
+
+out:
+	up(&csa_write_sem);
+	return error;
+}
+
+
+module_init(init_csa);
+module_exit(cleanup_csa);
+
+#endif /* defined(CONFIG_CSA) || defined(CONFIG_CSA_MODULE) */
Index: linux/init/Kconfig
===================================================================
--- linux.orig/init/Kconfig	2004-08-19 18:12:13.000000000 -0700
+++ linux/init/Kconfig	2004-08-23 15:28:23.000000000 -0700
@@ -158,6 +158,30 @@
 	  a module, select this entry using M.  If you do not want support
 	  for jobs, select N.
 
+config CSA
+	tristate "  CSA Job Accounting"
+	depends on PAGG_JOB
+	help
+	  Comprehensive System Accounting (CSA) provides job level
+	  accounting of resource usage.  The accounting records are
+	  written by the kernel into a file.  CSA user level scripts
+	  and commands process the binary accounting records and
+	  combine them by job identifier within system boot uptime
+	  periods.  These accounting records are then used to produce
+	  reports and charge fees to users.
+
+	  Say Y here if you want job level accounting to be compiled
+	  into the kernel.  Say M here if you want the part of this
+	  feature that writes accounting records to be built as a
+	  loadable module.  Say N here if you do not want job level
+	  accounting (the default).
+	
+	  The CSA commands and scripts package needs to be installed
+	  to process the CSA accounting records.  See
+	  http://oss.sgi.com/projects/csa for further information
+	  about CSA and download instructions for the CSA commands
+	  package and documentation.
+
 config SYSCTL
 	bool "Sysctl support"
 	---help---

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26  0:25 [PATCH] new CSA patchset for 2.6.8 Jay Lan
@ 2004-08-26  5:18 ` Andrew Morton
  2004-08-26 17:15   ` Tim Schmielau
  2004-08-26 18:38   ` [Lse-tech] " John Hesterberg
  0 siblings, 2 replies; 27+ messages in thread
From: Andrew Morton @ 2004-08-26  5:18 UTC (permalink / raw)
  To: Jay Lan; +Cc: linux-kernel, erikj, limin, lse-tech

Jay Lan <jlan@engr.sgi.com> wrote:
>
> I have broken up one big CSA kernel patch into four smaller ones
>  as attached:
> 
>       csa_io     - collects io accounting data
>       csa_mm     - collects mm accounting data
>       csa_eop    - provides a hook to perform end-of-process accounting
>       csa_module - builds csa loadable module

Please don't send patches as attachments, and please don't send more than
one patch per email.

If you stick to the one-patch-per-email rule, you also get an opportunity
to nicely describe each patch within its email.

Coding style nit:


	if (foo)
		bar();

is preferable to

	if (foo) {
		bar();
	}


More broadly: Help!

I am 100% not in a position to judge whether Linux needs Comprehensive
System Accounting, nor am I able to define what the requirements for such a
thing should be.  All I can tell from your patch is the quality of its
implementation, and that's leaping far, far ahead of where we should be.

We're going to need help from you, and from all the other stakeholders in
judging how useful this feature is to Linux implementors and how well this
implementation meets the (unknown) requirements.  See my problem?

I've cc'ed lse-tech, where enterprise folks hang out.  I would request that
the people who are stakeholders in this feature

a) stick their hands up

b) let us know how important this kind of feature is for their users

c) review the offered feature set against their requirements

d) let us know how well the implementation fits that requirement and

e) inform us of any competing implementations.  Compare and contrast.

Thanks.


> There are no functional changes in this set of csa patches compared
> to the 2.6.7 patch linux-2.6.7.csa.patch.
> 
> Patches csa_io, csa_mm, and csa_eop are independent of each other.  You may
> apply any one, any two or all three and you will be able to build a
> functional kernel.  However, data collected needs an agent to use it.  The
> csa_module is one agent that takes advangtage of the feature and it works
> with csa-2.0.0 (or later) to report system accounting data of the host
> system.  The csa-2.0.0 rpm can be downloaded from
> ftp://oss.sgi.com/projects/csa/download
> 
> The csa_module patch requires all three accounting data patches to be fully
> functional.
> 
> This set of csa patches has been tested with the pagg and job kernel
> patches to linux 2.6.8 kernel.  The information of pagg and job project can
> be found at http://oss.sgi.com/projects/pagg/
> 
> The csa_module requires the pagg and job kernel patches.
> 
> Feedback, bug reports, and comments are very welcome!
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26  5:18 ` Andrew Morton
@ 2004-08-26 17:15   ` Tim Schmielau
  2004-08-26 18:26     ` [Lse-tech] " Jay Lan
                       ` (2 more replies)
  2004-08-26 18:38   ` [Lse-tech] " John Hesterberg
  1 sibling, 3 replies; 27+ messages in thread
From: Tim Schmielau @ 2004-08-26 17:15 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jay Lan, lkml, erikj, limin, lse-tech, Ragnar Kjørstad,
	Yoshitaka ISHIKAWA, Arthur Corliss, Guillaume Thouvenin

On Wed, 25 Aug 2004, Andrew Morton wrote:

> More broadly: Help!
> 
> I am 100% not in a position to judge whether Linux needs Comprehensive
> System Accounting, nor am I able to define what the requirements for such a
> thing should be.  All I can tell from your patch is the quality of its
> implementation, and that's leaping far, far ahead of where we should be.
> 
> We're going to need help from you, and from all the other stakeholders in
> judging how useful this feature is to Linux implementors and how well this
> implementation meets the (unknown) requirements.  See my problem?
> 
> I've cc'ed lse-tech, where enterprise folks hang out.  I would request that
> the people who are stakeholders in this feature
> 
> a) stick their hands up
> 
> b) let us know how important this kind of feature is for their users
> 
> c) review the offered feature set against their requirements
> 
> d) let us know how well the implementation fits that requirement and
> 
> e) inform us of any competing implementations.  Compare and contrast.

Judging from the feedback during its stay in -mm (none at all!), general
interest in BSD accounting seems quite limited. The rate of downloads of
the updated userspace tools is hardly distinguishable from background
noise. (This might change with the correct URL in the help text now, but
even that was broken for months and nobody cared).
Also general interest in the user space tools is low, the latest release 
of the GNU acct package is from 1998 (and yes, there _are_ problems 
warranting updates).

Funnily enough, with three competing implementations, even interest from
developers seems larger than that from users. (This statement includes me:
I did a patch but am not a user of it.) But communication between
developers is poor. I myself only recently learned about ELSA and CSA.

Therefore I've Cc:ed some people from whom I got valuable feedback on the
BSD accounting format patch.

IMHO CSA, ELSA and BSD accounting are too similar to have more than one of 
them in the kernel. We should either improve BSD accounting to do the job, 
or kill it in favor of a different implementation.

Tim

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 17:15   ` Tim Schmielau
@ 2004-08-26 18:26     ` Jay Lan
  2004-08-26 19:44       ` Arthur Corliss
  2004-08-26 18:43     ` John Hesterberg
  2004-08-26 19:24     ` Arthur Corliss
  2 siblings, 1 reply; 27+ messages in thread
From: Jay Lan @ 2004-08-26 18:26 UTC (permalink / raw)
  To: Tim Schmielau
  Cc: Andrew Morton, lkml, erikj, limin, lse-tech, Ragnar Kjørstad,
	Yoshitaka ISHIKAWA, Arthur Corliss, Guillaume Thouvenin

The reason for breaking up one CSA patch into four patches was
so that the only CSA (http://oss.sgi.com/projects/csa/) specific
thing is the csa_module. My intention is to improve the system
accounting data collection and make the data available to any
clients that can use the data. The three areas of accounting
data we try to improve are io, mm, and per-process area.

As Tim said, the problem with BSD accounting is that it has been
inactive for a long time. I do not mind incorporating the
three accounting data collection patches I submitted into BSD or
others, as long as the data is made available to modules that plan
to make use of it. :)

Thanks,
  - jay


Tim Schmielau wrote:
> On Wed, 25 Aug 2004, Andrew Morton wrote:
> 
> 
>>More broadly: Help!
>>
>>I am 100% not in a position to judge whether Linux needs Comprehensive
>>System Accounting, nor am I able to define what the requirements for such a
>>thing should be.  All I can tell from your patch is the quality of its
>>implementation, and that's leaping far, far ahead of where we should be.
>>
>>We're going to need help from you, and from all the other stakeholders in
>>judging how useful this feature is to Linux implementors and how well this
>>implementation meets the (unknown) requirements.  See my problem?
>>
>>I've cc'ed lse-tech, where enterprise folks hang out.  I would request that
>>the people who are stakeholders in this feature
>>
>>a) stick their hands up
>>
>>b) let us know how important this kind of feature is for their users
>>
>>c) review the offered feature set against their requirements
>>
>>d) let us know how well the implementation fits that requirement and
>>
>>e) inform us of any competing implementations.  Compare and contrast.
> 
> 
> Judging from the feedback during it's stay in -mm (none at all!), general
> interest in BSD accounting seems quite limited. The rate of downloads of
> the updated userspace tools is hardly distinguishable from background
> noise. (This might change with the correct URL in the help text now, but
> even that was broken for months and nobody cared).
> Also general interest in the user space tools is low, the latest release 
> of the GNU acct package is from 1998 (and yes, there _are_ problems 
> warranting updates).
> 
> Funnily enough, with three competing implementation even interest from
> developers seems larger than that from users (This statement includes me,
> I did a patch but am not a user of it).) But communication between
> developers is poor. I for myself only recently learned about ELSA and CSA.
> 
> Therefore I've Cc:ed some people from whom I got valuable feedback on the
> BSD accounting format patch.
> 
> IMHO CSA, ELSA and BSD accounting are too similar to have more than one of 
> them in the kernel. We should either improve BSD accounting to do the job, 
> or kill it in favor of a different implementation.
> 
> Tim
> 
> 
> -------------------------------------------------------
> SF.Net email is sponsored by Shop4tech.com-Lowest price on Blank Media
> 100pk Sonic DVD-R 4x for only $29 -100pk Sonic DVD+R for only $33
> Save 50% off Retail on Ink & Toner - Free Shipping and Free Gift.
> http://www.shop4tech.com/z/Inkjet_Cartridges/9_108_r285
> _______________________________________________
> Lse-tech mailing list
> Lse-tech@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lse-tech


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26  5:18 ` Andrew Morton
  2004-08-26 17:15   ` Tim Schmielau
@ 2004-08-26 18:38   ` John Hesterberg
       [not found]     ` <412EADBC.60607@bigpond.net.au>
  1 sibling, 1 reply; 27+ messages in thread
From: John Hesterberg @ 2004-08-26 18:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jay Lan, linux-kernel, erikj, limin, lse-tech

On Wed, Aug 25, 2004 at 10:18:42PM -0700, Andrew Morton wrote:
> Jay Lan <jlan@engr.sgi.com> wrote:
> >
> > I have broken up one big CSA kernel patch into four smaller ones
> >  as attached:
> > 
> >       csa_io     - collects io accounting data
> >       csa_mm     - collects mm accounting data
> >       csa_eop    - provides a hook to perform end-of-process accounting
> >       csa_module - builds csa loadable module
> 
> More broadly: Help!
> 
> I am 100% not in a position to judge whether Linux needs Comprehensive
> System Accounting, nor am I able to define what the requirements for such a
> thing should be.  All I can tell from your patch is the quality of its
> implementation, and that's leaping far, far ahead of where we should be.

Linux needs something beyond what it has today, at least for the
HPC market SGI is familiar with.  We believe it will more generally
benefit Linux HPC and enterprise markets, which is one reason we've
released the whole CSA stack as open source.

> 
> We're going to need help from you, and from all the other stakeholders in
> judging how useful this feature is to Linux implementors and how well this
> implementation meets the (unknown) requirements.  See my problem?
> 
> I've cc'ed lse-tech, where enterprise folks hang out.  I would request that
> the people who are stakeholders in this feature
> 
> a) stick their hands up

We've been running CSA in production on Altix (our Itanium/Linux platform)
for several years now.

> 
> b) let us know how important this kind of feature is for their users

A substantial number of our customers require it.  CSA has been
developed over the years on SGI's HPC systems in response to our
customers' needs.  It's been reimplemented and open-sourced for Linux,
originally as an SGI/LANL collaboration.

> c) review the offered feature set against their requirements
> 
> d) let us know how well the implementation fits that requirement and

It fits. :-)

Actually, one secondary feature on our wishlist is 'projects'.

Our customers are tied into the current CSA user interface.
However, there is lots of room for cooperation under that, particularly
in the kernel.  We can always consider a migration project as well.

John

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 17:15   ` Tim Schmielau
  2004-08-26 18:26     ` [Lse-tech] " Jay Lan
@ 2004-08-26 18:43     ` John Hesterberg
  2004-08-27  8:26       ` Tim Schmielau
  2004-08-26 19:24     ` Arthur Corliss
  2 siblings, 1 reply; 27+ messages in thread
From: John Hesterberg @ 2004-08-26 18:43 UTC (permalink / raw)
  To: Tim Schmielau
  Cc: Andrew Morton, Jay Lan, lkml, erikj, limin, lse-tech,
	Ragnar Kj?rstad, Yoshitaka ISHIKAWA, Arthur Corliss,
	Guillaume Thouvenin

On Thu, Aug 26, 2004 at 07:15:40PM +0200, Tim Schmielau wrote:
> ...
> IMHO CSA, ELSA and BSD accounting are too similar to have more than one of 
> them in the kernel. We should either improve BSD accounting to do the job, 
> or kill it in favor of a different implementation.
> 
> Tim

We should at least have common data collection in the kernel.

I could more easily understand different accounting packages on top of
that that might meet different needs of different classes of users.

John

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 17:15   ` Tim Schmielau
  2004-08-26 18:26     ` [Lse-tech] " Jay Lan
  2004-08-26 18:43     ` John Hesterberg
@ 2004-08-26 19:24     ` Arthur Corliss
  2004-08-26 20:05       ` Tim Schmielau
  2 siblings, 1 reply; 27+ messages in thread
From: Arthur Corliss @ 2004-08-26 19:24 UTC (permalink / raw)
  To: Tim Schmielau
  Cc: Andrew Morton, Jay Lan, lkml, erikj, limin, lse-tech,
	=?X-UNKNOWN?Q?Ragnar_Kj=F8rstad?=, Yoshitaka ISHIKAWA,
	Guillaume Thouvenin

On Thu, 26 Aug 2004, Tim Schmielau wrote:

<snip>

> Therefore I've Cc:ed some people from whom I got valuable feedback on the
> BSD accounting format patch.
>
> IMHO CSA, ELSA and BSD accounting are too similar to have more than one of
> them in the kernel. We should either improve BSD accounting to do the job,
> or kill it in favor of a different implementation.

I would be very interested in a CSA implementation similar to what I have on
IRIX.  I will also plead guilty to not having downloaded the updated patches
for either the kernel or the tools.  I'm continuing to use my poor hack until
a permanent solution gets accepted into the kernel, at which point I'll
adopt that.

And if it counters the impression at all, I'm not a kernel developer, I
proposed my hack out of need as a user of the tools.  I also try to stay away
from modified kernels, so I'm running Marcelo's 2.4 stable branch with only
the 32bit u/gid_t hack applied.  That's why I haven't had any feedback on the
-mm branch.

In short, for my use BSD accounting is sufficient, but I'd love to see CSA in
Linux as well.  Linux hasn't moved too far into roles where it's a necessity
(for what I'm doing, anyway), but I see CSA as something that would certainly
help it assume those roles.

	--Arthur Corliss
	  Bolverk's Lair -- http://arthur.corlissfamily.org/
	  Digital Mages -- http://www.digitalmages.com/
	  "Live Free or Die, the Only Way to Live" -- NH State Motto

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 18:26     ` [Lse-tech] " Jay Lan
@ 2004-08-26 19:44       ` Arthur Corliss
  0 siblings, 0 replies; 27+ messages in thread
From: Arthur Corliss @ 2004-08-26 19:44 UTC (permalink / raw)
  To: Jay Lan
  Cc: Tim Schmielau, Andrew Morton, lkml, erikj, limin, lse-tech,
	=?X-UNKNOWN?Q?Ragnar_Kj=F8rstad?=, Yoshitaka ISHIKAWA,
	Guillaume Thouvenin

On Thu, 26 Aug 2004, Jay Lan wrote:

> The reason for breaking up one CSA patch into four patches was
> so that the only CSA (http://oss.sgi.com/projects/csa/) specific
> thing is the csa_module. My intention is to improve the system
> accounting data collection and make the data available to any
> clients that can use the data. The three areas of accounting
> data we try to improve are io, mm, and per-process area.
>
> As Tim said the problem of BSD accounting was that it has been
> inactive for a long time. I do not mind incoporating the
> three accounting data collection patches i submitted into BSD or
> others as long as the data made available to modules that plan
> to make use of the data. :)

All right, I'm going to shut up now.  I had no idea that SGI had gone behind
my back and started porting CSA to Linux.  :-P  Before I shut up, though, as a
user of these tools I'd vote for a unified data collection method, as
suggested above.  There has to be at least five of us who have volunteered at
one point or another to help make the GNU utilities conform.  That should make
everyone happy.

	--Arthur Corliss
	  Bolverk's Lair -- http://arthur.corlissfamily.org/
	  Digital Mages -- http://www.digitalmages.com/
	  "Live Free or Die, the Only Way to Live" -- NH State Motto

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 19:24     ` Arthur Corliss
@ 2004-08-26 20:05       ` Tim Schmielau
  2004-08-26 20:46         ` Jay Lan
                           ` (2 more replies)
  0 siblings, 3 replies; 27+ messages in thread
From: Tim Schmielau @ 2004-08-26 20:05 UTC (permalink / raw)
  To: Arthur Corliss
  Cc: Andrew Morton, Jay Lan, lkml, erikj, limin, lse-tech,
	=?X-UNKNOWN?Q?Ragnar_Kj=F8rstad?=, Yoshitaka ISHIKAWA,
	Guillaume Thouvenin

On Thu, 26 Aug 2004, Arthur Corliss wrote:

> I would be very interested in a CSA implementation similar to what I have on
> IRIX.  I will also plead guilty to not having downloaded the updated patches
> for either the kernel or the tools.  I'm continuing to use my poor hack until
> a permanent solution gets accepted into the kernel, at which point I'll
> adopt that.

That's ok, we carefully discussed the changes to make sure no new tools
are required ;-)

> And if it counters the impression at all, I'm not a kernel developer, I
> proposed my hack out of need as a user of the tools.  I also try to stay away
> from modified kernels, so I'm running Marcelos' 2.4 stable branch with only
> the 32bit u/gid_t hack applied.  That's why I haven't had any feedback on the
> -mm branch.

I haven't even tried to get a patch into 2.4, since Marcelo is (rightly)
quite resistant to new features.
>
> In short, for my use BSD accounting is sufficient, but I'd love to see CSA in
> Linux as well.  Linux hasn't moved too far into roles where it's a necessity
> (for what I'm doing, anyway), but I see CSA as something that would certainly
> help it assume those roles.

Does this mean you would want to have both in the same kernel, potentially
turning on both at the same time?


Ok, let me summarize what I learned until now:

It should be easy to combine the data collection enhancements from
CSA and ELSA to provide a common superset of information.

Output file formats vary, but might be unified if projects don't insist
too much.
Main difference between CSA and ELSA on the one hand and BSD acct on the
other is that the latter writes one record per process, while the former
write one per job.
With the new BSD acct v3 format, it should be possible to do per job
accounting entirely from userspace, using pid and ppid information to
reconstruct the process tree and some userland database for the
pid -> job mapping. It would, however, be greatly simplified if the
accounting records provided some kind of job id, and some indicator
whether or not this process was the last of a job (group).

CSA and ELSA might even be more lightweight since fewer accounting records
are actually written.

Sounds like it should be possible to fulfill the different needs by
having loadable modules for the different output formats, or by a /proc
entry that controls some aspects like whether records are written per
job or per process.

Comments?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 20:05       ` Tim Schmielau
@ 2004-08-26 20:46         ` Jay Lan
  2004-08-28  1:27           ` Arthur Corliss
       [not found]         ` <20040827054218.GA4142@frec.bull.fr>
  2004-08-28  1:26         ` Arthur Corliss
  2 siblings, 1 reply; 27+ messages in thread
From: Jay Lan @ 2004-08-26 20:46 UTC (permalink / raw)
  To: Tim Schmielau
  Cc: Arthur Corliss, Andrew Morton, lkml, erikj, limin, lse-tech, ?,
	Yoshitaka ISHIKAWA, Guillaume Thouvenin

I would like to see a common data collection method in the kernel. The kernel
does not need to decide how the data is to be presented to
user space. An accounting loadable module such as CSA or ELSA will
take care of how the data is to be presented to meet the needs of
different users.

Sounds reasonable?

Regards,
  - jay


Tim Schmielau wrote:
> On Thu, 26 Aug 2004, Arthur Corliss wrote:
> 
> 
>>I would be very interested in a CSA implementation similar to what I have on
>>IRIX.  I will also plead guilty to not having downloaded the updated patches
>>for either the kernel or the tools.  I'm continuing to use my poor hack until
>>a permanent solution gets accepted into the kernel, at which point I'll
>>adopt that.
> 
> 
> That's ok, we carefully discussed the changes to make sure no new tools
> are required ;-)
> 
> 
>>And if it counters the impression at all, I'm not a kernel developer, I
>>proposed my hack out of need as a user of the tools.  I also try to stay away
>>from modified kernels, so I'm running Marcelos' 2.4 stable branch with only
>>the 32bit u/gid_t hack applied.  That's why I haven't had any feedback on the
>>-mm branch.
> 
> 
> I haven't even tried to get a patch into 2.4, since Marcelo is (rightly)
> quite resilent to new features.
> 
>>In short, for my use BSD accounting is sufficient, but I'd love to see CSA in
>>Linux as well.  Linux hasn't moved too far into roles where it's a necessity
>>(for what I'm doing, anyway), but I see CSA as something that would certainly
>>help it assume those roles.
> 
> 
> Does this mean you would want to have both in the same kernel, potentially
> turning on both at the same time?
> 
> 
> Ok, let me summarize what I learned until now:
> 
> It should be easy to combine the data collection enhancements from
> CSA and ELSA to provide a common superset of information.
> 
> Output file formats vary, but might be unified if projects don't insist
> too much.
> Main difference between CSA and ELSA on the one hand and BSD acct on the
> other is that the latter writes one record per process, while the former
> write one per job.
> With the new BSD acct v3 format, it should be possible to do per job
> accounting entirely from userspace, using pid and ppid information to
> reconstruct the process tree and some userland database for the
> pid -> job mapping. It would, however, be greatly simplified if the
> accounting records provided some kind of job id, and some indicator
> whether or not this process was the last of a job (group).
> 
> CSA and ELSA might even be more lightweight since fewer accounting records
> are actually written.
> 
> Sounds like it should be possible to fulfill the different needs by
> having loadable modules for the different output formats, or by a /proc
> entry that controls some aspects like whether records are written per
> job or per process.
> 
> Comments?


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 18:43     ` John Hesterberg
@ 2004-08-27  8:26       ` Tim Schmielau
  0 siblings, 0 replies; 27+ messages in thread
From: Tim Schmielau @ 2004-08-27  8:26 UTC (permalink / raw)
  To: John Hesterberg
  Cc: Andrew Morton, Jay Lan, lkml, erikj, limin, lse-tech,
	Ragnar Kj?rstad, Yoshitaka ISHIKAWA, Arthur Corliss,
	Guillaume Thouvenin

On Thu, 26 Aug 2004, John Hesterberg wrote:

> On Thu, Aug 26, 2004 at 07:15:40PM +0200, Tim Schmielau wrote:
> > ...
> > IMHO CSA, ELSA and BSD accounting are too similar to have more than one of 
> > them in the kernel. We should either improve BSD accounting to do the job, 
> > or kill it in favor of a different implementation.
> > 
> > Tim
> 
> We should at least have common data collection in the kernel.
> 
> I could more easily understand different accounting packages on top of
> that that might meet different needs of different classes of users.

Sorry, that is of course what I meant - I am only talking about kernel 
code.

Tim

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
       [not found]         ` <20040827054218.GA4142@frec.bull.fr>
@ 2004-08-27 19:31           ` John Hesterberg
  2004-08-30  8:29             ` Tim Schmielau
  2004-08-27 19:55           ` Jay Lan
  2004-08-28  1:33           ` Arthur Corliss
  2 siblings, 1 reply; 27+ messages in thread
From: John Hesterberg @ 2004-08-27 19:31 UTC (permalink / raw)
  To: Guillaume Thouvenin
  Cc: Tim Schmielau, Arthur Corliss, Andrew Morton, Jay Lan, lkml,
	erikj, limin, lse-tech, Ragnar Kj?rstad, Yoshitaka ISHIKAWA

On Fri, Aug 27, 2004 at 07:42:18AM +0200, Guillaume Thouvenin wrote:
> On Thu, Aug 26, 2004 at 10:05:37PM +0200, Tim Schmielau wrote:
> > 
> > It should be easy to combine the data collection enhancements from
> > CSA and ELSA to provide a common superset of information.
> 
> ELSA uses current BSD accounting. The only difference with BSD is that
> accounting is done for a group of processes. I didn't use PAGG and
> rewrite something because I thought (I was wrong) that PAGG project
> wasn't maintained. I continue to maintain ELSA just because there is, 
> until today, no solution for doing job accounting. 
> So, the data collection enhancements from ELSA is not very useful.
> 
> > With the new BSD acct v3 format, it should be possible to do per job
> > accounting entirely from userspace, using pid and ppid information to
> > reconstruct the process tree and some userland database for the
> > pid -> job mapping. It would, however, be greatly simplified if the
> > accounting records provided some kind of job id, and some indicator
> > whether or not this process was the last of a job (group).
> 
> I like this solution.
> In fact what I proposed was to have PAGG and a modified BSD accounting
> that can be used with PAGG as both are already in the -mm tree. But
> manage group of processes from userspace is, IMHO, a better solution as
> modifications in the kernel will be minimal. 

The kernel part of linux-job is a module that uses PAGG, and 
isn't difficult.  We've been running it in production for a
couple years.

I don't think a kernel-based job is a requirement, though,
so I'd like to hear more about how you'd do it otherwise.

The other comments about only one acct record per job vs one
per process might be important, and that might mean the kernel
has to know about the job.

>   Therefore the solution could be to enhance BSD accounting with data
> collection from CSA and provide per job accounting with a userspace
> mechanism. Sounds great to me... 
> 
> Best,
> Guillaume

How does the BSD accounting define jobs?
What determines the job that a process is part of?

An important aspect of linux-job (ie the job part of the pagg/job/csa
stack) is that it is inescapable.  The user doesn't get to determine or
change their job (unlike process groups).  For true accounting, that
determines the real $$$ chargebacks on shared machines, this is
necessary.

Another aspect of jobs that isn't directly related to accounting
is that it gives users and admins a way to query, and kill :-),
all the processes that are part of the job.  The inescapable part
is again important...you can't fork off a process and detach it from
the job to hide it.  In fact, I've heard that some sites use pagg/job
without CSA for this reason.  It might have been an ISP or ASP, and
they liked the containment linux-job provided.

John

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
       [not found]         ` <20040827054218.GA4142@frec.bull.fr>
  2004-08-27 19:31           ` John Hesterberg
@ 2004-08-27 19:55           ` Jay Lan
  2004-08-31  9:06             ` Guillaume Thouvenin
  2004-08-28  1:33           ` Arthur Corliss
  2 siblings, 1 reply; 27+ messages in thread
From: Jay Lan @ 2004-08-27 19:55 UTC (permalink / raw)
  To: Guillaume Thouvenin
  Cc: Tim Schmielau, Arthur Corliss, Andrew Morton, Jay Lan, lkml,
	erikj, limin, lse-tech, Ragnar Kjørstad, Yoshitaka ISHIKAWA

Hi Guillaume,

Please visit http://oss.sgi.com/projects/pagg/
The page has been updated to provide information on a per job
accounting project called 'job' based on PAGG.

There is one userspace rpm and one kernel  module for job.
This may provide what you are looking for. It is a mature product
as well. I am sure Limin(job) and Erik(pagg) would appreciate any
input you can provide to make 'job' more useful.

Regards,
  - jay


Guillaume Thouvenin wrote:
> On Thu, Aug 26, 2004 at 10:05:37PM +0200, Tim Schmielau wrote:
> 
>>It should be easy to combine the data collection enhancements from
>>CSA and ELSA to provide a common superset of information.
> 
> 
> ELSA uses current BSD accounting. The only difference with BSD is that
> accounting is done for a group of processes. I didn't use PAGG and
> rewrite something because I thought (I was wrong) that PAGG project
> wasn't maintained. I continue to maintain ELSA just because there is, 
> until today, no solution for doing job accounting. 
> So, the data collection enhancements from ELSA is not very useful.
> 
> 
>>With the new BSD acct v3 format, it should be possible to do per job
>>accounting entirely from userspace, using pid and ppid information to
>>reconstruct the process tree and some userland database for the
>>pid -> job mapping. It would, however, be greatly simplified if the
>>accounting records provided some kind of job id, and some indicator
>>whether or not this process was the last of a job (group).
> 
> 
> I like this solution.
> In fact what I proposed was to have PAGG and a modified BSD accounting
> that can be used with PAGG as both are already in the -mm tree. But
> manage group of processes from userspace is, IMHO, a better solution as
> modifications in the kernel will be minimal. 
> 
>   Therefore the solution could be to enhance BSD accounting with data
> collection from CSA and provide per job accounting with a userspace
> mechanism. Sounds great to me... 
> 
> Best,
> Guillaume
> 
> 
> -------------------------------------------------------
> SF.Net email is sponsored by Shop4tech.com-Lowest price on Blank Media
> 100pk Sonic DVD-R 4x for only $29 -100pk Sonic DVD+R for only $33
> Save 50% off Retail on Ink & Toner - Free Shipping and Free Gift.
> http://www.shop4tech.com/z/Inkjet_Cartridges/9_108_r285
> _______________________________________________
> Lse-tech mailing list
> Lse-tech@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lse-tech


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
       [not found]       ` <20040826205349.0582d38e.akpm@osdl.org>
@ 2004-08-27 20:24         ` Jay Lan
  2004-08-30 19:10         ` John Hesterberg
  1 sibling, 0 replies; 27+ messages in thread
From: Jay Lan @ 2004-08-27 20:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Peter Williams, jh, jlan, linux-kernel, erikj, limin, lse-tech,
	Tim Schmielau

My proposed patchset includes three accounting data collection
patches and one CSA loadable module that make use of the
collected data. The three data collection patches (in the area
of io, mm and end-of-process) do not conflict with BSD accounting
or ELSA. They can be viewed as "enhancement" to both BSD and
ELSA as Guillaume, the maintainer of ELSA put it:

    Therefore the solution could be to enhance BSD accounting with data
    collection from CSA and provide per job accounting with a userspace
    mechanism. Sounds great to me...

It would be then up to BSD, ELSA or CSA to decide how to process
the collected data and present it to users.

All response so far seems to favor this unified data collection
method proposal.

The ELSA approach appears to make changes to kernel/acct.c while
CSA will provide its own loadable module.

Of the three data collection patches, csa_io and csa_eop are very
much CSA-independent. I certainly can make csa_mm more so as well.

I think it is a everyone-win situation for all three projects. :)

Regards,
  - jay


Andrew Morton wrote:
> Thanks, guys.  So we now know that there are three potential
> implementations which do much the same thing, yes?
> 
> I didn't get a sense of a preferred direction, but at least nobody is
> flaming anybody else yet ;)
> 
> It strikes me that CSA is the most actively developed and is the furthest
> along.  But that enhancing BSD accounting might be the least intrusive and
> most back-compatible approach.
> 
> Is that a fair summary?  If not, what should I have said?
> 
> 
> -------------------------------------------------------
> SF.Net email is sponsored by Shop4tech.com-Lowest price on Blank Media
> 100pk Sonic DVD-R 4x for only $29 -100pk Sonic DVD+R for only $33
> Save 50% off Retail on Ink & Toner - Free Shipping and Free Gift.
> http://www.shop4tech.com/z/Inkjet_Cartridges/9_108_r285
> _______________________________________________
> Lse-tech mailing list
> Lse-tech@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lse-tech


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 20:05       ` Tim Schmielau
  2004-08-26 20:46         ` Jay Lan
       [not found]         ` <20040827054218.GA4142@frec.bull.fr>
@ 2004-08-28  1:26         ` Arthur Corliss
  2 siblings, 0 replies; 27+ messages in thread
From: Arthur Corliss @ 2004-08-28  1:26 UTC (permalink / raw)
  To: Tim Schmielau
  Cc: Andrew Morton, Jay Lan, lkml, erikj, limin, lse-tech,
	=?X-UNKNOWN?Q?Ragnar_Kj=F8rstad?=, Yoshitaka ISHIKAWA,
	Guillaume Thouvenin

On Thu, 26 Aug 2004, Tim Schmielau wrote:

> That's ok, we carefully discussed the changes to make sure no new tools
> are required ;-)

Understood, and appreciated.

> Does this mean you would want to have both in the same kernel, potentially
> turning on both at the same time?

I think that's definitely the route to go.  Not everyone needs the level of
visibility that CSA provides, so I think that with a unified data collection
methodology the users can decide for themselves what they need, and the kernel
stays in good shape with no implementation-specific bloat.

> Ok, let me summarize what I learned until now:
>
> It should be easy to combine the data collection enhancements from
> CSA and ELSA to provide a common superset of information.
>
> Output file formats vary, but might be unified if projects don't insist
> too much.

I don't think we'd necessarily want a unified accounting *file*.  BSD account
files grow pretty damned quick as it is, and that's with a lot less visibility
than if you included the extra counters that CSA provides.  Make the data
available to the logging module(s) or what have you via a common struct, but let
each module decide what to actually commit to disk, and when (on job exit,
etc.).

> Main difference between CSA and ELSA on the one hand and BSD acct on the
> other is that the latter writes one record per process, while the former
> write one per job.
> With the new BSD acct v3 format, it should be possible to do per job
> accounting entirely from userspace, using pid and ppid information to
> reconstruct the process tree and some userland database for the
> pid -> job mapping. It would, however, be greatly simplified if the
> accounting records provided some kind of job id, and some indicator
> whether or not this process was the last of a job (group).
>
> CSA and ELSA might even be more lightweight since fewer accounting records
> are actually written.
>
> Sounds like it should be possible to fulfill the different needs by
> having loadable modules for the different output formats, or by a /proc
> entry that controls some aspects like whether records are written per
> job or per process.

On one hand, loadable modules would be more helpful for people doing
side-by-side comparisons of the accounting systems, but the /proc method would
be better for dynamic adjustments of a single system.  I don't think I have a
horse in this race, either way.

	--Arthur Corliss
	  Bolverk's Lair -- http://arthur.corlissfamily.org/
	  Digital Mages -- http://www.digitalmages.com/
	  "Live Free or Die, the Only Way to Live" -- NH State Motto

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-26 20:46         ` Jay Lan
@ 2004-08-28  1:27           ` Arthur Corliss
  2004-08-30 12:26             ` Guillaume Thouvenin
  0 siblings, 1 reply; 27+ messages in thread
From: Arthur Corliss @ 2004-08-28  1:27 UTC (permalink / raw)
  To: Jay Lan
  Cc: Tim Schmielau, Arthur Corliss, Andrew Morton, lkml, erikj, limin,
	lse-tech, ?, Yoshitaka ISHIKAWA, Guillaume Thouvenin

On Thu, 26 Aug 2004, Jay Lan wrote:

> I do like to see a common data collection method in the kernl. Kernel
> does not need to decide how the data to be presented to the
> user space. An accounting loadable module such as CSA or ELSA will
> take care of how the data to be presented to meet the needs of
> different users.
>
> Sounds reasonable?

Seconded.

	--Arthur Corliss
	  Bolverk's Lair -- http://arthur.corlissfamily.org/
	  Digital Mages -- http://www.digitalmages.com/
	  "Live Free or Die, the Only Way to Live" -- NH State Motto

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
       [not found]         ` <20040827054218.GA4142@frec.bull.fr>
  2004-08-27 19:31           ` John Hesterberg
  2004-08-27 19:55           ` Jay Lan
@ 2004-08-28  1:33           ` Arthur Corliss
  2 siblings, 0 replies; 27+ messages in thread
From: Arthur Corliss @ 2004-08-28  1:33 UTC (permalink / raw)
  To: Guillaume Thouvenin
  Cc: Tim Schmielau, Andrew Morton, Jay Lan, lkml, erikj, limin,
	lse-tech, =?X-UNKNOWN?Q?Ragnar_Kj=F8rstad?=, Yoshitaka ISHIKAWA

On Fri, 27 Aug 2004, Guillaume Thouvenin wrote:

> I like this solution.
> In fact what I proposed was to have PAGG and a modified BSD accounting
> that can be used with PAGG as both are already in the -mm tree. But
> manage group of processes from userspace is, IMHO, a better solution as
> modifications in the kernel will be minimal.
>
>   Therefore the solution could be to enhance BSD accounting with data
> collection from CSA and provide per job accounting with a userspace
> mechanism. Sounds great to me...

The only concern I have with a userspace solution is that you run the risk of
losing that data.  What happens if a process on the box drives it out
of memory and paging space?  The box would still be working, it just wouldn't
be able to fork new processes, and those already running that aren't purposely
made high priority may not get much of a chance to execute as well.  I've lost
SAR data that way.

	--Arthur Corliss
	  Bolverk's Lair -- http://arthur.corlissfamily.org/
	  Digital Mages -- http://www.digitalmages.com/
	  "Live Free or Die, the Only Way to Live" -- NH State Motto

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-27 19:31           ` John Hesterberg
@ 2004-08-30  8:29             ` Tim Schmielau
  0 siblings, 0 replies; 27+ messages in thread
From: Tim Schmielau @ 2004-08-30  8:29 UTC (permalink / raw)
  To: John Hesterberg
  Cc: Guillaume Thouvenin, Arthur Corliss, Andrew Morton, Jay Lan, lkml,
	erikj, limin, lse-tech, Ragnar Kj?rstad, Yoshitaka ISHIKAWA

On Fri, 27 Aug 2004, John Hesterberg wrote:

> On Fri, Aug 27, 2004 at 07:42:18AM +0200, Guillaume Thouvenin wrote:
> > On Thu, Aug 26, 2004 at 10:05:37PM +0200, Tim Schmielau wrote:
> > 
> > > With the new BSD acct v3 format, it should be possible to do per job
> > > accounting entirely from userspace, using pid and ppid information to
> > > reconstruct the process tree and some userland database for the
> > > pid -> job mapping. It would, however, be greatly simplified if the
> > > accounting records provided some kind of job id, and some indicator
> > > whether or not this process was the last of a job (group).
> > 
> > I like this solution.
> > In fact what I proposed was to have PAGG and a modified BSD accounting
> > that can be used with PAGG as both are already in the -mm tree. But
> > manage group of processes from userspace is, IMHO, a better solution as
> > modifications in the kernel will be minimal. 
> 
> The kernel part of linux-job is a module that uses PAGG, and 
> isn't difficult.  We've been running it in production for a
> couple years.

Well, I'm rethinking my opinion of not wanting two accounting methods in 
the kernel. Make them share as much code as possible, with the only
remaining difference being the format of the record and whether it is 
written per process or per job. Then we just have to make sure that both
mechanisms get exercised regularly, to prevent bit-rot.

> I don't think a kernel-based job is a requirement, though,
> so I'd like to hear more about how you'd do it otherwise.
> 
> The other comments about only one acct record per job vs one
> per process might be important, and that might mean the kernel
> has to know about the job.

Yes, it would probably be easier if the kernel knows about the job and 
could stuff a job ID into the acct record. If that means going from 64 
byte records to 128 bytes, this would again double the already larger 
overhead of BSD accounting, however. This lightweightness of CSA is why I 
am not opposed to its inclusion.

On the other hand, there are a few uses (and users) of per-process
accounting records, i.e. for security auditing, so we should not back
it out of the kernel.

> How does the BSD accounting define jobs?
> What determines the job that a process is part of?

BSD accounting doesn't have the concept of a job at all. When we discussed 
the v3 format, we considered adding a job ID field from PAGG, but a) 
nobody answered and b) there wasn't any space left in the record anyways.
So a decision was postponed for a future 128 byte acct v4 structure. 

> An important aspect of linux-job (ie the job part of the pagg/job/csa
> stack) is that it is inescapable.  The user doesn't get to determine or
> change their job (unlike process groups).  For true accounting, that
> determines the real $$$ chargebacks on shared machines, this is
> necessary.

My proposed solution for a userspace method would also be inescapable:
To start a new job, just write out its pid to a file that is only ever 
appended to, and consider all children of it as belonging to the same job.

Inescapable, but probably some overhead in userspace.

Oh, wait - I think there is a problem in current BSD accounting, if the 
parent process dies before the child, and the child gets reparented to 
init. I should investigate that...
 
> Another aspect of jobs that isn't directly related to accounting
> is that it gives users and admins a way to query, and kill :-),
> all the processes that are part of the job.  The inescapable part
> is again important...you can't fork off a process and detach it from
> the job to hide it.  In fact, I've heard that some sites use pagg/job
> without CSA for this reason.  It might have been an ISP or ASP, and
> they liked the containment linux-job provided.

Yes, it's probably a lot easier if you don't have to search accounting
files to do that.

So from my view, we might turn the discussion from whether we want CSA to 
how we integrate it, i.e. do some code review.

Tim

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-28  1:27           ` Arthur Corliss
@ 2004-08-30 12:26             ` Guillaume Thouvenin
  2004-08-31 14:19               ` Tim Schmielau
  0 siblings, 1 reply; 27+ messages in thread
From: Guillaume Thouvenin @ 2004-08-30 12:26 UTC (permalink / raw)
  To: Arthur Corliss
  Cc: Jay Lan, Tim Schmielau, Andrew Morton, lkml, erikj, limin,
	lse-tech, ?, Yoshitaka ISHIKAWA, Guillaume Thouvenin

On Fri, Aug 27, 2004 at 05:27:19PM -0800, Arthur Corliss wrote:
> On Thu, 26 Aug 2004, Jay Lan wrote:
> 
> > I do like to see a common data collection method in the kernl. Kernel
> > does not need to decide how the data to be presented to the
> > user space. An accounting loadable module such as CSA or ELSA will
> > take care of how the data to be presented to meet the needs of
> > different users.
> >
> > Sounds reasonable?
> 
> Seconded.

  Thus, to be clear, the enhanced accounting can be divided into
three parts:

    1) A common data collection method in the kernel.
       We could start from BSD-accounting and add CSA information. Could
       it be something like BSD version4?

    2) A module that will manage a job history. I mean, it will manage a
       structure in which we will keep the relationship between processes and
       "containers" and also between process and its children. The
       property needed here is that a child belongs to the same "job"
       as its parent. This allows per-job accounting. I will have 
       a look at PAGG and JOB.
       
       Can it be done in userspace? Can the loss of data (as observed by 
       Arthur Corliss with SAR) be avoided?
    
    3) Finally we need a module that will be in charge of data
       presentation. This will make it easy to be compatible with other
       applications.

  I will have a look at the per-job accounting patch. If I understand
well, this patch falls into the second requirement (manage groups of
processes). 

Regards,
Guillaume

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
       [not found]       ` <20040826205349.0582d38e.akpm@osdl.org>
  2004-08-27 20:24         ` Jay Lan
@ 2004-08-30 19:10         ` John Hesterberg
  1 sibling, 0 replies; 27+ messages in thread
From: John Hesterberg @ 2004-08-30 19:10 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Peter Williams, jlan, linux-kernel, erikj, limin, lse-tech,
	Tim Schmielau

On Thu, Aug 26, 2004 at 08:53:49PM -0700, Andrew Morton wrote:
> Thanks, guys.  So we now know that there are three potential
> implementations which do much the same thing, yes?

I believe CSA does more than the others.

> I didn't get a sense of a preferred direction, but at least nobody is
> flaming anybody else yet ;)
> 
> It strikes me that CSA is the most actively developed and is the furthest
> along.  But that enhancing BSD accounting might be the least intrusive and
> most back-compatible approach.
> 
> Is that a fair summary?  If not, what should I have said?

Does anyone know if CSA is a super-set of BSD accounting and ELSA?
What would be missing?

I'm unconvinced that enhancing BSD accounting to encompass the
capabilities of CSA is appropriate.

I think we can make the data collection additions common.  That should
encompass the bulk of the invasive changes that are required by at least
CSA proper (ie there are still the PAGG changes for job support that we
can discuss separately).  Not sure about BSD accounting and ELSA.

With that cooperation, we can then either proceed with further
cooperation, or if the goals and users of the different accounting
approaches dictate different kernel modules and user support,
I'd propose that might be OK.

John

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-27 19:55           ` Jay Lan
@ 2004-08-31  9:06             ` Guillaume Thouvenin
  2004-08-31 10:29               ` Guillaume Thouvenin
  2004-09-01 21:44               ` John Hesterberg
  0 siblings, 2 replies; 27+ messages in thread
From: Guillaume Thouvenin @ 2004-08-31  9:06 UTC (permalink / raw)
  To: Jay Lan
  Cc: Guillaume Thouvenin, Tim Schmielau, Arthur Corliss, Andrew Morton,
	Jay Lan, lkml, erikj, limin, lse-tech, Ragnar Kjørstad,
	Yoshitaka ISHIKAWA

On Fri, Aug 27, 2004 at 12:55:03PM -0700, Jay Lan wrote:
> Please visit http://oss.sgi.com/projects/pagg/
> The page has been updated to provide information on a per job
> accounting project called 'job' based on PAGG.
> 
> There is one userspace rpm and one kernel  module for job.
> This may provide what you are looking for. It is a mature product
> as well. I am sure Limin(job) and Erik(pagg) would appreciate any
> input you can provide to make 'job' more useful.

  I have a question about job. If I understand how it works, you can not
add a process in a job. I mean when you start a session, a container is 
created and it's the only way to create it. If I'm right, I think that it 
could be interesting to add a process using ioctl and /proc interface. For
example, if I want to know how resources are used by a compilation, I
need to add the process gcc in a container. Any comments? 

Best,
Guillaume

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-31  9:06             ` Guillaume Thouvenin
@ 2004-08-31 10:29               ` Guillaume Thouvenin
  2004-08-31 16:04                 ` Limin Gu
  2004-09-01 21:44               ` John Hesterberg
  1 sibling, 1 reply; 27+ messages in thread
From: Guillaume Thouvenin @ 2004-08-31 10:29 UTC (permalink / raw)
  To: Guillaume Thouvenin
  Cc: Jay Lan, Tim Schmielau, Arthur Corliss, Andrew Morton, Jay Lan,
	lkml, erikj, limin, lse-tech, Ragnar Kjørstad,
	Yoshitaka ISHIKAWA

On Tue, Aug 31, 2004 at 11:06:47AM +0200, Guillaume Thouvenin wrote:
> On Fri, Aug 27, 2004 at 12:55:03PM -0700, Jay Lan wrote:
> > Please visit http://oss.sgi.com/projects/pagg/
> > The page has been updated to provide information on a per job
> > accounting project called 'job' based on PAGG.
> > 
> > There is one userspace rpm and one kernel  module for job.
> > This may provide what you are looking for. It is a mature product
> > as well. I am sure Limin(job) and Erik(pagg) would appreciate any
> > input you can provide to make 'job' more useful.
> 
>   I have a question about job. If I understand how it works, you can not
> add a process in a job. I mean when you start a session, a container is 
> created and it's the only way to create it. If I'm right, I think that it 
> could be interesting to add a process using ioctl and /proc interface.

I think that I'm not very clear. You can add a process to a container
using the /proc/csa interface but it seems that currently this feature
is not available with job-1.2.1 package. Therefore, maybe we can add a
command called jattach that will attach a process to a given jid...

Best,
Guillaume

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-30 12:26             ` Guillaume Thouvenin
@ 2004-08-31 14:19               ` Tim Schmielau
  2004-08-31 23:01                 ` [Lse-tech] " Jay Lan
  0 siblings, 1 reply; 27+ messages in thread
From: Tim Schmielau @ 2004-08-31 14:19 UTC (permalink / raw)
  To: Jay Lan
  Cc: Guillaume Thouvenin, Arthur Corliss, Andrew Morton, lkml, erikj,
	limin, lse-tech, ?, Yoshitaka ISHIKAWA

On Mon, 30 Aug 2004, Guillaume Thouvenin wrote:

>   Thus, to be clear, the enhanced accounting can be divided into
> three parts:
> 
>     1) A common data collection method in the kernel.
>        We could start from BSD-accounting and add CSA information. Could
>        it be something like BSD version4?

I've had a quick look at the CSA data collection patches. To get the 
discussion started, here are my comments:

> --- linux.orig/drivers/block/ll_rw_blk.c        2004-08-13 22:36:16.000000000 -0700
> +++ linux/drivers/block/ll_rw_blk.c     2004-08-18 12:07:10.000000000 -0700
> @@ -1948,10 +1950,12 @@
>  
>         if (rw == READ) {
>                 disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
> +               current->rblk += nr_sectors;
>                 if (!new_io)
>                         disk_stat_inc(rq->rq_disk, read_merges);
>         } else if (rw == WRITE) {
>                 disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
> +               current->wblk += nr_sectors;
>                 if (!new_io)
>                         disk_stat_inc(rq->rq_disk, write_merges);
>         }

Andi Kleen's comment on the ELSA patch also applies here - most writes
will get accounted to pdflushd. See

http://www.lib.uaa.alaska.edu/linux-kernel/archive/2004-Week-31/0047.html

for his comment.

> --- /dev/null   1970-01-01 00:00:00.000000000 +0000
> +++ linux/include/linux/csa_internal.h  2004-08-19 15:19:05.000000000 -0700
[...]
> +#else  /* CONFIG_CSA || CONFIG_CSA_MODULE */
> +
> +#define csa_update_integrals()         do { } while (0);
> +#define csa_clear_integrals(task)      do { } while (0);
> +#endif /* CONFIG_CSA || CONFIG_CSA_MODULE */

I suppose the semicolons are unintentional.

> --- linux.orig/include/linux/sched.h    2004-08-19 15:17:52.000000000 -0700
> +++ linux/include/linux/sched.h 2004-08-19 15:19:05.000000000 -0700
[...]
> @@ -525,6 +527,10 @@
>  
>  /* i/o counters(bytes read/written, blocks read/written, #syscalls, waittime */
>          unsigned long rchar, wchar, rblk, wblk, syscr, syscw, bwtime;
> +#if defined(CONFIG_CSA) || defined(CONFIG_CSA_MODULE)
> +       unsigned long csa_rss_mem1, csa_vm_mem1;
> +       clock_t csa_stimexpd;
> +#endif

These probably need to be u64, otherwise they might easily overflow within
a few seconds on 32 bit platforms.

> --- /dev/null   1970-01-01 00:00:00.000000000 +0000
> +++ linux/include/linux/acct_eop.h      2004-08-19 18:48:44.000000000 -0700

This should probably be unified with BSD accounting to a general accounting
hook.


Tim

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-31 10:29               ` Guillaume Thouvenin
@ 2004-08-31 16:04                 ` Limin Gu
  0 siblings, 0 replies; 27+ messages in thread
From: Limin Gu @ 2004-08-31 16:04 UTC (permalink / raw)
  To: Guillaume Thouvenin
  Cc: Guillaume Thouvenin, Jay Lan, Tim Schmielau, Arthur Corliss,
	Andrew Morton, Jay Lan, lkml, erikj, limin, lse-tech,
	Ragnar Kjørstad, Yoshitaka ISHIKAWA

> 
> On Tue, Aug 31, 2004 at 11:06:47AM +0200, Guillaume Thouvenin wrote:
> > On Fri, Aug 27, 2004 at 12:55:03PM -0700, Jay Lan wrote:
> > > Please visit http://oss.sgi.com/projects/pagg/
> > > The page has been updated to provide information on a per job
> > > accounting project called 'job' based on PAGG.
> > > 
> > > There is one userspace rpm and one kernel  module for job.
> > > This may provide what you are looking for. It is a mature product
> > > as well. I am sure Limin(job) and Erik(pagg) would appreciate any
> > > input you can provide to make 'job' more useful.
> > 
> >   I have a question about job. If I understand how it works, you can not
> > add a process in a job. I mean when you start a session, a container is 
> > created and it's the only way to create it. If I'm right, I think that it 
> > could be interesting to add a process using ioctl and /proc interface.
> 
> I think that I'm not very clear. You can add a process to a container
> using the /proc/csa interface but it seems that currently this feature
> is not available with job-1.2.1 package. Therefore, maybe we can add a
> command called jattach that will attach a process to a given jid...

The current job package is job-1.4.0-1, you can find it at 
ftp://oss.sgi.com/projects/pagg/download

And it is correct that JOB_ATTACH ioctl is not implemented right now.
We could implement that ioctl and also add a user command like jattach.

We are trying to push the job kernel module to the community, but the ioctl
and /proc binary interface seems not an appropriate kernel and user space
communication interface. When job gets more users other than CSA and there is
more interest in job, maybe we could request a new syscall number
in linux. 

--Limin

> 
> Best,
> Guillaume
> 


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-31 14:19               ` Tim Schmielau
@ 2004-08-31 23:01                 ` Jay Lan
  2004-09-08 18:32                   ` Jay Lan
  0 siblings, 1 reply; 27+ messages in thread
From: Jay Lan @ 2004-08-31 23:01 UTC (permalink / raw)
  To: Tim Schmielau
  Cc: Guillaume Thouvenin, Arthur Corliss, Andrew Morton, lkml, erikj,
	limin, lse-tech, ?, Yoshitaka ISHIKAWA, csa

Adding csa@oss.sgi.com, the CSA user group mailing list, to Cc.

Tim Schmielau wrote:
> On Mon, 30 Aug 2004, Guillaume Thouvenin wrote:
> 
> 
>>  Thus, to be clear, the enhanced accounting can be divided into
>>three parts:
>>
>>    1) A common data collection method in the kernel.
>>       We could start from BSD-accounting and add CSA information. Could
>>       it be something like BSD version4?
> 
> 
> I've had a quick look at the CSA data collection patches. To get the 
> discussion started, here are my comments:
> 
> 
>>--- linux.orig/drivers/block/ll_rw_blk.c        2004-08-13 22:36:16.000000000 -0700
>>+++ linux/drivers/block/ll_rw_blk.c     2004-08-18 12:07:10.000000000 -0700
>>@@ -1948,10 +1950,12 @@
>> 
>>        if (rw == READ) {
>>                disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
>>+               current->rblk += nr_sectors;
>>                if (!new_io)
>>                        disk_stat_inc(rq->rq_disk, read_merges);
>>        } else if (rw == WRITE) {
>>                disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
>>+               current->wblk += nr_sectors;
>>                if (!new_io)
>>                        disk_stat_inc(rq->rq_disk, write_merges);
>>        }
> 
> 
> Andi Kleen's comment on the ELSA patch also applies here - most writes
> will get accounted to pdflushd. See
> 
> http://www.lib.uaa.alaska.edu/linux-kernel/archive/2004-Week-31/0047.html
> 
> for his comment.

I need more time on this. :)

> 
> 
>>--- /dev/null   1970-01-01 00:00:00.000000000 +0000
>>+++ linux/include/linux/csa_internal.h  2004-08-19 15:19:05.000000000 -0700
> 
> [...]
> 
>>+#else  /* CONFIG_CSA || CONFIG_CSA_MODULE */
>>+
>>+#define csa_update_integrals()         do { } while (0);
>>+#define csa_clear_integrals(task)      do { } while (0);
>>+#endif /* CONFIG_CSA || CONFIG_CSA_MODULE */
> 
> 
> I suppose the semicolons are unintentional.

Good catch! I fixed this in our internal tree.

> 
> 
>>--- linux.orig/include/linux/sched.h    2004-08-19 15:17:52.000000000 -0700
>>+++ linux/include/linux/sched.h 2004-08-19 15:19:05.000000000 -0700
> 
> [...]
> 
>>@@ -525,6 +527,10 @@
>> 
>> /* i/o counters(bytes read/written, blocks read/written, #syscalls, waittime */
>>         unsigned long rchar, wchar, rblk, wblk, syscr, syscw, bwtime;
>>+#if defined(CONFIG_CSA) || defined(CONFIG_CSA_MODULE)
>>+       unsigned long csa_rss_mem1, csa_vm_mem1;
>>+       clock_t csa_stimexpd;
>>+#endif
> 
> 
> These probably need to be u64, otherwise they might easily overflow within
> a view seconds on 32 bit platforms.

Will fix it.

> 
> 
>>--- /dev/null   1970-01-01 00:00:00.000000000 +0000
>>+++ linux/include/linux/acct_eop.h      2004-08-19 18:48:44.000000000 -0700
> 
> 
> This should probably be unified with BSD accounting to a general accounting
> hook.

Do you suggest to merge acct_eop.h into acct.h? It sounds good to me!

Thanks!
  - jay

> 
> 
> Tim
> 
> 
> -------------------------------------------------------
> This SF.Net email is sponsored by BEA Weblogic Workshop
> FREE Java Enterprise J2EE developer tools!
> Get your free copy of BEA WebLogic Workshop 8.1 today.
> http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
> _______________________________________________
> Lse-tech mailing list
> Lse-tech@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lse-tech


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-31  9:06             ` Guillaume Thouvenin
  2004-08-31 10:29               ` Guillaume Thouvenin
@ 2004-09-01 21:44               ` John Hesterberg
  1 sibling, 0 replies; 27+ messages in thread
From: John Hesterberg @ 2004-09-01 21:44 UTC (permalink / raw)
  To: Guillaume Thouvenin
  Cc: Jay Lan, Tim Schmielau, Arthur Corliss, Andrew Morton, Jay Lan,
	lkml, erikj, limin, lse-tech, Ragnar Kjørstad, Yoshitaka ISHIKAWA

On Tue, Aug 31, 2004 at 11:06:47AM +0200, Guillaume Thouvenin wrote:
> On Fri, Aug 27, 2004 at 12:55:03PM -0700, Jay Lan wrote:
> > Please visit http://oss.sgi.com/projects/pagg/
> > The page has been updated to provide information on a per job
> > accounting project called 'job' based on PAGG.
> > 
> > There is one userspace rpm and one kernel  module for job.
> > This may provide what you are looking for. It is a mature product
> > as well. I am sure Limin(job) and Erik(pagg) would appreciate any
> > input you can provide to make 'job' more useful.
> 
>   I have a question about job. If I understand how it works, you can not
> add a process in a job. I mean when you start a session, a container is 
> created and it's the only way to create it.

Right, that's the current implementation.  Any privileged process can
create a job, though, it doesn't *have* to be at the start of a session.
I believe job is currently hardwired that the initial member process is
the creator, and the only other way in is via inheritance, and there's
no way out of the job other than exiting or creating your own job.

> If I'm right, I think that it could be interesting to add a process
> using ioctl and /proc interface. 

We're planning on changing that interface, but I think your question
applies regardless of what interface is used.

> For example, if I want to know how resources are used by a
> compilation, I need to add the process gcc in a container. Any
> comments? 
>
> Best,
> Guillaume

It sounds like a slightly different kind of job.

The gcc process should already be in a job via its parent.
If it's already in a job, we don't let it out, as jobs are designed to
be inescapable so that users can't sneak processes outside their job.
If the only client of job is accounting, that might not be required.
Maybe as long as they become a member of another job, and the usage is
tracked, that would be OK.  I'm not sure what that would do to
the current CSA tools, though.

On IRIX, I think jobs are also used to do resource limits, and that's
probably where the hard requirement for jobs being inescapable came from.

The ISP/ASP non-acct uses of job would probably want it to be inescapable.

Different inheritance and creation policies could be implemented
in job.  Or, since it's just a loadable module, different job modules
could be written to implement different styles of job as are required
for different uses.

John

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Lse-tech] Re: [PATCH] new CSA patchset for 2.6.8
  2004-08-31 23:01                 ` [Lse-tech] " Jay Lan
@ 2004-09-08 18:32                   ` Jay Lan
  0 siblings, 0 replies; 27+ messages in thread
From: Jay Lan @ 2004-09-08 18:32 UTC (permalink / raw)
  To: Jay Lan
  Cc: Tim Schmielau, Guillaume Thouvenin, Arthur Corliss, Andrew Morton,
	lkml, erikj, limin, lse-tech, ?, Yoshitaka ISHIKAWA, csa

The block IO data was not used in billing for CSA customers.

Nobody has ever charged for this data. It is more for accountability
and resource consumption tracking. Accuracy changes as the program
runs for hours or days in our market. Repeatable billing is critical
and block IO is one that is not repeatable.

The write-block count was useful with the type of programs running for
hours and days (which are the ones of interest in block IO data). The
delayed write after the process terminates is ignored because that is
insignificant for a process doing io for hours or days. We use the
bytes transferred vs blocks transferred to see programs dominating
and polluting the cache. Processes big in one and not the other are of
interest to profile more closely.

However, if nobody else wants to have this feature, we can pull it out
until we can find a way of doing this that makes people happy.

I will submit a new set of patch based on 2.6.8.1 later.

Thanks!
  - jay


Jay Lan wrote:
> Adding csa@oss.sgi.com, the CSA user group mailing list, to Cc.
> 
> Tim Schmielau wrote:
> 
>> On Mon, 30 Aug 2004, Guillaume Thouvenin wrote:
>>
>>
>>>  Thus, to be clear, the enhanced accounting can be divided into
>>> three parts:
>>>
>>>    1) A common data collection method in the kernel.
>>>       We could start from BSD-accounting and add CSA information. Could
>>>       it be something like BSD version4?
>>
>>
>>
>> I've had a quick look at the CSA data collection patches. To get the 
>> discussion started, here are my comments:
>>
>>
>>> --- linux.orig/drivers/block/ll_rw_blk.c        2004-08-13 
>>> 22:36:16.000000000 -0700
>>> +++ linux/drivers/block/ll_rw_blk.c     2004-08-18 12:07:10.000000000 
>>> -0700
>>> @@ -1948,10 +1950,12 @@
>>>
>>>        if (rw == READ) {
>>>                disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
>>> +               current->rblk += nr_sectors;
>>>                if (!new_io)
>>>                        disk_stat_inc(rq->rq_disk, read_merges);
>>>        } else if (rw == WRITE) {
>>>                disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
>>> +               current->wblk += nr_sectors;
>>>                if (!new_io)
>>>                        disk_stat_inc(rq->rq_disk, write_merges);
>>>        }
>>
>>
>>
>> Andi Kleen's comment on the ELSA patch also applies here - most writes
>> will get accounted to pdflushd. See
>>
>> http://www.lib.uaa.alaska.edu/linux-kernel/archive/2004-Week-31/0047.html
>>
>> for his comment.
> 
> 
> I need more time on this. :)
> 
>>
>>
>>> --- /dev/null   1970-01-01 00:00:00.000000000 +0000
>>> +++ linux/include/linux/csa_internal.h  2004-08-19 15:19:05.000000000 
>>> -0700
>>
>>
>> [...]
>>
>>> +#else  /* CONFIG_CSA || CONFIG_CSA_MODULE */
>>> +
>>> +#define csa_update_integrals()         do { } while (0);
>>> +#define csa_clear_integrals(task)      do { } while (0);
>>> +#endif /* CONFIG_CSA || CONFIG_CSA_MODULE */
>>
>>
>>
>> I suppose the semicolons are unintentional.
> 
> 
> Good catch! I fixed this in our internal tree.
> 
>>
>>
>>> --- linux.orig/include/linux/sched.h    2004-08-19 15:17:52.000000000 
>>> -0700
>>> +++ linux/include/linux/sched.h 2004-08-19 15:19:05.000000000 -0700
>>
>>
>> [...]
>>
>>> @@ -525,6 +527,10 @@
>>>
>>> /* i/o counters(bytes read/written, blocks read/written, #syscalls, 
>>> waittime */
>>>         unsigned long rchar, wchar, rblk, wblk, syscr, syscw, bwtime;
>>> +#if defined(CONFIG_CSA) || defined(CONFIG_CSA_MODULE)
>>> +       unsigned long csa_rss_mem1, csa_vm_mem1;
>>> +       clock_t csa_stimexpd;
>>> +#endif
>>
>>
>>
>> These probably need to be u64, otherwise they might easily overflow 
>> within
>> a view seconds on 32 bit platforms.
> 
> 
> Will fix it.
> 
>>
>>
>>> --- /dev/null   1970-01-01 00:00:00.000000000 +0000
>>> +++ linux/include/linux/acct_eop.h      2004-08-19 18:48:44.000000000 
>>> -0700
>>
>>
>>
>> This should probably be unified with BSD accounting to a general 
>> accounting
>> hook.
> 
> 
> Do you suggest to merge acct_eop.h into acct.h? It sounds good to me!
> 
> Thanks!
>  - jay
> 
>>
>>
>> Tim
>>
>>
>> -------------------------------------------------------
>> This SF.Net email is sponsored by BEA Weblogic Workshop
>> FREE Java Enterprise J2EE developer tools!
>> Get your free copy of BEA WebLogic Workshop 8.1 today.
>> http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
>> _______________________________________________
>> Lse-tech mailing list
>> Lse-tech@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/lse-tech
> 
> 
> 
> 
> -------------------------------------------------------
> This SF.Net email is sponsored by BEA Weblogic Workshop
> FREE Java Enterprise J2EE developer tools!
> Get your free copy of BEA WebLogic Workshop 8.1 today.
> http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
> _______________________________________________
> Lse-tech mailing list
> Lse-tech@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/lse-tech


^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2004-09-08 18:38 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-08-26  0:25 [PATCH] new CSA patchset for 2.6.8 Jay Lan
2004-08-26  5:18 ` Andrew Morton
2004-08-26 17:15   ` Tim Schmielau
2004-08-26 18:26     ` [Lse-tech] " Jay Lan
2004-08-26 19:44       ` Arthur Corliss
2004-08-26 18:43     ` John Hesterberg
2004-08-27  8:26       ` Tim Schmielau
2004-08-26 19:24     ` Arthur Corliss
2004-08-26 20:05       ` Tim Schmielau
2004-08-26 20:46         ` Jay Lan
2004-08-28  1:27           ` Arthur Corliss
2004-08-30 12:26             ` Guillaume Thouvenin
2004-08-31 14:19               ` Tim Schmielau
2004-08-31 23:01                 ` [Lse-tech] " Jay Lan
2004-09-08 18:32                   ` Jay Lan
     [not found]         ` <20040827054218.GA4142@frec.bull.fr>
2004-08-27 19:31           ` John Hesterberg
2004-08-30  8:29             ` Tim Schmielau
2004-08-27 19:55           ` Jay Lan
2004-08-31  9:06             ` Guillaume Thouvenin
2004-08-31 10:29               ` Guillaume Thouvenin
2004-08-31 16:04                 ` Limin Gu
2004-09-01 21:44               ` John Hesterberg
2004-08-28  1:33           ` Arthur Corliss
2004-08-28  1:26         ` Arthur Corliss
2004-08-26 18:38   ` [Lse-tech] " John Hesterberg
     [not found]     ` <412EADBC.60607@bigpond.net.au>
     [not found]       ` <20040826205349.0582d38e.akpm@osdl.org>
2004-08-27 20:24         ` Jay Lan
2004-08-30 19:10         ` John Hesterberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox