* [Patch 1/8] Setup
@ 2006-05-02 6:12 Balbir Singh
2006-05-08 21:17 ` Andrew Morton
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Balbir Singh @ 2006-05-02 6:12 UTC (permalink / raw)
To: linux-kernel; +Cc: lse-tech, jlan
Changelog
Fixes comments by akpm
- unnecessary initialization of delayacct_on
- use kmem_cache_zalloc
- redundant check in __delayacct_tsk_exit
Other changes
- do not mix tabs and spaces
delayacct-setup.patch
Initialization code related to collection of per-task "delay"
statistics which measure how long it had to wait for cpu,
sync block io, swapping etc. The collection of statistics and
the interface are in other patches. This patch sets up the data
structures and allows the statistics collection to be disabled
through a kernel boot paramater.
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
Documentation/kernel-parameters.txt | 2
include/linux/delayacct.h | 69 ++++++++++++++++++++++++++++
include/linux/sched.h | 20 ++++++++
include/linux/time.h | 10 ++++
init/Kconfig | 10 ++++
init/main.c | 2
kernel/Makefile | 1
kernel/delayacct.c | 87 ++++++++++++++++++++++++++++++++++++
kernel/exit.c | 2
kernel/fork.c | 2
10 files changed, 205 insertions(+)
diff -puN Documentation/kernel-parameters.txt~delayacct-setup Documentation/kernel-parameters.txt
--- linux-2.6.17-rc3/Documentation/kernel-parameters.txt~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/Documentation/kernel-parameters.txt 2006-04-28 23:47:55.000000000 +0530
@@ -430,6 +430,8 @@ running once the system is up.
Format: <area>[,<node>]
See also Documentation/networking/decnet.txt.
+ delayacct [KNL] Enable per-task delay accounting
+
devfs= [DEVFS]
See Documentation/filesystems/devfs/boot-options.
diff -puN /dev/null include/linux/delayacct.h
--- /dev/null 2004-06-24 23:34:38.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h 2006-05-02 09:44:48.000000000 +0530
@@ -0,0 +1,69 @@
+/* delayacct.h - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_DELAYACCT_H
+#define _LINUX_DELAYACCT_H
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_TASK_DELAY_ACCT
+
+extern int delayacct_on; /* Delay accounting turned on/off */
+extern kmem_cache_t *delayacct_cache;
+extern void delayacct_init(void);
+extern void __delayacct_tsk_init(struct task_struct *);
+extern void __delayacct_tsk_exit(struct task_struct *);
+
+static inline void delayacct_set_flag(int flag)
+{
+ if (current->delays)
+ current->delays->flags |= flag;
+}
+
+static inline void delayacct_clear_flag(int flag)
+{
+ if (current->delays)
+ current->delays->flags &= ~flag;
+}
+
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{
+ /* reinitialize in case parent's non-null pointer was dup'ed*/
+ tsk->delays = NULL;
+ if (unlikely(delayacct_on))
+ __delayacct_tsk_init(tsk);
+}
+
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{
+ if (tsk->delays)
+ __delayacct_tsk_exit(tsk);
+}
+
+#else
+static inline void delayacct_set_flag(int flag)
+{}
+static inline void delayacct_clear_flag(int flag)
+{}
+static inline void delayacct_init(void)
+{}
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{}
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{}
+#endif /* CONFIG_TASK_DELAY_ACCT */
+
+#endif
diff -puN include/linux/sched.h~delayacct-setup include/linux/sched.h
--- linux-2.6.17-rc3/include/linux/sched.h~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/sched.h 2006-05-02 09:44:48.000000000 +0530
@@ -536,6 +536,23 @@ struct sched_info {
extern struct file_operations proc_schedstat_operations;
#endif
+#ifdef CONFIG_TASK_DELAY_ACCT
+struct task_delay_info {
+ spinlock_t lock;
+ unsigned int flags; /* Private per-task flags */
+
+ /* For each stat XXX, add following, aligned appropriately
+ *
+ * struct timespec XXX_start, XXX_end;
+ * u64 XXX_delay;
+ * u32 XXX_count;
+ *
+ * Atomicity of updates to XXX_delay, XXX_count protected by
+ * single lock above (split into XXX_lock if contention is an issue).
+ */
+};
+#endif
+
enum idle_type
{
SCHED_IDLE,
@@ -888,6 +905,9 @@ struct task_struct {
* cache last used pipe for splice
*/
struct pipe_inode_info *splice_pipe;
+#ifdef CONFIG_TASK_DELAY_ACCT
+ struct task_delay_info *delays;
+#endif
};
static inline pid_t process_group(struct task_struct *tsk)
diff -puN include/linux/time.h~delayacct-setup include/linux/time.h
--- linux-2.6.17-rc3/include/linux/time.h~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/time.h 2006-04-28 23:47:55.000000000 +0530
@@ -68,6 +68,16 @@ extern unsigned long mktime(const unsign
extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
/*
+ * sub = end - start, in normalized form
+ */
+static inline void timespec_sub(struct timespec *start, struct timespec *end,
+ struct timespec *sub)
+{
+ set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
+ end->tv_nsec - start->tv_nsec);
+}
+
+/*
* Returns true if the timespec is norm, false if denorm:
*/
#define timespec_valid(ts) \
diff -puN init/Kconfig~delayacct-setup init/Kconfig
--- linux-2.6.17-rc3/init/Kconfig~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/init/Kconfig 2006-05-02 09:44:40.000000000 +0530
@@ -150,6 +150,16 @@ config BSD_PROCESS_ACCT_V3
for processing it. A preliminary version of these tools is available
at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>.
+config TASK_DELAY_ACCT
+ bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ help
+ Collect information on time spent by a task waiting for system
+ resources like cpu, synchronous block I/O completion and swapping
+ in pages. Such statistics can help in setting a task's priorities
+ relative to other tasks for cpu, io, rss limits etc.
+
+ Say N if unsure.
+
config SYSCTL
bool "Sysctl support"
---help---
diff -puN init/main.c~delayacct-setup init/main.c
--- linux-2.6.17-rc3/init/main.c~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/init/main.c 2006-05-02 09:44:21.000000000 +0530
@@ -47,6 +47,7 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
+#include <linux/delayacct.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -541,6 +542,7 @@ asmlinkage void __init start_kernel(void
proc_root_init();
#endif
cpuset_init();
+ delayacct_init();
check_bugs();
diff -puN /dev/null kernel/delayacct.c
--- /dev/null 2004-06-24 23:34:38.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-02 09:46:26.000000000 +0530
@@ -0,0 +1,87 @@
+/* delayacct.c - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sysctl.h>
+#include <linux/delayacct.h>
+
+int delayacct_on __read_mostly; /* Delay accounting turned on/off */
+kmem_cache_t *delayacct_cache;
+
+static int __init delayacct_setup_enable(char *str)
+{
+ delayacct_on = 1;
+ return 1;
+}
+__setup("delayacct", delayacct_setup_enable);
+
+void delayacct_init(void)
+{
+ delayacct_cache = kmem_cache_create("delayacct_cache",
+ sizeof(struct task_delay_info),
+ 0,
+ SLAB_PANIC,
+ NULL, NULL);
+ delayacct_tsk_init(&init_task);
+}
+
+void __delayacct_tsk_init(struct task_struct *tsk)
+{
+ tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
+ if (tsk->delays)
+ spin_lock_init(&tsk->delays->lock);
+}
+
+void __delayacct_tsk_exit(struct task_struct *tsk)
+{
+ kmem_cache_free(delayacct_cache, tsk->delays);
+ tsk->delays = NULL;
+}
+
+/*
+ * Start accounting for a delay statistic using
+ * its starting timestamp (@start)
+ */
+
+static inline void delayacct_start(struct timespec *start)
+{
+ do_posix_clock_monotonic_gettime(start);
+}
+
+/*
+ * Finish delay accounting for a statistic using
+ * its timestamps (@start, @end), accumalator (@total) and @count
+ */
+
+static inline void delayacct_end(struct timespec *start, struct timespec *end,
+ u64 *total, u32 *count)
+{
+ struct timespec ts = {0, 0};
+ s64 ns;
+
+ do_posix_clock_monotonic_gettime(end);
+ timespec_sub(&ts, start, end);
+ ns = timespec_to_ns(&ts);
+ if (ns < 0)
+ return;
+
+ spin_lock(¤t->delays->lock);
+ *total += ns;
+ (*count)++;
+ spin_unlock(¤t->delays->lock);
+}
+
diff -puN kernel/exit.c~delayacct-setup kernel/exit.c
--- linux-2.6.17-rc3/kernel/exit.c~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/exit.c 2006-05-02 09:44:21.000000000 +0530
@@ -35,6 +35,7 @@
#include <linux/futex.h>
#include <linux/compat.h>
#include <linux/pipe_fs_i.h>
+#include <linux/delayacct.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -910,6 +911,7 @@ fastcall NORET_TYPE void do_exit(long co
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
+ delayacct_tsk_exit(tsk);
exit_mm(tsk);
exit_sem(tsk);
diff -puN kernel/fork.c~delayacct-setup kernel/fork.c
--- linux-2.6.17-rc3/kernel/fork.c~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/fork.c 2006-04-28 23:47:55.000000000 +0530
@@ -44,6 +44,7 @@
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/cn_proc.h>
+#include <linux/delayacct.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -984,6 +985,7 @@ static task_t *copy_process(unsigned lon
goto bad_fork_cleanup_put_domain;
p->did_exec = 0;
+ delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
copy_flags(clone_flags, p);
p->pid = pid;
retval = -EFAULT;
diff -puN kernel/Makefile~delayacct-setup kernel/Makefile
--- linux-2.6.17-rc3/kernel/Makefile~delayacct-setup 2006-04-28 23:47:55.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/Makefile 2006-05-02 09:44:21.000000000 +0530
@@ -38,6 +38,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
_
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [Patch 1/8] Setup
2006-05-02 6:12 [Patch 1/8] Setup Balbir Singh
@ 2006-05-08 21:17 ` Andrew Morton
2006-05-09 3:48 ` Balbir Singh
2006-05-10 10:16 ` [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup) Balbir Singh
2006-05-08 21:23 ` [Patch 1/8] Setup Andrew Morton
2006-05-09 11:46 ` [Patch 1/8] Setup Thomas Gleixner
2 siblings, 2 replies; 18+ messages in thread
From: Andrew Morton @ 2006-05-08 21:17 UTC (permalink / raw)
To: balbir; +Cc: linux-kernel, lse-tech, jlan, Thomas Gleixner
Balbir Singh <balbir@in.ibm.com> wrote:
>
> /*
> + * sub = end - start, in normalized form
> + */
> +static inline void timespec_sub(struct timespec *start, struct timespec *end,
> + struct timespec *sub)
> +{
> + set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
> + end->tv_nsec - start->tv_nsec);
> +}
The interface might not be right here.
- I think "lhs" and "rhs" would be better names than "start" and "end".
After all, we don't _know_ that the caller is using the two times as a
start and an end. The caller might be taking the difference between two
differences, for example.
- The existing timespec and timeval funtions tend to do return-by-value.
So this would become
static inline struct timespec timespec_sub(struct timespec lhs,
struct timespec rhs)
(and given that it's inlined, the added overhead of passing the
arguments by value will be zero)
- If we don't want to do that then at least let's get the arguments in a
sane order:
static inline void timespec_sub(struct timespec *result,
struct timespec lhs, struct timespec rhs)
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [Patch 1/8] Setup
2006-05-08 21:17 ` Andrew Morton
@ 2006-05-09 3:48 ` Balbir Singh
2006-05-10 10:16 ` [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup) Balbir Singh
1 sibling, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2006-05-09 3:48 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, lse-tech, jlan, Thomas Gleixner
On Mon, May 08, 2006 at 02:17:13PM -0700, Andrew Morton wrote:
> Balbir Singh <balbir@in.ibm.com> wrote:
> >
> > /*
> > + * sub = end - start, in normalized form
> > + */
> > +static inline void timespec_sub(struct timespec *start, struct timespec *end,
> > + struct timespec *sub)
> > +{
> > + set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
> > + end->tv_nsec - start->tv_nsec);
> > +}
>
> The interface might not be right here.
>
> - I think "lhs" and "rhs" would be better names than "start" and "end".
> After all, we don't _know_ that the caller is using the two times as a
> start and an end. The caller might be taking the difference between two
> differences, for example.
>
> - The existing timespec and timeval funtions tend to do return-by-value.
> So this would become
>
> static inline struct timespec timespec_sub(struct timespec lhs,
> struct timespec rhs)
>
> (and given that it's inlined, the added overhead of passing the
> arguments by value will be zero)
Agreed, I will make these changes.
>
> - If we don't want to do that then at least let's get the arguments in a
> sane order:
>
> static inline void timespec_sub(struct timespec *result,
> struct timespec lhs, struct timespec rhs)
>
--
Balbir Singh,
Linux Technology Center,
IBM Software Labs
^ permalink raw reply [flat|nested] 18+ messages in thread* [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup)
2006-05-08 21:17 ` Andrew Morton
2006-05-09 3:48 ` Balbir Singh
@ 2006-05-10 10:16 ` Balbir Singh
2006-05-10 10:24 ` Andrew Morton
1 sibling, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2006-05-10 10:16 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, lse-tech, jlan, Thomas Gleixner
On Mon, May 08, 2006 at 02:17:13PM -0700, Andrew Morton wrote:
> Balbir Singh <balbir@in.ibm.com> wrote:
> >
> > /*
> > + * sub = end - start, in normalized form
> > + */
> > +static inline void timespec_sub(struct timespec *start, struct timespec *end,
> > + struct timespec *sub)
> > +{
> > + set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
> > + end->tv_nsec - start->tv_nsec);
> > +}
>
> The interface might not be right here.
>
> - I think "lhs" and "rhs" would be better names than "start" and "end".
> After all, we don't _know_ that the caller is using the two times as a
> start and an end. The caller might be taking the difference between two
> differences, for example.
>
> - The existing timespec and timeval funtions tend to do return-by-value.
> So this would become
>
> static inline struct timespec timespec_sub(struct timespec lhs,
> struct timespec rhs)
>
> (and given that it's inlined, the added overhead of passing the
> arguments by value will be zero)
>
> - If we don't want to do that then at least let's get the arguments in a
> sane order:
>
> static inline void timespec_sub(struct timespec *result,
> struct timespec lhs, struct timespec rhs)
>
Hi, Andrew,
Please find the updated patch, which changes the interface of timespec_sub()
as suggested in the review comments
Changelog
1. Change the interface of timespec_sub() to return by value and rename
the arguments. Use lhs,rhs instead of end,start
Changes under consideration
1. Migrate to the ktime interface (Thomas Gleixner)
Balbir Singh,
Linux Technology Center,
IBM Software Labs
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
include/linux/time.h | 12 +++++++-----
kernel/delayacct.c | 2 +-
2 files changed, 8 insertions(+), 6 deletions(-)
diff -puN include/linux/time.h~timespec-sub-return-by-value include/linux/time.h
--- linux-2.6.17-rc3/include/linux/time.h~timespec-sub-return-by-value 2006-05-10 12:03:11.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/time.h 2006-05-10 12:26:44.000000000 +0530
@@ -68,13 +68,15 @@ extern unsigned long mktime(const unsign
extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
/*
- * sub = end - start, in normalized form
+ * sub = lhs - rhs, in normalized form
*/
-static inline void timespec_sub(struct timespec *start, struct timespec *end,
- struct timespec *sub)
+static inline struct timespec timespec_sub(struct timespec *lhs,
+ struct timespec *rhs)
{
- set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
- end->tv_nsec - start->tv_nsec);
+ struct timespec ts_delta;
+ set_normalized_timespec(&ts_delta, lhs->tv_sec - rhs->tv_sec,
+ lhs->tv_nsec - rhs->tv_nsec);
+ return ts_delta;
}
/*
diff -puN kernel/delayacct.c~timespec-sub-return-by-value kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~timespec-sub-return-by-value 2006-05-10 12:03:38.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-10 14:29:28.000000000 +0530
@@ -74,7 +74,7 @@ static inline void delayacct_end(struct
s64 ns;
do_posix_clock_monotonic_gettime(end);
- timespec_sub(&ts, start, end);
+ ts = timespec_sub(end, start);
ns = timespec_to_ns(&ts);
if (ns < 0)
return;
_
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup)
2006-05-10 10:16 ` [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup) Balbir Singh
@ 2006-05-10 10:24 ` Andrew Morton
2006-05-10 10:27 ` Balbir Singh
0 siblings, 1 reply; 18+ messages in thread
From: Andrew Morton @ 2006-05-10 10:24 UTC (permalink / raw)
To: balbir; +Cc: linux-kernel, lse-tech, jlan, tglx
Balbir Singh <balbir@in.ibm.com> wrote:
>
> Please find the updated patch, which changes the interface of timespec_sub()
> as suggested in the review comments
>
> ...
>
> /*
> - * sub = end - start, in normalized form
> + * sub = lhs - rhs, in normalized form
> */
> -static inline void timespec_sub(struct timespec *start, struct timespec *end,
> - struct timespec *sub)
> +static inline struct timespec timespec_sub(struct timespec *lhs,
> + struct timespec *rhs)
> {
I'd have thought that it would be more consistent and a saner interface to
use pass-by-value:
static inline struct timespec timespec_sub(struct timespec lhs,
struct timespec rhs)
It should generate the same code.
I mentioned this last time - did you choose to not do this for some reason,
or did it just slip past?
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup)
2006-05-10 10:24 ` Andrew Morton
@ 2006-05-10 10:27 ` Balbir Singh
2006-05-10 10:58 ` Balbir Singh
0 siblings, 1 reply; 18+ messages in thread
From: Balbir Singh @ 2006-05-10 10:27 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, lse-tech, jlan, tglx
On Wed, May 10, 2006 at 03:24:49AM -0700, Andrew Morton wrote:
> Balbir Singh <balbir@in.ibm.com> wrote:
> >
> > Please find the updated patch, which changes the interface of timespec_sub()
> > as suggested in the review comments
> >
> > ...
> >
> > /*
> > - * sub = end - start, in normalized form
> > + * sub = lhs - rhs, in normalized form
> > */
> > -static inline void timespec_sub(struct timespec *start, struct timespec *end,
> > - struct timespec *sub)
> > +static inline struct timespec timespec_sub(struct timespec *lhs,
> > + struct timespec *rhs)
> > {
>
> I'd have thought that it would be more consistent and a saner interface to
> use pass-by-value:
>
> static inline struct timespec timespec_sub(struct timespec lhs,
> struct timespec rhs)
>
> It should generate the same code.
>
> I mentioned this last time - did you choose to not do this for some reason,
> or did it just slip past?
Sorry, that definitely slip past.
I'll send another update
Balbir Singh,
Linux Technology Center,
IBM Software Labs
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup)
2006-05-10 10:27 ` Balbir Singh
@ 2006-05-10 10:58 ` Balbir Singh
0 siblings, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2006-05-10 10:58 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, lse-tech, jlan, tglx
On Wed, May 10, 2006 at 03:57:16PM +0530, Balbir Singh wrote:
> On Wed, May 10, 2006 at 03:24:49AM -0700, Andrew Morton wrote:
> > Balbir Singh <balbir@in.ibm.com> wrote:
> > >
> > > Please find the updated patch, which changes the interface of timespec_sub()
> > > as suggested in the review comments
> > >
> > > ...
> > >
> > > /*
> > > - * sub = end - start, in normalized form
> > > + * sub = lhs - rhs, in normalized form
> > > */
> > > -static inline void timespec_sub(struct timespec *start, struct timespec *end,
> > > - struct timespec *sub)
> > > +static inline struct timespec timespec_sub(struct timespec *lhs,
> > > + struct timespec *rhs)
> > > {
> >
> > I'd have thought that it would be more consistent and a saner interface to
> > use pass-by-value:
> >
> > static inline struct timespec timespec_sub(struct timespec lhs,
> > struct timespec rhs)
> >
> > It should generate the same code.
> >
> > I mentioned this last time - did you choose to not do this for some reason,
> > or did it just slip past?
>
> Sorry, that definitely slip past.
>
> I'll send another update
>
Hi, Andrew,
Here is another attempt to do fix the interface.
Thanks for the review,
Balbir Singh,
Linux Technology Center,
IBM Software Labs
Changelog
1. Change the interface of timespec_sub() to return by value and rename
the arguments. Use lhs,rhs instead of end,start.
2. Pass the arguments by value
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
include/linux/time.h | 12 +++++++-----
kernel/delayacct.c | 2 +-
2 files changed, 8 insertions(+), 6 deletions(-)
diff -puN include/linux/time.h~timespec-sub-return-by-value include/linux/time.h
--- linux-2.6.17-rc3/include/linux/time.h~timespec-sub-return-by-value 2006-05-10 15:58:19.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/time.h 2006-05-10 15:59:48.000000000 +0530
@@ -68,13 +68,15 @@ extern unsigned long mktime(const unsign
extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
/*
- * sub = end - start, in normalized form
+ * sub = lhs - rhs, in normalized form
*/
-static inline void timespec_sub(struct timespec *start, struct timespec *end,
- struct timespec *sub)
+static inline struct timespec timespec_sub(struct timespec lhs,
+ struct timespec rhs)
{
- set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
- end->tv_nsec - start->tv_nsec);
+ struct timespec ts_delta;
+ set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec,
+ lhs.tv_nsec - rhs.tv_nsec);
+ return ts_delta;
}
/*
diff -puN kernel/delayacct.c~timespec-sub-return-by-value kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~timespec-sub-return-by-value 2006-05-10 15:58:19.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-10 16:00:30.000000000 +0530
@@ -74,7 +74,7 @@ static inline void delayacct_end(struct
s64 ns;
do_posix_clock_monotonic_gettime(end);
- timespec_sub(&ts, start, end);
+ ts = timespec_sub(*end, *start);
ns = timespec_to_ns(&ts);
if (ns < 0)
return;
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Patch 1/8] Setup
2006-05-02 6:12 [Patch 1/8] Setup Balbir Singh
2006-05-08 21:17 ` Andrew Morton
@ 2006-05-08 21:23 ` Andrew Morton
2006-05-09 3:56 ` Balbir Singh
2006-05-10 10:18 ` [PATCH][delayacct] un-inline delayacct_end(), remove initialization of ts (was Re: [Patch 1/8] Setup) Balbir Singh
2006-05-09 11:46 ` [Patch 1/8] Setup Thomas Gleixner
2 siblings, 2 replies; 18+ messages in thread
From: Andrew Morton @ 2006-05-08 21:23 UTC (permalink / raw)
To: balbir; +Cc: linux-kernel, lse-tech, jlan
Balbir Singh <balbir@in.ibm.com> wrote:
>
> +static inline void delayacct_end(struct timespec *start, struct timespec *end,
> + u64 *total, u32 *count)
> +{
> + struct timespec ts = {0, 0};
> + s64 ns;
> +
> + do_posix_clock_monotonic_gettime(end);
> + timespec_sub(&ts, start, end);
> + ns = timespec_to_ns(&ts);
> + if (ns < 0)
> + return;
> +
> + spin_lock(¤t->delays->lock);
> + *total += ns;
> + (*count)++;
> + spin_unlock(¤t->delays->lock);
> +}
- too large to be inlined
- The initialisation of `ts' is unneeded (maybe it generated a bogus
warning, but it won't do that if you switch timespec_sub to
return-by-value)
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [Patch 1/8] Setup
2006-05-08 21:23 ` [Patch 1/8] Setup Andrew Morton
@ 2006-05-09 3:56 ` Balbir Singh
2006-05-10 10:18 ` [PATCH][delayacct] un-inline delayacct_end(), remove initialization of ts (was Re: [Patch 1/8] Setup) Balbir Singh
1 sibling, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2006-05-09 3:56 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, lse-tech, jlan
On Mon, May 08, 2006 at 02:23:22PM -0700, Andrew Morton wrote:
> Balbir Singh <balbir@in.ibm.com> wrote:
> >
> > +static inline void delayacct_end(struct timespec *start, struct timespec *end,
> > + u64 *total, u32 *count)
> > +{
> > + struct timespec ts = {0, 0};
> > + s64 ns;
> > +
> > + do_posix_clock_monotonic_gettime(end);
> > + timespec_sub(&ts, start, end);
> > + ns = timespec_to_ns(&ts);
> > + if (ns < 0)
> > + return;
> > +
> > + spin_lock(¤t->delays->lock);
> > + *total += ns;
> > + (*count)++;
> > + spin_unlock(¤t->delays->lock);
> > +}
>
> - too large to be inlined
I will un-inline it.
>
> - The initialisation of `ts' is unneeded (maybe it generated a bogus
> warning, but it won't do that if you switch timespec_sub to
> return-by-value)
gcc-4.1 does generate a bogus warning. I will switch to return by value
and remove the initialization of `ts'
Thanks,
Balbir Singh,
Linux Technology Center,
IBM Software Labs
^ permalink raw reply [flat|nested] 18+ messages in thread* [PATCH][delayacct] un-inline delayacct_end(), remove initialization of ts (was Re: [Patch 1/8] Setup)
2006-05-08 21:23 ` [Patch 1/8] Setup Andrew Morton
2006-05-09 3:56 ` Balbir Singh
@ 2006-05-10 10:18 ` Balbir Singh
1 sibling, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2006-05-10 10:18 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, lse-tech, jlan
On Mon, May 08, 2006 at 02:23:22PM -0700, Andrew Morton wrote:
> Balbir Singh <balbir@in.ibm.com> wrote:
> >
> > +static inline void delayacct_end(struct timespec *start, struct timespec *end,
> > + u64 *total, u32 *count)
> > +{
> > + struct timespec ts = {0, 0};
> > + s64 ns;
> > +
> > + do_posix_clock_monotonic_gettime(end);
> > + timespec_sub(&ts, start, end);
> > + ns = timespec_to_ns(&ts);
> > + if (ns < 0)
> > + return;
> > +
> > + spin_lock(¤t->delays->lock);
> > + *total += ns;
> > + (*count)++;
> > + spin_unlock(¤t->delays->lock);
> > +}
>
> - too large to be inlined
>
> - The initialisation of `ts' is unneeded (maybe it generated a bogus
> warning, but it won't do that if you switch timespec_sub to
> return-by-value)
Hi, Andrew,
Here is an update to un-inline delayacct_end() and remove the initialization
of ts to 0.
Balbir Singh,
Linux Technology Center,
IBM Software Labs
Changelog
1. Remove inlining of delayacct_end(), the function is too big to be inlined
2. Remove initialization of ts.
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
kernel/delayacct.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff -puN kernel/delayacct.c~remove-initialization-of-ts-and-inline kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~remove-initialization-of-ts-and-inline 2006-05-10 14:11:21.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c 2006-05-10 14:11:57.000000000 +0530
@@ -67,10 +67,10 @@ static inline void delayacct_start(struc
* its timestamps (@start, @end), accumalator (@total) and @count
*/
-static inline void delayacct_end(struct timespec *start, struct timespec *end,
+static void delayacct_end(struct timespec *start, struct timespec *end,
u64 *total, u32 *count)
{
- struct timespec ts = {0, 0};
+ struct timespec ts;
s64 ns;
do_posix_clock_monotonic_gettime(end);
_
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Patch 1/8] Setup
2006-05-02 6:12 [Patch 1/8] Setup Balbir Singh
2006-05-08 21:17 ` Andrew Morton
2006-05-08 21:23 ` [Patch 1/8] Setup Andrew Morton
@ 2006-05-09 11:46 ` Thomas Gleixner
2006-05-09 13:20 ` [Lse-tech] " Balbir Singh
2 siblings, 1 reply; 18+ messages in thread
From: Thomas Gleixner @ 2006-05-09 11:46 UTC (permalink / raw)
To: balbir; +Cc: linux-kernel, lse-tech, jlan, Andrew Morton
On Tue, 2006-05-02 at 11:42 +0530, Balbir Singh wrote:
> /*
> + * sub = end - start, in normalized form
> + */
> +static inline void timespec_sub(struct timespec *start, struct timespec *end,
> + struct timespec *sub)
> +{
> + set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
> + end->tv_nsec - start->tv_nsec);
> +}
Please use the existing ktime_t functions for that purpose. The ktime_t
format has nanosecond resolution and is optimized for 32/64bit machines.
> +static inline void delayacct_start(struct timespec *start)
> +{
> + do_posix_clock_monotonic_gettime(start);
> +}
Please get rid of this wrapper and use the ktime based functions for
that.
> +/*
> + * Finish delay accounting for a statistic using
> + * its timestamps (@start, @end), accumalator (@total) and @count
> + */
> +
> +static inline void delayacct_end(struct timespec *start, struct timespec *end,
> + u64 *total, u32 *count)
Please use ktime_t for total.
> +{
> + struct timespec ts = {0, 0};
> + s64 ns;
> +
> + do_posix_clock_monotonic_gettime(end);
> + timespec_sub(&ts, start, end);
> + ns = timespec_to_ns(&ts);
> + if (ns < 0)
> + return;
monotonic time is monotonic increasing. So delta is always >= 0 !
tglx
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [Lse-tech] Re: [Patch 1/8] Setup
2006-05-09 11:46 ` [Patch 1/8] Setup Thomas Gleixner
@ 2006-05-09 13:20 ` Balbir Singh
0 siblings, 0 replies; 18+ messages in thread
From: Balbir Singh @ 2006-05-09 13:20 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: linux-kernel, lse-tech, jlan, Andrew Morton
On Tue, May 09, 2006 at 11:46:46AM +0000, Thomas Gleixner wrote:
> On Tue, 2006-05-02 at 11:42 +0530, Balbir Singh wrote:
> > /*
> > + * sub = end - start, in normalized form
> > + */
> > +static inline void timespec_sub(struct timespec *start, struct timespec *end,
> > + struct timespec *sub)
> > +{
> > + set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
> > + end->tv_nsec - start->tv_nsec);
> > +}
>
> Please use the existing ktime_t functions for that purpose. The ktime_t
> format has nanosecond resolution and is optimized for 32/64bit machines.
>
> > +static inline void delayacct_start(struct timespec *start)
> > +{
> > + do_posix_clock_monotonic_gettime(start);
> > +}
>
> Please get rid of this wrapper and use the ktime based functions for
> that.
>
> > +/*
> > + * Finish delay accounting for a statistic using
> > + * its timestamps (@start, @end), accumalator (@total) and @count
> > + */
> > +
> > +static inline void delayacct_end(struct timespec *start, struct timespec *end,
> > + u64 *total, u32 *count)
>
> Please use ktime_t for total.
>
> > +{
> > + struct timespec ts = {0, 0};
> > + s64 ns;
> > +
> > + do_posix_clock_monotonic_gettime(end);
> > + timespec_sub(&ts, start, end);
> > + ns = timespec_to_ns(&ts);
> > + if (ns < 0)
> > + return;
>
> monotonic time is monotonic increasing. So delta is always >= 0 !
>
> tglx
>
>
>
>
I am going through the ktime interface and it seems interesting.
I will look into it and see if we can migrate the interface
to use ktime
Balbir Singh,
Linux Technology Center,
IBM Software Labs
^ permalink raw reply [flat|nested] 18+ messages in thread
* [Patch 0/8] per-task delay accounting
@ 2006-04-22 2:16 Shailabh Nagar
2006-04-22 2:23 ` [Patch 1/8] Setup Shailabh Nagar
0 siblings, 1 reply; 18+ messages in thread
From: Shailabh Nagar @ 2006-04-22 2:16 UTC (permalink / raw)
To: linux-kernel
Cc: LSE, Jes Sorensen, Peter Chubb, Erich Focht, Levent Serinol,
Jay Lan
Here are the delay accounting patches again. I'm not using the
earlier email thread due to code being refactored a bit.
The previous posting
http://www.uwsg.indiana.edu/hypermail/linux/kernel/0603.3/1776.html
of these patches elicited several review comments from Andrew Morton
all of which have been addressed.
The other main thread of the comments was whether other accounting
stakeholders would be ok with this interface. Towards this end,
I'd posted an overview of what the other packages do (which didn't seem
to make the archives) and some of the stakeholders responded.
I'll repost the analysis as a reply to this post. Meanwhile, here's
the list of the stakeholders identified by Andrew and a summary of status
of their comments.
1. CSA accounting/PAGG/JOB: Jay Lan <jlan@engr.sgi.com>
Raised several points
http://www.uwsg.indiana.edu/hypermail/linux/kernel/0604.1/0397.html
all of which have been addressed in this set of patches.
2. per-process IO statistics: Levent Serinol <lserinol@gmail.com>
No reponse.
I'd ascertained that its needs are a subset of CSA.
3. per-cpu time statistics: Erich Focht <efocht@ess.nec.de>
No response.
I'd ascertained that its needs can be met by taskstats
interface whenever these statistics are submitted for inclusion.
4. Microstate accounting: Peter Chubb <peterc@gelato.unsw.edu.au>
Mentioned overlap of patches with delay accounting
http://www.uwsg.indiana.edu/hypermail/linux/kernel/0603.3/2286.html
and also that a /proc interface was preferable due to convenience.
My position is that the netlink interface is a superset of /proc due to
former's ability to supply exit-time data.
5. ELSA: Guillaume Thouvenin <guillaume.thouvenin@bull.net>
Confirmed that ELSA is not a direct user of a new kernel statistics
interface since it is a consumer of CSA or BSD accounting's statistics.
6. pnotify: Jes Sorensen <jes@sgi.com>
(taken over pnotify from Erik Jacobson)
Informed over private email that pnotify replacement is
being worked on.
I'd ascertained that pnotify (or its replacemenent) will not be
concerned with exporting data to userspace or collecting any stats.
Thats left to the kernel module that uses pnotify to get
notifications. CSA is one expected user of pnotify.
Hence CSA's concerns are the only ones relevant to pnotify as well.
7. Scalable statistics counters with /proc reporting:
Ravikiran G Thirumalai, Dipankar Sarma <dipankar@in.ibm.com>
Confirmed these counters aren't relevant to this discussion.
--Shailabh
Series
delayacct-setup.patch
delayacct-blkio-swapin.patch
delayacct-schedstats.patch
genetlink-utils.patch
taskstats-setup.patch
delayacct-taskstats.patch
delayacct-doc.patch
delayacct-procfs.patch
^ permalink raw reply [flat|nested] 18+ messages in thread* [Patch 1/8] Setup
2006-04-22 2:16 [Patch 0/8] per-task delay accounting Shailabh Nagar
@ 2006-04-22 2:23 ` Shailabh Nagar
2006-04-24 2:02 ` Randy.Dunlap
0 siblings, 1 reply; 18+ messages in thread
From: Shailabh Nagar @ 2006-04-22 2:23 UTC (permalink / raw)
To: linux-kernel; +Cc: LSE, Jay Lan
Changelog
Fixes comments by akpm
- unnecessary initialization of delayacct_on
- use kmem_cache_zalloc
- redundant check in __delayacct_tsk_exit
delayacct-setup.patch
Initialization code related to collection of per-task "delay"
statistics which measure how long it had to wait for cpu,
sync block io, swapping etc. The collection of statistics and
the interface are in other patches. This patch sets up the data
structures and allows the statistics collection to be disabled
through a kernel boot paramater.
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Documentation/kernel-parameters.txt | 2
include/linux/delayacct.h | 69 ++++++++++++++++++++++++++++
include/linux/sched.h | 21 ++++++++
include/linux/time.h | 10 ++++
init/Kconfig | 13 +++++
init/main.c | 2
kernel/Makefile | 1
kernel/delayacct.c | 87 ++++++++++++++++++++++++++++++++++++
kernel/exit.c | 3 +
kernel/fork.c | 2
10 files changed, 210 insertions(+)
Index: linux-2.6.17-rc1/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.17-rc1.orig/Documentation/kernel-parameters.txt 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/Documentation/kernel-parameters.txt 2006-04-14 14:59:21.000000000 -0400
@@ -430,6 +430,8 @@ running once the system is up.
Format: <area>[,<node>]
See also Documentation/networking/decnet.txt.
+ delayacct [KNL] Enable per-task delay accounting
+
devfs= [DEVFS]
See Documentation/filesystems/devfs/boot-options.
Index: linux-2.6.17-rc1/kernel/Makefile
===================================================================
--- linux-2.6.17-rc1.orig/kernel/Makefile 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/kernel/Makefile 2006-04-21 19:39:28.000000000 -0400
@@ -38,6 +38,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
Index: linux-2.6.17-rc1/include/linux/delayacct.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc1/include/linux/delayacct.h 2006-04-21 19:39:29.000000000 -0400
@@ -0,0 +1,69 @@
+/* delayacct.h - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_TASKDELAYS_H
+#define _LINUX_TASKDELAYS_H
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_TASK_DELAY_ACCT
+
+extern int delayacct_on; /* Delay accounting turned on/off */
+extern kmem_cache_t *delayacct_cache;
+extern void delayacct_init(void);
+extern void __delayacct_tsk_init(struct task_struct *);
+extern void __delayacct_tsk_exit(struct task_struct *);
+
+static inline void delayacct_set_flag(int flag)
+{
+ if (current->delays)
+ current->delays->flags |= flag;
+}
+
+static inline void delayacct_clear_flag(int flag)
+{
+ if (current->delays)
+ current->delays->flags &= ~flag;
+}
+
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{
+ /* reinitialize in case parent's non-null pointer was dup'ed*/
+ tsk->delays = NULL;
+ if (unlikely(delayacct_on))
+ __delayacct_tsk_init(tsk);
+}
+
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{
+ if (tsk->delays)
+ __delayacct_tsk_exit(tsk);
+}
+
+#else
+static inline void delayacct_set_flag(int flag)
+{}
+static inline void delayacct_clear_flag(int flag)
+{}
+static inline void delayacct_init(void)
+{}
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{}
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{}
+#endif /* CONFIG_TASK_DELAY_ACCT */
+
+#endif
Index: linux-2.6.17-rc1/include/linux/sched.h
===================================================================
--- linux-2.6.17-rc1.orig/include/linux/sched.h 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/include/linux/sched.h 2006-04-21 19:39:29.000000000 -0400
@@ -536,6 +536,24 @@ struct sched_info {
extern struct file_operations proc_schedstat_operations;
#endif
+#ifdef CONFIG_TASK_DELAY_ACCT
+struct task_delay_info {
+ spinlock_t lock;
+ unsigned int flags; /* Private per-task flags */
+
+ /* For each stat XXX, add following, aligned appropriately
+ *
+ * struct timespec XXX_start, XXX_end;
+ * u64 XXX_delay;
+ * u32 XXX_count;
+ *
+ * Atomicity of updates to XXX_delay, XXX_count protected by
+ * single lock above (split into XXX_lock if contention is an issue).
+ */
+};
+#endif
+
+
enum idle_type
{
SCHED_IDLE,
@@ -882,6 +900,9 @@ struct task_struct {
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
+#ifdef CONFIG_TASK_DELAY_ACCT
+ struct task_delay_info *delays;
+#endif
};
static inline pid_t process_group(struct task_struct *tsk)
Index: linux-2.6.17-rc1/init/Kconfig
===================================================================
--- linux-2.6.17-rc1.orig/init/Kconfig 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/init/Kconfig 2006-04-21 19:39:28.000000000 -0400
@@ -150,6 +150,19 @@ config BSD_PROCESS_ACCT_V3
for processing it. A preliminary version of these tools is available
at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>.
+config TASK_DELAY_ACCT
+ bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ help
+ Collect information on time spent by a task waiting for system
+ resources like cpu, synchronous block I/O completion and swapping
+ in pages. Such statistics can help in setting a task's priorities
+ relative to other tasks for cpu, io, rss limits etc.
+
+ Unlike BSD process accounting, this information is available
+ continuously during the lifetime of a task.
+
+ Say N if unsure.
+
config SYSCTL
bool "Sysctl support"
---help---
Index: linux-2.6.17-rc1/init/main.c
===================================================================
--- linux-2.6.17-rc1.orig/init/main.c 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/init/main.c 2006-04-21 19:39:28.000000000 -0400
@@ -47,6 +47,7 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
+#include <linux/delayacct.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -541,6 +542,7 @@ asmlinkage void __init start_kernel(void
proc_root_init();
#endif
cpuset_init();
+ delayacct_init();
check_bugs();
Index: linux-2.6.17-rc1/kernel/delayacct.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.17-rc1/kernel/delayacct.c 2006-04-21 19:39:29.000000000 -0400
@@ -0,0 +1,87 @@
+/* delayacct.c - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sysctl.h>
+#include <linux/delayacct.h>
+
+int delayacct_on __read_mostly; /* Delay accounting turned on/off */
+kmem_cache_t *delayacct_cache;
+
+static int __init delayacct_setup_enable(char *str)
+{
+ delayacct_on = 1;
+ return 1;
+}
+__setup("delayacct", delayacct_setup_enable);
+
+void delayacct_init(void)
+{
+ delayacct_cache = kmem_cache_create("delayacct_cache",
+ sizeof(struct task_delay_info),
+ 0,
+ SLAB_PANIC,
+ NULL, NULL);
+ delayacct_tsk_init(&init_task);
+}
+
+void __delayacct_tsk_init(struct task_struct *tsk)
+{
+ tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
+ if (tsk->delays)
+ spin_lock_init(&tsk->delays->lock);
+}
+
+void __delayacct_tsk_exit(struct task_struct *tsk)
+{
+ kmem_cache_free(delayacct_cache, tsk->delays);
+ tsk->delays = NULL;
+}
+
+/*
+ * Start accounting for a delay statistic using
+ * its starting timestamp (@start)
+ */
+
+static inline void delayacct_start(struct timespec *start)
+{
+ do_posix_clock_monotonic_gettime(start);
+}
+
+/*
+ * Finish delay accounting for a statistic using
+ * its timestamps (@start, @end), accumalator (@total) and @count
+ */
+
+static inline void delayacct_end(struct timespec *start, struct timespec *end,
+ u64 *total, u32 *count)
+{
+ struct timespec ts;
+ s64 ns;
+
+ do_posix_clock_monotonic_gettime(end);
+ timespec_sub(&ts, start, end);
+ ns = timespec_to_ns(&ts);
+ if (ns < 0)
+ return;
+
+ spin_lock(¤t->delays->lock);
+ *total += ns;
+ (*count)++;
+ spin_unlock(¤t->delays->lock);
+}
+
Index: linux-2.6.17-rc1/kernel/fork.c
===================================================================
--- linux-2.6.17-rc1.orig/kernel/fork.c 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/kernel/fork.c 2006-04-14 14:59:21.000000000 -0400
@@ -44,6 +44,7 @@
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/cn_proc.h>
+#include <linux/delayacct.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -989,6 +990,7 @@ static task_t *copy_process(unsigned lon
goto bad_fork_cleanup_put_domain;
p->did_exec = 0;
+ delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
copy_flags(clone_flags, p);
p->pid = pid;
retval = -EFAULT;
Index: linux-2.6.17-rc1/kernel/exit.c
===================================================================
--- linux-2.6.17-rc1.orig/kernel/exit.c 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/kernel/exit.c 2006-04-21 19:39:28.000000000 -0400
@@ -34,6 +34,7 @@
#include <linux/mutex.h>
#include <linux/futex.h>
#include <linux/compat.h>
+#include <linux/delayacct.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -893,6 +894,7 @@ fastcall NORET_TYPE void do_exit(long co
preempt_count());
acct_update_integrals(tsk);
+
if (tsk->mm) {
update_hiwater_rss(tsk->mm);
update_hiwater_vm(tsk->mm);
@@ -909,6 +911,7 @@ fastcall NORET_TYPE void do_exit(long co
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
+ delayacct_tsk_exit(tsk);
exit_mm(tsk);
exit_sem(tsk);
Index: linux-2.6.17-rc1/include/linux/time.h
===================================================================
--- linux-2.6.17-rc1.orig/include/linux/time.h 2006-04-13 10:55:54.000000000 -0400
+++ linux-2.6.17-rc1/include/linux/time.h 2006-04-14 14:59:21.000000000 -0400
@@ -68,6 +68,16 @@ extern unsigned long mktime(const unsign
extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
/*
+ * sub = end - start, in normalized form
+ */
+static inline void timespec_sub(struct timespec *start, struct timespec *end,
+ struct timespec *sub)
+{
+ set_normalized_timespec(sub, end->tv_sec - start->tv_sec,
+ end->tv_nsec - start->tv_nsec);
+}
+
+/*
* Returns true if the timespec is norm, false if denorm:
*/
#define timespec_valid(ts) \
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [Patch 1/8] Setup
2006-04-22 2:23 ` [Patch 1/8] Setup Shailabh Nagar
@ 2006-04-24 2:02 ` Randy.Dunlap
2006-04-24 17:26 ` Shailabh Nagar
0 siblings, 1 reply; 18+ messages in thread
From: Randy.Dunlap @ 2006-04-24 2:02 UTC (permalink / raw)
To: Shailabh Nagar; +Cc: linux-kernel, lse-tech, jlan
On Fri, 21 Apr 2006 22:23:25 -0400 Shailabh Nagar wrote:
> Index: linux-2.6.17-rc1/include/linux/delayacct.h
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6.17-rc1/include/linux/delayacct.h 2006-04-21 19:39:29.000000000 -0400
> @@ -0,0 +1,69 @@
> +/* delayacct.h - per-task delay accounting
> + */
> +
> +#ifndef _LINUX_TASKDELAYS_H
> +#define _LINUX_TASKDELAYS_H
Probably _LINUX_DELAYACCT_H.
Or if I add linux/taskdelays.h, what #include guard should I use?
---
~Randy
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Patch 1/8] Setup
2006-04-24 2:02 ` Randy.Dunlap
@ 2006-04-24 17:26 ` Shailabh Nagar
0 siblings, 0 replies; 18+ messages in thread
From: Shailabh Nagar @ 2006-04-24 17:26 UTC (permalink / raw)
To: Randy.Dunlap; +Cc: linux-kernel, lse-tech, jlan
Randy.Dunlap wrote:
>On Fri, 21 Apr 2006 22:23:25 -0400 Shailabh Nagar wrote:
>
>
>
>>Index: linux-2.6.17-rc1/include/linux/delayacct.h
>>===================================================================
>>--- /dev/null 1970-01-01 00:00:00.000000000 +0000
>>+++ linux-2.6.17-rc1/include/linux/delayacct.h 2006-04-21 19:39:29.000000000 -0400
>>@@ -0,0 +1,69 @@
>>+/* delayacct.h - per-task delay accounting
>>+ */
>>+
>>+#ifndef _LINUX_TASKDELAYS_H
>>+#define _LINUX_TASKDELAYS_H
>>
>>
>
>Probably _LINUX_DELAYACCT_H.
>
>
Yup. Hangover from old name...will fix.
>Or if I add linux/taskdelays.h, what #include guard should I use?
>
>---
>~Randy
>
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [Patch 0/8] per-task delay accounting
@ 2006-03-30 0:32 Shailabh Nagar
2006-03-30 0:35 ` [Patch 1/8] Setup Shailabh Nagar
0 siblings, 1 reply; 18+ messages in thread
From: Shailabh Nagar @ 2006-03-30 0:32 UTC (permalink / raw)
To: Andrew Morton; +Cc: Greg KH, Arjan van de Ven, Jamal, Andi Kleen, linux-kernel
Andrew,
Could you please include the following delay accounting patches
in -mm ?
The patches have gone through several iterations on lkml and
numerous comments raised by reviewers have been addressed
- several netlink interface comments (Jamal)
- block I/O collection method (Arjan)
- block I/O delays export through /proc (Andi)
- performance issues (Greg) (just addressed, see below)
- GPL headers (Arjan)
Most of the descriptions of the patches are either in the
patch itself or in the documentation patch at the end.
Thanks
--Shailabh
Patch series
delayacct-setup.patch
delayacct-blkio-swapin.patch
delayacct-schedstats.patch
genetlink-utils.patch
delayacct-genetlink.patch
delayacct-virtcpu.patch
delayacct-procfs.patch
delayacct-doc.patch
Results highlights
- No statistically significant performance degradation is seen in
kernbench, hackbench and large OLTP benchmark when delay
accounting is configured.
The overheads of configuring delay accounting,
without enabling at boot time, are statistically negligible
for hackbench and a large OLTP benchmark and negative
(i.e. performance improves) in kernbench.
- Similar lack of degradation is seen in kernbench and hackbench
even when delay accounting is enabled at boot.
No data could be collected for the large OLTP benchmark (efforts
ongoing).
Legend
Base
Vanilla 2.6.16 kernel
without any patches applied
+patch
Delay accounting configured
but not enabled at boot
+patch+enable
Delay accounting enabled at boot
but no stats read
Time Elapsed time, averaged over 10 runs
Stddev Standard deviation of elapsed times
Ovhd % difference of elapsed time with respect to base kernel
t-value Used to measure statistical significance
of difference of two mean values (in this
case mean elapsed time). Low t-values indicate
insignificant difference. The t-values here were
calculated at 95% confidence interval using the tool at
http://www.polarismr.com/education/tools_stat_diff_means.html
Hackbench
---------
200 groups, using pipes
Elapsed time, in seconds, lower better
Ovhd Time Stddev Ovhd significant (t-value)
Base 0% 43.483 0.178 na
+patch 0.1% 43.517 0.265 No (0.337)
+patch+enable 0.3% 43.629 0.167 No (1.892)
Kernbench
---------
Average of 10 iterations
Elapsed time, in seconds, lower better
Ovhd Time Stddev Ovhd significant (t-value)
Base 0% 196.704 0.459 na
+patch -0.5% 195.812 0.477 Yes (4.261)
+patch+enable 0.02% 196.752 0.356 No (0.261)
Large OLTP benchmark
--------------------
An industry standard large database online transaction processing
workload was run with delay accounting patches configured
ON and OFF.
The performance degradation of delay accounting was about 0.2%,
which was well within the normal range of variation between
similar runs.
No runs were taken with delay accounting enabled at boot time.
^ permalink raw reply [flat|nested] 18+ messages in thread
* [Patch 1/8] Setup
2006-03-30 0:32 [Patch 0/8] per-task delay accounting Shailabh Nagar
@ 2006-03-30 0:35 ` Shailabh Nagar
2006-03-30 5:03 ` Andrew Morton
0 siblings, 1 reply; 18+ messages in thread
From: Shailabh Nagar @ 2006-03-30 0:35 UTC (permalink / raw)
To: linux-kernel
delayacct-setup.patch
Initialization code related to collection of per-task "delay"
statistics which measure how long it had to wait for cpu,
sync block io, swapping etc. The collection of statistics and
the interface are in other patches. This patch sets up the data
structures and allows the statistics collection to be disabled
through a kernel boot paramater.
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Documentation/kernel-parameters.txt | 2
include/linux/delayacct.h | 51 +++++++++++++++++++
include/linux/sched.h | 17 ++++++
init/Kconfig | 13 +++++
init/main.c | 2
kernel/Makefile | 1
kernel/delayacct.c | 92 ++++++++++++++++++++++++++++++++++++
kernel/exit.c | 3 +
kernel/fork.c | 2
9 files changed, 183 insertions(+)
Index: linux-2.6.16/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.16.orig/Documentation/kernel-parameters.txt 2006-03-29 18:12:55.000000000 -0500
+++ linux-2.6.16/Documentation/kernel-parameters.txt 2006-03-29 18:12:57.000000000 -0500
@@ -416,6 +416,8 @@ running once the system is up.
Format: <area>[,<node>]
See also Documentation/networking/decnet.txt.
+ delayacct [KNL] Enable per-task delay accounting
+
devfs= [DEVFS]
See Documentation/filesystems/devfs/boot-options.
Index: linux-2.6.16/kernel/Makefile
===================================================================
--- linux-2.6.16.orig/kernel/Makefile 2006-03-29 18:12:55.000000000 -0500
+++ linux-2.6.16/kernel/Makefile 2006-03-29 18:12:57.000000000 -0500
@@ -34,6 +34,7 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softl
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
Index: linux-2.6.16/include/linux/delayacct.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/include/linux/delayacct.h 2006-03-29 18:12:57.000000000 -0500
@@ -0,0 +1,51 @@
+/* delayacct.h - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_TASKDELAYS_H
+#define _LINUX_TASKDELAYS_H
+
+#include <linux/sched.h>
+
+#ifdef CONFIG_TASK_DELAY_ACCT
+extern int delayacct_on; /* Delay accounting turned on/off */
+extern kmem_cache_t *delayacct_cache;
+extern void delayacct_init(void);
+extern void __delayacct_tsk_init(struct task_struct *);
+extern void __delayacct_tsk_exit(struct task_struct *);
+
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{
+ /* reinitialize in case parent's non-null pointer was dup'ed*/
+ tsk->delays = NULL;
+ if (unlikely(delayacct_on))
+ __delayacct_tsk_init(tsk);
+}
+
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{
+ if (tsk->delays)
+ __delayacct_tsk_exit(tsk);
+}
+
+#else
+static inline void delayacct_init(void)
+{}
+static inline void delayacct_tsk_init(struct task_struct *tsk)
+{}
+static inline void delayacct_tsk_exit(struct task_struct *tsk)
+{}
+#endif /* CONFIG_TASK_DELAY_ACCT */
+#endif /* _LINUX_TASKDELAYS_H */
Index: linux-2.6.16/include/linux/sched.h
===================================================================
--- linux-2.6.16.orig/include/linux/sched.h 2006-03-29 18:12:55.000000000 -0500
+++ linux-2.6.16/include/linux/sched.h 2006-03-29 18:12:57.000000000 -0500
@@ -540,6 +540,20 @@ struct sched_info {
extern struct file_operations proc_schedstat_operations;
#endif
+#ifdef CONFIG_TASK_DELAY_ACCT
+struct task_delay_info {
+ spinlock_t lock;
+
+ /* For each stat XXX, add following, aligned appropriately
+ *
+ * struct timespec XXX_start, XXX_end;
+ * u64 XXX_delay;
+ * u32 XXX_count;
+ */
+};
+#endif
+
+
enum idle_type
{
SCHED_IDLE,
@@ -871,6 +885,9 @@ struct task_struct {
#endif
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;
+#ifdef CONFIG_TASK_DELAY_ACCT
+ struct task_delay_info *delays;
+#endif
};
static inline pid_t process_group(struct task_struct *tsk)
Index: linux-2.6.16/init/Kconfig
===================================================================
--- linux-2.6.16.orig/init/Kconfig 2006-03-29 18:12:55.000000000 -0500
+++ linux-2.6.16/init/Kconfig 2006-03-29 18:12:57.000000000 -0500
@@ -150,6 +150,19 @@ config BSD_PROCESS_ACCT_V3
for processing it. A preliminary version of these tools is available
at <http://www.physik3.uni-rostock.de/tim/kernel/utils/acct/>.
+config TASK_DELAY_ACCT
+ bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ help
+ Collect information on time spent by a task waiting for system
+ resources like cpu, synchronous block I/O completion and swapping
+ in pages. Such statistics can help in setting a task's priorities
+ relative to other tasks for cpu, io, rss limits etc.
+
+ Unlike BSD process accounting, this information is available
+ continuously during the lifetime of a task.
+
+ Say N if unsure.
+
config SYSCTL
bool "Sysctl support"
---help---
Index: linux-2.6.16/init/main.c
===================================================================
--- linux-2.6.16.orig/init/main.c 2006-03-29 18:12:55.000000000 -0500
+++ linux-2.6.16/init/main.c 2006-03-29 18:12:57.000000000 -0500
@@ -47,6 +47,7 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
+#include <linux/delayacct.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -537,6 +538,7 @@ asmlinkage void __init start_kernel(void
proc_root_init();
#endif
cpuset_init();
+ delayacct_init();
check_bugs();
Index: linux-2.6.16/kernel/delayacct.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/kernel/delayacct.c 2006-03-29 18:12:57.000000000 -0500
@@ -0,0 +1,92 @@
+/* delayacct.c - per-task delay accounting
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sysctl.h>
+#include <linux/delayacct.h>
+
+int delayacct_on __read_mostly = 0; /* Delay accounting turned on/off */
+kmem_cache_t *delayacct_cache;
+
+static int __init delayacct_setup_enable(char *str)
+{
+ delayacct_on = 1;
+ return 1;
+}
+__setup("delayacct", delayacct_setup_enable);
+
+void delayacct_init(void)
+{
+ delayacct_cache = kmem_cache_create("delayacct_cache",
+ sizeof(struct task_delay_info),
+ 0,
+ SLAB_PANIC,
+ NULL, NULL);
+ delayacct_tsk_init(&init_task);
+}
+
+void __delayacct_tsk_init(struct task_struct *tsk)
+{
+ tsk->delays = kmem_cache_alloc(delayacct_cache, SLAB_KERNEL);
+ if (tsk->delays) {
+ memset(tsk->delays, 0, sizeof(*tsk->delays));
+ spin_lock_init(&tsk->delays->lock);
+ }
+}
+
+void __delayacct_tsk_exit(struct task_struct *tsk)
+{
+ if (tsk->delays) {
+ kmem_cache_free(delayacct_cache, tsk->delays);
+ tsk->delays = NULL;
+ }
+}
+
+/*
+ * Start accounting for a delay statistic using
+ * its starting timestamp (@start)
+ */
+
+static inline void delayacct_start(struct timespec *start)
+{
+ do_posix_clock_monotonic_gettime(start);
+}
+
+/*
+ * Finish delay accounting for a statistic using
+ * its timestamps (@start, @end), accumalator (@total) and @count
+ */
+
+static inline void delayacct_end(struct timespec *start, struct timespec *end,
+ u64 *total, u32 *count)
+{
+ struct timespec ts;
+ nsec_t ns;
+
+ do_posix_clock_monotonic_gettime(end);
+ ts.tv_sec = end->tv_sec - start->tv_sec;
+ ts.tv_nsec = end->tv_nsec - start->tv_nsec;
+ ns = timespec_to_ns(&ts);
+ if (ns < 0)
+ return;
+
+ spin_lock(¤t->delays->lock);
+ *total += ns;
+ (*count)++;
+ spin_unlock(¤t->delays->lock);
+}
+
Index: linux-2.6.16/kernel/fork.c
===================================================================
--- linux-2.6.16.orig/kernel/fork.c 2006-03-29 18:12:55.000000000 -0500
+++ linux-2.6.16/kernel/fork.c 2006-03-29 18:12:57.000000000 -0500
@@ -44,6 +44,7 @@
#include <linux/rmap.h>
#include <linux/acct.h>
#include <linux/cn_proc.h>
+#include <linux/delayacct.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -972,6 +973,7 @@ static task_t *copy_process(unsigned lon
goto bad_fork_cleanup_put_domain;
p->did_exec = 0;
+ delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
copy_flags(clone_flags, p);
p->pid = pid;
retval = -EFAULT;
Index: linux-2.6.16/kernel/exit.c
===================================================================
--- linux-2.6.16.orig/kernel/exit.c 2006-03-29 18:12:55.000000000 -0500
+++ linux-2.6.16/kernel/exit.c 2006-03-29 18:12:57.000000000 -0500
@@ -31,6 +31,7 @@
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/mutex.h>
+#include <linux/delayacct.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -842,6 +843,8 @@ fastcall NORET_TYPE void do_exit(long co
preempt_count());
acct_update_integrals(tsk);
+ delayacct_tsk_exit(tsk);
+
if (tsk->mm) {
update_hiwater_rss(tsk->mm);
update_hiwater_vm(tsk->mm);
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [Patch 1/8] Setup
2006-03-30 0:35 ` [Patch 1/8] Setup Shailabh Nagar
@ 2006-03-30 5:03 ` Andrew Morton
2006-03-30 15:07 ` Shailabh Nagar
0 siblings, 1 reply; 18+ messages in thread
From: Andrew Morton @ 2006-03-30 5:03 UTC (permalink / raw)
To: Shailabh Nagar; +Cc: linux-kernel
Shailabh Nagar <nagar@watson.ibm.com> wrote:
>
> delayacct-setup.patch
>
> Initialization code related to collection of per-task "delay"
> statistics which measure how long it had to wait for cpu,
> sync block io, swapping etc. The collection of statistics and
> the interface are in other patches. This patch sets up the data
> structures and allows the statistics collection to be disabled
> through a kernel boot paramater.
>
> ...
>
> + delayacct [KNL] Enable per-task delay accounting
> +
Why does this boot parameter exist?
The code is neat-looking.
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6.16/kernel/delayacct.c 2006-03-29 18:12:57.000000000 -0500
> @@ -0,0 +1,92 @@
> +/* delayacct.c - per-task delay accounting
> + *
> + * Copyright (C) Shailabh Nagar, IBM Corp. 2006
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
> + * the GNU General Public License for more details.
> + */
> +
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/time.h>
> +#include <linux/sysctl.h>
> +#include <linux/delayacct.h>
> +
> +int delayacct_on __read_mostly = 0; /* Delay accounting turned on/off */
Yes, it should be __read_mostly. But it shouldn't be initialised to zero.
> +void __delayacct_tsk_init(struct task_struct *tsk)
> +{
> + tsk->delays = kmem_cache_alloc(delayacct_cache, SLAB_KERNEL);
> + if (tsk->delays) {
> + memset(tsk->delays, 0, sizeof(*tsk->delays));
> + spin_lock_init(&tsk->delays->lock);
> + }
> +}
We have kmem_cache_zalloc() now.
> +void __delayacct_tsk_exit(struct task_struct *tsk)
> +{
> + if (tsk->delays) {
> + kmem_cache_free(delayacct_cache, tsk->delays);
> + tsk->delays = NULL;
> + }
> +}
delayacct_tsk_exit() already checked tsk->delays.
> +/*
> + * Finish delay accounting for a statistic using
> + * its timestamps (@start, @end), accumalator (@total) and @count
> + */
> +
> +static inline void delayacct_end(struct timespec *start, struct timespec *end,
> + u64 *total, u32 *count)
> +{
> + struct timespec ts;
> + nsec_t ns;
> +
> + do_posix_clock_monotonic_gettime(end);
> + ts.tv_sec = end->tv_sec - start->tv_sec;
> + ts.tv_nsec = end->tv_nsec - start->tv_nsec;
> + ns = timespec_to_ns(&ts);
> + if (ns < 0)
> + return;
> +
> + spin_lock(¤t->delays->lock);
> + *total += ns;
> + (*count)++;
> + spin_unlock(¤t->delays->lock);
> +}
It's strange to have a static inline function at the end of a .c file. I
guess this gets used in later patches.
It looks rather too big to be inlined.
I'm surprised that we don't already have a timeval_sub() function
somewhere.
The code you have there will cause ts.tv_nsec to go negative half the time.
It looks like timespec_to_ns() will happily fix that up for us, but it's
all a bit fragile.
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [Patch 1/8] Setup
2006-03-30 5:03 ` Andrew Morton
@ 2006-03-30 15:07 ` Shailabh Nagar
0 siblings, 0 replies; 18+ messages in thread
From: Shailabh Nagar @ 2006-03-30 15:07 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
Andrew Morton wrote:
>Shailabh Nagar <nagar@watson.ibm.com> wrote:
>
>
>>delayacct-setup.patch
>>
>>Initialization code related to collection of per-task "delay"
>>statistics which measure how long it had to wait for cpu,
>>sync block io, swapping etc. The collection of statistics and
>>the interface are in other patches. This patch sets up the data
>>structures and allows the statistics collection to be disabled
>>through a kernel boot paramater.
>>
>>...
>>
>>+ delayacct [KNL] Enable per-task delay accounting
>>+
>>
>>
>
>Why does this boot parameter exist?
>
>
To allow people who aren't interested in these statistics from paying the
overhead of collecting the stats for each task, the memory consumed by
per-task
accounting structures that get allocated etc.
>The code is neat-looking.
>
>
Thanks !
>>--- /dev/null 1970-01-01 00:00:00.000000000 +0000
>>+++ linux-2.6.16/kernel/delayacct.c 2006-03-29 18:12:57.000000000 -0500
>>@@ -0,0 +1,92 @@
>>+/* delayacct.c - per-task delay accounting
>>+ *
>>+ * Copyright (C) Shailabh Nagar, IBM Corp. 2006
>>+ *
>>+ * This program is free software; you can redistribute it and/or modify
>>+ * it under the terms of the GNU General Public License as published by
>>+ * the Free Software Foundation; either version 2 of the License, or
>>+ * (at your option) any later version.
>>+ *
>>+ * This program is distributed in the hope that it would be useful, but
>>+ * WITHOUT ANY WARRANTY; without even the implied warranty of
>>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
>>+ * the GNU General Public License for more details.
>>+ */
>>+
>>+#include <linux/sched.h>
>>+#include <linux/slab.h>
>>+#include <linux/time.h>
>>+#include <linux/sysctl.h>
>>+#include <linux/delayacct.h>
>>+
>>+int delayacct_on __read_mostly = 0; /* Delay accounting turned on/off */
>>
>>
>
>Yes, it should be __read_mostly. But it shouldn't be initialised to zero.
>
>
Will fix.
>
>
>>+void __delayacct_tsk_init(struct task_struct *tsk)
>>+{
>>+ tsk->delays = kmem_cache_alloc(delayacct_cache, SLAB_KERNEL);
>>+ if (tsk->delays) {
>>+ memset(tsk->delays, 0, sizeof(*tsk->delays));
>>+ spin_lock_init(&tsk->delays->lock);
>>+ }
>>+}
>>
>>
>
>We have kmem_cache_zalloc() now.
>
>
Will use.
>
>
>>+void __delayacct_tsk_exit(struct task_struct *tsk)
>>+{
>>+ if (tsk->delays) {
>>+ kmem_cache_free(delayacct_cache, tsk->delays);
>>+ tsk->delays = NULL;
>>+ }
>>+}
>>
>>
>
>delayacct_tsk_exit() already checked tsk->delays.
>
>
Oops. Will fix.
>
>
>>+/*
>>+ * Finish delay accounting for a statistic using
>>+ * its timestamps (@start, @end), accumalator (@total) and @count
>>+ */
>>+
>>+static inline void delayacct_end(struct timespec *start, struct timespec *end,
>>+ u64 *total, u32 *count)
>>+{
>>+ struct timespec ts;
>>+ nsec_t ns;
>>+
>>+ do_posix_clock_monotonic_gettime(end);
>>+ ts.tv_sec = end->tv_sec - start->tv_sec;
>>+ ts.tv_nsec = end->tv_nsec - start->tv_nsec;
>>+ ns = timespec_to_ns(&ts);
>>+ if (ns < 0)
>>+ return;
>>+
>>+ spin_lock(¤t->delays->lock);
>>+ *total += ns;
>>+ (*count)++;
>>+ spin_unlock(¤t->delays->lock);
>>+}
>>
>>
>
>It's strange to have a static inline function at the end of a .c file. I
>guess this gets used in later patches.
>
>
Yes. It gets called as part of the __delayacct_blkio_end call in a later
patch.
>It looks rather too big to be inlined.
>
>
It gets used only once currently so defining it as a separate function
was more to
aid understanding.
>I'm surprised that we don't already have a timeval_sub() function
>somewhere.
>
>
There isn't one currently though we'd added a generic function timespec_diff
in an earlier iteration of the patches
http://www.uwsg.indiana.edu/hypermail/linux/kernel/0603.1/1914.html
In the ensuing discussion (under the same thread above), the problem you
mention
below came up - whether a negative ts.tv_nsec should be returned as is
or should the
entire timespec be normalized. The consensus was that a normalized value
would be
appropriate for a generic function.
However, we didn't want to pay the extra cycles of normalizing the
return value since our
usage (using timespec_to_ns) wouldn't need it.
So we chose to remove the generic function and use the two line
subtraction directly.
Especially since we don't really care to use truly negative differences
(which shouldn't happen
since the timestamps are collected assuming monotonically increasing
values).
>The code you have there will cause ts.tv_nsec to go negative half the time.
>It looks like timespec_to_ns() will happily fix that up for us, but it's
>all a bit fragile.
>
>
If you think relying on timespec_to_ns as an "auto" normalizer is flaky,
we can go back to
introducing a timespec_sub (which is a better name than timespec_diff)
which returns a normalized
diff and use that.
--Shailabh
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2006-05-10 11:02 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-05-02 6:12 [Patch 1/8] Setup Balbir Singh
2006-05-08 21:17 ` Andrew Morton
2006-05-09 3:48 ` Balbir Singh
2006-05-10 10:16 ` [PATCH][delayacct] Fix the timespec_sub() interface (was Re: [Patch 1/8] Setup) Balbir Singh
2006-05-10 10:24 ` Andrew Morton
2006-05-10 10:27 ` Balbir Singh
2006-05-10 10:58 ` Balbir Singh
2006-05-08 21:23 ` [Patch 1/8] Setup Andrew Morton
2006-05-09 3:56 ` Balbir Singh
2006-05-10 10:18 ` [PATCH][delayacct] un-inline delayacct_end(), remove initialization of ts (was Re: [Patch 1/8] Setup) Balbir Singh
2006-05-09 11:46 ` [Patch 1/8] Setup Thomas Gleixner
2006-05-09 13:20 ` [Lse-tech] " Balbir Singh
-- strict thread matches above, loose matches on Subject: below --
2006-04-22 2:16 [Patch 0/8] per-task delay accounting Shailabh Nagar
2006-04-22 2:23 ` [Patch 1/8] Setup Shailabh Nagar
2006-04-24 2:02 ` Randy.Dunlap
2006-04-24 17:26 ` Shailabh Nagar
2006-03-30 0:32 [Patch 0/8] per-task delay accounting Shailabh Nagar
2006-03-30 0:35 ` [Patch 1/8] Setup Shailabh Nagar
2006-03-30 5:03 ` Andrew Morton
2006-03-30 15:07 ` Shailabh Nagar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox