All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch 6/8] delay accounting usage of taskstats interface
@ 2006-05-02  6:19 Balbir Singh
  2006-05-04  2:13 ` [Lse-tech] " Jay Lan
  2006-05-04 18:44 ` [updated] " Balbir Singh
  0 siblings, 2 replies; 6+ messages in thread
From: Balbir Singh @ 2006-05-02  6:19 UTC (permalink / raw)
  To: linux-kernel; +Cc: lse-tech, jlan


Changelog

Fixes suggested by Jay Lan
- check for tidstats before taking the mutex_lock in taskstats_exit_send()
- add back version information for struct taskstats

Fixes comments by akpm (on earlier patch now incorporated here)
- detailed comments on atomicity rules of accounting fields
- replace use of nsec_t

Other changes
- fix coding style

delayacct-taskstats.patch

Usage of taskstats interface by delay accounting.

Signed-off-by: Shailabh Nagar <nagar@us.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---

 include/linux/delayacct.h |   13 +++++++++++
 include/linux/taskstats.h |   53 ++++++++++++++++++++++++++++++++++++++++++++--
 init/Kconfig              |    1 
 kernel/delayacct.c        |   42 ++++++++++++++++++++++++++++++++++++
 kernel/taskstats.c        |   13 ++++++++++-
 5 files changed, 119 insertions(+), 3 deletions(-)

diff -puN include/linux/delayacct.h~delayacct-taskstats include/linux/delayacct.h
--- linux-2.6.17-rc3/include/linux/delayacct.h~delayacct-taskstats	2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h	2006-05-02 10:39:30.000000000 +0530
@@ -18,6 +18,7 @@
 #define _LINUX_DELAYACCT_H
 
 #include <linux/sched.h>
+#include <linux/taskstats_kern.h>
 
 /*
  * Per-task flags relevant to delay accounting
@@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct 
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
 extern void __delayacct_blkio_end(void);
+extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 
 static inline void delayacct_set_flag(int flag)
 {
@@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v
 		__delayacct_blkio_end();
 }
 
+static inline int delayacct_add_tsk(struct taskstats *d,
+					struct task_struct *tsk)
+{
+	if (!tsk->delays)
+		return -EINVAL;
+	return __delayacct_add_tsk(d, tsk);
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
 {}
 static inline void delayacct_blkio_end(void)
 {}
+static inline int delayacct_add_tsk(struct taskstats *d,
+					struct task_struct *tsk)
+{ return 0; }
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff -puN include/linux/taskstats.h~delayacct-taskstats include/linux/taskstats.h
--- linux-2.6.17-rc3/include/linux/taskstats.h~delayacct-taskstats	2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/taskstats.h	2006-05-02 10:36:31.000000000 +0530
@@ -33,9 +33,58 @@
 
 struct taskstats {
 
-	/* Version 1 */
+	/* Delay accounting fields start
+	 *
+	 * All values, until comment "Delay accounting fields end" are
+	 * available only if delay accounting is enabled, even though the last
+	 * few fields are not delays
+	 *
+	 * xxx_count is the number of delay values recorded
+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+	 *
+	 * xxx_delay_total wraps around to zero on overflow
+	 * xxx_count incremented regardless of overflow
+	 */
+
+	/* Delay waiting for cpu, while runnable
+	 * count, delay_total NOT updated atomically
+	 */
+	__u64	cpu_count;
+	__u64	cpu_delay_total;
+
+	/* Following four fields atomically updated using task->delays->lock */
+
+	/* Delay waiting for synchronous block I/O to complete
+	 * does not account for delays in I/O submission
+	 */
+	__u64	blkio_count;
+	__u64	blkio_delay_total;
+
+	/* Delay waiting for page fault I/O (swap in only) */
+	__u64	swapin_count;
+	__u64	swapin_delay_total;
+
+	/* cpu "wall-clock" running time
+	 * On some architectures, value will adjust for cpu time stolen
+	 * from the kernel in involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_real_total;
+
+	/* cpu "virtual" running time
+	 * Uses time intervals seen by the kernel i.e. no adjustment
+	 * for kernel's involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_virtual_total;
+	/* Delay accounting fields end */
+	/* version 1 ends here */
+
+	/* version of taskstats */
+	__u64	version;
 
-	int filler_avoids_empty_struct_warnings;
 };
 
 
diff -puN init/Kconfig~delayacct-taskstats init/Kconfig
--- linux-2.6.17-rc3/init/Kconfig~delayacct-taskstats	2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/init/Kconfig	2006-05-02 10:36:31.000000000 +0530
@@ -164,6 +164,7 @@ config TASKSTATS
 
 config TASK_DELAY_ACCT
 	bool "Enable per-task delay accounting (EXPERIMENTAL)"
+	depends on TASKSTATS
 	help
 	  Collect information on time spent by a task waiting for system
 	  resources like cpu, synchronous block I/O completion and swapping
diff -puN kernel/delayacct.c~delayacct-taskstats kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~delayacct-taskstats	2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c	2006-05-02 10:37:57.000000000 +0530
@@ -104,3 +104,45 @@ void __delayacct_blkio_end(void)
 			&current->delays->blkio_delay,
 			&current->delays->blkio_count);
 }
+
+int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{
+	s64 tmp;
+	struct timespec ts;
+	unsigned long t1,t2,t3;
+
+	tmp = (s64)d->cpu_run_real_total;
+	tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
+	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
+
+	/*
+	 * No locking available for sched_info (and too expensive to add one)
+	 * Mitigate by taking snapshot of values
+	 */
+	t1 = tsk->sched_info.pcnt;
+	t2 = tsk->sched_info.run_delay;
+	t3 = tsk->sched_info.cpu_time;
+
+	d->cpu_count += t1;
+
+	jiffies_to_timespec(t2, &ts);
+	tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
+
+	tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+	d->cpu_run_virtual_total =
+		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
+
+	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
+
+	spin_lock(&tsk->delays->lock);
+	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+	d->blkio_count += tsk->delays->blkio_count;
+	d->swapin_count += tsk->delays->swapin_count;
+	spin_unlock(&tsk->delays->lock);
+
+	return 0;
+}
diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
--- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats	2006-05-02 10:36:31.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/taskstats.c	2006-05-02 10:36:31.000000000 +0530
@@ -18,6 +18,7 @@
 
 #include <linux/kernel.h>
 #include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
@@ -120,7 +121,9 @@ static int fill_pid(pid_t pid, struct ta
 	 *		goto err;
 	 */
 
-err:
+	rc = delayacct_add_tsk(stats, tsk);
+
+	/* Define err: label here if needed */
 	put_task_struct(tsk);
 	return rc;
 
@@ -152,6 +155,10 @@ static int fill_tgid(pid_t tgid, struct 
 		 *		break;
 		 */
 
+		rc = delayacct_add_tsk(stats, tsk);
+		if (rc)
+			break;
+
 	} while_each_thread(first, tsk);
 	read_unlock(&tasklist_lock);
 
@@ -255,6 +262,8 @@ void taskstats_exit_send(struct task_str
 	if (rc < 0)
 		goto err_skb;
 
+	tidstats->version = TASKSTATS_VERSION;
+
 	na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_PID);
 	NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_PID, (u32)tsk->pid);
 	NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
@@ -270,6 +279,8 @@ void taskstats_exit_send(struct task_str
 	if (rc < 0)
 		goto err_skb;
 
+	tgidstats->version = TASKSTATS_VERSION;
+
 	na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
 	NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
 	NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
_

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Lse-tech] [Patch 6/8] delay accounting usage of taskstats interface
  2006-05-02  6:19 [Patch 6/8] delay accounting usage of taskstats interface Balbir Singh
@ 2006-05-04  2:13 ` Jay Lan
  2006-05-04  4:23   ` Balbir Singh
  2006-05-04 18:44 ` [updated] " Balbir Singh
  1 sibling, 1 reply; 6+ messages in thread
From: Jay Lan @ 2006-05-04  2:13 UTC (permalink / raw)
  To: balbir; +Cc: linux-kernel, lse-tech

Balbir Singh wrote:
>Changelog
>
>Fixes suggested by Jay Lan
>- check for tidstats before taking the mutex_lock in taskstats_exit_send()
>- add back version information for struct taskstats
>
><clip>
> 
> struct taskstats {
> 
>-	/* Version 1 */
>+	/* Delay accounting fields start
>+	 *
>+	 * All values, until comment "Delay accounting fields end" are
>+	 * available only if delay accounting is enabled, even though the last
>+	 * few fields are not delays
>+	 *
>+	 * xxx_count is the number of delay values recorded
>+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
>+	 *
>+	 * xxx_delay_total wraps around to zero on overflow
>+	 * xxx_count incremented regardless of overflow
>+	 */
>+
>+	/* Delay waiting for cpu, while runnable
>+	 * count, delay_total NOT updated atomically
>+	 */
>+	__u64	cpu_count;
>+	__u64	cpu_delay_total;
>+
>+	/* Following four fields atomically updated using task->delays->lock */
>+
>+	/* Delay waiting for synchronous block I/O to complete
>+	 * does not account for delays in I/O submission
>+	 */
>+	__u64	blkio_count;
>+	__u64	blkio_delay_total;
>+
>+	/* Delay waiting for page fault I/O (swap in only) */
>+	__u64	swapin_count;
>+	__u64	swapin_delay_total;
>+
>+	/* cpu "wall-clock" running time
>+	 * On some architectures, value will adjust for cpu time stolen
>+	 * from the kernel in involuntary waits due to virtualization.
>+	 * Value is cumulative, in nanoseconds, without a corresponding count
>+	 * and wraps around to zero silently on overflow
>+	 */
>+	__u64	cpu_run_real_total;
>+
>+	/* cpu "virtual" running time
>+	 * Uses time intervals seen by the kernel i.e. no adjustment
>+	 * for kernel's involuntary waits due to virtualization.
>+	 * Value is cumulative, in nanoseconds, without a corresponding count
>+	 * and wraps around to zero silently on overflow
>+	 */
>+	__u64	cpu_run_virtual_total;
>+	/* Delay accounting fields end */
>+	/* version 1 ends here */
>+
>+	/* version of taskstats */
>+	__u64	version;
>  

Could you place the common field "version" before any acct subsystem
specific fields?

As a matter of fact, we do not need
'filler_avoids_empty_struct_warnings' in [patch 5/8] taskstats
interface. Replacing that field with "version" would be great!

Thanks,
 - jay


> 
>-	int filler_avoids_empty_struct_warnings;
> };
> 
> 
>  


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Lse-tech] [Patch 6/8] delay accounting usage of taskstats interface
  2006-05-04  2:13 ` [Lse-tech] " Jay Lan
@ 2006-05-04  4:23   ` Balbir Singh
  0 siblings, 0 replies; 6+ messages in thread
From: Balbir Singh @ 2006-05-04  4:23 UTC (permalink / raw)
  To: Jay Lan; +Cc: linux-kernel, lse-tech

On Wed, May 03, 2006 at 07:13:54PM -0700, Jay Lan wrote:
> Balbir Singh wrote:
> >Changelog
> >
> >Fixes suggested by Jay Lan
> >- check for tidstats before taking the mutex_lock in taskstats_exit_send()
> >- add back version information for struct taskstats
> >
> ><clip>
> > 
> > struct taskstats {
> > 
> >-	/* Version 1 */
> >+	/* Delay accounting fields start
> >+	 *
> >+	 * All values, until comment "Delay accounting fields end" are
> >+	 * available only if delay accounting is enabled, even though the last
> >+	 * few fields are not delays
> >+	 *
> >+	 * xxx_count is the number of delay values recorded
> >+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
> >+	 *
> >+	 * xxx_delay_total wraps around to zero on overflow
> >+	 * xxx_count incremented regardless of overflow
> >+	 */
> >+
> >+	/* Delay waiting for cpu, while runnable
> >+	 * count, delay_total NOT updated atomically
> >+	 */
> >+	__u64	cpu_count;
> >+	__u64	cpu_delay_total;
> >+
> >+	/* Following four fields atomically updated using task->delays->lock */
> >+
> >+	/* Delay waiting for synchronous block I/O to complete
> >+	 * does not account for delays in I/O submission
> >+	 */
> >+	__u64	blkio_count;
> >+	__u64	blkio_delay_total;
> >+
> >+	/* Delay waiting for page fault I/O (swap in only) */
> >+	__u64	swapin_count;
> >+	__u64	swapin_delay_total;
> >+
> >+	/* cpu "wall-clock" running time
> >+	 * On some architectures, value will adjust for cpu time stolen
> >+	 * from the kernel in involuntary waits due to virtualization.
> >+	 * Value is cumulative, in nanoseconds, without a corresponding count
> >+	 * and wraps around to zero silently on overflow
> >+	 */
> >+	__u64	cpu_run_real_total;
> >+
> >+	/* cpu "virtual" running time
> >+	 * Uses time intervals seen by the kernel i.e. no adjustment
> >+	 * for kernel's involuntary waits due to virtualization.
> >+	 * Value is cumulative, in nanoseconds, without a corresponding count
> >+	 * and wraps around to zero silently on overflow
> >+	 */
> >+	__u64	cpu_run_virtual_total;
> >+	/* Delay accounting fields end */
> >+	/* version 1 ends here */
> >+
> >+	/* version of taskstats */
> >+	__u64	version;
> >  
> 
> Could you place the common field "version" before any acct subsystem
> specific fields?
> 
> As a matter of fact, we do not need
> 'filler_avoids_empty_struct_warnings' in [patch 5/8] taskstats
> interface. Replacing that field with "version" would be great!

Yes, I thought about it and wanted to put it upfront. To maintain compatibility
with the previous post, I decided to add it to the end.

If putting the version right up helps the readability of taskstats.h,
I can make the changes and repost the patches again.

Thanks for your review,
Balbir


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [updated] [Patch 6/8] delay accounting usage of taskstats interface
  2006-05-02  6:19 [Patch 6/8] delay accounting usage of taskstats interface Balbir Singh
  2006-05-04  2:13 ` [Lse-tech] " Jay Lan
@ 2006-05-04 18:44 ` Balbir Singh
  2006-05-09 11:46   ` Thomas Gleixner
  1 sibling, 1 reply; 6+ messages in thread
From: Balbir Singh @ 2006-05-04 18:44 UTC (permalink / raw)
  To: linux-kernel; +Cc: lse-tech, jlan, akpm


Changelog

Fixes suggested by Jay Lan
- check for tidstats before taking the mutex_lock in taskstats_exit_send()
- add back and fill version information for struct taskstats

Fixes comments by akpm (on earlier patch now incorporated here)
- detailed comments on atomicity rules of accounting fields
- replace use of nsec_t

delayacct-taskstats.patch

Usage of taskstats interface by delay accounting.

Signed-off-by: Shailabh Nagar <nagar@us.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---

 include/linux/delayacct.h |   13 ++++++++++++
 include/linux/taskstats.h |   49 ++++++++++++++++++++++++++++++++++++++++++++++
 init/Kconfig              |    1 
 kernel/delayacct.c        |   42 +++++++++++++++++++++++++++++++++++++++
 kernel/taskstats.c        |   12 ++++++++++-
 5 files changed, 116 insertions(+), 1 deletion(-)

diff -puN include/linux/delayacct.h~delayacct-taskstats include/linux/delayacct.h
--- linux-2.6.17-rc3/include/linux/delayacct.h~delayacct-taskstats	2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/delayacct.h	2006-05-04 11:26:18.000000000 +0530
@@ -18,6 +18,7 @@
 #define _LINUX_DELAYACCT_H
 
 #include <linux/sched.h>
+#include <linux/taskstats_kern.h>
 
 /*
  * Per-task flags relevant to delay accounting
@@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct 
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
 extern void __delayacct_blkio_end(void);
+extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 
 static inline void delayacct_set_flag(int flag)
 {
@@ -74,6 +76,14 @@ static inline void delayacct_blkio_end(v
 		__delayacct_blkio_end();
 }
 
+static inline int delayacct_add_tsk(struct taskstats *d,
+					struct task_struct *tsk)
+{
+	if (!tsk->delays)
+		return -EINVAL;
+	return __delayacct_add_tsk(d, tsk);
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
 {}
 static inline void delayacct_blkio_end(void)
 {}
+static inline int delayacct_add_tsk(struct taskstats *d,
+					struct task_struct *tsk)
+{ return 0; }
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff -puN include/linux/taskstats.h~delayacct-taskstats include/linux/taskstats.h
--- linux-2.6.17-rc3/include/linux/taskstats.h~delayacct-taskstats	2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/include/linux/taskstats.h	2006-05-04 09:34:11.000000000 +0530
@@ -35,6 +35,55 @@ struct taskstats {
 
 	/* Version 1 */
 	__u64	version;
+
+	/* Delay accounting fields start
+	 *
+	 * All values, until comment "Delay accounting fields end" are
+	 * available only if delay accounting is enabled, even though the last
+	 * few fields are not delays
+	 *
+	 * xxx_count is the number of delay values recorded
+	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+	 *
+	 * xxx_delay_total wraps around to zero on overflow
+	 * xxx_count incremented regardless of overflow
+	 */
+
+	/* Delay waiting for cpu, while runnable
+	 * count, delay_total NOT updated atomically
+	 */
+	__u64	cpu_count;
+	__u64	cpu_delay_total;
+
+	/* Following four fields atomically updated using task->delays->lock */
+
+	/* Delay waiting for synchronous block I/O to complete
+	 * does not account for delays in I/O submission
+	 */
+	__u64	blkio_count;
+	__u64	blkio_delay_total;
+
+	/* Delay waiting for page fault I/O (swap in only) */
+	__u64	swapin_count;
+	__u64	swapin_delay_total;
+
+	/* cpu "wall-clock" running time
+	 * On some architectures, value will adjust for cpu time stolen
+	 * from the kernel in involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_real_total;
+
+	/* cpu "virtual" running time
+	 * Uses time intervals seen by the kernel i.e. no adjustment
+	 * for kernel's involuntary waits due to virtualization.
+	 * Value is cumulative, in nanoseconds, without a corresponding count
+	 * and wraps around to zero silently on overflow
+	 */
+	__u64	cpu_run_virtual_total;
+	/* Delay accounting fields end */
+	/* version 1 ends here */
 };
 
 
diff -puN init/Kconfig~delayacct-taskstats init/Kconfig
--- linux-2.6.17-rc3/init/Kconfig~delayacct-taskstats	2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/init/Kconfig	2006-05-04 09:31:59.000000000 +0530
@@ -164,6 +164,7 @@ config TASKSTATS
 
 config TASK_DELAY_ACCT
 	bool "Enable per-task delay accounting (EXPERIMENTAL)"
+	depends on TASKSTATS
 	help
 	  Collect information on time spent by a task waiting for system
 	  resources like cpu, synchronous block I/O completion and swapping
diff -puN kernel/delayacct.c~delayacct-taskstats kernel/delayacct.c
--- linux-2.6.17-rc3/kernel/delayacct.c~delayacct-taskstats	2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/delayacct.c	2006-05-04 11:26:18.000000000 +0530
@@ -104,3 +104,45 @@ void __delayacct_blkio_end(void)
 			&current->delays->blkio_delay,
 			&current->delays->blkio_count);
 }
+
+int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{
+	s64 tmp;
+	struct timespec ts;
+	unsigned long t1,t2,t3;
+
+	tmp = (s64)d->cpu_run_real_total;
+	tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
+	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
+
+	/*
+	 * No locking available for sched_info (and too expensive to add one)
+	 * Mitigate by taking snapshot of values
+	 */
+	t1 = tsk->sched_info.pcnt;
+	t2 = tsk->sched_info.run_delay;
+	t3 = tsk->sched_info.cpu_time;
+
+	d->cpu_count += t1;
+
+	jiffies_to_timespec(t2, &ts);
+	tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
+
+	tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+	d->cpu_run_virtual_total =
+		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
+
+	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
+
+	spin_lock(&tsk->delays->lock);
+	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+	d->blkio_count += tsk->delays->blkio_count;
+	d->swapin_count += tsk->delays->swapin_count;
+	spin_unlock(&tsk->delays->lock);
+
+	return 0;
+}
diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
--- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats	2006-05-04 09:31:59.000000000 +0530
+++ linux-2.6.17-rc3-balbir/kernel/taskstats.c	2006-05-04 11:27:53.000000000 +0530
@@ -18,6 +18,7 @@
 
 #include <linux/kernel.h>
 #include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
@@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta
 	 *		goto err;
 	 */
 
-err:
+	rc = delayacct_add_tsk(stats, tsk);
+	stats->version = TASKSTATS_VERSION;
+
+	/* Define err: label here if needed */
 	put_task_struct(tsk);
 	return rc;
 
@@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct 
 		 *		break;
 		 */
 
+		rc = delayacct_add_tsk(stats, tsk);
+		if (rc)
+			break;
+
 	} while_each_thread(first, tsk);
 	read_unlock(&tasklist_lock);
+	stats->version = TASKSTATS_VERSION;
+
 
 	/*
 	 * Accounting subsytems can also add calls here if they don't
_

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [updated] [Patch 6/8] delay accounting usage of taskstats interface
  2006-05-04 18:44 ` [updated] " Balbir Singh
@ 2006-05-09 11:46   ` Thomas Gleixner
  2006-05-09 17:11     ` Balbir Singh
  0 siblings, 1 reply; 6+ messages in thread
From: Thomas Gleixner @ 2006-05-09 11:46 UTC (permalink / raw)
  To: balbir; +Cc: linux-kernel, lse-tech, jlan, akpm

On Fri, 2006-05-05 at 00:14 +0530, Balbir Singh wrote:

>  #else
>  static inline void delayacct_set_flag(int flag)
>  {}

Please make this

static inline void delayacct_set_flag(int flag) { }

Same in all other files

> @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
>  {}
>  static inline void delayacct_blkio_end(void)
>  {}
> +static inline int delayacct_add_tsk(struct taskstats *d,
> +					struct task_struct *tsk)
> +{ return 0; }

{
	return 0;
}

> +	 *
> +	 * xxx_count is the number of delay values recorded
> +	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
> +	 *
> +	 * xxx_delay_total wraps around to zero on overflow
> +	 * xxx_count incremented regardless of overflow
> +	 */

Please use a structure for that.

struct delay_account {
	ktime_t delay;
	u64 count;
};

Also instead of having tons of fields added, please use something like

enum {
	CPU_ACCT,
	BLKIO_ACCT,
	....,
	MAX_ACCT_TYPES
};
	struct delay_account stats[MAX_ACCT_TYPES];


> +	/* Delay waiting for cpu, while runnable
> +	 * count, delay_total NOT updated atomically
> +	 */
> +	__u64	cpu_count;
> +	__u64	cpu_delay_total;
> +	/* Following four fields atomically updated using task->delays->lock */
> +
> +	/* Delay waiting for synchronous block I/O to complete
> +	 * does not account for delays in I/O submission
> +	 */
> +	__u64	blkio_count;
> +	__u64	blkio_delay_total;
> +
> +	/* Delay waiting for page fault I/O (swap in only) */
> +	__u64	swapin_count;
> +	__u64	swapin_delay_total;
> +
> +	/* cpu "wall-clock" running time
> +	 * On some architectures, value will adjust for cpu time stolen
> +	 * from the kernel in involuntary waits due to virtualization.
> +	 * Value is cumulative, in nanoseconds, without a corresponding count
> +	 * and wraps around to zero silently on overflow

When will this overflow happen ? 2^64 nsec ~= 584 years


> +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
> +{

Please add some comment what this code does.

> +	s64 tmp;
> +	struct timespec ts;
> +	unsigned long t1,t2,t3;
> +
> +	tmp = (s64)d->cpu_run_real_total;
> +	tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
> +	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
> +
> +	/*
> +	 * No locking available for sched_info (and too expensive to add one)
> +	 * Mitigate by taking snapshot of values
> +	 */
> +	t1 = tsk->sched_info.pcnt;
> +	t2 = tsk->sched_info.run_delay;
> +	t3 = tsk->sched_info.cpu_time;
> +
> +	d->cpu_count += t1;
> +
> +	jiffies_to_timespec(t2, &ts);
> +	tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
> +	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
> +
> +	tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
> +	d->cpu_run_virtual_total =
> +		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
> +
> +	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
> +
> +	spin_lock(&tsk->delays->lock);
> +	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
> +	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;

I really do not get whatfor all these comparisons are. How can those
values get > 2^63 within a reasonable timeframe ?

> +	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
> +	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
> +	d->blkio_count += tsk->delays->blkio_count;
> +	d->swapin_count += tsk->delays->swapin_count;
> +	spin_unlock(&tsk->delays->lock);
> +	return 0;
> +}
> diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
> --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats	2006-05-04 09:31:59.000000000 +0530
> +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c	2006-05-04 11:27:53.000000000 +0530
> @@ -18,6 +18,7 @@
>  
>  #include <linux/kernel.h>
>  #include <linux/taskstats_kern.h>
> +#include <linux/delayacct.h>
>  #include <net/genetlink.h>
>  #include <asm/atomic.h>
>  
> @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta
>  	 *		goto err;
>  	 */
>  
> -err:
> +	rc = delayacct_add_tsk(stats, tsk);
> +	stats->version = TASKSTATS_VERSION;
> +
> +	/* Define err: label here if needed */

Please remove that comment. The err label goes to a place where it makes
sense.

>  	put_task_struct(tsk);
>  	return rc;
>  
> @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct 
>  		 *		break;
>  		 */
>  
> +		rc = delayacct_add_tsk(stats, tsk);
> +		if (rc)
> +			break;
> +
>  	} while_each_thread(first, tsk);
>  	read_unlock(&tasklist_lock);
> +	stats->version = TASKSTATS_VERSION;
> +
>  
>  	/*
>  	 * Accounting subsytems can also add calls here if they don't
> _
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [updated] [Patch 6/8] delay accounting usage of taskstats interface
  2006-05-09 11:46   ` Thomas Gleixner
@ 2006-05-09 17:11     ` Balbir Singh
  0 siblings, 0 replies; 6+ messages in thread
From: Balbir Singh @ 2006-05-09 17:11 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: linux-kernel, lse-tech, jlan, akpm

On Tue, May 09, 2006 at 11:46:48AM +0000, Thomas Gleixner wrote:
> On Fri, 2006-05-05 at 00:14 +0530, Balbir Singh wrote:
> 
> >  #else
> >  static inline void delayacct_set_flag(int flag)
> >  {}
> 
> Please make this
> 
> static inline void delayacct_set_flag(int flag) { }
> 
> Same in all other files

Will do.

> 
> > @@ -89,6 +99,9 @@ static inline void delayacct_blkio_start
> >  {}
> >  static inline void delayacct_blkio_end(void)
> >  {}
> > +static inline int delayacct_add_tsk(struct taskstats *d,
> > +					struct task_struct *tsk)
> > +{ return 0; }
> 
> {
> 	return 0;
> }
> 
> > +	 *
> > +	 * xxx_count is the number of delay values recorded
> > +	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
> > +	 *
> > +	 * xxx_delay_total wraps around to zero on overflow
> > +	 * xxx_count incremented regardless of overflow
> > +	 */
> 
> Please use a structure for that.
> 
> struct delay_account {
> 	ktime_t delay;
> 	u64 count;
> };
> 
> Also instead of having tons of fields added, please use something like
> 
> enum {
> 	CPU_ACCT,
> 	BLKIO_ACCT,
> 	....,
> 	MAX_ACCT_TYPES
> };
> 	struct delay_account stats[MAX_ACCT_TYPES];
>

Yes, this would make sense if all the fields were of type delay_account.
currently, only CPU, BLKIO and SWAPIN are of that type. The new
fields that will be added to this structure (since this is a part of
a statistics interface now) could potentially belong to other subsystems.
 
> 
> > +	/* Delay waiting for cpu, while runnable
> > +	 * count, delay_total NOT updated atomically
> > +	 */
> > +	__u64	cpu_count;
> > +	__u64	cpu_delay_total;
> > +	/* Following four fields atomically updated using task->delays->lock */
> > +
> > +	/* Delay waiting for synchronous block I/O to complete
> > +	 * does not account for delays in I/O submission
> > +	 */
> > +	__u64	blkio_count;
> > +	__u64	blkio_delay_total;
> > +
> > +	/* Delay waiting for page fault I/O (swap in only) */
> > +	__u64	swapin_count;
> > +	__u64	swapin_delay_total;
> > +
> > +	/* cpu "wall-clock" running time
> > +	 * On some architectures, value will adjust for cpu time stolen
> > +	 * from the kernel in involuntary waits due to virtualization.
> > +	 * Value is cumulative, in nanoseconds, without a corresponding count
> > +	 * and wraps around to zero silently on overflow
> 
> When will this overflow happen ? 2^64 nsec ~= 584 years
>

Detecting a potential overflow is not a bad thing IMHO. Currently
the existing data type makes this overflow very rare.
 
> 
> > +int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
> > +{
> 
> Please add some comment what this code does.

Will do

> 
> > +	s64 tmp;
> > +	struct timespec ts;
> > +	unsigned long t1,t2,t3;
> > +
> > +	tmp = (s64)d->cpu_run_real_total;
> > +	tmp += (u64)(tsk->utime + tsk->stime) * TICK_NSEC;
> > +	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
> > +
> > +	/*
> > +	 * No locking available for sched_info (and too expensive to add one)
> > +	 * Mitigate by taking snapshot of values
> > +	 */
> > +	t1 = tsk->sched_info.pcnt;
> > +	t2 = tsk->sched_info.run_delay;
> > +	t3 = tsk->sched_info.cpu_time;
> > +
> > +	d->cpu_count += t1;
> > +
> > +	jiffies_to_timespec(t2, &ts);
> > +	tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
> > +	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
> > +
> > +	tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
> > +	d->cpu_run_virtual_total =
> > +		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
> > +
> > +	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
> > +
> > +	spin_lock(&tsk->delays->lock);
> > +	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
> > +	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
> 
> I really do not get whatfor all these comparisons are. How can those
> values get > 2^63 within a reasonable timeframe ?

Its defensive programming, granted that it is probably not required.

> 
> > +	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
> > +	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
> > +	d->blkio_count += tsk->delays->blkio_count;
> > +	d->swapin_count += tsk->delays->swapin_count;
> > +	spin_unlock(&tsk->delays->lock);
> > +	return 0;
> > +}
> > diff -puN kernel/taskstats.c~delayacct-taskstats kernel/taskstats.c
> > --- linux-2.6.17-rc3/kernel/taskstats.c~delayacct-taskstats	2006-05-04 09:31:59.000000000 +0530
> > +++ linux-2.6.17-rc3-balbir/kernel/taskstats.c	2006-05-04 11:27:53.000000000 +0530
> > @@ -18,6 +18,7 @@
> >  
> >  #include <linux/kernel.h>
> >  #include <linux/taskstats_kern.h>
> > +#include <linux/delayacct.h>
> >  #include <net/genetlink.h>
> >  #include <asm/atomic.h>
> >  
> > @@ -120,7 +121,10 @@ static int fill_pid(pid_t pid, struct ta
> >  	 *		goto err;
> >  	 */
> >  
> > -err:
> > +	rc = delayacct_add_tsk(stats, tsk);
> > +	stats->version = TASKSTATS_VERSION;
> > +
> > +	/* Define err: label here if needed */
> 
> Please remove that comment. The err label goes to a place where it makes
> sense.

Will fix this.

> 
> >  	put_task_struct(tsk);
> >  	return rc;
> >  
> > @@ -152,8 +156,14 @@ static int fill_tgid(pid_t tgid, struct 
> >  		 *		break;
> >  		 */
> >  
> > +		rc = delayacct_add_tsk(stats, tsk);
> > +		if (rc)
> > +			break;
> > +
> >  	} while_each_thread(first, tsk);
> >  	read_unlock(&tasklist_lock);
> > +	stats->version = TASKSTATS_VERSION;
> > +
> >  
> >  	/*
> >  	 * Accounting subsytems can also add calls here if they don't
> > _
> > -
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/


	Thanks for your review,
	Balbir Singh,
	Linux Technology Center,
	IBM Software Labs

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2006-05-09 17:15 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-05-02  6:19 [Patch 6/8] delay accounting usage of taskstats interface Balbir Singh
2006-05-04  2:13 ` [Lse-tech] " Jay Lan
2006-05-04  4:23   ` Balbir Singh
2006-05-04 18:44 ` [updated] " Balbir Singh
2006-05-09 11:46   ` Thomas Gleixner
2006-05-09 17:11     ` Balbir Singh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.