* [PATCH] Performance Stats: Kernel patch
@ 2007-05-11 17:13 Maxim Uvarov
2007-05-12 10:39 ` Andrea Righi
0 siblings, 1 reply; 19+ messages in thread
From: Maxim Uvarov @ 2007-05-11 17:13 UTC (permalink / raw)
To: LKML; +Cc: linuxppc-dev, wli, dada1, pavel, linas, msyrchin
Hello,
Thanks all for very useful comments.
Please review this version.
Best regards,
Maxim.
Patch makes available to the user the following
task and process performance statistics:
* Involuntary Context Switches (task_struct->nivcsw)
* Voluntary Context Switches (task_struct->nvcsw)
* Number of system calls (added new counter
thread_info->sysall_count)
Statistics information is available from:
1. taskstats interface (Documentation/accounting/)
2. /proc/PID/status (task only).
This data is useful for detecting hyperactivity
patterns between processes.
Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
---
Documentation/accounting/getdelays.c | 20 ++++++++++++++++++--
Documentation/accounting/taskstats-struct.txt | 7 +++++++
arch/i386/kernel/asm-offsets.c | 1 +
arch/i386/kernel/entry.S | 3 +++
arch/powerpc/kernel/asm-offsets.c | 2 ++
arch/powerpc/kernel/entry_32.S | 5 +++++
arch/powerpc/kernel/entry_64.S | 5 +++++
arch/x86_64/kernel/asm-offsets.c | 1 +
arch/x86_64/kernel/entry.S | 3 +++
fs/proc/array.c | 14 ++++++++++++++
include/asm-i386/thread_info.h | 1 +
include/asm-powerpc/thread_info.h | 1 +
include/asm-x86_64/thread_info.h | 1 +
include/linux/taskstats.h | 6 +++++-
kernel/fork.c | 3 +++
kernel/taskstats.c | 6 ++++++
16 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index e9126e7..1be7d65 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -49,6 +49,7 @@ char name[100];
int dbg;
int print_delays;
int print_io_accounting;
+int print_task_stats;
__u64 stime, utime;
#define PRINTF(fmt, arg...) { \
@@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
"IO %15s%15s\n"
" %15llu%15llu\n"
"MEM %15s%15s\n"
- " %15llu%15llu\n\n",
+ " %15llu%15llu\n"
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
@@ -196,6 +197,15 @@ void print_delayacct(struct taskstats *t)
"count", "delay total", t->swapin_count, t->swapin_delay_total);
}
+void print_taskstats(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s%15s\n"
+ " %15lu%15lu%15lu\n",
+ "syscalls", "voluntary", "nonvoluntary",
+ t->syscall_counter, t->nvcsw, t->nivcsw);
+
+}
+
void print_ioacct(struct taskstats *t)
{
printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
@@ -227,7 +237,7 @@ int main(int argc, char *argv[])
struct msgtemplate msg;
while (1) {
- c = getopt(argc, argv, "diw:r:m:t:p:v:l");
+ c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
if (c < 0)
break;
@@ -240,6 +250,10 @@ int main(int argc, char *argv[])
printf("printing IO accounting\n");
print_io_accounting = 1;
break;
+ case 'q':
+ printf("printing task/process stasistics:\n");
+ print_task_stats = 1;
+ break;
case 'w':
strncpy(logfile, optarg, MAX_FILENAME);
printf("write to file %s\n", logfile);
@@ -381,6 +395,8 @@ int main(int argc, char *argv[])
print_delayacct((struct taskstats *) NLA_DATA(na));
if (print_io_accounting)
print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_stats)
+ print_taskstats((struct taskstats *) NLA_DATA(na));
if (fd) {
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
err(1,"write error\n");
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index 661c797..606aef6 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
/* Extended accounting fields end */
Their values are collected if CONFIG_TASK_XACCT is set.
+4) Per-task and per-thread statistics
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -158,4 +160,9 @@ struct taskstats {
/* Extended accounting fields end */
+4) Per-task and per-thread statiscits
+ __u64 syscall_counter; /* Syscall counter */
+ __u64 nvcsw; /* Context voluntary switch counter */
+ __u64 nivcsw; /* Context involuntary switch counter */
+
}
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 1b2f3cd..4ad49d2 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ void foo(void)
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ OFFSET(TI_syscall_count, thread_info, syscall_count);
BLANK();
OFFSET(GDS_size, Xgt_desc_struct, size);
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 5e47683..836961f 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -332,6 +332,9 @@ sysenter_past_esp:
SAVE_ALL
GET_THREAD_INFO(%ebp)
+#ifdef CONFIG_TASKSTATS
+ incl TI_syscall_count(%ebp) # Increment syscalls counter
+#endif
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 030d300..b640039 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -94,6 +94,8 @@ int main(void)
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_SYSCALL_COUNT, offsetof(struct thread_info, syscall_count));
+
#ifdef CONFIG_PPC32
DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index c03e829..5d919e4 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -202,6 +202,11 @@ _GLOBAL(DoSyscall)
bl do_show_syscall
#endif /* SHOW_SYSCALLS */
rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+#ifdef CONFIG_TASKSTATS
+ lwz r11,TI_SYSC_CNT(r10)
+ addi r11,r11,1
+ stw r11,TI_SYSC_CNT(r10)
+#endif
lwz r11,TI_FLAGS(r10)
andi. r11,r11,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2551c08..5907f76 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -115,6 +115,11 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
addi r9,r1,STACK_FRAME_OVERHEAD
#endif
clrrdi r11,r1,THREAD_SHIFT
+#ifdef CONFIG_TASKSTATS
+ ld r10,TI_SYSCALL_COUNT(r11)
+ addi r10,r10,1
+ std r10,TI_SYSCALL_COUNT(r11)
+#endif
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2..da57356 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -35,6 +35,7 @@ int main(void)
ENTRY(addr_limit);
ENTRY(preempt_count);
ENTRY(status);
+ ENTRY(syscall_count);
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9f5dac6..af40ead 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -229,6 +229,9 @@ ENTRY(system_call)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
+#ifdef CONFIG_TASKSTATS
+ addq $1, threadinfo_syscall_count(%rcx) # Increment syscalls counter
+#endif
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab..c805c08 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -290,6 +290,19 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
cap_t(p->cap_permitted),
cap_t(p->cap_effective));
}
+static inline char *task_perf(struct task_struct *p, char *buffer)
+{
+ /* Syscall counter adds 1 line overhead on each syscall execution
+ * in entry.S, so probably it is the leave this stuff under ifdefs.
+ */
+#ifdef CONFIG_TASKSTATS
+ buffer += sprintf(buffer, "Syscalls:\t%lu\n", p->thread_info->syscall_count);
+#endif
+ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
+ "nonvoluntary_ctxt_switches:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
+}
int proc_pid_status(struct task_struct *task, char * buffer)
{
@@ -309,6 +322,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
+ buffer = task_perf(task, buffer);
return buffer - orig;
}
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 4b187bb..bccfd6a 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -33,6 +33,7 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit; /* thread address space:
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index 3f32ca8..5306ac2 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -35,6 +35,7 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
struct restart_block restart_block;
unsigned long local_flags; /* private flags for thread */
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 74a6c74..e53022d 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -31,6 +31,7 @@ struct thread_info {
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit;
struct restart_block restart_block;
};
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 3fced47..98dfde7 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
*/
-#define TASKSTATS_VERSION 3
+#define TASKSTATS_VERSION 4
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -141,6 +141,10 @@ struct taskstats {
__u64 write_syscalls; /* write syscalls */
/* Extended accounting fields end */
+ __u64 syscall_counter; /* Syscall counter */
+ __u64 nvcsw;
+ __u64 nivcsw;
+
#define TASKSTATS_HAS_IO_ACCOUNTING
/* Per-task storage I/O accounting starts */
__u64 read_bytes; /* bytes of read I/O */
diff --git a/kernel/fork.c b/kernel/fork.c
index fc723e5..5213738 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1042,6 +1042,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->wchar = 0; /* I/O counter: bytes written */
p->syscr = 0; /* I/O counter: read syscalls */
p->syscw = 0; /* I/O counter: write syscalls */
+#ifdef CONFIG_TASKSTATS
+ p->thread_info->syscall_count = 0; /* Syscall counter: total numbers of syscalls */
+#endif
task_io_accounting_init(p);
acct_clear_integrals(p);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476f..d7bf33f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,9 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
/* fill in basic acct fields */
stats->version = TASKSTATS_VERSION;
+ stats->syscall_counter = tsk->thread_info->syscall_count;
+ stats->nvcsw = tsk->nvcsw;
+ stats->nivcsw = tsk->nivcsw;
bacct_add_tsk(stats, tsk);
/* fill in extended acct fields */
@@ -242,6 +245,9 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
*/
delayacct_add_tsk(stats, tsk);
+ stats->syscall_counter += tsk->thread_info->syscall_count;
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
} while_each_thread(first, tsk);
unlock_task_sighand(first, &flags);
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-05-11 17:13 [PATCH] Performance Stats: Kernel patch Maxim Uvarov
@ 2007-05-12 10:39 ` Andrea Righi
0 siblings, 0 replies; 19+ messages in thread
From: Andrea Righi @ 2007-05-12 10:39 UTC (permalink / raw)
To: Maxim Uvarov; +Cc: LKML, linuxppc-dev, wli, dada1, pavel, linas, msyrchin
Maxim Uvarov wrote:
> diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
> index 661c797..606aef6 100644
> --- a/Documentation/accounting/taskstats-struct.txt
> +++ b/Documentation/accounting/taskstats-struct.txt
> @@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
> /* Extended accounting fields end */
> Their values are collected if CONFIG_TASK_XACCT is set.
>
> +4) Per-task and per-thread statistics
> +
> Future extension should add fields to the end of the taskstats struct, and
> should not change the relative position of each field within the struct.
>
> @@ -158,4 +160,9 @@ struct taskstats {
>
> /* Extended accounting fields end */
>
> +4) Per-task and per-thread statiscits
...a small typo in the documentation: s/statiscits/statistics/
-Andrea
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] Performance Stats: Kernel patch
@ 2007-06-05 14:43 Maxim Uvarov
2007-06-06 6:38 ` Andrew Morton
0 siblings, 1 reply; 19+ messages in thread
From: Maxim Uvarov @ 2007-06-05 14:43 UTC (permalink / raw)
To: LKML
Changes:
Names are renamed to task_context_switch_rates.
Patch makes available to the user the following
task and process performance statistics:
* Involuntary Context Switches (task_struct->nivcsw)
* Voluntary Context Switches (task_struct->nvcsw)
Statistics information is available from:
1. taskstats interface (Documentation/accounting/)
2. /proc/PID/status (task only).
This data is useful for detecting hyperactivity
patterns between processes.
Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
---
Documentation/accounting/getdelays.c | 19 +++++++++++++++++--
Documentation/accounting/taskstats-struct.txt | 6 ++++++
fs/proc/array.c | 8 ++++++++
include/linux/taskstats.h | 5 ++++-
kernel/taskstats.c | 4 ++++
5 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index e9126e7..fcfe796 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -49,6 +49,7 @@ char name[100];
int dbg;
int print_delays;
int print_io_accounting;
+int print_task_context_switch_rates;
__u64 stime, utime;
#define PRINTF(fmt, arg...) { \
@@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
"IO %15s%15s\n"
" %15llu%15llu\n"
"MEM %15s%15s\n"
- " %15llu%15llu\n\n",
+ " %15llu%15llu\n"
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
@@ -196,6 +197,14 @@ void print_delayacct(struct taskstats *t)
"count", "delay total", t->swapin_count, t->swapin_delay_total);
}
+void task_context_switch_rates(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s\n"
+ " %15lu%15lu\n",
+ "voluntary", "nonvoluntary",
+ t->nvcsw, t->nivcsw);
+}
+
void print_ioacct(struct taskstats *t)
{
printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
@@ -227,7 +236,7 @@ int main(int argc, char *argv[])
struct msgtemplate msg;
while (1) {
- c = getopt(argc, argv, "diw:r:m:t:p:v:l");
+ c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
if (c < 0)
break;
@@ -240,6 +249,10 @@ int main(int argc, char *argv[])
printf("printing IO accounting\n");
print_io_accounting = 1;
break;
+ case 'q':
+ printf("printing task/process context switch rates\n");
+ print_task_context_switch_rates = 1;
+ break;
case 'w':
strncpy(logfile, optarg, MAX_FILENAME);
printf("write to file %s\n", logfile);
@@ -381,6 +394,8 @@ int main(int argc, char *argv[])
print_delayacct((struct taskstats *) NLA_DATA(na));
if (print_io_accounting)
print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_context_switch_rates)
+ task_context_switch_rates((struct taskstats *) NLA_DATA(na));
if (fd) {
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
err(1,"write error\n");
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index 661c797..dc2a7a1 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
/* Extended accounting fields end */
Their values are collected if CONFIG_TASK_XACCT is set.
+4) Per-task and per-thread context switch rates statistics
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -158,4 +160,8 @@ struct taskstats {
/* Extended accounting fields end */
+4) Per-task and per-thread statistics
+ __u64 nvcsw; /* Context voluntary switch counter */
+ __u64 nivcsw; /* Context involuntary switch counter */
+
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab..cf406e8 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -290,6 +290,13 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
cap_t(p->cap_permitted),
cap_t(p->cap_effective));
}
+static inline char *task_context_switch_rates(struct task_struct *p, char *buffer)
+{
+ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
+ "nonvoluntary_ctxt_switches:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
+}
int proc_pid_status(struct task_struct *task, char * buffer)
{
@@ -309,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
+ buffer = task_context_switch_rates(task, buffer);
return buffer - orig;
}
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 3fced47..6927f81 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
*/
-#define TASKSTATS_VERSION 3
+#define TASKSTATS_VERSION 4
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -146,6 +146,9 @@ struct taskstats {
__u64 read_bytes; /* bytes of read I/O */
__u64 write_bytes; /* bytes of write I/O */
__u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
+
+ __u64 nvcsw; /* voluntary_ctxt_switches */
+ __u64 nivcsw; /* nonvoluntary_ctxt_switches */
};
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476f..e5bc666 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
/* fill in basic acct fields */
stats->version = TASKSTATS_VERSION;
+ stats->nvcsw = tsk->nvcsw;
+ stats->nivcsw = tsk->nivcsw;
bacct_add_tsk(stats, tsk);
/* fill in extended acct fields */
@@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
*/
delayacct_add_tsk(stats, tsk);
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
} while_each_thread(first, tsk);
unlock_task_sighand(first, &flags);
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-06-05 14:43 Maxim Uvarov
@ 2007-06-06 6:38 ` Andrew Morton
2007-06-06 17:29 ` Jay Lan
0 siblings, 1 reply; 19+ messages in thread
From: Andrew Morton @ 2007-06-06 6:38 UTC (permalink / raw)
To: Maxim Uvarov; +Cc: LKML, Shailabh Nagar, Balbir Singh, Jay Lan, Jonathan Lim
On Tue, 05 Jun 2007 14:43:50 +0000 Maxim Uvarov <muvarov@ru.mvista.com> wrote:
> Patch makes available to the user the following
> task and process performance statistics:
> * Involuntary Context Switches (task_struct->nivcsw)
> * Voluntary Context Switches (task_struct->nvcsw)
>
> Statistics information is available from:
> 1. taskstats interface (Documentation/accounting/)
> 2. /proc/PID/status (task only).
>
> This data is useful for detecting hyperactivity
> patterns between processes.
> Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
Thanks.
Please retain suitable cc's when discussing a patch. I keep on having to
find your updates on the mailing list, and then adding all the same cc's
when I reply.
Your patch seemed to be against some horridly prehistoric kernel, so I had
a few things to fix up.
I disagree with the term "rates" for these things. They are counters, not
rates. Adjustments are below.
We still need to think about whether we missed any other fields.
diff -puN Documentation/accounting/getdelays.c~taskstats-add-context-switch-counters-fix Documentation/accounting/getdelays.c
--- a/Documentation/accounting/getdelays.c~taskstats-add-context-switch-counters-fix
+++ a/Documentation/accounting/getdelays.c
@@ -49,7 +49,7 @@ char name[100];
int dbg;
int print_delays;
int print_io_accounting;
-int print_task_context_switch_rates;
+int print_task_context_switch_counts;
__u64 stime, utime;
#define PRINTF(fmt, arg...) { \
@@ -205,7 +205,7 @@ void print_delayacct(struct taskstats *t
"count", "delay total", t->swapin_count, t->swapin_delay_total);
}
-void task_context_switch_rates(struct taskstats *t)
+void task_context_switch_counts(struct taskstats *t)
{
printf("\n\nTask %15s%15s\n"
" %15lu%15lu\n",
@@ -259,7 +259,7 @@ int main(int argc, char *argv[])
break;
case 'q':
printf("printing task/process context switch rates\n");
- print_task_context_switch_rates = 1;
+ print_task_context_switch_counts = 1;
break;
case 'w':
logfile = strdup(optarg);
@@ -402,8 +402,8 @@ int main(int argc, char *argv[])
print_delayacct((struct taskstats *) NLA_DATA(na));
if (print_io_accounting)
print_ioacct((struct taskstats *) NLA_DATA(na));
- if (print_task_context_switch_rates)
- task_context_switch_rates((struct taskstats *) NLA_DATA(na));
+ if (print_task_context_switch_counts)
+ task_context_switch_counts((struct taskstats *) NLA_DATA(na));
if (fd) {
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
err(1,"write error\n");
diff -puN Documentation/accounting/taskstats-struct.txt~taskstats-add-context-switch-counters-fix Documentation/accounting/taskstats-struct.txt
--- a/Documentation/accounting/taskstats-struct.txt~taskstats-add-context-switch-counters-fix
+++ a/Documentation/accounting/taskstats-struct.txt
@@ -22,7 +22,7 @@ There are three different groups of fiel
/* Extended accounting fields end */
Their values are collected if CONFIG_TASK_XACCT is set.
-4) Per-task and per-thread context switch rates statistics
+4) Per-task and per-thread context switch count statistics
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
diff -puN fs/proc/array.c~taskstats-add-context-switch-counters-fix fs/proc/array.c
--- a/fs/proc/array.c~taskstats-add-context-switch-counters-fix
+++ a/fs/proc/array.c
@@ -290,7 +290,9 @@ static inline char *task_cap(struct task
cap_t(p->cap_permitted),
cap_t(p->cap_effective));
}
-static inline char *task_context_switch_rates(struct task_struct *p, char *buffer)
+
+static inline char *task_context_switch_counts(struct task_struct *p,
+ char *buffer)
{
return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
"nonvoluntary_ctxt_switches:\t%lu\n",
@@ -316,7 +318,7 @@ int proc_pid_status(struct task_struct *
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
- buffer = task_context_switch_rates(task, buffer);
+ buffer = task_context_switch_counts(task, buffer);
return buffer - orig;
}
_
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-06-06 6:38 ` Andrew Morton
@ 2007-06-06 17:29 ` Jay Lan
0 siblings, 0 replies; 19+ messages in thread
From: Jay Lan @ 2007-06-06 17:29 UTC (permalink / raw)
To: Andrew Morton
Cc: Maxim Uvarov, LKML, Shailabh Nagar, Balbir Singh, Jay Lan,
Jonathan Lim
Andrew Morton wrote:
> On Tue, 05 Jun 2007 14:43:50 +0000 Maxim Uvarov <muvarov@ru.mvista.com> wrote:
>
>> Patch makes available to the user the following
>> task and process performance statistics:
>> * Involuntary Context Switches (task_struct->nivcsw)
>> * Voluntary Context Switches (task_struct->nvcsw)
>>
>> Statistics information is available from:
>> 1. taskstats interface (Documentation/accounting/)
>> 2. /proc/PID/status (task only).
>>
>> This data is useful for detecting hyperactivity
>> patterns between processes.
>> Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
>
> Thanks.
>
> Please retain suitable cc's when discussing a patch. I keep on having to
> find your updates on the mailing list, and then adding all the same cc's
> when I reply.
>
> Your patch seemed to be against some horridly prehistoric kernel, so I had
> a few things to fix up.
>
> I disagree with the term "rates" for these things. They are counters, not
> rates. Adjustments are below.
>
> We still need to think about whether we missed any other fields.
My initial cut of bringing in general accounting fields was to cover
the "struct acct" in linux/acct.h. It was done so that at least data
needed by BSD accounting is in "struct taskstats".
We need to poll other interested parties into this subject, or we can
add more fields later when users of other accounting fields decide
to take advantage of the taskstats interface.
Thanks,
- jay
>
>
> diff -puN Documentation/accounting/getdelays.c~taskstats-add-context-switch-counters-fix Documentation/accounting/getdelays.c
> --- a/Documentation/accounting/getdelays.c~taskstats-add-context-switch-counters-fix
> +++ a/Documentation/accounting/getdelays.c
> @@ -49,7 +49,7 @@ char name[100];
> int dbg;
> int print_delays;
> int print_io_accounting;
> -int print_task_context_switch_rates;
> +int print_task_context_switch_counts;
> __u64 stime, utime;
>
> #define PRINTF(fmt, arg...) { \
> @@ -205,7 +205,7 @@ void print_delayacct(struct taskstats *t
> "count", "delay total", t->swapin_count, t->swapin_delay_total);
> }
>
> -void task_context_switch_rates(struct taskstats *t)
> +void task_context_switch_counts(struct taskstats *t)
> {
> printf("\n\nTask %15s%15s\n"
> " %15lu%15lu\n",
> @@ -259,7 +259,7 @@ int main(int argc, char *argv[])
> break;
> case 'q':
> printf("printing task/process context switch rates\n");
> - print_task_context_switch_rates = 1;
> + print_task_context_switch_counts = 1;
> break;
> case 'w':
> logfile = strdup(optarg);
> @@ -402,8 +402,8 @@ int main(int argc, char *argv[])
> print_delayacct((struct taskstats *) NLA_DATA(na));
> if (print_io_accounting)
> print_ioacct((struct taskstats *) NLA_DATA(na));
> - if (print_task_context_switch_rates)
> - task_context_switch_rates((struct taskstats *) NLA_DATA(na));
> + if (print_task_context_switch_counts)
> + task_context_switch_counts((struct taskstats *) NLA_DATA(na));
> if (fd) {
> if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
> err(1,"write error\n");
> diff -puN Documentation/accounting/taskstats-struct.txt~taskstats-add-context-switch-counters-fix Documentation/accounting/taskstats-struct.txt
> --- a/Documentation/accounting/taskstats-struct.txt~taskstats-add-context-switch-counters-fix
> +++ a/Documentation/accounting/taskstats-struct.txt
> @@ -22,7 +22,7 @@ There are three different groups of fiel
> /* Extended accounting fields end */
> Their values are collected if CONFIG_TASK_XACCT is set.
>
> -4) Per-task and per-thread context switch rates statistics
> +4) Per-task and per-thread context switch count statistics
>
> Future extension should add fields to the end of the taskstats struct, and
> should not change the relative position of each field within the struct.
> diff -puN fs/proc/array.c~taskstats-add-context-switch-counters-fix fs/proc/array.c
> --- a/fs/proc/array.c~taskstats-add-context-switch-counters-fix
> +++ a/fs/proc/array.c
> @@ -290,7 +290,9 @@ static inline char *task_cap(struct task
> cap_t(p->cap_permitted),
> cap_t(p->cap_effective));
> }
> -static inline char *task_context_switch_rates(struct task_struct *p, char *buffer)
> +
> +static inline char *task_context_switch_counts(struct task_struct *p,
> + char *buffer)
> {
> return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
> "nonvoluntary_ctxt_switches:\t%lu\n",
> @@ -316,7 +318,7 @@ int proc_pid_status(struct task_struct *
> #if defined(CONFIG_S390)
> buffer = task_show_regs(task, buffer);
> #endif
> - buffer = task_context_switch_rates(task, buffer);
> + buffer = task_context_switch_counts(task, buffer);
> return buffer - orig;
> }
>
> _
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] Performance Stats: Kernel patch
@ 2007-05-30 18:49 Maxim Uvarov
2007-06-04 19:19 ` Andrew Morton
0 siblings, 1 reply; 19+ messages in thread
From: Maxim Uvarov @ 2007-05-30 18:49 UTC (permalink / raw)
To: LKML
Removed syscall counters from patch.
Patch makes available to the user the following
task and process performance statistics:
* Involuntary Context Switches (task_struct->nivcsw)
* Voluntary Context Switches (task_struct->nvcsw)
Statistics information is available from:
1. taskstats interface (Documentation/accounting/)
2. /proc/PID/status (task only).
This data is useful for detecting hyperactivity
patterns between processes.
Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
---
Documentation/accounting/getdelays.c | 19 +++++++++++++++++--
Documentation/accounting/taskstats-struct.txt | 6 ++++++
fs/proc/array.c | 8 ++++++++
include/linux/taskstats.h | 5 ++++-
kernel/taskstats.c | 4 ++++
5 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index e9126e7..18d22ad 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -49,6 +49,7 @@ char name[100];
int dbg;
int print_delays;
int print_io_accounting;
+int print_task_stats;
__u64 stime, utime;
#define PRINTF(fmt, arg...) { \
@@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
"IO %15s%15s\n"
" %15llu%15llu\n"
"MEM %15s%15s\n"
- " %15llu%15llu\n\n",
+ " %15llu%15llu\n"
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
@@ -196,6 +197,14 @@ void print_delayacct(struct taskstats *t)
"count", "delay total", t->swapin_count, t->swapin_delay_total);
}
+void print_taskstats(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s\n"
+ " %15lu%15lu\n",
+ "voluntary", "nonvoluntary",
+ t->nvcsw, t->nivcsw);
+}
+
void print_ioacct(struct taskstats *t)
{
printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
@@ -227,7 +236,7 @@ int main(int argc, char *argv[])
struct msgtemplate msg;
while (1) {
- c = getopt(argc, argv, "diw:r:m:t:p:v:l");
+ c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
if (c < 0)
break;
@@ -240,6 +249,10 @@ int main(int argc, char *argv[])
printf("printing IO accounting\n");
print_io_accounting = 1;
break;
+ case 'q':
+ printf("printing task/process stasistics:\n");
+ print_task_stats = 1;
+ break;
case 'w':
strncpy(logfile, optarg, MAX_FILENAME);
printf("write to file %s\n", logfile);
@@ -381,6 +394,8 @@ int main(int argc, char *argv[])
print_delayacct((struct taskstats *) NLA_DATA(na));
if (print_io_accounting)
print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_stats)
+ print_taskstats((struct taskstats *) NLA_DATA(na));
if (fd) {
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
err(1,"write error\n");
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index 661c797..c3ae6a9 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
/* Extended accounting fields end */
Their values are collected if CONFIG_TASK_XACCT is set.
+4) Per-task and per-thread statistics
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -158,4 +160,8 @@ struct taskstats {
/* Extended accounting fields end */
+4) Per-task and per-thread statistics
+ __u64 nvcsw; /* Context voluntary switch counter */
+ __u64 nivcsw; /* Context involuntary switch counter */
+
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab..52e2bd9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -290,6 +290,13 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
cap_t(p->cap_permitted),
cap_t(p->cap_effective));
}
+static inline char *task_perf(struct task_struct *p, char *buffer)
+{
+ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
+ "nonvoluntary_ctxt_switches:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
+}
int proc_pid_status(struct task_struct *task, char * buffer)
{
@@ -309,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
+ buffer = task_perf(task, buffer);
return buffer - orig;
}
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 3fced47..6927f81 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
*/
-#define TASKSTATS_VERSION 3
+#define TASKSTATS_VERSION 4
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -146,6 +146,9 @@ struct taskstats {
__u64 read_bytes; /* bytes of read I/O */
__u64 write_bytes; /* bytes of write I/O */
__u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
+
+ __u64 nvcsw; /* voluntary_ctxt_switches */
+ __u64 nivcsw; /* nonvoluntary_ctxt_switches */
};
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476f..e5bc666 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
/* fill in basic acct fields */
stats->version = TASKSTATS_VERSION;
+ stats->nvcsw = tsk->nvcsw;
+ stats->nivcsw = tsk->nivcsw;
bacct_add_tsk(stats, tsk);
/* fill in extended acct fields */
@@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
*/
delayacct_add_tsk(stats, tsk);
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
} while_each_thread(first, tsk);
unlock_task_sighand(first, &flags);
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-05-30 18:49 Maxim Uvarov
@ 2007-06-04 19:19 ` Andrew Morton
2007-06-04 19:33 ` Jay Lan
` (2 more replies)
0 siblings, 3 replies; 19+ messages in thread
From: Andrew Morton @ 2007-06-04 19:19 UTC (permalink / raw)
To: Maxim Uvarov; +Cc: LKML, Shailabh Nagar, Balbir Singh, Jay Lan
On Wed, 30 May 2007 18:49:46 +0000
Maxim Uvarov <muvarov@ru.mvista.com> wrote:
>
> Removed syscall counters from patch.
>
>
>
>
> Patch makes available to the user the following
> task and process performance statistics:
> * Involuntary Context Switches (task_struct->nivcsw)
> * Voluntary Context Switches (task_struct->nvcsw)
>
> Statistics information is available from:
> 1. taskstats interface (Documentation/accounting/)
> 2. /proc/PID/status (task only).
>
> This data is useful for detecting hyperactivity
> patterns between processes.
> Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
>
A few little things:
>
> diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
> index e9126e7..18d22ad 100644
> --- a/Documentation/accounting/getdelays.c
> +++ b/Documentation/accounting/getdelays.c
> @@ -49,6 +49,7 @@ char name[100];
> int dbg;
> int print_delays;
> int print_io_accounting;
> +int print_task_stats;
> __u64 stime, utime;
>
> #define PRINTF(fmt, arg...) { \
> @@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
> "IO %15s%15s\n"
> " %15llu%15llu\n"
> "MEM %15s%15s\n"
> - " %15llu%15llu\n\n",
> + " %15llu%15llu\n"
> "count", "real total", "virtual total", "delay total",
> t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
> t->cpu_delay_total,
> @@ -196,6 +197,14 @@ void print_delayacct(struct taskstats *t)
> "count", "delay total", t->swapin_count, t->swapin_delay_total);
> }
>
> +void print_taskstats(struct taskstats *t)
> +{
> + printf("\n\nTask %15s%15s\n"
> + " %15lu%15lu\n",
> + "voluntary", "nonvoluntary",
> + t->nvcsw, t->nivcsw);
> +}
print_task_stats versus print_taskstats is a bit confusing, but I guess it
doesn't matter.
More significantly, the whole idea of calling it "task stats" isn't a good
one: it's far too general. The whole kernel interface is called taskstats,
but the additions here are a tiny part of that.
Perhaps task_context_switch_rates would be more appropriate, although
rather a lot to type.
> void print_ioacct(struct taskstats *t)
> {
> printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
> @@ -227,7 +236,7 @@ int main(int argc, char *argv[])
> struct msgtemplate msg;
>
> while (1) {
> - c = getopt(argc, argv, "diw:r:m:t:p:v:l");
> + c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
> if (c < 0)
> break;
>
> @@ -240,6 +249,10 @@ int main(int argc, char *argv[])
> printf("printing IO accounting\n");
> print_io_accounting = 1;
> break;
> + case 'q':
> + printf("printing task/process stasistics:\n");
> + print_task_stats = 1;
> + break;
> case 'w':
> strncpy(logfile, optarg, MAX_FILENAME);
> printf("write to file %s\n", logfile);
> @@ -381,6 +394,8 @@ int main(int argc, char *argv[])
> print_delayacct((struct taskstats *) NLA_DATA(na));
> if (print_io_accounting)
> print_ioacct((struct taskstats *) NLA_DATA(na));
> + if (print_task_stats)
> + print_taskstats((struct taskstats *) NLA_DATA(na));
> if (fd) {
> if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
> err(1,"write error\n");
> diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
> index 661c797..c3ae6a9 100644
> --- a/Documentation/accounting/taskstats-struct.txt
> +++ b/Documentation/accounting/taskstats-struct.txt
> @@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
> /* Extended accounting fields end */
> Their values are collected if CONFIG_TASK_XACCT is set.
>
> +4) Per-task and per-thread statistics
> +
> Future extension should add fields to the end of the taskstats struct, and
> should not change the relative position of each field within the struct.
>
> @@ -158,4 +160,8 @@ struct taskstats {
>
> /* Extended accounting fields end */
>
> +4) Per-task and per-thread statistics
> + __u64 nvcsw; /* Context voluntary switch counter */
> + __u64 nivcsw; /* Context involuntary switch counter */
> +
> }
> diff --git a/fs/proc/array.c b/fs/proc/array.c
> index 70e4fab..52e2bd9 100644
> --- a/fs/proc/array.c
> +++ b/fs/proc/array.c
> @@ -290,6 +290,13 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
> cap_t(p->cap_permitted),
> cap_t(p->cap_effective));
> }
> +static inline char *task_perf(struct task_struct *p, char *buffer)
> +{
> + return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
> + "nonvoluntary_ctxt_switches:\t%lu\n",
> + p->nvcsw,
> + p->nivcsw);
> +}
And here we call it task_perf, which is inconsistent, and also not very
descriptive. Again, task_context_switch_rates would better.
err, except it's not a "rate". How about task_context_switches?
> int proc_pid_status(struct task_struct *task, char * buffer)
> {
> @@ -309,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
> #if defined(CONFIG_S390)
> buffer = task_show_regs(task, buffer);
> #endif
> + buffer = task_perf(task, buffer);
> return buffer - orig;
> }
>
> diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
> index 3fced47..6927f81 100644
> --- a/include/linux/taskstats.h
> +++ b/include/linux/taskstats.h
> @@ -31,7 +31,7 @@
> */
>
>
> -#define TASKSTATS_VERSION 3
> +#define TASKSTATS_VERSION 4
> #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
> * in linux/sched.h */
>
> @@ -146,6 +146,9 @@ struct taskstats {
> __u64 read_bytes; /* bytes of read I/O */
> __u64 write_bytes; /* bytes of write I/O */
> __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
> +
> + __u64 nvcsw; /* voluntary_ctxt_switches */
> + __u64 nivcsw; /* nonvoluntary_ctxt_switches */
> };
>
>
> diff --git a/kernel/taskstats.c b/kernel/taskstats.c
> index 4c3476f..e5bc666 100644
> --- a/kernel/taskstats.c
> +++ b/kernel/taskstats.c
> @@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
>
> /* fill in basic acct fields */
> stats->version = TASKSTATS_VERSION;
> + stats->nvcsw = tsk->nvcsw;
> + stats->nivcsw = tsk->nivcsw;
> bacct_add_tsk(stats, tsk);
>
> /* fill in extended acct fields */
> @@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
> */
> delayacct_add_tsk(stats, tsk);
>
> + stats->nvcsw += tsk->nvcsw;
> + stats->nivcsw += tsk->nivcsw;
> } while_each_thread(first, tsk);
>
> unlock_task_sighand(first, &flags);
>
The patch otherwise seems OK. Thoughts:
- Do we need to increment TASKSTATS_VERSION for this? I forget the rules
there.
- The lack of context-switch accounting in taskstats is, I think, a
simple oversight. It should have been included on day one.
There are perhaps other things which _should_ be in taskstats, but we
forgot to add them. Can we think of any such things?
We shouldn't just toss any old random stuff in there: it should be
things which make sense, and which Unix or Linux accounting traditionally
provides, and it should be something which we expect won't suddenly
become unsupportable if people make internal kernel changes.
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-06-04 19:19 ` Andrew Morton
@ 2007-06-04 19:33 ` Jay Lan
2007-06-04 19:49 ` Jonathan Lim
2007-06-04 20:13 ` Jay Lan
2007-06-05 6:50 ` Balbir Singh
2 siblings, 1 reply; 19+ messages in thread
From: Jay Lan @ 2007-06-04 19:33 UTC (permalink / raw)
To: Andrew Morton
Cc: Maxim Uvarov, LKML, Shailabh Nagar, Balbir Singh, Jonathan Lim,
David Wright, John Hesterberg
Add Jonathan Lim to cc, who is working on CSA userland implementation
to use the taskstats data that this patch is going to remove.
Thanks,
- jay
Andrew Morton wrote:
> On Wed, 30 May 2007 18:49:46 +0000
> Maxim Uvarov <muvarov@ru.mvista.com> wrote:
>
>> Removed syscall counters from patch.
>>
>>
>>
>>
>> Patch makes available to the user the following
>> task and process performance statistics:
>> * Involuntary Context Switches (task_struct->nivcsw)
>> * Voluntary Context Switches (task_struct->nvcsw)
>>
>> Statistics information is available from:
>> 1. taskstats interface (Documentation/accounting/)
>> 2. /proc/PID/status (task only).
>>
>> This data is useful for detecting hyperactivity
>> patterns between processes.
>> Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
>>
>
> A few little things:
>
>> diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
>> index e9126e7..18d22ad 100644
>> --- a/Documentation/accounting/getdelays.c
>> +++ b/Documentation/accounting/getdelays.c
>> @@ -49,6 +49,7 @@ char name[100];
>> int dbg;
>> int print_delays;
>> int print_io_accounting;
>> +int print_task_stats;
>> __u64 stime, utime;
>>
>> #define PRINTF(fmt, arg...) { \
>> @@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
>> "IO %15s%15s\n"
>> " %15llu%15llu\n"
>> "MEM %15s%15s\n"
>> - " %15llu%15llu\n\n",
>> + " %15llu%15llu\n"
>> "count", "real total", "virtual total", "delay total",
>> t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
>> t->cpu_delay_total,
>> @@ -196,6 +197,14 @@ void print_delayacct(struct taskstats *t)
>> "count", "delay total", t->swapin_count, t->swapin_delay_total);
>> }
>>
>> +void print_taskstats(struct taskstats *t)
>> +{
>> + printf("\n\nTask %15s%15s\n"
>> + " %15lu%15lu\n",
>> + "voluntary", "nonvoluntary",
>> + t->nvcsw, t->nivcsw);
>> +}
>
> print_task_stats versus print_taskstats is a bit confusing, but I guess it
> doesn't matter.
>
> More significantly, the whole idea of calling it "task stats" isn't a good
> one: it's far too general. The whole kernel interface is called taskstats,
> but the additions here are a tiny part of that.
>
> Perhaps task_context_switch_rates would be more appropriate, although
> rather a lot to type.
>
>> void print_ioacct(struct taskstats *t)
>> {
>> printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
>> @@ -227,7 +236,7 @@ int main(int argc, char *argv[])
>> struct msgtemplate msg;
>>
>> while (1) {
>> - c = getopt(argc, argv, "diw:r:m:t:p:v:l");
>> + c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
>> if (c < 0)
>> break;
>>
>> @@ -240,6 +249,10 @@ int main(int argc, char *argv[])
>> printf("printing IO accounting\n");
>> print_io_accounting = 1;
>> break;
>> + case 'q':
>> + printf("printing task/process stasistics:\n");
>> + print_task_stats = 1;
>> + break;
>> case 'w':
>> strncpy(logfile, optarg, MAX_FILENAME);
>> printf("write to file %s\n", logfile);
>> @@ -381,6 +394,8 @@ int main(int argc, char *argv[])
>> print_delayacct((struct taskstats *) NLA_DATA(na));
>> if (print_io_accounting)
>> print_ioacct((struct taskstats *) NLA_DATA(na));
>> + if (print_task_stats)
>> + print_taskstats((struct taskstats *) NLA_DATA(na));
>> if (fd) {
>> if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
>> err(1,"write error\n");
>> diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
>> index 661c797..c3ae6a9 100644
>> --- a/Documentation/accounting/taskstats-struct.txt
>> +++ b/Documentation/accounting/taskstats-struct.txt
>> @@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
>> /* Extended accounting fields end */
>> Their values are collected if CONFIG_TASK_XACCT is set.
>>
>> +4) Per-task and per-thread statistics
>> +
>> Future extension should add fields to the end of the taskstats struct, and
>> should not change the relative position of each field within the struct.
>>
>> @@ -158,4 +160,8 @@ struct taskstats {
>>
>> /* Extended accounting fields end */
>>
>> +4) Per-task and per-thread statistics
>> + __u64 nvcsw; /* Context voluntary switch counter */
>> + __u64 nivcsw; /* Context involuntary switch counter */
>> +
>> }
>> diff --git a/fs/proc/array.c b/fs/proc/array.c
>> index 70e4fab..52e2bd9 100644
>> --- a/fs/proc/array.c
>> +++ b/fs/proc/array.c
>> @@ -290,6 +290,13 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
>> cap_t(p->cap_permitted),
>> cap_t(p->cap_effective));
>> }
>> +static inline char *task_perf(struct task_struct *p, char *buffer)
>> +{
>> + return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
>> + "nonvoluntary_ctxt_switches:\t%lu\n",
>> + p->nvcsw,
>> + p->nivcsw);
>> +}
>
> And here we call it task_perf, which is inconsistent, and also not very
> descriptive. Again, task_context_switch_rates would better.
>
> err, except it's not a "rate". How about task_context_switches?
>
>> int proc_pid_status(struct task_struct *task, char * buffer)
>> {
>> @@ -309,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
>> #if defined(CONFIG_S390)
>> buffer = task_show_regs(task, buffer);
>> #endif
>> + buffer = task_perf(task, buffer);
>> return buffer - orig;
>> }
>>
>> diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
>> index 3fced47..6927f81 100644
>> --- a/include/linux/taskstats.h
>> +++ b/include/linux/taskstats.h
>> @@ -31,7 +31,7 @@
>> */
>>
>>
>> -#define TASKSTATS_VERSION 3
>> +#define TASKSTATS_VERSION 4
>> #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
>> * in linux/sched.h */
>>
>> @@ -146,6 +146,9 @@ struct taskstats {
>> __u64 read_bytes; /* bytes of read I/O */
>> __u64 write_bytes; /* bytes of write I/O */
>> __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
>> +
>> + __u64 nvcsw; /* voluntary_ctxt_switches */
>> + __u64 nivcsw; /* nonvoluntary_ctxt_switches */
>> };
>>
>>
>> diff --git a/kernel/taskstats.c b/kernel/taskstats.c
>> index 4c3476f..e5bc666 100644
>> --- a/kernel/taskstats.c
>> +++ b/kernel/taskstats.c
>> @@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
>>
>> /* fill in basic acct fields */
>> stats->version = TASKSTATS_VERSION;
>> + stats->nvcsw = tsk->nvcsw;
>> + stats->nivcsw = tsk->nivcsw;
>> bacct_add_tsk(stats, tsk);
>>
>> /* fill in extended acct fields */
>> @@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
>> */
>> delayacct_add_tsk(stats, tsk);
>>
>> + stats->nvcsw += tsk->nvcsw;
>> + stats->nivcsw += tsk->nivcsw;
>> } while_each_thread(first, tsk);
>>
>> unlock_task_sighand(first, &flags);
>>
>
> The patch otherwise seems OK. Thoughts:
>
> - Do we need to increment TASKSTATS_VERSION for this? I forget the rules
> there.
>
> - The lack of context-switch accounting in taskstats is, I think, a
> simple oversight. It should have been included on day one.
>
> There are perhaps other things which _should_ be in taskstats, but we
> forgot to add them. Can we think of any such things?
>
> We shouldn't just toss any old random stuff in there: it should be
> things which make sense, and which Unix or Linux accounting traditionally
> provides, and it should be something which we expect won't suddenly
> become unsupportable if people make internal kernel changes.
>
>
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-06-04 19:33 ` Jay Lan
@ 2007-06-04 19:49 ` Jonathan Lim
0 siblings, 0 replies; 19+ messages in thread
From: Jonathan Lim @ 2007-06-04 19:49 UTC (permalink / raw)
To: Jay Lan
Cc: Andrew Morton, Maxim Uvarov, LKML, Shailabh Nagar, Balbir Singh,
David Wright, jh
Looking at the diffs below, I see context switch counters added. What is
actually being removed?
Jonathan
On Mon Jun 4 12:33:15 2007, jlan@sgi.com wrote:
>
> Add Jonathan Lim to cc, who is working on CSA userland implementation
> to use the taskstats data that this patch is going to remove.
>
> Thanks,
> - jay
>
> Andrew Morton wrote:
> > On Wed, 30 May 2007 18:49:46 +0000
> > Maxim Uvarov <muvarov@ru.mvista.com> wrote:
> >
> >> Removed syscall counters from patch.
> >>
> >> Patch makes available to the user the following
> >> task and process performance statistics:
> >> * Involuntary Context Switches (task_struct->nivcsw)
> >> * Voluntary Context Switches (task_struct->nvcsw)
> >>
> >> Statistics information is available from:
> >> 1. taskstats interface (Documentation/accounting/)
> >> 2. /proc/PID/status (task only).
> >>
> >> This data is useful for detecting hyperactivity
> >> patterns between processes.
> >> Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
> >
> > A few little things:
> >
> >> diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
> >> index e9126e7..18d22ad 100644
> >> --- a/Documentation/accounting/getdelays.c
> >> +++ b/Documentation/accounting/getdelays.c
> >> @@ -49,6 +49,7 @@ char name[100];
> >> int dbg;
> >> int print_delays;
> >> int print_io_accounting;
> >> +int print_task_stats;
> >> __u64 stime, utime;
> >>
> >> #define PRINTF(fmt, arg...) { \
> >> @@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
> >> "IO %15s%15s\n"
> >> " %15llu%15llu\n"
> >> "MEM %15s%15s\n"
> >> - " %15llu%15llu\n\n",
> >> + " %15llu%15llu\n"
> >> "count", "real total", "virtual total", "delay total",
> >> t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
> >> t->cpu_delay_total,
> >> @@ -196,6 +197,14 @@ void print_delayacct(struct taskstats *t)
> >> "count", "delay total", t->swapin_count, t->swapin_delay_total);
> >> }
> >>
> >> +void print_taskstats(struct taskstats *t)
> >> +{
> >> + printf("\n\nTask %15s%15s\n"
> >> + " %15lu%15lu\n",
> >> + "voluntary", "nonvoluntary",
> >> + t->nvcsw, t->nivcsw);
> >> +}
> >
> > print_task_stats versus print_taskstats is a bit confusing, but I guess it
> > doesn't matter.
> >
> > More significantly, the whole idea of calling it "task stats" isn't a good
> > one: it's far too general. The whole kernel interface is called taskstats,
> > but the additions here are a tiny part of that.
> >
> > Perhaps task_context_switch_rates would be more appropriate, although
> > rather a lot to type.
> >
> >> void print_ioacct(struct taskstats *t)
> >> {
> >> printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
> >> @@ -227,7 +236,7 @@ int main(int argc, char *argv[])
> >> struct msgtemplate msg;
> >>
> >> while (1) {
> >> - c = getopt(argc, argv, "diw:r:m:t:p:v:l");
> >> + c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
> >> if (c < 0)
> >> break;
> >>
> >> @@ -240,6 +249,10 @@ int main(int argc, char *argv[])
> >> printf("printing IO accounting\n");
> >> print_io_accounting = 1;
> >> break;
> >> + case 'q':
> >> + printf("printing task/process stasistics:\n");
> >> + print_task_stats = 1;
> >> + break;
> >> case 'w':
> >> strncpy(logfile, optarg, MAX_FILENAME);
> >> printf("write to file %s\n", logfile);
> >> @@ -381,6 +394,8 @@ int main(int argc, char *argv[])
> >> print_delayacct((struct taskstats *) NLA_DATA(na));
> >> if (print_io_accounting)
> >> print_ioacct((struct taskstats *) NLA_DATA(na));
> >> + if (print_task_stats)
> >> + print_taskstats((struct taskstats *) NLA_DATA(na));
> >> if (fd) {
> >> if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
> >> err(1,"write error\n");
> >> diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
> >> index 661c797..c3ae6a9 100644
> >> --- a/Documentation/accounting/taskstats-struct.txt
> >> +++ b/Documentation/accounting/taskstats-struct.txt
> >> @@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
> >> /* Extended accounting fields end */
> >> Their values are collected if CONFIG_TASK_XACCT is set.
> >>
> >> +4) Per-task and per-thread statistics
> >> +
> >> Future extension should add fields to the end of the taskstats struct, and
> >> should not change the relative position of each field within the struct.
> >>
> >> @@ -158,4 +160,8 @@ struct taskstats {
> >>
> >> /* Extended accounting fields end */
> >>
> >> +4) Per-task and per-thread statistics
> >> + __u64 nvcsw; /* Context voluntary switch counter */
> >> + __u64 nivcsw; /* Context involuntary switch counter */
> >> +
> >> }
> >> diff --git a/fs/proc/array.c b/fs/proc/array.c
> >> index 70e4fab..52e2bd9 100644
> >> --- a/fs/proc/array.c
> >> +++ b/fs/proc/array.c
> >> @@ -290,6 +290,13 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
> >> cap_t(p->cap_permitted),
> >> cap_t(p->cap_effective));
> >> }
> >> +static inline char *task_perf(struct task_struct *p, char *buffer)
> >> +{
> >> + return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
> >> + "nonvoluntary_ctxt_switches:\t%lu\n",
> >> + p->nvcsw,
> >> + p->nivcsw);
> >> +}
> >
> > And here we call it task_perf, which is inconsistent, and also not very
> > descriptive. Again, task_context_switch_rates would better.
> >
> > err, except it's not a "rate". How about task_context_switches?
> >
> >> int proc_pid_status(struct task_struct *task, char * buffer)
> >> {
> >> @@ -309,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
> >> #if defined(CONFIG_S390)
> >> buffer = task_show_regs(task, buffer);
> >> #endif
> >> + buffer = task_perf(task, buffer);
> >> return buffer - orig;
> >> }
> >>
> >> diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
> >> index 3fced47..6927f81 100644
> >> --- a/include/linux/taskstats.h
> >> +++ b/include/linux/taskstats.h
> >> @@ -31,7 +31,7 @@
> >> */
> >>
> >>
> >> -#define TASKSTATS_VERSION 3
> >> +#define TASKSTATS_VERSION 4
> >> #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
> >> * in linux/sched.h */
> >>
> >> @@ -146,6 +146,9 @@ struct taskstats {
> >> __u64 read_bytes; /* bytes of read I/O */
> >> __u64 write_bytes; /* bytes of write I/O */
> >> __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
> >> +
> >> + __u64 nvcsw; /* voluntary_ctxt_switches */
> >> + __u64 nivcsw; /* nonvoluntary_ctxt_switches */
> >> };
> >>
> >>
> >> diff --git a/kernel/taskstats.c b/kernel/taskstats.c
> >> index 4c3476f..e5bc666 100644
> >> --- a/kernel/taskstats.c
> >> +++ b/kernel/taskstats.c
> >> @@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
> >>
> >> /* fill in basic acct fields */
> >> stats->version = TASKSTATS_VERSION;
> >> + stats->nvcsw = tsk->nvcsw;
> >> + stats->nivcsw = tsk->nivcsw;
> >> bacct_add_tsk(stats, tsk);
> >>
> >> /* fill in extended acct fields */
> >> @@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
> >> */
> >> delayacct_add_tsk(stats, tsk);
> >>
> >> + stats->nvcsw += tsk->nvcsw;
> >> + stats->nivcsw += tsk->nivcsw;
> >> } while_each_thread(first, tsk);
> >>
> >> unlock_task_sighand(first, &flags);
> >>
> >
> > The patch otherwise seems OK. Thoughts:
> >
> > - Do we need to increment TASKSTATS_VERSION for this? I forget the rules
> > there.
> >
> > - The lack of context-switch accounting in taskstats is, I think, a
> > simple oversight. It should have been included on day one.
> >
> > There are perhaps other things which _should_ be in taskstats, but we
> > forgot to add them. Can we think of any such things?
> >
> > We shouldn't just toss any old random stuff in there: it should be
> > things which make sense, and which Unix or Linux accounting traditionally
> > provides, and it should be something which we expect won't suddenly
> > become unsupportable if people make internal kernel changes.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] Performance Stats: Kernel patch
2007-06-04 19:19 ` Andrew Morton
2007-06-04 19:33 ` Jay Lan
@ 2007-06-04 20:13 ` Jay Lan
2007-06-05 6:50 ` Balbir Singh
2 siblings, 0 replies; 19+ messages in thread
From: Jay Lan @ 2007-06-04 20:13 UTC (permalink / raw)
To: Andrew Morton; +Cc: Maxim Uvarov, LKML, Shailabh Nagar, Balbir Singh, Jay Lan
Andrew Morton wrote:
> On Wed, 30 May 2007 18:49:46 +0000
> Maxim Uvarov <muvarov@ru.mvista.com> wrote:
>
>> Removed syscall counters from patch.
>>
>>
>>
>>
>> Patch makes available to the user the following
>> task and process performance statistics:
>> * Involuntary Context Switches (task_struct->nivcsw)
>> * Voluntary Context Switches (task_struct->nvcsw)
>>
>> Statistics information is available from:
>> 1. taskstats interface (Documentation/accounting/)
>> 2. /proc/PID/status (task only).
>>
>> This data is useful for detecting hyperactivity
>> patterns between processes.
>> Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
>>
>
> A few little things:
>
>> diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
>> index e9126e7..18d22ad 100644
>> --- a/Documentation/accounting/getdelays.c
>> +++ b/Documentation/accounting/getdelays.c
>> @@ -49,6 +49,7 @@ char name[100];
>> int dbg;
>> int print_delays;
>> int print_io_accounting;
>> +int print_task_stats;
>> __u64 stime, utime;
>>
>> #define PRINTF(fmt, arg...) { \
>> @@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
>> "IO %15s%15s\n"
>> " %15llu%15llu\n"
>> "MEM %15s%15s\n"
>> - " %15llu%15llu\n\n",
>> + " %15llu%15llu\n"
>> "count", "real total", "virtual total", "delay total",
>> t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
>> t->cpu_delay_total,
>> @@ -196,6 +197,14 @@ void print_delayacct(struct taskstats *t)
>> "count", "delay total", t->swapin_count, t->swapin_delay_total);
>> }
>>
>> +void print_taskstats(struct taskstats *t)
>> +{
>> + printf("\n\nTask %15s%15s\n"
>> + " %15lu%15lu\n",
>> + "voluntary", "nonvoluntary",
>> + t->nvcsw, t->nivcsw);
>> +}
>
> print_task_stats versus print_taskstats is a bit confusing, but I guess it
> doesn't matter.
>
> More significantly, the whole idea of calling it "task stats" isn't a good
> one: it's far too general. The whole kernel interface is called taskstats,
> but the additions here are a tiny part of that.
>
> Perhaps task_context_switch_rates would be more appropriate, although
> rather a lot to type.
>
>> void print_ioacct(struct taskstats *t)
>> {
>> printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
>> @@ -227,7 +236,7 @@ int main(int argc, char *argv[])
>> struct msgtemplate msg;
>>
>> while (1) {
>> - c = getopt(argc, argv, "diw:r:m:t:p:v:l");
>> + c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
>> if (c < 0)
>> break;
>>
>> @@ -240,6 +249,10 @@ int main(int argc, char *argv[])
>> printf("printing IO accounting\n");
>> print_io_accounting = 1;
>> break;
>> + case 'q':
>> + printf("printing task/process stasistics:\n");
>> + print_task_stats = 1;
>> + break;
>> case 'w':
>> strncpy(logfile, optarg, MAX_FILENAME);
>> printf("write to file %s\n", logfile);
>> @@ -381,6 +394,8 @@ int main(int argc, char *argv[])
>> print_delayacct((struct taskstats *) NLA_DATA(na));
>> if (print_io_accounting)
>> print_ioacct((struct taskstats *) NLA_DATA(na));
>> + if (print_task_stats)
>> + print_taskstats((struct taskstats *) NLA_DATA(na));
>> if (fd) {
>> if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
>> err(1,"write error\n");
>> diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
>> index 661c797..c3ae6a9 100644
>> --- a/Documentation/accounting/taskstats-struct.txt
>> +++ b/Documentation/accounting/taskstats-struct.txt
>> @@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
>> /* Extended accounting fields end */
>> Their values are collected if CONFIG_TASK_XACCT is set.
>>
>> +4) Per-task and per-thread statistics
>> +
>> Future extension should add fields to the end of the taskstats struct, and
>> should not change the relative position of each field within the struct.
>>
>> @@ -158,4 +160,8 @@ struct taskstats {
>>
>> /* Extended accounting fields end */
>>
>> +4) Per-task and per-thread statistics
>> + __u64 nvcsw; /* Context voluntary switch counter */
>> + __u64 nivcsw; /* Context involuntary switch counter */
>> +
>> }
>> diff --git a/fs/proc/array.c b/fs/proc/array.c
>> index 70e4fab..52e2bd9 100644
>> --- a/fs/proc/array.c
>> +++ b/fs/proc/array.c
>> @@ -290,6 +290,13 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
>> cap_t(p->cap_permitted),
>> cap_t(p->cap_effective));
>> }
>> +static inline char *task_perf(struct task_struct *p, char *buffer)
>> +{
>> + return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
>> + "nonvoluntary_ctxt_switches:\t%lu\n",
>> + p->nvcsw,
>> + p->nivcsw);
>> +}
>
> And here we call it task_perf, which is inconsistent, and also not very
> descriptive. Again, task_context_switch_rates would better.
>
> err, except it's not a "rate". How about task_context_switches?
>
>> int proc_pid_status(struct task_struct *task, char * buffer)
>> {
>> @@ -309,6 +316,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
>> #if defined(CONFIG_S390)
>> buffer = task_show_regs(task, buffer);
>> #endif
>> + buffer = task_perf(task, buffer);
>> return buffer - orig;
>> }
>>
>> diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
>> index 3fced47..6927f81 100644
>> --- a/include/linux/taskstats.h
>> +++ b/include/linux/taskstats.h
>> @@ -31,7 +31,7 @@
>> */
>>
>>
>> -#define TASKSTATS_VERSION 3
>> +#define TASKSTATS_VERSION 4
>> #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
>> * in linux/sched.h */
>>
>> @@ -146,6 +146,9 @@ struct taskstats {
>> __u64 read_bytes; /* bytes of read I/O */
>> __u64 write_bytes; /* bytes of write I/O */
>> __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */
>> +
>> + __u64 nvcsw; /* voluntary_ctxt_switches */
>> + __u64 nivcsw; /* nonvoluntary_ctxt_switches */
>> };
>>
>>
>> diff --git a/kernel/taskstats.c b/kernel/taskstats.c
>> index 4c3476f..e5bc666 100644
>> --- a/kernel/taskstats.c
>> +++ b/kernel/taskstats.c
>> @@ -196,6 +196,8 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
>>
>> /* fill in basic acct fields */
>> stats->version = TASKSTATS_VERSION;
>> + stats->nvcsw = tsk->nvcsw;
>> + stats->nivcsw = tsk->nivcsw;
>> bacct_add_tsk(stats, tsk);
>>
>> /* fill in extended acct fields */
>> @@ -242,6 +244,8 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
>> */
>> delayacct_add_tsk(stats, tsk);
>>
>> + stats->nvcsw += tsk->nvcsw;
>> + stats->nivcsw += tsk->nivcsw;
>> } while_each_thread(first, tsk);
>>
>> unlock_task_sighand(first, &flags);
>>
>
> The patch otherwise seems OK. Thoughts:
>
> - Do we need to increment TASKSTATS_VERSION for this? I forget the rules
> there.
As long as we change the size of taskstats struct or the relative
position of existing fields, or the taskstats interface, we need to
increment the TASKSTATS_VERSION. I guess if we make more than one
changes within one 2.6.x release cycle, we can bundle them in one
version change. Shailabh or Balbir?
Thanks,
- jay
>
> - The lack of context-switch accounting in taskstats is, I think, a
> simple oversight. It should have been included on day one.
>
> There are perhaps other things which _should_ be in taskstats, but we
> forgot to add them. Can we think of any such things?
>
> We shouldn't just toss any old random stuff in there: it should be
> things which make sense, and which Unix or Linux accounting traditionally
> provides, and it should be something which we expect won't suddenly
> become unsupportable if people make internal kernel changes.
>
>
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-06-04 19:19 ` Andrew Morton
2007-06-04 19:33 ` Jay Lan
2007-06-04 20:13 ` Jay Lan
@ 2007-06-05 6:50 ` Balbir Singh
2 siblings, 0 replies; 19+ messages in thread
From: Balbir Singh @ 2007-06-05 6:50 UTC (permalink / raw)
To: Andrew Morton; +Cc: Maxim Uvarov, LKML, Shailabh Nagar, Balbir Singh, Jay Lan
Andrew Morton wrote:
> On Wed, 30 May 2007 18:49:46 +0000
> Maxim Uvarov <muvarov@ru.mvista.com> wrote:
>
>> +void print_taskstats(struct taskstats *t)
>> +{
>> + printf("\n\nTask %15s%15s\n"
>> + " %15lu%15lu\n",
>> + "voluntary", "nonvoluntary",
>> + t->nvcsw, t->nivcsw);
>> +}
>
> print_task_stats versus print_taskstats is a bit confusing, but I guess it
> doesn't matter.
>
> More significantly, the whole idea of calling it "task stats" isn't a good
> one: it's far too general. The whole kernel interface is called taskstats,
> but the additions here are a tiny part of that.
>
> Perhaps task_context_switch_rates would be more appropriate, although
> rather a lot to type.
>
I agree, taskstats is the name given to the genetlink interface.
> The patch otherwise seems OK. Thoughts:
>
> - Do we need to increment TASKSTATS_VERSION for this? I forget the rules
> there.
Any ABI change should result in a version bump. So the bump is ok
>
> - The lack of context-switch accounting in taskstats is, I think, a
> simple oversight. It should have been included on day one.
>
Yes, it should have been included
> There are perhaps other things which _should_ be in taskstats, but we
> forgot to add them. Can we think of any such things?
>
I think it's worth reviewing the data exported. I thought CSA filled
out the gaps, but it's definitely worth revisiting.
> We shouldn't just toss any old random stuff in there: it should be
> things which make sense, and which Unix or Linux accounting traditionally
> provides, and it should be something which we expect won't suddenly
> become unsupportable if people make internal kernel changes.
>
Yes, agreed. The interface must also be open for changes to accounting information
that might be useful as a result of new features, like containers, etc.
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] Performance Stats: Kernel patch
@ 2007-05-22 17:19 Maxim Uvarov
2007-05-22 18:48 ` Dave Jones
2007-05-22 20:08 ` Andrew Morton
0 siblings, 2 replies; 19+ messages in thread
From: Maxim Uvarov @ 2007-05-22 17:19 UTC (permalink / raw)
To: LKML; +Cc: linuxppc-dev
Hello Andrew,
Sorry for bothering you. I know you are very busy but could
you please tell me what is situation of this patch? You wrote
me you'll discuss it with someone about it and say can it be
accepted or not and in which form. As I understand the
situation all problems now are in syscall counter. What
about other counters?
I changed headers and re-resent patch as you asked. Are there any new?
If it could be accepted I can do fast syscalls counting too.
Thank you very much,
Maxim.
Patch makes available to the user the following
task and process performance statistics:
* Involuntary Context Switches (task_struct->nivcsw)
* Voluntary Context Switches (task_struct->nvcsw)
* Number of system calls (added new counter
thread_info->sysall_count)
Statistics information is available from:
1. taskstats interface (Documentation/accounting/)
2. /proc/PID/status (task only).
This data is useful for detecting hyperactivity
patterns between processes.
Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
---
Documentation/accounting/getdelays.c | 20 ++++++++++++++++++--
Documentation/accounting/taskstats-struct.txt | 7 +++++++
arch/i386/kernel/asm-offsets.c | 1 +
arch/i386/kernel/entry.S | 3 +++
arch/powerpc/kernel/asm-offsets.c | 2 ++
arch/powerpc/kernel/entry_32.S | 5 +++++
arch/powerpc/kernel/entry_64.S | 5 +++++
arch/x86_64/kernel/asm-offsets.c | 1 +
arch/x86_64/kernel/entry.S | 3 +++
fs/proc/array.c | 14 ++++++++++++++
include/asm-i386/thread_info.h | 1 +
include/asm-powerpc/thread_info.h | 1 +
include/asm-x86_64/thread_info.h | 1 +
include/linux/taskstats.h | 6 +++++-
kernel/fork.c | 3 +++
kernel/taskstats.c | 6 ++++++
16 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index e9126e7..1be7d65 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -49,6 +49,7 @@ char name[100];
int dbg;
int print_delays;
int print_io_accounting;
+int print_task_stats;
__u64 stime, utime;
#define PRINTF(fmt, arg...) { \
@@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
"IO %15s%15s\n"
" %15llu%15llu\n"
"MEM %15s%15s\n"
- " %15llu%15llu\n\n",
+ " %15llu%15llu\n"
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
@@ -196,6 +197,15 @@ void print_delayacct(struct taskstats *t)
"count", "delay total", t->swapin_count, t->swapin_delay_total);
}
+void print_taskstats(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s%15s\n"
+ " %15lu%15lu%15lu\n",
+ "syscalls", "voluntary", "nonvoluntary",
+ t->syscall_counter, t->nvcsw, t->nivcsw);
+
+}
+
void print_ioacct(struct taskstats *t)
{
printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
@@ -227,7 +237,7 @@ int main(int argc, char *argv[])
struct msgtemplate msg;
while (1) {
- c = getopt(argc, argv, "diw:r:m:t:p:v:l");
+ c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
if (c < 0)
break;
@@ -240,6 +250,10 @@ int main(int argc, char *argv[])
printf("printing IO accounting\n");
print_io_accounting = 1;
break;
+ case 'q':
+ printf("printing task/process stasistics:\n");
+ print_task_stats = 1;
+ break;
case 'w':
strncpy(logfile, optarg, MAX_FILENAME);
printf("write to file %s\n", logfile);
@@ -381,6 +395,8 @@ int main(int argc, char *argv[])
print_delayacct((struct taskstats *) NLA_DATA(na));
if (print_io_accounting)
print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_stats)
+ print_taskstats((struct taskstats *) NLA_DATA(na));
if (fd) {
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
err(1,"write error\n");
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index 661c797..606aef6 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
/* Extended accounting fields end */
Their values are collected if CONFIG_TASK_XACCT is set.
+4) Per-task and per-thread statistics
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -158,4 +160,9 @@ struct taskstats {
/* Extended accounting fields end */
+4) Per-task and per-thread statiscits
+ __u64 syscall_counter; /* Syscall counter */
+ __u64 nvcsw; /* Context voluntary switch counter */
+ __u64 nivcsw; /* Context involuntary switch counter */
+
}
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 1b2f3cd..4ad49d2 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ void foo(void)
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ OFFSET(TI_syscall_count, thread_info, syscall_count);
BLANK();
OFFSET(GDS_size, Xgt_desc_struct, size);
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 5e47683..836961f 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -332,6 +332,9 @@ sysenter_past_esp:
SAVE_ALL
GET_THREAD_INFO(%ebp)
+#ifdef CONFIG_TASKSTATS
+ incl TI_syscall_count(%ebp) # Increment syscalls counter
+#endif
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 030d300..b640039 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -94,6 +94,8 @@ int main(void)
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_SYSCALL_COUNT, offsetof(struct thread_info, syscall_count));
+
#ifdef CONFIG_PPC32
DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index c03e829..5d919e4 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -202,6 +202,11 @@ _GLOBAL(DoSyscall)
bl do_show_syscall
#endif /* SHOW_SYSCALLS */
rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+#ifdef CONFIG_TASKSTATS
+ lwz r11,TI_SYSC_CNT(r10)
+ addi r11,r11,1
+ stw r11,TI_SYSC_CNT(r10)
+#endif
lwz r11,TI_FLAGS(r10)
andi. r11,r11,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2551c08..5907f76 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -115,6 +115,11 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
addi r9,r1,STACK_FRAME_OVERHEAD
#endif
clrrdi r11,r1,THREAD_SHIFT
+#ifdef CONFIG_TASKSTATS
+ ld r10,TI_SYSCALL_COUNT(r11)
+ addi r10,r10,1
+ std r10,TI_SYSCALL_COUNT(r11)
+#endif
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2..da57356 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -35,6 +35,7 @@ int main(void)
ENTRY(addr_limit);
ENTRY(preempt_count);
ENTRY(status);
+ ENTRY(syscall_count);
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9f5dac6..af40ead 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -229,6 +229,9 @@ ENTRY(system_call)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
+#ifdef CONFIG_TASKSTATS
+ addq $1, threadinfo_syscall_count(%rcx) # Increment syscalls counter
+#endif
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab..c805c08 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -290,6 +290,19 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
cap_t(p->cap_permitted),
cap_t(p->cap_effective));
}
+static inline char *task_perf(struct task_struct *p, char *buffer)
+{
+ /* Syscall counter adds 1 line overhead on each syscall execution
+ * in entry.S, so probably it is the leave this stuff under ifdefs.
+ */
+#ifdef CONFIG_TASKSTATS
+ buffer += sprintf(buffer, "Syscalls:\t%lu\n", p->thread_info->syscall_count);
+#endif
+ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
+ "nonvoluntary_ctxt_switches:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
+}
int proc_pid_status(struct task_struct *task, char * buffer)
{
@@ -309,6 +322,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
+ buffer = task_perf(task, buffer);
return buffer - orig;
}
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 4b187bb..bccfd6a 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -33,6 +33,7 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit; /* thread address space:
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index 3f32ca8..5306ac2 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -35,6 +35,7 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
struct restart_block restart_block;
unsigned long local_flags; /* private flags for thread */
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 74a6c74..e53022d 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -31,6 +31,7 @@ struct thread_info {
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit;
struct restart_block restart_block;
};
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 3fced47..98dfde7 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
*/
-#define TASKSTATS_VERSION 3
+#define TASKSTATS_VERSION 4
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -141,6 +141,10 @@ struct taskstats {
__u64 write_syscalls; /* write syscalls */
/* Extended accounting fields end */
+ __u64 syscall_counter; /* Syscall counter */
+ __u64 nvcsw;
+ __u64 nivcsw;
+
#define TASKSTATS_HAS_IO_ACCOUNTING
/* Per-task storage I/O accounting starts */
__u64 read_bytes; /* bytes of read I/O */
diff --git a/kernel/fork.c b/kernel/fork.c
index fc723e5..5213738 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1042,6 +1042,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->wchar = 0; /* I/O counter: bytes written */
p->syscr = 0; /* I/O counter: read syscalls */
p->syscw = 0; /* I/O counter: write syscalls */
+#ifdef CONFIG_TASKSTATS
+ p->thread_info->syscall_count = 0; /* Syscall counter: total numbers of syscalls */
+#endif
task_io_accounting_init(p);
acct_clear_integrals(p);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476f..d7bf33f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,9 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
/* fill in basic acct fields */
stats->version = TASKSTATS_VERSION;
+ stats->syscall_counter = tsk->thread_info->syscall_count;
+ stats->nvcsw = tsk->nvcsw;
+ stats->nivcsw = tsk->nivcsw;
bacct_add_tsk(stats, tsk);
/* fill in extended acct fields */
@@ -242,6 +245,9 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
*/
delayacct_add_tsk(stats, tsk);
+ stats->syscall_counter += tsk->thread_info->syscall_count;
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
} while_each_thread(first, tsk);
unlock_task_sighand(first, &flags);
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-05-22 17:19 Maxim Uvarov
@ 2007-05-22 18:48 ` Dave Jones
2007-05-22 20:08 ` Andrew Morton
1 sibling, 0 replies; 19+ messages in thread
From: Dave Jones @ 2007-05-22 18:48 UTC (permalink / raw)
To: Maxim Uvarov; +Cc: LKML, linuxppc-dev
On Tue, May 22, 2007 at 05:19:52PM +0000, Maxim Uvarov wrote:
> diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
> index 4b187bb..bccfd6a 100644
> --- a/include/asm-i386/thread_info.h
> +++ b/include/asm-i386/thread_info.h
> @@ -33,6 +33,7 @@ struct thread_info {
> int preempt_count; /* 0 => preemptable, <0 => BUG */
>
>
> + unsigned long syscall_count; /* Syscall counter */
> mm_segment_t addr_limit; /* thread address space:
> 0-0xBFFFFFFF for user-thead
> 0-0xFFFFFFFF for kernel-thread
It seems a bit unkind to bloat up the thread_info for every process
of every user when the common case will be people that don't care about
this feature at all.
Especially when the same information could be got from ptrace.
Dave
--
http://www.codemonkey.org.uk
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-05-22 17:19 Maxim Uvarov
2007-05-22 18:48 ` Dave Jones
@ 2007-05-22 20:08 ` Andrew Morton
1 sibling, 0 replies; 19+ messages in thread
From: Andrew Morton @ 2007-05-22 20:08 UTC (permalink / raw)
To: Maxim Uvarov; +Cc: LKML, linuxppc-dev
On Tue, 22 May 2007 17:19:52 +0000
Maxim Uvarov <muvarov@ru.mvista.com> wrote:
> Sorry for bothering you. I know you are very busy but could
> you please tell me what is situation of this patch?
I'd like to add the context-switch accounting to the taskstats payload.
As we'd then need to uprev the taskstats payload and version it makes sense
to have a look around, see if there's anything else which should be in
there but got missed.
I don't think we can accept the number-of-syscalls accounting feature. It
adds a memory increment into the kernel's number-one hotpath. Something
which people like to obsessively microbenchmark.
And as I said earlier, a 32-bit counter can be overflowed in mere seconds,
so that needs to become 64-bit, in which case we add a memory increment and
a test-n-branch to that hottest path.
There _is_ some cumulative overhead here, and I don't see how the value of
the syscall counter can justify it.
^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH] Performance Stats: Kernel patch
@ 2007-05-10 17:19 Maxim Uvarov
2007-05-10 23:31 ` Andi Kleen
0 siblings, 1 reply; 19+ messages in thread
From: Maxim Uvarov @ 2007-05-10 17:19 UTC (permalink / raw)
To: LKML
I'm sending here the latest version of this patch.
Seems it was not on lkml mail list.
Patch makes available to the user the following
task and process performance statistics:
* Involuntary Context Switches (task_struct->nivcsw)
* Voluntary Context Switches (task_struct->nvcsw)
* Number of system calls (added new counter
thread_info->sysall_count)
Statistics information is available from:
1. taskstats interface (Documentation/accounting/)
2. /proc/PID/status (task only).
This data is useful for detecting hyperactivity
patterns between processes.
Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
Signed-off-by: Maxim Uvarov <muvarov@ru.mvista.com>
---
Documentation/accounting/getdelays.c | 20 ++++++++++++++++++--
Documentation/accounting/taskstats-struct.txt | 7 +++++++
arch/i386/kernel/asm-offsets.c | 1 +
arch/i386/kernel/entry.S | 3 +++
arch/powerpc/kernel/asm-offsets.c | 2 ++
arch/powerpc/kernel/entry_32.S | 5 +++++
arch/powerpc/kernel/entry_64.S | 5 +++++
arch/x86_64/kernel/asm-offsets.c | 1 +
arch/x86_64/kernel/entry.S | 3 +++
fs/proc/array.c | 14 ++++++++++++++
include/asm-i386/thread_info.h | 1 +
include/asm-powerpc/thread_info.h | 1 +
include/asm-x86_64/thread_info.h | 1 +
include/linux/taskstats.h | 6 +++++-
kernel/fork.c | 3 +++
kernel/taskstats.c | 6 ++++++
16 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index e9126e7..1be7d65 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -49,6 +49,7 @@ char name[100];
int dbg;
int print_delays;
int print_io_accounting;
+int print_task_stats;
__u64 stime, utime;
#define PRINTF(fmt, arg...) { \
@@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t)
"IO %15s%15s\n"
" %15llu%15llu\n"
"MEM %15s%15s\n"
- " %15llu%15llu\n\n",
+ " %15llu%15llu\n"
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
@@ -196,6 +197,15 @@ void print_delayacct(struct taskstats *t)
"count", "delay total", t->swapin_count, t->swapin_delay_total);
}
+void print_taskstats(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s%15s\n"
+ " %15lu%15lu%15lu\n",
+ "syscalls", "voluntary", "nonvoluntary",
+ t->syscall_counter, t->nvcsw, t->nivcsw);
+
+}
+
void print_ioacct(struct taskstats *t)
{
printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
@@ -227,7 +237,7 @@ int main(int argc, char *argv[])
struct msgtemplate msg;
while (1) {
- c = getopt(argc, argv, "diw:r:m:t:p:v:l");
+ c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
if (c < 0)
break;
@@ -240,6 +250,10 @@ int main(int argc, char *argv[])
printf("printing IO accounting\n");
print_io_accounting = 1;
break;
+ case 'q':
+ printf("printing task/process stasistics:\n");
+ print_task_stats = 1;
+ break;
case 'w':
strncpy(logfile, optarg, MAX_FILENAME);
printf("write to file %s\n", logfile);
@@ -381,6 +395,8 @@ int main(int argc, char *argv[])
print_delayacct((struct taskstats *) NLA_DATA(na));
if (print_io_accounting)
print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_stats)
+ print_taskstats((struct taskstats *) NLA_DATA(na));
if (fd) {
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
err(1,"write error\n");
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
index 661c797..5dac173 100644
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -22,6 +22,8 @@ There are three different groups of fields in the struct taskstats:
/* Extended accounting fields end */
Their values are collected if CONFIG_TASK_XACCT is set.
+4) Per-task and per-thread statistics
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -158,4 +160,9 @@ struct taskstats {
/* Extended accounting fields end */
+4) Per-task and per-thread statiscits
+ __u32 syscall_counter; /* Syscall counter */
+ __u32 nvcsw; /* Context voluntary switch counter */
+ __u32 nivcsw; /* Context involuntary switch counter */
+
}
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 1b2f3cd..4ad49d2 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ void foo(void)
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ OFFSET(TI_syscall_count, thread_info, syscall_count);
BLANK();
OFFSET(GDS_size, Xgt_desc_struct, size);
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 5e47683..836961f 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -332,6 +332,9 @@ sysenter_past_esp:
SAVE_ALL
GET_THREAD_INFO(%ebp)
+#ifdef CONFIG_TASKSTATS
+ incl TI_syscall_count(%ebp) # Increment syscalls counter
+#endif
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 030d300..b640039 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -94,6 +94,8 @@ int main(void)
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_SYSCALL_COUNT, offsetof(struct thread_info, syscall_count));
+
#ifdef CONFIG_PPC32
DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index c03e829..5d919e4 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -202,6 +202,11 @@ _GLOBAL(DoSyscall)
bl do_show_syscall
#endif /* SHOW_SYSCALLS */
rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+#ifdef CONFIG_TASKSTATS
+ lwz r11,TI_SYSC_CNT(r10)
+ addi r11,r11,1
+ stw r11,TI_SYSC_CNT(r10)
+#endif
lwz r11,TI_FLAGS(r10)
andi. r11,r11,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2551c08..5907f76 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -115,6 +115,11 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
addi r9,r1,STACK_FRAME_OVERHEAD
#endif
clrrdi r11,r1,THREAD_SHIFT
+#ifdef CONFIG_TASKSTATS
+ ld r10,TI_SYSCALL_COUNT(r11)
+ addi r10,r10,1
+ std r10,TI_SYSCALL_COUNT(r11)
+#endif
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index 96687e2..da57356 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -35,6 +35,7 @@ int main(void)
ENTRY(addr_limit);
ENTRY(preempt_count);
ENTRY(status);
+ ENTRY(syscall_count);
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9f5dac6..af40ead 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -229,6 +229,9 @@ ENTRY(system_call)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
+#ifdef CONFIG_TASKSTATS
+ addq $1, threadinfo_syscall_count(%rcx) # Increment syscalls counter
+#endif
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab..c805c08 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -290,6 +290,19 @@ static inline char *task_cap(struct task_struct *p, char *buffer)
cap_t(p->cap_permitted),
cap_t(p->cap_effective));
}
+static inline char *task_perf(struct task_struct *p, char *buffer)
+{
+ /* Syscall counter adds 1 line overhead on each syscall execution
+ * in entry.S, so probably it is the leave this stuff under ifdefs.
+ */
+#ifdef CONFIG_TASKSTATS
+ buffer += sprintf(buffer, "Syscalls:\t%lu\n", p->thread_info->syscall_count);
+#endif
+ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
+ "nonvoluntary_ctxt_switches:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
+}
int proc_pid_status(struct task_struct *task, char * buffer)
{
@@ -309,6 +322,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
+ buffer = task_perf(task, buffer);
return buffer - orig;
}
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 4b187bb..bccfd6a 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -33,6 +33,7 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit; /* thread address space:
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index 3f32ca8..5306ac2 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -35,6 +35,7 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
struct restart_block restart_block;
unsigned long local_flags; /* private flags for thread */
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 74a6c74..e53022d 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -31,6 +31,7 @@ struct thread_info {
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit;
struct restart_block restart_block;
};
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 3fced47..e3341b6 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
*/
-#define TASKSTATS_VERSION 3
+#define TASKSTATS_VERSION 4
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -141,6 +141,10 @@ struct taskstats {
__u64 write_syscalls; /* write syscalls */
/* Extended accounting fields end */
+ __u32 syscall_counter; /* Syscall counter */
+ __u32 nvcsw;
+ __u32 nivcsw;
+
#define TASKSTATS_HAS_IO_ACCOUNTING
/* Per-task storage I/O accounting starts */
__u64 read_bytes; /* bytes of read I/O */
diff --git a/kernel/fork.c b/kernel/fork.c
index fc723e5..5213738 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1042,6 +1042,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->wchar = 0; /* I/O counter: bytes written */
p->syscr = 0; /* I/O counter: read syscalls */
p->syscw = 0; /* I/O counter: write syscalls */
+#ifdef CONFIG_TASKSTATS
+ p->thread_info->syscall_count = 0; /* Syscall counter: total numbers of syscalls */
+#endif
task_io_accounting_init(p);
acct_clear_integrals(p);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476f..d7bf33f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -196,6 +196,9 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
/* fill in basic acct fields */
stats->version = TASKSTATS_VERSION;
+ stats->syscall_counter = tsk->thread_info->syscall_count;
+ stats->nvcsw = tsk->nvcsw;
+ stats->nivcsw = tsk->nivcsw;
bacct_add_tsk(stats, tsk);
/* fill in extended acct fields */
@@ -242,6 +245,9 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
*/
delayacct_add_tsk(stats, tsk);
+ stats->syscall_counter += tsk->thread_info->syscall_count;
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
} while_each_thread(first, tsk);
unlock_task_sighand(first, &flags);
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH] Performance Stats: Kernel patch
2007-05-10 17:19 Maxim Uvarov
@ 2007-05-10 23:31 ` Andi Kleen
2007-05-11 16:55 ` Maxim Uvarov
0 siblings, 1 reply; 19+ messages in thread
From: Andi Kleen @ 2007-05-10 23:31 UTC (permalink / raw)
To: Maxim Uvarov; +Cc: LKML
Maxim Uvarov <muvarov@ru.mvista.com> writes:
>
> This data is useful for detecting hyperactivity
> patterns between processes.
You need a lot better rationale to slow down these important
fast paths. Particularly the syscall path is very hot.
Is this something that is really generally useful? The context
switch counters might be occasionally useful, but strace can
you just give the syscall count anyways. Still not sure
it really should go into the standard kernel. Perhaps just keep it
as a systemtap script?
-Andi
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] Performance Stats: Kernel patch
2007-05-10 23:31 ` Andi Kleen
@ 2007-05-11 16:55 ` Maxim Uvarov
0 siblings, 0 replies; 19+ messages in thread
From: Maxim Uvarov @ 2007-05-11 16:55 UTC (permalink / raw)
To: Andi Kleen; +Cc: LKML
Andi Kleen wrote:
>Maxim Uvarov <muvarov@ru.mvista.com> writes:
>
>
>>This data is useful for detecting hyperactivity
>>patterns between processes.
>>
>>
>
>You need a lot better rationale to slow down these important
>fast paths. Particularly the syscall path is very hot.
>
>Is this something that is really generally useful? The context
>switch counters might be occasionally useful, but strace can
>you just give the syscall count anyways. Still not sure
>it really should go into the standard kernel. Perhaps just keep it
>as a systemtap script?
>
>-Andi
>
>
It is difficult to use strace for multi thread applications to
verify which thread is doing what.
^ permalink raw reply [flat|nested] 19+ messages in thread
[parent not found: <4625FFCF.8040402@ru.mvista.com>]
* Re: [patch] Performance Stats: Kernel patch
[not found] <4625FFCF.8040402@ru.mvista.com>
@ 2007-04-20 4:36 ` Andrew Morton
2007-04-25 10:59 ` Maxim Uvarov
0 siblings, 1 reply; 19+ messages in thread
From: Andrew Morton @ 2007-04-20 4:36 UTC (permalink / raw)
To: Maxim Uvarov; +Cc: linux-kernel
(re-added lklml)
> Patch makes available to the user the following
> thread performance statistics:
> * Involuntary Context Switches (task_struct->nivcsw)
> * Voluntary Context Switches (task_struct->nvcsw)
I suppose they might be useful, but I'd be interested in hearing what
the uses of this information are - why is it valuable?
> * Number of system calls (added new counter
> thread_info->sysc_cnt)
eek. syscall entry is a really hot hotpath, and, perhaps worse, it's the
sort of thing which people often measure ;)
I agree that this is a potentially interesting piece of instrumentation,
but it would need to be _super_ interesting to justify just the single
instruction overhead, and the cacheline touch.
So, again, please provide justification for this additional overhead.
> Statistics information is available from
> /proc/PID/status
No, /prod/pid is very lame. If we're going to do this then we'll need to
deliver the information via taskstats. And update getdelays.c and the
documentation, too. We could also make it visible in /proc I guess, if
that's cheap to do. But taskstats is the primary means of delivery - using
/proc is daft when we have that.
> arch/powerpc/kernel/entry_32.S | 5 +++++
> arch/powerpc/kernel/entry_64.S | 5 +++++
> arch/x86_64/kernel/asm-offsets.c | 3 +++
> arch/x86_64/kernel/entry.S | 3 +++
> fs/proc/array.c | 17 +++++++++++++++++
> include/asm-i386/thread_info.h | 5 +++--
> include/asm-powerpc/thread_info.h | 3 +++
> include/asm-x86_64/thread_info.h | 4 +++-
> kernel/fork.c | 4 ++++
> lib/Kconfig.debug | 15 +++++++++++++++
> 13 files changed, 71 insertions(+), 3 deletions(-)
>
The patch adds far too many ifdefs to core C files.
> +#ifdef CONFIG_THREAD_PERF_STAT
> +static inline char *task_perf(struct task_struct *p, char *buffer)
> +{
> +#ifdef CONFIG_THREAD_PERF_STAT_SYSC
> + buffer += sprintf(buffer, "Syscalls:\t%lu\n", p->thread_info->sysc_cnt);
> +#endif /* CONFIG_THREAD_PERF_STAT_SYSC */
> +
> + return buffer + sprintf(buffer, "Nvcsw:\t%lu\n"
> + "Nivcsw:\t%lu\n",
> + p->nvcsw,
> + p->nivcsw);
> +}
Here, you can put
#else
static inline char *task_perf(struct task_struct *p, char *buffer)
{
return buffer;
}
> +#endif /* CONFIG_THREAD_PERF_STAT */
> +
> int proc_pid_status(struct task_struct *task, char * buffer)
> {
> char * orig = buffer;
> @@ -309,6 +323,9 @@ int proc_pid_status(struct task_struct *
> #if defined(CONFIG_S390)
> buffer = task_show_regs(task, buffer);
> #endif
> +#ifdef CONFIG_THREAD_PERF_STAT
> + buffer = task_perf(task, buffer);
> +#endif /* CONFIG_THREAD_PERF_STAT */
so these ifdefs go away
> return buffer - orig;
> }
>
> Index: linux-2.6.21-rc5/kernel/fork.c
> ===================================================================
> --- linux-2.6.21-rc5.orig/kernel/fork.c
> +++ linux-2.6.21-rc5/kernel/fork.c
> @@ -1044,6 +1044,10 @@ static struct task_struct *copy_process(
> p->syscr = 0; /* I/O counter: read syscalls */
> p->syscw = 0; /* I/O counter: write syscalls */
> #endif
> +#ifdef CONFIG_THREAD_PERF_STAT_SYSC
> + p->thread_info->sysc_cnt = 0; /* Syscall counter: total numbers of syscalls */
> +#endif /* CONFIG_THREAD_PERF_STAT_SYSC */
And this can be removed via
#ifdef CONFIG_THREAD_PERF_STAT_SYSC
static inline thread_perf_stat_init(struct task_struct *p)
{
p->thread_info->sysc_cnt = 0;
}
#else
static inline thread_perf_stat_init(struct task_struct *p)
{
}
#endif
in a header file somewhere.
But I expect that you'll find that all your ifdefs can be removed, and you
can piggyback the whole feature on top of one of the existing taskstats
config items.
> task_io_accounting_init(p);
So sysc_cnt will then get initialised in task_io_accounting_init() (perhaps
after suitably renaming task_io_accounting_init())
> +config THREAD_PERF_STAT
> + bool "Thread performance statistics"
> + help
> + Make available to the user the following per-thread performance statistics:
> + * Number of involuntary context switches
> + * Number of voluntary context switches
> + * Number of system calls (optional)
> + This information is available via /proc/PID/status.
> +
> +config THREAD_PERF_STAT_SYSC
> + bool "Enable syscall counter"
> + depends on THREAD_PERF_STAT && (X86 || PPC)
> + help
> + This option adds a syscall counter to /proc/PID/status.
I'm dubious about the configurability.
I think this is the sort of feature which we'd want to have generally
available, and to discourage people from disabling it. I mean, if it's
useful enough to justify the runtime overhead, then it's pretty darn useful
and a lot of people will want it.
Probably making this a part of the taskstats suite would cover this well
enough.
> Index: linux-2.6.21-rc5/arch/i386/kernel/entry.S
> ===================================================================
> --- linux-2.6.21-rc5.orig/arch/i386/kernel/entry.S
> +++ linux-2.6.21-rc5/arch/i386/kernel/entry.S
> @@ -334,6 +334,9 @@ sysenter_past_esp:
> CFI_ADJUST_CFA_OFFSET 4
> SAVE_ALL
> GET_THREAD_INFO(%ebp)
> +#ifdef CONFIG_THREAD_PERF_STAT_SYSC
> + incl TI_sysc_cnt(%ebp) # Increment syscalls counter
> +#endif /* CONFIG_THREAD_PERF_STAT_SYSC */
There's no sane way of avoiding an ifdef here.
> + ENTRY(sysc_cnt);
Please don't use arbitrarily truncated identifiers like sysc_count. oops,
syscall_cnt, oops, sysc_cnt.
nr_syscalls or syscall_count would be the expected identifiers here.
But there's not much point in firing up the editor until we've worked out
why we want to add these features to the kernel.
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [patch] Performance Stats: Kernel patch
2007-04-20 4:36 ` [patch] " Andrew Morton
@ 2007-04-25 10:59 ` Maxim Uvarov
0 siblings, 0 replies; 19+ messages in thread
From: Maxim Uvarov @ 2007-04-25 10:59 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 2926 bytes --]
Hello Andrew,
I've added taskstats interface to that. Patch is attached.
Please also see my answers bellow.
Andrew Morton wrote:
>(re-added lklml)
>
>
>
>>Patch makes available to the user the following
>>thread performance statistics:
>> * Involuntary Context Switches (task_struct->nivcsw)
>> * Voluntary Context Switches (task_struct->nvcsw)
>>
>>
>
>I suppose they might be useful, but I'd be interested in hearing what
>the uses of this information are - why is it valuable?
>
>
>
We have had a customer request this feature. I'm not sure exactly why they
want this feature, but in a telecom system it is very common to monitor
statistical information about various parts of the system and watch for
anomalies and trends. If one of these statistics has a sudden spike or
has a
gradual trend, the operator knows they need to take action before a problem
occurs or can go back and analyze why a spike occurred.
>> * Number of system calls (added new counter
>> thread_info->sysc_cnt)
>>
>>
>
>eek. syscall entry is a really hot hotpath, and, perhaps worse, it's the
>sort of thing which people often measure ;)
>
>I agree that this is a potentially interesting piece of instrumentation,
>but it would need to be _super_ interesting to justify just the single
>instruction overhead, and the cacheline touch.
>
>So, again, please provide justification for this additional overhead.
>
>
>
Overhead is too small. And it is difficult to measure this difference.
I tried to measure syscall execution time with lat_syscall program
from lmbench package. I run each tests for 2-3 hours with different
syscalls on qemu UP machine.
Test is:
./lat_syscall -N 1000 null
./lat_syscall -N 1000 read
./lat_syscall -N 1000 write
./lat_syscall -N 1000 stat
./lat_syscall -N 1000 fstat
./lat_syscall -N 1000 open
Result is that syscall patch is in the middle of 3 execution
without patch.
without patch:
Simple syscall: 1.0660 microseconds
Simple read: 3.5032 microseconds
Simple write: 2.6576 microseconds
Simple stat: 41.0829 microseconds
Simple fstat: 10.1343 microseconds
Simple open/close: 904.6792 microseconds
Simple syscall: 0.9961 microseconds
Simple read: 3.0027 microseconds
Simple write: 2.1600 microseconds
Simple stat: 47.7678 microseconds
Simple fstat: 10.8242 microseconds
Simple open/close: 905.9916 microseconds
Simple syscall: 1.0035 microseconds
Simple read: 3.0627 microseconds
Simple write: 2.1435 microseconds
Simple stat: 39.1947 microseconds
Simple fstat: 10.2982 microseconds
Simple open/close: 849.1624 microseconds
with patch:
Simple syscall: 1.0013 microseconds
Simple read: 3.6981 microseconds
Simple write: 2.6216 microseconds
Simple stat: 43.5101 microseconds
Simple fstat: 11.1318 microseconds
Simple open/close: 925.4793 microseconds
Because this measurement will be used with taskstats interface
I left it under ifdef CONFIG_TASKSTATS.
Best regards,
Maxim.
[-- Attachment #2: perf_stat.patch --]
[-- Type: text/plain, Size: 13460 bytes --]
-------------------------------------------------------
Patch makes available to the user the following
task and process performance statistics:
* Involuntary Context Switches (task_struct->nivcsw)
* Voluntary Context Switches (task_struct->nvcsw)
* Number of system calls (added new counter
thread_info->sysall_count)
Statistics information is available from:
1. taskstats interface (Documentation/accounting/)
2. /proc/PID/status (task only).
This data is useful for detecting hyperactivity
patterns between processes.
Signed-off-by: Maxim Uvarov muvarov@ru.mvista.com
Documentation/accounting/getdelays.c | 20 ++++++++++++++++++--
Documentation/accounting/taskstats-struct.txt | 7 +++++++
arch/i386/kernel/asm-offsets.c | 1 +
arch/i386/kernel/entry.S | 1 +
arch/powerpc/kernel/asm-offsets.c | 2 ++
arch/powerpc/kernel/entry_32.S | 5 +++++
arch/powerpc/kernel/entry_64.S | 5 +++++
arch/x86_64/kernel/asm-offsets.c | 1 +
arch/x86_64/kernel/entry.S | 1 +
fs/proc/array.c | 14 ++++++++++++++
include/asm-i386/thread_info.h | 1 +
include/asm-powerpc/thread_info.h | 1 +
include/asm-x86_64/thread_info.h | 1 +
include/linux/taskstats.h | 6 +++++-
kernel/fork.c | 4 ++++
kernel/taskstats.c | 6 ++++++
16 files changed, 73 insertions(+), 3 deletions(-)
Index: linux-2.6.21-rc5/fs/proc/array.c
===================================================================
--- linux-2.6.21-rc5.orig/fs/proc/array.c
+++ linux-2.6.21-rc5/fs/proc/array.c
@@ -290,6 +290,19 @@ static inline char *task_cap(struct task
cap_t(p->cap_permitted),
cap_t(p->cap_effective));
}
+static inline char *task_perf(struct task_struct *p, char *buffer)
+{
+ /* Syscall counter adds 1 line overhead on each syscall execution
+ * in entry.S, so probably it is the leave this stuff under ifdefs.
+ */
+#ifdef CONFIG_TASKSTATS
+ buffer += sprintf(buffer, "Syscalls:\t%lu\n", p->thread_info->syscall_count);
+#endif
+ return buffer + sprintf(buffer, "Nvcsw:\t%lu\n"
+ "Nivcsw:\t%lu\n",
+ p->nvcsw,
+ p->nivcsw);
+}
int proc_pid_status(struct task_struct *task, char * buffer)
{
@@ -309,6 +322,7 @@ int proc_pid_status(struct task_struct *
#if defined(CONFIG_S390)
buffer = task_show_regs(task, buffer);
#endif
+ buffer = task_perf(task, buffer);
return buffer - orig;
}
Index: linux-2.6.21-rc5/kernel/fork.c
===================================================================
--- linux-2.6.21-rc5.orig/kernel/fork.c
+++ linux-2.6.21-rc5/kernel/fork.c
@@ -1044,6 +1044,10 @@ static struct task_struct *copy_process(
p->syscr = 0; /* I/O counter: read syscalls */
p->syscw = 0; /* I/O counter: write syscalls */
#endif
+#ifdef CONFIG_TASKSTATS
+ p->thread_info->syscall_count = 0; /* Syscall counter: total numbers of syscalls */
+#endif
+
task_io_accounting_init(p);
acct_clear_integrals(p);
Index: linux-2.6.21-rc5/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.21-rc5.orig/arch/i386/kernel/entry.S
+++ linux-2.6.21-rc5/arch/i386/kernel/entry.S
@@ -334,6 +334,7 @@ sysenter_past_esp:
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
GET_THREAD_INFO(%ebp)
+ incl TI_syscall_count(%ebp) # Increment syscalls counter
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
Index: linux-2.6.21-rc5/arch/i386/kernel/asm-offsets.c
===================================================================
--- linux-2.6.21-rc5.orig/arch/i386/kernel/asm-offsets.c
+++ linux-2.6.21-rc5/arch/i386/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ void foo(void)
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ OFFSET(TI_syscall_count, thread_info, syscall_count);
BLANK();
OFFSET(GDS_size, Xgt_desc_struct, size);
Index: linux-2.6.21-rc5/include/asm-i386/thread_info.h
===================================================================
--- linux-2.6.21-rc5.orig/include/asm-i386/thread_info.h
+++ linux-2.6.21-rc5/include/asm-i386/thread_info.h
@@ -33,6 +33,7 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit; /* thread address space:
0-0xBFFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
Index: linux-2.6.21-rc5/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- linux-2.6.21-rc5.orig/arch/powerpc/kernel/asm-offsets.c
+++ linux-2.6.21-rc5/arch/powerpc/kernel/asm-offsets.c
@@ -94,6 +94,8 @@ int main(void)
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
+ DEFINE(TI_SYSCALL_COUNT, offsetof(struct thread_info, syscall_count));
+
#ifdef CONFIG_PPC32
DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
Index: linux-2.6.21-rc5/arch/powerpc/kernel/entry_32.S
===================================================================
--- linux-2.6.21-rc5.orig/arch/powerpc/kernel/entry_32.S
+++ linux-2.6.21-rc5/arch/powerpc/kernel/entry_32.S
@@ -202,6 +202,11 @@ _GLOBAL(DoSyscall)
bl do_show_syscall
#endif /* SHOW_SYSCALLS */
rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+#ifdef CONFIG_THREAD_PERF_STAT_SYSC
+ lwz r11,TI_SYSC_CNT(r10)
+ addi r11,r11,1
+ stw r11,TI_SYSC_CNT(r10)
+#endif /* CONFIG_THREAD_PERF_STAT_SYSC */
lwz r11,TI_FLAGS(r10)
andi. r11,r11,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
Index: linux-2.6.21-rc5/arch/powerpc/kernel/entry_64.S
===================================================================
--- linux-2.6.21-rc5.orig/arch/powerpc/kernel/entry_64.S
+++ linux-2.6.21-rc5/arch/powerpc/kernel/entry_64.S
@@ -115,6 +115,11 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISER
addi r9,r1,STACK_FRAME_OVERHEAD
#endif
clrrdi r11,r1,THREAD_SHIFT
+#ifdef CONFIG_TASKSTATS
+ ld r10,TI_SYSCALL_COUNT(r11)
+ addi r10,r10,1
+ std r10,TI_SYSCALL_COUNT(r11)
+#endif
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
Index: linux-2.6.21-rc5/arch/x86_64/kernel/asm-offsets.c
===================================================================
--- linux-2.6.21-rc5.orig/arch/x86_64/kernel/asm-offsets.c
+++ linux-2.6.21-rc5/arch/x86_64/kernel/asm-offsets.c
@@ -35,6 +35,7 @@ int main(void)
ENTRY(addr_limit);
ENTRY(preempt_count);
ENTRY(status);
+ ENTRY(syscall_count);
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
Index: linux-2.6.21-rc5/arch/x86_64/kernel/entry.S
===================================================================
--- linux-2.6.21-rc5.orig/arch/x86_64/kernel/entry.S
+++ linux-2.6.21-rc5/arch/x86_64/kernel/entry.S
@@ -229,6 +229,7 @@ ENTRY(system_call)
movq %rcx,RIP-ARGOFFSET(%rsp)
CFI_REL_OFFSET rip,RIP-ARGOFFSET
GET_THREAD_INFO(%rcx)
+ addq $1, threadinfo_syscall_count(%rcx) # Increment syscalls counter
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
Index: linux-2.6.21-rc5/include/asm-powerpc/thread_info.h
===================================================================
--- linux-2.6.21-rc5.orig/include/asm-powerpc/thread_info.h
+++ linux-2.6.21-rc5/include/asm-powerpc/thread_info.h
@@ -35,6 +35,7 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
struct restart_block restart_block;
unsigned long local_flags; /* private flags for thread */
Index: linux-2.6.21-rc5/include/asm-x86_64/thread_info.h
===================================================================
--- linux-2.6.21-rc5.orig/include/asm-x86_64/thread_info.h
+++ linux-2.6.21-rc5/include/asm-x86_64/thread_info.h
@@ -31,6 +31,7 @@ struct thread_info {
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable, <0 => BUG */
+ unsigned long syscall_count; /* Syscall counter */
mm_segment_t addr_limit;
struct restart_block restart_block;
};
Index: linux-2.6.21-rc5/Documentation/accounting/getdelays.c
===================================================================
--- linux-2.6.21-rc5.orig/Documentation/accounting/getdelays.c
+++ linux-2.6.21-rc5/Documentation/accounting/getdelays.c
@@ -49,6 +49,7 @@ char name[100];
int dbg;
int print_delays;
int print_io_accounting;
+int print_task_stats;
__u64 stime, utime;
#define PRINTF(fmt, arg...) { \
@@ -187,7 +188,7 @@ void print_delayacct(struct taskstats *t
"IO %15s%15s\n"
" %15llu%15llu\n"
"MEM %15s%15s\n"
- " %15llu%15llu\n\n",
+ " %15llu%15llu\n"
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
@@ -196,6 +197,15 @@ void print_delayacct(struct taskstats *t
"count", "delay total", t->swapin_count, t->swapin_delay_total);
}
+void print_taskstats(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s%15s\n"
+ " %15lu%15lu%15lu\n",
+ "syscalls", "voluntary", "nonvoluntary",
+ t->syscall_counter, t->nvcsw, t->nivcsw);
+
+}
+
void print_ioacct(struct taskstats *t)
{
printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
@@ -227,7 +237,7 @@ int main(int argc, char *argv[])
struct msgtemplate msg;
while (1) {
- c = getopt(argc, argv, "diw:r:m:t:p:v:l");
+ c = getopt(argc, argv, "qdiw:r:m:t:p:v:l");
if (c < 0)
break;
@@ -240,6 +250,10 @@ int main(int argc, char *argv[])
printf("printing IO accounting\n");
print_io_accounting = 1;
break;
+ case 'q':
+ printf("printing task/process stasistics:\n");
+ print_task_stats = 1;
+ break;
case 'w':
strncpy(logfile, optarg, MAX_FILENAME);
printf("write to file %s\n", logfile);
@@ -381,6 +395,8 @@ int main(int argc, char *argv[])
print_delayacct((struct taskstats *) NLA_DATA(na));
if (print_io_accounting)
print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_stats)
+ print_taskstats((struct taskstats *) NLA_DATA(na));
if (fd) {
if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
err(1,"write error\n");
Index: linux-2.6.21-rc5/include/linux/taskstats.h
===================================================================
--- linux-2.6.21-rc5.orig/include/linux/taskstats.h
+++ linux-2.6.21-rc5/include/linux/taskstats.h
@@ -31,7 +31,7 @@
*/
-#define TASKSTATS_VERSION 3
+#define TASKSTATS_VERSION 4
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
@@ -141,6 +141,10 @@ struct taskstats {
__u64 write_syscalls; /* write syscalls */
/* Extended accounting fields end */
+ __u32 syscall_counter; /* Syscall counter */
+ __u32 nvcsw;
+ __u32 nivcsw;
+
#define TASKSTATS_HAS_IO_ACCOUNTING
/* Per-task storage I/O accounting starts */
__u64 read_bytes; /* bytes of read I/O */
Index: linux-2.6.21-rc5/kernel/taskstats.c
===================================================================
--- linux-2.6.21-rc5.orig/kernel/taskstats.c
+++ linux-2.6.21-rc5/kernel/taskstats.c
@@ -196,6 +196,9 @@ static int fill_pid(pid_t pid, struct ta
/* fill in basic acct fields */
stats->version = TASKSTATS_VERSION;
+ stats->syscall_counter = tsk->thread_info->syscall_count;
+ stats->nvcsw = tsk->nvcsw;
+ stats->nivcsw = tsk->nivcsw;
bacct_add_tsk(stats, tsk);
/* fill in extended acct fields */
@@ -242,6 +245,9 @@ static int fill_tgid(pid_t tgid, struct
*/
delayacct_add_tsk(stats, tsk);
+ stats->syscall_counter += tsk->thread_info->syscall_count;
+ stats->nvcsw += tsk->nvcsw;
+ stats->nivcsw += tsk->nivcsw;
} while_each_thread(first, tsk);
unlock_task_sighand(first, &flags);
Index: linux-2.6.21-rc5/Documentation/accounting/taskstats-struct.txt
===================================================================
--- linux-2.6.21-rc5.orig/Documentation/accounting/taskstats-struct.txt
+++ linux-2.6.21-rc5/Documentation/accounting/taskstats-struct.txt
@@ -22,6 +22,8 @@ There are three different groups of fiel
/* Extended accounting fields end */
Their values are collected if CONFIG_TASK_XACCT is set.
+4) Per-task and per-thread statistics
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
@@ -158,4 +160,9 @@ struct taskstats {
/* Extended accounting fields end */
+4) Per-task and per-thread statiscits
+ __u32 syscall_counter; /* Syscall counter */
+ __u32 nvcsw; /* Context voluntary switch counter */
+ __u32 nivcsw; /* Context involuntary switch counter */
+
}
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2007-06-06 17:29 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-11 17:13 [PATCH] Performance Stats: Kernel patch Maxim Uvarov
2007-05-12 10:39 ` Andrea Righi
-- strict thread matches above, loose matches on Subject: below --
2007-06-05 14:43 Maxim Uvarov
2007-06-06 6:38 ` Andrew Morton
2007-06-06 17:29 ` Jay Lan
2007-05-30 18:49 Maxim Uvarov
2007-06-04 19:19 ` Andrew Morton
2007-06-04 19:33 ` Jay Lan
2007-06-04 19:49 ` Jonathan Lim
2007-06-04 20:13 ` Jay Lan
2007-06-05 6:50 ` Balbir Singh
2007-05-22 17:19 Maxim Uvarov
2007-05-22 18:48 ` Dave Jones
2007-05-22 20:08 ` Andrew Morton
2007-05-10 17:19 Maxim Uvarov
2007-05-10 23:31 ` Andi Kleen
2007-05-11 16:55 ` Maxim Uvarov
[not found] <4625FFCF.8040402@ru.mvista.com>
2007-04-20 4:36 ` [patch] " Andrew Morton
2007-04-25 10:59 ` Maxim Uvarov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox