* [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
@ 2006-07-06 11:37 Jan Kiszka
2006-07-06 11:41 ` Jan Kiszka
` (2 more replies)
0 siblings, 3 replies; 24+ messages in thread
From: Jan Kiszka @ 2006-07-06 11:37 UTC (permalink / raw)
To: xenomai-core
[-- Attachment #1: Type: text/plain, Size: 6892 bytes --]
Hi,
I think to remember people crying for some execution time statistics for quite a
while now. Here comes a straightforward approach to add this on a per-thread basis.
The results are printed like this:
root@domain.hid :/root# cat /proc/xenomai/stat
CPU PID MSW CSW PF STAT TIME NAME
0 0 0 7508041 0 01400080 166038894 ROOT
0 0 0 2 0 00000082 55 timsPipeReceiver
0 1009 751 8800 0 00c00082 143026 display-1008
0 1010 0 7514585 0 00c00084 54472477 sampling-1008
The new TIME column contains the accumulated execution time of each active thread in
nanoseconds. It's now a job of some user-space tool (shell script?) to pick those
numbers up and do, e.g., periodic evaluation (I'm thinking of some "xeno-top"). Any
volunteer around to hack this? Would be very welcome!
Jan
PS: Could someone have a look that I didn't miss an accounting point and my
acquisition is sound?
--
include/nucleus/pod.h | 28 ++++++++++++++++++++++++++++
include/nucleus/thread.h | 1 +
ksrc/nucleus/module.c | 26 +++++++++++++++++++++-----
ksrc/nucleus/pod.c | 5 +++++
ksrc/nucleus/thread.c | 1 +
5 files changed, 56 insertions(+), 5 deletions(-)
Index: include/nucleus/thread.h
===================================================================
--- include/nucleus/thread.h (revision 1303)
+++ include/nucleus/thread.h (working copy)
@@ -152,6 +152,7 @@ typedef struct xnthread {
unsigned long csw; /* Context switches (includes
secondary -> primary switches) */
unsigned long pf; /* Number of page faults */
+ xnticks_t exec_time; /* Accumulated execution time (ticks) */
} stat;
#endif /* CONFIG_XENO_OPT_STATS */
Index: include/nucleus/pod.h
===================================================================
--- include/nucleus/pod.h (revision 1303)
+++ include/nucleus/pod.h (working copy)
@@ -145,6 +145,10 @@ typedef struct xnsched {
xnthread_t rootcb; /*!< Root thread control block. */
+#ifdef CONFIG_XENO_OPT_STATS
+ xnticks_t last_csw; /*!< Last context switch (ticks). */
+#endif /* CONFIG_XENO_OPT_STATS */
+
} xnsched_t;
#ifdef CONFIG_SMP
@@ -544,6 +548,30 @@ static inline void xnpod_delete_self (vo
xnpod_delete_thread(xnpod_current_thread());
}
+#ifdef CONFIG_XENO_OPT_STATS
+static inline void xnpod_acc_exec_time(xnsched_t *sched,
+ xnthread_t *threadout)
+{
+ xnticks_t now = xntimer_get_rawclock();
+ threadout->stat.exec_time += now - sched->last_csw;
+ sched->last_csw = now;
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+ sched->last_csw = xntimer_get_rawclock();
+}
+#else /* !CONFIG_XENO_OPT_STATS */
+static inline void xnpod_acc_exec_time(xnsched_t *sched,
+ xnthread_t *threadout)
+{
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+}
+#endif /* CONFIG_XENO_OPT_STATS */
+
#ifdef __cplusplus
}
#endif
Index: ksrc/nucleus/thread.c
===================================================================
--- ksrc/nucleus/thread.c (revision 1303)
+++ ksrc/nucleus/thread.c (working copy)
@@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread,
thread->stat.ssw = 0;
thread->stat.csw = 0;
thread->stat.pf = 0;
+ thread->stat.exec_time = 0;
#endif /* CONFIG_XENO_OPT_STATS */
/* These will be filled by xnpod_start_thread() */
Index: ksrc/nucleus/pod.c
===================================================================
--- ksrc/nucleus/pod.c (revision 1303)
+++ ksrc/nucleus/pod.c (working copy)
@@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x
xnthread_cleanup_tcb(threadout);
+ /* no need to update stats of dying thread */
+ xnpod_update_csw_date(sched);
+
xnarch_finalize_and_switch(xnthread_archtcb(threadout),
xnthread_archtcb(threadin));
@@ -2431,6 +2434,7 @@ void xnpod_schedule(void)
xnarch_enter_root(xnthread_archtcb(threadin));
}
+ xnpod_acc_exec_time(sched, threadout);
xnthread_inc_csw(threadin);
xnarch_switch_to(xnthread_archtcb(threadout),
@@ -2602,6 +2606,7 @@ void xnpod_schedule_runnable(xnthread_t
nkpod->schedhook(runthread, XNREADY);
#endif /* __XENO_SIM__ */
+ xnpod_acc_exec_time(sched, runthread);
xnthread_inc_csw(threadin);
xnarch_switch_to(xnthread_archtcb(runthread),
Index: ksrc/nucleus/module.c
===================================================================
--- ksrc/nucleus/module.c (revision 1303)
+++ ksrc/nucleus/module.c (working copy)
@@ -254,6 +254,7 @@ struct stat_seq_iterator {
unsigned long ssw;
unsigned long csw;
unsigned long pf;
+ xnticks_t exec_time;
} stat_info[1];
};
@@ -294,13 +295,17 @@ static void stat_seq_stop(struct seq_fil
static int stat_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %s\n",
- "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME");
+ seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %12s"
+ " %s\n",
+ "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME",
+ "NAME");
else {
struct stat_seq_info *p = (struct stat_seq_info *)v;
- seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %s\n",
+ unsigned long long exec_time = xnpod_ticks2ns(p->exec_time);
+ seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %12llu"
+ " %s\n",
p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
- p->name);
+ xnarch_ulldiv(exec_time, 1000, NULL), p->name);
}
return 0;
@@ -318,7 +323,7 @@ static int stat_seq_open(struct inode *i
struct stat_seq_iterator *iter;
struct seq_file *seq;
xnholder_t *holder;
- int err, count;
+ int err, count, cpu;
spl_t s;
if (!nkpod)
@@ -341,6 +346,16 @@ static int stat_seq_open(struct inode *i
iter->nentries = 0;
+ /* update exec-time stats of currently running threads */
+ for_each_online_cpu(cpu) {
+ xnsched_t *sched;
+
+ xnlock_get_irqsave(&nklock, s);
+ sched = xnpod_sched_slot(cpu);
+ xnpod_acc_exec_time(sched, sched->runthread);
+ xnlock_put_irqrestore(&nklock, s);
+ }
+
/* Take a snapshot and release the nucleus lock immediately after,
so that dumping /proc/xenomai/stat with lots of entries won't
cause massive jittery. */
@@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i
iter->stat_info[n].ssw = thread->stat.ssw;
iter->stat_info[n].csw = thread->stat.csw;
iter->stat_info[n].pf = thread->stat.pf;
+ iter->stat_info[n].exec_time = thread->stat.exec_time;
}
xnlock_put_irqrestore(&nklock, s);
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka @ 2006-07-06 11:41 ` Jan Kiszka 2006-07-06 14:32 ` Philippe Gerum 2006-07-06 15:04 ` Philippe Gerum 2 siblings, 0 replies; 24+ messages in thread From: Jan Kiszka @ 2006-07-06 11:41 UTC (permalink / raw) To: xenomai-core [-- Attachment #1: Type: text/plain, Size: 1213 bytes --] Jan Kiszka wrote: > Hi, > > I think to remember people crying for some execution time statistics for quite a > while now. Here comes a straightforward approach to add this on a per-thread basis. > The results are printed like this: > > root@domain.hid :/root# cat /proc/xenomai/stat > CPU PID MSW CSW PF STAT TIME NAME > 0 0 0 7508041 0 01400080 166038894 ROOT > 0 0 0 2 0 00000082 55 timsPipeReceiver > 0 1009 751 8800 0 00c00082 143026 display-1008 > 0 1010 0 7514585 0 00c00084 54472477 sampling-1008 > > The new TIME column contains the accumulated execution time of each active thread in > nanoseconds. It's now a job of some user-space tool (shell script?) to pick those I was thinking of the right unit, I swear: s/nanoseconds/microseconds/ > numbers up and do, e.g., periodic evaluation (I'm thinking of some "xeno-top"). Any > volunteer around to hack this? Would be very welcome! > > Jan > > PS: Could someone have a look that I didn't miss an accounting point and my > acquisition is sound? > Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 250 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka 2006-07-06 11:41 ` Jan Kiszka @ 2006-07-06 14:32 ` Philippe Gerum 2006-07-06 15:09 ` Jan Kiszka 2006-07-06 23:47 ` Gilles Chanteperdrix 2006-07-06 15:04 ` Philippe Gerum 2 siblings, 2 replies; 24+ messages in thread From: Philippe Gerum @ 2006-07-06 14:32 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote: <snip> > PS: Could someone have a look that I didn't miss an accounting point and my > acquisition is sound? > > > -- > include/nucleus/pod.h | 28 ++++++++++++++++++++++++++++ > include/nucleus/thread.h | 1 + > ksrc/nucleus/module.c | 26 +++++++++++++++++++++----- > ksrc/nucleus/pod.c | 5 +++++ > ksrc/nucleus/thread.c | 1 + > 5 files changed, 56 insertions(+), 5 deletions(-) > > Index: include/nucleus/thread.h > =================================================================== > --- include/nucleus/thread.h (revision 1303) > +++ include/nucleus/thread.h (working copy) > @@ -152,6 +152,7 @@ typedef struct xnthread { > unsigned long csw; /* Context switches (includes > secondary -> primary switches) */ > unsigned long pf; /* Number of page faults */ > + xnticks_t exec_time; /* Accumulated execution time (ticks) */ > } stat; > #endif /* CONFIG_XENO_OPT_STATS */ > > Index: include/nucleus/pod.h > =================================================================== > --- include/nucleus/pod.h (revision 1303) > +++ include/nucleus/pod.h (working copy) > @@ -145,6 +145,10 @@ typedef struct xnsched { > > xnthread_t rootcb; /*!< Root thread control block. */ > > +#ifdef CONFIG_XENO_OPT_STATS > + xnticks_t last_csw; /*!< Last context switch (ticks). */ > +#endif /* CONFIG_XENO_OPT_STATS */ > + > } xnsched_t; > > #ifdef CONFIG_SMP > @@ -544,6 +548,30 @@ static inline void xnpod_delete_self (vo > xnpod_delete_thread(xnpod_current_thread()); > } > > +#ifdef CONFIG_XENO_OPT_STATS > +static inline void xnpod_acc_exec_time(xnsched_t *sched, > + xnthread_t *threadout) > +{ > + xnticks_t now = xntimer_get_rawclock(); > + threadout->stat.exec_time += now - sched->last_csw; > + sched->last_csw = now; > +} It would be better to only pass the thread pointer, then use the thread->sched member. This would clearly explain the relationship between both, and prevent any bugous attempt at mixing things. > + > +static inline void xnpod_update_csw_date(xnsched_t *sched) > +{ > + sched->last_csw = xntimer_get_rawclock(); > +} > +#else /* !CONFIG_XENO_OPT_STATS */ > +static inline void xnpod_acc_exec_time(xnsched_t *sched, > + xnthread_t *threadout) > +{ > +} > + > +static inline void xnpod_update_csw_date(xnsched_t *sched) > +{ > +} > +#endif /* CONFIG_XENO_OPT_STATS */ > + > #ifdef __cplusplus > } > #endif > Index: ksrc/nucleus/thread.c > =================================================================== > --- ksrc/nucleus/thread.c (revision 1303) > +++ ksrc/nucleus/thread.c (working copy) > @@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread, > thread->stat.ssw = 0; > thread->stat.csw = 0; > thread->stat.pf = 0; > + thread->stat.exec_time = 0; > #endif /* CONFIG_XENO_OPT_STATS */ > > /* These will be filled by xnpod_start_thread() */ > Index: ksrc/nucleus/pod.c > =================================================================== > --- ksrc/nucleus/pod.c (revision 1303) > +++ ksrc/nucleus/pod.c (working copy) > @@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x > > xnthread_cleanup_tcb(threadout); > > + /* no need to update stats of dying thread */ > + xnpod_update_csw_date(sched); > + > xnarch_finalize_and_switch(xnthread_archtcb(threadout), > xnthread_archtcb(threadin)); > > @@ -2431,6 +2434,7 @@ void xnpod_schedule(void) > xnarch_enter_root(xnthread_archtcb(threadin)); > } > > + xnpod_acc_exec_time(sched, threadout); > xnthread_inc_csw(threadin); > > xnarch_switch_to(xnthread_archtcb(threadout), > @@ -2602,6 +2606,7 @@ void xnpod_schedule_runnable(xnthread_t > nkpod->schedhook(runthread, XNREADY); > #endif /* __XENO_SIM__ */ > > + xnpod_acc_exec_time(sched, runthread); > xnthread_inc_csw(threadin); > > xnarch_switch_to(xnthread_archtcb(runthread), > Index: ksrc/nucleus/module.c > =================================================================== > --- ksrc/nucleus/module.c (revision 1303) > +++ ksrc/nucleus/module.c (working copy) > @@ -254,6 +254,7 @@ struct stat_seq_iterator { > unsigned long ssw; > unsigned long csw; > unsigned long pf; > + xnticks_t exec_time; > } stat_info[1]; > }; > > @@ -294,13 +295,17 @@ static void stat_seq_stop(struct seq_fil > static int stat_seq_show(struct seq_file *seq, void *v) > { > if (v == SEQ_START_TOKEN) > - seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %s\n", > - "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME"); > + seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %12s" > + " %s\n", > + "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME", > + "NAME"); > else { > struct stat_seq_info *p = (struct stat_seq_info *)v; > - seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %s\n", > + unsigned long long exec_time = xnpod_ticks2ns(p->exec_time); > + seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %12llu" > + " %s\n", > p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status, > - p->name); > + xnarch_ulldiv(exec_time, 1000, NULL), p->name); > } > > return 0; > @@ -318,7 +323,7 @@ static int stat_seq_open(struct inode *i > struct stat_seq_iterator *iter; > struct seq_file *seq; > xnholder_t *holder; > - int err, count; > + int err, count, cpu; > spl_t s; > > if (!nkpod) > @@ -341,6 +346,16 @@ static int stat_seq_open(struct inode *i > > iter->nentries = 0; > > + /* update exec-time stats of currently running threads */ > + for_each_online_cpu(cpu) { > + xnsched_t *sched; > + > + xnlock_get_irqsave(&nklock, s); > + sched = xnpod_sched_slot(cpu); > + xnpod_acc_exec_time(sched, sched->runthread); > + xnlock_put_irqrestore(&nklock, s); > + } > + We could do that from the current loop below, given that the accumulation routine is changed to use thread->sched implicitely. > /* Take a snapshot and release the nucleus lock immediately after, > so that dumping /proc/xenomai/stat with lots of entries won't > cause massive jittery. */ > @@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i > iter->stat_info[n].ssw = thread->stat.ssw; > iter->stat_info[n].csw = thread->stat.csw; > iter->stat_info[n].pf = thread->stat.pf; > + iter->stat_info[n].exec_time = thread->stat.exec_time; > } > > xnlock_put_irqrestore(&nklock, s); > > > > _______________________________________________ > Xenomai-core mailing list > Xenomai-core@domain.hid > https://mail.gna.org/listinfo/xenomai-core -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 14:32 ` Philippe Gerum @ 2006-07-06 15:09 ` Jan Kiszka 2006-07-06 15:37 ` Philippe Gerum 2006-07-06 23:47 ` Gilles Chanteperdrix 1 sibling, 1 reply; 24+ messages in thread From: Jan Kiszka @ 2006-07-06 15:09 UTC (permalink / raw) To: rpm; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 7247 bytes --] Philippe Gerum wrote: > On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote: > > <snip> > >> PS: Could someone have a look that I didn't miss an accounting point and my >> acquisition is sound? >> >> >> -- >> include/nucleus/pod.h | 28 ++++++++++++++++++++++++++++ >> include/nucleus/thread.h | 1 + >> ksrc/nucleus/module.c | 26 +++++++++++++++++++++----- >> ksrc/nucleus/pod.c | 5 +++++ >> ksrc/nucleus/thread.c | 1 + >> 5 files changed, 56 insertions(+), 5 deletions(-) >> >> Index: include/nucleus/thread.h >> =================================================================== >> --- include/nucleus/thread.h (revision 1303) >> +++ include/nucleus/thread.h (working copy) >> @@ -152,6 +152,7 @@ typedef struct xnthread { >> unsigned long csw; /* Context switches (includes >> secondary -> primary switches) */ >> unsigned long pf; /* Number of page faults */ >> + xnticks_t exec_time; /* Accumulated execution time (ticks) */ >> } stat; >> #endif /* CONFIG_XENO_OPT_STATS */ >> >> Index: include/nucleus/pod.h >> =================================================================== >> --- include/nucleus/pod.h (revision 1303) >> +++ include/nucleus/pod.h (working copy) >> @@ -145,6 +145,10 @@ typedef struct xnsched { >> >> xnthread_t rootcb; /*!< Root thread control block. */ >> >> +#ifdef CONFIG_XENO_OPT_STATS >> + xnticks_t last_csw; /*!< Last context switch (ticks). */ >> +#endif /* CONFIG_XENO_OPT_STATS */ >> + >> } xnsched_t; >> >> #ifdef CONFIG_SMP >> @@ -544,6 +548,30 @@ static inline void xnpod_delete_self (vo >> xnpod_delete_thread(xnpod_current_thread()); >> } >> >> +#ifdef CONFIG_XENO_OPT_STATS >> +static inline void xnpod_acc_exec_time(xnsched_t *sched, >> + xnthread_t *threadout) >> +{ >> + xnticks_t now = xntimer_get_rawclock(); >> + threadout->stat.exec_time += now - sched->last_csw; >> + sched->last_csw = now; >> +} > > It would be better to only pass the thread pointer, then use the > thread->sched member. This would clearly explain the relationship > between both, and prevent any bugous attempt at mixing things. True, will rework. > >> + >> +static inline void xnpod_update_csw_date(xnsched_t *sched) >> +{ >> + sched->last_csw = xntimer_get_rawclock(); >> +} >> +#else /* !CONFIG_XENO_OPT_STATS */ >> +static inline void xnpod_acc_exec_time(xnsched_t *sched, >> + xnthread_t *threadout) >> +{ >> +} >> + >> +static inline void xnpod_update_csw_date(xnsched_t *sched) >> +{ >> +} >> +#endif /* CONFIG_XENO_OPT_STATS */ >> + >> #ifdef __cplusplus >> } >> #endif >> Index: ksrc/nucleus/thread.c >> =================================================================== >> --- ksrc/nucleus/thread.c (revision 1303) >> +++ ksrc/nucleus/thread.c (working copy) >> @@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread, >> thread->stat.ssw = 0; >> thread->stat.csw = 0; >> thread->stat.pf = 0; >> + thread->stat.exec_time = 0; >> #endif /* CONFIG_XENO_OPT_STATS */ >> >> /* These will be filled by xnpod_start_thread() */ >> Index: ksrc/nucleus/pod.c >> =================================================================== >> --- ksrc/nucleus/pod.c (revision 1303) >> +++ ksrc/nucleus/pod.c (working copy) >> @@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x >> >> xnthread_cleanup_tcb(threadout); >> >> + /* no need to update stats of dying thread */ >> + xnpod_update_csw_date(sched); >> + >> xnarch_finalize_and_switch(xnthread_archtcb(threadout), >> xnthread_archtcb(threadin)); >> >> @@ -2431,6 +2434,7 @@ void xnpod_schedule(void) >> xnarch_enter_root(xnthread_archtcb(threadin)); >> } >> >> + xnpod_acc_exec_time(sched, threadout); >> xnthread_inc_csw(threadin); >> >> xnarch_switch_to(xnthread_archtcb(threadout), >> @@ -2602,6 +2606,7 @@ void xnpod_schedule_runnable(xnthread_t >> nkpod->schedhook(runthread, XNREADY); >> #endif /* __XENO_SIM__ */ >> >> + xnpod_acc_exec_time(sched, runthread); >> xnthread_inc_csw(threadin); >> >> xnarch_switch_to(xnthread_archtcb(runthread), >> Index: ksrc/nucleus/module.c >> =================================================================== >> --- ksrc/nucleus/module.c (revision 1303) >> +++ ksrc/nucleus/module.c (working copy) >> @@ -254,6 +254,7 @@ struct stat_seq_iterator { >> unsigned long ssw; >> unsigned long csw; >> unsigned long pf; >> + xnticks_t exec_time; >> } stat_info[1]; >> }; >> >> @@ -294,13 +295,17 @@ static void stat_seq_stop(struct seq_fil >> static int stat_seq_show(struct seq_file *seq, void *v) >> { >> if (v == SEQ_START_TOKEN) >> - seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %s\n", >> - "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME"); >> + seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %12s" >> + " %s\n", >> + "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME", >> + "NAME"); >> else { >> struct stat_seq_info *p = (struct stat_seq_info *)v; >> - seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %s\n", >> + unsigned long long exec_time = xnpod_ticks2ns(p->exec_time); >> + seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %12llu" >> + " %s\n", >> p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status, >> - p->name); >> + xnarch_ulldiv(exec_time, 1000, NULL), p->name); >> } >> >> return 0; >> @@ -318,7 +323,7 @@ static int stat_seq_open(struct inode *i >> struct stat_seq_iterator *iter; >> struct seq_file *seq; >> xnholder_t *holder; >> - int err, count; >> + int err, count, cpu; >> spl_t s; >> >> if (!nkpod) >> @@ -341,6 +346,16 @@ static int stat_seq_open(struct inode *i >> >> iter->nentries = 0; >> >> + /* update exec-time stats of currently running threads */ >> + for_each_online_cpu(cpu) { >> + xnsched_t *sched; >> + >> + xnlock_get_irqsave(&nklock, s); >> + sched = xnpod_sched_slot(cpu); >> + xnpod_acc_exec_time(sched, sched->runthread); >> + xnlock_put_irqrestore(&nklock, s); >> + } >> + > > We could do that from the current loop below, given that the > accumulation routine is changed to use thread->sched implicitely. The idea is avoid adding even further load to the nklock-protected loop. And we only update the current thread, not each and every. > >> /* Take a snapshot and release the nucleus lock immediately after, >> so that dumping /proc/xenomai/stat with lots of entries won't >> cause massive jittery. */ >> @@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i >> iter->stat_info[n].ssw = thread->stat.ssw; >> iter->stat_info[n].csw = thread->stat.csw; >> iter->stat_info[n].pf = thread->stat.pf; >> + iter->stat_info[n].exec_time = thread->stat.exec_time; >> } >> >> xnlock_put_irqrestore(&nklock, s); >> >> >> >> _______________________________________________ >> Xenomai-core mailing list >> Xenomai-core@domain.hid >> https://mail.gna.org/listinfo/xenomai-core [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 250 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 15:09 ` Jan Kiszka @ 2006-07-06 15:37 ` Philippe Gerum 2006-07-06 15:46 ` Jan Kiszka 0 siblings, 1 reply; 24+ messages in thread From: Philippe Gerum @ 2006-07-06 15:37 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: > > We could do that from the current loop below, given that the > > accumulation routine is changed to use thread->sched implicitely. > > The idea is avoid adding even further load to the nklock-protected loop. > And we only update the current thread, not each and every. > Yes, but why? I mean, accumulated time so far remains significant even for non-running threads. > > > >> /* Take a snapshot and release the nucleus lock immediately after, > >> so that dumping /proc/xenomai/stat with lots of entries won't > >> cause massive jittery. */ > >> @@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i > >> iter->stat_info[n].ssw = thread->stat.ssw; > >> iter->stat_info[n].csw = thread->stat.csw; > >> iter->stat_info[n].pf = thread->stat.pf; > >> + iter->stat_info[n].exec_time = thread->stat.exec_time; > >> } > >> > >> xnlock_put_irqrestore(&nklock, s); > >> > >> > >> > >> _______________________________________________ > >> Xenomai-core mailing list > >> Xenomai-core@domain.hid > >> https://mail.gna.org/listinfo/xenomai-core > > -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 15:37 ` Philippe Gerum @ 2006-07-06 15:46 ` Jan Kiszka 2006-07-06 16:36 ` Jan Kiszka 2006-07-06 16:37 ` Philippe Gerum 0 siblings, 2 replies; 24+ messages in thread From: Jan Kiszka @ 2006-07-06 15:46 UTC (permalink / raw) To: rpm; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 1471 bytes --] Philippe Gerum wrote: > On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: >>> We could do that from the current loop below, given that the >>> accumulation routine is changed to use thread->sched implicitely. >> The idea is avoid adding even further load to the nklock-protected loop. >> And we only update the current thread, not each and every. >> > > Yes, but why? I mean, accumulated time so far remains significant even > for non-running threads. Update must only happen for the currently running thread on each cpu, otherwise you skew the stats up. But looking at the whole code in stat_seq_open() again, I now wonder if that whole routine isn't a) fragile on task removal and b) still poorly scaling. The thread name is only copied by reference, a disappearing instance my cause troubles on printing it. And, instead of holding the lock all the time, shouldn't we better 1. pick some element from the queue and mark it somehow in-use under lock 2. print or copy the entry 3. reacquire the lock to proceed to the next one - after checking if that element happened to be removed from the queue meanwhile (in that case we could abort the output or try to resync) I'm worrying about a potential nice xeno-top tool polling the stats at high frequency and causing unexpected jitters if there are a significant number tasks registered (think of passive shadows we may create automatically in the future!). Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 250 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 15:46 ` Jan Kiszka @ 2006-07-06 16:36 ` Jan Kiszka 2006-07-06 17:17 ` Jan Kiszka 2006-07-07 8:04 ` Philippe Gerum 2006-07-06 16:37 ` Philippe Gerum 1 sibling, 2 replies; 24+ messages in thread From: Jan Kiszka @ 2006-07-06 16:36 UTC (permalink / raw) To: rpm; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 1836 bytes --] Jan Kiszka wrote: > Philippe Gerum wrote: >> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: >>>> We could do that from the current loop below, given that the >>>> accumulation routine is changed to use thread->sched implicitely. >>> The idea is avoid adding even further load to the nklock-protected loop. >>> And we only update the current thread, not each and every. >>> >> Yes, but why? I mean, accumulated time so far remains significant even >> for non-running threads. > > Update must only happen for the currently running thread on each cpu, > otherwise you skew the stats up. > > > But looking at the whole code in stat_seq_open() again, I now wonder if > that whole routine isn't > > a) fragile on task removal and > b) still poorly scaling. > > The thread name is only copied by reference, a disappearing instance my > cause troubles on printing it. And, instead of holding the lock all the > time, shouldn't we better > > 1. pick some element from the queue and mark it somehow > in-use under lock > 2. print or copy the entry > 3. reacquire the lock to proceed to the next one - after checking if > that element happened to be removed from the queue meanwhile (in that > case we could abort the output or try to resync) Not feasible (threads may not always be prevented from being deleted), but what about this: - maintain a modification counter for nkpod->threadq - in our /proc-loops (sched and latency e.g.): 1. take nklock 2. get current counter 3. compare with previous, restart whole loop if not matching 4. copy current element (including the name...) 5. forward element pointer 6. release nklock 7. goto 1 if not end-of-list As modifications on threadq should be fairly rare, I don't see a risk of looping endlessly here. Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 250 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 16:36 ` Jan Kiszka @ 2006-07-06 17:17 ` Jan Kiszka 2006-07-07 8:20 ` Jan Kiszka 2006-07-07 8:04 ` Philippe Gerum 1 sibling, 1 reply; 24+ messages in thread From: Jan Kiszka @ 2006-07-06 17:17 UTC (permalink / raw) To: rpm; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 14481 bytes --] Jan Kiszka wrote: > Jan Kiszka wrote: > >> Philippe Gerum wrote: >> >>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: >>> >>>>> We could do that from the current loop below, given that the >>>>> accumulation routine is changed to use thread->sched implicitely. >>>>> >>>> The idea is avoid adding even further load to the nklock-protected loop. >>>> And we only update the current thread, not each and every. >>>> >>>> >>> Yes, but why? I mean, accumulated time so far remains significant even >>> for non-running threads. >>> >> Update must only happen for the currently running thread on each cpu, >> otherwise you skew the stats up. >> >> >> But looking at the whole code in stat_seq_open() again, I now wonder if >> that whole routine isn't >> >> a) fragile on task removal and >> b) still poorly scaling. >> >> The thread name is only copied by reference, a disappearing instance my >> cause troubles on printing it. And, instead of holding the lock all the >> time, shouldn't we better >> >> 1. pick some element from the queue and mark it somehow >> in-use under lock >> 2. print or copy the entry >> 3. reacquire the lock to proceed to the next one - after checking if >> that element happened to be removed from the queue meanwhile (in that >> case we could abort the output or try to resync) >> > > Not feasible (threads may not always be prevented from being deleted), > but what about this: > > - maintain a modification counter for nkpod->threadq > - in our /proc-loops (sched and latency e.g.): > 1. take nklock > 2. get current counter > 3. compare with previous, restart whole loop if not matching > 4. copy current element (including the name...) > 5. forward element pointer > 6. release nklock > 7. goto 1 if not end-of-list > > As modifications on threadq should be fairly rare, I don't see a risk of > looping endlessly here. Here is such a patch + a reworked version of the exec-time stats. Compiles and boots, doesn't cause obvious troubles when running in a loop while threads are created and destroyed in parallel. Nevertheless, a quick hack which may contain bugs. Jan --- include/nucleus/pod.h | 1 ksrc/nucleus/module.c | 94 +++++++++++++++++++++++++++++++++----------------- ksrc/nucleus/pod.c | 2 + 3 files changed, 66 insertions(+), 31 deletions(-) Index: xenomai/include/nucleus/pod.h =================================================================== --- xenomai.orig/include/nucleus/pod.h +++ xenomai/include/nucleus/pod.h @@ -195,6 +195,7 @@ struct xnpod { xnsched_t sched[XNARCH_NR_CPUS]; /*!< Per-cpu scheduler slots. */ xnqueue_t threadq; /*!< All existing threads. */ + int threadq_rev; /*!< Modification counter of threadq. */ volatile u_long schedlck; /*!< Scheduler lock count. */ Index: xenomai/ksrc/nucleus/module.c =================================================================== --- xenomai.orig/ksrc/nucleus/module.c +++ xenomai/ksrc/nucleus/module.c @@ -99,7 +99,7 @@ struct sched_seq_iterator { struct sched_seq_info { int cpu; pid_t pid; - const char *name; + char name[XNOBJECT_NAME_LEN]; int cprio; xnticks_t timeout; xnflags_t status; @@ -177,17 +177,26 @@ static struct seq_operations sched_op = static int sched_seq_open(struct inode *inode, struct file *file) { - struct sched_seq_iterator *iter; + struct sched_seq_iterator *iter = NULL; struct seq_file *seq; xnholder_t *holder; - int err, count; + int err, count, rev; spl_t s; if (!nkpod) return -ESRCH; + restart: + xnlock_get_irqsave(&nklock, s); + + rev = nkpod->threadq_rev; count = countq(&nkpod->threadq); /* Cannot be empty (ROOT) */ + holder = getheadq(&nkpod->threadq); + + xnlock_put_irqrestore(&nklock, s); + if (iter) + kfree(iter); iter = kmalloc(sizeof(*iter) + (count - 1) * sizeof(struct sched_seq_info), GFP_KERNEL); @@ -202,31 +211,37 @@ static int sched_seq_open(struct inode * } iter->nentries = 0; + iter->start_time = xntimer_get_jiffies(); - /* Take a snapshot and release the nucleus lock immediately after, - so that dumping /proc/xenomai/sched with lots of entries won't - cause massive jittery. */ + /* Take a snapshot element-wise, restart if something changes + underneath us. */ - xnlock_get_irqsave(&nklock, s); + while (holder) { + xnthread_t *thread; + int n; - iter->start_time = xntimer_get_jiffies(); + xnlock_get_irqsave(&nklock, s); + + if (nkpod->threadq_rev != rev) + goto restart; + rev = nkpod->threadq_rev; - for (holder = getheadq(&nkpod->threadq); - holder && count > 0; - holder = nextq(&nkpod->threadq, holder), count--) { - xnthread_t *thread = link2thread(holder, glink); - int n = iter->nentries++; + thread = link2thread(holder, glink); + n = iter->nentries++; iter->sched_info[n].cpu = xnsched_cpu(thread->sched); iter->sched_info[n].pid = xnthread_user_pid(thread); - iter->sched_info[n].name = thread->name; + memcpy(iter->sched_info[n].name, thread->name, + sizeof(iter->sched_info[n].name)); iter->sched_info[n].cprio = thread->cprio; iter->sched_info[n].timeout = xnthread_get_timeout(thread, iter->start_time); iter->sched_info[n].status = thread->status; - } - xnlock_put_irqrestore(&nklock, s); + holder = nextq(&nkpod->threadq, holder); + + xnlock_put_irqrestore(&nklock, s); + } seq = (struct seq_file *)file->private_data; seq->private = iter; @@ -250,7 +265,7 @@ struct stat_seq_iterator { int cpu; pid_t pid; xnflags_t status; - const char *name; + char name[XNOBJECT_NAME_LEN]; unsigned long ssw; unsigned long csw; unsigned long pf; @@ -315,17 +330,26 @@ static struct seq_operations stat_op = { static int stat_seq_open(struct inode *inode, struct file *file) { - struct stat_seq_iterator *iter; + struct stat_seq_iterator *iter = NULL; struct seq_file *seq; xnholder_t *holder; - int err, count; + int err, count, rev; spl_t s; if (!nkpod) return -ESRCH; + restart: + xnlock_get_irqsave(&nklock, s); + + rev = nkpod->threadq_rev; count = countq(&nkpod->threadq); /* Cannot be empty (ROOT) */ + holder = getheadq(&nkpod->threadq); + + xnlock_put_irqrestore(&nklock, s); + if (iter) + kfree(iter); iter = kmalloc(sizeof(*iter) + (count - 1) * sizeof(struct stat_seq_info), GFP_KERNEL); @@ -341,27 +365,35 @@ static int stat_seq_open(struct inode *i iter->nentries = 0; - /* Take a snapshot and release the nucleus lock immediately after, - so that dumping /proc/xenomai/stat with lots of entries won't - cause massive jittery. */ + /* Take a snapshot element-wise, restart if something changes + underneath us. */ - xnlock_get_irqsave(&nklock, s); + while (holder) { + xnthread_t *thread; + int n; + + xnlock_get_irqsave(&nklock, s); + + if (nkpod->threadq_rev != rev) + goto restart; + rev = nkpod->threadq_rev; + + thread = link2thread(holder, glink); + n = iter->nentries++; - for (holder = getheadq(&nkpod->threadq); - holder && count > 0; - holder = nextq(&nkpod->threadq, holder), count--) { - xnthread_t *thread = link2thread(holder, glink); - int n = iter->nentries++; iter->stat_info[n].cpu = xnsched_cpu(thread->sched); iter->stat_info[n].pid = xnthread_user_pid(thread); - iter->stat_info[n].name = thread->name; + memcpy(iter->stat_info[n].name, thread->name, + sizeof(iter->stat_info[n].name)); iter->stat_info[n].status = thread->status; iter->stat_info[n].ssw = thread->stat.ssw; iter->stat_info[n].csw = thread->stat.csw; iter->stat_info[n].pf = thread->stat.pf; - } - xnlock_put_irqrestore(&nklock, s); + holder = nextq(&nkpod->threadq, holder); + + xnlock_put_irqrestore(&nklock, s); + } seq = (struct seq_file *)file->private_data; seq->private = iter; Index: xenomai/ksrc/nucleus/pod.c =================================================================== --- xenomai.orig/ksrc/nucleus/pod.c +++ xenomai/ksrc/nucleus/pod.c @@ -815,6 +815,7 @@ int xnpod_init_thread(xnthread_t *thread xnlock_get_irqsave(&nklock, s); thread->sched = xnpod_current_sched(); appendq(&nkpod->threadq, &thread->glink); + nkpod->threadq_rev++; xnpod_suspend_thread(thread, XNDORMANT | (flags & XNSUSP), XN_INFINITE, NULL); xnlock_put_irqrestore(&nklock, s); @@ -1225,6 +1226,7 @@ void xnpod_delete_thread(xnthread_t *thr sched = thread->sched; removeq(&nkpod->threadq, &thread->glink); + nkpod->threadq_rev++; if (!testbits(thread->status, XNTHREAD_BLOCK_BITS)) { if (testbits(thread->status, XNREADY)) { --- include/nucleus/pod.h | 28 ++++++++++++++++++++++++++++ include/nucleus/thread.h | 1 + ksrc/nucleus/module.c | 26 +++++++++++++++++++++----- ksrc/nucleus/pod.c | 5 +++++ ksrc/nucleus/thread.c | 1 + 5 files changed, 56 insertions(+), 5 deletions(-) Index: include/nucleus/thread.h =================================================================== --- include/nucleus/thread.h.orig +++ include/nucleus/thread.h @@ -152,6 +152,7 @@ typedef struct xnthread { unsigned long csw; /* Context switches (includes secondary -> primary switches) */ unsigned long pf; /* Number of page faults */ + xnticks_t exec_time; /* Accumulated execution time (ticks) */ } stat; #endif /* CONFIG_XENO_OPT_STATS */ Index: include/nucleus/pod.h =================================================================== --- include/nucleus/pod.h.orig +++ include/nucleus/pod.h @@ -145,6 +145,10 @@ typedef struct xnsched { xnthread_t rootcb; /*!< Root thread control block. */ +#ifdef CONFIG_XENO_OPT_STATS + xnticks_t last_csw; /*!< Last context switch (ticks). */ +#endif /* CONFIG_XENO_OPT_STATS */ + } xnsched_t; #ifdef CONFIG_SMP @@ -545,6 +549,30 @@ static inline void xnpod_delete_self (vo xnpod_delete_thread(xnpod_current_thread()); } +#ifdef CONFIG_XENO_OPT_STATS +static inline void xnpod_acc_exec_time(xnthread_t *thread) +{ + xnsched_t *sched = thread->sched; + xnticks_t now = xntimer_get_rawclock(); + + thread->stat.exec_time += now - sched->last_csw; + sched->last_csw = now; +} + +static inline void xnpod_update_csw_date(xnsched_t *sched) +{ + sched->last_csw = xntimer_get_rawclock(); +} +#else /* !CONFIG_XENO_OPT_STATS */ +static inline void xnpod_acc_exec_time(xnthread_t *thread) +{ +} + +static inline void xnpod_update_csw_date(xnsched_t *sched) +{ +} +#endif /* CONFIG_XENO_OPT_STATS */ + #ifdef __cplusplus } #endif Index: ksrc/nucleus/thread.c =================================================================== --- ksrc/nucleus/thread.c.orig +++ ksrc/nucleus/thread.c @@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread, thread->stat.ssw = 0; thread->stat.csw = 0; thread->stat.pf = 0; + thread->stat.exec_time = 0; #endif /* CONFIG_XENO_OPT_STATS */ /* These will be filled by xnpod_start_thread() */ Index: ksrc/nucleus/pod.c =================================================================== --- ksrc/nucleus/pod.c.orig +++ ksrc/nucleus/pod.c @@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x xnthread_cleanup_tcb(threadout); + /* no need to update stats of dying thread */ + xnpod_update_csw_date(sched); + xnarch_finalize_and_switch(xnthread_archtcb(threadout), xnthread_archtcb(threadin)); @@ -2433,6 +2436,7 @@ void xnpod_schedule(void) xnarch_enter_root(xnthread_archtcb(threadin)); } + xnpod_acc_exec_time(threadout); xnthread_inc_csw(threadin); xnarch_switch_to(xnthread_archtcb(threadout), @@ -2604,6 +2608,7 @@ void xnpod_schedule_runnable(xnthread_t nkpod->schedhook(runthread, XNREADY); #endif /* __XENO_SIM__ */ + xnpod_acc_exec_time(runthread); xnthread_inc_csw(threadin); xnarch_switch_to(xnthread_archtcb(runthread), Index: ksrc/nucleus/module.c =================================================================== --- ksrc/nucleus/module.c.orig +++ ksrc/nucleus/module.c @@ -269,6 +269,7 @@ struct stat_seq_iterator { unsigned long ssw; unsigned long csw; unsigned long pf; + xnticks_t exec_time; } stat_info[1]; }; @@ -309,13 +310,17 @@ static void stat_seq_stop(struct seq_fil static int stat_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %s\n", - "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME"); + seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %12s" + " %s\n", + "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME", + "NAME"); else { struct stat_seq_info *p = (struct stat_seq_info *)v; - seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %s\n", + unsigned long long exec_time = xnpod_ticks2ns(p->exec_time); + seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %12llu" + " %s\n", p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status, - p->name); + xnarch_ulldiv(exec_time, 1000, NULL), p->name); } return 0; @@ -333,7 +338,7 @@ static int stat_seq_open(struct inode *i struct stat_seq_iterator *iter = NULL; struct seq_file *seq; xnholder_t *holder; - int err, count, rev; + int err, count, rev, cpu; spl_t s; if (!nkpod) @@ -365,6 +370,16 @@ static int stat_seq_open(struct inode *i iter->nentries = 0; + /* update exec-time stats of currently running threads */ + for_each_online_cpu(cpu) { + xnsched_t *sched; + + xnlock_get_irqsave(&nklock, s); + sched = xnpod_sched_slot(cpu); + xnpod_acc_exec_time(sched->runthread); + xnlock_put_irqrestore(&nklock, s); + } + /* Take a snapshot element-wise, restart if something changes underneath us. */ @@ -389,6 +404,7 @@ static int stat_seq_open(struct inode *i iter->stat_info[n].ssw = thread->stat.ssw; iter->stat_info[n].csw = thread->stat.csw; iter->stat_info[n].pf = thread->stat.pf; + iter->stat_info[n].exec_time = thread->stat.exec_time; holder = nextq(&nkpod->threadq, holder); [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 250 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 17:17 ` Jan Kiszka @ 2006-07-07 8:20 ` Jan Kiszka 0 siblings, 0 replies; 24+ messages in thread From: Jan Kiszka @ 2006-07-07 8:20 UTC (permalink / raw) To: rpm; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 5941 bytes --] Jan Kiszka wrote: > Here is such a patch + a reworked version of the exec-time stats. Compiles and boots, > doesn't cause obvious troubles when running in a loop while threads are created and > destroyed in parallel. Nevertheless, a quick hack which may contain bugs. As expected, there were bugs. This version of the exec-time patch always uses TSC for timestamping and the related correct conversion function. And it doesn't try to update the exec-time across CPUs as the TSCs may not be in sync. Thus, when there is no scheduling activity on some CPU, we may not see progress in /stat. But this is the least invasive way, and we typically DO have activity anyway. Jan --- include/nucleus/pod.h | 28 ++++++++++++++++++++++++++++ include/nucleus/thread.h | 1 + ksrc/nucleus/module.c | 20 ++++++++++++++++---- ksrc/nucleus/pod.c | 5 +++++ ksrc/nucleus/thread.c | 1 + 5 files changed, 51 insertions(+), 4 deletions(-) Index: include/nucleus/thread.h =================================================================== --- include/nucleus/thread.h.orig +++ include/nucleus/thread.h @@ -152,6 +152,7 @@ typedef struct xnthread { unsigned long csw; /* Context switches (includes secondary -> primary switches) */ unsigned long pf; /* Number of page faults */ + xnticks_t exec_time; /* Accumulated execution time (ticks) */ } stat; #endif /* CONFIG_XENO_OPT_STATS */ Index: include/nucleus/pod.h =================================================================== --- include/nucleus/pod.h.orig +++ include/nucleus/pod.h @@ -145,6 +145,10 @@ typedef struct xnsched { xnthread_t rootcb; /*!< Root thread control block. */ +#ifdef CONFIG_XENO_OPT_STATS + xnticks_t last_csw; /*!< Last context switch (ticks). */ +#endif /* CONFIG_XENO_OPT_STATS */ + } xnsched_t; #ifdef CONFIG_SMP @@ -545,6 +549,30 @@ static inline void xnpod_delete_self (vo xnpod_delete_thread(xnpod_current_thread()); } +#ifdef CONFIG_XENO_OPT_STATS +static inline void xnpod_acc_exec_time(xnthread_t *thread) +{ + xnsched_t *sched = thread->sched; + xnticks_t now = xnarch_get_cpu_tsc(); + + thread->stat.exec_time += now - sched->last_csw; + sched->last_csw = now; +} + +static inline void xnpod_update_csw_date(xnsched_t *sched) +{ + sched->last_csw = xnarch_get_cpu_tsc(); +} +#else /* !CONFIG_XENO_OPT_STATS */ +static inline void xnpod_acc_exec_time(xnthread_t *thread) +{ +} + +static inline void xnpod_update_csw_date(xnsched_t *sched) +{ +} +#endif /* CONFIG_XENO_OPT_STATS */ + #ifdef __cplusplus } #endif Index: ksrc/nucleus/thread.c =================================================================== --- ksrc/nucleus/thread.c.orig +++ ksrc/nucleus/thread.c @@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread, thread->stat.ssw = 0; thread->stat.csw = 0; thread->stat.pf = 0; + thread->stat.exec_time = 0; #endif /* CONFIG_XENO_OPT_STATS */ /* These will be filled by xnpod_start_thread() */ Index: ksrc/nucleus/pod.c =================================================================== --- ksrc/nucleus/pod.c.orig +++ ksrc/nucleus/pod.c @@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x xnthread_cleanup_tcb(threadout); + /* no need to update stats of dying thread */ + xnpod_update_csw_date(sched); + xnarch_finalize_and_switch(xnthread_archtcb(threadout), xnthread_archtcb(threadin)); @@ -2433,6 +2436,7 @@ void xnpod_schedule(void) xnarch_enter_root(xnthread_archtcb(threadin)); } + xnpod_acc_exec_time(threadout); xnthread_inc_csw(threadin); xnarch_switch_to(xnthread_archtcb(threadout), @@ -2604,6 +2608,7 @@ void xnpod_schedule_runnable(xnthread_t nkpod->schedhook(runthread, XNREADY); #endif /* __XENO_SIM__ */ + xnpod_acc_exec_time(runthread); xnthread_inc_csw(threadin); xnarch_switch_to(xnthread_archtcb(runthread), Index: ksrc/nucleus/module.c =================================================================== --- ksrc/nucleus/module.c.orig +++ ksrc/nucleus/module.c @@ -269,6 +269,7 @@ struct stat_seq_iterator { unsigned long ssw; unsigned long csw; unsigned long pf; + xnticks_t exec_time; } stat_info[1]; }; @@ -309,13 +310,17 @@ static void stat_seq_stop(struct seq_fil static int stat_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %s\n", - "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME"); + seq_printf(seq, "%-3s %-6s %-10s %-10s %-4s %-8s %12s" + " %s\n", + "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME", + "NAME"); else { struct stat_seq_info *p = (struct stat_seq_info *)v; - seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %s\n", + unsigned long long exec_time = xnarch_tsc_to_ns(p->exec_time); + seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %12llu" + " %s\n", p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status, - p->name); + xnarch_ulldiv(exec_time, 1000, NULL), p->name); } return 0; @@ -365,6 +370,12 @@ static int stat_seq_open(struct inode *i iter->nentries = 0; + /* Update exec-time stats at least on current CPU, unsync'ed TSCs + prevent cross-CPU update yet. */ + xnlock_get_irqsave(&nklock, s); + xnpod_acc_exec_time(xnpod_current_thread()); + xnlock_put_irqrestore(&nklock, s); + /* Take a snapshot element-wise, restart if something changes underneath us. */ @@ -389,6 +400,7 @@ static int stat_seq_open(struct inode *i iter->stat_info[n].ssw = thread->stat.ssw; iter->stat_info[n].csw = thread->stat.csw; iter->stat_info[n].pf = thread->stat.pf; + iter->stat_info[n].exec_time = thread->stat.exec_time; holder = nextq(&nkpod->threadq, holder); [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 249 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 16:36 ` Jan Kiszka 2006-07-06 17:17 ` Jan Kiszka @ 2006-07-07 8:04 ` Philippe Gerum 2006-07-07 8:09 ` Jan Kiszka 1 sibling, 1 reply; 24+ messages in thread From: Philippe Gerum @ 2006-07-07 8:04 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote: > Jan Kiszka wrote: > > Philippe Gerum wrote: > >> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: > >>>> We could do that from the current loop below, given that the > >>>> accumulation routine is changed to use thread->sched implicitely. > >>> The idea is avoid adding even further load to the nklock-protected loop. > >>> And we only update the current thread, not each and every. > >>> > >> Yes, but why? I mean, accumulated time so far remains significant even > >> for non-running threads. > > > > Update must only happen for the currently running thread on each cpu, > > otherwise you skew the stats up. > > > > > > But looking at the whole code in stat_seq_open() again, I now wonder if > > that whole routine isn't > > > > a) fragile on task removal and > > b) still poorly scaling. > > > > The thread name is only copied by reference, a disappearing instance my > > cause troubles on printing it. And, instead of holding the lock all the > > time, shouldn't we better > > > > 1. pick some element from the queue and mark it somehow > > in-use under lock > > 2. print or copy the entry > > 3. reacquire the lock to proceed to the next one - after checking if > > that element happened to be removed from the queue meanwhile (in that > > case we could abort the output or try to resync) > > Not feasible (threads may not always be prevented from being deleted), > but what about this: > > - maintain a modification counter for nkpod->threadq > - in our /proc-loops (sched and latency e.g.): > 1. take nklock > 2. get current counter > 3. compare with previous, restart whole loop if not matching > 4. copy current element (including the name...) > 5. forward element pointer > 6. release nklock > 7. goto 1 if not end-of-list > > As modifications on threadq should be fairly rare, I don't see a risk of > looping endlessly here. > The more I think of it, the more I'm convinced that we are trying to tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top tool would rather need the equivalent of individual /proc/<pid> files, thus reducing the contention on access, and the need for weird grepping on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread data in some easily greppable form by a user-space tool. > Jan > -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 8:04 ` Philippe Gerum @ 2006-07-07 8:09 ` Jan Kiszka 2006-07-07 8:49 ` Philippe Gerum 0 siblings, 1 reply; 24+ messages in thread From: Jan Kiszka @ 2006-07-07 8:09 UTC (permalink / raw) To: rpm; +Cc: xenomai-core Philippe Gerum wrote: > On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote: >> Jan Kiszka wrote: >>> Philippe Gerum wrote: >>>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: >>>>>> We could do that from the current loop below, given that the >>>>>> accumulation routine is changed to use thread->sched implicitely. >>>>> The idea is avoid adding even further load to the nklock-protected loop. >>>>> And we only update the current thread, not each and every. >>>>> >>>> Yes, but why? I mean, accumulated time so far remains significant even >>>> for non-running threads. >>> Update must only happen for the currently running thread on each cpu, >>> otherwise you skew the stats up. >>> >>> >>> But looking at the whole code in stat_seq_open() again, I now wonder if >>> that whole routine isn't >>> >>> a) fragile on task removal and >>> b) still poorly scaling. >>> >>> The thread name is only copied by reference, a disappearing instance my >>> cause troubles on printing it. And, instead of holding the lock all the >>> time, shouldn't we better >>> >>> 1. pick some element from the queue and mark it somehow >>> in-use under lock >>> 2. print or copy the entry >>> 3. reacquire the lock to proceed to the next one - after checking if >>> that element happened to be removed from the queue meanwhile (in that >>> case we could abort the output or try to resync) >> Not feasible (threads may not always be prevented from being deleted), >> but what about this: >> >> - maintain a modification counter for nkpod->threadq >> - in our /proc-loops (sched and latency e.g.): >> 1. take nklock >> 2. get current counter >> 3. compare with previous, restart whole loop if not matching >> 4. copy current element (including the name...) >> 5. forward element pointer >> 6. release nklock >> 7. goto 1 if not end-of-list >> >> As modifications on threadq should be fairly rare, I don't see a risk of >> looping endlessly here. >> > > The more I think of it, the more I'm convinced that we are trying to > tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top > tool would rather need the equivalent of individual /proc/<pid> files, > thus reducing the contention on access, and the need for weird grepping > on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread > data in some easily greppable form by a user-space tool. Far too complex in my eyes for this simple purpose (what's the pid of kernel-only threads?) - and we need to solve the latency problem of /sched and /stat anyway. Jan ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 8:09 ` Jan Kiszka @ 2006-07-07 8:49 ` Philippe Gerum 2006-07-07 9:03 ` Jan Kiszka 0 siblings, 1 reply; 24+ messages in thread From: Philippe Gerum @ 2006-07-07 8:49 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Fri, 2006-07-07 at 10:09 +0200, Jan Kiszka wrote: > Philippe Gerum wrote: > > On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote: > >> Jan Kiszka wrote: > >>> Philippe Gerum wrote: > >>>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: > >>>>>> We could do that from the current loop below, given that the > >>>>>> accumulation routine is changed to use thread->sched implicitely. > >>>>> The idea is avoid adding even further load to the nklock-protected loop. > >>>>> And we only update the current thread, not each and every. > >>>>> > >>>> Yes, but why? I mean, accumulated time so far remains significant even > >>>> for non-running threads. > >>> Update must only happen for the currently running thread on each cpu, > >>> otherwise you skew the stats up. > >>> > >>> > >>> But looking at the whole code in stat_seq_open() again, I now wonder if > >>> that whole routine isn't > >>> > >>> a) fragile on task removal and > >>> b) still poorly scaling. > >>> > >>> The thread name is only copied by reference, a disappearing instance my > >>> cause troubles on printing it. And, instead of holding the lock all the > >>> time, shouldn't we better > >>> > >>> 1. pick some element from the queue and mark it somehow > >>> in-use under lock > >>> 2. print or copy the entry > >>> 3. reacquire the lock to proceed to the next one - after checking if > >>> that element happened to be removed from the queue meanwhile (in that > >>> case we could abort the output or try to resync) > >> Not feasible (threads may not always be prevented from being deleted), > >> but what about this: > >> > >> - maintain a modification counter for nkpod->threadq > >> - in our /proc-loops (sched and latency e.g.): > >> 1. take nklock > >> 2. get current counter > >> 3. compare with previous, restart whole loop if not matching > >> 4. copy current element (including the name...) > >> 5. forward element pointer > >> 6. release nklock > >> 7. goto 1 if not end-of-list > >> > >> As modifications on threadq should be fairly rare, I don't see a risk of > >> looping endlessly here. > >> > > > > The more I think of it, the more I'm convinced that we are trying to > > tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top > > tool would rather need the equivalent of individual /proc/<pid> files, > > thus reducing the contention on access, and the need for weird grepping > > on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread > > data in some easily greppable form by a user-space tool. > > Far too complex in my eyes for this simple purpose (what's the pid of > kernel-only threads?) - It's symbolic name. It's not a matter of simplicity, your patch for that purpose is rather complex already. > and we need to solve the latency problem of > /sched and /stat anyway. That's separate issues. Solving the scalability issue of /proc/xenomai/stats and friends does not improve their usability in the context of user-space tools monitoring thread activity. E.g. how are we going to extend the reporting about such activity if need be, adding yet another column to /stats and /sched? > > Jan -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 8:49 ` Philippe Gerum @ 2006-07-07 9:03 ` Jan Kiszka 2006-07-07 11:28 ` Philippe Gerum 0 siblings, 1 reply; 24+ messages in thread From: Jan Kiszka @ 2006-07-07 9:03 UTC (permalink / raw) To: rpm; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 3722 bytes --] Philippe Gerum wrote: > On Fri, 2006-07-07 at 10:09 +0200, Jan Kiszka wrote: >> Philippe Gerum wrote: >>> On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote: >>>> Jan Kiszka wrote: >>>>> Philippe Gerum wrote: >>>>>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: >>>>>>>> We could do that from the current loop below, given that the >>>>>>>> accumulation routine is changed to use thread->sched implicitely. >>>>>>> The idea is avoid adding even further load to the nklock-protected loop. >>>>>>> And we only update the current thread, not each and every. >>>>>>> >>>>>> Yes, but why? I mean, accumulated time so far remains significant even >>>>>> for non-running threads. >>>>> Update must only happen for the currently running thread on each cpu, >>>>> otherwise you skew the stats up. >>>>> >>>>> >>>>> But looking at the whole code in stat_seq_open() again, I now wonder if >>>>> that whole routine isn't >>>>> >>>>> a) fragile on task removal and >>>>> b) still poorly scaling. >>>>> >>>>> The thread name is only copied by reference, a disappearing instance my >>>>> cause troubles on printing it. And, instead of holding the lock all the >>>>> time, shouldn't we better >>>>> >>>>> 1. pick some element from the queue and mark it somehow >>>>> in-use under lock >>>>> 2. print or copy the entry >>>>> 3. reacquire the lock to proceed to the next one - after checking if >>>>> that element happened to be removed from the queue meanwhile (in that >>>>> case we could abort the output or try to resync) >>>> Not feasible (threads may not always be prevented from being deleted), >>>> but what about this: >>>> >>>> - maintain a modification counter for nkpod->threadq >>>> - in our /proc-loops (sched and latency e.g.): >>>> 1. take nklock >>>> 2. get current counter >>>> 3. compare with previous, restart whole loop if not matching >>>> 4. copy current element (including the name...) >>>> 5. forward element pointer >>>> 6. release nklock >>>> 7. goto 1 if not end-of-list >>>> >>>> As modifications on threadq should be fairly rare, I don't see a risk of >>>> looping endlessly here. >>>> >>> The more I think of it, the more I'm convinced that we are trying to >>> tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top >>> tool would rather need the equivalent of individual /proc/<pid> files, >>> thus reducing the contention on access, and the need for weird grepping >>> on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread >>> data in some easily greppable form by a user-space tool. >> Far too complex in my eyes for this simple purpose (what's the pid of >> kernel-only threads?) - > > It's symbolic name. It's not a matter of simplicity, your patch for that > purpose is rather complex already. Haven't measured, but the amount of code added for collecting the data and printing just another column should be marginal as yet. Introducing a new infrastructure for more /proc subdirs with probably multiple entries would certainly cost more. > >> and we need to solve the latency problem of >> /sched and /stat anyway. > > That's separate issues. Solving the scalability issue > of /proc/xenomai/stats and friends does not improve their usability in > the context of user-space tools monitoring thread activity. E.g. how are > we going to extend the reporting about such activity if need be, adding > yet another column to /stats and /sched? As long as there are no stand-alone tools relying on the layout, just only our own ones - fairly simple. But before speculating more, I will have a look now how simple such tool/script may actually be. Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 250 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 9:03 ` Jan Kiszka @ 2006-07-07 11:28 ` Philippe Gerum 0 siblings, 0 replies; 24+ messages in thread From: Philippe Gerum @ 2006-07-07 11:28 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Fri, 2006-07-07 at 11:03 +0200, Jan Kiszka wrote: > >> Far too complex in my eyes for this simple purpose (what's the pid of > >> kernel-only threads?) - > > > > It's symbolic name. It's not a matter of simplicity, your patch for that > > purpose is rather complex already. > > Haven't measured, but the amount of code added for collecting the data > and printing just another column should be marginal as yet. Introducing > a new infrastructure for more /proc subdirs with probably multiple > entries would certainly cost more. > This has no cost on the real-time side of things. In contrast, scanning the threadq within /proc handlers like /stats and /sched are doing now, does have some noticeable cost. By having separate per-thread entries, we provide the necessary infrastructure for exporting more raw statistical data to user-space tools, in a format which would be script-friendly. > > > >> and we need to solve the latency problem of > >> /sched and /stat anyway. > > > > That's separate issues. Solving the scalability issue > > of /proc/xenomai/stats and friends does not improve their usability in > > the context of user-space tools monitoring thread activity. E.g. how are > > we going to extend the reporting about such activity if need be, adding > > yet another column to /stats and /sched? > > As long as there are no stand-alone tools relying on the layout, just > only our own ones - fairly simple. But before speculating more, I will > have a look now how simple such tool/script may actually be. > > Jan > -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 15:46 ` Jan Kiszka 2006-07-06 16:36 ` Jan Kiszka @ 2006-07-06 16:37 ` Philippe Gerum 1 sibling, 0 replies; 24+ messages in thread From: Philippe Gerum @ 2006-07-06 16:37 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Thu, 2006-07-06 at 17:46 +0200, Jan Kiszka wrote: > Philippe Gerum wrote: > > On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote: > >>> We could do that from the current loop below, given that the > >>> accumulation routine is changed to use thread->sched implicitely. > >> The idea is avoid adding even further load to the nklock-protected loop. > >> And we only update the current thread, not each and every. > >> > > > > Yes, but why? I mean, accumulated time so far remains significant even > > for non-running threads. > > Update must only happen for the currently running thread on each cpu, > otherwise you skew the stats up. > The point is that you don't want to update there, just peek at the current value for all threads, and indeed update for the current one before that. So we should keep the initial loop, but only read the accumulator in the second one. > > But looking at the whole code in stat_seq_open() again, I now wonder if > that whole routine isn't > > a) fragile on task removal and > b) still poorly scaling. > > The thread name is only copied by reference, a disappearing instance my > cause troubles on printing it. Yes, this will admittedly print garbage if the day really goes rainy (the kernel memory we point at would still be mapped, but occupied by other data). The other option would be to copy the name, but that's unacceptable latency-wise, since we would need to do that under nklock protection. > And, instead of holding the lock all the > time, shouldn't we better > > 1. pick some element from the queue and mark it somehow > in-use under lock > 2. print or copy the entry > 3. reacquire the lock to proceed to the next one - after checking if > that element happened to be removed from the queue meanwhile Unfortunately, you can't since the associated memory might be utterly trashed. > (in that > case we could abort the output or try to resync) > It's the "try to resync" that bothers me. We currently have no sane way to give any guarantee about the consistency of the output sequence here, unless maybe using an array representation for indexing the threads. But I suspect ore issues ahead. > I'm worrying about a potential nice xeno-top tool polling the stats at > high frequency and causing unexpected jitters if there are a significant > number tasks registered (think of passive shadows we may create > automatically in the future!). > I agree, that's a potential issue. But we should also find an approach that does not cause half-baked or mis-resynced data to be emitted, and the current seqfile interface is not providing an easy way to do this. > Jan > -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 14:32 ` Philippe Gerum 2006-07-06 15:09 ` Jan Kiszka @ 2006-07-06 23:47 ` Gilles Chanteperdrix 2006-07-07 7:54 ` Philippe Gerum 1 sibling, 1 reply; 24+ messages in thread From: Gilles Chanteperdrix @ 2006-07-06 23:47 UTC (permalink / raw) To: rpm; +Cc: Jan Kiszka, xenomai-core Philippe Gerum wrote: > > +#ifdef CONFIG_XENO_OPT_STATS > > +static inline void xnpod_acc_exec_time(xnsched_t *sched, > > + xnthread_t *threadout) > > +{ > > + xnticks_t now = xntimer_get_rawclock(); > > + threadout->stat.exec_time += now - sched->last_csw; > > + sched->last_csw = now; > > +} > > It would be better to only pass the thread pointer, then use the > thread->sched member. This would clearly explain the relationship > between both, and prevent any bugous attempt at mixing things. Beware, when xnpod_schedule is called from within xnpod_migrate_thread(), the sched pointer of the switched out thread is the one of the destination cpu. So, passing the sched and threadout pointers to xnpod_acc_exec_time is safer. -- Gilles Chanteperdrix. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 23:47 ` Gilles Chanteperdrix @ 2006-07-07 7:54 ` Philippe Gerum 2006-07-07 10:22 ` Gilles Chanteperdrix 0 siblings, 1 reply; 24+ messages in thread From: Philippe Gerum @ 2006-07-07 7:54 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: Jan Kiszka, xenomai-core On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote: > Philippe Gerum wrote: > > > +#ifdef CONFIG_XENO_OPT_STATS > > > +static inline void xnpod_acc_exec_time(xnsched_t *sched, > > > + xnthread_t *threadout) > > > +{ > > > + xnticks_t now = xntimer_get_rawclock(); > > > + threadout->stat.exec_time += now - sched->last_csw; > > > + sched->last_csw = now; > > > +} > > > > It would be better to only pass the thread pointer, then use the > > thread->sched member. This would clearly explain the relationship > > between both, and prevent any bugous attempt at mixing things. > > Beware, when xnpod_schedule is called from within > xnpod_migrate_thread(), the sched pointer of the switched out thread is > the one of the destination cpu. So, passing the sched and threadout > pointers to xnpod_acc_exec_time is safer. Nope, the nklock is held by the migrating thread all along until the switch has actually occured. > > -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 7:54 ` Philippe Gerum @ 2006-07-07 10:22 ` Gilles Chanteperdrix 2006-07-07 10:30 ` Jan Kiszka 2006-07-07 13:23 ` Philippe Gerum 0 siblings, 2 replies; 24+ messages in thread From: Gilles Chanteperdrix @ 2006-07-07 10:22 UTC (permalink / raw) To: rpm; +Cc: Jan Kiszka, xenomai-core Philippe Gerum wrote: > On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote: > > Philippe Gerum wrote: > > > > +#ifdef CONFIG_XENO_OPT_STATS > > > > +static inline void xnpod_acc_exec_time(xnsched_t *sched, > > > > + xnthread_t *threadout) > > > > +{ > > > > + xnticks_t now = xntimer_get_rawclock(); > > > > + threadout->stat.exec_time += now - sched->last_csw; > > > > + sched->last_csw = now; > > > > +} > > > > > > It would be better to only pass the thread pointer, then use the > > > thread->sched member. This would clearly explain the relationship > > > between both, and prevent any bugous attempt at mixing things. > > > > Beware, when xnpod_schedule is called from within > > xnpod_migrate_thread(), the sched pointer of the switched out thread is > > the one of the destination cpu. So, passing the sched and threadout > > pointers to xnpod_acc_exec_time is safer. > > Nope, the nklock is held by the migrating thread all along until the > switch has actually occured. xnpod_acc_exec_time() is called from within xnpod_schedule() that is called from within xnpod_migrate_thread() at a time where threadout->sched is the wrong pointer. -- Gilles Chanteperdrix. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 10:22 ` Gilles Chanteperdrix @ 2006-07-07 10:30 ` Jan Kiszka 2006-07-07 13:23 ` Philippe Gerum 1 sibling, 0 replies; 24+ messages in thread From: Jan Kiszka @ 2006-07-07 10:30 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 1401 bytes --] Gilles Chanteperdrix wrote: > Philippe Gerum wrote: > > On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote: > > > Philippe Gerum wrote: > > > > > +#ifdef CONFIG_XENO_OPT_STATS > > > > > +static inline void xnpod_acc_exec_time(xnsched_t *sched, > > > > > + xnthread_t *threadout) > > > > > +{ > > > > > + xnticks_t now = xntimer_get_rawclock(); > > > > > + threadout->stat.exec_time += now - sched->last_csw; > > > > > + sched->last_csw = now; > > > > > +} > > > > > > > > It would be better to only pass the thread pointer, then use the > > > > thread->sched member. This would clearly explain the relationship > > > > between both, and prevent any bugous attempt at mixing things. > > > > > > Beware, when xnpod_schedule is called from within > > > xnpod_migrate_thread(), the sched pointer of the switched out thread is > > > the one of the destination cpu. So, passing the sched and threadout > > > pointers to xnpod_acc_exec_time is safer. > > > > Nope, the nklock is held by the migrating thread all along until the > > switch has actually occured. > > xnpod_acc_exec_time() is called from within xnpod_schedule() that is > called from within xnpod_migrate_thread() at a time where > threadout->sched is the wrong pointer. > True, will revert this. Thanks, Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 250 bytes --] ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 10:22 ` Gilles Chanteperdrix 2006-07-07 10:30 ` Jan Kiszka @ 2006-07-07 13:23 ` Philippe Gerum 2006-07-07 13:59 ` Gilles Chanteperdrix 1 sibling, 1 reply; 24+ messages in thread From: Philippe Gerum @ 2006-07-07 13:23 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: Jan Kiszka, xenomai-core On Fri, 2006-07-07 at 12:22 +0200, Gilles Chanteperdrix wrote: > Philippe Gerum wrote: > > On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote: > > > Philippe Gerum wrote: > > > > > +#ifdef CONFIG_XENO_OPT_STATS > > > > > +static inline void xnpod_acc_exec_time(xnsched_t *sched, > > > > > + xnthread_t *threadout) > > > > > +{ > > > > > + xnticks_t now = xntimer_get_rawclock(); > > > > > + threadout->stat.exec_time += now - sched->last_csw; > > > > > + sched->last_csw = now; > > > > > +} > > > > > > > > It would be better to only pass the thread pointer, then use the > > > > thread->sched member. This would clearly explain the relationship > > > > between both, and prevent any bugous attempt at mixing things. > > > > > > Beware, when xnpod_schedule is called from within > > > xnpod_migrate_thread(), the sched pointer of the switched out thread is > > > the one of the destination cpu. So, passing the sched and threadout > > > pointers to xnpod_acc_exec_time is safer. > > > > Nope, the nklock is held by the migrating thread all along until the > > switch has actually occured. > > xnpod_acc_exec_time() is called from within xnpod_schedule() that is > called from within xnpod_migrate_thread() at a time where > threadout->sched is the wrong pointer. Gasp. Unfortunately, we are trapped by an exception case, and I don't see any better approach. > -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 13:23 ` Philippe Gerum @ 2006-07-07 13:59 ` Gilles Chanteperdrix 2006-07-07 14:25 ` Philippe Gerum 0 siblings, 1 reply; 24+ messages in thread From: Gilles Chanteperdrix @ 2006-07-07 13:59 UTC (permalink / raw) To: rpm; +Cc: xenomai Philippe Gerum wrote: > > xnpod_acc_exec_time() is called from within xnpod_schedule() that is > > called from within xnpod_migrate_thread() at a time where > > threadout->sched is the wrong pointer. > > Gasp. Unfortunately, we are trapped by an exception case, and I don't > see any better approach. We could reimplement an xnpod_migrate_thread() which suspend the calling thread and wake up a server thread on the destination CPU, this server thread migrating the suspended thread by changing its sched pointer outside of xnpod_schedule(). Or we can get rid of xnpod_migrate_thread(), it is currently not used by any skin. -- Gilles Chanteperdrix. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-07 13:59 ` Gilles Chanteperdrix @ 2006-07-07 14:25 ` Philippe Gerum 0 siblings, 0 replies; 24+ messages in thread From: Philippe Gerum @ 2006-07-07 14:25 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: xenomai On Fri, 2006-07-07 at 15:59 +0200, Gilles Chanteperdrix wrote: > Philippe Gerum wrote: > > > xnpod_acc_exec_time() is called from within xnpod_schedule() that is > > > called from within xnpod_migrate_thread() at a time where > > > threadout->sched is the wrong pointer. > > > > Gasp. Unfortunately, we are trapped by an exception case, and I don't > > see any better approach. > > We could reimplement an xnpod_migrate_thread() which suspend the calling > thread and wake up a server thread on the destination CPU, this server > thread migrating the suspended thread by changing its sched pointer > outside of xnpod_schedule(). > No, really. A small hack to get per-thread accounting should not trigger a storm of fundamental changes like this. > Or we can get rid of xnpod_migrate_thread(), it is currently not used by > any skin. > It's a fundamental feature for placing SMP jobs, and kernel-based Xenomai threads could not rely on sched_setscheduler() to do it. Let's keep this service, and simply pass the sched pointer to the accumulation routine; I was wrong initially suggesting the opposite. IOW, let's avoid smashing a squadron of flies with nukes... -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka 2006-07-06 11:41 ` Jan Kiszka 2006-07-06 14:32 ` Philippe Gerum @ 2006-07-06 15:04 ` Philippe Gerum 2006-07-06 15:06 ` Philippe Gerum 2 siblings, 1 reply; 24+ messages in thread From: Philippe Gerum @ 2006-07-06 15:04 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote: > c_time); > + seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %12llu" > + " %s\n", > p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status, > - p->name); > + xnarch_ulldiv(exec_time, 1000, NULL), p->name); xntimer_get_rawclock() returns TSC or jiffies, depending on the current aperiodic/periodic mode; what we need is something like xnarch_ulldiv(xnpod_ticks2ns(exec_time), ...) -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats 2006-07-06 15:04 ` Philippe Gerum @ 2006-07-06 15:06 ` Philippe Gerum 0 siblings, 0 replies; 24+ messages in thread From: Philippe Gerum @ 2006-07-06 15:06 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core On Thu, 2006-07-06 at 17:04 +0200, Philippe Gerum wrote: > On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote: > > c_time); > > + seq_printf(seq, "%3u %-6d %-10lu %-10lu %-4lu %.8lx %12llu" > > + " %s\n", > > p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status, > > - p->name); > > + xnarch_ulldiv(exec_time, 1000, NULL), p->name); > > xntimer_get_rawclock() returns TSC or jiffies, depending on the current > aperiodic/periodic mode; what we need is something like > xnarch_ulldiv(xnpod_ticks2ns(exec_time), ...) > Yeah, right. Forget about this one. -ENOBRAIN returned on my side. -- Philippe. ^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2006-07-07 14:25 UTC | newest] Thread overview: 24+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka 2006-07-06 11:41 ` Jan Kiszka 2006-07-06 14:32 ` Philippe Gerum 2006-07-06 15:09 ` Jan Kiszka 2006-07-06 15:37 ` Philippe Gerum 2006-07-06 15:46 ` Jan Kiszka 2006-07-06 16:36 ` Jan Kiszka 2006-07-06 17:17 ` Jan Kiszka 2006-07-07 8:20 ` Jan Kiszka 2006-07-07 8:04 ` Philippe Gerum 2006-07-07 8:09 ` Jan Kiszka 2006-07-07 8:49 ` Philippe Gerum 2006-07-07 9:03 ` Jan Kiszka 2006-07-07 11:28 ` Philippe Gerum 2006-07-06 16:37 ` Philippe Gerum 2006-07-06 23:47 ` Gilles Chanteperdrix 2006-07-07 7:54 ` Philippe Gerum 2006-07-07 10:22 ` Gilles Chanteperdrix 2006-07-07 10:30 ` Jan Kiszka 2006-07-07 13:23 ` Philippe Gerum 2006-07-07 13:59 ` Gilles Chanteperdrix 2006-07-07 14:25 ` Philippe Gerum 2006-07-06 15:04 ` Philippe Gerum 2006-07-06 15:06 ` Philippe Gerum
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.