[Xenomai-core] [RFC, PATCH] per-thread exec-time stats

All of lore.kernel.org
 help / color / mirror / Atom feed

* [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
@ 2006-07-06 11:37 Jan Kiszka
  2006-07-06 11:41 ` Jan Kiszka
                   ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Jan Kiszka @ 2006-07-06 11:37 UTC (permalink / raw)
  To: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 6892 bytes --]

Hi,

I think to remember people crying for some execution time statistics for quite a
while now. Here comes a straightforward approach to add this on a per-thread basis.
The results are printed like this:

root@domain.hid :/root# cat /proc/xenomai/stat 
CPU  PID    MSW        CSW        PF    STAT              TIME  NAME
  0  0      0          7508041    0     01400080     166038894  ROOT
  0  0      0          2          0     00000082            55  timsPipeReceiver
  0  1009   751        8800       0     00c00082        143026  display-1008
  0  1010   0          7514585    0     00c00084      54472477  sampling-1008

The new TIME column contains the accumulated execution time of each active thread in
nanoseconds. It's now a job of some user-space tool (shell script?) to pick those
numbers up and do, e.g., periodic evaluation (I'm thinking of some "xeno-top"). Any
volunteer around to hack this? Would be very welcome!

Jan

PS: Could someone have a look that I didn't miss an accounting point and my
acquisition is sound?


--
 include/nucleus/pod.h    |   28 ++++++++++++++++++++++++++++
 include/nucleus/thread.h |    1 +
 ksrc/nucleus/module.c    |   26 +++++++++++++++++++++-----
 ksrc/nucleus/pod.c       |    5 +++++
 ksrc/nucleus/thread.c    |    1 +
 5 files changed, 56 insertions(+), 5 deletions(-)

Index: include/nucleus/thread.h
===================================================================
--- include/nucleus/thread.h	(revision 1303)
+++ include/nucleus/thread.h	(working copy)
@@ -152,6 +152,7 @@ typedef struct xnthread {
 	unsigned long csw;	/* Context switches (includes
 				   secondary -> primary switches) */
 	unsigned long pf;	/* Number of page faults */
+	xnticks_t exec_time;	/* Accumulated execution time (ticks) */
     } stat;
 #endif /* CONFIG_XENO_OPT_STATS */
 
Index: include/nucleus/pod.h
===================================================================
--- include/nucleus/pod.h	(revision 1303)
+++ include/nucleus/pod.h	(working copy)
@@ -145,6 +145,10 @@ typedef struct xnsched {
 
     xnthread_t rootcb;          /*!< Root thread control block. */
 
+#ifdef CONFIG_XENO_OPT_STATS
+    xnticks_t last_csw;         /*!< Last context switch (ticks). */
+#endif /* CONFIG_XENO_OPT_STATS */
+
 } xnsched_t;
 
 #ifdef CONFIG_SMP
@@ -544,6 +548,30 @@ static inline void xnpod_delete_self (vo
     xnpod_delete_thread(xnpod_current_thread());
 }
 
+#ifdef CONFIG_XENO_OPT_STATS
+static inline void xnpod_acc_exec_time(xnsched_t *sched,
+                                       xnthread_t *threadout)
+{
+    xnticks_t now = xntimer_get_rawclock();
+    threadout->stat.exec_time += now - sched->last_csw;
+    sched->last_csw = now;
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+    sched->last_csw = xntimer_get_rawclock();
+}
+#else /* !CONFIG_XENO_OPT_STATS */
+static inline void xnpod_acc_exec_time(xnsched_t *sched,
+                                       xnthread_t *threadout)
+{
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+}
+#endif /* CONFIG_XENO_OPT_STATS */
+
 #ifdef __cplusplus
 }
 #endif
Index: ksrc/nucleus/thread.c
===================================================================
--- ksrc/nucleus/thread.c	(revision 1303)
+++ ksrc/nucleus/thread.c	(working copy)
@@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread,
 	thread->stat.ssw = 0;
 	thread->stat.csw = 0;
 	thread->stat.pf = 0;
+	thread->stat.exec_time = 0;
 #endif /* CONFIG_XENO_OPT_STATS */
 
 	/* These will be filled by xnpod_start_thread() */
Index: ksrc/nucleus/pod.c
===================================================================
--- ksrc/nucleus/pod.c	(revision 1303)
+++ ksrc/nucleus/pod.c	(working copy)
@@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x
 
 	xnthread_cleanup_tcb(threadout);
 
+	/* no need to update stats of dying thread */
+	xnpod_update_csw_date(sched);
+
 	xnarch_finalize_and_switch(xnthread_archtcb(threadout),
 				   xnthread_archtcb(threadin));
 
@@ -2431,6 +2434,7 @@ void xnpod_schedule(void)
 		xnarch_enter_root(xnthread_archtcb(threadin));
 	}
 
+	xnpod_acc_exec_time(sched, threadout);
 	xnthread_inc_csw(threadin);
 
 	xnarch_switch_to(xnthread_archtcb(threadout),
@@ -2602,6 +2606,7 @@ void xnpod_schedule_runnable(xnthread_t 
 		nkpod->schedhook(runthread, XNREADY);
 #endif /* __XENO_SIM__ */
 
+	xnpod_acc_exec_time(sched, runthread);
 	xnthread_inc_csw(threadin);
 
 	xnarch_switch_to(xnthread_archtcb(runthread),
Index: ksrc/nucleus/module.c
===================================================================
--- ksrc/nucleus/module.c	(revision 1303)
+++ ksrc/nucleus/module.c	(working copy)
@@ -254,6 +254,7 @@ struct stat_seq_iterator {
 		unsigned long ssw;
 		unsigned long csw;
 		unsigned long pf;
+		xnticks_t exec_time;
 	} stat_info[1];
 };
 
@@ -294,13 +295,17 @@ static void stat_seq_stop(struct seq_fil
 static int stat_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN)
-		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %s\n",
-			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME");
+		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %12s"
+			   "  %s\n",
+			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME",
+			   "NAME");
 	else {
 		struct stat_seq_info *p = (struct stat_seq_info *)v;
-		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %s\n",
+		unsigned long long exec_time = xnpod_ticks2ns(p->exec_time);
+		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %12llu"
+			   "  %s\n",
 			   p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
-			   p->name);
+			   xnarch_ulldiv(exec_time, 1000, NULL), p->name);
 	}
 
 	return 0;
@@ -318,7 +323,7 @@ static int stat_seq_open(struct inode *i
 	struct stat_seq_iterator *iter;
 	struct seq_file *seq;
 	xnholder_t *holder;
-	int err, count;
+	int err, count, cpu;
 	spl_t s;
 
 	if (!nkpod)
@@ -341,6 +346,16 @@ static int stat_seq_open(struct inode *i
 
 	iter->nentries = 0;
 
+	/* update exec-time stats of currently running threads */
+	for_each_online_cpu(cpu) {
+		xnsched_t *sched;
+
+		xnlock_get_irqsave(&nklock, s);
+		sched = xnpod_sched_slot(cpu);
+		xnpod_acc_exec_time(sched, sched->runthread);
+		xnlock_put_irqrestore(&nklock, s);
+	}
+
 	/* Take a snapshot and release the nucleus lock immediately after,
 	   so that dumping /proc/xenomai/stat with lots of entries won't
 	   cause massive jittery. */
@@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i
 		iter->stat_info[n].ssw = thread->stat.ssw;
 		iter->stat_info[n].csw = thread->stat.csw;
 		iter->stat_info[n].pf = thread->stat.pf;
+		iter->stat_info[n].exec_time = thread->stat.exec_time;
 	}
 
 	xnlock_put_irqrestore(&nklock, s);




[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka
@ 2006-07-06 11:41 ` Jan Kiszka
  2006-07-06 14:32 ` Philippe Gerum
  2006-07-06 15:04 ` Philippe Gerum
  2 siblings, 0 replies; 24+ messages in thread
From: Jan Kiszka @ 2006-07-06 11:41 UTC (permalink / raw)
  To: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 1213 bytes --]

Jan Kiszka wrote:
> Hi,
> 
> I think to remember people crying for some execution time statistics for quite a
> while now. Here comes a straightforward approach to add this on a per-thread basis.
> The results are printed like this:
> 
> root@domain.hid :/root# cat /proc/xenomai/stat 
> CPU  PID    MSW        CSW        PF    STAT              TIME  NAME
>   0  0      0          7508041    0     01400080     166038894  ROOT
>   0  0      0          2          0     00000082            55  timsPipeReceiver
>   0  1009   751        8800       0     00c00082        143026  display-1008
>   0  1010   0          7514585    0     00c00084      54472477  sampling-1008
> 
> The new TIME column contains the accumulated execution time of each active thread in
> nanoseconds. It's now a job of some user-space tool (shell script?) to pick those

I was thinking of the right unit, I swear:
s/nanoseconds/microseconds/

> numbers up and do, e.g., periodic evaluation (I'm thinking of some "xeno-top"). Any
> volunteer around to hack this? Would be very welcome!
> 
> Jan
> 
> PS: Could someone have a look that I didn't miss an accounting point and my
> acquisition is sound?
> 

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka
  2006-07-06 11:41 ` Jan Kiszka
@ 2006-07-06 14:32 ` Philippe Gerum
  2006-07-06 15:09   ` Jan Kiszka
  2006-07-06 23:47   ` Gilles Chanteperdrix
  2006-07-06 15:04 ` Philippe Gerum
  2 siblings, 2 replies; 24+ messages in thread
From: Philippe Gerum @ 2006-07-06 14:32 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote:

<snip>

> PS: Could someone have a look that I didn't miss an accounting point and my
> acquisition is sound?
> 
> 
> --
>  include/nucleus/pod.h    |   28 ++++++++++++++++++++++++++++
>  include/nucleus/thread.h |    1 +
>  ksrc/nucleus/module.c    |   26 +++++++++++++++++++++-----
>  ksrc/nucleus/pod.c       |    5 +++++
>  ksrc/nucleus/thread.c    |    1 +
>  5 files changed, 56 insertions(+), 5 deletions(-)
> 
> Index: include/nucleus/thread.h
> ===================================================================
> --- include/nucleus/thread.h	(revision 1303)
> +++ include/nucleus/thread.h	(working copy)
> @@ -152,6 +152,7 @@ typedef struct xnthread {
>  	unsigned long csw;	/* Context switches (includes
>  				   secondary -> primary switches) */
>  	unsigned long pf;	/* Number of page faults */
> +	xnticks_t exec_time;	/* Accumulated execution time (ticks) */
>      } stat;
>  #endif /* CONFIG_XENO_OPT_STATS */
>  
> Index: include/nucleus/pod.h
> ===================================================================
> --- include/nucleus/pod.h	(revision 1303)
> +++ include/nucleus/pod.h	(working copy)
> @@ -145,6 +145,10 @@ typedef struct xnsched {
>  
>      xnthread_t rootcb;          /*!< Root thread control block. */
>  
> +#ifdef CONFIG_XENO_OPT_STATS
> +    xnticks_t last_csw;         /*!< Last context switch (ticks). */
> +#endif /* CONFIG_XENO_OPT_STATS */
> +
>  } xnsched_t;
>  
>  #ifdef CONFIG_SMP
> @@ -544,6 +548,30 @@ static inline void xnpod_delete_self (vo
>      xnpod_delete_thread(xnpod_current_thread());
>  }
>  
> +#ifdef CONFIG_XENO_OPT_STATS
> +static inline void xnpod_acc_exec_time(xnsched_t *sched,
> +                                       xnthread_t *threadout)
> +{
> +    xnticks_t now = xntimer_get_rawclock();
> +    threadout->stat.exec_time += now - sched->last_csw;
> +    sched->last_csw = now;
> +}

It would be better to only pass the thread pointer, then use the
thread->sched member. This would clearly explain the relationship
between both, and prevent any bugous attempt at mixing things.

> +
> +static inline void xnpod_update_csw_date(xnsched_t *sched)
> +{
> +    sched->last_csw = xntimer_get_rawclock();
> +}
> +#else /* !CONFIG_XENO_OPT_STATS */
> +static inline void xnpod_acc_exec_time(xnsched_t *sched,
> +                                       xnthread_t *threadout)
> +{
> +}
> +
> +static inline void xnpod_update_csw_date(xnsched_t *sched)
> +{
> +}
> +#endif /* CONFIG_XENO_OPT_STATS */
> +
>  #ifdef __cplusplus
>  }
>  #endif
> Index: ksrc/nucleus/thread.c
> ===================================================================
> --- ksrc/nucleus/thread.c	(revision 1303)
> +++ ksrc/nucleus/thread.c	(working copy)
> @@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread,
>  	thread->stat.ssw = 0;
>  	thread->stat.csw = 0;
>  	thread->stat.pf = 0;
> +	thread->stat.exec_time = 0;
>  #endif /* CONFIG_XENO_OPT_STATS */
>  
>  	/* These will be filled by xnpod_start_thread() */
> Index: ksrc/nucleus/pod.c
> ===================================================================
> --- ksrc/nucleus/pod.c	(revision 1303)
> +++ ksrc/nucleus/pod.c	(working copy)
> @@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x
>  
>  	xnthread_cleanup_tcb(threadout);
>  
> +	/* no need to update stats of dying thread */
> +	xnpod_update_csw_date(sched);
> +
>  	xnarch_finalize_and_switch(xnthread_archtcb(threadout),
>  				   xnthread_archtcb(threadin));
>  
> @@ -2431,6 +2434,7 @@ void xnpod_schedule(void)
>  		xnarch_enter_root(xnthread_archtcb(threadin));
>  	}
>  
> +	xnpod_acc_exec_time(sched, threadout);
>  	xnthread_inc_csw(threadin);
>  
>  	xnarch_switch_to(xnthread_archtcb(threadout),
> @@ -2602,6 +2606,7 @@ void xnpod_schedule_runnable(xnthread_t 
>  		nkpod->schedhook(runthread, XNREADY);
>  #endif /* __XENO_SIM__ */
>  
> +	xnpod_acc_exec_time(sched, runthread);
>  	xnthread_inc_csw(threadin);
>  
>  	xnarch_switch_to(xnthread_archtcb(runthread),
> Index: ksrc/nucleus/module.c
> ===================================================================
> --- ksrc/nucleus/module.c	(revision 1303)
> +++ ksrc/nucleus/module.c	(working copy)
> @@ -254,6 +254,7 @@ struct stat_seq_iterator {
>  		unsigned long ssw;
>  		unsigned long csw;
>  		unsigned long pf;
> +		xnticks_t exec_time;
>  	} stat_info[1];
>  };
>  
> @@ -294,13 +295,17 @@ static void stat_seq_stop(struct seq_fil
>  static int stat_seq_show(struct seq_file *seq, void *v)
>  {
>  	if (v == SEQ_START_TOKEN)
> -		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %s\n",
> -			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME");
> +		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %12s"
> +			   "  %s\n",
> +			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME",
> +			   "NAME");
>  	else {
>  		struct stat_seq_info *p = (struct stat_seq_info *)v;
> -		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %s\n",
> +		unsigned long long exec_time = xnpod_ticks2ns(p->exec_time);
> +		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %12llu"
> +			   "  %s\n",
>  			   p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
> -			   p->name);
> +			   xnarch_ulldiv(exec_time, 1000, NULL), p->name);
>  	}
>  
>  	return 0;
> @@ -318,7 +323,7 @@ static int stat_seq_open(struct inode *i
>  	struct stat_seq_iterator *iter;
>  	struct seq_file *seq;
>  	xnholder_t *holder;
> -	int err, count;
> +	int err, count, cpu;
>  	spl_t s;
>  
>  	if (!nkpod)
> @@ -341,6 +346,16 @@ static int stat_seq_open(struct inode *i
>  
>  	iter->nentries = 0;
>  
> +	/* update exec-time stats of currently running threads */
> +	for_each_online_cpu(cpu) {
> +		xnsched_t *sched;
> +
> +		xnlock_get_irqsave(&nklock, s);
> +		sched = xnpod_sched_slot(cpu);
> +		xnpod_acc_exec_time(sched, sched->runthread);
> +		xnlock_put_irqrestore(&nklock, s);
> +	}
> +

We could do that from the current loop below, given that the
accumulation routine is changed to use thread->sched implicitely.

>  	/* Take a snapshot and release the nucleus lock immediately after,
>  	   so that dumping /proc/xenomai/stat with lots of entries won't
>  	   cause massive jittery. */
> @@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i
>  		iter->stat_info[n].ssw = thread->stat.ssw;
>  		iter->stat_info[n].csw = thread->stat.csw;
>  		iter->stat_info[n].pf = thread->stat.pf;
> +		iter->stat_info[n].exec_time = thread->stat.exec_time;
>  	}
>  
>  	xnlock_put_irqrestore(&nklock, s);
> 
> 
> 
> _______________________________________________
> Xenomai-core mailing list
> Xenomai-core@domain.hid
> https://mail.gna.org/listinfo/xenomai-core
-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka
  2006-07-06 11:41 ` Jan Kiszka
  2006-07-06 14:32 ` Philippe Gerum
@ 2006-07-06 15:04 ` Philippe Gerum
  2006-07-06 15:06   ` Philippe Gerum
  2 siblings, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2006-07-06 15:04 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote:
> c_time);
> +		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %12llu"
> +			   "  %s\n",
>  			   p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
> -			   p->name);
> +			   xnarch_ulldiv(exec_time, 1000, NULL), p->name);

xntimer_get_rawclock() returns TSC or jiffies, depending on the current
aperiodic/periodic mode; what we need is something like
xnarch_ulldiv(xnpod_ticks2ns(exec_time), ...)

-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 15:04 ` Philippe Gerum
@ 2006-07-06 15:06   ` Philippe Gerum
  0 siblings, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2006-07-06 15:06 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Thu, 2006-07-06 at 17:04 +0200, Philippe Gerum wrote:
> On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote:
> > c_time);
> > +		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %12llu"
> > +			   "  %s\n",
> >  			   p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
> > -			   p->name);
> > +			   xnarch_ulldiv(exec_time, 1000, NULL), p->name);
> 
> xntimer_get_rawclock() returns TSC or jiffies, depending on the current
> aperiodic/periodic mode; what we need is something like
> xnarch_ulldiv(xnpod_ticks2ns(exec_time), ...)
> 

Yeah, right. Forget about this one. -ENOBRAIN returned on my side.

-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 14:32 ` Philippe Gerum
@ 2006-07-06 15:09   ` Jan Kiszka
  2006-07-06 15:37     ` Philippe Gerum
  2006-07-06 23:47   ` Gilles Chanteperdrix
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2006-07-06 15:09 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 7247 bytes --]

Philippe Gerum wrote:
> On Thu, 2006-07-06 at 13:37 +0200, Jan Kiszka wrote:
> 
> <snip>
> 
>> PS: Could someone have a look that I didn't miss an accounting point and my
>> acquisition is sound?
>>
>>
>> --
>>  include/nucleus/pod.h    |   28 ++++++++++++++++++++++++++++
>>  include/nucleus/thread.h |    1 +
>>  ksrc/nucleus/module.c    |   26 +++++++++++++++++++++-----
>>  ksrc/nucleus/pod.c       |    5 +++++
>>  ksrc/nucleus/thread.c    |    1 +
>>  5 files changed, 56 insertions(+), 5 deletions(-)
>>
>> Index: include/nucleus/thread.h
>> ===================================================================
>> --- include/nucleus/thread.h	(revision 1303)
>> +++ include/nucleus/thread.h	(working copy)
>> @@ -152,6 +152,7 @@ typedef struct xnthread {
>>  	unsigned long csw;	/* Context switches (includes
>>  				   secondary -> primary switches) */
>>  	unsigned long pf;	/* Number of page faults */
>> +	xnticks_t exec_time;	/* Accumulated execution time (ticks) */
>>      } stat;
>>  #endif /* CONFIG_XENO_OPT_STATS */
>>  
>> Index: include/nucleus/pod.h
>> ===================================================================
>> --- include/nucleus/pod.h	(revision 1303)
>> +++ include/nucleus/pod.h	(working copy)
>> @@ -145,6 +145,10 @@ typedef struct xnsched {
>>  
>>      xnthread_t rootcb;          /*!< Root thread control block. */
>>  
>> +#ifdef CONFIG_XENO_OPT_STATS
>> +    xnticks_t last_csw;         /*!< Last context switch (ticks). */
>> +#endif /* CONFIG_XENO_OPT_STATS */
>> +
>>  } xnsched_t;
>>  
>>  #ifdef CONFIG_SMP
>> @@ -544,6 +548,30 @@ static inline void xnpod_delete_self (vo
>>      xnpod_delete_thread(xnpod_current_thread());
>>  }
>>  
>> +#ifdef CONFIG_XENO_OPT_STATS
>> +static inline void xnpod_acc_exec_time(xnsched_t *sched,
>> +                                       xnthread_t *threadout)
>> +{
>> +    xnticks_t now = xntimer_get_rawclock();
>> +    threadout->stat.exec_time += now - sched->last_csw;
>> +    sched->last_csw = now;
>> +}
> 
> It would be better to only pass the thread pointer, then use the
> thread->sched member. This would clearly explain the relationship
> between both, and prevent any bugous attempt at mixing things.

True, will rework.

> 
>> +
>> +static inline void xnpod_update_csw_date(xnsched_t *sched)
>> +{
>> +    sched->last_csw = xntimer_get_rawclock();
>> +}
>> +#else /* !CONFIG_XENO_OPT_STATS */
>> +static inline void xnpod_acc_exec_time(xnsched_t *sched,
>> +                                       xnthread_t *threadout)
>> +{
>> +}
>> +
>> +static inline void xnpod_update_csw_date(xnsched_t *sched)
>> +{
>> +}
>> +#endif /* CONFIG_XENO_OPT_STATS */
>> +
>>  #ifdef __cplusplus
>>  }
>>  #endif
>> Index: ksrc/nucleus/thread.c
>> ===================================================================
>> --- ksrc/nucleus/thread.c	(revision 1303)
>> +++ ksrc/nucleus/thread.c	(working copy)
>> @@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread,
>>  	thread->stat.ssw = 0;
>>  	thread->stat.csw = 0;
>>  	thread->stat.pf = 0;
>> +	thread->stat.exec_time = 0;
>>  #endif /* CONFIG_XENO_OPT_STATS */
>>  
>>  	/* These will be filled by xnpod_start_thread() */
>> Index: ksrc/nucleus/pod.c
>> ===================================================================
>> --- ksrc/nucleus/pod.c	(revision 1303)
>> +++ ksrc/nucleus/pod.c	(working copy)
>> @@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x
>>  
>>  	xnthread_cleanup_tcb(threadout);
>>  
>> +	/* no need to update stats of dying thread */
>> +	xnpod_update_csw_date(sched);
>> +
>>  	xnarch_finalize_and_switch(xnthread_archtcb(threadout),
>>  				   xnthread_archtcb(threadin));
>>  
>> @@ -2431,6 +2434,7 @@ void xnpod_schedule(void)
>>  		xnarch_enter_root(xnthread_archtcb(threadin));
>>  	}
>>  
>> +	xnpod_acc_exec_time(sched, threadout);
>>  	xnthread_inc_csw(threadin);
>>  
>>  	xnarch_switch_to(xnthread_archtcb(threadout),
>> @@ -2602,6 +2606,7 @@ void xnpod_schedule_runnable(xnthread_t 
>>  		nkpod->schedhook(runthread, XNREADY);
>>  #endif /* __XENO_SIM__ */
>>  
>> +	xnpod_acc_exec_time(sched, runthread);
>>  	xnthread_inc_csw(threadin);
>>  
>>  	xnarch_switch_to(xnthread_archtcb(runthread),
>> Index: ksrc/nucleus/module.c
>> ===================================================================
>> --- ksrc/nucleus/module.c	(revision 1303)
>> +++ ksrc/nucleus/module.c	(working copy)
>> @@ -254,6 +254,7 @@ struct stat_seq_iterator {
>>  		unsigned long ssw;
>>  		unsigned long csw;
>>  		unsigned long pf;
>> +		xnticks_t exec_time;
>>  	} stat_info[1];
>>  };
>>  
>> @@ -294,13 +295,17 @@ static void stat_seq_stop(struct seq_fil
>>  static int stat_seq_show(struct seq_file *seq, void *v)
>>  {
>>  	if (v == SEQ_START_TOKEN)
>> -		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %s\n",
>> -			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME");
>> +		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %12s"
>> +			   "  %s\n",
>> +			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME",
>> +			   "NAME");
>>  	else {
>>  		struct stat_seq_info *p = (struct stat_seq_info *)v;
>> -		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %s\n",
>> +		unsigned long long exec_time = xnpod_ticks2ns(p->exec_time);
>> +		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %12llu"
>> +			   "  %s\n",
>>  			   p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
>> -			   p->name);
>> +			   xnarch_ulldiv(exec_time, 1000, NULL), p->name);
>>  	}
>>  
>>  	return 0;
>> @@ -318,7 +323,7 @@ static int stat_seq_open(struct inode *i
>>  	struct stat_seq_iterator *iter;
>>  	struct seq_file *seq;
>>  	xnholder_t *holder;
>> -	int err, count;
>> +	int err, count, cpu;
>>  	spl_t s;
>>  
>>  	if (!nkpod)
>> @@ -341,6 +346,16 @@ static int stat_seq_open(struct inode *i
>>  
>>  	iter->nentries = 0;
>>  
>> +	/* update exec-time stats of currently running threads */
>> +	for_each_online_cpu(cpu) {
>> +		xnsched_t *sched;
>> +
>> +		xnlock_get_irqsave(&nklock, s);
>> +		sched = xnpod_sched_slot(cpu);
>> +		xnpod_acc_exec_time(sched, sched->runthread);
>> +		xnlock_put_irqrestore(&nklock, s);
>> +	}
>> +
> 
> We could do that from the current loop below, given that the
> accumulation routine is changed to use thread->sched implicitely.

The idea is avoid adding even further load to the nklock-protected loop.
And we only update the current thread, not each and every.

> 
>>  	/* Take a snapshot and release the nucleus lock immediately after,
>>  	   so that dumping /proc/xenomai/stat with lots of entries won't
>>  	   cause massive jittery. */
>> @@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i
>>  		iter->stat_info[n].ssw = thread->stat.ssw;
>>  		iter->stat_info[n].csw = thread->stat.csw;
>>  		iter->stat_info[n].pf = thread->stat.pf;
>> +		iter->stat_info[n].exec_time = thread->stat.exec_time;
>>  	}
>>  
>>  	xnlock_put_irqrestore(&nklock, s);
>>
>>
>>
>> _______________________________________________
>> Xenomai-core mailing list
>> Xenomai-core@domain.hid
>> https://mail.gna.org/listinfo/xenomai-core



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 15:09   ` Jan Kiszka
@ 2006-07-06 15:37     ` Philippe Gerum
  2006-07-06 15:46       ` Jan Kiszka
  0 siblings, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2006-07-06 15:37 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
> > We could do that from the current loop below, given that the
> > accumulation routine is changed to use thread->sched implicitely.
> 
> The idea is avoid adding even further load to the nklock-protected loop.
> And we only update the current thread, not each and every.
> 

Yes, but why? I mean, accumulated time so far remains significant even
for non-running threads.

> > 
> >>  	/* Take a snapshot and release the nucleus lock immediately after,
> >>  	   so that dumping /proc/xenomai/stat with lots of entries won't
> >>  	   cause massive jittery. */
> >> @@ -359,6 +374,7 @@ static int stat_seq_open(struct inode *i
> >>  		iter->stat_info[n].ssw = thread->stat.ssw;
> >>  		iter->stat_info[n].csw = thread->stat.csw;
> >>  		iter->stat_info[n].pf = thread->stat.pf;
> >> +		iter->stat_info[n].exec_time = thread->stat.exec_time;
> >>  	}
> >>  
> >>  	xnlock_put_irqrestore(&nklock, s);
> >>
> >>
> >>
> >> _______________________________________________
> >> Xenomai-core mailing list
> >> Xenomai-core@domain.hid
> >> https://mail.gna.org/listinfo/xenomai-core
> 
> 
-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 15:37     ` Philippe Gerum
@ 2006-07-06 15:46       ` Jan Kiszka
  2006-07-06 16:36         ` Jan Kiszka
  2006-07-06 16:37         ` Philippe Gerum
  0 siblings, 2 replies; 24+ messages in thread
From: Jan Kiszka @ 2006-07-06 15:46 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 1471 bytes --]

Philippe Gerum wrote:
> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
>>> We could do that from the current loop below, given that the
>>> accumulation routine is changed to use thread->sched implicitely.
>> The idea is avoid adding even further load to the nklock-protected loop.
>> And we only update the current thread, not each and every.
>>
> 
> Yes, but why? I mean, accumulated time so far remains significant even
> for non-running threads.

Update must only happen for the currently running thread on each cpu,
otherwise you skew the stats up.

But looking at the whole code in stat_seq_open() again, I now wonder if
that whole routine isn't

a) fragile on task removal and
b) still poorly scaling.

The thread name is only copied by reference, a disappearing instance my
cause troubles on printing it. And, instead of holding the lock all the
time, shouldn't we better

1. pick some element from the queue and mark it somehow
   in-use under lock
2. print or copy the entry
3. reacquire the lock to proceed to the next one - after checking if
   that element happened to be removed from the queue meanwhile (in that
   case we could abort the output or try to resync)

I'm worrying about a potential nice xeno-top tool polling the stats at
high frequency and causing unexpected jitters if there are a significant
number tasks registered (think of passive shadows we may create
automatically in the future!).

Jan

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 15:46       ` Jan Kiszka
@ 2006-07-06 16:36         ` Jan Kiszka
  2006-07-06 17:17           ` Jan Kiszka
  2006-07-07  8:04           ` Philippe Gerum
  2006-07-06 16:37         ` Philippe Gerum
  1 sibling, 2 replies; 24+ messages in thread
From: Jan Kiszka @ 2006-07-06 16:36 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 1836 bytes --]

Jan Kiszka wrote:
> Philippe Gerum wrote:
>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
>>>> We could do that from the current loop below, given that the
>>>> accumulation routine is changed to use thread->sched implicitely.
>>> The idea is avoid adding even further load to the nklock-protected loop.
>>> And we only update the current thread, not each and every.
>>>
>> Yes, but why? I mean, accumulated time so far remains significant even
>> for non-running threads.
> 
> Update must only happen for the currently running thread on each cpu,
> otherwise you skew the stats up.
> 
> 
> But looking at the whole code in stat_seq_open() again, I now wonder if
> that whole routine isn't
> 
> a) fragile on task removal and
> b) still poorly scaling.
> 
> The thread name is only copied by reference, a disappearing instance my
> cause troubles on printing it. And, instead of holding the lock all the
> time, shouldn't we better
> 
> 1. pick some element from the queue and mark it somehow
>    in-use under lock
> 2. print or copy the entry
> 3. reacquire the lock to proceed to the next one - after checking if
>    that element happened to be removed from the queue meanwhile (in that
>    case we could abort the output or try to resync)

Not feasible (threads may not always be prevented from being deleted),
but what about this:

- maintain a modification counter for nkpod->threadq
- in our /proc-loops (sched and latency e.g.):
  1. take nklock
  2. get current counter
  3. compare with previous, restart whole loop if not matching
  4. copy current element (including the name...)
  5. forward element pointer
  6. release nklock
  7. goto 1 if not end-of-list

As modifications on threadq should be fairly rare, I don't see a risk of
looping endlessly here.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 15:46       ` Jan Kiszka
  2006-07-06 16:36         ` Jan Kiszka
@ 2006-07-06 16:37         ` Philippe Gerum
  1 sibling, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2006-07-06 16:37 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Thu, 2006-07-06 at 17:46 +0200, Jan Kiszka wrote:
> Philippe Gerum wrote:
> > On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
> >>> We could do that from the current loop below, given that the
> >>> accumulation routine is changed to use thread->sched implicitely.
> >> The idea is avoid adding even further load to the nklock-protected loop.
> >> And we only update the current thread, not each and every.
> >>
> > 
> > Yes, but why? I mean, accumulated time so far remains significant even
> > for non-running threads.
> 
> Update must only happen for the currently running thread on each cpu,
> otherwise you skew the stats up.
> 

The point is that you don't want to update there, just peek at the
current value for all threads, and indeed update for the current one
before that. So we should keep the initial loop, but only read the
accumulator in the second one.

> 
> But looking at the whole code in stat_seq_open() again, I now wonder if
> that whole routine isn't
> 
> a) fragile on task removal and
> b) still poorly scaling.
> 
> The thread name is only copied by reference, a disappearing instance my
> cause troubles on printing it.

Yes, this will admittedly print garbage if the day really goes rainy
(the kernel memory we point at would still be mapped, but occupied by
other data). The other option would be to copy the name, but that's
unacceptable latency-wise, since we would need to do that under nklock
protection.

>  And, instead of holding the lock all the
> time, shouldn't we better
> 
> 1. pick some element from the queue and mark it somehow
>    in-use under lock
> 2. print or copy the entry
> 3. reacquire the lock to proceed to the next one - after checking if
>    that element happened to be removed from the queue meanwhile

Unfortunately, you can't since the associated memory might be utterly
trashed.

>  (in that
>    case we could abort the output or try to resync)
> 

It's the "try to resync" that bothers me. We currently have no sane way
to give any guarantee about the consistency of the output sequence here,
unless maybe using an array representation for indexing the threads. But
I suspect ore issues ahead.

> I'm worrying about a potential nice xeno-top tool polling the stats at
> high frequency and causing unexpected jitters if there are a significant
> number tasks registered (think of passive shadows we may create
> automatically in the future!).
> 

I agree, that's a potential issue. But we should also find an approach
that does not cause half-baked or mis-resynced data to be emitted, and
the current seqfile interface is not providing an easy way to do this.

> Jan
> 
-- 
Philippe.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 16:36         ` Jan Kiszka
@ 2006-07-06 17:17           ` Jan Kiszka
  2006-07-07  8:20             ` Jan Kiszka
  2006-07-07  8:04           ` Philippe Gerum
  1 sibling, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2006-07-06 17:17 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 14481 bytes --]

Jan Kiszka wrote:

> Jan Kiszka wrote:
>   
>> Philippe Gerum wrote:
>>     
>>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
>>>       
>>>>> We could do that from the current loop below, given that the
>>>>> accumulation routine is changed to use thread->sched implicitely.
>>>>>           
>>>> The idea is avoid adding even further load to the nklock-protected loop.
>>>> And we only update the current thread, not each and every.
>>>>
>>>>         
>>> Yes, but why? I mean, accumulated time so far remains significant even
>>> for non-running threads.
>>>       
>> Update must only happen for the currently running thread on each cpu,
>> otherwise you skew the stats up.
>>
>>
>> But looking at the whole code in stat_seq_open() again, I now wonder if
>> that whole routine isn't
>>
>> a) fragile on task removal and
>> b) still poorly scaling.
>>
>> The thread name is only copied by reference, a disappearing instance my
>> cause troubles on printing it. And, instead of holding the lock all the
>> time, shouldn't we better
>>
>> 1. pick some element from the queue and mark it somehow
>>    in-use under lock
>> 2. print or copy the entry
>> 3. reacquire the lock to proceed to the next one - after checking if
>>    that element happened to be removed from the queue meanwhile (in that
>>    case we could abort the output or try to resync)
>>     
>
> Not feasible (threads may not always be prevented from being deleted),
> but what about this:
>
> - maintain a modification counter for nkpod->threadq
> - in our /proc-loops (sched and latency e.g.):
>   1. take nklock
>   2. get current counter
>   3. compare with previous, restart whole loop if not matching
>   4. copy current element (including the name...)
>   5. forward element pointer
>   6. release nklock
>   7. goto 1 if not end-of-list
>
> As modifications on threadq should be fairly rare, I don't see a risk of
> looping endlessly here.
Here is such a patch + a reworked version of the exec-time stats. Compiles and boots,
doesn't cause obvious troubles when running in a loop while threads are created and
destroyed in parallel. Nevertheless, a quick hack which may contain bugs.

Jan

---
 include/nucleus/pod.h |    1 
 ksrc/nucleus/module.c |   94 +++++++++++++++++++++++++++++++++-----------------
 ksrc/nucleus/pod.c    |    2 +
 3 files changed, 66 insertions(+), 31 deletions(-)

Index: xenomai/include/nucleus/pod.h
===================================================================
--- xenomai.orig/include/nucleus/pod.h
+++ xenomai/include/nucleus/pod.h
@@ -195,6 +195,7 @@ struct xnpod {
     xnsched_t sched[XNARCH_NR_CPUS]; /*!< Per-cpu scheduler slots. */
 
     xnqueue_t threadq;          /*!< All existing threads. */
+    int threadq_rev;            /*!< Modification counter of threadq. */
 
     volatile u_long schedlck;	/*!< Scheduler lock count. */
 
Index: xenomai/ksrc/nucleus/module.c
===================================================================
--- xenomai.orig/ksrc/nucleus/module.c
+++ xenomai/ksrc/nucleus/module.c
@@ -99,7 +99,7 @@ struct sched_seq_iterator {
 	struct sched_seq_info {
 		int cpu;
 		pid_t pid;
-		const char *name;
+		char name[XNOBJECT_NAME_LEN];
 		int cprio;
 		xnticks_t timeout;
 		xnflags_t status;
@@ -177,17 +177,26 @@ static struct seq_operations sched_op = 
 
 static int sched_seq_open(struct inode *inode, struct file *file)
 {
-	struct sched_seq_iterator *iter;
+	struct sched_seq_iterator *iter = NULL;
 	struct seq_file *seq;
 	xnholder_t *holder;
-	int err, count;
+	int err, count, rev;
 	spl_t s;
 
 	if (!nkpod)
 		return -ESRCH;
 
+      restart:
+	xnlock_get_irqsave(&nklock, s);
+
+	rev = nkpod->threadq_rev;
 	count = countq(&nkpod->threadq);	/* Cannot be empty (ROOT) */
+	holder = getheadq(&nkpod->threadq);
+
+	xnlock_put_irqrestore(&nklock, s);
 
+	if (iter)
+		kfree(iter);
 	iter = kmalloc(sizeof(*iter)
 		       + (count - 1) * sizeof(struct sched_seq_info),
 		       GFP_KERNEL);
@@ -202,31 +211,37 @@ static int sched_seq_open(struct inode *
 	}
 
 	iter->nentries = 0;
+	iter->start_time = xntimer_get_jiffies();
 
-	/* Take a snapshot and release the nucleus lock immediately after,
-	   so that dumping /proc/xenomai/sched with lots of entries won't
-	   cause massive jittery. */
+	/* Take a snapshot element-wise, restart if something changes
+	   underneath us. */
 
-	xnlock_get_irqsave(&nklock, s);
+	while (holder) {
+		xnthread_t *thread;
+		int n;
 
-	iter->start_time = xntimer_get_jiffies();
+		xnlock_get_irqsave(&nklock, s);
+
+		if (nkpod->threadq_rev != rev)
+			goto restart;
+		rev = nkpod->threadq_rev;
 
-	for (holder = getheadq(&nkpod->threadq);
-	     holder && count > 0;
-	     holder = nextq(&nkpod->threadq, holder), count--) {
-		xnthread_t *thread = link2thread(holder, glink);
-		int n = iter->nentries++;
+		thread = link2thread(holder, glink);
+		n = iter->nentries++;
 
 		iter->sched_info[n].cpu = xnsched_cpu(thread->sched);
 		iter->sched_info[n].pid = xnthread_user_pid(thread);
-		iter->sched_info[n].name = thread->name;
+		memcpy(iter->sched_info[n].name, thread->name,
+		       sizeof(iter->sched_info[n].name));
 		iter->sched_info[n].cprio = thread->cprio;
 		iter->sched_info[n].timeout =
 		    xnthread_get_timeout(thread, iter->start_time);
 		iter->sched_info[n].status = thread->status;
-	}
 
-	xnlock_put_irqrestore(&nklock, s);
+		holder = nextq(&nkpod->threadq, holder);
+
+		xnlock_put_irqrestore(&nklock, s);
+	}
 
 	seq = (struct seq_file *)file->private_data;
 	seq->private = iter;
@@ -250,7 +265,7 @@ struct stat_seq_iterator {
 		int cpu;
 		pid_t pid;
 		xnflags_t status;
-		const char *name;
+		char name[XNOBJECT_NAME_LEN];
 		unsigned long ssw;
 		unsigned long csw;
 		unsigned long pf;
@@ -315,17 +330,26 @@ static struct seq_operations stat_op = {
 
 static int stat_seq_open(struct inode *inode, struct file *file)
 {
-	struct stat_seq_iterator *iter;
+	struct stat_seq_iterator *iter = NULL;
 	struct seq_file *seq;
 	xnholder_t *holder;
-	int err, count;
+	int err, count, rev;
 	spl_t s;
 
 	if (!nkpod)
 		return -ESRCH;
 
+      restart:
+	xnlock_get_irqsave(&nklock, s);
+
+	rev = nkpod->threadq_rev;
 	count = countq(&nkpod->threadq);	/* Cannot be empty (ROOT) */
+	holder = getheadq(&nkpod->threadq);
+
+	xnlock_put_irqrestore(&nklock, s);
 
+	if (iter)
+		kfree(iter);
 	iter = kmalloc(sizeof(*iter)
 		       + (count - 1) * sizeof(struct stat_seq_info),
 		       GFP_KERNEL);
@@ -341,27 +365,35 @@ static int stat_seq_open(struct inode *i
 
 	iter->nentries = 0;
 
-	/* Take a snapshot and release the nucleus lock immediately after,
-	   so that dumping /proc/xenomai/stat with lots of entries won't
-	   cause massive jittery. */
+	/* Take a snapshot element-wise, restart if something changes
+	   underneath us. */
 
-	xnlock_get_irqsave(&nklock, s);
+	while (holder) {
+		xnthread_t *thread;
+		int n;
+
+		xnlock_get_irqsave(&nklock, s);
+
+		if (nkpod->threadq_rev != rev)
+			goto restart;
+		rev = nkpod->threadq_rev;
+
+		thread = link2thread(holder, glink);
+		n = iter->nentries++;
 
-	for (holder = getheadq(&nkpod->threadq);
-	     holder && count > 0;
-	     holder = nextq(&nkpod->threadq, holder), count--) {
-		xnthread_t *thread = link2thread(holder, glink);
-		int n = iter->nentries++;
 		iter->stat_info[n].cpu = xnsched_cpu(thread->sched);
 		iter->stat_info[n].pid = xnthread_user_pid(thread);
-		iter->stat_info[n].name = thread->name;
+		memcpy(iter->stat_info[n].name, thread->name,
+		       sizeof(iter->stat_info[n].name));
 		iter->stat_info[n].status = thread->status;
 		iter->stat_info[n].ssw = thread->stat.ssw;
 		iter->stat_info[n].csw = thread->stat.csw;
 		iter->stat_info[n].pf = thread->stat.pf;
-	}
 
-	xnlock_put_irqrestore(&nklock, s);
+		holder = nextq(&nkpod->threadq, holder);
+
+		xnlock_put_irqrestore(&nklock, s);
+	}
 
 	seq = (struct seq_file *)file->private_data;
 	seq->private = iter;
Index: xenomai/ksrc/nucleus/pod.c
===================================================================
--- xenomai.orig/ksrc/nucleus/pod.c
+++ xenomai/ksrc/nucleus/pod.c
@@ -815,6 +815,7 @@ int xnpod_init_thread(xnthread_t *thread
 	xnlock_get_irqsave(&nklock, s);
 	thread->sched = xnpod_current_sched();
 	appendq(&nkpod->threadq, &thread->glink);
+	nkpod->threadq_rev++;
 	xnpod_suspend_thread(thread, XNDORMANT | (flags & XNSUSP), XN_INFINITE,
 			     NULL);
 	xnlock_put_irqrestore(&nklock, s);
@@ -1225,6 +1226,7 @@ void xnpod_delete_thread(xnthread_t *thr
 	sched = thread->sched;
 
 	removeq(&nkpod->threadq, &thread->glink);
+	nkpod->threadq_rev++;
 
 	if (!testbits(thread->status, XNTHREAD_BLOCK_BITS)) {
 		if (testbits(thread->status, XNREADY)) {


---
 include/nucleus/pod.h    |   28 ++++++++++++++++++++++++++++
 include/nucleus/thread.h |    1 +
 ksrc/nucleus/module.c    |   26 +++++++++++++++++++++-----
 ksrc/nucleus/pod.c       |    5 +++++
 ksrc/nucleus/thread.c    |    1 +
 5 files changed, 56 insertions(+), 5 deletions(-)

Index: include/nucleus/thread.h
===================================================================
--- include/nucleus/thread.h.orig
+++ include/nucleus/thread.h
@@ -152,6 +152,7 @@ typedef struct xnthread {
 	unsigned long csw;	/* Context switches (includes
 				   secondary -> primary switches) */
 	unsigned long pf;	/* Number of page faults */
+	xnticks_t exec_time;	/* Accumulated execution time (ticks) */
     } stat;
 #endif /* CONFIG_XENO_OPT_STATS */
 
Index: include/nucleus/pod.h
===================================================================
--- include/nucleus/pod.h.orig
+++ include/nucleus/pod.h
@@ -145,6 +145,10 @@ typedef struct xnsched {
 
     xnthread_t rootcb;          /*!< Root thread control block. */
 
+#ifdef CONFIG_XENO_OPT_STATS
+    xnticks_t last_csw;         /*!< Last context switch (ticks). */
+#endif /* CONFIG_XENO_OPT_STATS */
+
 } xnsched_t;
 
 #ifdef CONFIG_SMP
@@ -545,6 +549,30 @@ static inline void xnpod_delete_self (vo
     xnpod_delete_thread(xnpod_current_thread());
 }
 
+#ifdef CONFIG_XENO_OPT_STATS
+static inline void xnpod_acc_exec_time(xnthread_t *thread)
+{
+    xnsched_t *sched = thread->sched;
+    xnticks_t now = xntimer_get_rawclock();
+
+    thread->stat.exec_time += now - sched->last_csw;
+    sched->last_csw = now;
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+    sched->last_csw = xntimer_get_rawclock();
+}
+#else /* !CONFIG_XENO_OPT_STATS */
+static inline void xnpod_acc_exec_time(xnthread_t *thread)
+{
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+}
+#endif /* CONFIG_XENO_OPT_STATS */
+
 #ifdef __cplusplus
 }
 #endif
Index: ksrc/nucleus/thread.c
===================================================================
--- ksrc/nucleus/thread.c.orig
+++ ksrc/nucleus/thread.c
@@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread,
 	thread->stat.ssw = 0;
 	thread->stat.csw = 0;
 	thread->stat.pf = 0;
+	thread->stat.exec_time = 0;
 #endif /* CONFIG_XENO_OPT_STATS */
 
 	/* These will be filled by xnpod_start_thread() */
Index: ksrc/nucleus/pod.c
===================================================================
--- ksrc/nucleus/pod.c.orig
+++ ksrc/nucleus/pod.c
@@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x
 
 	xnthread_cleanup_tcb(threadout);
 
+	/* no need to update stats of dying thread */
+	xnpod_update_csw_date(sched);
+
 	xnarch_finalize_and_switch(xnthread_archtcb(threadout),
 				   xnthread_archtcb(threadin));
 
@@ -2433,6 +2436,7 @@ void xnpod_schedule(void)
 		xnarch_enter_root(xnthread_archtcb(threadin));
 	}
 
+	xnpod_acc_exec_time(threadout);
 	xnthread_inc_csw(threadin);
 
 	xnarch_switch_to(xnthread_archtcb(threadout),
@@ -2604,6 +2608,7 @@ void xnpod_schedule_runnable(xnthread_t 
 		nkpod->schedhook(runthread, XNREADY);
 #endif /* __XENO_SIM__ */
 
+	xnpod_acc_exec_time(runthread);
 	xnthread_inc_csw(threadin);
 
 	xnarch_switch_to(xnthread_archtcb(runthread),
Index: ksrc/nucleus/module.c
===================================================================
--- ksrc/nucleus/module.c.orig
+++ ksrc/nucleus/module.c
@@ -269,6 +269,7 @@ struct stat_seq_iterator {
 		unsigned long ssw;
 		unsigned long csw;
 		unsigned long pf;
+		xnticks_t exec_time;
 	} stat_info[1];
 };
 
@@ -309,13 +310,17 @@ static void stat_seq_stop(struct seq_fil
 static int stat_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN)
-		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %s\n",
-			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME");
+		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %12s"
+			   "  %s\n",
+			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME",
+			   "NAME");
 	else {
 		struct stat_seq_info *p = (struct stat_seq_info *)v;
-		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %s\n",
+		unsigned long long exec_time = xnpod_ticks2ns(p->exec_time);
+		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %12llu"
+			   "  %s\n",
 			   p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
-			   p->name);
+			   xnarch_ulldiv(exec_time, 1000, NULL), p->name);
 	}
 
 	return 0;
@@ -333,7 +338,7 @@ static int stat_seq_open(struct inode *i
 	struct stat_seq_iterator *iter = NULL;
 	struct seq_file *seq;
 	xnholder_t *holder;
-	int err, count, rev;
+	int err, count, rev, cpu;
 	spl_t s;
 
 	if (!nkpod)
@@ -365,6 +370,16 @@ static int stat_seq_open(struct inode *i
 
 	iter->nentries = 0;
 
+	/* update exec-time stats of currently running threads */
+	for_each_online_cpu(cpu) {
+		xnsched_t *sched;
+
+		xnlock_get_irqsave(&nklock, s);
+		sched = xnpod_sched_slot(cpu);
+		xnpod_acc_exec_time(sched->runthread);
+		xnlock_put_irqrestore(&nklock, s);
+	}
+
 	/* Take a snapshot element-wise, restart if something changes
 	   underneath us. */
 
@@ -389,6 +404,7 @@ static int stat_seq_open(struct inode *i
 		iter->stat_info[n].ssw = thread->stat.ssw;
 		iter->stat_info[n].csw = thread->stat.csw;
 		iter->stat_info[n].pf = thread->stat.pf;
+		iter->stat_info[n].exec_time = thread->stat.exec_time;
 
 		holder = nextq(&nkpod->threadq, holder);
 




[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 14:32 ` Philippe Gerum
  2006-07-06 15:09   ` Jan Kiszka
@ 2006-07-06 23:47   ` Gilles Chanteperdrix
  2006-07-07  7:54     ` Philippe Gerum
  1 sibling, 1 reply; 24+ messages in thread
From: Gilles Chanteperdrix @ 2006-07-06 23:47 UTC (permalink / raw)
  To: rpm; +Cc: Jan Kiszka, xenomai-core

Philippe Gerum wrote:
 > > +#ifdef CONFIG_XENO_OPT_STATS
 > > +static inline void xnpod_acc_exec_time(xnsched_t *sched,
 > > +                                       xnthread_t *threadout)
 > > +{
 > > +    xnticks_t now = xntimer_get_rawclock();
 > > +    threadout->stat.exec_time += now - sched->last_csw;
 > > +    sched->last_csw = now;
 > > +}
 > 
 > It would be better to only pass the thread pointer, then use the
 > thread->sched member. This would clearly explain the relationship
 > between both, and prevent any bugous attempt at mixing things.

Beware, when xnpod_schedule is called from within
xnpod_migrate_thread(), the sched pointer of the switched out thread is
the one of the destination cpu. So, passing the sched and threadout
pointers to xnpod_acc_exec_time is safer.


-- 


					    Gilles Chanteperdrix.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 23:47   ` Gilles Chanteperdrix
@ 2006-07-07  7:54     ` Philippe Gerum
  2006-07-07 10:22       ` Gilles Chanteperdrix
  0 siblings, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2006-07-07  7:54 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: Jan Kiszka, xenomai-core

On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote:
> Philippe Gerum wrote:
>  > > +#ifdef CONFIG_XENO_OPT_STATS
>  > > +static inline void xnpod_acc_exec_time(xnsched_t *sched,
>  > > +                                       xnthread_t *threadout)
>  > > +{
>  > > +    xnticks_t now = xntimer_get_rawclock();
>  > > +    threadout->stat.exec_time += now - sched->last_csw;
>  > > +    sched->last_csw = now;
>  > > +}
>  > 
>  > It would be better to only pass the thread pointer, then use the
>  > thread->sched member. This would clearly explain the relationship
>  > between both, and prevent any bugous attempt at mixing things.
> 
> Beware, when xnpod_schedule is called from within
> xnpod_migrate_thread(), the sched pointer of the switched out thread is
> the one of the destination cpu. So, passing the sched and threadout
> pointers to xnpod_acc_exec_time is safer.

Nope, the nklock is held by the migrating thread all along until the
switch has actually occured.

> 
> 
-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 16:36         ` Jan Kiszka
  2006-07-06 17:17           ` Jan Kiszka
@ 2006-07-07  8:04           ` Philippe Gerum
  2006-07-07  8:09             ` Jan Kiszka
  1 sibling, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2006-07-07  8:04 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote:
> Jan Kiszka wrote:
> > Philippe Gerum wrote:
> >> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
> >>>> We could do that from the current loop below, given that the
> >>>> accumulation routine is changed to use thread->sched implicitely.
> >>> The idea is avoid adding even further load to the nklock-protected loop.
> >>> And we only update the current thread, not each and every.
> >>>
> >> Yes, but why? I mean, accumulated time so far remains significant even
> >> for non-running threads.
> > 
> > Update must only happen for the currently running thread on each cpu,
> > otherwise you skew the stats up.
> > 
> > 
> > But looking at the whole code in stat_seq_open() again, I now wonder if
> > that whole routine isn't
> > 
> > a) fragile on task removal and
> > b) still poorly scaling.
> > 
> > The thread name is only copied by reference, a disappearing instance my
> > cause troubles on printing it. And, instead of holding the lock all the
> > time, shouldn't we better
> > 
> > 1. pick some element from the queue and mark it somehow
> >    in-use under lock
> > 2. print or copy the entry
> > 3. reacquire the lock to proceed to the next one - after checking if
> >    that element happened to be removed from the queue meanwhile (in that
> >    case we could abort the output or try to resync)
> 
> Not feasible (threads may not always be prevented from being deleted),
> but what about this:
> 
> - maintain a modification counter for nkpod->threadq
> - in our /proc-loops (sched and latency e.g.):
>   1. take nklock
>   2. get current counter
>   3. compare with previous, restart whole loop if not matching
>   4. copy current element (including the name...)
>   5. forward element pointer
>   6. release nklock
>   7. goto 1 if not end-of-list
> 
> As modifications on threadq should be fairly rare, I don't see a risk of
> looping endlessly here.
> 

The more I think of it, the more I'm convinced that we are trying to
tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top
tool would rather need the equivalent of individual /proc/<pid> files,
thus reducing the contention on access, and the need for weird grepping
on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread
data in some easily greppable form by a user-space tool.

> Jan
> 
-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07  8:04           ` Philippe Gerum
@ 2006-07-07  8:09             ` Jan Kiszka
  2006-07-07  8:49               ` Philippe Gerum
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2006-07-07  8:09 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-core

Philippe Gerum wrote:
> On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote:
>> Jan Kiszka wrote:
>>> Philippe Gerum wrote:
>>>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
>>>>>> We could do that from the current loop below, given that the
>>>>>> accumulation routine is changed to use thread->sched implicitely.
>>>>> The idea is avoid adding even further load to the nklock-protected loop.
>>>>> And we only update the current thread, not each and every.
>>>>>
>>>> Yes, but why? I mean, accumulated time so far remains significant even
>>>> for non-running threads.
>>> Update must only happen for the currently running thread on each cpu,
>>> otherwise you skew the stats up.
>>>
>>>
>>> But looking at the whole code in stat_seq_open() again, I now wonder if
>>> that whole routine isn't
>>>
>>> a) fragile on task removal and
>>> b) still poorly scaling.
>>>
>>> The thread name is only copied by reference, a disappearing instance my
>>> cause troubles on printing it. And, instead of holding the lock all the
>>> time, shouldn't we better
>>>
>>> 1. pick some element from the queue and mark it somehow
>>>    in-use under lock
>>> 2. print or copy the entry
>>> 3. reacquire the lock to proceed to the next one - after checking if
>>>    that element happened to be removed from the queue meanwhile (in that
>>>    case we could abort the output or try to resync)
>> Not feasible (threads may not always be prevented from being deleted),
>> but what about this:
>>
>> - maintain a modification counter for nkpod->threadq
>> - in our /proc-loops (sched and latency e.g.):
>>   1. take nklock
>>   2. get current counter
>>   3. compare with previous, restart whole loop if not matching
>>   4. copy current element (including the name...)
>>   5. forward element pointer
>>   6. release nklock
>>   7. goto 1 if not end-of-list
>>
>> As modifications on threadq should be fairly rare, I don't see a risk of
>> looping endlessly here.
>>
> 
> The more I think of it, the more I'm convinced that we are trying to
> tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top
> tool would rather need the equivalent of individual /proc/<pid> files,
> thus reducing the contention on access, and the need for weird grepping
> on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread
> data in some easily greppable form by a user-space tool.

Far too complex in my eyes for this simple purpose (what's the pid of
kernel-only threads?) - and we need to solve the latency problem of
/sched and /stat anyway.

Jan


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-06 17:17           ` Jan Kiszka
@ 2006-07-07  8:20             ` Jan Kiszka
  0 siblings, 0 replies; 24+ messages in thread
From: Jan Kiszka @ 2006-07-07  8:20 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 5941 bytes --]

Jan Kiszka wrote:

> Here is such a patch + a reworked version of the exec-time stats. Compiles and boots,
> doesn't cause obvious troubles when running in a loop while threads are created and
> destroyed in parallel. Nevertheless, a quick hack which may contain bugs.

As expected, there were bugs. This version of the exec-time patch always uses TSC for
timestamping and the related correct conversion function. And it doesn't try to update
the exec-time across CPUs as the TSCs may not be in sync. Thus, when there is no
scheduling activity on some CPU, we may not see progress in /stat. But this is the least
invasive way, and we typically DO have activity anyway.

Jan


---
 include/nucleus/pod.h    |   28 ++++++++++++++++++++++++++++
 include/nucleus/thread.h |    1 +
 ksrc/nucleus/module.c    |   20 ++++++++++++++++----
 ksrc/nucleus/pod.c       |    5 +++++
 ksrc/nucleus/thread.c    |    1 +
 5 files changed, 51 insertions(+), 4 deletions(-)

Index: include/nucleus/thread.h
===================================================================
--- include/nucleus/thread.h.orig
+++ include/nucleus/thread.h
@@ -152,6 +152,7 @@ typedef struct xnthread {
 	unsigned long csw;	/* Context switches (includes
 				   secondary -> primary switches) */
 	unsigned long pf;	/* Number of page faults */
+	xnticks_t exec_time;	/* Accumulated execution time (ticks) */
     } stat;
 #endif /* CONFIG_XENO_OPT_STATS */
 
Index: include/nucleus/pod.h
===================================================================
--- include/nucleus/pod.h.orig
+++ include/nucleus/pod.h
@@ -145,6 +145,10 @@ typedef struct xnsched {
 
     xnthread_t rootcb;          /*!< Root thread control block. */
 
+#ifdef CONFIG_XENO_OPT_STATS
+    xnticks_t last_csw;         /*!< Last context switch (ticks). */
+#endif /* CONFIG_XENO_OPT_STATS */
+
 } xnsched_t;
 
 #ifdef CONFIG_SMP
@@ -545,6 +549,30 @@ static inline void xnpod_delete_self (vo
     xnpod_delete_thread(xnpod_current_thread());
 }
 
+#ifdef CONFIG_XENO_OPT_STATS
+static inline void xnpod_acc_exec_time(xnthread_t *thread)
+{
+    xnsched_t *sched = thread->sched;
+    xnticks_t now = xnarch_get_cpu_tsc();
+
+    thread->stat.exec_time += now - sched->last_csw;
+    sched->last_csw = now;
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+    sched->last_csw = xnarch_get_cpu_tsc();
+}
+#else /* !CONFIG_XENO_OPT_STATS */
+static inline void xnpod_acc_exec_time(xnthread_t *thread)
+{
+}
+
+static inline void xnpod_update_csw_date(xnsched_t *sched)
+{
+}
+#endif /* CONFIG_XENO_OPT_STATS */
+
 #ifdef __cplusplus
 }
 #endif
Index: ksrc/nucleus/thread.c
===================================================================
--- ksrc/nucleus/thread.c.orig
+++ ksrc/nucleus/thread.c
@@ -90,6 +90,7 @@ int xnthread_init(xnthread_t *thread,
 	thread->stat.ssw = 0;
 	thread->stat.csw = 0;
 	thread->stat.pf = 0;
+	thread->stat.exec_time = 0;
 #endif /* CONFIG_XENO_OPT_STATS */
 
 	/* These will be filled by xnpod_start_thread() */
Index: ksrc/nucleus/pod.c
===================================================================
--- ksrc/nucleus/pod.c.orig
+++ ksrc/nucleus/pod.c
@@ -669,6 +669,9 @@ static inline void xnpod_switch_zombie(x
 
 	xnthread_cleanup_tcb(threadout);
 
+	/* no need to update stats of dying thread */
+	xnpod_update_csw_date(sched);
+
 	xnarch_finalize_and_switch(xnthread_archtcb(threadout),
 				   xnthread_archtcb(threadin));
 
@@ -2433,6 +2436,7 @@ void xnpod_schedule(void)
 		xnarch_enter_root(xnthread_archtcb(threadin));
 	}
 
+	xnpod_acc_exec_time(threadout);
 	xnthread_inc_csw(threadin);
 
 	xnarch_switch_to(xnthread_archtcb(threadout),
@@ -2604,6 +2608,7 @@ void xnpod_schedule_runnable(xnthread_t 
 		nkpod->schedhook(runthread, XNREADY);
 #endif /* __XENO_SIM__ */
 
+	xnpod_acc_exec_time(runthread);
 	xnthread_inc_csw(threadin);
 
 	xnarch_switch_to(xnthread_archtcb(runthread),
Index: ksrc/nucleus/module.c
===================================================================
--- ksrc/nucleus/module.c.orig
+++ ksrc/nucleus/module.c
@@ -269,6 +269,7 @@ struct stat_seq_iterator {
 		unsigned long ssw;
 		unsigned long csw;
 		unsigned long pf;
+		xnticks_t exec_time;
 	} stat_info[1];
 };
 
@@ -309,13 +310,17 @@ static void stat_seq_stop(struct seq_fil
 static int stat_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN)
-		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %s\n",
-			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "NAME");
+		seq_printf(seq, "%-3s  %-6s %-10s %-10s %-4s  %-8s  %12s"
+			   "  %s\n",
+			   "CPU", "PID", "MSW", "CSW", "PF", "STAT", "TIME",
+			   "NAME");
 	else {
 		struct stat_seq_info *p = (struct stat_seq_info *)v;
-		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %s\n",
+		unsigned long long exec_time = xnarch_tsc_to_ns(p->exec_time);
+		seq_printf(seq, "%3u  %-6d %-10lu %-10lu %-4lu  %.8lx  %12llu"
+			   "  %s\n",
 			   p->cpu, p->pid, p->ssw, p->csw, p->pf, p->status,
-			   p->name);
+			   xnarch_ulldiv(exec_time, 1000, NULL), p->name);
 	}
 
 	return 0;
@@ -365,6 +370,12 @@ static int stat_seq_open(struct inode *i
 
 	iter->nentries = 0;
 
+	/* Update exec-time stats at least on current CPU, unsync'ed TSCs
+	   prevent cross-CPU update yet. */
+	xnlock_get_irqsave(&nklock, s);
+	xnpod_acc_exec_time(xnpod_current_thread());
+	xnlock_put_irqrestore(&nklock, s);
+
 	/* Take a snapshot element-wise, restart if something changes
 	   underneath us. */
 
@@ -389,6 +400,7 @@ static int stat_seq_open(struct inode *i
 		iter->stat_info[n].ssw = thread->stat.ssw;
 		iter->stat_info[n].csw = thread->stat.csw;
 		iter->stat_info[n].pf = thread->stat.pf;
+		iter->stat_info[n].exec_time = thread->stat.exec_time;
 
 		holder = nextq(&nkpod->threadq, holder);
 




[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 249 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07  8:09             ` Jan Kiszka
@ 2006-07-07  8:49               ` Philippe Gerum
  2006-07-07  9:03                 ` Jan Kiszka
  0 siblings, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2006-07-07  8:49 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Fri, 2006-07-07 at 10:09 +0200, Jan Kiszka wrote:
> Philippe Gerum wrote:
> > On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote:
> >> Jan Kiszka wrote:
> >>> Philippe Gerum wrote:
> >>>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
> >>>>>> We could do that from the current loop below, given that the
> >>>>>> accumulation routine is changed to use thread->sched implicitely.
> >>>>> The idea is avoid adding even further load to the nklock-protected loop.
> >>>>> And we only update the current thread, not each and every.
> >>>>>
> >>>> Yes, but why? I mean, accumulated time so far remains significant even
> >>>> for non-running threads.
> >>> Update must only happen for the currently running thread on each cpu,
> >>> otherwise you skew the stats up.
> >>>
> >>>
> >>> But looking at the whole code in stat_seq_open() again, I now wonder if
> >>> that whole routine isn't
> >>>
> >>> a) fragile on task removal and
> >>> b) still poorly scaling.
> >>>
> >>> The thread name is only copied by reference, a disappearing instance my
> >>> cause troubles on printing it. And, instead of holding the lock all the
> >>> time, shouldn't we better
> >>>
> >>> 1. pick some element from the queue and mark it somehow
> >>>    in-use under lock
> >>> 2. print or copy the entry
> >>> 3. reacquire the lock to proceed to the next one - after checking if
> >>>    that element happened to be removed from the queue meanwhile (in that
> >>>    case we could abort the output or try to resync)
> >> Not feasible (threads may not always be prevented from being deleted),
> >> but what about this:
> >>
> >> - maintain a modification counter for nkpod->threadq
> >> - in our /proc-loops (sched and latency e.g.):
> >>   1. take nklock
> >>   2. get current counter
> >>   3. compare with previous, restart whole loop if not matching
> >>   4. copy current element (including the name...)
> >>   5. forward element pointer
> >>   6. release nklock
> >>   7. goto 1 if not end-of-list
> >>
> >> As modifications on threadq should be fairly rare, I don't see a risk of
> >> looping endlessly here.
> >>
> > 
> > The more I think of it, the more I'm convinced that we are trying to
> > tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top
> > tool would rather need the equivalent of individual /proc/<pid> files,
> > thus reducing the contention on access, and the need for weird grepping
> > on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread
> > data in some easily greppable form by a user-space tool.
> 
> Far too complex in my eyes for this simple purpose (what's the pid of
> kernel-only threads?) -

It's symbolic name. It's not a matter of simplicity, your patch for that
purpose is rather complex already.

>  and we need to solve the latency problem of
> /sched and /stat anyway.

That's separate issues. Solving the scalability issue
of /proc/xenomai/stats and friends does not improve their usability in
the context of user-space tools monitoring thread activity. E.g. how are
we going to extend the reporting about such activity if need be, adding
yet another column to /stats and /sched?

> 
> Jan
-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07  8:49               ` Philippe Gerum
@ 2006-07-07  9:03                 ` Jan Kiszka
  2006-07-07 11:28                   ` Philippe Gerum
  0 siblings, 1 reply; 24+ messages in thread
From: Jan Kiszka @ 2006-07-07  9:03 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 3722 bytes --]

Philippe Gerum wrote:
> On Fri, 2006-07-07 at 10:09 +0200, Jan Kiszka wrote:
>> Philippe Gerum wrote:
>>> On Thu, 2006-07-06 at 18:36 +0200, Jan Kiszka wrote:
>>>> Jan Kiszka wrote:
>>>>> Philippe Gerum wrote:
>>>>>> On Thu, 2006-07-06 at 17:09 +0200, Jan Kiszka wrote:
>>>>>>>> We could do that from the current loop below, given that the
>>>>>>>> accumulation routine is changed to use thread->sched implicitely.
>>>>>>> The idea is avoid adding even further load to the nklock-protected loop.
>>>>>>> And we only update the current thread, not each and every.
>>>>>>>
>>>>>> Yes, but why? I mean, accumulated time so far remains significant even
>>>>>> for non-running threads.
>>>>> Update must only happen for the currently running thread on each cpu,
>>>>> otherwise you skew the stats up.
>>>>>
>>>>>
>>>>> But looking at the whole code in stat_seq_open() again, I now wonder if
>>>>> that whole routine isn't
>>>>>
>>>>> a) fragile on task removal and
>>>>> b) still poorly scaling.
>>>>>
>>>>> The thread name is only copied by reference, a disappearing instance my
>>>>> cause troubles on printing it. And, instead of holding the lock all the
>>>>> time, shouldn't we better
>>>>>
>>>>> 1. pick some element from the queue and mark it somehow
>>>>>    in-use under lock
>>>>> 2. print or copy the entry
>>>>> 3. reacquire the lock to proceed to the next one - after checking if
>>>>>    that element happened to be removed from the queue meanwhile (in that
>>>>>    case we could abort the output or try to resync)
>>>> Not feasible (threads may not always be prevented from being deleted),
>>>> but what about this:
>>>>
>>>> - maintain a modification counter for nkpod->threadq
>>>> - in our /proc-loops (sched and latency e.g.):
>>>>   1. take nklock
>>>>   2. get current counter
>>>>   3. compare with previous, restart whole loop if not matching
>>>>   4. copy current element (including the name...)
>>>>   5. forward element pointer
>>>>   6. release nklock
>>>>   7. goto 1 if not end-of-list
>>>>
>>>> As modifications on threadq should be fairly rare, I don't see a risk of
>>>> looping endlessly here.
>>>>
>>> The more I think of it, the more I'm convinced that we are trying to
>>> tweak /proc/xenomai/stats for the wrong purpose. Actually, some xeno_top
>>> tool would rather need the equivalent of individual /proc/<pid> files,
>>> thus reducing the contention on access, and the need for weird grepping
>>> on the output. e.g. /proc/xenomai/threads/<pid> could emit per-thread
>>> data in some easily greppable form by a user-space tool.
>> Far too complex in my eyes for this simple purpose (what's the pid of
>> kernel-only threads?) -
> 
> It's symbolic name. It's not a matter of simplicity, your patch for that
> purpose is rather complex already.

Haven't measured, but the amount of code added for collecting the data
and printing just another column should be marginal as yet. Introducing
a new infrastructure for more /proc subdirs with probably multiple
entries would certainly cost more.

> 
>>  and we need to solve the latency problem of
>> /sched and /stat anyway.
> 
> That's separate issues. Solving the scalability issue
> of /proc/xenomai/stats and friends does not improve their usability in
> the context of user-space tools monitoring thread activity. E.g. how are
> we going to extend the reporting about such activity if need be, adding
> yet another column to /stats and /sched?

As long as there are no stand-alone tools relying on the layout, just
only our own ones - fairly simple. But before speculating more, I will
have a look now how simple such tool/script may actually be.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07  7:54     ` Philippe Gerum
@ 2006-07-07 10:22       ` Gilles Chanteperdrix
  2006-07-07 10:30         ` Jan Kiszka
  2006-07-07 13:23         ` Philippe Gerum
  0 siblings, 2 replies; 24+ messages in thread
From: Gilles Chanteperdrix @ 2006-07-07 10:22 UTC (permalink / raw)
  To: rpm; +Cc: Jan Kiszka, xenomai-core

Philippe Gerum wrote:
 > On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote:
 > > Philippe Gerum wrote:
 > >  > > +#ifdef CONFIG_XENO_OPT_STATS
 > >  > > +static inline void xnpod_acc_exec_time(xnsched_t *sched,
 > >  > > +                                       xnthread_t *threadout)
 > >  > > +{
 > >  > > +    xnticks_t now = xntimer_get_rawclock();
 > >  > > +    threadout->stat.exec_time += now - sched->last_csw;
 > >  > > +    sched->last_csw = now;
 > >  > > +}
 > >  > 
 > >  > It would be better to only pass the thread pointer, then use the
 > >  > thread->sched member. This would clearly explain the relationship
 > >  > between both, and prevent any bugous attempt at mixing things.
 > > 
 > > Beware, when xnpod_schedule is called from within
 > > xnpod_migrate_thread(), the sched pointer of the switched out thread is
 > > the one of the destination cpu. So, passing the sched and threadout
 > > pointers to xnpod_acc_exec_time is safer.
 > 
 > Nope, the nklock is held by the migrating thread all along until the
 > switch has actually occured.

xnpod_acc_exec_time() is called from within xnpod_schedule() that is
called from within xnpod_migrate_thread() at a time where
threadout->sched is the wrong pointer.

-- 


					    Gilles Chanteperdrix.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07 10:22       ` Gilles Chanteperdrix
@ 2006-07-07 10:30         ` Jan Kiszka
  2006-07-07 13:23         ` Philippe Gerum
  1 sibling, 0 replies; 24+ messages in thread
From: Jan Kiszka @ 2006-07-07 10:30 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 1401 bytes --]

Gilles Chanteperdrix wrote:
> Philippe Gerum wrote:
>  > On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote:
>  > > Philippe Gerum wrote:
>  > >  > > +#ifdef CONFIG_XENO_OPT_STATS
>  > >  > > +static inline void xnpod_acc_exec_time(xnsched_t *sched,
>  > >  > > +                                       xnthread_t *threadout)
>  > >  > > +{
>  > >  > > +    xnticks_t now = xntimer_get_rawclock();
>  > >  > > +    threadout->stat.exec_time += now - sched->last_csw;
>  > >  > > +    sched->last_csw = now;
>  > >  > > +}
>  > >  > 
>  > >  > It would be better to only pass the thread pointer, then use the
>  > >  > thread->sched member. This would clearly explain the relationship
>  > >  > between both, and prevent any bugous attempt at mixing things.
>  > > 
>  > > Beware, when xnpod_schedule is called from within
>  > > xnpod_migrate_thread(), the sched pointer of the switched out thread is
>  > > the one of the destination cpu. So, passing the sched and threadout
>  > > pointers to xnpod_acc_exec_time is safer.
>  > 
>  > Nope, the nklock is held by the migrating thread all along until the
>  > switch has actually occured.
> 
> xnpod_acc_exec_time() is called from within xnpod_schedule() that is
> called from within xnpod_migrate_thread() at a time where
> threadout->sched is the wrong pointer.
> 

True, will revert this.

Thanks,
Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 250 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07  9:03                 ` Jan Kiszka
@ 2006-07-07 11:28                   ` Philippe Gerum
  0 siblings, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2006-07-07 11:28 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

On Fri, 2006-07-07 at 11:03 +0200, Jan Kiszka wrote:
> >> Far too complex in my eyes for this simple purpose (what's the pid of
> >> kernel-only threads?) -
> > 
> > It's symbolic name. It's not a matter of simplicity, your patch for that
> > purpose is rather complex already.
> 
> Haven't measured, but the amount of code added for collecting the data
> and printing just another column should be marginal as yet. Introducing
> a new infrastructure for more /proc subdirs with probably multiple
> entries would certainly cost more.
> 

This has no cost on the real-time side of things. In contrast, scanning
the threadq within /proc handlers like /stats and /sched are doing now,
does have some noticeable cost. By having separate per-thread entries,
we provide the necessary infrastructure for exporting more raw
statistical data to user-space tools, in a format which would be
script-friendly.

> > 
> >>  and we need to solve the latency problem of
> >> /sched and /stat anyway.
> > 
> > That's separate issues. Solving the scalability issue
> > of /proc/xenomai/stats and friends does not improve their usability in
> > the context of user-space tools monitoring thread activity. E.g. how are
> > we going to extend the reporting about such activity if need be, adding
> > yet another column to /stats and /sched?
> 
> As long as there are no stand-alone tools relying on the layout, just
> only our own ones - fairly simple. But before speculating more, I will
> have a look now how simple such tool/script may actually be.
> 
> Jan
> 
-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07 10:22       ` Gilles Chanteperdrix
  2006-07-07 10:30         ` Jan Kiszka
@ 2006-07-07 13:23         ` Philippe Gerum
  2006-07-07 13:59           ` Gilles Chanteperdrix
  1 sibling, 1 reply; 24+ messages in thread
From: Philippe Gerum @ 2006-07-07 13:23 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: Jan Kiszka, xenomai-core

On Fri, 2006-07-07 at 12:22 +0200, Gilles Chanteperdrix wrote:
> Philippe Gerum wrote:
>  > On Fri, 2006-07-07 at 01:47 +0200, Gilles Chanteperdrix wrote:
>  > > Philippe Gerum wrote:
>  > >  > > +#ifdef CONFIG_XENO_OPT_STATS
>  > >  > > +static inline void xnpod_acc_exec_time(xnsched_t *sched,
>  > >  > > +                                       xnthread_t *threadout)
>  > >  > > +{
>  > >  > > +    xnticks_t now = xntimer_get_rawclock();
>  > >  > > +    threadout->stat.exec_time += now - sched->last_csw;
>  > >  > > +    sched->last_csw = now;
>  > >  > > +}
>  > >  > 
>  > >  > It would be better to only pass the thread pointer, then use the
>  > >  > thread->sched member. This would clearly explain the relationship
>  > >  > between both, and prevent any bugous attempt at mixing things.
>  > > 
>  > > Beware, when xnpod_schedule is called from within
>  > > xnpod_migrate_thread(), the sched pointer of the switched out thread is
>  > > the one of the destination cpu. So, passing the sched and threadout
>  > > pointers to xnpod_acc_exec_time is safer.
>  > 
>  > Nope, the nklock is held by the migrating thread all along until the
>  > switch has actually occured.
> 
> xnpod_acc_exec_time() is called from within xnpod_schedule() that is
> called from within xnpod_migrate_thread() at a time where
> threadout->sched is the wrong pointer.

Gasp. Unfortunately, we are trapped by an exception case, and I don't
see any better approach.

> 
-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07 13:23         ` Philippe Gerum
@ 2006-07-07 13:59           ` Gilles Chanteperdrix
  2006-07-07 14:25             ` Philippe Gerum
  0 siblings, 1 reply; 24+ messages in thread
From: Gilles Chanteperdrix @ 2006-07-07 13:59 UTC (permalink / raw)
  To: rpm; +Cc: xenomai

Philippe Gerum wrote:
 > > xnpod_acc_exec_time() is called from within xnpod_schedule() that is
 > > called from within xnpod_migrate_thread() at a time where
 > > threadout->sched is the wrong pointer.
 > 
 > Gasp. Unfortunately, we are trapped by an exception case, and I don't
 > see any better approach.

We could reimplement an xnpod_migrate_thread() which suspend the calling
thread and wake up a server thread on the destination CPU, this server
thread migrating the suspended thread by changing its sched pointer
outside of xnpod_schedule().

Or we can get rid of xnpod_migrate_thread(), it is currently not used by
any skin.

-- 


					    Gilles Chanteperdrix.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Xenomai-core] [RFC, PATCH] per-thread exec-time stats
  2006-07-07 13:59           ` Gilles Chanteperdrix
@ 2006-07-07 14:25             ` Philippe Gerum
  0 siblings, 0 replies; 24+ messages in thread
From: Philippe Gerum @ 2006-07-07 14:25 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai

On Fri, 2006-07-07 at 15:59 +0200, Gilles Chanteperdrix wrote:
> Philippe Gerum wrote:
>  > > xnpod_acc_exec_time() is called from within xnpod_schedule() that is
>  > > called from within xnpod_migrate_thread() at a time where
>  > > threadout->sched is the wrong pointer.
>  > 
>  > Gasp. Unfortunately, we are trapped by an exception case, and I don't
>  > see any better approach.
> 
> We could reimplement an xnpod_migrate_thread() which suspend the calling
> thread and wake up a server thread on the destination CPU, this server
> thread migrating the suspended thread by changing its sched pointer
> outside of xnpod_schedule().
> 

No, really. A small hack to get per-thread accounting should not trigger
a storm of fundamental changes like this.

> Or we can get rid of xnpod_migrate_thread(), it is currently not used by
> any skin.
> 

It's a fundamental feature for placing SMP jobs, and kernel-based
Xenomai threads could not rely on sched_setscheduler() to do it. Let's
keep this service, and simply pass the sched pointer to the accumulation
routine; I was wrong initially suggesting the opposite. IOW, let's avoid
smashing a squadron of flies with nukes...

-- 
Philippe.




^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2006-07-07 14:25 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-07-06 11:37 [Xenomai-core] [RFC, PATCH] per-thread exec-time stats Jan Kiszka
2006-07-06 11:41 ` Jan Kiszka
2006-07-06 14:32 ` Philippe Gerum
2006-07-06 15:09   ` Jan Kiszka
2006-07-06 15:37     ` Philippe Gerum
2006-07-06 15:46       ` Jan Kiszka
2006-07-06 16:36         ` Jan Kiszka
2006-07-06 17:17           ` Jan Kiszka
2006-07-07  8:20             ` Jan Kiszka
2006-07-07  8:04           ` Philippe Gerum
2006-07-07  8:09             ` Jan Kiszka
2006-07-07  8:49               ` Philippe Gerum
2006-07-07  9:03                 ` Jan Kiszka
2006-07-07 11:28                   ` Philippe Gerum
2006-07-06 16:37         ` Philippe Gerum
2006-07-06 23:47   ` Gilles Chanteperdrix
2006-07-07  7:54     ` Philippe Gerum
2006-07-07 10:22       ` Gilles Chanteperdrix
2006-07-07 10:30         ` Jan Kiszka
2006-07-07 13:23         ` Philippe Gerum
2006-07-07 13:59           ` Gilles Chanteperdrix
2006-07-07 14:25             ` Philippe Gerum
2006-07-06 15:04 ` Philippe Gerum
2006-07-06 15:06   ` Philippe Gerum

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.