All of lore.kernel.org
 help / color / mirror / Atom feed
* [Xenomai-help] pthread cancelation and scheduling magics
@ 2008-11-30 21:34 Wolfgang Grandegger
  2008-11-30 21:46 ` Gilles Chanteperdrix
  2008-11-30 21:59 ` Gilles Chanteperdrix
  0 siblings, 2 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-11-30 21:34 UTC (permalink / raw)
  To: xenomai-help

[-- Attachment #1: Type: text/plain, Size: 523 bytes --]

Hello,

I have written the attached test program to cancel Xenomai POSIX
threads. The "calc_task" does some busy work, which the higher priority
task "ctrl_task" interrupts and aborts after some time. The program does
not behave like I expect and it also behaves differently on my PowerPC
and ARM test system. The "calc_task" continues after calling
pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
weird. Is there anything wrong in my test program or anything else I
should take care of?

Thanks.

Wolfgang.

[-- Attachment #2: cancel-test.c --]
[-- Type: text/x-csrc, Size: 4762 bytes --]

/*
 * Test program for thread cancelation
 */

#include <execinfo.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <sys/mman.h>

#ifndef __XENO__
#define rt_printf printf
#else
#include <rtdk.h>
#endif

/*
 * Build-time switches.
 *
 * USE_SIGXCPU:        when defined, main() installs mode_switch_handler()
 *                     for SIGXCPU and, under __XENO__, both threads call
 *                     pthread_set_mode_np(0, PTHREAD_WARNSW).
 * USE_EXPLICIT_SCHED: when defined, main() creates the threads with
 *                     explicit SCHED_FIFO attributes; otherwise each thread
 *                     sets its own priority with pthread_setschedparam().
 */
#define USE_SIGXCPU
//#define USE_EXPLICIT_SCHED
/* SCHED_FIFO priorities of the control and the calculation thread. */
#define CTRL_PRIO 39
#define CALC_PRIO 38

/* Thread handles filled in by pthread_create() in main(). */
static pthread_t calc_thread;
static pthread_t ctrl_thread;

/* Incremented by calc_func() after each 100 ms load step; read by ctrl_func()
   for its log messages. */
volatile unsigned long count = 0;
/* Time in milliseconds ctrl_func() sleeps before cancelling calc_thread;
   main() overrides the default from argv[1]. */
static int load_ms = 2500;

/*
 * Abort the program when a call reported an error.
 *
 * string: label of the call that was checked; it is printed verbatim after
 *         the error code, so callers supply the trailing newline themselves.
 * err:    return code of the call; 0 means success and nothing happens.
 *
 * When err is non-zero the diagnostic is printed and the process exits with
 * status 1, so the function does not return in that case.
 */
void check_err(const char *string, int err)
{
	if (err) {
		printf("Failed with %d at %s", err, string);
		exit(1);
	}
}

/*
 * Busy-wait for 100 ms by polling CLOCK_MONOTONIC until the deadline,
 * computed once on entry, has been reached.
 */
static void create_load_100ms(void)
{
	struct timespec deadline, cur;
	int expired;

	/* Deadline = current time + 100 ms, with the nanosecond carry folded
	   into the seconds field. */
	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_nsec += 100000000;
	if (deadline.tv_nsec >= 1000000000) {
		deadline.tv_sec++;
		deadline.tv_nsec -= 1000000000;
	}

	do {
		clock_gettime(CLOCK_MONOTONIC, &cur);
		expired = cur.tv_sec > deadline.tv_sec ||
			  (cur.tv_sec == deadline.tv_sec &&
			   cur.tv_nsec >= deadline.tv_nsec);
	} while (!expired);
}

/*
 * Control thread: sleeps for load_ms milliseconds and then requests
 * cancellation of calc_thread.
 *
 * Unless USE_EXPLICIT_SCHED is defined, the thread first switches itself to
 * SCHED_FIFO at CTRL_PRIO. A failure to do so is reported but not fatal.
 *
 * parm: unused.
 * Returns NULL.
 */
void *ctrl_func(void *parm)
{
	struct timespec ts;
#ifndef USE_EXPLICIT_SCHED
	struct sched_param param;
	int rc;
#endif

	(void)parm;
#ifndef USE_EXPLICIT_SCHED
	memset(&param, 0, sizeof(param));
	param.sched_priority = CTRL_PRIO;
	rc = pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
	if (rc)
		rt_printf("%s: pthread_setschedparam() failed with %d\n",
			  __func__, rc);
#endif
#ifdef __XENO__
	pthread_set_name_np(pthread_self(), __func__);
#ifdef USE_SIGXCPU
	pthread_set_mode_np(0, PTHREAD_WARNSW);
#endif
#endif
	/* count is an unsigned long, so it must be printed with %lu. */
	rt_printf("%s: started at count %lu\n", __func__, count);

	ts.tv_sec = load_ms / 1000;
	ts.tv_nsec = (load_ms % 1000) * 1000000;
	/* time_t has no portable conversion specifier; cast to long. */
	rt_printf("%s: sleeping for %ldsec %ldns\n", __func__,
		  (long)ts.tv_sec, (long)ts.tv_nsec);
	nanosleep(&ts, NULL);
	rt_printf("%s: cancel at count %lu\n", __func__, count);
	pthread_cancel(calc_thread);
	rt_printf("%s: stopped at count %lu\n", __func__, count);

	return NULL;
}
/*
 * Calculation thread: enables asynchronous cancellation and then generates
 * CPU load in 100 ms steps, incrementing the global count after each step,
 * until count reaches 2 * load_ms / 100.
 *
 * Unless USE_EXPLICIT_SCHED is defined, the thread first switches itself to
 * SCHED_FIFO at CALC_PRIO. A failure to do so is reported but not fatal.
 *
 * parm: unused.
 * Returns NULL when the loop runs to completion.
 */
void *calc_func(void *parm)
{
	int rc, count_max;
#ifndef USE_EXPLICIT_SCHED
	struct sched_param param;
#endif

	(void)parm;
#ifndef USE_EXPLICIT_SCHED
	memset(&param, 0, sizeof(param));
	param.sched_priority = CALC_PRIO;
	rc = pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
	if (rc)
		rt_printf("%s: pthread_setschedparam() failed with %d\n",
			  __func__, rc);
#endif
#ifdef __XENO__
	pthread_set_name_np(pthread_self(), __func__);
#ifdef USE_SIGXCPU
	pthread_set_mode_np(0, PTHREAD_WARNSW);
#endif
#endif
	rc = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	check_err("pthread_setcanceltype()\n", rc);
	rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	check_err("pthread_setcancelstate()\n", rc);

	count_max = 2 * load_ms / 100;
	rt_printf("%s: counting till %d\n", __func__, count_max);
	/* Guard against a negative limit: converted to unsigned long it would
	   become a huge value and the loop would practically never end. */
	while (count_max > 0 && count < (unsigned long)count_max) {
		create_load_100ms();
		rt_printf("%s: at count %lu\n", __func__, count);
		count++;
	}

	rt_printf("%s: stopped at count %lu\n", __func__, count);

	return NULL;
}

#ifdef USE_SIGXCPU
/*
 * Signal handler (installed for SIGXCPU in main()): writes a backtrace of
 * the current call stack, at most 32 frames deep, to stdout. It is meant to
 * show the frame which caused the switch to secondary mode.
 */
void mode_switch_handler(int sig)
{
	void *frames[32];
	int depth;

	depth = backtrace(frames, sizeof(frames) / sizeof(frames[0]));
	backtrace_symbols_fd(frames, depth, fileno(stdout));
}
#endif

int main(int argc, char **argv)
{
	int rc = 0;
	void *status;
#ifdef USE_EXPLICIT_SCHED
	pthread_attr_t calc_attr, ctrl_attr;
	struct sched_param calc_param, ctrl_param;
#endif

	if (argc == 2)
		load_ms = atoi(argv[1]);

	/* Lock process memory */
	mlockall(MCL_CURRENT | MCL_FUTURE);

#ifdef USE_SIGXCPU
	signal(SIGXCPU, mode_switch_handler);
#endif
#ifdef __XENO__
	/* We use rt_printf() for debugging real-time code */
	rt_print_auto_init(1);
	rt_printf("Real-Time debugging started\n");
#endif

#ifdef USE_EXPLICIT_SCHED
	memset(&ctrl_param, 0 , sizeof(ctrl_param));
	pthread_attr_init(&ctrl_attr);
	pthread_attr_setschedpolicy(&ctrl_attr, SCHED_FIFO);
 	ctrl_param.sched_priority = CTRL_PRIO;
	pthread_attr_setschedparam(&ctrl_attr, &ctrl_param);
	pthread_attr_setinheritsched(&ctrl_attr, PTHREAD_EXPLICIT_SCHED);
	rc = pthread_create(&ctrl_thread, &ctrl_attr, ctrl_func, NULL);
#else
	rc = pthread_create(&ctrl_thread, NULL, ctrl_func, NULL);
#endif
	check_err("pthread_create()\n", rc);

#ifdef USE_EXPLICIT_SCHED
	memset(&calc_param, 0 , sizeof(calc_param));
	pthread_attr_init(&calc_attr);
	pthread_attr_setschedpolicy(&calc_attr, SCHED_FIFO);
	calc_param.sched_priority = CALC_PRIO;
	pthread_attr_setschedparam(&calc_attr, &calc_param);
	pthread_attr_setinheritsched(&calc_attr, PTHREAD_EXPLICIT_SCHED);
	rc = pthread_create(&calc_thread, &calc_attr, calc_func, NULL);
#else
	rc = pthread_create(&calc_thread, NULL, calc_func, NULL);
#endif
	check_err("pthread_create()\n", rc);

	rc = pthread_join(calc_thread, &status);
	check_err("pthread_join()\n", rc);

	if (status != PTHREAD_CANCELED)
		printf("Unexpected thread status\n");

	printf("main terminating in 3 seconds...\n");
	sleep(3);
	return 0;
}

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-11-30 21:34 [Xenomai-help] pthread cancelation and scheduling magics Wolfgang Grandegger
@ 2008-11-30 21:46 ` Gilles Chanteperdrix
  2008-11-30 21:59 ` Gilles Chanteperdrix
  1 sibling, 0 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-11-30 21:46 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Hello,
> 
> I have written the attached test program to cancel Xenomai POSIX
> threads. The "calc_task" does some busy work, which the higher priority
> task "ctrl_task" interrupts and aborts after some time. The program does
> not behave like I expect and it also behaves differently on my PowerPC
> and ARM test system. The "calc_task" continues after calling
> pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
> wired. Is there anything wrong in my test program or anything else I
> should care of?

Hi Wolfgang,

before I delve into your program, did you read:
http://www.xenomai.org/documentation/trunk/html/api/group__posix__cancel.html
and
http://www.opengroup.org/onlinepubs/000095399/functions/xsh_chap02_09.html#tag_02_09_05
?


-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-11-30 21:34 [Xenomai-help] pthread cancelation and scheduling magics Wolfgang Grandegger
  2008-11-30 21:46 ` Gilles Chanteperdrix
@ 2008-11-30 21:59 ` Gilles Chanteperdrix
  2008-12-01 10:22   ` Gilles Chanteperdrix
  2008-12-01 13:31   ` Wolfgang Grandegger
  1 sibling, 2 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-11-30 21:59 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Hello,
> 
> I have written the attached test program to cancel Xenomai POSIX
> threads. The "calc_task" does some busy work, which the higher priority
> task "ctrl_task" interrupts and aborts after some time. The program does
> not behave like I expect and it also behaves differently on my PowerPC
> and ARM test system. The "calc_task" continues after calling
> pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
> wired. Is there anything wrong in my test program or anything else I
> should care of?

First, you should know that PTHREAD_CANCEL_ASYNCHRONOUS is evil, it will
almost inevitably leave things in an unknown state when canceling a
thread, you would better use PTHREAD_CANCEL_DEFERRED and ensure that
your thread has some cancellation point, if it has not, use
pthread_testcancel.

Second, your program will only work if root thread priority coupling is
enabled. Is it enabled in your case?

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-11-30 21:59 ` Gilles Chanteperdrix
@ 2008-12-01 10:22   ` Gilles Chanteperdrix
  2008-12-01 14:16     ` Wolfgang Grandegger
  2008-12-01 13:31   ` Wolfgang Grandegger
  1 sibling, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-01 10:22 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Hello,
>>
>> I have written the attached test program to cancel Xenomai POSIX
>> threads. The "calc_task" does some busy work, which the higher priority
>> task "ctrl_task" interrupts and aborts after some time. The program does
>> not behave like I expect and it also behaves differently on my PowerPC
>> and ARM test system. The "calc_task" continues after calling
>> pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
>> wired. Is there anything wrong in my test program or anything else I
>> should care of?
> 
> First, you should know that PTHREAD_CANCEL_ASYNCHRONOUS is evil, it will
> almost inevitably leave things in an unknown state when canceling a
> thread, you would better use PTHREAD_CANCEL_DEFERRED and ensure that
> your thread has some cancellation point, if it has not, use
> pthread_testcancel.
> 
> Second, your program will only work if root thread priority coupling is
> enabled. Is it enabled in your case?

Actually, even with priority coupling I am not sure it can work. The
problem is the way do_sigwake_event signals threads: it makes them switch
to secondary mode only if they pass through a syscall. Since the
"calc_task" thread does not issue any syscall, it will never see
that it should relax to handle the cancelation signal.

Now, the question is, do you realistically plan to write an application
which makes no syscall in its real-time loop?

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-11-30 21:59 ` Gilles Chanteperdrix
  2008-12-01 10:22   ` Gilles Chanteperdrix
@ 2008-12-01 13:31   ` Wolfgang Grandegger
  1 sibling, 0 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-01 13:31 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Hello,
>>
>> I have written the attached test program to cancel Xenomai POSIX
>> threads. The "calc_task" does some busy work, which the higher priority
>> task "ctrl_task" interrupts and aborts after some time. The program does
>> not behave like I expect and it also behaves differently on my PowerPC
>> and ARM test system. The "calc_task" continues after calling
>> pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
>> wired. Is there anything wrong in my test program or anything else I
>> should care of?
> 
> First, you should know that PTHREAD_CANCEL_ASYNCHRONOUS is evil, it will
> almost inevitably leave things in an unknown state when canceling a
> thread, you would better use PTHREAD_CANCEL_DEFERRED and ensure that
> your thread has some cancellation point, if it has not, use
> pthread_testcancel.

Actually I want to do a defined exit in case of a fatal error condition.
Normally I just would printf an error message and call exit to abort the
whole application. Things are more tricky with Xenomai. For error
logging and debugging, I'm using rt_printf() (RTDK) and I want to see
all messages till the fatal error occurred, which requires stopping all
activity/tasks of the scheduler. My idea was to cancel all non-active
tasks, wait for a few hundred milliseconds before finally calling
exit. Maybe there is a simpler method to achieve such a defined abort.

> Second, your program will only work if root thread priority coupling is
> enabled. Is it enabled in your case?

Where can I read more about this thread priority coupling? I see
different ROOT priorities when using USE_EXPLICIT_SCHED.

Wolfgang,



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-01 14:16     ` Wolfgang Grandegger
@ 2008-12-01 14:15       ` Gilles Chanteperdrix
  2008-12-01 15:10         ` Gilles Chanteperdrix
  2008-12-03  8:04         ` Wolfgang Grandegger
  0 siblings, 2 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-01 14:15 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Hello,
>>>>
>>>> I have written the attached test program to cancel Xenomai POSIX
>>>> threads. The "calc_task" does some busy work, which the higher priority
>>>> task "ctrl_task" interrupts and aborts after some time. The program does
>>>> not behave like I expect and it also behaves differently on my PowerPC
>>>> and ARM test system. The "calc_task" continues after calling
>>>> pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
>>>> wired. Is there anything wrong in my test program or anything else I
>>>> should care of?
>>> First, you should know that PTHREAD_CANCEL_ASYNCHRONOUS is evil, it will
>>> almost inevitably leave things in an unknown state when canceling a
>>> thread, you would better use PTHREAD_CANCEL_DEFERRED and ensure that
>>> your thread has some cancellation point, if it has not, use
>>> pthread_testcancel.
>>>
>>> Second, your program will only work if root thread priority coupling is
>>> enabled. Is it enabled in your case?
>> Actually, even with priority coupling I am not sure it can not work. The
>> problem is the way do_sigwake_event signal threads: it make them switch
>> to secondary mode only if they pass through a syscall. Since the
>> "calc_task" thread does not issue any syscall, it will never check see
>> that it should relax to handle the cancelation signal.
> 
> calc_task() calls clock_gettime() permanently, which does a syscall as
> long as vDSO is not used, which might be the case on PowerPC. This would
> explain why cancelation of "calc_task" works on ARM.

For architectures with a high-resolution counter,
clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
counter and does not issue any syscall, this includes arm unless you
configured xenomai with --enable-arm-arch=generic or with
--disable-arm-tsc, and this probably includes powerpc.

> 
>> Now, the question is, do you realistically plan to write an application
>> which makes no syscall in its real-time loop?
> 
> Unlikely, but it may happen in case of programming errors. Anyhow, the
> pthreads will run legacy code and it would be a pain to add
> pthread_testcancel where necessary. But maybe there is a more elegant
> and simple solution to do a defined exit/abort.

In case of programming error, enable the xenomai watchdog, it will
forcibly kill the problematic thread.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-01 10:22   ` Gilles Chanteperdrix
@ 2008-12-01 14:16     ` Wolfgang Grandegger
  2008-12-01 14:15       ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-01 14:16 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Hello,
>>>
>>> I have written the attached test program to cancel Xenomai POSIX
>>> threads. The "calc_task" does some busy work, which the higher priority
>>> task "ctrl_task" interrupts and aborts after some time. The program does
>>> not behave like I expect and it also behaves differently on my PowerPC
>>> and ARM test system. The "calc_task" continues after calling
>>> pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
>>> wired. Is there anything wrong in my test program or anything else I
>>> should care of?
>> First, you should know that PTHREAD_CANCEL_ASYNCHRONOUS is evil, it will
>> almost inevitably leave things in an unknown state when canceling a
>> thread, you would better use PTHREAD_CANCEL_DEFERRED and ensure that
>> your thread has some cancellation point, if it has not, use
>> pthread_testcancel.
>>
>> Second, your program will only work if root thread priority coupling is
>> enabled. Is it enabled in your case?
> 
> Actually, even with priority coupling I am not sure it can not work. The
> problem is the way do_sigwake_event signal threads: it make them switch
> to secondary mode only if they pass through a syscall. Since the
> "calc_task" thread does not issue any syscall, it will never check see
> that it should relax to handle the cancelation signal.

calc_task() calls clock_gettime() permanently, which does a syscall as
long as vDSO is not used, which might be the case on PowerPC. This would
explain why cancelation of "calc_task" works on ARM.

> Now, the question is, do you realistically plan to write an application
> which makes no syscall in its real-time loop?

Unlikely, but it may happen in case of programming errors. Anyhow, the
pthreads will run legacy code and it would be a pain to add
pthread_testcancel where necessary. But maybe there is a more elegant
and simple solution to do a defined exit/abort.

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-01 14:15       ` Gilles Chanteperdrix
@ 2008-12-01 15:10         ` Gilles Chanteperdrix
  2008-12-02 15:59           ` Wolfgang Grandegger
  2008-12-03  8:04         ` Wolfgang Grandegger
  1 sibling, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-01 15:10 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
>>> Now, the question is, do you realistically plan to write an application
>>> which makes no syscall in its real-time loop?
>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>> pthreads will run legacy code and it would be a pain to add
>> pthread_testcancel where necessary. But maybe there is a more elegant
>> and simple solution to do a defined exit/abort.
> 
> In case of programming error, enable the xenomai watchdog, it will
> forcibly kill the problematic thread.

To give you a more complete answer: most blocking functions are
cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
probably do not need to add pthread_testcancel at all. The only
exception is pthread_mutex_lock: this way, cancellation happens for well
defined mutex states, and you may install cleanup handlers with
pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
situation is not that clean.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-02 15:59           ` Wolfgang Grandegger
@ 2008-12-02 15:55             ` Gilles Chanteperdrix
  2008-12-02 18:18               ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-02 15:55 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Hi Gilles,
> 
> Gilles Chanteperdrix wrote:
>> Gilles Chanteperdrix wrote:
>>>>> Now, the question is, do you realistically plan to write an application
>>>>> which makes no syscall in its real-time loop?
>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>> pthreads will run legacy code and it would be a pain to add
>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>> and simple solution to do a defined exit/abort.
>>> In case of programming error, enable the xenomai watchdog, it will
>>> forcibly kill the problematic thread.
>> To give you a more complete answer: most blocking functions are
>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>> probably do not need to add pthread_testcancel at all. The only
>> exception is pthread_mutex_lock: this way, cancellation happens for well
>> defined mutex states, and you may install cleanup handlers with
>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>> situation is not that clean.
> 
> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
> with pthread_testcancel does not work reliably and consistently and it
> still behaves different on my ARM and PowerPC systems. I have attached
> my revised test program allowing to enable/disable various method of
> thread creation, setup and cancellation. They all work fine with the
> Linux POSIX libraries. With Xenomai, only a few work as expected on my
> ARM and PowerPC test systems.

Could you explain to us exactly what happens?

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-01 15:10         ` Gilles Chanteperdrix
@ 2008-12-02 15:59           ` Wolfgang Grandegger
  2008-12-02 15:55             ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-02 15:59 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

[-- Attachment #1: Type: text/plain, Size: 1775 bytes --]

Hi Gilles,

Gilles Chanteperdrix wrote:
> Gilles Chanteperdrix wrote:
>>>> Now, the question is, do you realistically plan to write an application
>>>> which makes no syscall in its real-time loop?
>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>> pthreads will run legacy code and it would be a pain to add
>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>> and simple solution to do a defined exit/abort.
>> In case of programming error, enable the xenomai watchdog, it will
>> forcibly kill the problematic thread.
> 
> To give you a more complete answer: most blocking functions are
> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
> probably do not need to add pthread_testcancel at all. The only
> exception is pthread_mutex_lock: this way, cancellation happens for well
> defined mutex states, and you may install cleanup handlers with
> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
> situation is not that clean.

Well, there seems to be something wrong with it: PTHREAD_CANCEL_DEFERRED
with pthread_testcancel also does not work reliably and consistently, and it
still behaves differently on my ARM and PowerPC systems. I have attached
my revised test program, which allows enabling/disabling various methods of
thread creation, setup and cancellation. They all work fine with the
Linux POSIX libraries. With Xenomai, only a few work as expected on my
ARM and PowerPC test systems. It would be nice if somebody could test it
on an X86 system. Maybe there is still something wrong with my test program.
I'm also puzzled why pthread_setschedparam() makes a mode switch to
secondary mode (sometimes).

Wolfgang.




[-- Attachment #2: cancel-test.c --]
[-- Type: text/x-csrc, Size: 5030 bytes --]

/*
 * Test program for thread cancelation
 */

#include <execinfo.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <sys/mman.h>

#ifndef __XENO__
#define rt_printf printf
#else
#include <rtdk.h>
#endif

/*
 * Build-time switches.
 *
 * USE_SIGXCPU:        when defined, main() installs mode_switch_handler()
 *                     for SIGXCPU and, under __XENO__, both threads call
 *                     pthread_set_mode_np(0, PTHREAD_WARNSW).
 * USE_EXPLICIT_SCHED: when defined, main() creates the threads with
 *                     explicit SCHED_FIFO attributes; otherwise each thread
 *                     sets its own priority with pthread_setschedparam().
 * CANCEL_TYPE:        cancellation type calc_func() passes to
 *                     pthread_setcanceltype(); leave undefined to skip the
 *                     call.
 * USE_TEST_CANCEL:    when defined, create_load_100ms() calls
 *                     pthread_testcancel() on every polling iteration.
 */
//#define USE_SIGXCPU
//#define USE_EXPLICIT_SCHED
#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
//#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
#define USE_TEST_CANCEL

/* SCHED_FIFO priorities of the control and the calculation thread. */
#define CTRL_PRIO 39
#define CALC_PRIO 38

/* Thread handles filled in by pthread_create() in main(). */
static pthread_t calc_thread;
static pthread_t ctrl_thread;

/* Incremented by calc_func() after each 100 ms load step; read by ctrl_func()
   for its log messages. */
volatile unsigned long count = 0;
/* Time in milliseconds ctrl_func() sleeps before cancelling calc_thread;
   main() overrides the default from argv[1]. */
static int load_ms = 2500;
/* Set to 1 by calc_func() when its loop completes; ctrl_func() skips the
   pthread_cancel() call when it is set.
   NOTE(review): plain int shared between two threads without any
   synchronization -- confirm this is acceptable for the test. */
static int calc_exit;

/*
 * Abort the program when a call reported an error.
 *
 * string: label of the call that was checked; it is printed verbatim after
 *         the error code, so callers supply the trailing newline themselves.
 * err:    return code of the call; 0 means success and nothing happens.
 *
 * When err is non-zero the diagnostic is printed and the process exits with
 * status 1, so the function does not return in that case.
 */
void check_err(const char *string, int err)
{
	if (err) {
		printf("Failed with %d at %s", err, string);
		exit(1);
	}
}

/*
 * Busy-wait for 100 ms by polling CLOCK_MONOTONIC until the deadline,
 * computed once on entry, has been reached. With USE_TEST_CANCEL defined,
 * every polling iteration first calls pthread_testcancel().
 */
static void create_load_100ms(void)
{
	struct timespec deadline, cur;
	int expired;

	/* Deadline = current time + 100 ms, with the nanosecond carry folded
	   into the seconds field. */
	clock_gettime(CLOCK_MONOTONIC, &deadline);
	deadline.tv_nsec += 100000000;
	if (deadline.tv_nsec >= 1000000000) {
		deadline.tv_sec++;
		deadline.tv_nsec -= 1000000000;
	}

	do {
#ifdef USE_TEST_CANCEL
		pthread_testcancel();
#endif
		clock_gettime(CLOCK_MONOTONIC, &cur);
		expired = cur.tv_sec > deadline.tv_sec ||
			  (cur.tv_sec == deadline.tv_sec &&
			   cur.tv_nsec >= deadline.tv_nsec);
	} while (!expired);
}

/*
 * Control thread: sleeps for load_ms milliseconds and then, unless
 * calc_func() has already flagged its completion through calc_exit,
 * requests cancellation of calc_thread.
 *
 * Unless USE_EXPLICIT_SCHED is defined, the thread switches itself to
 * SCHED_FIFO at CTRL_PRIO; a failure terminates the program via check_err().
 *
 * parm: unused.
 * Returns NULL.
 */
void *ctrl_func(void *parm)
{
	struct timespec ts;
#ifndef USE_EXPLICIT_SCHED
	/* rc is only needed for the pthread_setschedparam() call below. */
	struct sched_param param;
	int rc;
#endif

	(void)parm;
#ifdef __XENO__
	pthread_set_name_np(pthread_self(), __func__);
#ifdef USE_SIGXCPU
	pthread_set_mode_np(0, PTHREAD_WARNSW);
#endif
#endif
#ifndef USE_EXPLICIT_SCHED
	memset(&param, 0, sizeof(param));
	param.sched_priority = CTRL_PRIO;
	rc = pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
	check_err("pthread_setschedparam()\n", rc);
#endif
	/* count is an unsigned long, so it must be printed with %lu. */
	rt_printf("%s: started at count %lu\n", __func__, count);

	ts.tv_sec = load_ms / 1000;
	ts.tv_nsec = (load_ms % 1000) * 1000000;
	rt_printf("%s: sleeping for %ldsec %ldns\n", __func__,
		  (long)ts.tv_sec, (long)ts.tv_nsec);
	nanosleep(&ts, NULL);
	if (!calc_exit) {
		rt_printf("%s: cancel at count %lu\n", __func__, count);
		pthread_cancel(calc_thread);
	}
	rt_printf("%s: stopped at count %lu\n", __func__, count);

	return NULL;
}
/*
 * Calculation thread: selects the cancellation type given by CANCEL_TYPE
 * (when defined) and then generates CPU load in 100 ms steps, incrementing
 * the global count after each step, until count reaches 2 * load_ms / 100.
 * On normal completion it sets calc_exit so that ctrl_func() skips the
 * cancellation request.
 *
 * Unless USE_EXPLICIT_SCHED is defined, the thread switches itself to
 * SCHED_FIFO at CALC_PRIO; a failure terminates the program via check_err().
 *
 * parm: unused.
 * Returns NULL when the loop runs to completion.
 */
void *calc_func(void *parm)
{
	int rc, count_max;
#ifndef USE_EXPLICIT_SCHED
	struct sched_param param;
#endif

	(void)parm;
#ifndef USE_EXPLICIT_SCHED
	memset(&param, 0, sizeof(param));
	param.sched_priority = CALC_PRIO;
	rc = pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
	check_err("pthread_setschedparam()\n", rc);
#endif
#ifdef __XENO__
	pthread_set_name_np(pthread_self(), __func__);
#ifdef USE_SIGXCPU
	pthread_set_mode_np(0, PTHREAD_WARNSW);
#endif
#endif
#ifdef CANCEL_TYPE
	rc = pthread_setcanceltype(CANCEL_TYPE, NULL);
	check_err("pthread_setcanceltype()\n", rc);
#endif

	count_max = 2 * load_ms / 100;
	rt_printf("%s: counting till %d\n", __func__, count_max);
	/* Guard against a negative limit: converted to unsigned long it would
	   become a huge value and the loop would practically never end. */
	while (count_max > 0 && count < (unsigned long)count_max) {
		create_load_100ms();
		rt_printf("%s: at count %lu\n", __func__, count);
		count++;
	}

	rt_printf("%s: stopped at count %lu\n", __func__, count);

	calc_exit = 1;

	return NULL;
}

#ifdef USE_SIGXCPU
/*
 * Signal handler (installed for SIGXCPU in main()): writes a backtrace of
 * the current call stack, at most 32 frames deep, to stdout. It is meant to
 * show the frame which caused the switch to secondary mode.
 */
void mode_switch_handler(int sig)
{
	void *frames[32];
	int depth;

	depth = backtrace(frames, sizeof(frames) / sizeof(frames[0]));
	backtrace_symbols_fd(frames, depth, fileno(stdout));
}
#endif

int main(int argc, char **argv)
{
	int rc = 0;
	void *status;
#ifdef USE_EXPLICIT_SCHED
	pthread_attr_t calc_attr, ctrl_attr;
	struct sched_param calc_param, ctrl_param;
#endif

	if (argc == 2)
		load_ms = atoi(argv[1]);

	/* Lock process memory */
	mlockall(MCL_CURRENT | MCL_FUTURE);

#ifdef USE_SIGXCPU
	signal(SIGXCPU, mode_switch_handler);
#endif
#ifdef __XENO__
	/* We use rt_printf() for debugging real-time code */
	rt_print_auto_init(1);
	rt_printf("Real-Time debugging started\n");
#endif

#ifdef USE_EXPLICIT_SCHED
	memset(&ctrl_param, 0 , sizeof(ctrl_param));
	pthread_attr_init(&ctrl_attr);
	pthread_attr_setschedpolicy(&ctrl_attr, SCHED_FIFO);
 	ctrl_param.sched_priority = CTRL_PRIO;
	pthread_attr_setschedparam(&ctrl_attr, &ctrl_param);
	pthread_attr_setinheritsched(&ctrl_attr, PTHREAD_EXPLICIT_SCHED);
	rc = pthread_create(&ctrl_thread, &ctrl_attr, ctrl_func, NULL);
#else
	rc = pthread_create(&ctrl_thread, NULL, ctrl_func, NULL);
#endif
	check_err("pthread_create()\n", rc);

#ifdef USE_EXPLICIT_SCHED
	memset(&calc_param, 0 , sizeof(calc_param));
	pthread_attr_init(&calc_attr);
	pthread_attr_setschedpolicy(&calc_attr, SCHED_FIFO);
	calc_param.sched_priority = CALC_PRIO;
	pthread_attr_setschedparam(&calc_attr, &calc_param);
	pthread_attr_setinheritsched(&calc_attr, PTHREAD_EXPLICIT_SCHED);
	rc = pthread_create(&calc_thread, &calc_attr, calc_func, NULL);
#else
	rc = pthread_create(&calc_thread, NULL, calc_func, NULL);
#endif
	check_err("pthread_create()\n", rc);

	rc = pthread_join(calc_thread, &status);
	check_err("pthread_join()\n", rc);

	if (status != PTHREAD_CANCELED)
		printf("Unexpected thread status\n");

	printf("main terminating in 2 seconds...\n");
	sleep(3);
	return 0;
}

[-- Attachment #3: Makefile --]
[-- Type: text/plain, Size: 524 bytes --]

# Builds cancel-test from cancel-test.c.
#
# With XENO unset the program is linked against the native Linux libraries
# (-lpthread -lrt). With XENO set, xeno-config is looked up in $(XENO) and
# $(XENO)/bin (then in PATH) and supplies the compiler, the POSIX-skin
# compile and link flags and the library directory used as rpath.
APP := cancel-test

CFLAGS = -Wall -O0 -g

ifeq ($(XENO),)

CC = gcc
LDFLAGS = -lpthread -lrt

else

XENOCONFIG = \
$(shell PATH=$(XENO):$(XENO)/bin:$(PATH) which xeno-config 2>/dev/null)

CC = $(shell $(XENOCONFIG) --cc)
LD = $(patsubst %-gcc,%-ld,$(CC))
CFLAGS += $(shell $(XENOCONFIG) --posix-cflags)
LDFLAGS = $(shell $(XENOCONFIG) --posix-ldflags) -lrtdk
LDFLAGS += -Xlinker -rpath -Xlinker $(shell $(XENOCONFIG) --libdir)

endif

# "all" and "clean" name actions, not files; keep them working even if a
# file with one of these names exists in the directory.
.PHONY: all clean

all: $(APP)

# Libraries come after the source file so the linker can resolve its symbols.
$(APP): % : %.c
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

clean:
	$(RM) $(APP)

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-02 15:55             ` Gilles Chanteperdrix
@ 2008-12-02 18:18               ` Wolfgang Grandegger
  2008-12-02 18:35                 ` Gilles Chanteperdrix
  2008-12-03 10:16                 ` Gilles Chanteperdrix
  0 siblings, 2 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-02 18:18 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Hi Gilles,
>>
>> Gilles Chanteperdrix wrote:
>>> Gilles Chanteperdrix wrote:
>>>>>> Now, the question is, do you realistically plan to write an application
>>>>>> which makes no syscall in its real-time loop?
>>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>>> pthreads will run legacy code and it would be a pain to add
>>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>>> and simple solution to do a defined exit/abort.
>>>> In case of programming error, enable the xenomai watchdog, it will
>>>> forcibly kill the problematic thread.
>>> To give you a more complete answer: most blocking functions are
>>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>>> probably do not need to add pthread_testcancel at all. The only
>>> exception is pthread_mutex_lock: this way, cancellation happens for well
>>> defined mutex states, and you may install cleanup handlers with
>>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>>> situation is not that clean.
>> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
>> with pthread_testcancel does not work reliably and consistently and it
>> still behaves different on my ARM and PowerPC systems. I have attached
>> my revised test program allowing to enable/disable various method of
>> thread creation, setup and cancellation. They all work fine with the
>> Linux POSIX libraries. With Xenomai, only a few work as expected on my
>> ARM and PowerPC test systems.
> 
> Could you explain us exactly what happens

OK, with the definitions

  //#define USE_SIGXCPU
  //#define USE_EXPLICIT_SCHED
  #define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
  //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
  #define USE_TEST_CANCEL

I get on my ARM MX31ADS system:

  -bash-3.2# ./cancel-test
  Real-Time debugging started
  Segmentation fault

The program behaves differently when running under gdb but the
segmentation fault happens somewhere in pthread_cancel. It works better
on my PowerPC TQM5200 system:

  -bash-3.2# ./cancel-test
  Real-Time debugging started
  ctrl_func: started at count 0
  ctrl_func: sleeping for 2sec 500000000ns
  calc_func: counting till 50
  calc_func: at count 0
  calc_func: at count 1
  calc_func: at count 2
  calc_func: at count 3
  calc_func: at count 4
  calc_func: at count 5
  calc_func: at count 6
  calc_func: at count 7
  calc_func: at count 8
  calc_func: at count 9
  calc_func: at count 10
  calc_func: at count 11
  calc_func: at count 12
  calc_func: at count 13
  calc_func: at count 14
  calc_func: at count 15
  calc_func: at count 16
  calc_func: at count 17
  calc_func: at count 18
  calc_func: at count 19
  calc_func: at count 20
  calc_func: at count 21
  calc_func: at count 22
  ctrl_func: cancel at count 23
  ctrl_func: stopped at count 23
  main terminating in 2 seconds...

But the messages from calc_func are displayed before the task actually
gets canceled, which I do not understand. On ARM, it behaves similarly
if I disable explicit setting of the cancellation type:

  //#define USE_SIGXCPU

  //#define USE_EXPLICIT_SCHED

  //#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED

  //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS

  #define USE_TEST_CANCEL


Enabling/disabling other options does not work as expected either. For
example, with USE_EXPLICIT_SCHED the cancellation no longer works.

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-02 18:18               ` Wolfgang Grandegger
@ 2008-12-02 18:35                 ` Gilles Chanteperdrix
  2008-12-02 19:50                   ` Wolfgang Grandegger
  2008-12-07 16:05                   ` Wolfgang Grandegger
  2008-12-03 10:16                 ` Gilles Chanteperdrix
  1 sibling, 2 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-02 18:35 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Hi Gilles,
>>>
>>> Gilles Chanteperdrix wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>>>> Now, the question is, do you realistically plan to write an application
>>>>>>> which makes no syscall in its real-time loop?
>>>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>>>> pthreads will run legacy code and it would be a pain to add
>>>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>>>> and simple solution to do a defined exit/abort.
>>>>> In case of programming error, enable the xenomai watchdog, it will
>>>>> forcibly kill the problematic thread.
>>>> To give you a more complete answer: most blocking functions are
>>>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>>>> probably do not need to add pthread_testcancel at all. The only
>>>> exception is pthread_mutex_lock: this way, cancellation happens for well
>>>> defined mutex states, and you may install cleanup handlers with
>>>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>>>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>>>> situation is not that clean.
>>> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
>>> with pthread_testcancel does not work reliably and consistently and it
>>> still behaves different on my ARM and PowerPC systems. I have attached
>>> my revised test program allowing to enable/disable various method of
>>> thread creation, setup and cancellation. They all work fine with the
>>> Linux POSIX libraries. With Xenomai, only a few work as expected on my
>>> ARM and PowerPC test systems.
>> Could you explain us exactly what happens
> 
> OK, with the definitions
> 
>   //#define USE_SIGXCPU
>   //#define USE_EXPLICIT_SCHED
>   #define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>   #define USE_TEST_CANCEL
> 
> I get on my ARM MX31ADS system:
> 
>   -bash-3.2# ./cancel-test
>   Real-Time debugging started
>   Segmentation fault
> 
> The program behaves differently when running under gdb but the
> segmentation fault happens somewhere in pthread_cancel. It works better
> on my PowerPC TQM5200 system:

If you want to get the real pc of a segmentation fault on arm, you can
enable "verbose user faults" in the kernel hacking menu and boot the
kernel with user_debug=29, the kernel will then dump the value of the
registers upon segmentation fault. You can also trigger a backtrace dump
by registering a signal handler for the SIGSEGV signal. Note however that:
- the backtrace will lack the inner function call;
- such a signal handler should end with:
signal(sig, SIG_DFL);
raise(sig);
Otherwise you will end up with a lockup.

> 
>   -bash-3.2# ./cancel-test
>   Real-Time debugging started
>   ctrl_func: started at count 0
>   ctrl_func: sleeping for 2sec 500000000ns
>   calc_func: counting till 50
>   calc_func: at count 0
>   calc_func: at count 1
>   calc_func: at count 2
>   calc_func: at count 3
>   calc_func: at count 4
>   calc_func: at count 5
>   calc_func: at count 6
>   calc_func: at count 7
>   calc_func: at count 8
>   calc_func: at count 9
>   calc_func: at count 10
>   calc_func: at count 11
>   calc_func: at count 12
>   calc_func: at count 13
>   calc_func: at count 14
>   calc_func: at count 15
>   calc_func: at count 16
>   calc_func: at count 17
>   calc_func: at count 18
>   calc_func: at count 19
>   calc_func: at count 20
>   calc_func: at count 21
>   calc_func: at count 22
>   ctrl_func: cancel at count 23
>   ctrl_func: stopped at count 23
>   main terminating in 2 seconds...
> 
> But the messages from calc_func are display before the task gets
> actually canceled, which I do not understand.

How do you know that ? I mean messages printed with rt_printf are
printed with a delay, and messages printed with printf are only printed
when the buffer is flushed (which probably happens upon exit in your case).

Also, does the "switchtest" test work on these platforms ? switchtest
uses pthread_cancel and pthread_join too.


> On ARM, it behaves similar
> if I disable explicit setting of the cancellation type:
> 
>   //#define USE_SIGXCPU
> 
>   //#define USE_EXPLICIT_SCHED
> 
>   //#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
> 
>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
> 
>   #define USE_TEST_CANCEL
> 
> 
> Enabling/disabling other options does not work as expected either, like
> using USE_EXPLICIT_SCHED. The cancellation does then not work any more.

Could you try to call pthread_getschedparam to check whether the threads
priority is correct?

> I'm also puzzled why pthread_setschedparam() does make a mode switch
> to secondary mode (sometimes).

That is normal. The glibc caches thread priority values, so we have to
call __real_pthread_setschedparam to update them. This issue has been
solved differently on trunk, but unfortunately, we cannot backport this
modification to the v2.4.x branch.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-02 18:35                 ` Gilles Chanteperdrix
@ 2008-12-02 19:50                   ` Wolfgang Grandegger
  2008-12-02 20:03                     ` Philippe Gerum
  2008-12-07 16:05                   ` Wolfgang Grandegger
  1 sibling, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-02 19:50 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Hi Gilles,
>>>>
>>>> Gilles Chanteperdrix wrote:
>>>>> Gilles Chanteperdrix wrote:
>>>>>>>> Now, the question is, do you realistically plan to write an application
>>>>>>>> which makes no syscall in its real-time loop?
>>>>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>>>>> pthreads will run legacy code and it would be a pain to add
>>>>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>>>>> and simple solution to do a defined exit/abort.
>>>>>> In case of programming error, enable the xenomai watchdog, it will
>>>>>> forcibly kill the problematic thread.
>>>>> To give you a more complete answer: most blocking functions are
>>>>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>>>>> probably do not need to add pthread_testcancel at all. The only
>>>>> exception is pthread_mutex_lock: this way, cancellation happens for well
>>>>> defined mutex states, and you may install cleanup handlers with
>>>>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>>>>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>>>>> situation is not that clean.
>>>> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
>>>> with pthread_testcancel does not work reliably and consistently and it
>>>> still behaves different on my ARM and PowerPC systems. I have attached
>>>> my revised test program allowing to enable/disable various method of
>>>> thread creation, setup and cancellation. They all work fine with the
>>>> Linux POSIX libraries. With Xenomai, only a few work as expected on my
>>>> ARM and PowerPC test systems.
>>> Could you explain us exactly what happens
>> OK, with the definitions
>>
>>   //#define USE_SIGXCPU
>>   //#define USE_EXPLICIT_SCHED
>>   #define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>   #define USE_TEST_CANCEL
>>
>> I get on my ARM MX31ADS system:
>>
>>   -bash-3.2# ./cancel-test
>>   Real-Time debugging started
>>   Segmentation fault
>>
>> The program behaves differently when running under gdb but the
>> segmentation fault happens somewhere in pthread_cancel. It works better
>> on my PowerPC TQM5200 system:
> 
> If you want to get the real pc of a segmentation fault on arm, you can
> enable "verbose user faults" in the kernel hacking menu and boot the
> kernel with user_debug=29, the kernel will then dump the value of the
> registers upon segmentation fault. You can also trigger a backtrace dump
> by registering a signal handler for the SIGSEGV signal. Note however that:
> - the backtrace will lack the inner function call;
> - such a signal handler should end with:
> signal(sig, SIG_DFL);
> raise(sig);
> Otherwise you will end up with a lockup.

OK, will try that. More below.

>>   -bash-3.2# ./cancel-test
>>   Real-Time debugging started
>>   ctrl_func: started at count 0
>>   ctrl_func: sleeping for 2sec 500000000ns
>>   calc_func: counting till 50
>>   calc_func: at count 0
>>   calc_func: at count 1
>>   calc_func: at count 2
>>   calc_func: at count 3
>>   calc_func: at count 4
>>   calc_func: at count 5
>>   calc_func: at count 6
>>   calc_func: at count 7
>>   calc_func: at count 8
>>   calc_func: at count 9
>>   calc_func: at count 10
>>   calc_func: at count 11
>>   calc_func: at count 12
>>   calc_func: at count 13
>>   calc_func: at count 14
>>   calc_func: at count 15
>>   calc_func: at count 16
>>   calc_func: at count 17
>>   calc_func: at count 18
>>   calc_func: at count 19
>>   calc_func: at count 20
>>   calc_func: at count 21
>>   calc_func: at count 22
>>   ctrl_func: cancel at count 23
>>   ctrl_func: stopped at count 23
>>   main terminating in 2 seconds...
>>
>> But the messages from calc_func are display before the task gets
>> actually canceled, which I do not understand.
> 
> How do you know that ? I mean messages printed with rt_printf are
> printed with a delay, and messages printed with printf are only printed
> when the buffer is flushed (which probably happens upon exit in your case).

The calc_thread will take (almost) all CPU resources until it gets
canceled. No messages should be displayed before that happens. Maybe that's
due to the miraculous ROOT priority coupling:

-bash-3.2# cat stat sched

CPU  PID    MSW        CSW        PF    STAT       %CPU  NAME
  0  0      0          12541      0     00500080   99.9  ROOT
  0  1392   1          1          0     00300380    0.0  cancel-test
  0  1394   3          4          0     00300184    0.0  ctrl_func
  0  1395   3          3          0     00300380    0.0  calc_func
  0  0      0          4178149    0     00000000    0.1  IRQ512: [timer]
CPU  PID    PRI      PERIOD     TIMEOUT    TIMEBASE  STAT       NAME
  0  0       38      0          0          master    R          ROOT
  0  1392     0      0          0          master    X          cancel-test
  0  1394    39      0          5379598667 master    D          ctrl_func
  0  1395    38      0          0          master    X          calc_func

ROOT has the *same* priority as calc_func. 

> Also, does the "switchtest" test work on these platforms ? switchtest
> uses pthread_cancel and pthread_join too.

OK.
 
> 
>  On ARM, it behaves similar
>> if I disable explicit setting of the cancellation type:
>>
>>   //#define USE_SIGXCPU
>>
>>   //#define USE_EXPLICIT_SCHED
>>
>>   //#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>
>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>
>>   #define USE_TEST_CANCEL
>>
>>
>> Enabling/disabling other options does not work as expected either, like
>> using USE_EXPLICIT_SCHED. The cancellation does then not work any more.
> 
> Could you try to call pthread_getschedparam to check whether the threads
> priority is correct?

The values returned seem OK but I get:

  -bash-3.2# ./cancel-test
  Real-Time debugging started
  ctrl_func: policy=1 prio=39
  ctrl_func: started at count 0
  ctrl_func: sleeping for 2sec 500000000ns
  **** nothing showed for 5 seconds ***
  calc_func: policy=1 prio=38
  calc_func: counting till 50
  calc_func: at count 0
  ...
  calc_func: at count 22
  ctrl_func: cancel at count 23
  calc_func: at count 23
  ...
  calc_func: at count 49
  calc_func: stopped at count 50

  Segmentation fault (core dumped)
  -bash-3.2# gdb cancel-test core.1407 
  ...
  (gdb) where
  #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
  Cannot access memory at address 0x4885cd24

The reason for the segmentation fault might be that calc_func has
already exited. It is also interesting that ROOT now runs at priority 39:

-bash-3.2# cat stat sched
CPU  PID    MSW        CSW        PF    STAT       %CPU  NAME
  0  0      0          12563      0     00500080   99.9  ROOT
  0  1407   1          1          0     00300380    0.0  cancel-test
  0  1409   1          3          1     00300380    0.0  ctrl_func
  0  1410   1          3          0     00300380    0.0  calc_func
  0  0      0          4373414    0     00000000    0.1  IRQ512: [timer]
CPU  PID    PRI      PERIOD     TIMEOUT    TIMEBASE  STAT       NAME
  0  0       39      0          0          master    R          ROOT
  0  1407     0      0          0          master    X          cancel-test
  0  1409    39      0          0          master    X          ctrl_func
  0  1410    38      0          0          master    X          calc_func

When does this priority coupling happen? Anyhow, in this case no messages are
shown for about 5 seconds (see *** above) and the cancellation does not work.

>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>> to secondary mode (sometimes).
> 
> That is normal. The glibc caches threads priority value, so we have to
> call __real_pthread_setschedparam to update them. This issue has been
> solved differently on trunk, but unfortunately, we can not backport this
> modification on v2.4.x branch.

OK, and how can I then increase/decrease the priority without switching
to secondary mode?

Thanks.

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-02 19:50                   ` Wolfgang Grandegger
@ 2008-12-02 20:03                     ` Philippe Gerum
  0 siblings, 0 replies; 55+ messages in thread
From: Philippe Gerum @ 2008-12-02 20:03 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> -bash-3.2# cat stat sched
> CPU  PID    MSW        CSW        PF    STAT       %CPU  NAME
>   0  0      0          12563      0     00500080   99.9  ROOT
>   0  1407   1          1          0     00300380    0.0  cancel-test
>   0  1409   1          3          1     00300380    0.0  ctrl_func
>   0  1410   1          3          0     00300380    0.0  calc_func
>   0  0      0          4373414    0     00000000    0.1  IRQ512: [timer]
> CPU  PID    PRI      PERIOD     TIMEOUT    TIMEBASE  STAT       NAME
>   0  0       39      0          0          master    R          ROOT
>   0  1407     0      0          0          master    X          cancel-test
>   0  1409    39      0          0          master    X          ctrl_func
>   0  1410    38      0          0          master    X          calc_func
> 
> When does this priority coupling happen.

Each time a thread switches from primary to secondary mode, it is enqueued by
decreasing priority order into a list tracking relaxed threads for the given
CPU. The root thread for that CPU dynamically inherits the priority of the
topmost thread queued to the relax/RPI list. This list is updated as soon as:
- a primary mode thread is relaxed (enqueued)
- a relaxed thread goes back to primary mode (dequeued)
- a relaxed thread blocks on a linux service (dequeued)
- a relaxed thread resumes from a linux service (enqueued)
When no more threads are linked to the RPI list, the root thread priority is
downgraded to -1.

In effect, this couples the Xenomai priority scale to the linux one, so that
switching to secondary mode does not allow lower priority threads still in
primary mode to preempt.

CONFIG_XENO_OPT_PRIOCPL controls whether such coupling should happen at all.

-- 
Philippe.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-01 14:15       ` Gilles Chanteperdrix
  2008-12-01 15:10         ` Gilles Chanteperdrix
@ 2008-12-03  8:04         ` Wolfgang Grandegger
  2008-12-03 10:12           ` Gilles Chanteperdrix
  1 sibling, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03  8:04 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Hello,
>>>>>
>>>>> I have written the attached test program to cancel Xenomai POSIX
>>>>> threads. The "calc_task" does some busy work, which the higher priority
>>>>> task "ctrl_task" interrupts and aborts after some time. The program does
>>>>> not behave like I expect and it also behaves differently on my PowerPC
>>>>> and ARM test system. The "calc_task" continues after calling
>>>>> pthread_cancel() in "ctrl_task". On ARM, the behaviour is even more
>>>>> wired. Is there anything wrong in my test program or anything else I
>>>>> should care of?
>>>> First, you should know that PTHREAD_CANCEL_ASYNCHRONOUS is evil, it will
>>>> almost inevitably leave things in an unknown state when canceling a
>>>> thread, you would better use PTHREAD_CANCEL_DEFERRED and ensure that
>>>> your thread has some cancellation point, if it has not, use
>>>> pthread_testcancel.
>>>>
>>>> Second, your program will only work if root thread priority coupling is
>>>> enabled. Is it enabled in your case?
>>> Actually, even with priority coupling I am not sure it can not work. The
>>> problem is the way do_sigwake_event signal threads: it make them switch
>>> to secondary mode only if they pass through a syscall. Since the
>>> "calc_task" thread does not issue any syscall, it will never check see
>>> that it should relax to handle the cancelation signal.
>> calc_task() calls clock_gettime() permanently, which does a syscall as
>> long as vDSO is not used, which might be the case on PowerPC. This would
>> explain why cancelation of "calc_task" works on ARM.
> 
> For architectures with a high-resolution counter,
> clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
> counter and does not issue any syscall, this includes arm unless you
> configured xenomai with --enable-arm-arch=generic or with
> --disable-arm-tsc, and this probably includes powerpc.
> 

clock_gettime() is wrapped to __wrap_clock_gettime(), which does a syscall:

http://www.rts.uni-hannover.de/xenomai/lxr/source/src/skins/posix/clock.c?v=SVN-trunk#058

Have I missed something?

Wolfgang.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03  8:04         ` Wolfgang Grandegger
@ 2008-12-03 10:12           ` Gilles Chanteperdrix
  2008-12-03 10:46             ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 10:12 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> For architectures with a high-resolution counter,
>> clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
>> counter and does not issue any syscall, this includes arm unless you
>> configured xenomai with --enable-arm-arch=generic or with
>> --disable-arm-tsc, and this probably includes powerpc.
>>
> 
> clock_gettime() is wrapped to __wrap_clock_gettime(), which does a syscall:
> 
> http://www.rts.uni-hannover.de/xenomai/lxr/source/src/skins/posix/clock.c?v=SVN-trunk#058
> 
> Have I missed something?

Yes, read the code again.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-02 18:18               ` Wolfgang Grandegger
  2008-12-02 18:35                 ` Gilles Chanteperdrix
@ 2008-12-03 10:16                 ` Gilles Chanteperdrix
  2008-12-03 11:19                   ` Wolfgang Grandegger
  1 sibling, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 10:16 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Hi Gilles,
>>>
>>> Gilles Chanteperdrix wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>>>> Now, the question is, do you realistically plan to write an application
>>>>>>> which makes no syscall in its real-time loop?
>>>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>>>> pthreads will run legacy code and it would be a pain to add
>>>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>>>> and simple solution to do a defined exit/abort.
>>>>> In case of programming error, enable the xenomai watchdog, it will
>>>>> forcibly kill the problematic thread.
>>>> To give you a more complete answer: most blocking functions are
>>>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>>>> probably do not need to add pthread_testcancel at all. The only
>>>> exception is pthread_mutex_lock: this way, cancellation happens for well
>>>> defined mutex states, and you may install cleanup handlers with
>>>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>>>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>>>> situation is not that clean.
>>> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
>>> with pthread_testcancel does not work reliably and consistently and it
>>> still behaves different on my ARM and PowerPC systems. I have attached
>>> my revised test program allowing to enable/disable various method of
>>> thread creation, setup and cancellation. They all work fine with the
>>> Linux POSIX libraries. With Xenomai, only a few work as expected on my
>>> ARM and PowerPC test systems.
>> Could you explain us exactly what happens
> 
> OK, with the definitions
> 
>   //#define USE_SIGXCPU
>   //#define USE_EXPLICIT_SCHED
>   #define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>   #define USE_TEST_CANCEL
> 
> I get on my ARM MX31ADS system:
> 
>   -bash-3.2# ./cancel-test
>   Real-Time debugging started
>   Segmentation fault
> 
> The program behaves differently when running under gdb but the
> segmentation fault happens somewhere in pthread_cancel. It works better
> on my PowerPC TQM5200 system:
> 
>   -bash-3.2# ./cancel-test
>   Real-Time debugging started
>   ctrl_func: started at count 0
>   ctrl_func: sleeping for 2sec 500000000ns
>   calc_func: counting till 50
>   calc_func: at count 0
>   calc_func: at count 1
>   calc_func: at count 2
>   calc_func: at count 3
>   calc_func: at count 4
>   calc_func: at count 5
>   calc_func: at count 6
>   calc_func: at count 7
>   calc_func: at count 8
>   calc_func: at count 9
>   calc_func: at count 10
>   calc_func: at count 11
>   calc_func: at count 12
>   calc_func: at count 13
>   calc_func: at count 14
>   calc_func: at count 15
>   calc_func: at count 16
>   calc_func: at count 17
>   calc_func: at count 18
>   calc_func: at count 19
>   calc_func: at count 20
>   calc_func: at count 21
>   calc_func: at count 22
>   ctrl_func: cancel at count 23
>   ctrl_func: stopped at count 23
>   main terminating in 2 seconds...
> 
> But the messages from calc_func are display before the task gets
> actually canceled, which I do not understand. On ARM, it behaves similar
> if I disable explicit setting of the cancellation type:
> 
>   //#define USE_SIGXCPU
> 
>   //#define USE_EXPLICIT_SCHED
> 
>   //#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
> 
>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
> 
>   #define USE_TEST_CANCEL
> 
> 
> Enabling/disabling other options does not work as expected either, like
> using USE_EXPLICIT_SCHED. The cancellation does then not work any more.

The problem is that the way you create threads is racy: you do not know
in which order the two tasks are created, and if ever calc_func is
created before ctrl_func, it will use all the CPU and ctrl_func will not
have a chance to interrupt calc_func.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 10:46             ` Wolfgang Grandegger
@ 2008-12-03 10:40               ` Gilles Chanteperdrix
  2008-12-03 11:16                 ` Wolfgang Grandegger
  2008-12-03 11:11               ` Philippe Gerum
  1 sibling, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 10:40 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> For architectures with a high-resolution counter,
>>>> clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
>>>> counter and does not issue any syscall, this includes arm unless you
>>>> configured xenomai with --enable-arm-arch=generic or with
>>>> --disable-arm-tsc, and this probably includes powerpc.
>>>>
>>> clock_gettime() is wrapped to __wrap_clock_gettime(), which does a syscall:
>>>
>>> http://www.rts.uni-hannover.de/xenomai/lxr/source/src/skins/posix/clock.c?v=SVN-trunk#058
>>>
>>> Have I missed something?
>> Yes, read the code again.
> 
> I was aware of CONFIG_XENO_HW_DIRECT_TSC but didn't look it up correctly
> :-(. It is set for my ARM  but not for my PowerPC system. Which explains
> some differences.

In include/asm-powerpc/syscall.h, I see:
#define CONFIG_XENO_HW_DIRECT_TSC 1

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 10:12           ` Gilles Chanteperdrix
@ 2008-12-03 10:46             ` Wolfgang Grandegger
  2008-12-03 10:40               ` Gilles Chanteperdrix
  2008-12-03 11:11               ` Philippe Gerum
  0 siblings, 2 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 10:46 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> For architectures with a high-resolution counter,
>>> clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
>>> counter and does not issue any syscall, this includes arm unless you
>>> configured xenomai with --enable-arm-arch=generic or with
>>> --disable-arm-tsc, and this probably includes powerpc.
>>>
>> clock_gettime() is wrapped to __wrap_clock_gettime(), which does a syscall:
>>
>> http://www.rts.uni-hannover.de/xenomai/lxr/source/src/skins/posix/clock.c?v=SVN-trunk#058
>>
>> Have I missed something?
> 
> Yes, read the code again.

I was aware of CONFIG_XENO_HW_DIRECT_TSC but didn't look it up correctly
:-(. It is set for my ARM but not for my PowerPC system, which explains
some differences.

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 10:46             ` Wolfgang Grandegger
  2008-12-03 10:40               ` Gilles Chanteperdrix
@ 2008-12-03 11:11               ` Philippe Gerum
  2008-12-03 11:22                 ` Wolfgang Grandegger
  1 sibling, 1 reply; 55+ messages in thread
From: Philippe Gerum @ 2008-12-03 11:11 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> For architectures with a high-resolution counter,
>>>> clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
>>>> counter and does not issue any syscall, this includes arm unless you
>>>> configured xenomai with --enable-arm-arch=generic or with
>>>> --disable-arm-tsc, and this probably includes powerpc.
>>>>
>>> clock_gettime() is wrapped to __wrap_clock_gettime(), which does a syscall:
>>>
>>> http://www.rts.uni-hannover.de/xenomai/lxr/source/src/skins/posix/clock.c?v=SVN-trunk#058
>>>
>>> Have I missed something?
>> Yes, read the code again.
> 
> I was aware of CONFIG_XENO_HW_DIRECT_TSC but didn't look it up correctly
> :-(. It is set for my ARM  but not for my PowerPC system. Which explains
> some differences.
>

We do have direct TSC readings on powerpc as well by looking at the timebase
register from userland, but this only applies to rt_native_tsc(), or
clock_gettime() when using CLOCK_MONOTONIC in aperiodic mode.

> Wolfgang.
> 
> 
> _______________________________________________
> Xenomai-help mailing list
> Xenomai-help@domain.hid
> https://mail.gna.org/listinfo/xenomai-help
> 


-- 
Philippe.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 10:40               ` Gilles Chanteperdrix
@ 2008-12-03 11:16                 ` Wolfgang Grandegger
  0 siblings, 0 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 11:16 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> For architectures with a high-resolution counter,
>>>>> clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
>>>>> counter and does not issue any syscall, this includes arm unless you
>>>>> configured xenomai with --enable-arm-arch=generic or with
>>>>> --disable-arm-tsc, and this probably includes powerpc.
>>>>>
>>>> clock_gettime() is wrapped to __wrap_clock_gettime(), which does a syscall:
>>>>
>>>> http://www.rts.uni-hannover.de/xenomai/lxr/source/src/skins/posix/clock.c?v=SVN-trunk#058
>>>>
>>>> Have I missed something?
>>> Yes, read the code again.
>> I was aware of CONFIG_XENO_HW_DIRECT_TSC but didn't look it up correctly
>> :-(. It is set for my ARM  but not for my PowerPC system. Which explains
>> some differences.
> 
> In include/asm-powerpc/syscall.h, I see:
> #define CONFIG_XENO_HW_DIRECT_TSC 1

Well, you are right. Puh, already too much trial and error.

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 10:16                 ` Gilles Chanteperdrix
@ 2008-12-03 11:19                   ` Wolfgang Grandegger
  2008-12-03 13:30                     ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 11:19 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Hi Gilles,
>>>>
>>>> Gilles Chanteperdrix wrote:
>>>>> Gilles Chanteperdrix wrote:
>>>>>>>> Now, the question is, do you realistically plan to write an application
>>>>>>>> which makes no syscall in its real-time loop?
>>>>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>>>>> pthreads will run legacy code and it would be a pain to add
>>>>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>>>>> and simple solution to do a defined exit/abort.
>>>>>> In case of programming error, enable the xenomai watchdog, it will
>>>>>> forcibly kill the problematic thread.
>>>>> To give you a more complete answer: most blocking functions are
>>>>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>>>>> probably do not need to add pthread_testcancel at all. The only
>>>>> exception is pthread_mutex_lock: this way, cancellation happens for well
>>>>> defined mutex states, and you may install cleanup handlers with
>>>>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>>>>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>>>>> situation is not that clean.
>>>> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
>>>> with pthread_testcancel does not work reliably and consistently and it
>>>> still behaves different on my ARM and PowerPC systems. I have attached
>>>> my revised test program allowing to enable/disable various method of
>>>> thread creation, setup and cancellation. They all work fine with the
>>>> Linux POSIX libraries. With Xenomai, only a few work as expected on my
>>>> ARM and PowerPC test systems.
>>> Could you explain us exactly what happens
>> OK, with the definitions
>>
>>   //#define USE_SIGXCPU
>>   //#define USE_EXPLICIT_SCHED
>>   #define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>   #define USE_TEST_CANCEL
>>
>> I get on my ARM MX31ADS system:
>>
>>   -bash-3.2# ./cancel-test
>>   Real-Time debugging started
>>   Segmentation fault
>>
>> The program behaves differently when running under gdb but the
>> segmentation fault happens somewhere in pthread_cancel. It works better
>> on my PowerPC TQM5200 system:
>>
>>   -bash-3.2# ./cancel-test
>>   Real-Time debugging started
>>   ctrl_func: started at count 0
>>   ctrl_func: sleeping for 2sec 500000000ns
>>   calc_func: counting till 50
>>   calc_func: at count 0
>>   calc_func: at count 1
>>   calc_func: at count 2
>>   calc_func: at count 3
>>   calc_func: at count 4
>>   calc_func: at count 5
>>   calc_func: at count 6
>>   calc_func: at count 7
>>   calc_func: at count 8
>>   calc_func: at count 9
>>   calc_func: at count 10
>>   calc_func: at count 11
>>   calc_func: at count 12
>>   calc_func: at count 13
>>   calc_func: at count 14
>>   calc_func: at count 15
>>   calc_func: at count 16
>>   calc_func: at count 17
>>   calc_func: at count 18
>>   calc_func: at count 19
>>   calc_func: at count 20
>>   calc_func: at count 21
>>   calc_func: at count 22
>>   ctrl_func: cancel at count 23
>>   ctrl_func: stopped at count 23
>>   main terminating in 2 seconds...
>>
>> But the messages from calc_func are displayed before the task gets
>> actually canceled, which I do not understand. On ARM, it behaves similarly
>> if I disable explicit setting of the cancellation type:
>>
>>   //#define USE_SIGXCPU
>>
>>   //#define USE_EXPLICIT_SCHED
>>
>>   //#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>
>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>
>>   #define USE_TEST_CANCEL
>>
>>
>> Enabling/disabling other options does not work as expected either, like
>> using USE_EXPLICIT_SCHED. The cancellation does then not work any more.
> 
> The problem is that the way you create threads is racy, you do not know
> in which order the two tasks are created, and if ever calc_func is
> created before ctrl_func, it will use all the cpu and ctrl_func will not
> have a chance to interrupt calc_func.

I already put some sleep or ctrl-thread-is-up test before creating
calc_thread, which did not help. Also the output above indicates that
ctrl_thread did start before calc_thread.

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 11:11               ` Philippe Gerum
@ 2008-12-03 11:22                 ` Wolfgang Grandegger
  0 siblings, 0 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 11:22 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-help

Philippe Gerum wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> For architectures with a high-resolution counter,
>>>>> clock_gettime(CLOCK_MONOTONIC) is a xenomai service which uses the
>>>>> counter and does not issue any syscall, this includes arm unless you
>>>>> configured xenomai with --enable-arm-arch=generic or with
>>>>> --disable-arm-tsc, and this probably includes powerpc.
>>>>>
>>>> clock_gettime() is wrapped to __wrap_clock_gettime(), which does a syscall:
>>>>
>>>> http://www.rts.uni-hannover.de/xenomai/lxr/source/src/skins/posix/clock.c?v=SVN-trunk#058
>>>>
>>>> Have I missed something?
>>> Yes, read the code again.
>> I was aware of CONFIG_XENO_HW_DIRECT_TSC but didn't look it up correctly
>> :-(. It is set for my ARM  but not for my PowerPC system. Which explains
>> some differences.
>>
> 
> We do have direct TSC readings on powerpc as well by looking at the timebase
> register from userland, but this only applies to rt_native_tsc(), or
> clock_gettime() when using CLOCK_MONOTONIC in aperiodic mode.

Well, yes, and the code is quite clear in this respect. I just did not
search for CONFIG_XENO_HW_DIRECT_TSC carefully. Sorry for the snapshot.

Wolfgang.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 11:19                   ` Wolfgang Grandegger
@ 2008-12-03 13:30                     ` Gilles Chanteperdrix
  2008-12-03 18:02                       ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 13:30 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Hi Gilles,
>>>>>
>>>>> Gilles Chanteperdrix wrote:
>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>>> Now, the question is, do you realistically plan to write an application
>>>>>>>>> which makes no syscall in its real-time loop?
>>>>>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>>>>>> pthreads will run legacy code and it would be a pain to add
>>>>>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>>>>>> and simple solution to do a defined exit/abort.
>>>>>>> In case of programming error, enable the xenomai watchdog, it will
>>>>>>> forcibly kill the problematic thread.
>>>>>> To give you a more complete answer: most blocking functions are
>>>>>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>>>>>> probably do not need to add pthread_testcancel at all. The only
>>>>>> exception is pthread_mutex_lock: this way, cancellation happens for well
>>>>>> defined mutex states, and you may install cleanup handlers with
>>>>>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>>>>>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>>>>>> situation is not that clean.
>>>>> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
>>>>> with pthread_testcancel does not work reliably and consistently and it
>>>>> still behaves different on my ARM and PowerPC systems. I have attached
>>>>> my revised test program allowing to enable/disable various method of
>>>>> thread creation, setup and cancellation. They all work fine with the
>>>>> Linux POSIX libraries. With Xenomai, only a few work as expected on my
>>>>> ARM and PowerPC test systems.
>>>> Could you explain us exactly what happens
>>> OK, with the definitions
>>>
>>>   //#define USE_SIGXCPU
>>>   //#define USE_EXPLICIT_SCHED
>>>   #define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>>   #define USE_TEST_CANCEL
>>>
>>> I get on my ARM MX31ADS system:
>>>
>>>   -bash-3.2# ./cancel-test
>>>   Real-Time debugging started
>>>   Segmentation fault
>>>
>>> The program behaves differently when running under gdb but the
>>> segmentation fault happens somewhere in pthread_cancel. It works better
>>> on my PowerPC TQM5200 system:
>>>
>>>   -bash-3.2# ./cancel-test
>>>   Real-Time debugging started
>>>   ctrl_func: started at count 0
>>>   ctrl_func: sleeping for 2sec 500000000ns
>>>   calc_func: counting till 50
>>>   calc_func: at count 0
>>>   calc_func: at count 1
>>>   calc_func: at count 2
>>>   calc_func: at count 3
>>>   calc_func: at count 4
>>>   calc_func: at count 5
>>>   calc_func: at count 6
>>>   calc_func: at count 7
>>>   calc_func: at count 8
>>>   calc_func: at count 9
>>>   calc_func: at count 10
>>>   calc_func: at count 11
>>>   calc_func: at count 12
>>>   calc_func: at count 13
>>>   calc_func: at count 14
>>>   calc_func: at count 15
>>>   calc_func: at count 16
>>>   calc_func: at count 17
>>>   calc_func: at count 18
>>>   calc_func: at count 19
>>>   calc_func: at count 20
>>>   calc_func: at count 21
>>>   calc_func: at count 22
>>>   ctrl_func: cancel at count 23
>>>   ctrl_func: stopped at count 23
>>>   main terminating in 2 seconds...
>>>
>>> But the messages from calc_func are displayed before the task gets
>>> actually canceled, which I do not understand. On ARM, it behaves similarly
>>> if I disable explicit setting of the cancellation type:
>>>
>>>   //#define USE_SIGXCPU
>>>
>>>   //#define USE_EXPLICIT_SCHED
>>>
>>>   //#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>>
>>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>>
>>>   #define USE_TEST_CANCEL
>>>
>>>
>>> Enabling/disabling other options does not work as expected either, like
>>> using USE_EXPLICIT_SCHED. The cancellation does then not work any more.
>> The problem is that the way you create threads is racy, you do not know
>> in which order the two tasks are created, and if ever calc_func is
>> created before ctrl_func, it will use all the cpu and ctrl_func will not
>> have a chance to interrupt calc_func.
> 
> I already put some sleep or ctrl-thread-is-up test before creating
> calc_thread, which did not help. Also the output above indicates that
> ctrl_thread did start before calc_thread.

Unless I am wrong, the output above indicates that the test works... What
I am talking about are the cases where the test does not work, especially
when USE_EXPLICIT_SCHED is not set.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 18:02                       ` Wolfgang Grandegger
@ 2008-12-03 17:57                         ` Gilles Chanteperdrix
  2008-12-03 18:37                           ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 17:57 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Running under gdb shows:
> 
>   Program received signal SIGSEGV, Segmentation fault.
>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>   (gdb) where
>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>   #2  0x0ffa98e4 in __pthread_trampoline ()
>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
> 
> Is pthread_cancel used from the Linux pthread library? And
> pthread_testcancel() as well?

Yes, and I guess, as you said, that it happens because calc_func is dead
when you try and cancel it.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 13:30                     ` Gilles Chanteperdrix
@ 2008-12-03 18:02                       ` Wolfgang Grandegger
  2008-12-03 17:57                         ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 18:02 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> Hi Gilles,
>>>>>>
>>>>>> Gilles Chanteperdrix wrote:
>>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>>>> Now, the question is, do you realistically plan to write an application
>>>>>>>>>> which makes no syscall in its real-time loop?
>>>>>>>>> Unlikely, but it may happen in case of programming errors. Anyhow, the
>>>>>>>>> pthreads will run legacy code and it would be a pain to add
>>>>>>>>> pthread_testcancel where necessary. But maybe there is a more elegant
>>>>>>>>> and simple solution to do a defined exit/abort.
>>>>>>>> In case of programming error, enable the xenomai watchdog, it will
>>>>>>>> forcibly kill the problematic thread.
>>>>>>> To give you a more complete answer: most blocking functions are
>>>>>>> cancellation points in the PTHREAD_CANCEL_DEFERRED case, so, you
>>>>>>> probably do not need to add pthread_testcancel at all. The only
>>>>>>> exception is pthread_mutex_lock: this way, cancellation happens for well
>>>>>>> defined mutex states, and you may install cleanup handlers with
>>>>>>> pthread_cleanup_push/pthread_cleanup_pop if ever a thread may be
>>>>>>> destroyed while holding a mutex. With PTHREAD_CANCEL_ASYNCHRONOUS, the
>>>>>>> situation is not that clean.
>>>>>> Well, there seems something wrong with it, also PTHREAD_CANCEL_DEFERRED
>>>>>> with pthread_testcancel does not work reliably and consistently and it
>>>>>> still behaves different on my ARM and PowerPC systems. I have attached
>>>>>> my revised test program allowing to enable/disable various method of
>>>>>> thread creation, setup and cancellation. They all work fine with the
>>>>>> Linux POSIX libraries. With Xenomai, only a few work as expected on my
>>>>>> ARM and PowerPC test systems.
>>>>> Could you explain us exactly what happens
>>>> OK, with the definitions
>>>>
>>>>   //#define USE_SIGXCPU
>>>>   //#define USE_EXPLICIT_SCHED
>>>>   #define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>>>   #define USE_TEST_CANCEL
>>>>
>>>> I get on my ARM MX31ADS system:
>>>>
>>>>   -bash-3.2# ./cancel-test
>>>>   Real-Time debugging started
>>>>   Segmentation fault
>>>>
>>>> The program behaves differently when running under gdb but the
>>>> segmentation fault happens somewhere in pthread_cancel. It works better
>>>> on my PowerPC TQM5200 system:
>>>>
>>>>   -bash-3.2# ./cancel-test
>>>>   Real-Time debugging started
>>>>   ctrl_func: started at count 0
>>>>   ctrl_func: sleeping for 2sec 500000000ns
>>>>   calc_func: counting till 50
>>>>   calc_func: at count 0
>>>>   calc_func: at count 1
>>>>   calc_func: at count 2
>>>>   calc_func: at count 3
>>>>   calc_func: at count 4
>>>>   calc_func: at count 5
>>>>   calc_func: at count 6
>>>>   calc_func: at count 7
>>>>   calc_func: at count 8
>>>>   calc_func: at count 9
>>>>   calc_func: at count 10
>>>>   calc_func: at count 11
>>>>   calc_func: at count 12
>>>>   calc_func: at count 13
>>>>   calc_func: at count 14
>>>>   calc_func: at count 15
>>>>   calc_func: at count 16
>>>>   calc_func: at count 17
>>>>   calc_func: at count 18
>>>>   calc_func: at count 19
>>>>   calc_func: at count 20
>>>>   calc_func: at count 21
>>>>   calc_func: at count 22
>>>>   ctrl_func: cancel at count 23
>>>>   ctrl_func: stopped at count 23
>>>>   main terminating in 2 seconds...
>>>>
>>>> But the messages from calc_func are displayed before the task gets
>>>> actually canceled, which I do not understand. On ARM, it behaves similarly
>>>> if I disable explicit setting of the cancellation type:
>>>>
>>>>   //#define USE_SIGXCPU
>>>>
>>>>   //#define USE_EXPLICIT_SCHED
>>>>
>>>>   //#define CANCEL_TYPE PTHREAD_CANCEL_DEFERRED
>>>>
>>>>   //#define CANCEL_TYPE PTHREAD_CANCEL_ASYNCHRONOUS
>>>>
>>>>   #define USE_TEST_CANCEL
>>>>
>>>>
>>>> Enabling/disabling other options does not work as expected either, like
>>>> using USE_EXPLICIT_SCHED. The cancellation does then not work any more.
>>> The problem is that the way you create threads is racy, you do not know
>>> in which order the two tasks are created, and if ever calc_func is
>>> created before ctrl_func, it will use all the cpu and ctrl_func will not
>>> have a chance to interrupt calc_func.
>> I already put some sleep or ctrl-thread-is-up test before creating
>> calc_thread, which did not help. Also the output above indicates that
>> ctrl_thread did start before calc_thread.
> 
> Unless I am wrong the output above indicates that the test works... What

Yes, on PowerPC it works.

> I am talking about is the cases where the test does not work, especially
>  when USE_EXPLICIT_SCHED is not set.

Right. But I never observed something different. Here is the output from
a previous mail:

  -bash-3.2# ./cancel-test
  Real-Time debugging started
  ctrl_func: policy=1 prio=39
  ctrl_func: started at count 0
  ctrl_func: sleeping for 2sec 500000000ns
  **** nothing showed for 5 seconds ***
  calc_func: policy=1 prio=38
  calc_func: counting till 50
  calc_func: at count 0
  ...
  calc_func: at count 22
  ctrl_func: cancel at count 23
  calc_func: at count 23
  ...
  calc_func: at count 49
  calc_func: stopped at count 50

  Segmentation fault (core dumped)

Running under gdb shows:

  Program received signal SIGSEGV, Segmentation fault.
  [Switching to Thread 0x4885d4b0 (LWP 1127)]
  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
  (gdb) where
  #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
  #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
  #2  0x0ffa98e4 in __pthread_trampoline ()
     from /home/wolf/xenomai/lib/libpthread_rt.so.1
  #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
  #4  0x0fdd18a0 in clone () from /lib/libc.so.6
  Backtrace stopped: previous frame inner to this frame (corrupt stack?)

Is pthread_cancel used from the Linux pthread library? And
pthread_testcancel() as well?

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 18:37                           ` Wolfgang Grandegger
@ 2008-12-03 18:32                             ` Gilles Chanteperdrix
  2008-12-03 18:55                               ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 18:32 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Running under gdb shows:
>>>
>>>   Program received signal SIGSEGV, Segmentation fault.
>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>   (gdb) where
>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>
>>> Is pthread_cancel used from the Linux pthread library? And
>>> pthread_testcancel() as well?
>> Yes, and I guess, as you said, that it happens because calc_func is dead
>> when you try and cancel it.
> 
> Yep, but it should not crash.

The spec says:
The pthread_cancel() function may fail if:

[ESRCH]
    No thread could be found corresponding to that specified by the
given thread ID.


So, it is a "may": returning ESRCH, as Xenomai does in kernel-space, is
not mandatory.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 17:57                         ` Gilles Chanteperdrix
@ 2008-12-03 18:37                           ` Wolfgang Grandegger
  2008-12-03 18:32                             ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 18:37 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Running under gdb shows:
>>
>>   Program received signal SIGSEGV, Segmentation fault.
>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>   (gdb) where
>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>
>> Is pthread_cancel used from the Linux pthread library? And
>> pthread_testcancel() as well?
> 
> Yes, and I guess, as you said, that it happens because calc_func is dead
> when you try and cancel it.

Yep, but it should not crash.

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 18:55                               ` Wolfgang Grandegger
@ 2008-12-03 18:55                                 ` Gilles Chanteperdrix
  2008-12-03 19:19                                   ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 18:55 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Running under gdb shows:
>>>>>
>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>   (gdb) where
>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>
>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>> pthread_testcancel() as well?
>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>> when you try and cancel it.
>>> Yep, but it should not crash.
>> The spec says:
>> The pthread_cancel() function may fail if:
>>
>> [ESRCH]
>>     No thread could be found corresponding to that specified by the
>> given thread ID.
>>
>>
>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>> not mandatory.
> 
> I also got the return value ESRCH in another test. Nevertheless, a crash
> is not the expected behaviour, to say the least. Here pthread_cancel()
> obviously gets interrupted and the calc_thread continues. Is it
> possible that pthread_cancel() switches to secondary mode?

pthread_cancel switches to secondary mode if it has to send a signal (if
cancellation is in asynchronous mode, this happens when the target
thread is blocked inside a blocking call). But this should not be a
problem with RPI.

But the problem you should focus on is why the scheduler does not let
pthread_cancel run earlier.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 18:32                             ` Gilles Chanteperdrix
@ 2008-12-03 18:55                               ` Wolfgang Grandegger
  2008-12-03 18:55                                 ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 18:55 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Running under gdb shows:
>>>>
>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>   (gdb) where
>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>
>>>> Is pthread_cancel used from the Linux pthread library? And
>>>> pthread_testcancel() as well?
>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>> when you try and cancel it.
>> Yep, but it should not crash.
> 
> The spec says:
> The pthread_cancel() function may fail if:
> 
> [ESRCH]
>     No thread could be found corresponding to that specified by the
> given thread ID.
> 
> 
> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
> not mandatory.

I also got the return value ESRCH in another test. Nevertheless, a crash
is not the expected behaviour, to say the least. Here pthread_cancel()
obviously gets interrupted and the calc_thread continues. Is it
possible that pthread_cancel() switches to secondary mode?

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 18:55                                 ` Gilles Chanteperdrix
@ 2008-12-03 19:19                                   ` Wolfgang Grandegger
  2008-12-03 19:19                                     ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 19:19 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> Running under gdb shows:
>>>>>>
>>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>   (gdb) where
>>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>>
>>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>>> pthread_testcancel() as well?
>>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>>> when you try and cancel it.
>>>> Yep, but it should not crash.
>>> The spec says:
>>> The pthread_cancel() function may fail if:
>>>
>>> [ESRCH]
>>>     No thread could be found corresponding to that specified by the
>>> given thread ID.
>>>
>>>
>>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>>> not mandatory.
>> I also got the return value ESRCH in another test. Nevertheless, a crash
>> is not the expected behaviour, to say the least. Here pthread_cancel()
>> obvoiusly get's interrupted and the calc_thread continues. Is it
>> possible that pthread_cancel() switches to secondary mode?
> 
> pthread_cancel switches to secondary mode if it has to send a signal (if
> cancellation is in asynchronous mode, this happens when the target
> thread is blocked inside a blocking call). But this should not be a
> problem with RPI.

I disabled priority coupling in the kernel and it did not help or harm.
This test uses PTHREAD_CANCEL_DEFERRED, which is also the default, if
I understood correctly.

> But the problem you should focus on is why the scheduler does not let
> pthread_cancel run earlier.

Don't know what you mean. The calc_func gets preempted and the ctrl_func
calls pthread_cancel as expected...

  calc_func: at count 20
  calc_func: at count 21
  calc_func: at count 22
  ctrl_func: cancel at count 23
  ^^^^^^^^^
  calc_func: at count 23

But then it stops somehow in pthread_cancel and calc_func continues to run.

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 19:19                                   ` Wolfgang Grandegger
@ 2008-12-03 19:19                                     ` Gilles Chanteperdrix
  2008-12-03 20:02                                       ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 19:19 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Gilles Chanteperdrix wrote:
>>>>>> Wolfgang Grandegger wrote:
>>>>>>> Running under gdb shows:
>>>>>>>
>>>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>   (gdb) where
>>>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>>>
>>>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>>>> pthread_testcancel() as well?
>>>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>>>> when you try and cancel it.
>>>>> Yep, but it should not crash.
>>>> The spec says:
>>>> The pthread_cancel() function may fail if:
>>>>
>>>> [ESRCH]
>>>>     No thread could be found corresponding to that specified by the
>>>> given thread ID.
>>>>
>>>>
>>>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>>>> not mandatory.
>>> I also got the return value ESRCH in another test. Nevertheless, a crash
>>> is not the expected behaviour, to say the least. Here pthread_cancel()
>>> obvoiusly get's interrupted and the calc_thread continues. Is it
>>> possible that pthread_cancel() switches to secondary mode?
>> pthread_cancel switches to secondary mode if it has to send a signal (if
>> cancellation is in asynchronous mode, this happens when the target
>> thread is blocked inside a blocking call). But this should not be a
>> problem with RPI.
> 
> I disabled priority coupling in the kernel and it did not help or harm.
> This test uses PTHREAD_CANCEL_DEFERRED, which is  also the default, if
> I understood correctly.

You should definitely enable priority coupling. Even if you use
PTHREAD_CANCEL_DEFERRED, when you call a blocking call, the cancellation
is switched for the time of the blocking call to asynchronous. But since
you do not call any blocking call, I agree that pthread_cancel should
not switch to secondary mode, it should just set a bit in some TCB
attached to the target thread.

> 
>> But the problem you should focus on is why the scheduler does not let
>> pthread_cancel run earlier.
> 
> Don't know what you mean. The calc_func gets preempted and the ctrl_func
> calls pthread_cancel as expected...
> 
>   calc_func: at count 20
>   calc_func: at count 21
>   calc_func: at count 22
>   ctrl_func: cancel at count 23
>   ^^^^^^^^^
>   calc_func: at count 23
> 
> But then it stops somehow in pthread_cancel and calc_func continues to run.

Yes, but since "ctrl_func: stopped at count 23" does not appear, it
means that ctrl_func is somehow blocked in pthread_cancel.

Does the test work if calc_func calls nanosleep instead of
create_load_100ms ?


-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 19:19                                     ` Gilles Chanteperdrix
@ 2008-12-03 20:02                                       ` Wolfgang Grandegger
  2008-12-03 20:02                                         ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-03 20:02 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> Gilles Chanteperdrix wrote:
>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>> Running under gdb shows:
>>>>>>>>
>>>>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>   (gdb) where
>>>>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>>>>
>>>>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>>>>> pthread_testcancel() as well?
>>>>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>>>>> when you try and cancel it.
>>>>>> Yep, but it should not crash.
>>>>> The spec says:
>>>>> The pthread_cancel() function may fail if:
>>>>>
>>>>> [ESRCH]
>>>>>     No thread could be found corresponding to that specified by the
>>>>> given thread ID.
>>>>>
>>>>>
>>>>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>>>>> not mandatory.
>>>> I also got the return value ESRCH in another test. Nevertheless, a crash
>>>> is not the expected behaviour, to say the least. Here pthread_cancel()
>>>> obvoiusly get's interrupted and the calc_thread continues. Is it
>>>> possible that pthread_cancel() switches to secondary mode?
>>> pthread_cancel switches to secondary mode if it has to send a signal (if
>>> cancellation is in asynchronous mode, this happens when the target
>>> thread is blocked inside a blocking call). But this should not be a
>>> problem with RPI.
>> I disabled priority coupling in the kernel and it did not help or harm.
>> This test uses PTHREAD_CANCEL_DEFERRED, which is  also the default, if
>> I understood correctly.
> 
> You should definitely enable priority coupling. Even if you use
> PTHREAD_CANCEL_DEFERRED, when you call a blocking call, the cancellation
> is switched for the time of the blocking call to asynchronous. But since
> you do not call any blocking call, I agree that pthread_cancel should
> not switch to secondary mode, it should just set a bit in some TCB
> attached to the target thread.
> 
>>> But the problem you should focus on is why the scheduler does not let
>>> pthread_cancel run earlier.
>> Don't know what you mean. The calc_func gets preempted and the ctrl_func
>> calls pthread_cancel as expected...
>>
>>   calc_func: at count 20
>>   calc_func: at count 21
>>   calc_func: at count 22
>>   ctrl_func: cancel at count 23
>>   ^^^^^^^^^
>>   calc_func: at count 23
>>
>> But then it stops somehow in pthread_cancel and calc_func continues to run.
> 
> Yes, but since "ctrl_func: stopped at count 23" does not appear, it
> means that ctrl_func is somehow blocked in pthread_cancel.
> 
> Does the test work if calc_func calls nanosleep instead of
> create_load_100ms ?

Yes.

I'm getting closer now, I think, I hope. pthread_cancel seems to work
only if calc_thread runs in secondary mode. If I set policy and priority
at the beginning of the thread function, neither pthread_setschedparam
nor clock_gettime switches to primary mode and therefore calc_thread runs
in secondary mode. If I add an explicit
pthread_set_mode_np(0, PTHREAD_PRIMARY), pthread_cancel is not able to
terminate the calc_thread anymore, even with pthread_testcancel.

Wolfgang.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 20:02                                       ` Wolfgang Grandegger
@ 2008-12-03 20:02                                         ` Gilles Chanteperdrix
  2008-12-04 15:29                                           ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-03 20:02 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Gilles Chanteperdrix wrote:
>>>>>> Wolfgang Grandegger wrote:
>>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>>> Running under gdb shows:
>>>>>>>>>
>>>>>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>   (gdb) where
>>>>>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>>>>>
>>>>>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>>>>>> pthread_testcancel() as well?
>>>>>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>>>>>> when you try and cancel it.
>>>>>>> Yep, but it should not crash.
>>>>>> The spec says:
>>>>>> The pthread_cancel() function may fail if:
>>>>>>
>>>>>> [ESRCH]
>>>>>>     No thread could be found corresponding to that specified by the
>>>>>> given thread ID.
>>>>>>
>>>>>>
>>>>>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>>>>>> not mandatory.
>>>>> I also got the return value ESRCH in another test. Nevertheless, a crash
>>>>> is not the expected behaviour, to say the least. Here pthread_cancel()
>>>>> obvoiusly get's interrupted and the calc_thread continues. Is it
>>>>> possible that pthread_cancel() switches to secondary mode?
>>>> pthread_cancel switches to secondary mode if it has to send a signal (if
>>>> cancellation is in asynchronous mode, this happens when the target
>>>> thread is blocked inside a blocking call). But this should not be a
>>>> problem with RPI.
>>> I disabled priority coupling in the kernel and it did not help or harm.
>>> This test uses PTHREAD_CANCEL_DEFERRED, which is  also the default, if
>>> I understood correctly.
>> You should definitely enable priority coupling. Even if you use
>> PTHREAD_CANCEL_DEFERRED, when you call a blocking call, the cancellation
>> is switched for the time of the blocking call to asynchronous. But since
>> you do not call any blocking call, I agree that pthread_cancel should
>> not switch to secondary mode, it should just set a bit in some TCB
>> attached to the target thread.
>>
>>>> But the problem you should focus on is why the scheduler does not let
>>>> pthread_cancel run earlier.
>>> Don't know what you mean. The calc_func gets preempted and the ctrl_func
>>> calls pthread_cancel as expected...
>>>
>>>   calc_func: at count 20
>>>   calc_func: at count 21
>>>   calc_func: at count 22
>>>   ctrl_func: cancel at count 23
>>>   ^^^^^^^^^
>>>   calc_func: at count 23
>>>
>>> But then it stops somehow in pthread_cancel and calc_func continues to run.
>> Yes, but since "ctrl_func: stopped at count 23" does not appear, it
>> means that ctrl_func is somehow blocked in pthread_cancel.
>>
>> Does the test work if calc_func calls nanosleep instead of
>> create_load_100ms ?
> 
> Yes.

So, pthread_cancel works even for threads running in primary mode, when
they issue xenomai syscalls.

> 
> I'm getting closer now, I think, I hope. pthread_cancel seems only to
> work if calc_thread runs in secondary mode. If I set policy and priority
> at the beginning of the thread function, nor pthread_setschedparam nor
> clock_gettime switches to primary mode and therefore calc_thread runs in
> secondary mode. If I add explicit
> pthread_set_mode_np(0, PTHREAD_PRIMARY), pthread_cancel is not able to
> terminate the calc_thread anymore, even with pthread_testcancel.

That is not expected. But this brings me back to my initial question, do
you have to work with a real world application that runs without issuing
any syscall ?

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-03 20:02                                         ` Gilles Chanteperdrix
@ 2008-12-04 15:29                                           ` Wolfgang Grandegger
  2008-12-04 15:38                                             ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-04 15:29 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> Gilles Chanteperdrix wrote:
>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>>>> Running under gdb shows:
>>>>>>>>>>
>>>>>>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>>   (gdb) where
>>>>>>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>>>>>>
>>>>>>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>>>>>>> pthread_testcancel() as well?
>>>>>>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>>>>>>> when you try and cancel it.
>>>>>>>> Yep, but it should not crash.
>>>>>>> The spec says:
>>>>>>> The pthread_cancel() function may fail if:
>>>>>>>
>>>>>>> [ESRCH]
>>>>>>>     No thread could be found corresponding to that specified by the
>>>>>>> given thread ID.
>>>>>>>
>>>>>>>
>>>>>>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>>>>>>> not mandatory.
>>>>>> I also got the return value ESRCH in another test. Nevertheless, a crash
>>>>>> is not the expected behaviour, to say the least. Here pthread_cancel()
>>>>>> obvoiusly get's interrupted and the calc_thread continues. Is it
>>>>>> possible that pthread_cancel() switches to secondary mode?
>>>>> pthread_cancel switches to secondary mode if it has to send a signal (if
>>>>> cancellation is in asynchronous mode, this happens when the target
>>>>> thread is blocked inside a blocking call). But this should not be a
>>>>> problem with RPI.
>>>> I disabled priority coupling in the kernel and it did not help or harm.
>>>> This test uses PTHREAD_CANCEL_DEFERRED, which is  also the default, if
>>>> I understood correctly.
>>> You should definitely enable priority coupling. Even if you use
>>> PTHREAD_CANCEL_DEFERRED, when you call a blocking call, the cancellation
>>> is switched for the time of the blocking call to asynchronous. But since
>>> you do not call any blocking call, I agree that pthread_cancel should
>>> not switch to secondary mode, it should just set a bit in some TCB
>>> attached to the target thread.
>>>
>>>>> But the problem you should focus on is why the scheduler does not let
>>>>> pthread_cancel run earlier.
>>>> Don't know what you mean. The calc_func gets preempted and the ctrl_func
>>>> calls pthread_cancel as expected...
>>>>
>>>>   calc_func: at count 20
>>>>   calc_func: at count 21
>>>>   calc_func: at count 22
>>>>   ctrl_func: cancel at count 23
>>>>   ^^^^^^^^^
>>>>   calc_func: at count 23
>>>>
>>>> But then it stops somehow in pthread_cancel and calc_func continues to run.
>>> Yes, but since "ctrl_func: stopped at count 23" does not appear, it
>>> means that ctrl_func is somehow blocked in pthread_cancel.
>>>
>>> Does the test work if calc_func calls nanosleep instead of
>>> create_load_100ms ?
>> Yes.
> 
> So, pthread_cancel works even for threads running in primary mode, when
> they issue xenomai syscalls.
> 
>> I'm getting closer now, I think, I hope. pthread_cancel seems only to
>> work if calc_thread runs in secondary mode. If I set policy and priority
>> at the beginning of the thread function, nor pthread_setschedparam nor
>> clock_gettime switches to primary mode and therefore calc_thread runs in
>> secondary mode. If I add explicit
>> pthread_set_mode_np(0, PTHREAD_PRIMARY), pthread_cancel is not able to
>> terminate the calc_thread anymore, even with pthread_testcancel.
> 
> That is not expected. But this brings me back to my initial question, do
> you have to work with a real world application that runs without issuing
> any syscall ?

If I add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
it fails with short nanosleeps. A syscall seems not sufficient. I have
the impression that pthread_cancel needs some time in secondary mode to
do its duties, e.g. mark the thread as canceled. Would it make sense to
wrap pthread_cancel and friends to the corresponding kernel functions
in ksrc/skins/posix?
It might happen that an application does not block due to overload. Is
there a way to force a thread to switch to secondary mode?

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 15:29                                           ` Wolfgang Grandegger
@ 2008-12-04 15:38                                             ` Gilles Chanteperdrix
  2008-12-04 15:42                                               ` Gilles Chanteperdrix
  2008-12-04 16:31                                               ` Wolfgang Grandegger
  0 siblings, 2 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-04 15:38 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Gilles Chanteperdrix wrote:
>>>>>> Wolfgang Grandegger wrote:
>>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>>>>> Running under gdb shows:
>>>>>>>>>>>
>>>>>>>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>>>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>>>   (gdb) where
>>>>>>>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>>>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>>>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>>>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>>>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>>>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>>>>>>>
>>>>>>>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>>>>>>>> pthread_testcancel() as well?
>>>>>>>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>>>>>>>> when you try and cancel it.
>>>>>>>>> Yep, but it should not crash.
>>>>>>>> The spec says:
>>>>>>>> The pthread_cancel() function may fail if:
>>>>>>>>
>>>>>>>> [ESRCH]
>>>>>>>>     No thread could be found corresponding to that specified by the
>>>>>>>> given thread ID.
>>>>>>>>
>>>>>>>>
>>>>>>>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>>>>>>>> not mandatory.
>>>>>>> I also got the return value ESRCH in another test. Nevertheless, a crash
>>>>>>> is not the expected behaviour, to say the least. Here pthread_cancel()
>>>>>>> obvoiusly get's interrupted and the calc_thread continues. Is it
>>>>>>> possible that pthread_cancel() switches to secondary mode?
>>>>>> pthread_cancel switches to secondary mode if it has to send a signal (if
>>>>>> cancellation is in asynchronous mode, this happens when the target
>>>>>> thread is blocked inside a blocking call). But this should not be a
>>>>>> problem with RPI.
>>>>> I disabled priority coupling in the kernel and it did not help or harm.
>>>>> This test uses PTHREAD_CANCEL_DEFERRED, which is  also the default, if
>>>>> I understood correctly.
>>>> You should definitely enable priority coupling. Even if you use
>>>> PTHREAD_CANCEL_DEFERRED, when you call a blocking call, the cancellation
>>>> is switched for the time of the blocking call to asynchronous. But since
>>>> you do not call any blocking call, I agree that pthread_cancel should
>>>> not switch to secondary mode, it should just set a bit in some TCB
>>>> attached to the target thread.
>>>>
>>>>>> But the problem you should focus on is why the scheduler does not let
>>>>>> pthread_cancel run earlier.
>>>>> Don't know what you mean. The calc_func gets preempted and the ctrl_func
>>>>> calls pthread_cancel as expected...
>>>>>
>>>>>   calc_func: at count 20
>>>>>   calc_func: at count 21
>>>>>   calc_func: at count 22
>>>>>   ctrl_func: cancel at count 23
>>>>>   ^^^^^^^^^
>>>>>   calc_func: at count 23
>>>>>
>>>>> But then it stops somehow in pthread_cancel and calc_func continues to run.
>>>> Yes, but since "ctrl_func: stopped at count 23" does not appear, it
>>>> means that ctrl_func is somehow blocked in pthread_cancel.
>>>>
>>>> Does the test work if calc_func calls nanosleep instead of
>>>> create_load_100ms ?
>>> Yes.
>> So, pthread_cancel works even for threads running in primary mode, when
>> they issue xenomai syscalls.
>>
>>> I'm getting closer now, I think, I hope. pthread_cancel seems only to
>>> work if calc_thread runs in secondary mode. If I set policy and priority
>>> at the beginning of the thread function, nor pthread_setschedparam nor
>>> clock_gettime switches to primary mode and therefore calc_thread runs in
>>> secondary mode. If I add explicit
>>> pthread_set_mode_np(0, PTHREAD_PRIMARY), pthread_cancel is not able to
>>> terminate the calc_thread anymore, even with pthread_testcancel.
>> That is not expected. But this brings me back to my initial question, do
>> you have to work with a real world application that runs without issuing
>> any syscall ?
> 
> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
> it fails with short nanosleeps. A syscall seems not sufficient. I have
> the impression that pthread_cancel needs some time in secondary mode to

When calling nanosleep, the threads spends on time in secondary mode. I
think the problem is rather that only asynchronous cancelation (meaning
cancelation with a signal) works. Setting the cancelation bit somehow
gets lost.

> do it's duties, e.g. mark the thread as canceled. Would it make sense to
> wrap pthread_cancel, and friends to the corresponding kernel functions
> in ksrc/skins/posix?
> Is there a way to force a thread switching to secondary mode?

No, there is no way to force a thread to switch to secondary mode, the
xnshadow_relax call explicitly has to be called by the target
thread. Before I wrap pthread_cancel, I would really like to understand
why setting a bit with pthread_cancel and testing it with
pthread_testcancel does not work.

What is the trace of your test when run:
- on ARM
- with root thread priority inheritance,
- with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
PTHREAD_CANCEL_DEFERRED
- posting a semaphore in ctrl_func before calling nanosleep, and waiting
for that semaphore in main before creating the calc_func thread.

> It might happen that an application does not block due to overload.

IMO, we do not care much about these cases, the watchdog is there to
catch them.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 15:38                                             ` Gilles Chanteperdrix
@ 2008-12-04 15:42                                               ` Gilles Chanteperdrix
  2008-12-04 16:31                                               ` Wolfgang Grandegger
  1 sibling, 0 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-04 15:42 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> Gilles Chanteperdrix wrote:
>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>>>> Gilles Chanteperdrix wrote:
>>>>>>>>>>> Wolfgang Grandegger wrote:
>>>>>>>>>>>> Running under gdb shows:
>>>>>>>>>>>>
>>>>>>>>>>>>   Program received signal SIGSEGV, Segmentation fault.
>>>>>>>>>>>>   [Switching to Thread 0x4885d4b0 (LWP 1127)]
>>>>>>>>>>>>   0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>>>>   (gdb) where
>>>>>>>>>>>>   #0  0x0ff49100 in pthread_cancel () from /lib/libpthread.so.0
>>>>>>>>>>>>   #1  0x10001d64 in ctrl_func (parm=0x0) at cancel-test.c:104
>>>>>>>>>>>>   #2  0x0ffa98e4 in __pthread_trampoline ()
>>>>>>>>>>>>      from /home/wolf/xenomai/lib/libpthread_rt.so.1
>>>>>>>>>>>>   #3  0x0ff42a6c in start_thread () from /lib/libpthread.so.0
>>>>>>>>>>>>   #4  0x0fdd18a0 in clone () from /lib/libc.so.6
>>>>>>>>>>>>   Backtrace stopped: previous frame inner to this frame (corrupt stack?)
>>>>>>>>>>>>
>>>>>>>>>>>> Is pthread_cancel used from the Linux pthread library? And
>>>>>>>>>>>> pthread_testcancel() as well?
>>>>>>>>>>> Yes, and I guess, as you said, that it happens because calc_func is dead
>>>>>>>>>>> when you try and cancel it.
>>>>>>>>>> Yep, but it should not crash.
>>>>>>>>> The spec says:
>>>>>>>>> The pthread_cancel() function may fail if:
>>>>>>>>>
>>>>>>>>> [ESRCH]
>>>>>>>>>     No thread could be found corresponding to that specified by the
>>>>>>>>> given thread ID.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> So, it is a "may", returning ESRCH, as Xenomai does in kernel-space, is
>>>>>>>>> not mandatory.
>>>>>>>> I also got the return value ESRCH in another test. Nevertheless, a crash
>>>>>>>> is not the expected behaviour, to say the least. Here pthread_cancel()
>>>>>>>> obvoiusly get's interrupted and the calc_thread continues. Is it
>>>>>>>> possible that pthread_cancel() switches to secondary mode?
>>>>>>> pthread_cancel switches to secondary mode if it has to send a signal (if
>>>>>>> cancellation is in asynchronous mode, this happens when the target
>>>>>>> thread is blocked inside a blocking call). But this should not be a
>>>>>>> problem with RPI.
>>>>>> I disabled priority coupling in the kernel and it did not help or harm.
>>>>>> This test uses PTHREAD_CANCEL_DEFERRED, which is  also the default, if
>>>>>> I understood correctly.
>>>>> You should definitely enable priority coupling. Even if you use
>>>>> PTHREAD_CANCEL_DEFERRED, when you call a blocking call, the cancellation
>>>>> is switched for the time of the blocking call to asynchronous. But since
>>>>> you do not call any blocking call, I agree that pthread_cancel should
>>>>> not switch to secondary mode, it should just set a bit in some TCB
>>>>> attached to the target thread.
>>>>>
>>>>>>> But the problem you should focus on is why the scheduler does not let
>>>>>>> pthread_cancel run earlier.
>>>>>> Don't know what you mean. The calc_func gets preempted and the ctrl_func
>>>>>> calls pthread_cancel as expected...
>>>>>>
>>>>>>   calc_func: at count 20
>>>>>>   calc_func: at count 21
>>>>>>   calc_func: at count 22
>>>>>>   ctrl_func: cancel at count 23
>>>>>>   ^^^^^^^^^
>>>>>>   calc_func: at count 23
>>>>>>
>>>>>> But then it stops somehow in pthread_cancel and calc_func continues to run.
>>>>> Yes, but since "ctrl_func: stopped at count 23" does not appear, it
>>>>> means that ctrl_func is somehow blocked in pthread_cancel.
>>>>>
>>>>> Does the test work if calc_func calls nanosleep instead of
>>>>> create_load_100ms ?
>>>> Yes.
>>> So, pthread_cancel works even for threads running in primary mode, when
>>> they issue xenomai syscalls.
>>>
>>>> I'm getting closer now, I think, I hope. pthread_cancel seems only to
>>>> work if calc_thread runs in secondary mode. If I set policy and priority
>>>> at the beginning of the thread function, nor pthread_setschedparam nor
>>>> clock_gettime switches to primary mode and therefore calc_thread runs in
>>>> secondary mode. If I add explicit
>>>> pthread_set_mode_np(0, PTHREAD_PRIMARY), pthread_cancel is not able to
>>>> terminate the calc_thread anymore, even with pthread_testcancel.
>>> That is not expected. But this brings me back to my initial question, do
>>> you have to work with a real world application that runs without issuing
>>> any syscall ?
>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>> the impression that pthread_cancel needs some time in secondary mode to
> 
> When calling nanosleep, the threads spends on time in secondary mode. I
> think the problem is rather that only asynchronous cancelation (meaning
> cancelation with a signal) works. Setting the cancelation bit somehow
> gets lost.
> 
>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>> wrap pthread_cancel, and friends to the corresponding kernel functions
>> in ksrc/skins/posix?
>> Is there a way to force a thread switching to secondary mode?
> 
> No, there is no way to force a thread to switch to secondary mode, the
> xnshadow_relax call explicitely requires to be called by the target
> thread. Before I wrap pthread_cancel, I would really like to understand
> why setting a bit with pthread_cancel and testing it with
> pthread_testcancel does not work.
> 
> What is the trace of your test when run:
> - on ARM

By the way, could there not be an NPTL vs. LinuxThreads difference between
ARM and PowerPC?

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 16:31                                               ` Wolfgang Grandegger
@ 2008-12-04 16:26                                                 ` Gilles Chanteperdrix
  2008-12-04 16:49                                                   ` Wolfgang Grandegger
  0 siblings, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-04 16:26 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
> [...deletions...]
>>>> That is not expected. But this brings me back to my initial question, do
>>>> you have to work with a real world application that runs without issuing
>>>> any syscall ?
>>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>>> the impression that pthread_cancel needs some time in secondary mode to
>> When calling nanosleep, the threads spends on time in secondary mode. I
> 
> What do you mean? nanosleep switches to secondary mode?

I meant spends "no" time in secondary mode. nanosleep does not switch to
secondary mode.

> 
>> think the problem is rather that only asynchronous cancelation (meaning
>> cancelation with a signal) works. Setting the cancelation bit somehow
>> gets lost.
>>
>>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>>> wrap pthread_cancel, and friends to the corresponding kernel functions
>>> in ksrc/skins/posix?
>>> Is there a way to force a thread switching to secondary mode?
>> No, there is no way to force a thread to switch to secondary mode, the
>> xnshadow_relax call explicitely requires to be called by the target
>> thread. Before I wrap pthread_cancel, I would really like to understand
>> why setting a bit with pthread_cancel and testing it with
>> pthread_testcancel does not work.
>>
>> What is the trace of your test when run:
>> - on ARM
>> - with root thread priority inheritance,
>> - with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
>> PTHREAD_CANCEL_DEFERRED
>> - posting a semaphore in ctrl_func before calling nanosleep, and waiting
>> for that semaphore in main before creating the calc_func thread.
> 
> The calc_thread is not canceled and a segmentation fault happens.
> Canceling the calc_thread does *not* work if it's running in primrary
> mode, nor on ARM, nor on PowerPC. It does work when calc_thread does run
> in secondary mode, which happened with USE_EXPLICIT_SCHED not set. If
> you want do to some tests, I could send you my revised test program.
> My conclusion is that pthread_cancel can not be used to cancel busy
> realtime threads. I'm going to re-write the program using the native
> skin and rt_task_delete.

Yes, but what interests me is how it fails, that is seeing the trace of
your program.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 15:38                                             ` Gilles Chanteperdrix
  2008-12-04 15:42                                               ` Gilles Chanteperdrix
@ 2008-12-04 16:31                                               ` Wolfgang Grandegger
  2008-12-04 16:26                                                 ` Gilles Chanteperdrix
  1 sibling, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-04 16:31 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
[...deletions...]
>>> That is not expected. But this brings me back to my initial question, do
>>> you have to work with a real world application that runs without issuing
>>> any syscall ?
>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>> the impression that pthread_cancel needs some time in secondary mode to
> 
> When calling nanosleep, the threads spends on time in secondary mode. I

What do you mean? nanosleep switches to secondary mode?

> think the problem is rather that only asynchronous cancelation (meaning
> cancelation with a signal) works. Setting the cancelation bit somehow
> gets lost.
> 
>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>> wrap pthread_cancel, and friends to the corresponding kernel functions
>> in ksrc/skins/posix?
>> Is there a way to force a thread switching to secondary mode?
> 
> No, there is no way to force a thread to switch to secondary mode, the
> xnshadow_relax call explicitely requires to be called by the target
> thread. Before I wrap pthread_cancel, I would really like to understand
> why setting a bit with pthread_cancel and testing it with
> pthread_testcancel does not work.
> 
> What is the trace of your test when run:
> - on ARM
> - with root thread priority inheritance,
> - with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
> PTHREAD_CANCEL_DEFERRED
> - posting a semaphore in ctrl_func before calling nanosleep, and waiting
> for that semaphore in main before creating the calc_func thread.

The calc_thread is not canceled and a segmentation fault happens.
Canceling the calc_thread does *not* work if it's running in primary
mode, neither on ARM nor on PowerPC. It does work when calc_thread runs
in secondary mode, which happened with USE_EXPLICIT_SCHED not set. If
you want to do some tests, I could send you my revised test program.
My conclusion is that pthread_cancel can not be used to cancel busy
realtime threads. I'm going to re-write the program using the native
skin and rt_task_delete.

>> It might happen that an application does not block due to overload.
> 
> IMO, we do not care much about these cases, the watchdog is there to
> catch them.

But then pthread_cancel is not really useful. Unfortunately, this
application needs some special measures to allow Linux to breathe,
especially in the initialization phase, e.g. with SCHED_SPORADIC.

Thanks.

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 16:26                                                 ` Gilles Chanteperdrix
@ 2008-12-04 16:49                                                   ` Wolfgang Grandegger
  2008-12-04 17:02                                                     ` Gilles Chanteperdrix
  0 siblings, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-04 16:49 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>> [...deletions...]
>>>>> That is not expected. But this brings me back to my initial question, do
>>>>> you have to work with a real world application that runs without issuing
>>>>> any syscall ?
>>>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>>>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>>>> the impression that pthread_cancel needs some time in secondary mode to
>>> When calling nanosleep, the threads spends on time in secondary mode. I
>> What do you mean? nanosleep switches to secondary mode?
> 
> I meant spends "no" time in secondary mode. nanosleep does not switch to
> secondary mode.
> 
>>> think the problem is rather that only asynchronous cancelation (meaning
>>> cancelation with a signal) works. Setting the cancelation bit somehow
>>> gets lost.
>>>
>>>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>>>> wrap pthread_cancel, and friends to the corresponding kernel functions
>>>> in ksrc/skins/posix?
>>>> Is there a way to force a thread switching to secondary mode?
>>> No, there is no way to force a thread to switch to secondary mode, the
>>> xnshadow_relax call explicitely requires to be called by the target
>>> thread. Before I wrap pthread_cancel, I would really like to understand
>>> why setting a bit with pthread_cancel and testing it with
>>> pthread_testcancel does not work.
>>>
>>> What is the trace of your test when run:
>>> - on ARM
>>> - with root thread priority inheritance,
>>> - with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
>>> PTHREAD_CANCEL_DEFERRED
>>> - posting a semaphore in ctrl_func before calling nanosleep, and waiting
>>> for that semaphore in main before creating the calc_func thread.
>> The calc_thread is not canceled and a segmentation fault happens.
>> Canceling the calc_thread does *not* work if it's running in primrary
>> mode, nor on ARM, nor on PowerPC. It does work when calc_thread does run
>> in secondary mode, which happened with USE_EXPLICIT_SCHED not set. If
>> you want do to some tests, I could send you my revised test program.
>> My conclusion is that pthread_cancel can not be used to cancel busy
>> realtime threads. I'm going to re-write the program using the native
>> skin and rt_task_delete.
> 
> Yes, but what interests me is how it fails, that is seeing the trace of
> your program.

With pthread_cancel you do not see too much because the rt_printf
messages do not get printed:

-bash-3.2# ./cancel-test
Real-Time debugging started
ctrl_func: started at count 0
Now starting calc_thread (ctrl_started=1)
Segmentation fault

If I comment out pthread_cancel, I get:

-bash-3.2# ./cancel-test
Real-Time debugging started
ctrl_func: started at count 0
Now starting calc_thread (ctrl_started=1)
calc_func: counting till 50
ctrl_func: sleeping for 2sec 500000000ns
calc_func: at count 0
calc_func: at count 1
calc_func: at count 2
calc_func: at count 3
calc_func: at count 4
calc_func: at count 5
calc_func: at count 6
calc_func: at count 7
calc_func: at count 8
calc_func: at count 9
calc_func: at count 10
calc_func: at count 11
calc_func: at count 12
calc_func: at count 13
calc_func: at count 14
calc_func: at count 15
calc_func: at count 16
calc_func: at count 17
calc_func: at count 18
calc_func: at count 19
calc_func: at count 20
calc_func: at count 21
calc_func: at count 22
calc_func: at count 23
ctrl_func: cancel at count 24
ctrl_func: stopped at count 24
calc_func: at count 24
calc_func: at count 25
calc_func: at count 26
calc_func: at count 27
calc_func: at count 28
calc_func: at count 29
calc_func: at count 30
calc_func: at count 31
calc_func: at count 32
calc_func: at count 33
calc_func: at count 34
calc_func: at count 35
calc_func: at count 36
calc_func: at count 37
calc_func: at count 38
calc_func: at count 39
calc_func: at count 40
calc_func: at count 41
calc_func: at count 42
calc_func: at count 43
calc_func: at count 44
calc_func: at count 45
calc_func: at count 46
calc_func: at count 47
calc_func: at count 48
calc_func: at count 49
calc_func: stopped at count 50
Unexpected thread status
main terminating in 2 seconds...

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 16:49                                                   ` Wolfgang Grandegger
@ 2008-12-04 17:02                                                     ` Gilles Chanteperdrix
  2008-12-04 17:52                                                       ` Wolfgang Grandegger
  2008-12-05 14:58                                                       ` Wolfgang Grandegger
  0 siblings, 2 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-04 17:02 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Gilles Chanteperdrix wrote:
>>> [...deletions...]
>>>>>> That is not expected. But this brings me back to my initial question, do
>>>>>> you have to work with a real world application that runs without issuing
>>>>>> any syscall ?
>>>>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>>>>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>>>>> the impression that pthread_cancel needs some time in secondary mode to
>>>> When calling nanosleep, the threads spends on time in secondary mode. I
>>> What do you mean? nanosleep switches to secondary mode?
>> I meant spends "no" time in secondary mode. nanosleep does not switch to
>> secondary mode.
>>
>>>> think the problem is rather that only asynchronous cancelation (meaning
>>>> cancelation with a signal) works. Setting the cancelation bit somehow
>>>> gets lost.
>>>>
>>>>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>>>>> wrap pthread_cancel, and friends to the corresponding kernel functions
>>>>> in ksrc/skins/posix?
>>>>> Is there a way to force a thread switching to secondary mode?
>>>> No, there is no way to force a thread to switch to secondary mode, the
>>>> xnshadow_relax call explicitely requires to be called by the target
>>>> thread. Before I wrap pthread_cancel, I would really like to understand
>>>> why setting a bit with pthread_cancel and testing it with
>>>> pthread_testcancel does not work.
>>>>
>>>> What is the trace of your test when run:
>>>> - on ARM
>>>> - with root thread priority inheritance,
>>>> - with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
>>>> PTHREAD_CANCEL_DEFERRED
>>>> - posting a semaphore in ctrl_func before calling nanosleep, and waiting
>>>> for that semaphore in main before creating the calc_func thread.
>>> The calc_thread is not canceled and a segmentation fault happens.
>>> Canceling the calc_thread does *not* work if it's running in primrary
>>> mode, nor on ARM, nor on PowerPC. It does work when calc_thread does run
>>> in secondary mode, which happened with USE_EXPLICIT_SCHED not set. If
>>> you want do to some tests, I could send you my revised test program.
>>> My conclusion is that pthread_cancel can not be used to cancel busy
>>> realtime threads. I'm going to re-write the program using the native
>>> skin and rt_task_delete.
>> Yes, but what interests me is how it fails, that is seeing the trace of
>> your program.
> 
> With pthread_cancel you do not see too much because the rt_printf
> messages do not get printed:

As a temporary quirk, could you try, in rtdk, to install a handler for
SIGSEGV which flushes the print buffers ?

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 17:52                                                       ` Wolfgang Grandegger
@ 2008-12-04 17:51                                                         ` Gilles Chanteperdrix
  0 siblings, 0 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-04 17:51 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Gilles Chanteperdrix wrote:
>>>>>> Wolfgang Grandegger wrote:
>>>>>>> Gilles Chanteperdrix wrote:
>>>>> [...deletions...]
>>>>>>>> That is not expected. But this brings me back to my initial question, do
>>>>>>>> you have to work with a real world application that runs without issuing
>>>>>>>> any syscall ?
>>>>>>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>>>>>>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>>>>>>> the impression that pthread_cancel needs some time in secondary mode to
>>>>>> When calling nanosleep, the threads spends on time in secondary mode. I
>>>>> What do you mean? nanosleep switches to secondary mode?
>>>> I meant spends "no" time in secondary mode. nanosleep does not switch to
>>>> secondary mode.
>>>>
>>>>>> think the problem is rather that only asynchronous cancelation (meaning
>>>>>> cancelation with a signal) works. Setting the cancelation bit somehow
>>>>>> gets lost.
>>>>>>
>>>>>>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>>>>>>> wrap pthread_cancel, and friends to the corresponding kernel functions
>>>>>>> in ksrc/skins/posix?
>>>>>>> Is there a way to force a thread switching to secondary mode?
>>>>>> No, there is no way to force a thread to switch to secondary mode, the
>>>>>> xnshadow_relax call explicitely requires to be called by the target
>>>>>> thread. Before I wrap pthread_cancel, I would really like to understand
>>>>>> why setting a bit with pthread_cancel and testing it with
>>>>>> pthread_testcancel does not work.
>>>>>>
>>>>>> What is the trace of your test when run:
>>>>>> - on ARM
>>>>>> - with root thread priority inheritance,
>>>>>> - with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
>>>>>> PTHREAD_CANCEL_DEFERRED
>>>>>> - posting a semaphore in ctrl_func before calling nanosleep, and waiting
>>>>>> for that semaphore in main before creating the calc_func thread.
>>>>> The calc_thread is not canceled and a segmentation fault happens.
>>>>> Canceling the calc_thread does *not* work if it's running in primrary
>>>>> mode, nor on ARM, nor on PowerPC. It does work when calc_thread does run
>>>>> in secondary mode, which happened with USE_EXPLICIT_SCHED not set. If
>>>>> you want do to some tests, I could send you my revised test program.
>>>>> My conclusion is that pthread_cancel can not be used to cancel busy
>>>>> realtime threads. I'm going to re-write the program using the native
>>>>> skin and rt_task_delete.
>>>> Yes, but what interests me is how it fails, that is seeing the trace of
>>>> your program.
>>> With pthread_cancel you do not see too much because the rt_printf
>>> messages do not get printed:
>> As a temporary quirk, could you try, in rtdk, to install a handler for
>> SIGSEGV which flushes the print buffers ?
> 
> OK. Hm, flushing the rt_print buffers before exit is what I'm actually
> looking for. Registering an cleanup handler would already be enough. But
>   an active real-time task will also delay the exit().

In case of segmentation fault, the SIGSEGV signal is sent to the
application and exit is not called at all, which is why the buffers do
not get flushed, and why I am suggesting to flush them in the SIGSEGV
handler.

> 
> Nevertheless, I tried with the native skin and it cancellation works
> fine with:
> 
> 		rt_task_suspend(&calc_thread);
> 		rt_task_delete(&calc_thread);
> 
> Can I do a task suspend with the Xenomai POSIX skin?

You should not need to suspend the task with the native skin either.
With the posix skin, you still have the same problem: send the SIGSTOP
signal to the task, but it will not work if the task does not issue any
syscall.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 17:02                                                     ` Gilles Chanteperdrix
@ 2008-12-04 17:52                                                       ` Wolfgang Grandegger
  2008-12-04 17:51                                                         ` Gilles Chanteperdrix
  2008-12-05 14:58                                                       ` Wolfgang Grandegger
  1 sibling, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-04 17:52 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> Gilles Chanteperdrix wrote:
>>>> [...deletions...]
>>>>>>> That is not expected. But this brings me back to my initial question, do
>>>>>>> you have to work with a real world application that runs without issuing
>>>>>>> any syscall ?
>>>>>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>>>>>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>>>>>> the impression that pthread_cancel needs some time in secondary mode to
>>>>> When calling nanosleep, the threads spends on time in secondary mode. I
>>>> What do you mean? nanosleep switches to secondary mode?
>>> I meant spends "no" time in secondary mode. nanosleep does not switch to
>>> secondary mode.
>>>
>>>>> think the problem is rather that only asynchronous cancelation (meaning
>>>>> cancelation with a signal) works. Setting the cancelation bit somehow
>>>>> gets lost.
>>>>>
>>>>>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>>>>>> wrap pthread_cancel, and friends to the corresponding kernel functions
>>>>>> in ksrc/skins/posix?
>>>>>> Is there a way to force a thread switching to secondary mode?
>>>>> No, there is no way to force a thread to switch to secondary mode, the
>>>>> xnshadow_relax call explicitely requires to be called by the target
>>>>> thread. Before I wrap pthread_cancel, I would really like to understand
>>>>> why setting a bit with pthread_cancel and testing it with
>>>>> pthread_testcancel does not work.
>>>>>
>>>>> What is the trace of your test when run:
>>>>> - on ARM
>>>>> - with root thread priority inheritance,
>>>>> - with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
>>>>> PTHREAD_CANCEL_DEFERRED
>>>>> - posting a semaphore in ctrl_func before calling nanosleep, and waiting
>>>>> for that semaphore in main before creating the calc_func thread.
>>>> The calc_thread is not canceled and a segmentation fault happens.
>>>> Canceling the calc_thread does *not* work if it's running in primrary
>>>> mode, nor on ARM, nor on PowerPC. It does work when calc_thread does run
>>>> in secondary mode, which happened with USE_EXPLICIT_SCHED not set. If
>>>> you want do to some tests, I could send you my revised test program.
>>>> My conclusion is that pthread_cancel can not be used to cancel busy
>>>> realtime threads. I'm going to re-write the program using the native
>>>> skin and rt_task_delete.
>>> Yes, but what interests me is how it fails, that is seeing the trace of
>>> your program.
>> With pthread_cancel you do not see too much because the rt_printf
>> messages do not get printed:
> 
> As a temporary quirk, could you try, in rtdk, to install a handler for
> SIGSEGV which flushes the print buffers ?

OK. Hm, flushing the rt_print buffers before exit is what I'm actually
looking for. Registering a cleanup handler would already be enough. But
an active real-time task will also delay the exit().

Nevertheless, I tried with the native skin and cancellation works
fine with:

		rt_task_suspend(&calc_thread);
		rt_task_delete(&calc_thread);

Can I do a task suspend with the Xenomai POSIX skin?

Wolfgang.



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-04 17:02                                                     ` Gilles Chanteperdrix
  2008-12-04 17:52                                                       ` Wolfgang Grandegger
@ 2008-12-05 14:58                                                       ` Wolfgang Grandegger
  1 sibling, 0 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-05 14:58 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> Gilles Chanteperdrix wrote:
>>>> [...deletions...]
>>>>>>> That is not expected. But this brings me back to my initial question, do
>>>>>>> you have to work with a real world application that runs without issuing
>>>>>>> any syscall ?
>>>>>> If a add long nanosleeps, e.g. 100, 10 or 1 ms, cancellation works but
>>>>>> it fails with short nanosleeps. A syscall seems not sufficient. I have
>>>>>> the impression that pthread_cancel needs some time in secondary mode to
>>>>> When calling nanosleep, the threads spends on time in secondary mode. I
>>>> What do you mean? nanosleep switches to secondary mode?
>>> I meant spends "no" time in secondary mode. nanosleep does not switch to
>>> secondary mode.
>>>
>>>>> think the problem is rather that only asynchronous cancelation (meaning
>>>>> cancelation with a signal) works. Setting the cancelation bit somehow
>>>>> gets lost.
>>>>>
>>>>>> do it's duties, e.g. mark the thread as canceled. Would it make sense to
>>>>>> wrap pthread_cancel, and friends to the corresponding kernel functions
>>>>>> in ksrc/skins/posix?
>>>>>> Is there a way to force a thread switching to secondary mode?
>>>>> No, there is no way to force a thread to switch to secondary mode, the
>>>>> xnshadow_relax call explicitely requires to be called by the target
>>>>> thread. Before I wrap pthread_cancel, I would really like to understand
>>>>> why setting a bit with pthread_cancel and testing it with
>>>>> pthread_testcancel does not work.
>>>>>
>>>>> What is the trace of your test when run:
>>>>> - on ARM
>>>>> - with root thread priority inheritance,
>>>>> - with USE_EXPLICIT_SCHED and USE_TEST_CANCEL, and CANCEL_TYPE set to
>>>>> PTHREAD_CANCEL_DEFERRED
>>>>> - posting a semaphore in ctrl_func before calling nanosleep, and waiting
>>>>> for that semaphore in main before creating the calc_func thread.
>>>> The calc_thread is not canceled and a segmentation fault happens.
>>>> Canceling the calc_thread does *not* work if it's running in primrary
>>>> mode, nor on ARM, nor on PowerPC. It does work when calc_thread does run
>>>> in secondary mode, which happened with USE_EXPLICIT_SCHED not set. If
>>>> you want do to some tests, I could send you my revised test program.
>>>> My conclusion is that pthread_cancel can not be used to cancel busy
>>>> realtime threads. I'm going to re-write the program using the native
>>>> skin and rt_task_delete.
>>> Yes, but what interests me is how it fails, that is seeing the trace of
>>> your program.
>> With pthread_cancel you do not see too much because the rt_printf
>> messages do not get printed:
> 
> As a temporary quirk, could you try, in rtdk, to install a handler for
> SIGSEGV which flushes the print buffers ?

Here it comes... With

void sigsegv_handler(int sig)
{
	printf("%s: signal %d catched\n", __func__, sig);
	rt_print_cleanup();
	exit(2);
}

I get:

-bash-3.2# ./cancel-test
Real-Time debugging started
ctrl_func: started at count 0
Now starting calc_thread (ctrl_started=1)
calc_func: counting till 50
sigsegv_handler: signal 11 catched
ctrl_func: sleeping for 2sec 500000000ns
calc_func: at count 0
calc_func: at count 1
calc_func: at count 2
calc_func: at count 3
calc_func: at count 4
calc_func: at count 5
calc_func: at count 6
calc_func: at count 7
calc_func: at count 8
calc_func: at count 9
calc_func: at count 10
calc_func: at count 11
calc_func: at count 12
calc_func: at count 13
calc_func: at count 14
calc_func: at count 15
calc_func: at count 16
calc_func: at count 17
calc_func: at count 18
calc_func: at count 19
calc_func: at count 20
calc_func: at count 21
calc_func: at count 22
calc_func: at count 23
ctrl_func: cancel at count 24
calc_func: at count 24
calc_func: at count 25
calc_func: at count 26
calc_func: at count 27
calc_func: at count 28
calc_func: at count 29
calc_func: at count 30
calc_func: at count 31
calc_func: at count 32
calc_func: at count 33
calc_func: at count 34
calc_func: at count 35
calc_func: at count 36
calc_func: at count 37
calc_func: at count 38
calc_func: at count 39
calc_func: at count 40
calc_func: at count 41
calc_func: at count 42
calc_func: at count 43
calc_func: at count 44
calc_func: at count 45
calc_func: at count 46
calc_func: at count 47
calc_func: at count 48
calc_func: at count 49
calc_func: stopped at count 50

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-02 18:35                 ` Gilles Chanteperdrix
  2008-12-02 19:50                   ` Wolfgang Grandegger
@ 2008-12-07 16:05                   ` Wolfgang Grandegger
  2008-12-10 11:16                     ` Wolfgang Grandegger
  1 sibling, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-07 16:05 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Hi Gilles,

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:

>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>> to secondary mode (sometimes).
> 
> That is normal. The glibc caches threads priority value, so we have to
> call __real_pthread_setschedparam to update them. This issue has been
> solved differently on trunk, but unfortunately, we can not backport this
> modification on v2.4.x branch.

Just to make sure I understand you correctly: with v2.4.x it is not
possible with the POSIX skin to change the priority of a real-time
thread in primary mode without losing determinism (because it will
switch to secondary mode). What options do I have?

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-07 16:05                   ` Wolfgang Grandegger
@ 2008-12-10 11:16                     ` Wolfgang Grandegger
  2008-12-11 15:26                       ` Jan Kiszka
  2009-01-01 13:34                       ` Gilles Chanteperdrix
  0 siblings, 2 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-10 11:16 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Hi Gilles,
> 
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
> 
>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>> to secondary mode (sometimes).
>> That is normal. The glibc caches threads priority value, so we have to
>> call __real_pthread_setschedparam to update them. This issue has been
>> solved differently on trunk, but unfortunately, we can not backport this
>> modification on v2.4.x branch.
> 
> To get you right. With v2.4.x it is not possible with the POSIX skin to
> change the priority of a real-time thread in the primary mode without
> loosing determinism (because it will switch to secondary mode). What
> options do I have?

I gave Xenomai trunk a try and pthread_setschedparam() does not switch
to secondary mode any more on my PowerPC test system. Nice, I just get
an Oops in thread_delete from time to time. More on that issue later.
For my ARM i.MX31 system, I need a few patches to get the Xenomai sources
compiled:

Index: include/asm-generic/bits/bind.h
===================================================================
--- include/asm-generic/bits/bind.h	(revision 4450)
+++ include/asm-generic/bits/bind.h	(working copy)
@@ -72,7 +72,7 @@
 	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
 	if (err) {
 		fprintf(stderr, "Xenomai: error obtaining handle for current "
-			"thread: %s\n", strerror(err));
+			"thread: %s\n", strerror(-err));
 		exit(1);
 	}
 	__xeno_set_current(current);
Index: include/asm-arm/syscall.h
===================================================================
--- include/asm-arm/syscall.h	(revision 4450)
+++ include/asm-arm/syscall.h	(working copy)
@@ -228,7 +228,7 @@
 	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
 	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
         const unsigned mask = __xn_tscinfo.u.fr.mask;
-	register unsigned long long after, before;
+	register unsigned long long result;
         unsigned counter;
 
         __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));


When I then start my application or cyclictest I get:

-bash-3.2# ./cyclictest -n 
Xenomai: error obtaining handle for current thread: Operation not permitted

As the latency program works fine, it seems to be a problem with the
POSIX skin. Any idea what it is?

TIA,

Wolfgang.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-10 11:16                     ` Wolfgang Grandegger
@ 2008-12-11 15:26                       ` Jan Kiszka
  2008-12-13 15:55                         ` Wolfgang Grandegger
  2009-01-01 13:34                       ` Gilles Chanteperdrix
  1 sibling, 1 reply; 55+ messages in thread
From: Jan Kiszka @ 2008-12-11 15:26 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Wolfgang Grandegger wrote:
>> Hi Gilles,
>>
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>> to secondary mode (sometimes).
>>> That is normal. The glibc caches threads priority value, so we have to
>>> call __real_pthread_setschedparam to update them. This issue has been
>>> solved differently on trunk, but unfortunately, we can not backport this
>>> modification on v2.4.x branch.
>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>> change the priority of a real-time thread in the primary mode without
>> loosing determinism (because it will switch to secondary mode). What
>> options do I have?
> 
> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
> to secondary mode any more on my PowerPC test system. Nice, I just get
> an Oops in thread_delete from time to time. More on that issue later.
> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
> compiled:
> 
> Index: include/asm-generic/bits/bind.h
> ===================================================================
> --- include/asm-generic/bits/bind.h	(revision 4450)
> +++ include/asm-generic/bits/bind.h	(working copy)
> @@ -72,7 +72,7 @@
>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>  	if (err) {
>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
> -			"thread: %s\n", strerror(err));
> +			"thread: %s\n", strerror(-err));
>  		exit(1);
>  	}
>  	__xeno_set_current(current);
> Index: include/asm-arm/syscall.h
> ===================================================================
> --- include/asm-arm/syscall.h	(revision 4450)
> +++ include/asm-arm/syscall.h	(working copy)
> @@ -228,7 +228,7 @@
>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>          const unsigned mask = __xn_tscinfo.u.fr.mask;
> -	register unsigned long long after, before;
> +	register unsigned long long result;
>          unsigned counter;
>  
>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
> 
> 
> When I then start my application or cyclictest I get:
> 
> -bash-3.2# ./cyclictest -n 
> Xenomai: error obtaining handle for current thread: Operation not permitted
> 
> As the latency program works fine, it seems to be a problem with the
> POSIX skin. Any idea what it is?

First of all, are you sure that all parts were properly updated and in
sync now?

The error means that something went wrong during the
__wrap_pthread_setschedparam execution on __init_posix_interface. More
precisely, we lack the shadow thread for the current (main) thread for
unknown reasons.

Could you fire up gdb, step through this and check (looking at
/proc/xenomai/sched) what happens on ARM here? x86 was fine the last
time I checked (will recheck later today).

Jan

-- 
Siemens AG, Corporate Technology, CT SE 26
Corporate Competence Center Embedded Linux


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-11 15:26                       ` Jan Kiszka
@ 2008-12-13 15:55                         ` Wolfgang Grandegger
  0 siblings, 0 replies; 55+ messages in thread
From: Wolfgang Grandegger @ 2008-12-13 15:55 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-help

Jan Kiszka wrote:
> Wolfgang Grandegger wrote:
>> Wolfgang Grandegger wrote:
>>> Hi Gilles,
>>>
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>>> to secondary mode (sometimes).
>>>> That is normal. The glibc caches threads priority value, so we have to
>>>> call __real_pthread_setschedparam to update them. This issue has been
>>>> solved differently on trunk, but unfortunately, we can not backport this
>>>> modification on v2.4.x branch.
>>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>>> change the priority of a real-time thread in the primary mode without
>>> loosing determinism (because it will switch to secondary mode). What
>>> options do I have?
>> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
>> to secondary mode any more on my PowerPC test system. Nice, I just get
>> an Oops in thread_delete from time to time. More on that issue later.
>> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
>> compiled:
>>
>> Index: include/asm-generic/bits/bind.h
>> ===================================================================
>> --- include/asm-generic/bits/bind.h	(revision 4450)
>> +++ include/asm-generic/bits/bind.h	(working copy)
>> @@ -72,7 +72,7 @@
>>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>>  	if (err) {
>>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
>> -			"thread: %s\n", strerror(err));
>> +			"thread: %s\n", strerror(-err));
>>  		exit(1);
>>  	}
>>  	__xeno_set_current(current);
>> Index: include/asm-arm/syscall.h
>> ===================================================================
>> --- include/asm-arm/syscall.h	(revision 4450)
>> +++ include/asm-arm/syscall.h	(working copy)
>> @@ -228,7 +228,7 @@
>>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>>          const unsigned mask = __xn_tscinfo.u.fr.mask;
>> -	register unsigned long long after, before;
>> +	register unsigned long long result;
>>          unsigned counter;
>>  
>>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
>>
>>
>> When I then start my application or cyclictest I get:
>>
>> -bash-3.2# ./cyclictest -n 
>> Xenomai: error obtaining handle for current thread: Operation not permitted
>>
>> As the latency program works fine, it seems to be a problem with the
>> POSIX skin. Any idea what it is?
> 
> First of all, are you sure that all parts were properly updated and in
> sync now?

Well, everything (kernel, Xenomai user-space) was built from scratch
using the latest SVN revision.

> 
> The error means that something went wrong during the
> __wrap_pthread_setschedparam execution on __init_posix_interface. More
> precisely, we lack the shadow thread for the current (main) thread for
> unknown reasons.
> 
> Could you fire up gdb, step through this and check (looking at
> /proc/xenomai/sched) what happens on ARM here? x86 was fine the last
> time I checked (will recheck later today).

With --enable-debug it works just fine :-(.

Wolfgang.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2008-12-10 11:16                     ` Wolfgang Grandegger
  2008-12-11 15:26                       ` Jan Kiszka
@ 2009-01-01 13:34                       ` Gilles Chanteperdrix
  2009-01-01 17:07                         ` Philippe Gerum
  2009-01-01 17:10                         ` Wolfgang Grandegger
  1 sibling, 2 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2009-01-01 13:34 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Wolfgang Grandegger wrote:
>> Hi Gilles,
>>
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>> to secondary mode (sometimes).
>>> That is normal. The glibc caches threads priority value, so we have to
>>> call __real_pthread_setschedparam to update them. This issue has been
>>> solved differently on trunk, but unfortunately, we can not backport this
>>> modification on v2.4.x branch.
>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>> change the priority of a real-time thread in the primary mode without
>> loosing determinism (because it will switch to secondary mode). What
>> options do I have?
> 
> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
> to secondary mode any more on my PowerPC test system. Nice, I just get
> an Oops in thread_delete from time to time. More on that issue later.
> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
> compiled:
> 
> Index: include/asm-generic/bits/bind.h
> ===================================================================
> --- include/asm-generic/bits/bind.h	(revision 4450)
> +++ include/asm-generic/bits/bind.h	(working copy)
> @@ -72,7 +72,7 @@
>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>  	if (err) {
>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
> -			"thread: %s\n", strerror(err));
> +			"thread: %s\n", strerror(-err));
>  		exit(1);
>  	}
>  	__xeno_set_current(current);
> Index: include/asm-arm/syscall.h
> ===================================================================
> --- include/asm-arm/syscall.h	(revision 4450)
> +++ include/asm-arm/syscall.h	(working copy)
> @@ -228,7 +228,7 @@
>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>          const unsigned mask = __xn_tscinfo.u.fr.mask;
> -	register unsigned long long after, before;
> +	register unsigned long long result;
>          unsigned counter;
>  
>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
> 
> 
> When I then start my application or cyclictest I get:
> 
> -bash-3.2# ./cyclictest -n 
> Xenomai: error obtaining handle for current thread: Operation not permitted
> 
> As the latency program works fine, it seems to be a problem with the
> POSIX skin. Any idea what it is?

What version of gcc are you using? When trying to get Xenomai running
on an ARM platform, I found out that gcc 4.3.1 and 4.3.2 have a bug on
ARM which trashes the parameters passed to some Xenomai syscalls.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2009-01-01 13:34                       ` Gilles Chanteperdrix
@ 2009-01-01 17:07                         ` Philippe Gerum
  2009-01-01 18:00                           ` Gilles Chanteperdrix
  2009-01-09 13:08                           ` Gilles Chanteperdrix
  2009-01-01 17:10                         ` Wolfgang Grandegger
  1 sibling, 2 replies; 55+ messages in thread
From: Philippe Gerum @ 2009-01-01 17:07 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Wolfgang Grandegger wrote:
>>> Hi Gilles,
>>>
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>>> to secondary mode (sometimes).
>>>> That is normal. The glibc caches threads priority value, so we have to
>>>> call __real_pthread_setschedparam to update them. This issue has been
>>>> solved differently on trunk, but unfortunately, we can not backport this
>>>> modification on v2.4.x branch.
>>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>>> change the priority of a real-time thread in the primary mode without
>>> loosing determinism (because it will switch to secondary mode). What
>>> options do I have?
>> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
>> to secondary mode any more on my PowerPC test system. Nice, I just get
>> an Oops in thread_delete from time to time. More on that issue later.
>> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
>> compiled:
>>
>> Index: include/asm-generic/bits/bind.h
>> ===================================================================
>> --- include/asm-generic/bits/bind.h	(revision 4450)
>> +++ include/asm-generic/bits/bind.h	(working copy)
>> @@ -72,7 +72,7 @@
>>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>>  	if (err) {
>>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
>> -			"thread: %s\n", strerror(err));
>> +			"thread: %s\n", strerror(-err));
>>  		exit(1);
>>  	}
>>  	__xeno_set_current(current);
>> Index: include/asm-arm/syscall.h
>> ===================================================================
>> --- include/asm-arm/syscall.h	(revision 4450)
>> +++ include/asm-arm/syscall.h	(working copy)
>> @@ -228,7 +228,7 @@
>>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>>          const unsigned mask = __xn_tscinfo.u.fr.mask;
>> -	register unsigned long long after, before;
>> +	register unsigned long long result;
>>          unsigned counter;
>>  
>>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
>>
>>
>> When I then start my application or cyclictest I get:
>>
>> -bash-3.2# ./cyclictest -n 
>> Xenomai: error obtaining handle for current thread: Operation not permitted
>>
>> As the latency program works fine, it seems to be a problem with the
>> POSIX skin. Any idea what it is?
> 
> What version of gcc are you using ? When trying to get xenomai running
> on an ARM platform, I found out that gcc 4.3.1 and 4.3.2 have a bug on
> ARM which thrashes the parameters passed to  some xenomai syscalls.
> 

A common issue happens when pthread_self() is inlined in the syscall arg list.
Early gcc 4.x for powerpc has this problem; never tried with newer releases, but
rather made sure to use a temporary variable to pass this value to the syscall.

-- 
Philippe.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2009-01-01 13:34                       ` Gilles Chanteperdrix
  2009-01-01 17:07                         ` Philippe Gerum
@ 2009-01-01 17:10                         ` Wolfgang Grandegger
  2009-01-01 18:11                           ` Gilles Chanteperdrix
  1 sibling, 1 reply; 55+ messages in thread
From: Wolfgang Grandegger @ 2009-01-01 17:10 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Wolfgang Grandegger wrote:
>> Wolfgang Grandegger wrote:
>>> Hi Gilles,
>>>
>>> Gilles Chanteperdrix wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>>> to secondary mode (sometimes).
>>>> That is normal. The glibc caches threads priority value, so we have to
>>>> call __real_pthread_setschedparam to update them. This issue has been
>>>> solved differently on trunk, but unfortunately, we can not backport this
>>>> modification on v2.4.x branch.
>>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>>> change the priority of a real-time thread in the primary mode without
>>> loosing determinism (because it will switch to secondary mode). What
>>> options do I have?
>> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
>> to secondary mode any more on my PowerPC test system. Nice, I just get
>> an Oops in thread_delete from time to time. More on that issue later.
>> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
>> compiled:
>>
>> Index: include/asm-generic/bits/bind.h
>> ===================================================================
>> --- include/asm-generic/bits/bind.h	(revision 4450)
>> +++ include/asm-generic/bits/bind.h	(working copy)
>> @@ -72,7 +72,7 @@
>>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>>  	if (err) {
>>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
>> -			"thread: %s\n", strerror(err));
>> +			"thread: %s\n", strerror(-err));
>>  		exit(1);
>>  	}
>>  	__xeno_set_current(current);
>> Index: include/asm-arm/syscall.h
>> ===================================================================
>> --- include/asm-arm/syscall.h	(revision 4450)
>> +++ include/asm-arm/syscall.h	(working copy)
>> @@ -228,7 +228,7 @@
>>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>>          const unsigned mask = __xn_tscinfo.u.fr.mask;
>> -	register unsigned long long after, before;
>> +	register unsigned long long result;
>>          unsigned counter;
>>  
>>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
>>
>>
>> When I then start my application or cyclictest I get:
>>
>> -bash-3.2# ./cyclictest -n 
>> Xenomai: error obtaining handle for current thread: Operation not permitted
>>
>> As the latency program works fine, it seems to be a problem with the
>> POSIX skin. Any idea what it is?
> 
> What version of gcc are you using ? When trying to get xenomai running
> on an ARM platform, I found out that gcc 4.3.1 and 4.3.2 have a bug on
> ARM which thrashes the parameters passed to  some xenomai syscalls.

The new ELDK 4.2 for ARM11 with VFP support [1] uses:

  $ armVFP-linux-gcc --version
  armVFP-linux-gcc (GCC) 4.2.2

Nevertheless, the problems do not show up with Xenomai v2.4.x and
therefore it's unlikely that Xenomai syscalls do not work properly.

[1] http://www.denx.de/en/News/WebHome#NewsEldk42Arm

Wolfgang.




^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2009-01-01 17:07                         ` Philippe Gerum
@ 2009-01-01 18:00                           ` Gilles Chanteperdrix
  2009-01-09 13:08                           ` Gilles Chanteperdrix
  1 sibling, 0 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2009-01-01 18:00 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-help

Philippe Gerum wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Wolfgang Grandegger wrote:
>>>> Hi Gilles,
>>>>
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>>>> to secondary mode (sometimes).
>>>>> That is normal. The glibc caches threads priority value, so we have to
>>>>> call __real_pthread_setschedparam to update them. This issue has been
>>>>> solved differently on trunk, but unfortunately, we can not backport this
>>>>> modification on v2.4.x branch.
>>>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>>>> change the priority of a real-time thread in the primary mode without
>>>> loosing determinism (because it will switch to secondary mode). What
>>>> options do I have?
>>> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
>>> to secondary mode any more on my PowerPC test system. Nice, I just get
>>> an Oops in thread_delete from time to time. More on that issue later.
>>> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
>>> compiled:
>>>
>>> Index: include/asm-generic/bits/bind.h
>>> ===================================================================
>>> --- include/asm-generic/bits/bind.h	(revision 4450)
>>> +++ include/asm-generic/bits/bind.h	(working copy)
>>> @@ -72,7 +72,7 @@
>>>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>>>  	if (err) {
>>>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
>>> -			"thread: %s\n", strerror(err));
>>> +			"thread: %s\n", strerror(-err));
>>>  		exit(1);
>>>  	}
>>>  	__xeno_set_current(current);
>>> Index: include/asm-arm/syscall.h
>>> ===================================================================
>>> --- include/asm-arm/syscall.h	(revision 4450)
>>> +++ include/asm-arm/syscall.h	(working copy)
>>> @@ -228,7 +228,7 @@
>>>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>>>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>>>          const unsigned mask = __xn_tscinfo.u.fr.mask;
>>> -	register unsigned long long after, before;
>>> +	register unsigned long long result;
>>>          unsigned counter;
>>>  
>>>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
>>>
>>>
>>> When I then start my application or cyclictest I get:
>>>
>>> -bash-3.2# ./cyclictest -n 
>>> Xenomai: error obtaining handle for current thread: Operation not permitted
>>>
>>> As the latency program works fine, it seems to be a problem with the
>>> POSIX skin. Any idea what it is?
>> What version of gcc are you using ? When trying to get xenomai running
>> on an ARM platform, I found out that gcc 4.3.1 and 4.3.2 have a bug on
>> ARM which thrashes the parameters passed to  some xenomai syscalls.
>>
> 
> A common issue happens when pthread_self() is inlined in the syscall arg list.
> Early gcc 4.x for powerpc has this problem; never tried with newer releases, but
> rather made sure to use a temporary variable to pass this value to the syscall.
> 
What happens with gcc 4.3.x really looks like a bug to me, so I filed a
bug report on gcc bugzilla:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38674

We could work around this bug for Xenomai syscalls, but it would not be
easy, and we do not know whether the bug also happens elsewhere.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2009-01-01 17:10                         ` Wolfgang Grandegger
@ 2009-01-01 18:11                           ` Gilles Chanteperdrix
  0 siblings, 0 replies; 55+ messages in thread
From: Gilles Chanteperdrix @ 2009-01-01 18:11 UTC (permalink / raw)
  To: Wolfgang Grandegger; +Cc: xenomai-help

Wolfgang Grandegger wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Wolfgang Grandegger wrote:
>>>> Hi Gilles,
>>>>
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>>>> to secondary mode (sometimes).
>>>>> That is normal. The glibc caches threads priority value, so we have to
>>>>> call __real_pthread_setschedparam to update them. This issue has been
>>>>> solved differently on trunk, but unfortunately, we can not backport this
>>>>> modification on v2.4.x branch.
>>>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>>>> change the priority of a real-time thread in the primary mode without
>>>> loosing determinism (because it will switch to secondary mode). What
>>>> options do I have?
>>> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
>>> to secondary mode any more on my PowerPC test system. Nice, I just get
>>> an Oops in thread_delete from time to time. More on that issue later.
>>> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
>>> compiled:
>>>
>>> Index: include/asm-generic/bits/bind.h
>>> ===================================================================
>>> --- include/asm-generic/bits/bind.h	(revision 4450)
>>> +++ include/asm-generic/bits/bind.h	(working copy)
>>> @@ -72,7 +72,7 @@
>>>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>>>  	if (err) {
>>>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
>>> -			"thread: %s\n", strerror(err));
>>> +			"thread: %s\n", strerror(-err));
>>>  		exit(1);
>>>  	}
>>>  	__xeno_set_current(current);
>>> Index: include/asm-arm/syscall.h
>>> ===================================================================
>>> --- include/asm-arm/syscall.h	(revision 4450)
>>> +++ include/asm-arm/syscall.h	(working copy)
>>> @@ -228,7 +228,7 @@
>>>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>>>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>>>          const unsigned mask = __xn_tscinfo.u.fr.mask;
>>> -	register unsigned long long after, before;
>>> +	register unsigned long long result;
>>>          unsigned counter;
>>>  
>>>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
>>>
>>>
>>> When I then start my application or cyclictest I get:
>>>
>>> -bash-3.2# ./cyclictest -n 
>>> Xenomai: error obtaining handle for current thread: Operation not permitted
>>>
>>> As the latency program works fine, it seems to be a problem with the
>>> POSIX skin. Any idea what it is?
>> What version of gcc are you using ? When trying to get xenomai running
>> on an ARM platform, I found out that gcc 4.3.1 and 4.3.2 have a bug on
>> ARM which thrashes the parameters passed to  some xenomai syscalls.
> 
> The new ELDK 4.2 for ARM11 with VFP support [1] uses:
> 
>   $ armVFP-linux-gcc --version
>   armVFP-linux-gcc (GCC) 4.2.2
> 
> Nevertheless, the problems do not show up with Xenomai v2.4.x and
> therefore it's unlikely that Xenomai syscalls do not work properly.
> 
> [1] http://www.denx.de/en/News/WebHome#NewsEldk42Arm

Ok. So this is not the same bug. I am about to reproduce the issue. I
wanted to do it at work, but have been struggling (still unsuccessfully)
to get a working toolchain with EABI and NPTL for big-endian ARM. I am
using crosstool-ng, which made an apparently working version of gcc 4.3.2
until I found that it had a bug, and since then any attempt to make a
toolchain with an earlier version of gcc and/or glibc failed for
different reasons.

So, if anyone knows of a working combination of gcc/glibc/binutils for
big-endian ARM with EABI and NPTL, I am interested.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2009-01-01 17:07                         ` Philippe Gerum
  2009-01-01 18:00                           ` Gilles Chanteperdrix
@ 2009-01-09 13:08                           ` Gilles Chanteperdrix
  2009-01-09 13:38                             ` Philippe Gerum
  1 sibling, 1 reply; 55+ messages in thread
From: Gilles Chanteperdrix @ 2009-01-09 13:08 UTC (permalink / raw)
  To: rpm; +Cc: xenomai-help

Philippe Gerum wrote:
> Gilles Chanteperdrix wrote:
>> Wolfgang Grandegger wrote:
>>> Wolfgang Grandegger wrote:
>>>> Hi Gilles,
>>>>
>>>> Gilles Chanteperdrix wrote:
>>>>> Wolfgang Grandegger wrote:
>>>>>> I'm also puzzled why pthread_setschedparam() does make a mode switch
>>>>>> to secondary mode (sometimes).
>>>>> That is normal. The glibc caches threads priority value, so we have to
>>>>> call __real_pthread_setschedparam to update them. This issue has been
>>>>> solved differently on trunk, but unfortunately, we can not backport this
>>>>> modification on v2.4.x branch.
>>>> To get you right. With v2.4.x it is not possible with the POSIX skin to
>>>> change the priority of a real-time thread in the primary mode without
>>>> loosing determinism (because it will switch to secondary mode). What
>>>> options do I have?
>>> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
>>> to secondary mode any more on my PowerPC test system. Nice, I just get
>>> an Oops in thread_delete from time to time. More on that issue later.
>>> For my ARM i.mx31 system, a need a few patches to get the Xenomai src's
>>> compiled:
>>>
>>> Index: include/asm-generic/bits/bind.h
>>> ===================================================================
>>> --- include/asm-generic/bits/bind.h	(revision 4450)
>>> +++ include/asm-generic/bits/bind.h	(working copy)
>>> @@ -72,7 +72,7 @@
>>>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>>>  	if (err) {
>>>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
>>> -			"thread: %s\n", strerror(err));
>>> +			"thread: %s\n", strerror(-err));
>>>  		exit(1);
>>>  	}
>>>  	__xeno_set_current(current);
>>> Index: include/asm-arm/syscall.h
>>> ===================================================================
>>> --- include/asm-arm/syscall.h	(revision 4450)
>>> +++ include/asm-arm/syscall.h	(working copy)
>>> @@ -228,7 +228,7 @@
>>>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>>>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>>>          const unsigned mask = __xn_tscinfo.u.fr.mask;
>>> -	register unsigned long long after, before;
>>> +	register unsigned long long result;
>>>          unsigned counter;
>>>  
>>>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
>>>
>>>
>>> When I then start my application or cyclictest I get:
>>>
>>> -bash-3.2# ./cyclictest -n 
>>> Xenomai: error obtaining handle for current thread: Operation not permitted
>>>
>>> As the latency program works fine, it seems to be a problem with the
>>> POSIX skin. Any idea what it is?
>> What version of gcc are you using ? When trying to get xenomai running
>> on an ARM platform, I found out that gcc 4.3.1 and 4.3.2 have a bug on
>> ARM which thrashes the parameters passed to  some xenomai syscalls.
>>
> 
> A common issue happens when pthread_self() is inlined in the syscall arg list.
> Early gcc 4.x for powerpc has this problem; never tried with newer releases, but
> rather made sure to use a temporary variable to pass this value to the syscall.

The issue on arm is a similar issue: the problem comes from the
"mode_buf" parameter now passed to pthread_create which clobbers the
register r0, normally used to pass the syscall number.

I am still trying to figure out if it is a bug in Xenomai's syscall
inline assembly, or a bug in gcc or binutils. And even if it is a bug in
the toolchain, I am afraid we will have to find a workaround in the
syscall inline assembly anyway.

-- 
                                                 Gilles.


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [Xenomai-help] pthread cancelation and scheduling magics
  2009-01-09 13:08                           ` Gilles Chanteperdrix
@ 2009-01-09 13:38                             ` Philippe Gerum
  0 siblings, 0 replies; 55+ messages in thread
From: Philippe Gerum @ 2009-01-09 13:38 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-help

Gilles Chanteperdrix wrote:
> Philippe Gerum wrote:
>> Gilles Chanteperdrix wrote:
>>> Wolfgang Grandegger wrote:
>>>> Wolfgang Grandegger wrote:
>>>>> Hi Gilles,
>>>>>
>>>>> Gilles Chanteperdrix wrote:
>>>>>> Wolfgang Grandegger wrote:
>>>>>>> I'm also puzzled why pthread_setschedparam() makes a mode switch
>>>>>>> to secondary mode (sometimes).
>>>>>> That is normal. glibc caches each thread's priority value, so we have
>>>>>> to call __real_pthread_setschedparam to update it. This issue has been
>>>>>> solved differently on trunk, but unfortunately, we cannot backport this
>>>>>> modification to the v2.4.x branch.
>>>>> Let me get this right: with v2.4.x it is not possible with the POSIX skin to
>>>>> change the priority of a real-time thread in the primary mode without
>>>>> losing determinism (because it will switch to secondary mode). What
>>>>> options do I have?
>>>> I gave Xenomai trunk a try and pthread_setschedparam() does not switch
>>>> to secondary mode any more on my PowerPC test system. Nice, I just get
>>>> an Oops in thread_delete from time to time. More on that issue later.
>>>> For my ARM i.mx31 system, I need a few patches to get the Xenomai sources
>>>> compiled:
>>>>
>>>> Index: include/asm-generic/bits/bind.h
>>>> ===================================================================
>>>> --- include/asm-generic/bits/bind.h	(revision 4450)
>>>> +++ include/asm-generic/bits/bind.h	(working copy)
>>>> @@ -72,7 +72,7 @@
>>>>  	err = XENOMAI_SYSCALL1(__xn_sys_current, &current);
>>>>  	if (err) {
>>>>  		fprintf(stderr, "Xenomai: error obtaining handle for current "
>>>> -			"thread: %s\n", strerror(err));
>>>> +			"thread: %s\n", strerror(-err));
>>>>  		exit(1);
>>>>  	}
>>>>  	__xeno_set_current(current);
>>>> Index: include/asm-arm/syscall.h
>>>> ===================================================================
>>>> --- include/asm-arm/syscall.h	(revision 4450)
>>>> +++ include/asm-arm/syscall.h	(working copy)
>>>> @@ -228,7 +228,7 @@
>>>>  	volatile unsigned long long *const tscp = __xn_tscinfo.u.fr.tsc;
>>>>  	volatile unsigned *const counterp = __xn_tscinfo.u.fr.counter;
>>>>          const unsigned mask = __xn_tscinfo.u.fr.mask;
>>>> -	register unsigned long long after, before;
>>>> +	register unsigned long long result;
>>>>          unsigned counter;
>>>>  
>>>>          __asm__ ("ldmia %1, %M0\n": "=r"(result): "r"(tscp), "m"(*tscp));
>>>>
>>>>
>>>> When I then start my application or cyclictest I get:
>>>>
>>>> -bash-3.2# ./cyclictest -n 
>>>> Xenomai: error obtaining handle for current thread: Operation not permitted
>>>>
>>>> As the latency program works fine, it seems to be a problem with the
>>>> POSIX skin. Any idea what it is?
>>> What version of gcc are you using ? When trying to get xenomai running
>>> on an ARM platform, I found out that gcc 4.3.1 and 4.3.2 have a bug on
>>> ARM which trashes the parameters passed to some Xenomai syscalls.
>>>
>> A common issue happens when pthread_self() is inlined in the syscall arg list.
>> Early gcc 4.x for powerpc has this problem; never tried with newer releases, but
>> rather made sure to use a temporary variable to pass this value to the syscall.
> 
> The issue on arm is a similar issue: the problem comes from the
> "mode_buf" parameter now passed to pthread_create which clobbers the
> register r0, normally used to pass the syscall number.
> 
> I am still trying to figure out if it is a bug in Xenomai's syscall
> inline assembly, or a bug in gcc or binutils. And even if it is a bug in
> the toolchain, I am afraid we will have to find a workaround in the
> syscall inline assembly anyway.
> 

Yes.

-- 
Philippe.


^ permalink raw reply	[flat|nested] 55+ messages in thread

end of thread, other threads:[~2009-01-09 13:38 UTC | newest]

Thread overview: 55+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2008-11-30 21:34 [Xenomai-help] pthread cancelation and scheduling magics Wolfgang Grandegger
2008-11-30 21:46 ` Gilles Chanteperdrix
2008-11-30 21:59 ` Gilles Chanteperdrix
2008-12-01 10:22   ` Gilles Chanteperdrix
2008-12-01 14:16     ` Wolfgang Grandegger
2008-12-01 14:15       ` Gilles Chanteperdrix
2008-12-01 15:10         ` Gilles Chanteperdrix
2008-12-02 15:59           ` Wolfgang Grandegger
2008-12-02 15:55             ` Gilles Chanteperdrix
2008-12-02 18:18               ` Wolfgang Grandegger
2008-12-02 18:35                 ` Gilles Chanteperdrix
2008-12-02 19:50                   ` Wolfgang Grandegger
2008-12-02 20:03                     ` Philippe Gerum
2008-12-07 16:05                   ` Wolfgang Grandegger
2008-12-10 11:16                     ` Wolfgang Grandegger
2008-12-11 15:26                       ` Jan Kiszka
2008-12-13 15:55                         ` Wolfgang Grandegger
2009-01-01 13:34                       ` Gilles Chanteperdrix
2009-01-01 17:07                         ` Philippe Gerum
2009-01-01 18:00                           ` Gilles Chanteperdrix
2009-01-09 13:08                           ` Gilles Chanteperdrix
2009-01-09 13:38                             ` Philippe Gerum
2009-01-01 17:10                         ` Wolfgang Grandegger
2009-01-01 18:11                           ` Gilles Chanteperdrix
2008-12-03 10:16                 ` Gilles Chanteperdrix
2008-12-03 11:19                   ` Wolfgang Grandegger
2008-12-03 13:30                     ` Gilles Chanteperdrix
2008-12-03 18:02                       ` Wolfgang Grandegger
2008-12-03 17:57                         ` Gilles Chanteperdrix
2008-12-03 18:37                           ` Wolfgang Grandegger
2008-12-03 18:32                             ` Gilles Chanteperdrix
2008-12-03 18:55                               ` Wolfgang Grandegger
2008-12-03 18:55                                 ` Gilles Chanteperdrix
2008-12-03 19:19                                   ` Wolfgang Grandegger
2008-12-03 19:19                                     ` Gilles Chanteperdrix
2008-12-03 20:02                                       ` Wolfgang Grandegger
2008-12-03 20:02                                         ` Gilles Chanteperdrix
2008-12-04 15:29                                           ` Wolfgang Grandegger
2008-12-04 15:38                                             ` Gilles Chanteperdrix
2008-12-04 15:42                                               ` Gilles Chanteperdrix
2008-12-04 16:31                                               ` Wolfgang Grandegger
2008-12-04 16:26                                                 ` Gilles Chanteperdrix
2008-12-04 16:49                                                   ` Wolfgang Grandegger
2008-12-04 17:02                                                     ` Gilles Chanteperdrix
2008-12-04 17:52                                                       ` Wolfgang Grandegger
2008-12-04 17:51                                                         ` Gilles Chanteperdrix
2008-12-05 14:58                                                       ` Wolfgang Grandegger
2008-12-03  8:04         ` Wolfgang Grandegger
2008-12-03 10:12           ` Gilles Chanteperdrix
2008-12-03 10:46             ` Wolfgang Grandegger
2008-12-03 10:40               ` Gilles Chanteperdrix
2008-12-03 11:16                 ` Wolfgang Grandegger
2008-12-03 11:11               ` Philippe Gerum
2008-12-03 11:22                 ` Wolfgang Grandegger
2008-12-01 13:31   ` Wolfgang Grandegger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.