* [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address @ 2010-03-05 11:37 Jan Kiszka 2010-03-05 12:02 ` Gilles Chanteperdrix 0 siblings, 1 reply; 9+ messages in thread From: Jan Kiszka @ 2010-03-05 11:37 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: xenomai-core This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user space address passed on shadow creation for the current thread. We need this in order to safely shut down a thread that wants to release its u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel might have touched this memory even after the release which caused subtle corruptions. After we released u_mode, we can no longer make reliable decisions based on it. For the fast mutex use case, we then assume the worst case, ie. we enforce syscall-based acquisitions unconditionally. For the assert_nrt case, we simply acquire the required information via __xn_sys_current_info. --- include/asm-generic/bits/current.h | 24 ++++++++---- include/asm-generic/syscall.h | 1 + include/nucleus/thread.h | 2 + ksrc/nucleus/shadow.c | 14 +++++++ src/rtdk/assert_context.c | 9 +++- src/skins/common/current.c | 72 ++++++++++++++++++++++++++++-------- 6 files changed, 95 insertions(+), 27 deletions(-) diff --git a/include/asm-generic/bits/current.h b/include/asm-generic/bits/current.h index 0c3166b..f0e569c 100644 --- a/include/asm-generic/bits/current.h +++ b/include/asm-generic/bits/current.h @@ -2,7 +2,9 @@ #define _XENO_ASM_GENERIC_CURRENT_H #include <pthread.h> -#include <nucleus/types.h> +#include <nucleus/thread.h> + +extern pthread_key_t xeno_current_mode_key; #ifdef HAVE___THREAD extern __thread xnhandle_t xeno_current __attribute__ ((tls_model ("initial-exec"))); @@ -19,15 +21,13 @@ static inline unsigned long xeno_get_current_mode(void) return xeno_current_mode; } -static inline unsigned long *xeno_init_current_mode(void) +static inline int xeno_primary_mode(void) { - return &xeno_current_mode; + return xeno_current_mode & XNRELAX; } -#define xeno_init_current_keys() do { } while (0) #else /* ! HAVE___THREAD */ extern pthread_key_t xeno_current_key; -extern pthread_key_t xeno_current_mode_key; static inline xnhandle_t xeno_get_current(void) { @@ -41,14 +41,22 @@ static inline xnhandle_t xeno_get_current(void) static inline unsigned long xeno_get_current_mode(void) { - return *(unsigned long *)pthread_getspecific(xeno_current_mode_key); + unsigned long *mode = pthread_getspecific(xeno_current_mode_key); + + return mode ? *mode : -1; } -unsigned long *xeno_init_current_mode(void); +static inline int xeno_primary_mode(void) +{ + unsigned long *mode = pthread_getspecific(xeno_current_mode_key); -void xeno_init_current_keys(void); + return mode ? (*mode & XNRELAX) : 0; +} #endif /* ! HAVE___THREAD */ void xeno_set_current(void); +unsigned long *xeno_init_current_mode(void); +void xeno_init_current_keys(void); + #endif /* _XENO_ASM_GENERIC_CURRENT_H */ diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 315822e..62f7aa2 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -46,6 +46,7 @@ #define __xn_sys_current 8 /* threadh = xnthread_handle(cur) */ #define __xn_sys_current_info 9 /* r = xnshadow_current_info(&info) */ #define __xn_sys_get_next_sigs 10 /* only unqueue pending signals. */ +#define __xn_sys_drop_u_mode 11 /* stop updating thread->u_mode */ #define XENOMAI_LINUX_DOMAIN 0 #define XENOMAI_XENO_DOMAIN 1 diff --git a/include/nucleus/thread.h b/include/nucleus/thread.h index b2baccb..cb772f7 100644 --- a/include/nucleus/thread.h +++ b/include/nucleus/thread.h @@ -328,6 +328,8 @@ typedef struct xnthread { #ifdef CONFIG_XENO_OPT_PERVASIVE unsigned long __user *u_mode; /* Thread mode variable in userland. */ + unsigned long u_mode_dump; /* u_mode points here once userland dropped it. */ + unsigned u_sigpending; /* One bit per skin */ #endif /* CONFIG_XENO_OPT_PERVASIVE */ diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index d82d6c7..8431025 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -1967,6 +1967,18 @@ static int xnshadow_sys_get_next_sigs(struct pt_regs *regs) return -EINTR; } +static int xnshadow_sys_drop_u_mode(struct pt_regs *regs) +{ + xnthread_t *cur = xnshadow_thread(current); + + if (!cur) + return -EPERM; + + cur->u_mode = (unsigned long __user*)&cur->u_mode_dump; + + return 0; +} + static xnsysent_t __systab[] = { [__xn_sys_migrate] = {&xnshadow_sys_migrate, __xn_exec_current}, [__xn_sys_arch] = {&xnshadow_sys_arch, __xn_exec_any}, @@ -1981,6 +1993,8 @@ static xnsysent_t __systab[] = { {&xnshadow_sys_current_info, __xn_exec_shadow}, [__xn_sys_get_next_sigs] = {&xnshadow_sys_get_next_sigs, __xn_exec_conforming}, + [__xn_sys_drop_u_mode] = + {&xnshadow_sys_drop_u_mode, __xn_exec_shadow}, }; static void post_ppd_release(struct xnheap *h) diff --git a/src/rtdk/assert_context.c b/src/rtdk/assert_context.c index f67bcd8..bad19f3 100644 --- a/src/rtdk/assert_context.c +++ b/src/rtdk/assert_context.c @@ -30,9 +30,12 @@ void assert_nrt(void) xnthread_info_t info; int err; - if (unlikely(xeno_get_current() != XN_NO_HANDLE && - !(xeno_get_current_mode() & XNRELAX))) { + if (xeno_get_current() != XN_NO_HANDLE) + return; + info.state = xeno_get_current_mode(); + + if (unlikely(!(info.state & XNRELAX) || info.state == -1)) { err = XENOMAI_SYSCALL1(__xn_sys_current_info, &info); if (err) { @@ -41,7 +44,7 @@ void assert_nrt(void) return; } - if (info.state & XNTRAPSW) + if ((info.state & (XNRELAX | XNTRAPSW)) == XNTRAPSW) pthread_kill(pthread_self(), SIGXCPU); } } diff --git a/src/skins/common/current.c b/src/skins/common/current.c index 2922d6e..1b09390 100644 --- a/src/skins/common/current.c +++ b/src/skins/common/current.c @@ -1,10 +1,13 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <pthread.h> #include <asm/xenomai/syscall.h> #include <nucleus/types.h> +pthread_key_t xeno_current_mode_key; + #ifdef HAVE___THREAD __thread __attribute__ ((tls_model ("initial-exec"))) xnhandle_t xeno_current = XN_NO_HANDLE; @@ -16,36 +19,62 @@ static inline void __xeno_set_current(xnhandle_t current) xeno_current = current; } #else /* !HAVE___THREAD */ -#include <pthread.h> - pthread_key_t xeno_current_key; -pthread_key_t xeno_current_mode_key; static inline void __xeno_set_current(xnhandle_t current) { pthread_setspecific(xeno_current_key, (void *)current); } +#endif /* !HAVE___THREAD */ -unsigned long *xeno_init_current_mode(void) +static void free_current_mode(void *key) { - unsigned long *mode = malloc(sizeof(unsigned long)); - pthread_setspecific(xeno_current_mode_key, mode); - return mode; + unsigned long *mode = key; + int err; + + *mode = -1; + + err = XENOMAI_SYSCALL0(__xn_sys_drop_u_mode); + +#ifndef HAVE___THREAD + if (err) { + static int warned; + + if (!warned) { + warned = 1; + fprintf(stderr, + "\nXenomai: WARNING, this Xenomai kernel can " + "cause spurious application\n" + " crashes on thread termination.\n" + " To reduce the probality, we will " + "leak a few bytes heap now.\n" + ); + } + } else + free(mode); +#endif /* !HAVE___THREAD */ } static void init_current_keys(void) { - int err = pthread_key_create(&xeno_current_key, NULL); + int err; + +#ifndef HAVE___THREAD + err = pthread_key_create(&xeno_current_key, NULL); if (err) goto error_exit; +#endif /* !HAVE___THREAD */ - err = pthread_key_create(&xeno_current_mode_key, free); - if (err) { - error_exit: - fprintf(stderr, "Xenomai: error creating TSD key: %s\n", - strerror(err)); - exit(EXIT_FAILURE); - } + err = pthread_key_create(&xeno_current_mode_key, free_current_mode); + if (err) + goto error_exit; + + return; + + error_exit: + fprintf(stderr, "Xenomai: error creating TSD key: %s\n", + strerror(err)); + exit(EXIT_FAILURE); } void xeno_init_current_keys(void) @@ -53,7 +82,6 @@ void xeno_init_current_keys(void) static pthread_once_t xeno_init_current_keys_once = PTHREAD_ONCE_INIT; pthread_once(&xeno_init_current_keys_once, init_current_keys); } -#endif /* !HAVE___THREAD */ void xeno_set_current(void) { @@ -68,3 +96,15 @@ void xeno_set_current(void) } __xeno_set_current(current); } + +unsigned long *xeno_init_current_mode(void) +{ +#ifdef HAVE___THREAD + unsigned long *mode = &xeno_current_mode; +#else /* !HAVE___THREAD */ + unsigned long *mode = malloc(sizeof(unsigned long)); +#endif /* !HAVE___THREAD */ + + pthread_setspecific(xeno_current_mode_key, mode); + return mode; +} -- 1.6.0.2 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address 2010-03-05 11:37 [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address Jan Kiszka @ 2010-03-05 12:02 ` Gilles Chanteperdrix 2010-03-05 15:53 ` [Xenomai-core] [RFC][PATCH v2] " Jan Kiszka 0 siblings, 1 reply; 9+ messages in thread From: Gilles Chanteperdrix @ 2010-03-05 12:02 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core Jan Kiszka wrote: > This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user > space address passed on shadow creation for the current thread. We need > this in order to safely shut down a thread that wants to release its > u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel > might have touched this memory even after the release which caused > subtle corruptions. > > After we released u_mode, we can no longer make reliable decisions based > on it. For the fast mutex use case, we then assume the worst case, ie. > we enforce syscall-based acquisitions unconditionally. For the > assert_nrt case, we simply acquire the required information via > __xn_sys_current_info. As usual, you are firing patches too fast: - you are lacking the exit syscall trap. - the patch is not correct and will cause kernel-space addresses to be passed to put_user, which probably yields bugs when the kernel is compiled with proper debug flags; - I do not see the point where the new syscall is called with __thread, but I am not sure it is missing, and if it is missing, you will get the rightfull warning when hitting the exit syscall. - the user-space users of the function to get current_mode are still cluttered with special cases to handle the invalid state. And illustrating nicely the deficiency of this approach, you have forgot one user. I am not going to make my version before tomorrow, so you have plenty of time to send me an at least correct version which would take into account all of our discussions. Please also consider that your "patch machinegun" way of working is not really sane. When there are so many often untested patches to review coming from you, we sometime let some errors slip through. And from a user point of view, seeing so many buggy patches refused is not really reassuring. -- Gilles. ^ permalink raw reply [flat|nested] 9+ messages in thread
* [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address 2010-03-05 12:02 ` Gilles Chanteperdrix @ 2010-03-05 15:53 ` Jan Kiszka 2010-03-05 16:30 ` Gilles Chanteperdrix 0 siblings, 1 reply; 9+ messages in thread From: Jan Kiszka @ 2010-03-05 15:53 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: xenomai-core Gilles Chanteperdrix wrote: > Jan Kiszka wrote: >> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user >> space address passed on shadow creation for the current thread. We need >> this in order to safely shut down a thread that wants to release its >> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel >> might have touched this memory even after the release which caused >> subtle corruptions. >> >> After we released u_mode, we can no longer make reliable decisions based >> on it. For the fast mutex use case, we then assume the worst case, ie. >> we enforce syscall-based acquisitions unconditionally. For the >> assert_nrt case, we simply acquire the required information via >> __xn_sys_current_info. > > As usual, you are firing patches too fast: > - you are lacking the exit syscall trap. Don't worry, my memory is not yet _that_ bad. :) > - the patch is not correct and will cause kernel-space addresses to be > passed to put_user, which probably yields bugs when the kernel is > compiled with proper debug flags; Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple NULL-check is way better. Fixed. > - I do not see the point where the new syscall is called with __thread, > but I am not sure it is missing, and if it is missing, you will get the > rightfull warning when hitting the exit syscall. Destructor of (now unconditionally used) xeno_current_mode_key. > - the user-space users of the function to get current_mode are still > cluttered with special cases to handle the invalid state. And > illustrating nicely the deficiency of this approach, you have forgot one > user. My impression is that you are still a bit biased due to TLS limitations on ARM. And nope, I didn't forget to address it, I just forgot to mention that there is nothing to address (the torture test does not use xeno_get_current_mode from within a TSD destructor, thus potentially after drop_u_mode). > > I am not going to make my version before tomorrow, so you have plenty of > time to send me an at least correct version which would take into > account all of our discussions. Well... what should I reply? > > Please also consider that your "patch machinegun" way of working is not > really sane. When there are so many often untested patches to review > coming from you, we sometime let some errors slip through. And from a > user point of view, seeing so many buggy patches refused is not really > reassuring. Please don't make me cite counter examples for issues with different workflows we also experienced. The key of an open development process is open discussion, and that ideally based on code, even if it is not yet perfect. I can't imagine you want Xenomai being developed in closed chambers (or even just a single one). If you are afraid of imperfect patches being posted or even merged into some software, you shouldn't use e.g. the Linux kernel as well. Jan ------> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user space address passed on shadow creation for the current thread. We need this in order to safely shut down a thread that wants to release its u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel might have touched this memory even after the release which caused subtle corruptions. After we released u_mode, we can no longer make reliable decisions based on it. For the fast mutex use case, we then assume the worst case, ie. we enforce syscall-based acquisitions unconditionally. For the assert_nrt case, we simply acquire the required information via __xn_sys_current_info. The mutex torture test requires no special care as it cannot retrieve invalid u_mode states. --- Changes in v2: - Drop non-working redirection of u_mode after release, just check for NULL u_mode before writing to it - Trap exit syscall to clear u_mode if user space failed to - Add/improve warnings about too old user/kernel space - Code refactorings to avoid new #ifdefs include/asm-generic/bits/current.h | 24 +++++++---- include/asm-generic/syscall.h | 1 + ksrc/nucleus/shadow.c | 51 +++++++++++++++++++++--- src/rtdk/assert_context.c | 9 +++- src/skins/common/current.c | 78 +++++++++++++++++++++++++++++++---- 5 files changed, 137 insertions(+), 26 deletions(-) diff --git a/include/asm-generic/bits/current.h b/include/asm-generic/bits/current.h index 0c3166b..f0e569c 100644 --- a/include/asm-generic/bits/current.h +++ b/include/asm-generic/bits/current.h @@ -2,7 +2,9 @@ #define _XENO_ASM_GENERIC_CURRENT_H #include <pthread.h> -#include <nucleus/types.h> +#include <nucleus/thread.h> + +extern pthread_key_t xeno_current_mode_key; #ifdef HAVE___THREAD extern __thread xnhandle_t xeno_current __attribute__ ((tls_model ("initial-exec"))); @@ -19,15 +21,13 @@ static inline unsigned long xeno_get_current_mode(void) return xeno_current_mode; } -static inline unsigned long *xeno_init_current_mode(void) +static inline int xeno_primary_mode(void) { - return &xeno_current_mode; + return xeno_current_mode & XNRELAX; } -#define xeno_init_current_keys() do { } while (0) #else /* ! HAVE___THREAD */ extern pthread_key_t xeno_current_key; -extern pthread_key_t xeno_current_mode_key; static inline xnhandle_t xeno_get_current(void) { @@ -41,14 +41,22 @@ static inline xnhandle_t xeno_get_current(void) static inline unsigned long xeno_get_current_mode(void) { - return *(unsigned long *)pthread_getspecific(xeno_current_mode_key); + unsigned long *mode = pthread_getspecific(xeno_current_mode_key); + + return mode ? *mode : -1; } -unsigned long *xeno_init_current_mode(void); +static inline int xeno_primary_mode(void) +{ + unsigned long *mode = pthread_getspecific(xeno_current_mode_key); -void xeno_init_current_keys(void); + return mode ? (*mode & XNRELAX) : 0; +} #endif /* ! HAVE___THREAD */ void xeno_set_current(void); +unsigned long *xeno_init_current_mode(void); +void xeno_init_current_keys(void); + #endif /* _XENO_ASM_GENERIC_CURRENT_H */ diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 315822e..62f7aa2 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -46,6 +46,7 @@ #define __xn_sys_current 8 /* threadh = xnthread_handle(cur) */ #define __xn_sys_current_info 9 /* r = xnshadow_current_info(&info) */ #define __xn_sys_get_next_sigs 10 /* only unqueue pending signals. */ +#define __xn_sys_drop_u_mode 11 /* stop updating thread->u_mode */ #define XENOMAI_LINUX_DOMAIN 0 #define XENOMAI_XENO_DOMAIN 1 diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c index d82d6c7..8a50cbb 100644 --- a/ksrc/nucleus/shadow.c +++ b/ksrc/nucleus/shadow.c @@ -1018,7 +1018,8 @@ redo: /* Grab the request token. */ return -ERESTARTSYS; - __xn_put_user(0, thread->u_mode); + if (thread->u_mode) + __xn_put_user(0, thread->u_mode); preempt_disable(); @@ -1208,7 +1209,8 @@ void xnshadow_relax(int notify, int reason) /* "current" is now running into the Linux domain on behalf of the root thread. */ - __xn_put_user(XNRELAX, thread->u_mode); + if (thread->u_mode) + __xn_put_user(XNRELAX, thread->u_mode); trace_mark(xn_nucleus, shadow_relaxed, "thread %p thread_name %s comm %s", @@ -1382,7 +1384,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t __user *u_completion, if (ret) return ret; - __xn_put_user(XNRELAX, u_mode); + if (thread->u_mode) + __xn_put_user(XNRELAX, u_mode); ret = xnshadow_harden(); @@ -1967,6 +1970,18 @@ static int xnshadow_sys_get_next_sigs(struct pt_regs *regs) return -EINTR; } +static int xnshadow_sys_drop_u_mode(struct pt_regs *regs) +{ + xnthread_t *cur = xnshadow_thread(current); + + if (!cur) + return -EPERM; + + cur->u_mode = NULL; + + return 0; +} + static xnsysent_t __systab[] = { [__xn_sys_migrate] = {&xnshadow_sys_migrate, __xn_exec_current}, [__xn_sys_arch] = {&xnshadow_sys_arch, __xn_exec_any}, @@ -1981,6 +1996,8 @@ static xnsysent_t __systab[] = { {&xnshadow_sys_current_info, __xn_exec_shadow}, [__xn_sys_get_next_sigs] = {&xnshadow_sys_get_next_sigs, __xn_exec_conforming}, + [__xn_sys_drop_u_mode] = + {&xnshadow_sys_drop_u_mode, __xn_exec_shadow}, }; static void post_ppd_release(struct xnheap *h) @@ -2034,8 +2051,30 @@ static struct xnskin_props __props = { .module = NULL }; -static inline int substitute_linux_syscall(struct pt_regs *regs) +static void handle_shadow_exit(xnthread_t *thread) { + static int warned; + + /* + * If user space did not deregister u_mode at this point, we + * are confronted with old libraries and should warn about + * potential instabilities. + */ + if (thread->u_mode && !warned) { + warned = 1; + printk(KERN_WARNING + "Xenomai: old Xenomai libs detected which may crash " + "on thread termination\n"); + } + thread->u_mode = NULL; +} + +static inline int +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs) +{ + if (unlikely(__xn_reg_mux(regs) == __NR_exit)) + handle_shadow_exit(thread); + /* No real-time replacement for now -- let Linux handle this call. */ return 0; } @@ -2204,7 +2243,7 @@ static inline int do_hisyscall_event(unsigned event, unsigned domid, void *data) * From now on, we know that we have a valid shadow thread * pointer. */ - if (substitute_linux_syscall(regs)) + if (substitute_linux_syscall(thread, regs)) /* * This is a Linux syscall issued on behalf of a * shadow thread running inside the Xenomai @@ -2265,7 +2304,7 @@ static inline int do_losyscall_event(unsigned event, unsigned domid, void *data) if (__xn_reg_mux_p(regs)) goto xenomai_syscall; - if (!thread || !substitute_linux_syscall(regs)) + if (!thread || !substitute_linux_syscall(thread, regs)) /* Fall back to Linux syscall handling. */ return RTHAL_EVENT_PROPAGATE; diff --git a/src/rtdk/assert_context.c b/src/rtdk/assert_context.c index f67bcd8..bad19f3 100644 --- a/src/rtdk/assert_context.c +++ b/src/rtdk/assert_context.c @@ -30,9 +30,12 @@ void assert_nrt(void) xnthread_info_t info; int err; - if (unlikely(xeno_get_current() != XN_NO_HANDLE && - !(xeno_get_current_mode() & XNRELAX))) { + if (xeno_get_current() != XN_NO_HANDLE) + return; + info.state = xeno_get_current_mode(); + + if (unlikely(!(info.state & XNRELAX) || info.state == -1)) { err = XENOMAI_SYSCALL1(__xn_sys_current_info, &info); if (err) { @@ -41,7 +44,7 @@ void assert_nrt(void) return; } - if (info.state & XNTRAPSW) + if ((info.state & (XNRELAX | XNTRAPSW)) == XNTRAPSW) pthread_kill(pthread_self(), SIGXCPU); } } diff --git a/src/skins/common/current.c b/src/skins/common/current.c index 2922d6e..7db66c0 100644 --- a/src/skins/common/current.c +++ b/src/skins/common/current.c @@ -1,45 +1,98 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <pthread.h> #include <asm/xenomai/syscall.h> #include <nucleus/types.h> +pthread_key_t xeno_current_mode_key; + #ifdef HAVE___THREAD __thread __attribute__ ((tls_model ("initial-exec"))) xnhandle_t xeno_current = XN_NO_HANDLE; __thread __attribute__ ((tls_model ("initial-exec"))) unsigned long xeno_current_mode; +static inline int create_current_key(void) { return 0; } + static inline void __xeno_set_current(xnhandle_t current) { xeno_current = current; } + +static inline unsigned long *create_current_mode(void) +{ + return &xeno_current_mode; +} + +static inline void free_current_mode(unsigned long *mode) { } + +#define XENO_MODE_LEAK_WARNING "" + #else /* !HAVE___THREAD */ -#include <pthread.h> pthread_key_t xeno_current_key; -pthread_key_t xeno_current_mode_key; + +static inline int create_current_key(void) +{ + return pthread_key_create(&xeno_current_key, NULL); +} static inline void __xeno_set_current(xnhandle_t current) { pthread_setspecific(xeno_current_key, (void *)current); } -unsigned long *xeno_init_current_mode(void) +static inline unsigned long *create_current_mode(void) { - unsigned long *mode = malloc(sizeof(unsigned long)); - pthread_setspecific(xeno_current_mode_key, mode); - return mode; + return malloc(sizeof(unsigned long)); +} + +static inline void free_current_mode(unsigned long *mode) +{ + free(mode); +} + +#define XENO_MODE_LEAK_WARNING \ + " To reduce the probality, we leak a few bytes of heap " \ + "per thread.\n" + +#endif /* !HAVE___THREAD */ + +static void cleanup_current_mode(void *key) +{ + unsigned long *mode = key; + int err; + + *mode = -1; + + err = XENOMAI_SYSCALL0(__xn_sys_drop_u_mode); + + if (err) { + static int warned; + + if (!warned) { + warned = 1; + fprintf(stderr, + "\nXenomai: WARNING, this Xenomai kernel can " + "cause spurious application\n" + " crashes on thread termination. " + "Upgrade is highly recommended.\n%s\n", + XENO_MODE_LEAK_WARNING); + } + } else + free_current_mode(mode); } static void init_current_keys(void) { - int err = pthread_key_create(&xeno_current_key, NULL); + int err = create_current_key(); + if (err) goto error_exit; - err = pthread_key_create(&xeno_current_mode_key, free); + err = pthread_key_create(&xeno_current_mode_key, cleanup_current_mode); if (err) { error_exit: fprintf(stderr, "Xenomai: error creating TSD key: %s\n", @@ -53,7 +106,6 @@ void xeno_init_current_keys(void) static pthread_once_t xeno_init_current_keys_once = PTHREAD_ONCE_INIT; pthread_once(&xeno_init_current_keys_once, init_current_keys); } -#endif /* !HAVE___THREAD */ void xeno_set_current(void) { @@ -68,3 +120,11 @@ void xeno_set_current(void) } __xeno_set_current(current); } + +unsigned long *xeno_init_current_mode(void) +{ + unsigned long *mode = create_current_mode(); + + pthread_setspecific(xeno_current_mode_key, mode); + return mode; +} -- 1.6.0.2 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address 2010-03-05 15:53 ` [Xenomai-core] [RFC][PATCH v2] " Jan Kiszka @ 2010-03-05 16:30 ` Gilles Chanteperdrix 2010-03-05 17:16 ` Jan Kiszka 0 siblings, 1 reply; 9+ messages in thread From: Gilles Chanteperdrix @ 2010-03-05 16:30 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core Jan Kiszka wrote: > Gilles Chanteperdrix wrote: >> Jan Kiszka wrote: >>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user >>> space address passed on shadow creation for the current thread. We need >>> this in order to safely shut down a thread that wants to release its >>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel >>> might have touched this memory even after the release which caused >>> subtle corruptions. >>> >>> After we released u_mode, we can no longer make reliable decisions based >>> on it. For the fast mutex use case, we then assume the worst case, ie. >>> we enforce syscall-based acquisitions unconditionally. For the >>> assert_nrt case, we simply acquire the required information via >>> __xn_sys_current_info. >> As usual, you are firing patches too fast: >> - you are lacking the exit syscall trap. > > Don't worry, my memory is not yet _that_ bad. :) > >> - the patch is not correct and will cause kernel-space addresses to be >> passed to put_user, which probably yields bugs when the kernel is >> compiled with proper debug flags; > > Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple > NULL-check is way better. Fixed. > >> - I do not see the point where the new syscall is called with __thread, >> but I am not sure it is missing, and if it is missing, you will get the >> rightfull warning when hitting the exit syscall. > > Destructor of (now unconditionally used) xeno_current_mode_key. > >> - the user-space users of the function to get current_mode are still >> cluttered with special cases to handle the invalid state. And >> illustrating nicely the deficiency of this approach, you have forgot one >> user. > > My impression is that you are still a bit biased due to TLS limitations > on ARM. No, I think the __thread stuff thinks make C code look too much like C++ where the compiler makes things behind your back for such simple code as: void *foo = &bar; This is not what I expect from a C compiler, and I suspect a lot of C developers, including the people who advocate that C, as a simple language, and not C++ should be used for free software development. Yes, there is no gain on ARM, where this generates a function call as pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain on x86 either where a function call is so cheap. And using __thread in Xenomai source code adds an awful lot of code, and what is more, an awful lot of conditionnaly compiled code, which inevitably gets less testing. So, from a maintenance point of view, this looks like a bad call. > > And nope, I didn't forget to address it, I just forgot to mention that > there is nothing to address (the torture test does not use > xeno_get_current_mode from within a TSD destructor, thus potentially > after drop_u_mode). Expect that if someone changes the test code, he will have not to forget about this. As soon as other users exist, we will have to think about updating them. > >> I am not going to make my version before tomorrow, so you have plenty of >> time to send me an at least correct version which would take into >> account all of our discussions. > > Well... what should I reply? That starting from now, you are going to test your patches before sending them. > >> Please also consider that your "patch machinegun" way of working is not >> really sane. When there are so many often untested patches to review >> coming from you, we sometime let some errors slip through. And from a >> user point of view, seeing so many buggy patches refused is not really >> reassuring. > > Please don't make me cite counter examples for issues with different > workflows we also experienced. > > The key of an open development process is open discussion, and that > ideally based on code, even if it is not yet perfect. I can't imagine > you want Xenomai being developed in closed chambers (or even just a > single one). If you are afraid of imperfect patches being posted or even > merged into some software, you shouldn't use e.g. the Linux kernel as > well. You misunderstand me. What I would like you to do is to avoid sending me patches which are a 5 minutes, untested hack, to fix something which took much longer to think and nevertheless get wrong. Because most of these patches get it inevitably wrong too. I think every sane free-software projects ask people to test their patches/pull requests before sending them, even the Linux kernel. See for instance: http://lwn.net/Articles/377091/ I am not asking you for one week. 24 hours would be enough. In fact, the mistake is partly mine too, I am replying to your patches as fast as you send them. We were going crazy tuesday with the condition variable patches, I do not think it gives a reassuring image to the users. So that at some point, I stopped reviewing your patches. I have not even read the last 8 you sent. Without going to the other extreme of a "closed door" development model, I think we can find a more reasonable trade off than the current one. Our objective now, is to put 2.5.2 in a stable state. > +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs) > +{ > + if (unlikely(__xn_reg_mux(regs) == __NR_exit)) > + handle_shadow_exit(thread); > + Not your fault, but this does not work on ARM. Do not know if __xn_reg_mux needs fixing there or if we need to add a new macro to get the roots syscall number. -- Gilles. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address 2010-03-05 16:30 ` Gilles Chanteperdrix @ 2010-03-05 17:16 ` Jan Kiszka 2010-03-05 17:36 ` Gilles Chanteperdrix 0 siblings, 1 reply; 9+ messages in thread From: Jan Kiszka @ 2010-03-05 17:16 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: xenomai-core Gilles Chanteperdrix wrote: > Jan Kiszka wrote: >> Gilles Chanteperdrix wrote: >>> Jan Kiszka wrote: >>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user >>>> space address passed on shadow creation for the current thread. We need >>>> this in order to safely shut down a thread that wants to release its >>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel >>>> might have touched this memory even after the release which caused >>>> subtle corruptions. >>>> >>>> After we released u_mode, we can no longer make reliable decisions based >>>> on it. For the fast mutex use case, we then assume the worst case, ie. >>>> we enforce syscall-based acquisitions unconditionally. For the >>>> assert_nrt case, we simply acquire the required information via >>>> __xn_sys_current_info. >>> As usual, you are firing patches too fast: >>> - you are lacking the exit syscall trap. >> Don't worry, my memory is not yet _that_ bad. :) >> >>> - the patch is not correct and will cause kernel-space addresses to be >>> passed to put_user, which probably yields bugs when the kernel is >>> compiled with proper debug flags; >> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple >> NULL-check is way better. Fixed. >> >>> - I do not see the point where the new syscall is called with __thread, >>> but I am not sure it is missing, and if it is missing, you will get the >>> rightfull warning when hitting the exit syscall. >> Destructor of (now unconditionally used) xeno_current_mode_key. >> >>> - the user-space users of the function to get current_mode are still >>> cluttered with special cases to handle the invalid state. And >>> illustrating nicely the deficiency of this approach, you have forgot one >>> user. >> My impression is that you are still a bit biased due to TLS limitations >> on ARM. > > No, I think the __thread stuff thinks make C code look too much like C++ > where the compiler makes things behind your back for such simple code as: > > void *foo = &bar; > > This is not what I expect from a C compiler, and I suspect a lot of C > developers, including the people who advocate that C, as a simple > language, and not C++ should be used for free software development. > > Yes, there is no gain on ARM, where this generates a function call as > pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain > on x86 either where a function call is so cheap. As are individual memory accesses. The sum makes the difference. > And using __thread in > Xenomai source code adds an awful lot of code, and what is more, an > awful lot of conditionnaly compiled code, which inevitably gets less > testing. So, from a maintenance point of view, this looks like a bad call. > >> And nope, I didn't forget to address it, I just forgot to mention that >> there is nothing to address (the torture test does not use >> xeno_get_current_mode from within a TSD destructor, thus potentially >> after drop_u_mode). > > Expect that if someone changes the test code, he will have not to forget > about this. As soon as other users exist, we will have to think about > updating them. Once someone starts testing mutexes or other stuff from TSD destructors, not earlier. And this is so special that anyone of us interested in this test case will naturally be aware of its specifics. > >>> I am not going to make my version before tomorrow, so you have plenty of >>> time to send me an at least correct version which would take into >>> account all of our discussions. >> Well... what should I reply? > > That starting from now, you are going to test your patches before > sending them. > >>> Please also consider that your "patch machinegun" way of working is not >>> really sane. When there are so many often untested patches to review >>> coming from you, we sometime let some errors slip through. And from a >>> user point of view, seeing so many buggy patches refused is not really >>> reassuring. >> Please don't make me cite counter examples for issues with different >> workflows we also experienced. >> >> The key of an open development process is open discussion, and that >> ideally based on code, even if it is not yet perfect. I can't imagine >> you want Xenomai being developed in closed chambers (or even just a >> single one). If you are afraid of imperfect patches being posted or even >> merged into some software, you shouldn't use e.g. the Linux kernel as >> well. > > You misunderstand me. What I would like you to do is to avoid sending me > patches which are a 5 minutes, untested hack, to fix something which > took much longer to think and nevertheless get wrong. Because most of > these patches get it inevitably wrong too. > > I think every sane free-software projects ask people to test their > patches/pull requests before sending them, even the Linux kernel. See > for instance: > http://lwn.net/Articles/377091/ > > I am not asking you for one week. 24 hours would be enough. As you may know, complete test coverage is near to impossible. On unlucky days, I happen to send out stuff that broke while continuing to type after the last test ran. But specifically these things were tested in various combinations. It is not very polite to assume the contrary. I'm trying to avoid the same assumption about other people as well, even when things look differently from time to time. To err is human, to always write correct code is divine. But as you already mentioned in another mail, there is room for improvement /wrt testability. On step forward would be an automated build and run of some git tree that we can check into. One board per arch would be enough to avoid issues we face from time to time when we only stress our favorite archs with new features or fixes. > > In fact, the mistake is partly mine too, I am replying to your patches > as fast as you send them. We were going crazy tuesday with the condition > variable patches, I do not think it gives a reassuring image to the > users. I can only say that, as a developer in this business, I trust code having been discussed to the extreme _way_ more than stuff a single clever person checked in even after meditating over it for weeks. I've seen enough of this, produced by non-clever /me as well as way more clever people in all kinds of projects. > So that at some point, I stopped reviewing your patches. I have > not even read the last 8 you sent. Without going to the other extreme of > a "closed door" development model, I think we can find a more reasonable > trade off than the current one. Well, you have to decide which piece to take, or to request it to be rewritten, or to write yourself. > > Our objective now, is to put 2.5.2 in a stable state. > >> +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs) >> +{ >> + if (unlikely(__xn_reg_mux(regs) == __NR_exit)) >> + handle_shadow_exit(thread); >> + > > Not your fault, but this does not work on ARM. Do not know if > __xn_reg_mux needs fixing there or if we need to add a new macro to get > the roots syscall number. And that's the point of sending patches for discussion. It's regs->ARM_r7 == __NR_SYSCALL_BASE + __NR_MY_SYSCALL, right? If __xn_reg_mux is supposed to be only a mux value for Xenomai, we need a separate accessor for the Linux syscall number on all archs. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address 2010-03-05 17:16 ` Jan Kiszka @ 2010-03-05 17:36 ` Gilles Chanteperdrix 2010-03-06 9:39 ` Jan Kiszka 0 siblings, 1 reply; 9+ messages in thread From: Gilles Chanteperdrix @ 2010-03-05 17:36 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core Jan Kiszka wrote: > Gilles Chanteperdrix wrote: >> Jan Kiszka wrote: >>> Gilles Chanteperdrix wrote: >>>> Jan Kiszka wrote: >>>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user >>>>> space address passed on shadow creation for the current thread. We need >>>>> this in order to safely shut down a thread that wants to release its >>>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel >>>>> might have touched this memory even after the release which caused >>>>> subtle corruptions. >>>>> >>>>> After we released u_mode, we can no longer make reliable decisions based >>>>> on it. For the fast mutex use case, we then assume the worst case, ie. >>>>> we enforce syscall-based acquisitions unconditionally. For the >>>>> assert_nrt case, we simply acquire the required information via >>>>> __xn_sys_current_info. >>>> As usual, you are firing patches too fast: >>>> - you are lacking the exit syscall trap. >>> Don't worry, my memory is not yet _that_ bad. :) >>> >>>> - the patch is not correct and will cause kernel-space addresses to be >>>> passed to put_user, which probably yields bugs when the kernel is >>>> compiled with proper debug flags; >>> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple >>> NULL-check is way better. Fixed. >>> >>>> - I do not see the point where the new syscall is called with __thread, >>>> but I am not sure it is missing, and if it is missing, you will get the >>>> rightfull warning when hitting the exit syscall. >>> Destructor of (now unconditionally used) xeno_current_mode_key. >>> >>>> - the user-space users of the function to get current_mode are still >>>> cluttered with special cases to handle the invalid state. And >>>> illustrating nicely the deficiency of this approach, you have forgot one >>>> user. >>> My impression is that you are still a bit biased due to TLS limitations >>> on ARM. >> No, I think the __thread stuff thinks make C code look too much like C++ >> where the compiler makes things behind your back for such simple code as: >> >> void *foo = &bar; >> >> This is not what I expect from a C compiler, and I suspect a lot of C >> developers, including the people who advocate that C, as a simple >> language, and not C++ should be used for free software development. >> >> Yes, there is no gain on ARM, where this generates a function call as >> pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain >> on x86 either where a function call is so cheap. > > As are individual memory accesses. The sum makes the difference. Ok. If you find before 2.5.3 a platform where this makes a difference of more than 5% of this platform's latency, I do not remove the __thread stuff. Deal? > Once someone starts testing mutexes or other stuff from TSD destructors, > not earlier. And this is so special that anyone of us interested in this > test case will naturally be aware of its specifics. As you said to err is human. And having a single point for handling an exceptional case is a good idea. And this shortens your patch. >> I think every sane free-software projects ask people to test their >> patches/pull requests before sending them, even the Linux kernel. See >> for instance: >> http://lwn.net/Articles/377091/ >> >> I am not asking you for one week. 24 hours would be enough. > > As you may know, complete test coverage is near to impossible. That is no excuse for not making any test. > On unlucky days, I happen to send out stuff that broke while continuing to > type after the last test ran. But specifically these things were tested > in various combinations. > > It is not very polite to assume the contrary. When you send a patch every 5 minutes, it is obvious that it was not tested. No politeness involved. Everybody can check the timestamps in the mailing lists and do the math. I'm trying to avoid the > same assumption about other people as well, even when things look > differently from time to time. To err is human, to always write correct > code is divine. > > But as you already mentioned in another mail, there is room for > improvement /wrt testability. On step forward would be an automated > build and run of some git tree that we can check into. One board per > arch would be enough to avoid issues we face from time to time when we > only stress our favorite archs with new features or fixes. We have the automated build. I plan seriously to work on the automated run for 2.5.3. But it is too late for 2.5.2. Also note that I gave you access to my server where several targets are configured and usable. If you forgot the password, I can reset it. > >> In fact, the mistake is partly mine too, I am replying to your patches >> as fast as you send them. We were going crazy tuesday with the condition >> variable patches, I do not think it gives a reassuring image to the >> users. > > I can only say that, as a developer in this business, I trust code > having been discussed to the extreme _way_ more than stuff a single > clever person checked in even after meditating over it for weeks. I've > seen enough of this, produced by non-clever /me as well as way more > clever people in all kinds of projects. > >> So that at some point, I stopped reviewing your patches. I have >> not even read the last 8 you sent. Without going to the other extreme of >> a "closed door" development model, I think we can find a more reasonable >> trade off than the current one. > > Well, you have to decide which piece to take, or to request it to be > rewritten, or to write yourself. Yes. That is the plan. Tomorrow. > >> Our objective now, is to put 2.5.2 in a stable state. >> >>> +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs) >>> +{ >>> + if (unlikely(__xn_reg_mux(regs) == __NR_exit)) >>> + handle_shadow_exit(thread); >>> + >> Not your fault, but this does not work on ARM. Do not know if >> __xn_reg_mux needs fixing there or if we need to add a new macro to get >> the roots syscall number. > > And that's the point of sending patches for discussion. > > It's regs->ARM_r7 == __NR_SYSCALL_BASE + __NR_MY_SYSCALL, right? If > __xn_reg_mux is supposed to be only a mux value for Xenomai, we need a > separate accessor for the Linux syscall number on all archs. or __NR_OABI_SYSCALL_BASE + __NR_MY_SYSCALL depending on CONFIG_OABI_COMPAT. > > Jan > -- Gilles. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address 2010-03-05 17:36 ` Gilles Chanteperdrix @ 2010-03-06 9:39 ` Jan Kiszka 2010-03-06 12:07 ` Gilles Chanteperdrix 0 siblings, 1 reply; 9+ messages in thread From: Jan Kiszka @ 2010-03-06 9:39 UTC (permalink / raw) To: Gilles Chanteperdrix; +Cc: xenomai-core [-- Attachment #1: Type: text/plain, Size: 7732 bytes --] Gilles Chanteperdrix wrote: > Jan Kiszka wrote: >> Gilles Chanteperdrix wrote: >>> Jan Kiszka wrote: >>>> Gilles Chanteperdrix wrote: >>>>> Jan Kiszka wrote: >>>>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user >>>>>> space address passed on shadow creation for the current thread. We need >>>>>> this in order to safely shut down a thread that wants to release its >>>>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel >>>>>> might have touched this memory even after the release which caused >>>>>> subtle corruptions. >>>>>> >>>>>> After we released u_mode, we can no longer make reliable decisions based >>>>>> on it. For the fast mutex use case, we then assume the worst case, ie. >>>>>> we enforce syscall-based acquisitions unconditionally. For the >>>>>> assert_nrt case, we simply acquire the required information via >>>>>> __xn_sys_current_info. >>>>> As usual, you are firing patches too fast: >>>>> - you are lacking the exit syscall trap. >>>> Don't worry, my memory is not yet _that_ bad. :) >>>> >>>>> - the patch is not correct and will cause kernel-space addresses to be >>>>> passed to put_user, which probably yields bugs when the kernel is >>>>> compiled with proper debug flags; >>>> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple >>>> NULL-check is way better. Fixed. >>>> >>>>> - I do not see the point where the new syscall is called with __thread, >>>>> but I am not sure it is missing, and if it is missing, you will get the >>>>> rightfull warning when hitting the exit syscall. >>>> Destructor of (now unconditionally used) xeno_current_mode_key. >>>> >>>>> - the user-space users of the function to get current_mode are still >>>>> cluttered with special cases to handle the invalid state. And >>>>> illustrating nicely the deficiency of this approach, you have forgot one >>>>> user. >>>> My impression is that you are still a bit biased due to TLS limitations >>>> on ARM. >>> No, I think the __thread stuff thinks make C code look too much like C++ >>> where the compiler makes things behind your back for such simple code as: >>> >>> void *foo = &bar; >>> >>> This is not what I expect from a C compiler, and I suspect a lot of C >>> developers, including the people who advocate that C, as a simple >>> language, and not C++ should be used for free software development. >>> >>> Yes, there is no gain on ARM, where this generates a function call as >>> pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain >>> on x86 either where a function call is so cheap. >> As are individual memory accesses. The sum makes the difference. > > Ok. If you find before 2.5.3 a platform where this makes a difference of > more than 5% of this platform's latency, I do not remove the __thread > stuff. Deal? If you manage to remove 5% of the configuration variables of Xenomai that way. Or 5% of the conditional code. > >> Once someone starts testing mutexes or other stuff from TSD destructors, >> not earlier. And this is so special that anyone of us interested in this >> test case will naturally be aware of its specifics. > > As you said to err is human. And having a single point for handling an > exceptional case is a good idea. And this shortens your patch. > > >>> I think every sane free-software projects ask people to test their >>> patches/pull requests before sending them, even the Linux kernel. See >>> for instance: >>> http://lwn.net/Articles/377091/ >>> >>> I am not asking you for one week. 24 hours would be enough. >> As you may know, complete test coverage is near to impossible. > > That is no excuse for not making any test. > > >> On unlucky days, I happen to send out stuff that broke while continuing to >> type after the last test ran. But specifically these things were tested >> in various combinations. >> >> It is not very polite to assume the contrary. > > When you send a patch every 5 minutes, it is obvious that it was not > tested. No politeness involved. Everybody can check the timestamps in > the mailing lists and do the math. I think my workflow was misleading regarding this. Not necessarily all stuff I push into my 'for-upstream' branch is done with all tests at the time when I push. Official pull request mails are supposed to mark this. I'm pushing to save my work and, of course, to expose it. So I appreciate comments, but I surely don't expect them within minutes. > > I'm trying to avoid the >> same assumption about other people as well, even when things look >> differently from time to time. To err is human, to always write correct >> code is divine. >> >> But as you already mentioned in another mail, there is room for >> improvement /wrt testability. On step forward would be an automated >> build and run of some git tree that we can check into. One board per >> arch would be enough to avoid issues we face from time to time when we >> only stress our favorite archs with new features or fixes. > > We have the automated build. Then we just need to run it against some 'build-test' branches of everyone's git repositories on checkin. That would be great. > I plan seriously to work on the automated > run for 2.5.3. But it is too late for 2.5.2. Also note that I gave you > access to my server where several targets are configured and usable. If > you forgot the password, I can reset it. I do remember this, but I thought this was for a specific test. If I'm supposed to access that box regularly, I'm fine, we can arrange this. > >>> In fact, the mistake is partly mine too, I am replying to your patches >>> as fast as you send them. We were going crazy tuesday with the condition >>> variable patches, I do not think it gives a reassuring image to the >>> users. >> I can only say that, as a developer in this business, I trust code >> having been discussed to the extreme _way_ more than stuff a single >> clever person checked in even after meditating over it for weeks. I've >> seen enough of this, produced by non-clever /me as well as way more >> clever people in all kinds of projects. >> >>> So that at some point, I stopped reviewing your patches. I have >>> not even read the last 8 you sent. Without going to the other extreme of >>> a "closed door" development model, I think we can find a more reasonable >>> trade off than the current one. >> Well, you have to decide which piece to take, or to request it to be >> rewritten, or to write yourself. > > Yes. That is the plan. Tomorrow. > >>> Our objective now, is to put 2.5.2 in a stable state. >>> >>>> +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs) >>>> +{ >>>> + if (unlikely(__xn_reg_mux(regs) == __NR_exit)) >>>> + handle_shadow_exit(thread); >>>> + >>> Not your fault, but this does not work on ARM. Do not know if >>> __xn_reg_mux needs fixing there or if we need to add a new macro to get >>> the roots syscall number. >> And that's the point of sending patches for discussion. >> >> It's regs->ARM_r7 == __NR_SYSCALL_BASE + __NR_MY_SYSCALL, right? If >> __xn_reg_mux is supposed to be only a mux value for Xenomai, we need a >> separate accessor for the Linux syscall number on all archs. > > or __NR_OABI_SYSCALL_BASE + __NR_MY_SYSCALL depending on CONFIG_OABI_COMPAT. > Patch is in my queue if you are interested. Jan PS: Note that this u_mode fix exposed an issue of the sigtest: cancel_with_signals fails if there is some TSD destructor registered that issues a Xenomai syscall. Not sure yet if it is a fundamental issue or just related to the test itself. [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 257 bytes --] ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address 2010-03-06 9:39 ` Jan Kiszka @ 2010-03-06 12:07 ` Gilles Chanteperdrix 2010-03-08 16:06 ` Gilles Chanteperdrix 0 siblings, 1 reply; 9+ messages in thread From: Gilles Chanteperdrix @ 2010-03-06 12:07 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core Jan Kiszka wrote: > Gilles Chanteperdrix wrote: >> Jan Kiszka wrote: >>> Gilles Chanteperdrix wrote: >>>> Jan Kiszka wrote: >>>>> Gilles Chanteperdrix wrote: >>>>>> Jan Kiszka wrote: >>>>>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user >>>>>>> space address passed on shadow creation for the current thread. We need >>>>>>> this in order to safely shut down a thread that wants to release its >>>>>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel >>>>>>> might have touched this memory even after the release which caused >>>>>>> subtle corruptions. >>>>>>> >>>>>>> After we released u_mode, we can no longer make reliable decisions based >>>>>>> on it. For the fast mutex use case, we then assume the worst case, ie. >>>>>>> we enforce syscall-based acquisitions unconditionally. For the >>>>>>> assert_nrt case, we simply acquire the required information via >>>>>>> __xn_sys_current_info. >>>>>> As usual, you are firing patches too fast: >>>>>> - you are lacking the exit syscall trap. >>>>> Don't worry, my memory is not yet _that_ bad. :) >>>>> >>>>>> - the patch is not correct and will cause kernel-space addresses to be >>>>>> passed to put_user, which probably yields bugs when the kernel is >>>>>> compiled with proper debug flags; >>>>> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple >>>>> NULL-check is way better. Fixed. >>>>> >>>>>> - I do not see the point where the new syscall is called with __thread, >>>>>> but I am not sure it is missing, and if it is missing, you will get the >>>>>> rightfull warning when hitting the exit syscall. >>>>> Destructor of (now unconditionally used) xeno_current_mode_key. >>>>> >>>>>> - the user-space users of the function to get current_mode are still >>>>>> cluttered with special cases to handle the invalid state. And >>>>>> illustrating nicely the deficiency of this approach, you have forgot one >>>>>> user. >>>>> My impression is that you are still a bit biased due to TLS limitations >>>>> on ARM. >>>> No, I think the __thread stuff thinks make C code look too much like C++ >>>> where the compiler makes things behind your back for such simple code as: >>>> >>>> void *foo = &bar; >>>> >>>> This is not what I expect from a C compiler, and I suspect a lot of C >>>> developers, including the people who advocate that C, as a simple >>>> language, and not C++ should be used for free software development. >>>> >>>> Yes, there is no gain on ARM, where this generates a function call as >>>> pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain >>>> on x86 either where a function call is so cheap. >>> As are individual memory accesses. The sum makes the difference. >> Ok. If you find before 2.5.3 a platform where this makes a difference of >> more than 5% of this platform's latency, I do not remove the __thread >> stuff. Deal? > > If you manage to remove 5% of the configuration variables of Xenomai > that way. Or 5% of the conditional code. The __thread stuff introduces two configuration variables, that is 4 combinations. Now, for the u_mode modifications for which you sent me a pull-request (so, according to what you said, is tested), did you test the 4 combinations? If no, then chances of bug are high. If yes, then do not you think that it is a major PITA to compile Xenomai user-space 4 times for a so little gain? So __thread has a maintenance cost, and if we do not pay attention to it, we release buggy code. Now, let me quote something someone you know very well wrote some time ago: "While RTAI is focused on lowest technically feasible latencies, Xenomai also considers clean extensibility (RTOS skins), portability, and maintainability as very important goals." This is in our wiki. __thread and the wrong life-cycle u_mode are examples of things which focus on ridiculous micro-optimizations rather on maintainability. > I think my workflow was misleading regarding this. Not necessarily all > stuff I push into my 'for-upstream' branch is done with all tests at the > time when I push. Official pull request mails are supposed to mark this. > I'm pushing to save my work and, of course, to expose it. So I > appreciate comments, but I surely don't expect them within minutes. No. You still refuse to acknowledge the truth: I receive pull requests for untested stuff. Read John's mail again. He cherry picked changes, for which I received a pull request, he ran the mutex-torture tests, which the patchset had modified, and the test was not passing. So, the conclusion is that you did not run mutex-torture, which the commit had modified before sending the pull request. I did not have to dig long in the git logs to find this example. It was just yesterday. Think about it. That is precisely the situation I want to avoid from now on. > >> I'm trying to avoid the >>> same assumption about other people as well, even when things look >>> differently from time to time. To err is human, to always write correct >>> code is divine. >>> >>> But as you already mentioned in another mail, there is room for >>> improvement /wrt testability. On step forward would be an automated >>> build and run of some git tree that we can check into. One board per >>> arch would be enough to avoid issues we face from time to time when we >>> only stress our favorite archs with new features or fixes. >> We have the automated build. > > Then we just need to run it against some 'build-test' branches of > everyone's git repositories on checkin. That would be great. Unfortunately, I am running the tests with ccache on a core i7 on a raid0 array, and it still takes around one hour. So, I prefer running this by hand. But do not worry, it is run before the releases. > >> I plan seriously to work on the automated >> run for 2.5.3. But it is too late for 2.5.2. Also note that I gave you >> access to my server where several targets are configured and usable. If >> you forgot the password, I can reset it. > > I do remember this, but I thought this was for a specific test. If I'm > supposed to access that box regularly, I'm fine, we can arrange this. I was just mentioning this to mean that if you want to test, you can. You are not supposed to do anything, you are free to use it, you can do as you like. > PS: Note that this u_mode fix exposed an issue of the sigtest: > cancel_with_signals fails if there is some TSD destructor registered > that issues a Xenomai syscall. Not sure yet if it is a fundamental issue > or just related to the test itself. The sigtest test probably assumes that no parasite syscall occurs. Yet another reason for not using this u_mode implementation. -- Gilles. ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address 2010-03-06 12:07 ` Gilles Chanteperdrix @ 2010-03-08 16:06 ` Gilles Chanteperdrix 0 siblings, 0 replies; 9+ messages in thread From: Gilles Chanteperdrix @ 2010-03-08 16:06 UTC (permalink / raw) To: Jan Kiszka; +Cc: xenomai-core Gilles Chanteperdrix wrote: >> I think my workflow was misleading regarding this. Not necessarily all >> stuff I push into my 'for-upstream' branch is done with all tests at the >> time when I push. Official pull request mails are supposed to mark this. >> I'm pushing to save my work and, of course, to expose it. So I >> appreciate comments, but I surely don't expect them within minutes. > > No. You still refuse to acknowledge the truth: I receive pull requests > for untested stuff. Read John's mail again. He cherry picked changes, > for which I received a pull request, he ran the mutex-torture tests, > which the patchset had modified, and the test was not passing. So, the > conclusion is that you did not run mutex-torture, which the commit had > modified before sending the pull request. I did not have to dig long in > the git logs to find this example. It was just yesterday. Think about it. > > That is precisely the situation I want to avoid from now on. Ok. It turns out that yes, the timed mutex tests had a bug, but probably only visible on uniprocessor machines. And I guess you test on SMP machines. So, my apologies, the patch was tested, not enough, but tested. >> PS: Note that this u_mode fix exposed an issue of the sigtest: >> cancel_with_signals fails if there is some TSD destructor registered >> that issues a Xenomai syscall. Not sure yet if it is a fundamental issue >> or just related to the test itself. > > The sigtest test probably assumes that no parasite syscall occurs. Yet > another reason for not using this u_mode implementation. Remove that test case. I do not know what it was testing anyway. -- Gilles. ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2010-03-08 16:06 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-03-05 11:37 [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address Jan Kiszka 2010-03-05 12:02 ` Gilles Chanteperdrix 2010-03-05 15:53 ` [Xenomai-core] [RFC][PATCH v2] " Jan Kiszka 2010-03-05 16:30 ` Gilles Chanteperdrix 2010-03-05 17:16 ` Jan Kiszka 2010-03-05 17:36 ` Gilles Chanteperdrix 2010-03-06 9:39 ` Jan Kiszka 2010-03-06 12:07 ` Gilles Chanteperdrix 2010-03-08 16:06 ` Gilles Chanteperdrix
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.