[Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address

All of lore.kernel.org
 help / color / mirror / Atom feed

* [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address
@ 2010-03-05 11:37 Jan Kiszka
  2010-03-05 12:02 ` Gilles Chanteperdrix
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Kiszka @ 2010-03-05 11:37 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-core

This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
space address passed on shadow creation for the current thread. We need
this in order to safely shut down a thread that wants to release its
u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
might have touched this memory even after the release which caused
subtle corruptions.

After we released u_mode, we can no longer make reliable decisions based
on it. For the fast mutex use case, we then assume the worst case, ie.
we enforce syscall-based acquisitions unconditionally. For the
assert_nrt case, we simply acquire the required information via
__xn_sys_current_info.

---
 include/asm-generic/bits/current.h |   24 ++++++++----
 include/asm-generic/syscall.h      |    1 +
 include/nucleus/thread.h           |    2 +
 ksrc/nucleus/shadow.c              |   14 +++++++
 src/rtdk/assert_context.c          |    9 +++-
 src/skins/common/current.c         |   72 ++++++++++++++++++++++++++++--------
 6 files changed, 95 insertions(+), 27 deletions(-)

diff --git a/include/asm-generic/bits/current.h b/include/asm-generic/bits/current.h
index 0c3166b..f0e569c 100644
--- a/include/asm-generic/bits/current.h
+++ b/include/asm-generic/bits/current.h
@@ -2,7 +2,9 @@
 #define _XENO_ASM_GENERIC_CURRENT_H
 
 #include <pthread.h>
-#include <nucleus/types.h>
+#include <nucleus/thread.h>
+
+extern pthread_key_t xeno_current_mode_key;
 
 #ifdef HAVE___THREAD
 extern __thread xnhandle_t xeno_current __attribute__ ((tls_model ("initial-exec")));
@@ -19,15 +21,13 @@ static inline unsigned long xeno_get_current_mode(void)
 	return xeno_current_mode;
 }
 
-static inline unsigned long *xeno_init_current_mode(void)
+static inline int xeno_primary_mode(void)
 {
-	return &xeno_current_mode;
+	return xeno_current_mode & XNRELAX;
 }
 
-#define xeno_init_current_keys() do { } while (0)
 #else /* ! HAVE___THREAD */
 extern pthread_key_t xeno_current_key;
-extern pthread_key_t xeno_current_mode_key;
 
 static inline xnhandle_t xeno_get_current(void)
 {
@@ -41,14 +41,22 @@ static inline xnhandle_t xeno_get_current(void)
 
 static inline unsigned long xeno_get_current_mode(void)
 {
-	return *(unsigned long *)pthread_getspecific(xeno_current_mode_key);
+	unsigned long *mode = pthread_getspecific(xeno_current_mode_key);
+
+	return mode ? *mode : -1;
 }
 
-unsigned long *xeno_init_current_mode(void);
+static inline int xeno_primary_mode(void)
+{
+	unsigned long *mode = pthread_getspecific(xeno_current_mode_key);
 
-void xeno_init_current_keys(void);
+	return mode ? (*mode & XNRELAX) : 0;
+}
 #endif /* ! HAVE___THREAD */
 
 void xeno_set_current(void);
 
+unsigned long *xeno_init_current_mode(void);
+void xeno_init_current_keys(void);
+
 #endif /* _XENO_ASM_GENERIC_CURRENT_H */
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 315822e..62f7aa2 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -46,6 +46,7 @@
 #define __xn_sys_current	8	/* threadh = xnthread_handle(cur) */
 #define __xn_sys_current_info	9	/* r = xnshadow_current_info(&info) */
 #define __xn_sys_get_next_sigs 10	/* only unqueue pending signals. */
+#define __xn_sys_drop_u_mode   11	/* stop updating thread->u_mode */
 
 #define XENOMAI_LINUX_DOMAIN  0
 #define XENOMAI_XENO_DOMAIN   1
diff --git a/include/nucleus/thread.h b/include/nucleus/thread.h
index b2baccb..cb772f7 100644
--- a/include/nucleus/thread.h
+++ b/include/nucleus/thread.h
@@ -328,6 +328,8 @@ typedef struct xnthread {
 #ifdef CONFIG_XENO_OPT_PERVASIVE
 	unsigned long __user *u_mode;	/* Thread mode variable in userland. */
 
+	unsigned long u_mode_dump;	/* u_mode points here once userland dropped it. */
+
 	unsigned u_sigpending;		/* One bit per skin */
 #endif /* CONFIG_XENO_OPT_PERVASIVE */
 
diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c
index d82d6c7..8431025 100644
--- a/ksrc/nucleus/shadow.c
+++ b/ksrc/nucleus/shadow.c
@@ -1967,6 +1967,18 @@ static int xnshadow_sys_get_next_sigs(struct pt_regs *regs)
 	return -EINTR;
 }
 
+static int xnshadow_sys_drop_u_mode(struct pt_regs *regs)
+{
+	xnthread_t *cur = xnshadow_thread(current);
+
+	if (!cur)
+		return -EPERM;
+
+	cur->u_mode = (unsigned long __user*)&cur->u_mode_dump;
+
+	return 0;
+}
+
 static xnsysent_t __systab[] = {
 	[__xn_sys_migrate] = {&xnshadow_sys_migrate, __xn_exec_current},
 	[__xn_sys_arch] = {&xnshadow_sys_arch, __xn_exec_any},
@@ -1981,6 +1993,8 @@ static xnsysent_t __systab[] = {
 		{&xnshadow_sys_current_info, __xn_exec_shadow},
 	[__xn_sys_get_next_sigs] = 
 		{&xnshadow_sys_get_next_sigs, __xn_exec_conforming},
+	[__xn_sys_drop_u_mode] =
+		{&xnshadow_sys_drop_u_mode, __xn_exec_shadow},
 };
 
 static void post_ppd_release(struct xnheap *h)
diff --git a/src/rtdk/assert_context.c b/src/rtdk/assert_context.c
index f67bcd8..bad19f3 100644
--- a/src/rtdk/assert_context.c
+++ b/src/rtdk/assert_context.c
@@ -30,9 +30,12 @@ void assert_nrt(void)
 	xnthread_info_t info;
 	int err;
 
-	if (unlikely(xeno_get_current() != XN_NO_HANDLE &&
-		     !(xeno_get_current_mode() & XNRELAX))) {
+	if (xeno_get_current() != XN_NO_HANDLE)
+		return;
 
+	info.state = xeno_get_current_mode();
+
+	if (unlikely(!(info.state & XNRELAX) || info.state == -1)) {
 		err = XENOMAI_SYSCALL1(__xn_sys_current_info, &info);
 
 		if (err) {
@@ -41,7 +44,7 @@ void assert_nrt(void)
 			return;
 		}
 
-		if (info.state & XNTRAPSW)
+		if ((info.state & (XNRELAX | XNTRAPSW)) == XNTRAPSW)
 			pthread_kill(pthread_self(), SIGXCPU);
 	}
 }
diff --git a/src/skins/common/current.c b/src/skins/common/current.c
index 2922d6e..1b09390 100644
--- a/src/skins/common/current.c
+++ b/src/skins/common/current.c
@@ -1,10 +1,13 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <pthread.h>
 
 #include <asm/xenomai/syscall.h>
 #include <nucleus/types.h>
 
+pthread_key_t xeno_current_mode_key;
+
 #ifdef HAVE___THREAD
 __thread __attribute__ ((tls_model ("initial-exec")))
 xnhandle_t xeno_current = XN_NO_HANDLE;
@@ -16,36 +19,62 @@ static inline void __xeno_set_current(xnhandle_t current)
 	xeno_current = current;
 }
 #else /* !HAVE___THREAD */
-#include <pthread.h>
-
 pthread_key_t xeno_current_key;
-pthread_key_t xeno_current_mode_key;
 
 static inline void __xeno_set_current(xnhandle_t current)
 {
 	pthread_setspecific(xeno_current_key, (void *)current);
 }
+#endif /* !HAVE___THREAD */
 
-unsigned long *xeno_init_current_mode(void)
+static void free_current_mode(void *key)
 {
-	unsigned long *mode = malloc(sizeof(unsigned long));
-	pthread_setspecific(xeno_current_mode_key, mode);
-	return mode;
+	unsigned long *mode = key;
+	int err;
+
+	*mode = -1;
+
+	err = XENOMAI_SYSCALL0(__xn_sys_drop_u_mode);
+
+#ifndef HAVE___THREAD
+	if (err) {
+		static int warned;
+
+		if (!warned) {
+			warned = 1;
+			fprintf(stderr,
+				"\nXenomai: WARNING, this Xenomai kernel can "
+					"cause spurious application\n"
+				"         crashes on thread termination.\n"
+				"         To reduce the probality, we will "
+					"leak a few bytes heap now.\n"
+				);
+		}
+	} else
+		free(mode);
+#endif /* !HAVE___THREAD */
 }
 
 static void init_current_keys(void)
 {
-	int err = pthread_key_create(&xeno_current_key, NULL);
+	int err;
+
+#ifndef HAVE___THREAD
+	err = pthread_key_create(&xeno_current_key, NULL);
 	if (err)
 		goto error_exit;
+#endif /* !HAVE___THREAD */
 
-	err = pthread_key_create(&xeno_current_mode_key, free);
-	if (err) {
-	  error_exit:
-		fprintf(stderr, "Xenomai: error creating TSD key: %s\n",
-			strerror(err));
-		exit(EXIT_FAILURE);
-	}
+	err = pthread_key_create(&xeno_current_mode_key, free_current_mode);
+	if (err)
+		goto error_exit;
+
+	return;
+
+  error_exit:
+	fprintf(stderr, "Xenomai: error creating TSD key: %s\n",
+		strerror(err));
+	exit(EXIT_FAILURE);
 }
 
 void xeno_init_current_keys(void)
@@ -53,7 +82,6 @@ void xeno_init_current_keys(void)
 	static pthread_once_t xeno_init_current_keys_once = PTHREAD_ONCE_INIT;
 	pthread_once(&xeno_init_current_keys_once, init_current_keys);
 }
-#endif /* !HAVE___THREAD */
 
 void xeno_set_current(void)
 {
@@ -68,3 +96,15 @@ void xeno_set_current(void)
 	}
 	__xeno_set_current(current);
 }
+
+unsigned long *xeno_init_current_mode(void)
+{
+#ifdef HAVE___THREAD
+	unsigned long *mode = &xeno_current_mode;
+#else /* !HAVE___THREAD */
+	unsigned long *mode = malloc(sizeof(unsigned long));
+#endif /* !HAVE___THREAD */
+
+	pthread_setspecific(xeno_current_mode_key, mode);
+	return mode;
+}
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address
  2010-03-05 11:37 [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address Jan Kiszka
@ 2010-03-05 12:02 ` Gilles Chanteperdrix
  2010-03-05 15:53   ` [Xenomai-core] [RFC][PATCH v2] " Jan Kiszka
  0 siblings, 1 reply; 9+ messages in thread
From: Gilles Chanteperdrix @ 2010-03-05 12:02 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

Jan Kiszka wrote:
> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
> space address passed on shadow creation for the current thread. We need
> this in order to safely shut down a thread that wants to release its
> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
> might have touched this memory even after the release which caused
> subtle corruptions.
> 
> After we released u_mode, we can no longer make reliable decisions based
> on it. For the fast mutex use case, we then assume the worst case, ie.
> we enforce syscall-based acquisitions unconditionally. For the
> assert_nrt case, we simply acquire the required information via
> __xn_sys_current_info.

As usual, you are firing patches too fast:
- you are lacking the exit syscall trap.
- the patch is not correct and will cause kernel-space addresses to be
passed to put_user, which probably yields bugs when the kernel is
compiled with proper debug flags;
- I do not see the point where the new syscall is called with __thread,
but I am not sure it is missing, and if it is missing, you will get the
rightfull warning when hitting the exit syscall.
- the user-space users of the function to get current_mode are still
cluttered with special cases to handle the invalid state. And
illustrating nicely the deficiency of this approach, you have forgot one
user.

I am not going to make my version before tomorrow, so you have plenty of
time to send me an at least correct version which would take into
account all of our discussions.

Please also consider that your "patch machinegun" way of working is not
really sane. When there are so many often untested patches to review
coming from you, we sometime let some errors slip through. And from a
user point of view, seeing so many buggy patches refused is not really
reassuring.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address
  2010-03-05 12:02 ` Gilles Chanteperdrix
@ 2010-03-05 15:53   ` Jan Kiszka
  2010-03-05 16:30     ` Gilles Chanteperdrix
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Kiszka @ 2010-03-05 15:53 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-core

Gilles Chanteperdrix wrote:
> Jan Kiszka wrote:
>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
>> space address passed on shadow creation for the current thread. We need
>> this in order to safely shut down a thread that wants to release its
>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
>> might have touched this memory even after the release which caused
>> subtle corruptions.
>>
>> After we released u_mode, we can no longer make reliable decisions based
>> on it. For the fast mutex use case, we then assume the worst case, ie.
>> we enforce syscall-based acquisitions unconditionally. For the
>> assert_nrt case, we simply acquire the required information via
>> __xn_sys_current_info.
> 
> As usual, you are firing patches too fast:
> - you are lacking the exit syscall trap.

Don't worry, my memory is not yet _that_ bad. :)

> - the patch is not correct and will cause kernel-space addresses to be
> passed to put_user, which probably yields bugs when the kernel is
> compiled with proper debug flags;

Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple
NULL-check is way better. Fixed.

> - I do not see the point where the new syscall is called with __thread,
> but I am not sure it is missing, and if it is missing, you will get the
> rightfull warning when hitting the exit syscall.

Destructor of (now unconditionally used) xeno_current_mode_key.

> - the user-space users of the function to get current_mode are still
> cluttered with special cases to handle the invalid state. And
> illustrating nicely the deficiency of this approach, you have forgot one
> user.

My impression is that you are still a bit biased due to TLS limitations
on ARM.

And nope, I didn't forget to address it, I just forgot to mention that
there is nothing to address (the torture test does not use
xeno_get_current_mode from within a TSD destructor, thus potentially
after drop_u_mode).

> 
> I am not going to make my version before tomorrow, so you have plenty of
> time to send me an at least correct version which would take into
> account all of our discussions.

Well... what should I reply?

> 
> Please also consider that your "patch machinegun" way of working is not
> really sane. When there are so many often untested patches to review
> coming from you, we sometime let some errors slip through. And from a
> user point of view, seeing so many buggy patches refused is not really
> reassuring.

Please don't make me cite counter examples for issues with different
workflows we also experienced.

The key of an open development process is open discussion, and that
ideally based on code, even if it is not yet perfect. I can't imagine
you want Xenomai being developed in closed chambers (or even just a
single one). If you are afraid of imperfect patches being posted or even
merged into some software, you shouldn't use e.g. the Linux kernel as
well.

Jan

------>

This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
space address passed on shadow creation for the current thread. We need
this in order to safely shut down a thread that wants to release its
u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
might have touched this memory even after the release which caused
subtle corruptions.

After we released u_mode, we can no longer make reliable decisions based
on it. For the fast mutex use case, we then assume the worst case, ie.
we enforce syscall-based acquisitions unconditionally. For the
assert_nrt case, we simply acquire the required information via
__xn_sys_current_info. The mutex torture test requires no special care
as it cannot retrieve invalid u_mode states.

---

Changes in v2:
 - Drop non-working redirection of u_mode after release, just check for
   NULL u_mode before writing to it
 - Trap exit syscall to clear u_mode if user space failed to
 - Add/improve warnings about too old user/kernel space
 - Code refactorings to avoid new #ifdefs

 include/asm-generic/bits/current.h |   24 +++++++----
 include/asm-generic/syscall.h      |    1 +
 ksrc/nucleus/shadow.c              |   51 +++++++++++++++++++++---
 src/rtdk/assert_context.c          |    9 +++-
 src/skins/common/current.c         |   78 +++++++++++++++++++++++++++++++----
 5 files changed, 137 insertions(+), 26 deletions(-)

diff --git a/include/asm-generic/bits/current.h b/include/asm-generic/bits/current.h
index 0c3166b..f0e569c 100644
--- a/include/asm-generic/bits/current.h
+++ b/include/asm-generic/bits/current.h
@@ -2,7 +2,9 @@
 #define _XENO_ASM_GENERIC_CURRENT_H
 
 #include <pthread.h>
-#include <nucleus/types.h>
+#include <nucleus/thread.h>
+
+extern pthread_key_t xeno_current_mode_key;
 
 #ifdef HAVE___THREAD
 extern __thread xnhandle_t xeno_current __attribute__ ((tls_model ("initial-exec")));
@@ -19,15 +21,13 @@ static inline unsigned long xeno_get_current_mode(void)
 	return xeno_current_mode;
 }
 
-static inline unsigned long *xeno_init_current_mode(void)
+static inline int xeno_primary_mode(void)
 {
-	return &xeno_current_mode;
+	return xeno_current_mode & XNRELAX;
 }
 
-#define xeno_init_current_keys() do { } while (0)
 #else /* ! HAVE___THREAD */
 extern pthread_key_t xeno_current_key;
-extern pthread_key_t xeno_current_mode_key;
 
 static inline xnhandle_t xeno_get_current(void)
 {
@@ -41,14 +41,22 @@ static inline xnhandle_t xeno_get_current(void)
 
 static inline unsigned long xeno_get_current_mode(void)
 {
-	return *(unsigned long *)pthread_getspecific(xeno_current_mode_key);
+	unsigned long *mode = pthread_getspecific(xeno_current_mode_key);
+
+	return mode ? *mode : -1;
 }
 
-unsigned long *xeno_init_current_mode(void);
+static inline int xeno_primary_mode(void)
+{
+	unsigned long *mode = pthread_getspecific(xeno_current_mode_key);
 
-void xeno_init_current_keys(void);
+	return mode ? (*mode & XNRELAX) : 0;
+}
 #endif /* ! HAVE___THREAD */
 
 void xeno_set_current(void);
 
+unsigned long *xeno_init_current_mode(void);
+void xeno_init_current_keys(void);
+
 #endif /* _XENO_ASM_GENERIC_CURRENT_H */
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 315822e..62f7aa2 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -46,6 +46,7 @@
 #define __xn_sys_current	8	/* threadh = xnthread_handle(cur) */
 #define __xn_sys_current_info	9	/* r = xnshadow_current_info(&info) */
 #define __xn_sys_get_next_sigs 10	/* only unqueue pending signals. */
+#define __xn_sys_drop_u_mode   11	/* stop updating thread->u_mode */
 
 #define XENOMAI_LINUX_DOMAIN  0
 #define XENOMAI_XENO_DOMAIN   1
diff --git a/ksrc/nucleus/shadow.c b/ksrc/nucleus/shadow.c
index d82d6c7..8a50cbb 100644
--- a/ksrc/nucleus/shadow.c
+++ b/ksrc/nucleus/shadow.c
@@ -1018,7 +1018,8 @@ redo:
 		/* Grab the request token. */
 		return -ERESTARTSYS;
 
-	__xn_put_user(0, thread->u_mode);
+	if (thread->u_mode)
+		__xn_put_user(0, thread->u_mode);
 
 	preempt_disable();
 
@@ -1208,7 +1209,8 @@ void xnshadow_relax(int notify, int reason)
 	/* "current" is now running into the Linux domain on behalf of the
 	   root thread. */
 
-	__xn_put_user(XNRELAX, thread->u_mode);
+	if (thread->u_mode)
+		__xn_put_user(XNRELAX, thread->u_mode);
 
 	trace_mark(xn_nucleus, shadow_relaxed,
 		  "thread %p thread_name %s comm %s",
@@ -1382,7 +1384,8 @@ int xnshadow_map(xnthread_t *thread, xncompletion_t __user *u_completion,
 	if (ret)
 		return ret;
 
-	__xn_put_user(XNRELAX, u_mode);
+	if (thread->u_mode)
+		__xn_put_user(XNRELAX, u_mode);
 
 	ret = xnshadow_harden();
 
@@ -1967,6 +1970,18 @@ static int xnshadow_sys_get_next_sigs(struct pt_regs *regs)
 	return -EINTR;
 }
 
+static int xnshadow_sys_drop_u_mode(struct pt_regs *regs)
+{
+	xnthread_t *cur = xnshadow_thread(current);
+
+	if (!cur)
+		return -EPERM;
+
+	cur->u_mode = NULL;
+
+	return 0;
+}
+
 static xnsysent_t __systab[] = {
 	[__xn_sys_migrate] = {&xnshadow_sys_migrate, __xn_exec_current},
 	[__xn_sys_arch] = {&xnshadow_sys_arch, __xn_exec_any},
@@ -1981,6 +1996,8 @@ static xnsysent_t __systab[] = {
 		{&xnshadow_sys_current_info, __xn_exec_shadow},
 	[__xn_sys_get_next_sigs] = 
 		{&xnshadow_sys_get_next_sigs, __xn_exec_conforming},
+	[__xn_sys_drop_u_mode] =
+		{&xnshadow_sys_drop_u_mode, __xn_exec_shadow},
 };
 
 static void post_ppd_release(struct xnheap *h)
@@ -2034,8 +2051,30 @@ static struct xnskin_props __props = {
 	.module = NULL
 };
 
-static inline int substitute_linux_syscall(struct pt_regs *regs)
+static void handle_shadow_exit(xnthread_t *thread)
 {
+	static int warned;
+
+	/*
+	 * If user space did not deregister u_mode at this point, we
+	 * are confronted with old libraries and should warn about
+	 * potential instabilities.
+	 */
+	if (thread->u_mode && !warned) {
+		warned = 1;
+		printk(KERN_WARNING
+		       "Xenomai: old Xenomai libs detected which may crash "
+		       "on thread termination\n");
+	}
+	thread->u_mode = NULL;
+}
+
+static inline int
+substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs)
+{
+	if (unlikely(__xn_reg_mux(regs) == __NR_exit))
+		handle_shadow_exit(thread);
+
 	/* No real-time replacement for now -- let Linux handle this call. */
 	return 0;
 }
@@ -2204,7 +2243,7 @@ static inline int do_hisyscall_event(unsigned event, unsigned domid, void *data)
 	 * From now on, we know that we have a valid shadow thread
 	 * pointer.
 	 */
-	if (substitute_linux_syscall(regs))
+	if (substitute_linux_syscall(thread, regs))
 		/*
 		 * This is a Linux syscall issued on behalf of a
 		 * shadow thread running inside the Xenomai
@@ -2265,7 +2304,7 @@ static inline int do_losyscall_event(unsigned event, unsigned domid, void *data)
 	if (__xn_reg_mux_p(regs))
 		goto xenomai_syscall;
 
-	if (!thread || !substitute_linux_syscall(regs))
+	if (!thread || !substitute_linux_syscall(thread, regs))
 		/* Fall back to Linux syscall handling. */
 		return RTHAL_EVENT_PROPAGATE;
 
diff --git a/src/rtdk/assert_context.c b/src/rtdk/assert_context.c
index f67bcd8..bad19f3 100644
--- a/src/rtdk/assert_context.c
+++ b/src/rtdk/assert_context.c
@@ -30,9 +30,12 @@ void assert_nrt(void)
 	xnthread_info_t info;
 	int err;
 
-	if (unlikely(xeno_get_current() != XN_NO_HANDLE &&
-		     !(xeno_get_current_mode() & XNRELAX))) {
+	if (xeno_get_current() != XN_NO_HANDLE)
+		return;
 
+	info.state = xeno_get_current_mode();
+
+	if (unlikely(!(info.state & XNRELAX) || info.state == -1)) {
 		err = XENOMAI_SYSCALL1(__xn_sys_current_info, &info);
 
 		if (err) {
@@ -41,7 +44,7 @@ void assert_nrt(void)
 			return;
 		}
 
-		if (info.state & XNTRAPSW)
+		if ((info.state & (XNRELAX | XNTRAPSW)) == XNTRAPSW)
 			pthread_kill(pthread_self(), SIGXCPU);
 	}
 }
diff --git a/src/skins/common/current.c b/src/skins/common/current.c
index 2922d6e..7db66c0 100644
--- a/src/skins/common/current.c
+++ b/src/skins/common/current.c
@@ -1,45 +1,98 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <pthread.h>
 
 #include <asm/xenomai/syscall.h>
 #include <nucleus/types.h>
 
+pthread_key_t xeno_current_mode_key;
+
 #ifdef HAVE___THREAD
 __thread __attribute__ ((tls_model ("initial-exec")))
 xnhandle_t xeno_current = XN_NO_HANDLE;
 __thread __attribute__ ((tls_model ("initial-exec")))
 unsigned long xeno_current_mode;
 
+static inline int create_current_key(void) { return 0; }
+
 static inline void __xeno_set_current(xnhandle_t current)
 {
 	xeno_current = current;
 }
+
+static inline unsigned long *create_current_mode(void)
+{
+	return &xeno_current_mode;
+}
+
+static inline void free_current_mode(unsigned long *mode) { }
+
+#define XENO_MODE_LEAK_WARNING ""
+
 #else /* !HAVE___THREAD */
-#include <pthread.h>
 
 pthread_key_t xeno_current_key;
-pthread_key_t xeno_current_mode_key;
+
+static inline int create_current_key(void)
+{
+	return pthread_key_create(&xeno_current_key, NULL);
+}
 
 static inline void __xeno_set_current(xnhandle_t current)
 {
 	pthread_setspecific(xeno_current_key, (void *)current);
 }
 
-unsigned long *xeno_init_current_mode(void)
+static inline unsigned long *create_current_mode(void)
 {
-	unsigned long *mode = malloc(sizeof(unsigned long));
-	pthread_setspecific(xeno_current_mode_key, mode);
-	return mode;
+	return malloc(sizeof(unsigned long));
+}
+
+static inline void free_current_mode(unsigned long *mode)
+{
+	free(mode);
+}
+
+#define XENO_MODE_LEAK_WARNING \
+	"         To reduce the probality, we leak a few bytes of heap " \
+	"per thread.\n"
+
+#endif /* !HAVE___THREAD */
+
+static void cleanup_current_mode(void *key)
+{
+	unsigned long *mode = key;
+	int err;
+
+	*mode = -1;
+
+	err = XENOMAI_SYSCALL0(__xn_sys_drop_u_mode);
+
+	if (err) {
+		static int warned;
+
+		if (!warned) {
+			warned = 1;
+			fprintf(stderr,
+				"\nXenomai: WARNING, this Xenomai kernel can "
+					"cause spurious application\n"
+				"         crashes on thread termination. "
+					"Upgrade is highly recommended.\n%s\n",
+				XENO_MODE_LEAK_WARNING);
+		}
+	} else
+		free_current_mode(mode);
 }
 
 static void init_current_keys(void)
 {
-	int err = pthread_key_create(&xeno_current_key, NULL);
+	int err = create_current_key();
+
 	if (err)
 		goto error_exit;
 
-	err = pthread_key_create(&xeno_current_mode_key, free);
+	err = pthread_key_create(&xeno_current_mode_key, cleanup_current_mode);
 	if (err) {
 	  error_exit:
 		fprintf(stderr, "Xenomai: error creating TSD key: %s\n",
@@ -53,7 +106,6 @@ void xeno_init_current_keys(void)
 	static pthread_once_t xeno_init_current_keys_once = PTHREAD_ONCE_INIT;
 	pthread_once(&xeno_init_current_keys_once, init_current_keys);
 }
-#endif /* !HAVE___THREAD */
 
 void xeno_set_current(void)
 {
@@ -68,3 +120,11 @@ void xeno_set_current(void)
 	}
 	__xeno_set_current(current);
 }
+
+unsigned long *xeno_init_current_mode(void)
+{
+	unsigned long *mode = create_current_mode();
+
+	pthread_setspecific(xeno_current_mode_key, mode);
+	return mode;
+}
-- 
1.6.0.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address
  2010-03-05 15:53   ` [Xenomai-core] [RFC][PATCH v2] " Jan Kiszka
@ 2010-03-05 16:30     ` Gilles Chanteperdrix
  2010-03-05 17:16       ` Jan Kiszka
  0 siblings, 1 reply; 9+ messages in thread
From: Gilles Chanteperdrix @ 2010-03-05 16:30 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

Jan Kiszka wrote:
> Gilles Chanteperdrix wrote:
>> Jan Kiszka wrote:
>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
>>> space address passed on shadow creation for the current thread. We need
>>> this in order to safely shut down a thread that wants to release its
>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
>>> might have touched this memory even after the release which caused
>>> subtle corruptions.
>>>
>>> After we released u_mode, we can no longer make reliable decisions based
>>> on it. For the fast mutex use case, we then assume the worst case, ie.
>>> we enforce syscall-based acquisitions unconditionally. For the
>>> assert_nrt case, we simply acquire the required information via
>>> __xn_sys_current_info.
>> As usual, you are firing patches too fast:
>> - you are lacking the exit syscall trap.
> 
> Don't worry, my memory is not yet _that_ bad. :)
> 
>> - the patch is not correct and will cause kernel-space addresses to be
>> passed to put_user, which probably yields bugs when the kernel is
>> compiled with proper debug flags;
> 
> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple
> NULL-check is way better. Fixed.
> 
>> - I do not see the point where the new syscall is called with __thread,
>> but I am not sure it is missing, and if it is missing, you will get the
>> rightfull warning when hitting the exit syscall.
> 
> Destructor of (now unconditionally used) xeno_current_mode_key.
> 
>> - the user-space users of the function to get current_mode are still
>> cluttered with special cases to handle the invalid state. And
>> illustrating nicely the deficiency of this approach, you have forgot one
>> user.
> 
> My impression is that you are still a bit biased due to TLS limitations
> on ARM.

No, I think the __thread stuff thinks make C code look too much like C++
where the compiler makes things behind your back for such simple code as:

void *foo = &bar;

This is not what I expect from a C compiler, and I suspect a lot of C
developers, including the people who advocate that C, as a simple
language, and not C++ should be used for free software development.

Yes, there is no gain on ARM, where this generates a function call as
pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain
on x86 either where a function call is so cheap. And using __thread in
Xenomai source code adds an awful lot of code, and what is more, an
awful lot of conditionnaly compiled code, which inevitably gets less
testing. So, from a maintenance point of view, this looks like a bad call.

> 
> And nope, I didn't forget to address it, I just forgot to mention that
> there is nothing to address (the torture test does not use
> xeno_get_current_mode from within a TSD destructor, thus potentially
> after drop_u_mode).

Expect that if someone changes the test code, he will have not to forget
about this. As soon as other users exist, we will have to think about
updating them.

> 
>> I am not going to make my version before tomorrow, so you have plenty of
>> time to send me an at least correct version which would take into
>> account all of our discussions.
> 
> Well... what should I reply?

That starting from now, you are going to test your patches before
sending them.

> 
>> Please also consider that your "patch machinegun" way of working is not
>> really sane. When there are so many often untested patches to review
>> coming from you, we sometime let some errors slip through. And from a
>> user point of view, seeing so many buggy patches refused is not really
>> reassuring.
> 
> Please don't make me cite counter examples for issues with different
> workflows we also experienced.
> 
> The key of an open development process is open discussion, and that
> ideally based on code, even if it is not yet perfect. I can't imagine
> you want Xenomai being developed in closed chambers (or even just a
> single one). If you are afraid of imperfect patches being posted or even
> merged into some software, you shouldn't use e.g. the Linux kernel as
> well.

You misunderstand me. What I would like you to do is to avoid sending me
patches which are a 5 minutes, untested hack, to fix something which
took much longer to think and nevertheless get wrong. Because most of
these patches get it inevitably wrong too.

I think every sane free-software projects ask people to test their
patches/pull requests before sending them, even the Linux kernel. See
for instance:
http://lwn.net/Articles/377091/

I am not asking you for one week. 24 hours would be enough.

In fact, the mistake is partly mine too, I am replying to your patches
as fast as you send them. We were going crazy tuesday with the condition
variable patches, I do not think it gives a reassuring image to the
users. So that at some point, I stopped reviewing your patches. I have
not even read the last 8 you sent. Without going to the other extreme of
a "closed door" development model, I think we can find a more reasonable
trade off than the current one.

Our objective now, is to put 2.5.2 in a stable state.

> +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs)
> +{
> +	if (unlikely(__xn_reg_mux(regs) == __NR_exit))
> +		handle_shadow_exit(thread);
> +

Not your fault, but this does not work on ARM. Do not know if
__xn_reg_mux needs fixing there or if we need to add a new macro to get
the roots syscall number.

-- 
					    Gilles.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address
  2010-03-05 16:30     ` Gilles Chanteperdrix
@ 2010-03-05 17:16       ` Jan Kiszka
  2010-03-05 17:36         ` Gilles Chanteperdrix
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Kiszka @ 2010-03-05 17:16 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-core

Gilles Chanteperdrix wrote:
> Jan Kiszka wrote:
>> Gilles Chanteperdrix wrote:
>>> Jan Kiszka wrote:
>>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
>>>> space address passed on shadow creation for the current thread. We need
>>>> this in order to safely shut down a thread that wants to release its
>>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
>>>> might have touched this memory even after the release which caused
>>>> subtle corruptions.
>>>>
>>>> After we released u_mode, we can no longer make reliable decisions based
>>>> on it. For the fast mutex use case, we then assume the worst case, ie.
>>>> we enforce syscall-based acquisitions unconditionally. For the
>>>> assert_nrt case, we simply acquire the required information via
>>>> __xn_sys_current_info.
>>> As usual, you are firing patches too fast:
>>> - you are lacking the exit syscall trap.
>> Don't worry, my memory is not yet _that_ bad. :)
>>
>>> - the patch is not correct and will cause kernel-space addresses to be
>>> passed to put_user, which probably yields bugs when the kernel is
>>> compiled with proper debug flags;
>> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple
>> NULL-check is way better. Fixed.
>>
>>> - I do not see the point where the new syscall is called with __thread,
>>> but I am not sure it is missing, and if it is missing, you will get the
>>> rightfull warning when hitting the exit syscall.
>> Destructor of (now unconditionally used) xeno_current_mode_key.
>>
>>> - the user-space users of the function to get current_mode are still
>>> cluttered with special cases to handle the invalid state. And
>>> illustrating nicely the deficiency of this approach, you have forgot one
>>> user.
>> My impression is that you are still a bit biased due to TLS limitations
>> on ARM.
> 
> No, I think the __thread stuff thinks make C code look too much like C++
> where the compiler makes things behind your back for such simple code as:
> 
> void *foo = &bar;
> 
> This is not what I expect from a C compiler, and I suspect a lot of C
> developers, including the people who advocate that C, as a simple
> language, and not C++ should be used for free software development.
> 
> Yes, there is no gain on ARM, where this generates a function call as
> pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain
> on x86 either where a function call is so cheap.

As are individual memory accesses. The sum makes the difference.

> And using __thread in
> Xenomai source code adds an awful lot of code, and what is more, an
> awful lot of conditionnaly compiled code, which inevitably gets less
> testing. So, from a maintenance point of view, this looks like a bad call.
> 
>> And nope, I didn't forget to address it, I just forgot to mention that
>> there is nothing to address (the torture test does not use
>> xeno_get_current_mode from within a TSD destructor, thus potentially
>> after drop_u_mode).
> 
> Expect that if someone changes the test code, he will have not to forget
> about this. As soon as other users exist, we will have to think about
> updating them.

Once someone starts testing mutexes or other stuff from TSD destructors,
not earlier. And this is so special that anyone of us interested in this
test case will naturally be aware of its specifics.

> 
>>> I am not going to make my version before tomorrow, so you have plenty of
>>> time to send me an at least correct version which would take into
>>> account all of our discussions.
>> Well... what should I reply?
> 
> That starting from now, you are going to test your patches before
> sending them.
> 
>>> Please also consider that your "patch machinegun" way of working is not
>>> really sane. When there are so many often untested patches to review
>>> coming from you, we sometime let some errors slip through. And from a
>>> user point of view, seeing so many buggy patches refused is not really
>>> reassuring.
>> Please don't make me cite counter examples for issues with different
>> workflows we also experienced.
>>
>> The key of an open development process is open discussion, and that
>> ideally based on code, even if it is not yet perfect. I can't imagine
>> you want Xenomai being developed in closed chambers (or even just a
>> single one). If you are afraid of imperfect patches being posted or even
>> merged into some software, you shouldn't use e.g. the Linux kernel as
>> well.
> 
> You misunderstand me. What I would like you to do is to avoid sending me
> patches which are a 5 minutes, untested hack, to fix something which
> took much longer to think and nevertheless get wrong. Because most of
> these patches get it inevitably wrong too.
> 
> I think every sane free-software projects ask people to test their
> patches/pull requests before sending them, even the Linux kernel. See
> for instance:
> http://lwn.net/Articles/377091/
> 
> I am not asking you for one week. 24 hours would be enough.

As you may know, complete test coverage is near to impossible. On
unlucky days, I happen to send out stuff that broke while continuing to
type after the last test ran. But specifically these things were tested
in various combinations.

It is not very polite to assume the contrary. I'm trying to avoid the
same assumption about other people as well, even when things look
differently from time to time. To err is human, to always write correct
code is divine.

But as you already mentioned in another mail, there is room for
improvement /wrt testability. On step forward would be an automated
build and run of some git tree that we can check into. One board per
arch would be enough to avoid issues we face from time to time when we
only stress our favorite archs with new features or fixes.

> 
> In fact, the mistake is partly mine too, I am replying to your patches
> as fast as you send them. We were going crazy tuesday with the condition
> variable patches, I do not think it gives a reassuring image to the
> users.

I can only say that, as a developer in this business, I trust code
having been discussed to the extreme _way_ more than stuff a single
clever person checked in even after meditating over it for weeks. I've
seen enough of this, produced by non-clever /me as well as way more
clever people in all kinds of projects.

> So that at some point, I stopped reviewing your patches. I have
> not even read the last 8 you sent. Without going to the other extreme of
> a "closed door" development model, I think we can find a more reasonable
> trade off than the current one.

Well, you have to decide which piece to take, or to request it to be
rewritten, or to write yourself.

> 
> Our objective now, is to put 2.5.2 in a stable state.
> 
>> +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs)
>> +{
>> +	if (unlikely(__xn_reg_mux(regs) == __NR_exit))
>> +		handle_shadow_exit(thread);
>> +
> 
> Not your fault, but this does not work on ARM. Do not know if
> __xn_reg_mux needs fixing there or if we need to add a new macro to get
> the roots syscall number.

And that's the point of sending patches for discussion.

It's regs->ARM_r7 == __NR_SYSCALL_BASE + __NR_MY_SYSCALL, right? If
__xn_reg_mux is supposed to be only a mux value for Xenomai, we need a
separate accessor for the Linux syscall number on all archs.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address
  2010-03-05 17:16       ` Jan Kiszka
@ 2010-03-05 17:36         ` Gilles Chanteperdrix
  2010-03-06  9:39           ` Jan Kiszka
  0 siblings, 1 reply; 9+ messages in thread
From: Gilles Chanteperdrix @ 2010-03-05 17:36 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

Jan Kiszka wrote:
> Gilles Chanteperdrix wrote:
>> Jan Kiszka wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Jan Kiszka wrote:
>>>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
>>>>> space address passed on shadow creation for the current thread. We need
>>>>> this in order to safely shut down a thread that wants to release its
>>>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
>>>>> might have touched this memory even after the release which caused
>>>>> subtle corruptions.
>>>>>
>>>>> After we released u_mode, we can no longer make reliable decisions based
>>>>> on it. For the fast mutex use case, we then assume the worst case, ie.
>>>>> we enforce syscall-based acquisitions unconditionally. For the
>>>>> assert_nrt case, we simply acquire the required information via
>>>>> __xn_sys_current_info.
>>>> As usual, you are firing patches too fast:
>>>> - you are lacking the exit syscall trap.
>>> Don't worry, my memory is not yet _that_ bad. :)
>>>
>>>> - the patch is not correct and will cause kernel-space addresses to be
>>>> passed to put_user, which probably yields bugs when the kernel is
>>>> compiled with proper debug flags;
>>> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple
>>> NULL-check is way better. Fixed.
>>>
>>>> - I do not see the point where the new syscall is called with __thread,
>>>> but I am not sure it is missing, and if it is missing, you will get the
>>>> rightfull warning when hitting the exit syscall.
>>> Destructor of (now unconditionally used) xeno_current_mode_key.
>>>
>>>> - the user-space users of the function to get current_mode are still
>>>> cluttered with special cases to handle the invalid state. And
>>>> illustrating nicely the deficiency of this approach, you have forgot one
>>>> user.
>>> My impression is that you are still a bit biased due to TLS limitations
>>> on ARM.
>> No, I think the __thread stuff thinks make C code look too much like C++
>> where the compiler makes things behind your back for such simple code as:
>>
>> void *foo = &bar;
>>
>> This is not what I expect from a C compiler, and I suspect a lot of C
>> developers, including the people who advocate that C, as a simple
>> language, and not C++ should be used for free software development.
>>
>> Yes, there is no gain on ARM, where this generates a function call as
>> pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain
>> on x86 either where a function call is so cheap.
> 
> As are individual memory accesses. The sum makes the difference.

Ok. If you find before 2.5.3 a platform where this makes a difference of
more than 5% of this platform's latency, I do not remove the __thread
stuff. Deal?

> Once someone starts testing mutexes or other stuff from TSD destructors,
> not earlier. And this is so special that anyone of us interested in this
> test case will naturally be aware of its specifics.

As you said to err is human. And having a single point for handling an
exceptional case is a good idea. And this shortens your patch.


>> I think every sane free-software projects ask people to test their
>> patches/pull requests before sending them, even the Linux kernel. See
>> for instance:
>> http://lwn.net/Articles/377091/
>>
>> I am not asking you for one week. 24 hours would be enough.
> 
> As you may know, complete test coverage is near to impossible.

That is no excuse for not making any test.


> On unlucky days, I happen to send out stuff that broke while continuing to
> type after the last test ran. But specifically these things were tested
> in various combinations.
> 
> It is not very polite to assume the contrary.

When you send a patch every 5 minutes, it is obvious that it was not
tested. No politeness involved. Everybody can check the timestamps in
the mailing lists and do the math.


 I'm trying to avoid the
> same assumption about other people as well, even when things look
> differently from time to time. To err is human, to always write correct
> code is divine.
> 
> But as you already mentioned in another mail, there is room for
> improvement /wrt testability. On step forward would be an automated
> build and run of some git tree that we can check into. One board per
> arch would be enough to avoid issues we face from time to time when we
> only stress our favorite archs with new features or fixes.

We have the automated build. I plan seriously to work on the automated
run for 2.5.3. But it is too late for 2.5.2. Also note that I gave you
access to my server where several targets are configured and usable. If
you forgot the password, I can reset it.

> 
>> In fact, the mistake is partly mine too, I am replying to your patches
>> as fast as you send them. We were going crazy tuesday with the condition
>> variable patches, I do not think it gives a reassuring image to the
>> users.
> 
> I can only say that, as a developer in this business, I trust code
> having been discussed to the extreme _way_ more than stuff a single
> clever person checked in even after meditating over it for weeks. I've
> seen enough of this, produced by non-clever /me as well as way more
> clever people in all kinds of projects.
> 
>> So that at some point, I stopped reviewing your patches. I have
>> not even read the last 8 you sent. Without going to the other extreme of
>> a "closed door" development model, I think we can find a more reasonable
>> trade off than the current one.
> 
> Well, you have to decide which piece to take, or to request it to be
> rewritten, or to write yourself.

Yes. That is the plan. Tomorrow.

> 
>> Our objective now, is to put 2.5.2 in a stable state.
>>
>>> +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs)
>>> +{
>>> +	if (unlikely(__xn_reg_mux(regs) == __NR_exit))
>>> +		handle_shadow_exit(thread);
>>> +
>> Not your fault, but this does not work on ARM. Do not know if
>> __xn_reg_mux needs fixing there or if we need to add a new macro to get
>> the roots syscall number.
> 
> And that's the point of sending patches for discussion.
> 
> It's regs->ARM_r7 == __NR_SYSCALL_BASE + __NR_MY_SYSCALL, right? If
> __xn_reg_mux is supposed to be only a mux value for Xenomai, we need a
> separate accessor for the Linux syscall number on all archs.

or __NR_OABI_SYSCALL_BASE + __NR_MY_SYSCALL depending on CONFIG_OABI_COMPAT.

> 
> Jan
> 


-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address
  2010-03-05 17:36         ` Gilles Chanteperdrix
@ 2010-03-06  9:39           ` Jan Kiszka
  2010-03-06 12:07             ` Gilles Chanteperdrix
  0 siblings, 1 reply; 9+ messages in thread
From: Jan Kiszka @ 2010-03-06  9:39 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai-core

[-- Attachment #1: Type: text/plain, Size: 7732 bytes --]

Gilles Chanteperdrix wrote:
> Jan Kiszka wrote:
>> Gilles Chanteperdrix wrote:
>>> Jan Kiszka wrote:
>>>> Gilles Chanteperdrix wrote:
>>>>> Jan Kiszka wrote:
>>>>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
>>>>>> space address passed on shadow creation for the current thread. We need
>>>>>> this in order to safely shut down a thread that wants to release its
>>>>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
>>>>>> might have touched this memory even after the release which caused
>>>>>> subtle corruptions.
>>>>>>
>>>>>> After we released u_mode, we can no longer make reliable decisions based
>>>>>> on it. For the fast mutex use case, we then assume the worst case, ie.
>>>>>> we enforce syscall-based acquisitions unconditionally. For the
>>>>>> assert_nrt case, we simply acquire the required information via
>>>>>> __xn_sys_current_info.
>>>>> As usual, you are firing patches too fast:
>>>>> - you are lacking the exit syscall trap.
>>>> Don't worry, my memory is not yet _that_ bad. :)
>>>>
>>>>> - the patch is not correct and will cause kernel-space addresses to be
>>>>> passed to put_user, which probably yields bugs when the kernel is
>>>>> compiled with proper debug flags;
>>>> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple
>>>> NULL-check is way better. Fixed.
>>>>
>>>>> - I do not see the point where the new syscall is called with __thread,
>>>>> but I am not sure it is missing, and if it is missing, you will get the
>>>>> rightfull warning when hitting the exit syscall.
>>>> Destructor of (now unconditionally used) xeno_current_mode_key.
>>>>
>>>>> - the user-space users of the function to get current_mode are still
>>>>> cluttered with special cases to handle the invalid state. And
>>>>> illustrating nicely the deficiency of this approach, you have forgot one
>>>>> user.
>>>> My impression is that you are still a bit biased due to TLS limitations
>>>> on ARM.
>>> No, I think the __thread stuff thinks make C code look too much like C++
>>> where the compiler makes things behind your back for such simple code as:
>>>
>>> void *foo = &bar;
>>>
>>> This is not what I expect from a C compiler, and I suspect a lot of C
>>> developers, including the people who advocate that C, as a simple
>>> language, and not C++ should be used for free software development.
>>>
>>> Yes, there is no gain on ARM, where this generates a function call as
>>> pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain
>>> on x86 either where a function call is so cheap.
>> As are individual memory accesses. The sum makes the difference.
> 
> Ok. If you find before 2.5.3 a platform where this makes a difference of
> more than 5% of this platform's latency, I do not remove the __thread
> stuff. Deal?

If you manage to remove 5% of the configuration variables of Xenomai
that way. Or 5% of the conditional code.

> 
>> Once someone starts testing mutexes or other stuff from TSD destructors,
>> not earlier. And this is so special that anyone of us interested in this
>> test case will naturally be aware of its specifics.
> 
> As you said to err is human. And having a single point for handling an
> exceptional case is a good idea. And this shortens your patch.
> 
> 
>>> I think every sane free-software projects ask people to test their
>>> patches/pull requests before sending them, even the Linux kernel. See
>>> for instance:
>>> http://lwn.net/Articles/377091/
>>>
>>> I am not asking you for one week. 24 hours would be enough.
>> As you may know, complete test coverage is near to impossible.
> 
> That is no excuse for not making any test.
> 
> 
>> On unlucky days, I happen to send out stuff that broke while continuing to
>> type after the last test ran. But specifically these things were tested
>> in various combinations.
>>
>> It is not very polite to assume the contrary.
> 
> When you send a patch every 5 minutes, it is obvious that it was not
> tested. No politeness involved. Everybody can check the timestamps in
> the mailing lists and do the math.

I think my workflow was misleading regarding this. Not necessarily all
stuff I push into my 'for-upstream' branch is done with all tests at the
time when I push. Official pull request mails are supposed to mark this.
I'm pushing to save my work and, of course, to expose it. So I
appreciate comments, but I surely don't expect them within minutes.

> 
>  I'm trying to avoid the
>> same assumption about other people as well, even when things look
>> differently from time to time. To err is human, to always write correct
>> code is divine.
>>
>> But as you already mentioned in another mail, there is room for
>> improvement /wrt testability. On step forward would be an automated
>> build and run of some git tree that we can check into. One board per
>> arch would be enough to avoid issues we face from time to time when we
>> only stress our favorite archs with new features or fixes.
> 
> We have the automated build.

Then we just need to run it against some 'build-test' branches of
everyone's git repositories on checkin. That would be great.

> I plan seriously to work on the automated
> run for 2.5.3. But it is too late for 2.5.2. Also note that I gave you
> access to my server where several targets are configured and usable. If
> you forgot the password, I can reset it.

I do remember this, but I thought this was for a specific test. If I'm
supposed to access that box regularly, I'm fine, we can arrange this.

> 
>>> In fact, the mistake is partly mine too, I am replying to your patches
>>> as fast as you send them. We were going crazy tuesday with the condition
>>> variable patches, I do not think it gives a reassuring image to the
>>> users.
>> I can only say that, as a developer in this business, I trust code
>> having been discussed to the extreme _way_ more than stuff a single
>> clever person checked in even after meditating over it for weeks. I've
>> seen enough of this, produced by non-clever /me as well as way more
>> clever people in all kinds of projects.
>>
>>> So that at some point, I stopped reviewing your patches. I have
>>> not even read the last 8 you sent. Without going to the other extreme of
>>> a "closed door" development model, I think we can find a more reasonable
>>> trade off than the current one.
>> Well, you have to decide which piece to take, or to request it to be
>> rewritten, or to write yourself.
> 
> Yes. That is the plan. Tomorrow.
> 
>>> Our objective now, is to put 2.5.2 in a stable state.
>>>
>>>> +substitute_linux_syscall(xnthread_t *thread, struct pt_regs *regs)
>>>> +{
>>>> +	if (unlikely(__xn_reg_mux(regs) == __NR_exit))
>>>> +		handle_shadow_exit(thread);
>>>> +
>>> Not your fault, but this does not work on ARM. Do not know if
>>> __xn_reg_mux needs fixing there or if we need to add a new macro to get
>>> the roots syscall number.
>> And that's the point of sending patches for discussion.
>>
>> It's regs->ARM_r7 == __NR_SYSCALL_BASE + __NR_MY_SYSCALL, right? If
>> __xn_reg_mux is supposed to be only a mux value for Xenomai, we need a
>> separate accessor for the Linux syscall number on all archs.
> 
> or __NR_OABI_SYSCALL_BASE + __NR_MY_SYSCALL depending on CONFIG_OABI_COMPAT.
> 

Patch is in my queue if you are interested.

Jan

PS: Note that this u_mode fix exposed an issue of the sigtest:
cancel_with_signals fails if there is some TSD destructor registered
that issues a Xenomai syscall. Not sure yet if it is a fundamental issue
or just related to the test itself.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address
  2010-03-06  9:39           ` Jan Kiszka
@ 2010-03-06 12:07             ` Gilles Chanteperdrix
  2010-03-08 16:06               ` Gilles Chanteperdrix
  0 siblings, 1 reply; 9+ messages in thread
From: Gilles Chanteperdrix @ 2010-03-06 12:07 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

Jan Kiszka wrote:
> Gilles Chanteperdrix wrote:
>> Jan Kiszka wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Jan Kiszka wrote:
>>>>> Gilles Chanteperdrix wrote:
>>>>>> Jan Kiszka wrote:
>>>>>>> This adds __xn_sys_drop_u_mode, a syscall to deregister the u_mode user
>>>>>>> space address passed on shadow creation for the current thread. We need
>>>>>>> this in order to safely shut down a thread that wants to release its
>>>>>>> u_mode memory (either malloc'ed or TLS-allocated). So far, the kernel
>>>>>>> might have touched this memory even after the release which caused
>>>>>>> subtle corruptions.
>>>>>>>
>>>>>>> After we released u_mode, we can no longer make reliable decisions based
>>>>>>> on it. For the fast mutex use case, we then assume the worst case, ie.
>>>>>>> we enforce syscall-based acquisitions unconditionally. For the
>>>>>>> assert_nrt case, we simply acquire the required information via
>>>>>>> __xn_sys_current_info.
>>>>>> As usual, you are firing patches too fast:
>>>>>> - you are lacking the exit syscall trap.
>>>>> Don't worry, my memory is not yet _that_ bad. :)
>>>>>
>>>>>> - the patch is not correct and will cause kernel-space addresses to be
>>>>>> passed to put_user, which probably yields bugs when the kernel is
>>>>>> compiled with proper debug flags;
>>>>> Right, e.g. ARM requires set_fs() fiddling in addition. But then a simple
>>>>> NULL-check is way better. Fixed.
>>>>>
>>>>>> - I do not see the point where the new syscall is called with __thread,
>>>>>> but I am not sure it is missing, and if it is missing, you will get the
>>>>>> rightfull warning when hitting the exit syscall.
>>>>> Destructor of (now unconditionally used) xeno_current_mode_key.
>>>>>
>>>>>> - the user-space users of the function to get current_mode are still
>>>>>> cluttered with special cases to handle the invalid state. And
>>>>>> illustrating nicely the deficiency of this approach, you have forgot one
>>>>>> user.
>>>>> My impression is that you are still a bit biased due to TLS limitations
>>>>> on ARM.
>>>> No, I think the __thread stuff thinks make C code look too much like C++
>>>> where the compiler makes things behind your back for such simple code as:
>>>>
>>>> void *foo = &bar;
>>>>
>>>> This is not what I expect from a C compiler, and I suspect a lot of C
>>>> developers, including the people who advocate that C, as a simple
>>>> language, and not C++ should be used for free software development.
>>>>
>>>> Yes, there is no gain on ARM, where this generates a function call as
>>>> pthread_getspecific and causes bugs with gcc >= 4.3, but not much gain
>>>> on x86 either where a function call is so cheap.
>>> As are individual memory accesses. The sum makes the difference.
>> Ok. If you find before 2.5.3 a platform where this makes a difference of
>> more than 5% of this platform's latency, I do not remove the __thread
>> stuff. Deal?
> 
> If you manage to remove 5% of the configuration variables of Xenomai
> that way. Or 5% of the conditional code.

The __thread stuff introduces two configuration variables, that is 4
combinations. Now, for the u_mode modifications for which you sent me a
pull-request (so, according to what you said, is tested), did you test
the 4 combinations? If no, then chances of bug are high. If yes, then do
not you think that it is a major PITA to compile Xenomai user-space 4
times for a so little gain?

So __thread has a maintenance cost, and if we do not pay attention to
it, we release buggy code. Now, let me quote something someone you know
very well wrote some time ago:
"While RTAI is focused on lowest technically feasible latencies, Xenomai
also considers clean extensibility (RTOS skins), portability, and
maintainability as very important goals."

This is in our wiki.
__thread and the wrong life-cycle u_mode are examples of things which
focus on ridiculous micro-optimizations rather on maintainability.


> I think my workflow was misleading regarding this. Not necessarily all
> stuff I push into my 'for-upstream' branch is done with all tests at the
> time when I push. Official pull request mails are supposed to mark this.
> I'm pushing to save my work and, of course, to expose it. So I
> appreciate comments, but I surely don't expect them within minutes.

No. You still refuse to acknowledge the truth: I receive pull requests
for untested stuff. Read John's mail again. He cherry picked changes,
for which I received a pull request, he ran the mutex-torture tests,
which the patchset had modified, and the test was not passing. So, the
conclusion is that you did not run mutex-torture, which the commit had
modified before sending the pull request. I did not have to dig long in
the git logs to find this example. It was just yesterday. Think about it.

That is precisely the situation I want to avoid from now on.

> 
>>  I'm trying to avoid the
>>> same assumption about other people as well, even when things look
>>> differently from time to time. To err is human, to always write correct
>>> code is divine.
>>>
>>> But as you already mentioned in another mail, there is room for
>>> improvement /wrt testability. On step forward would be an automated
>>> build and run of some git tree that we can check into. One board per
>>> arch would be enough to avoid issues we face from time to time when we
>>> only stress our favorite archs with new features or fixes.
>> We have the automated build.
> 
> Then we just need to run it against some 'build-test' branches of
> everyone's git repositories on checkin. That would be great.

Unfortunately, I am running the tests with ccache on a core i7 on a
raid0 array, and it still takes around one hour. So, I prefer running
this by hand. But do not worry, it is run before the releases.

> 
>> I plan seriously to work on the automated
>> run for 2.5.3. But it is too late for 2.5.2. Also note that I gave you
>> access to my server where several targets are configured and usable. If
>> you forgot the password, I can reset it.
> 
> I do remember this, but I thought this was for a specific test. If I'm
> supposed to access that box regularly, I'm fine, we can arrange this.

I was just mentioning this to mean that if you want to test, you can.
You are not supposed to do anything, you are free to use it, you can do
as you like.


> PS: Note that this u_mode fix exposed an issue of the sigtest:
> cancel_with_signals fails if there is some TSD destructor registered
> that issues a Xenomai syscall. Not sure yet if it is a fundamental issue
> or just related to the test itself.

The sigtest test probably assumes that no parasite syscall occurs. Yet
another reason for not using this u_mode implementation.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Xenomai-core] [RFC][PATCH v2] Allow for deregistering current mode user space address
  2010-03-06 12:07             ` Gilles Chanteperdrix
@ 2010-03-08 16:06               ` Gilles Chanteperdrix
  0 siblings, 0 replies; 9+ messages in thread
From: Gilles Chanteperdrix @ 2010-03-08 16:06 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai-core

Gilles Chanteperdrix wrote:
>> I think my workflow was misleading regarding this. Not necessarily all
>> stuff I push into my 'for-upstream' branch is done with all tests at the
>> time when I push. Official pull request mails are supposed to mark this.
>> I'm pushing to save my work and, of course, to expose it. So I
>> appreciate comments, but I surely don't expect them within minutes.
> 
> No. You still refuse to acknowledge the truth: I receive pull requests
> for untested stuff. Read John's mail again. He cherry picked changes,
> for which I received a pull request, he ran the mutex-torture tests,
> which the patchset had modified, and the test was not passing. So, the
> conclusion is that you did not run mutex-torture, which the commit had
> modified before sending the pull request. I did not have to dig long in
> the git logs to find this example. It was just yesterday. Think about it.
> 
> That is precisely the situation I want to avoid from now on.

Ok. It turns out that yes, the timed mutex tests had a bug, but probably
only visible on uniprocessor machines. And I guess you test on SMP
machines. So, my apologies, the patch was tested, not enough, but tested.

>> PS: Note that this u_mode fix exposed an issue of the sigtest:
>> cancel_with_signals fails if there is some TSD destructor registered
>> that issues a Xenomai syscall. Not sure yet if it is a fundamental issue
>> or just related to the test itself.
> 
> The sigtest test probably assumes that no parasite syscall occurs. Yet
> another reason for not using this u_mode implementation.

Remove that test case. I do not know what it was testing anyway.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-03-08 16:06 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-03-05 11:37 [Xenomai-core] [RFC][PATCH] Allow for deregistering current mode user space address Jan Kiszka
2010-03-05 12:02 ` Gilles Chanteperdrix
2010-03-05 15:53   ` [Xenomai-core] [RFC][PATCH v2] " Jan Kiszka
2010-03-05 16:30     ` Gilles Chanteperdrix
2010-03-05 17:16       ` Jan Kiszka
2010-03-05 17:36         ` Gilles Chanteperdrix
2010-03-06  9:39           ` Jan Kiszka
2010-03-06 12:07             ` Gilles Chanteperdrix
2010-03-08 16:06               ` Gilles Chanteperdrix

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.