public inbox for linux-kernel@vger.kernel.org
* [patch 06/21] mutex subsystem, add include/asm-arm/mutex.h
@ 2006-01-04 14:42 Ingo Molnar
  0 siblings, 0 replies; 3+ messages in thread
From: Ingo Molnar @ 2006-01-04 14:42 UTC (permalink / raw)
  To: lkml
  Cc: Linus Torvalds, Andrew Morton, Arjan van de Ven, Nicolas Pitre,
	Jes Sorensen, Al Viro, Oleg Nesterov, David Howells, Alan Cox,
	Christoph Hellwig, Andi Kleen, Russell King

From: Nicolas Pitre <nico@cam.org>

add the ARM version of mutex.h, which is optimized in assembly for
ARMv6, and uses the xchg implementation on pre-ARMv6.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

----

 include/asm-arm/mutex.h |  128 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 128 insertions(+)

Index: linux/include/asm-arm/mutex.h
===================================================================
--- /dev/null
+++ linux/include/asm-arm/mutex.h
@@ -0,0 +1,128 @@
+/*
+ * include/asm-arm/mutex.h
+ *
+ * ARM optimized mutex locking primitives
+ *
+ * Please look into asm-generic/mutex-xchg.h for a formal definition.
+ */
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+#if __LINUX_ARM_ARCH__ < 6
+/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
+# include <asm-generic/mutex-xchg.h>
+#else
+
+/*
+ * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
+ * atomic decrement (it is not a reliable atomic decrement but it satisfies
+ * the defined semantics for our purpose, while being smaller and faster
+ * than a real atomic decrement or atomic swap).  The idea is to attempt
+ * decrementing the lock value only once.  If once decremented it isn't zero,
+ * or if its store-back fails due to a dispute on the exclusive store, we
+ * simply bail out immediately through the slow path where the lock will be
+ * reattempted until it succeeds.
+ */
+#define __mutex_fastpath_lock(count, fail_fn)				\
+do {									\
+	int __ex_flag, __res;						\
+									\
+	typecheck(atomic_t *, count);					\
+	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ (							\
+		"ldrex	%0, [%2]	\n"				\
+		"sub	%0, %0, #1	\n"				\
+		"strex	%1, %0, [%2]	\n"				\
+									\
+		: "=&r" (__res), "=&r" (__ex_flag)			\
+		: "r" (&(count)->counter)				\
+		: "cc","memory" );					\
+									\
+	if (unlikely(__res || __ex_flag))				\
+		fail_fn(count);						\
+} while (0)
+
+#define __mutex_fastpath_lock_retval(count, fail_fn)			\
+({									\
+	int __ex_flag, __res;						\
+									\
+	typecheck(atomic_t *, count);					\
+	typecheck_fn(fastcall int (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ (							\
+		"ldrex	%0, [%2]	\n"				\
+		"sub	%0, %0, #1	\n"				\
+		"strex	%1, %0, [%2]	\n"				\
+									\
+		: "=&r" (__res), "=&r" (__ex_flag)			\
+		: "r" (&(count)->counter)				\
+		: "cc","memory" );					\
+									\
+	__res |= __ex_flag;						\
+	if (unlikely(__res != 0))					\
+		__res = fail_fn(count);					\
+	__res;								\
+})
+
+/*
+ * Same trick is used for the unlock fast path. However the original value,
+ * rather than the result, is used to test for success in order to have
+ * better generated assembly.
+ */
+#define __mutex_fastpath_unlock(count, fail_fn)				\
+do {									\
+	int __ex_flag, __res, __orig;					\
+									\
+	typecheck(atomic_t *, count);					\
+	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ (							\
+		"ldrex	%0, [%3]	\n"				\
+		"add	%1, %0, #1	\n"				\
+		"strex	%2, %1, [%3]	\n"				\
+									\
+		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)	\
+		: "r" (&(count)->counter)				\
+		: "cc","memory" );					\
+									\
+	if (unlikely(__orig || __ex_flag))				\
+		fail_fn(count);						\
+} while (0)
+
+/*
+ * If the unlock was done on a contended lock, or if the unlock simply fails,
+ * then the mutex remains locked.
+ */
+#define __mutex_slowpath_needs_to_unlock()	1
+
+/*
+ * For __mutex_fastpath_trylock we use another construct which could be
+ * described as a "single value cmpxchg".
+ *
+ * This provides the needed trylock semantics like cmpxchg would, but it is
+ * lighter and less generic than a true cmpxchg implementation.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	int __ex_flag, __res, __orig;
+
+	__asm__ (
+
+		"1: ldrex	%0, [%3]	\n"
+		"subs		%1, %0, #1	\n"
+		"strexeq	%2, %1, [%3]	\n"
+		"movlt		%0, #0		\n"
+		"cmpeq		%2, #0		\n"
+		"bgt		1b		\n"
+
+		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
+		: "r" (&count->counter)
+		: "cc", "memory" );
+
+	return __orig;
+}
+
+#endif
+#endif
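
The locking trick described in the comment above can be paraphrased in
portable C11, purely as an illustration of the intended semantics (the
helper below is not part of the patch, and the <stdatomic.h> calls only
stand in for the ldrex/strex pair):

	#include <stdatomic.h>

	/*
	 * One decrement attempt, mirroring the single ldrex/strex pair:
	 * if the store-back loses a race with another CPU, or the
	 * decremented value is not zero, fall back to the slow path.
	 */
	static void sketch_mutex_lock(atomic_int *count,
				      void (*fail_fn)(atomic_int *))
	{
		int old = atomic_load_explicit(count, memory_order_relaxed);
		int dec = old - 1;

		if (!atomic_compare_exchange_strong(count, &old, dec) || dec != 0)
			fail_fn(count);
	}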


* [patch 06/21] mutex subsystem, add include/asm-arm/mutex.h
@ 2006-01-05 15:37 Ingo Molnar
  2006-01-07  3:04 ` Nicolas Pitre
  0 siblings, 1 reply; 3+ messages in thread
From: Ingo Molnar @ 2006-01-05 15:37 UTC (permalink / raw)
  To: lkml
  Cc: Linus Torvalds, Andrew Morton, Arjan van de Ven, Nicolas Pitre,
	Jes Sorensen, Al Viro, Oleg Nesterov, David Howells, Alan Cox,
	Christoph Hellwig, Andi Kleen, Russell King

From: Nicolas Pitre <nico@cam.org>

add the ARM version of mutex.h, which is optimized in assembly for
ARMv6, and uses the xchg implementation on pre-ARMv6.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

----

 include/asm-arm/mutex.h |  128 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 128 insertions(+)

Index: linux/include/asm-arm/mutex.h
===================================================================
--- /dev/null
+++ linux/include/asm-arm/mutex.h
@@ -0,0 +1,128 @@
+/*
+ * include/asm-arm/mutex.h
+ *
+ * ARM optimized mutex locking primitives
+ *
+ * Please look into asm-generic/mutex-xchg.h for a formal definition.
+ */
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+#if __LINUX_ARM_ARCH__ < 6
+/* On pre-ARMv6 hardware the swp based implementation is the most efficient. */
+# include <asm-generic/mutex-xchg.h>
+#else
+
+/*
+ * Attempting to lock a mutex on ARMv6+ can be done with a bastardized
+ * atomic decrement (it is not a reliable atomic decrement but it satisfies
+ * the defined semantics for our purpose, while being smaller and faster
+ * than a real atomic decrement or atomic swap).  The idea is to attempt
+ * decrementing the lock value only once.  If once decremented it isn't zero,
+ * or if its store-back fails due to a dispute on the exclusive store, we
+ * simply bail out immediately through the slow path where the lock will be
+ * reattempted until it succeeds.
+ */
+#define __mutex_fastpath_lock(count, fail_fn)				\
+do {									\
+	int __ex_flag, __res;						\
+									\
+	typecheck(atomic_t *, count);					\
+	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ (							\
+		"ldrex	%0, [%2]	\n"				\
+		"sub	%0, %0, #1	\n"				\
+		"strex	%1, %0, [%2]	\n"				\
+									\
+		: "=&r" (__res), "=&r" (__ex_flag)			\
+		: "r" (&(count)->counter)				\
+		: "cc","memory" );					\
+									\
+	if (unlikely(__res || __ex_flag))				\
+		fail_fn(count);						\
+} while (0)
+
+#define __mutex_fastpath_lock_retval(count, fail_fn)			\
+({									\
+	int __ex_flag, __res;						\
+									\
+	typecheck(atomic_t *, count);					\
+	typecheck_fn(fastcall int (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ (							\
+		"ldrex	%0, [%2]	\n"				\
+		"sub	%0, %0, #1	\n"				\
+		"strex	%1, %0, [%2]	\n"				\
+									\
+		: "=&r" (__res), "=&r" (__ex_flag)			\
+		: "r" (&(count)->counter)				\
+		: "cc","memory" );					\
+									\
+	__res |= __ex_flag;						\
+	if (unlikely(__res != 0))					\
+		__res = fail_fn(count);					\
+	__res;								\
+})
+
+/*
+ * Same trick is used for the unlock fast path. However the original value,
+ * rather than the result, is used to test for success in order to have
+ * better generated assembly.
+ */
+#define __mutex_fastpath_unlock(count, fail_fn)				\
+do {									\
+	int __ex_flag, __res, __orig;					\
+									\
+	typecheck(atomic_t *, count);					\
+	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ (							\
+		"ldrex	%0, [%3]	\n"				\
+		"add	%1, %0, #1	\n"				\
+		"strex	%2, %1, [%3]	\n"				\
+									\
+		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)	\
+		: "r" (&(count)->counter)				\
+		: "cc","memory" );					\
+									\
+	if (unlikely(__orig || __ex_flag))				\
+		fail_fn(count);						\
+} while (0)
+
+/*
+ * If the unlock was done on a contended lock, or if the unlock simply fails,
+ * then the mutex remains locked.
+ */
+#define __mutex_slowpath_needs_to_unlock()	1
+
+/*
+ * For __mutex_fastpath_trylock we use another construct which could be
+ * described as a "single value cmpxchg".
+ *
+ * This provides the needed trylock semantics like cmpxchg would, but it is
+ * lighter and less generic than a true cmpxchg implementation.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	int __ex_flag, __res, __orig;
+
+	__asm__ (
+
+		"1: ldrex	%0, [%3]	\n"
+		"subs		%1, %0, #1	\n"
+		"strexeq	%2, %1, [%3]	\n"
+		"movlt		%0, #0		\n"
+		"cmpeq		%2, #0		\n"
+		"bgt		1b		\n"
+
+		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
+		: "r" (&count->counter)
+		: "cc", "memory" );
+
+	return __orig;
+}
+
+#endif
+#endif
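
The "single value cmpxchg" used by __mutex_fastpath_trylock() above amounts
to: succeed only when the count is exactly 1 (unlocked), and store 0 (locked)
in that case.  A stand-alone C11 sketch of that idea, for illustration only
(this helper is not code from the patch):

	#include <stdatomic.h>

	/* Return 1 if the mutex was taken (count went from 1 to 0), 0 otherwise. */
	static int sketch_mutex_trylock(atomic_int *count)
	{
		int expected = 1;

		return atomic_compare_exchange_strong(count, &expected, 0) ? 1 : 0;
	}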


* Re: [patch 06/21] mutex subsystem, add include/asm-arm/mutex.h
  2006-01-05 15:37 Ingo Molnar
@ 2006-01-07  3:04 ` Nicolas Pitre
  0 siblings, 0 replies; 3+ messages in thread
From: Nicolas Pitre @ 2006-01-07  3:04 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: lkml, Linus Torvalds, Andrew Morton, Arjan van de Ven,
	Jes Sorensen, Al Viro, Oleg Nesterov, David Howells, Alan Cox,
	Christoph Hellwig, Andi Kleen, Russell King

On Thu, 5 Jan 2006, Ingo Molnar wrote:

> From: Nicolas Pitre <nico@cam.org>
> 
> add the ARM version of mutex.h, which is optimized in assembly for
> ARMv6, and uses the xchg implementation on pre-ARMv6.
> 
> Signed-off-by: Ingo Molnar <mingo@elte.hu>

Please consider the ARM cleanups below.  I was reviewing the generated 
code and found out that, from my original version, you changed:

	if (unlikely(__res | __ex_flag))

for:

	if (unlikely(__res || __ex_flag))

which produces worse code on ARM.  I therefore made it more explicit:

	__res |= __ex_flag;
	if (unlikely(__res != 0))

that the single | (bitwise or) was not a mistake.
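
Shown as tiny stand-alone helpers purely for illustration (these functions
are not in the patch): || has short-circuit semantics, while | combines the
two already-computed flags so they can be tested once, which is what gives
the better ARM code mentioned above.

	/* short-circuit: ex_flag need not be evaluated once res is non-zero */
	static int needs_slowpath_logical(int res, int ex_flag)
	{
		return res || ex_flag;
	}

	/* bitwise: both flags are combined first, then tested once */
	static int needs_slowpath_bitwise(int res, int ex_flag)
	{
		return (res | ex_flag) != 0;
	}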

I also turned everything into static inline functions rather than macros, for
better readability (both produce the same code after all).

And finally I added the missing \t to the multi-line assembly code.

Signed-off-by: Nicolas Pitre <nico@cam.org>

Index: linux-2.6/include/asm-arm/mutex.h
===================================================================
--- linux-2.6.orig/include/asm-arm/mutex.h
+++ linux-2.6/include/asm-arm/mutex.h
@@ -23,72 +23,71 @@
  * simply bail out immediately through the slow path where the lock will be
  * reattempted until it succeeds.
  */
-#define __mutex_fastpath_lock(count, fail_fn)				\
-do {									\
-	int __ex_flag, __res;						\
-									\
-	typecheck(atomic_t *, count);					\
-	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
-									\
-	__asm__ (							\
-		"ldrex	%0, [%2]	\n"				\
-		"sub	%0, %0, #1	\n"				\
-		"strex	%1, %0, [%2]	\n"				\
-									\
-		: "=&r" (__res), "=&r" (__ex_flag)			\
-		: "r" (&(count)->counter)				\
-		: "cc","memory" );					\
-									\
-	if (unlikely(__res || __ex_flag))				\
-		fail_fn(count);						\
-} while (0)
-
-#define __mutex_fastpath_lock_retval(count, fail_fn)			\
-({									\
-	int __ex_flag, __res;						\
-									\
-	typecheck(atomic_t *, count);					\
-	typecheck_fn(fastcall int (*)(atomic_t *), fail_fn);		\
-									\
-	__asm__ (							\
-		"ldrex	%0, [%2]	\n"				\
-		"sub	%0, %0, #1	\n"				\
-		"strex	%1, %0, [%2]	\n"				\
-									\
-		: "=&r" (__res), "=&r" (__ex_flag)			\
-		: "r" (&(count)->counter)				\
-		: "cc","memory" );					\
-									\
-	__res |= __ex_flag;						\
-	if (unlikely(__res != 0))					\
-		__res = fail_fn(count);					\
-	__res;								\
-})
+static inline void
+__mutex_fastpath_lock(atomic_t *count, fastcall void (*fail_fn)(atomic_t *))
+{
+	int __ex_flag, __res;
+
+	__asm__ (
+
+		"ldrex	%0, [%2]	\n\t"
+		"sub	%0, %0, #1	\n\t"
+		"strex	%1, %0, [%2]	"
+
+		: "=&r" (__res), "=&r" (__ex_flag)
+		: "r" (&(count)->counter)
+		: "cc","memory" );
+
+	__res |= __ex_flag;
+	if (unlikely(__res != 0))
+		fail_fn(count);
+}
+
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, fastcall int (*fail_fn)(atomic_t *))
+{
+	int __ex_flag, __res;
+
+	__asm__ (
+
+		"ldrex	%0, [%2]	\n\t"
+		"sub	%0, %0, #1	\n\t"
+		"strex	%1, %0, [%2]	"
+
+		: "=&r" (__res), "=&r" (__ex_flag)
+		: "r" (&(count)->counter)
+		: "cc","memory" );
+
+	__res |= __ex_flag;
+	if (unlikely(__res != 0))
+		__res = fail_fn(count);
+	return __res;
+}
 
 /*
  * Same trick is used for the unlock fast path. However the original value,
  * rather than the result, is used to test for success in order to have
  * better generated assembly.
  */
-#define __mutex_fastpath_unlock(count, fail_fn)				\
-do {									\
-	int __ex_flag, __res, __orig;					\
-									\
-	typecheck(atomic_t *, count);					\
-	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
-									\
-	__asm__ (							\
-		"ldrex	%0, [%3]	\n"				\
-		"add	%1, %0, #1	\n"				\
-		"strex	%2, %1, [%3]	\n"				\
-									\
-		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)	\
-		: "r" (&(count)->counter)				\
-		: "cc","memory" );					\
-									\
-	if (unlikely(__orig || __ex_flag))				\
-		fail_fn(count);						\
-} while (0)
+static inline void
+__mutex_fastpath_unlock(atomic_t *count, fastcall void (*fail_fn)(atomic_t *))
+{
+	int __ex_flag, __res, __orig;
+
+	__asm__ (
+
+		"ldrex	%0, [%3]	\n\t"
+		"add	%1, %0, #1	\n\t"
+		"strex	%2, %1, [%3]	"
+
+		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
+		: "r" (&(count)->counter)
+		: "cc","memory" );
+
+	__orig |= __ex_flag;
+	if (unlikely(__orig != 0))
+		fail_fn(count);
+}
 
 /*
  * If the unlock was done on a contended lock, or if the unlock simply fails
@@ -110,12 +109,12 @@ __mutex_fastpath_trylock(atomic_t *count
 
 	__asm__ (
 
-		"1: ldrex	%0, [%3]	\n"
-		"subs		%1, %0, #1	\n"
-		"strexeq	%2, %1, [%3]	\n"
-		"movlt		%0, #0		\n"
-		"cmpeq		%2, #0		\n"
-		"bgt		1b		\n"
+		"1: ldrex	%0, [%3]	\n\t"
+		"subs		%1, %0, #1	\n\t"
+		"strexeq	%2, %1, [%3]	\n\t"
+		"movlt		%0, #0		\n\t"
+		"cmpeq		%2, #0		\n\t"
+		"bgt		1b		"
 
 		: "=&r" (__orig), "=&r" (__res), "=&r" (__ex_flag)
 		: "r" (&count->counter)

