public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 04/11] mutex subsystem, add include/asm-x86_64/mutex.h
@ 2005-12-23 16:17 Ingo Molnar
  0 siblings, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2005-12-23 16:17 UTC (permalink / raw)
  To: lkml
  Cc: Linus Torvalds, Andrew Morton, Arjan van de Ven, Nicolas Pitre,
	Jes Sorensen, Zwane Mwaikambo, Oleg Nesterov, David Howells,
	Alan Cox, Benjamin LaHaise, Steven Rostedt, Christoph Hellwig,
	Andi Kleen, Russell King

add the x86_64 version of mutex.h, optimized in assembly.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>

----

 include/asm-x86_64/mutex.h |   74 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 74 insertions(+)

Index: linux/include/asm-x86_64/mutex.h
===================================================================
--- /dev/null
+++ linux/include/asm-x86_64/mutex.h
@@ -0,0 +1,74 @@
+/*
+ * Assembly implementation of the mutex fastpath, based on atomic
+ * decrement/increment.
+ *
+ * started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ * __mutex_fastpath_lock - decrement and call function if negative
+ * @v: pointer of type atomic_t
+ * @fn_name: function to call if the result is negative
+ *
+ * Atomically decrements @v and calls @fn_name if the result is negative.
+ */
+#define __mutex_fastpath_lock(v, fn_name)				\
+do {									\
+	/* type-check the function too: */				\
+	fastcall void (*__tmp)(atomic_t *) = fn_name;			\
+	unsigned long dummy;						\
+	/* __tmp exists only for the type check, it is never called: */	\
+	(void)__tmp;							\
+	typecheck(atomic_t *, v);					\
+	/* v is passed in %rdi; the =D output marks it overwritten: */	\
+	__asm__ __volatile__(						\
+		LOCK "decl (%%rdi)\n"					\
+		"js 2f\n"						\
+		"1:\n"							\
+		LOCK_SECTION_START("")					\
+		"2: call "#fn_name"\n\t"				\
+		"jmp 1b\n"						\
+		LOCK_SECTION_END					\
+		:"=D" (dummy)						\
+		:"D" (v)						\
+		:"rax", "rsi", "rdx", "rcx",				\
+		 "r8", "r9", "r10", "r11", "memory");			\
+} while (0)
+
+/**
+ * __mutex_fastpath_unlock - increment and call function if nonpositive
+ * @v: pointer of type atomic_t
+ * @fn_name: function to call if the result is nonpositive
+ *
+ * Atomically increments @v and calls @fn_name if the result is nonpositive.
+ */
+#define __mutex_fastpath_unlock(v, fn_name)				\
+do {									\
+	/* type-check the function too: */				\
+	fastcall void (*__tmp)(atomic_t *) = fn_name;			\
+	unsigned long dummy;						\
+	/* __tmp exists only for the type check, it is never called: */	\
+	(void)__tmp;							\
+	typecheck(atomic_t *, v);					\
+	/* v is passed in %rdi; the =D output marks it overwritten: */	\
+	__asm__ __volatile__(						\
+		LOCK "incl (%%rdi)\n"					\
+		"jle 2f\n"						\
+		"1:\n"							\
+		LOCK_SECTION_START("")					\
+		"2: call "#fn_name"\n\t"				\
+		"jmp 1b\n"						\
+		LOCK_SECTION_END					\
+		:"=D" (dummy)						\
+		:"D" (v)						\
+		:"rax", "rsi", "rdx", "rcx",				\
+		 "r8", "r9", "r10", "r11", "memory");			\
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock()	1
+
+#endif

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch 04/11] mutex subsystem, add include/asm-x86_64/mutex.h
@ 2005-12-27 14:15 Ingo Molnar
  2005-12-27 15:07 ` Eric Dumazet
  0 siblings, 1 reply; 5+ messages in thread
From: Ingo Molnar @ 2005-12-27 14:15 UTC (permalink / raw)
  To: lkml
  Cc: Linus Torvalds, Andrew Morton, Arjan van de Ven, Nicolas Pitre,
	Jes Sorensen, Zwane Mwaikambo, Oleg Nesterov, David Howells,
	Alan Cox, Benjamin LaHaise, Steven Rostedt, Christoph Hellwig,
	Andi Kleen, Russell King

add the x86_64 version of mutex.h, optimized in assembly.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>

----

 include/asm-x86_64/mutex.h |   97 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 97 insertions(+)

Index: linux/include/asm-x86_64/mutex.h
===================================================================
--- /dev/null
+++ linux/include/asm-x86_64/mutex.h
@@ -0,0 +1,97 @@
+/*
+ * Assembly implementation of the mutex fastpath, based on atomic
+ * decrement/increment.
+ *
+ * started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ * __mutex_fastpath_lock - decrement and call function if negative
+ * @v: pointer of type atomic_t
+ * @fn_name: function to call if the result is negative
+ *
+ * Atomically decrements @v and calls @fn_name if the result is negative.
+ */
+#define __mutex_fastpath_lock(v, fn_name)				\
+do {									\
+	/* type-check the function too: */				\
+	fastcall void (*__tmp)(atomic_t *) = fn_name;			\
+	unsigned long dummy;						\
+	/* __tmp exists only for the type check, it is never called: */	\
+	(void)__tmp;							\
+	typecheck(atomic_t *, v);					\
+	/* v is passed in %rdi; the =D output marks it overwritten: */	\
+	__asm__ __volatile__(						\
+		LOCK	"   decl (%%rdi)	\n"			\
+			"   js 2f		\n"			\
+			"1:			\n"			\
+									\
+		LOCK_SECTION_START("")					\
+			"2: call "#fn_name"	\n"			\
+			"   jmp 1b		\n"			\
+		LOCK_SECTION_END					\
+									\
+		:"=D" (dummy)						\
+		: "D" (v)						\
+		: "rax", "rsi", "rdx", "rcx",				\
+		  "r8", "r9", "r10", "r11", "memory");			\
+} while (0)
+
+/**
+ * __mutex_fastpath_unlock - increment and call function if nonpositive
+ * @v: pointer of type atomic_t
+ * @fn_name: function to call if the result is nonpositive
+ *
+ * Atomically increments @v and calls @fn_name if the result is nonpositive.
+ */
+#define __mutex_fastpath_unlock(v, fn_name)				\
+do {									\
+	/* type-check the function too: */				\
+	fastcall void (*__tmp)(atomic_t *) = fn_name;			\
+	unsigned long dummy;						\
+	/* __tmp exists only for the type check, it is never called: */	\
+	(void)__tmp;							\
+	typecheck(atomic_t *, v);					\
+	/* v is passed in %rdi; the =D output marks it overwritten: */	\
+	__asm__ __volatile__(						\
+		LOCK	"   incl (%%rdi)	\n"			\
+			"   jle 2f		\n"			\
+			"1:			\n"			\
+									\
+		LOCK_SECTION_START("")					\
+			"2: call "#fn_name"	\n"			\
+			"   jmp 1b		\n"			\
+		LOCK_SECTION_END					\
+									\
+		:"=D" (dummy)						\
+		: "D" (v)						\
+		: "rax", "rsi", "rdx", "rcx",				\
+		  "r8", "r9", "r10", "r11", "memory");			\
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock()	1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ *  @count: pointer of type atomic_t
+ *  @fn: fallback function (unused on x86_64, see below)
+ *
+ * Change the count from 1 to 0 and return 1 (success), or return 0 (failure)
+ * if it wasn't 1 originally. [the fallback function is never used on
+ * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.]
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fn)(atomic_t *))
+{
+	/* compare inside likely(): it only yields 0 or 1, not the old count */
+	if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+		return 1;
+	return 0;
+}
+
+#endif

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 04/11] mutex subsystem, add include/asm-x86_64/mutex.h
  2005-12-27 14:15 Ingo Molnar
@ 2005-12-27 15:07 ` Eric Dumazet
  2005-12-27 15:41   ` Ingo Molnar
  2005-12-27 17:40   ` Andreas Kleen
  0 siblings, 2 replies; 5+ messages in thread
From: Eric Dumazet @ 2005-12-27 15:07 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: lkml, Linus Torvalds, Andrew Morton, Arjan van de Ven,
	Nicolas Pitre, Jes Sorensen, Zwane Mwaikambo, Oleg Nesterov,
	David Howells, Alan Cox, Benjamin LaHaise, Steven Rostedt,
	Christoph Hellwig, Andi Kleen, Russell King

Ingo Molnar a écrit :
> add the x86_64 version of mutex.h, optimized in assembly.
> +/**
> + * __mutex_fastpath_lock - decrement and call function if negative
> + * @v: pointer of type atomic_t
> + * @fn: function to call if the result is negative
> + *
> + * Atomically decrements @v and calls <fn> if the result is negative.
> + */
> +#define __mutex_fastpath_lock(v, fn_name)				\
> +do {									\
> +	/* type-check the function too: */				\
> +	fastcall void (*__tmp)(atomic_t *) = fn_name;			\
> +	unsigned long dummy;						\
> +									\
> +	(void)__tmp;							\
> +	typecheck(atomic_t *, v);					\
> +									\
> +	__asm__ __volatile__(						\
> +		LOCK	"   decl (%%rdi)	\n"			\
> +			"   js 2f		\n"			\
> +			"1:			\n"			\
> +									\
> +		LOCK_SECTION_START("")					\
> +			"2: call "#fn_name"	\n"			\
> +			"   jmp 1b		\n"			\
> +		LOCK_SECTION_END					\
> +									\
> +		:"=D" (dummy)						\
> +		: "D" (v)						\
> +		: "rax", "rsi", "rdx", "rcx",				\
> +		  "r8", "r9", "r10", "r11", "memory");			\
> +} while (0)

Hi Ingo

I do think this assembly is not very fair.
It has an *insane* register pressure for the compiler :
The fast path is thus not so fast.

Compare with the include/asm-x86_64/semaphore.h
         __asm__ __volatile__(
                 "# atomic down operation\n\t"
                 LOCK "decl %0\n\t"     /* --sem->count */
                 "js 2f\n"
                 "1:\n"
                 LOCK_SECTION_START("")
                 "2:\tcall __down_failed\n\t"
                 "jmp 1b\n"
                 LOCK_SECTION_END
                 :"=m" (sem->count)
                 :"D" (sem)
                 :"memory");

Only one register is mandatory (%rdi), instead of nine.

Two solutions :

(This one has no register constraint, but the slowpath is 'long')
It also requires the mutex is not on the stack.

/*
 * Save %rdi plus the registers the mutex patch lists as clobbered
 * (rax, rsi, rdx, rcx, r8-r11) before the slow-path function is called.
 * POP_SCRATCH pops them in the opposite order.
 */
#define PUSH_SCRATCH "push %%rdi; push %%rax; push %%rsi;"  \
		     "push %%rdx; push %%rcx; push %%r8;" \
		     "push %%r9; push %%r10; push %%r11\n"

/*
 * Restore the registers saved by PUSH_SCRATCH, popping them in the
 * reverse of the order in which they were pushed.
 */
#define POP_SCRATCH  "pop %%r11; pop %%r10; pop %%r9;"    \
		     "pop %%r8; pop %%rcx; pop %%rdx;"     \
		     "pop %%rsi; pop %%rax; pop %%rdi\n"

  	__asm__ __volatile__(					\
  		LOCK	"   decl %0	\n"		\
  			"   js 2f		\n"		\
  			"1:			\n"		\
  								\
  		LOCK_SECTION_START("")				\
                        "2:" PUSH_SCRATCH                        \
			"lea %0,%%rdi            \n"            \
  			"call "#fn_name"	\n"             \
			POP_SCRATCH				\
  			"   jmp 1b		\n"		\
  		LOCK_SECTION_END				\
  								\
  		:"=m" (v->count)					\
  		: "m" (v->count)					\
  		: "memory");			\


Or call a wrapper that does the PUSH/POP thing.

Thank you
Eric

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 04/11] mutex subsystem, add include/asm-x86_64/mutex.h
  2005-12-27 15:07 ` Eric Dumazet
@ 2005-12-27 15:41   ` Ingo Molnar
  2005-12-27 17:40   ` Andreas Kleen
  1 sibling, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2005-12-27 15:41 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: lkml, Linus Torvalds, Andrew Morton, Arjan van de Ven,
	Nicolas Pitre, Jes Sorensen, Zwane Mwaikambo, Oleg Nesterov,
	David Howells, Alan Cox, Benjamin LaHaise, Steven Rostedt,
	Christoph Hellwig, Andi Kleen, Russell King


* Eric Dumazet <dada1@cosmosbay.com> wrote:

> >+		:"=D" (dummy)						\
> >+		: "D" (v)						\
> >+		: "rax", "rsi", "rdx", "rcx",				\
> >+		  "r8", "r9", "r10", "r11", "memory");			\
> >+} while (0)
> 
> Hi Ingo
> 
> I do think this assembly is not very fair. It has an *insane* register 
> pressure for the compiler : The fast path is thus not so fast.

if you look at the compiler output you'll notice that it's not a problem 
actually: this fastpath is only inlined into the generic code, where it 
has no clobbering side-effects.

you are right in that if this were to be inlined left and right, this 
would be quite bad.

	Ingo

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch 04/11] mutex subsystem, add include/asm-x86_64/mutex.h
  2005-12-27 15:07 ` Eric Dumazet
  2005-12-27 15:41   ` Ingo Molnar
@ 2005-12-27 17:40   ` Andreas Kleen
  1 sibling, 0 replies; 5+ messages in thread
From: Andreas Kleen @ 2005-12-27 17:40 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Ingo Molnar, lkml, Linus Torvalds, Andrew Morton,
	Arjan van de Ven, Nicolas Pitre, Jes Sorensen, Zwane Mwaikambo,
	Oleg Nesterov, David Howells, Alan Cox, Benjamin LaHaise,
	Steven Rostedt, Christoph Hellwig, Russell King

Am Di 27.12.2005 16:07 schrieb Eric Dumazet <dada1@cosmosbay.com>:

> Or call a wrapper that does the PUSH/POP thing.

Standard wrappers for this are in arch/x86_64/lib/thunk.S

-Andi (who always wished gcc had an function __attribute__ for this)



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2005-12-27 17:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-12-23 16:17 [patch 04/11] mutex subsystem, add include/asm-x86_64/mutex.h Ingo Molnar
  -- strict thread matches above, loose matches on Subject: below --
2005-12-27 14:15 Ingo Molnar
2005-12-27 15:07 ` Eric Dumazet
2005-12-27 15:41   ` Ingo Molnar
2005-12-27 17:40   ` Andreas Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox