linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Miklos Szeredi <miklos@szeredi.hu>
Cc: cebbert@redhat.com, chris@atlee.ca, linux-kernel@vger.kernel.org,
	tglx@linutronix.de, torvalds@linux-foundation.org,
	akpm@linux-foundation.org, kiran@scalex86.org
Subject: Re: [BUG] long freezes on thinkpad t60
Date: Mon, 18 Jun 2007 14:36:46 +0200	[thread overview]
Message-ID: <20070618123646.GA22672@elte.hu> (raw)
In-Reply-To: <E1I0EJc-0007El-00@dorka.pomaz.szeredi.hu>


* Miklos Szeredi <miklos@szeredi.hu> wrote:

> > * Ingo Molnar <mingo@elte.hu> wrote:
> > 
> > > how about the patch below? Boot-tested on 32-bit. As a side-effect 
> > > this change also removes the 255 CPUs limit from the 32-bit kernel.
> > 
> > boot-tested on 64-bit too now.
> 
> Strange, I can't even get past the compile stage ;)
> 
>   CC      kernel/spinlock.o
> {standard input}: Assembler messages:
> {standard input}:207: Error: backward ref to unknown label "4:"

oh, sorry - i built it with !PREEMPT, which doesnt make use of the flags 
thing. I fixed the build and have cleaned up and simplified that code 
some more - does the patch below work for you? (it does for me on both 
32-bit and 64-bit)

	Ingo

----------------------------->
Subject: [patch] x86: fix spin-loop starvation bug
From: Ingo Molnar <mingo@elte.hu>

Miklos Szeredi reported very long pauses (several seconds, sometimes
more) on his T60 (with a Core2Duo) which he managed to track down to
wait_task_inactive()'s open-coded busy-loop. He observed that an
interrupt on one core tries to acquire the runqueue-lock but does not
succeed in doing so for a very long time - while wait_task_inactive() on
the other core loops waiting for the first core to deschedule a task
(which it wont do while spinning in an interrupt handler).

The problem is: both the spin_lock() code and the wait_task_inactive()
loop uses cpu_relax()/rep_nop(), so in theory the CPU should have
guaranteed MESI-fairness to the two cores - but that didnt happen: one
of the cores was able to monopolize the cacheline that holds the
runqueue lock, for extended periods of time.

This patch changes the spin-loop to assert an atomic op after every REP
NOP instance - this will cause the CPU to express its "MESI interest" in
that cacheline after every REP NOP.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-i386/spinlock.h   |   94 ++++++++++++++++--------------------------
 include/asm-x86_64/spinlock.h |   69 +++++++++++++++---------------
 2 files changed, 72 insertions(+), 91 deletions(-)

Index: linux-cfs-2.6.22-rc5.q/include/asm-i386/spinlock.h
===================================================================
--- linux-cfs-2.6.22-rc5.q.orig/include/asm-i386/spinlock.h
+++ linux-cfs-2.6.22-rc5.q/include/asm-i386/spinlock.h
@@ -29,21 +29,24 @@
 
 static inline int __raw_spin_is_locked(raw_spinlock_t *x)
 {
-	return *(volatile signed char *)(&(x)->slock) <= 0;
+	return *(volatile unsigned int *)(&(x)->slock) == 0;
 }
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
-	asm volatile("\n1:\t"
-		     LOCK_PREFIX " ; decb %0\n\t"
-		     "jns 3f\n"
-		     "2:\t"
-		     "rep;nop\n\t"
-		     "cmpb $0,%0\n\t"
-		     "jle 2b\n\t"
-		     "jmp 1b\n"
-		     "3:\n\t"
-		     : "+m" (lock->slock) : : "memory");
+	asm volatile(
+	"1:							\n"
+		/* copy bit 0 to carry-flag and clear bit 0 */
+		LOCK_PREFIX " ; btrl $0, %[slock]		\n"
+	"	jc 2f						\n"
+		/* PAUSE */
+	"	rep; nop					\n"
+	"	jmp 1b						\n"
+	"2:							\n"
+		: [slock] "+m" (lock->slock)
+		:
+		: "memory", "cc"
+	);
 }
 
 /*
@@ -58,69 +61,46 @@ static inline void __raw_spin_lock(raw_s
 static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 {
 	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; decb %[slock]\n\t"
-		"jns 5f\n"
-		"2:\t"
-		"testl $0x200, %[flags]\n\t"
-		"jz 4f\n\t"
-		STI_STRING "\n"
-		"3:\t"
-		"rep;nop\n\t"
-		"cmpb $0, %[slock]\n\t"
-		"jle 3b\n\t"
-		CLI_STRING "\n\t"
-		"jmp 1b\n"
-		"4:\t"
-		"rep;nop\n\t"
-		"cmpb $0, %[slock]\n\t"
-		"jg 1b\n\t"
-		"jmp 4b\n"
-		"5:\n\t"
+	"1:							 \n"
+		/* copy bit 0 to carry-flag and clear bit 0 */
+		LOCK_PREFIX " ; btrl $0, %[slock]		 \n"
+	"	jc 3f						 \n"
+		/* were interrupts disabled? */
+	"	testl $0x200, %[flags]				 \n"
+	"	jz 2f						 \n"
+		STI_STRING					"\n"
+		/* PAUSE */
+	"2:	rep; nop					 \n"
+		CLI_STRING					"\n"
+	"	jmp 1b						 \n"
+	"3:							 \n"
 		: [slock] "+m" (lock->slock)
-		: [flags] "r" (flags)
-	 	  CLI_STI_INPUT_ARGS
-		: "memory" CLI_STI_CLOBBERS);
+		: [flags] "r"  (flags)
+		  CLI_STI_INPUT_ARGS
+		: "memory", "cc" CLI_STI_CLOBBERS);
 }
 #endif
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	char oldval;
+	unsigned int oldval;
+
 	asm volatile(
-		"xchgb %b0,%1"
+		"xchgl %0, %1"
 		:"=q" (oldval), "+m" (lock->slock)
 		:"0" (0) : "memory");
-	return oldval > 0;
+
+	return oldval != 0;
 }
 
 /*
- * __raw_spin_unlock based on writing $1 to the low byte.
- * This method works. Despite all the confusion.
- * (except on PPro SMP or if we are using OOSTORE, so we use xchgb there)
- * (PPro errata 66, 92)
+ * __raw_spin_unlock based on writing $1 to the lock.
  */
-
-#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
-
-static inline void __raw_spin_unlock(raw_spinlock_t *lock)
-{
-	asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory");
-}
-
-#else
-
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	char oldval = 1;
-
-	asm volatile("xchgb %b0, %1"
-		     : "=q" (oldval), "+m" (lock->slock)
-		     : "0" (oldval) : "memory");
+	asm volatile("movl $1, %0" : "+m" (lock->slock) : : "memory");
 }
 
-#endif
-
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
 	while (__raw_spin_is_locked(lock))
Index: linux-cfs-2.6.22-rc5.q/include/asm-x86_64/spinlock.h
===================================================================
--- linux-cfs-2.6.22-rc5.q.orig/include/asm-x86_64/spinlock.h
+++ linux-cfs-2.6.22-rc5.q/include/asm-x86_64/spinlock.h
@@ -19,21 +19,24 @@
 
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return *(volatile signed int *)(&(lock)->slock) <= 0;
+	return *(volatile unsigned int *)(&(lock)->slock) == 0;
 }
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
 	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; decl %0\n\t"
-		"jns 2f\n"
-		"3:\n"
-		"rep;nop\n\t"
-		"cmpl $0,%0\n\t"
-		"jle 3b\n\t"
-		"jmp 1b\n"
-		"2:\t" : "=m" (lock->slock) : : "memory");
+	"1:							\n"
+		/* copy bit 0 to carry-flag and clear bit 0 */
+		LOCK_PREFIX " ; btrl $0, %[slock]		\n"
+	"	jc 2f						\n"
+		/* PAUSE */
+	"	rep; nop					\n"
+	"	jmp 1b						\n"
+	"2:							\n"
+		: [slock] "+m" (lock->slock)
+		:
+		: "memory", "cc"
+	);
 }
 
 /*
@@ -43,43 +46,41 @@ static inline void __raw_spin_lock(raw_s
 static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 {
 	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; decl %0\n\t"
-		"jns 5f\n"
-		"testl $0x200, %1\n\t"	/* interrupts were disabled? */
-		"jz 4f\n\t"
-	        "sti\n"
-		"3:\t"
-		"rep;nop\n\t"
-		"cmpl $0, %0\n\t"
-		"jle 3b\n\t"
-		"cli\n\t"
-		"jmp 1b\n"
-		"4:\t"
-		"rep;nop\n\t"
-		"cmpl $0, %0\n\t"
-		"jg 1b\n\t"
-		"jmp 4b\n"
-		"5:\n\t"
-		: "+m" (lock->slock) : "r" ((unsigned)flags) : "memory");
+	"1:							\n"
+		/* copy bit 0 to carry-flag and clear bit 0 */
+		LOCK_PREFIX " ; btrl $0, %[slock]		\n"
+	"	jc 3f						\n"
+		/* were interrupts disabled? */
+	"	testl $0x200, %[flags]				\n"
+	"	jz 2f						\n"
+	"	sti						\n"
+		/* PAUSE */
+	"2:	rep; nop					\n"
+	"	cli						\n"
+	"	jmp 1b						\n"
+	"3:							\n"
+		: [slock] "+m" (lock->slock)
+		: [flags] "r"  ((unsigned int)flags)
+		: "memory", "cc"
+	);
 }
 #endif
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	int oldval;
+	unsigned int oldval;
 
 	asm volatile(
-		"xchgl %0,%1"
-		:"=q" (oldval), "=m" (lock->slock)
+		"xchgl %0, %1"
+		:"=q" (oldval), "+m" (lock->slock)
 		:"0" (0) : "memory");
 
-	return oldval > 0;
+	return oldval != 0;
 }
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	asm volatile("movl $1,%0" :"=m" (lock->slock) :: "memory");
+	asm volatile("movl $1, %0" : "+m" (lock->slock) : : "memory");
 }
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)

  reply	other threads:[~2007-06-18 12:37 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-05-24 12:04 [BUG] long freezes on thinkpad t60 Miklos Szeredi
2007-05-24 12:54 ` Ingo Molnar
2007-05-24 14:03   ` Miklos Szeredi
2007-05-24 14:10     ` Ingo Molnar
2007-05-24 14:28       ` Miklos Szeredi
2007-05-24 14:42         ` Ingo Molnar
2007-05-24 14:44         ` Ingo Molnar
2007-05-24 17:09           ` Miklos Szeredi
2007-05-24 21:01             ` Ingo Molnar
2007-05-25  9:51               ` Miklos Szeredi
2007-06-14 16:04                 ` Miklos Szeredi
2007-06-15 21:25                   ` Chuck Ebbert
2007-06-16 10:37                   ` Ingo Molnar
2007-06-17 21:46                     ` Miklos Szeredi
2007-06-18  6:43                       ` Ingo Molnar
2007-06-18  7:24                         ` Miklos Szeredi
2007-06-18  8:12                           ` Ingo Molnar
2007-06-18  8:20                             ` Andrew Morton
2007-06-19  4:22                               ` Ravikiran G Thirumalai
2007-06-18  8:25                             ` Miklos Szeredi
2007-06-18  8:31                               ` Ingo Molnar
2007-06-18  8:34                                 ` Miklos Szeredi
2007-06-18  9:18                                   ` Ingo Molnar
2007-06-18  9:38                                     ` Ingo Molnar
2007-06-18  9:44                                       ` Ingo Molnar
2007-06-18 10:18                                         ` Miklos Szeredi
2007-06-18 12:36                                           ` Ingo Molnar [this message]
2007-06-18 13:10                                             ` Miklos Szeredi
2007-06-18 16:34                             ` Linus Torvalds
2007-06-18 17:41                               ` Miklos Szeredi
2007-06-18 17:48                                 ` Linus Torvalds
2007-06-18 18:02                                   ` Ingo Molnar
2007-06-18 18:00                               ` Ingo Molnar
2007-06-18 18:25                                 ` Linus Torvalds
2007-06-20  9:36                               ` Jarek Poplawski
2007-06-20 17:34                                 ` Linus Torvalds
2007-06-21  7:30                                   ` Ingo Molnar
2007-06-21 15:50                                     ` Linus Torvalds
2007-06-21 16:08                                       ` Ingo Molnar
2007-06-21 16:32                                         ` Linus Torvalds
2007-06-21 16:44                                         ` Chuck Ebbert
2007-06-21 17:31                                           ` Linus Torvalds
2007-06-21 18:29                                             ` Eric Dumazet
2007-06-21 18:44                                               ` Linus Torvalds
2007-06-21 19:35                                                 ` Linus Torvalds
2007-06-21 20:09                                                   ` Ingo Molnar
2007-06-21 20:14                                                     ` Linus Torvalds
2007-06-21 20:30                                                       ` Ingo Molnar
2007-06-21 20:48                                                         ` Linus Torvalds
2007-06-21 21:06                                                           ` Ingo Molnar
2007-06-21 20:42                                                       ` [patch] spinlock debug: make looping nicer Ingo Molnar
2007-06-21 20:58                                                         ` Linus Torvalds
2007-06-21 21:15                                                           ` Ingo Molnar
2007-06-22  7:00                                                             ` Jarek Poplawski
2007-06-21 20:36                                                   ` [BUG] long freezes on thinkpad t60 Eric Dumazet
2007-06-21 19:56                                                 ` Ingo Molnar
2007-06-21 20:10                                                   ` Linus Torvalds
2007-06-21 20:23                                                     ` Ingo Molnar
2007-06-21 20:12                                                 ` Ingo Molnar
2007-06-26  8:42                                                 ` Nick Piggin
2007-06-26 10:56                                                   ` Jarek Poplawski
2007-06-26 17:23                                                   ` Linus Torvalds
2007-06-27  5:23                                                     ` Nick Piggin
2007-06-27  6:04                                                       ` Linus Torvalds
2007-06-27  6:20                                                         ` Nick Piggin
2007-06-27 19:47                                                         ` Linus Torvalds
2007-06-27 20:10                                                           ` Ingo Molnar
2007-06-27 20:17                                                           ` Davide Libenzi
2007-06-27 22:11                                                             ` Linus Torvalds
2007-06-27 23:30                                                               ` Davide Libenzi
2007-06-28  0:46                                                                 ` Linus Torvalds
2007-06-28  3:03                                                                   ` Davide Libenzi
2007-07-02  7:06                                                           ` Nick Piggin
2007-06-21 20:16                                             ` Ingo Molnar
2007-06-22  8:17                                               ` Ingo Molnar
2007-06-23 10:36                                                 ` Miklos Szeredi
2007-06-23 16:39                                                   ` Linus Torvalds
2007-06-25  6:45                                                   ` Jarek Poplawski
2007-06-21 20:18                                             ` Ingo Molnar
2007-06-21 20:36                                               ` Linus Torvalds
2007-06-21  7:38                                   ` Jarek Poplawski
2007-06-21  8:39                                     ` Ingo Molnar
2007-06-21 11:09                                       ` Jarek Poplawski
2007-06-21 16:01                                     ` Linus Torvalds
2007-06-22 10:38                                       ` Jarek Poplawski
2007-05-24 22:08 ` Henrique de Moraes Holschuh
2007-05-24 22:13   ` Kok, Auke
2007-05-25  6:58     ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070618123646.GA22672@elte.hu \
    --to=mingo@elte.hu \
    --cc=akpm@linux-foundation.org \
    --cc=cebbert@redhat.com \
    --cc=chris@atlee.ca \
    --cc=kiran@scalex86.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).