Generic Linux architectural discussions
* [patch 1/2] spinlock: lockbreak cleanup
@ 2007-08-08  4:22 Nick Piggin
  2007-08-08  4:24 ` [patch 2/2] x86_64: ticket lock spinlock Nick Piggin
  0 siblings, 1 reply; 7+ messages in thread
From: Nick Piggin @ 2007-08-08  4:22 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Andi Kleen, Linus Torvalds, Ingo Molnar, linux-arch,
	Linux Kernel Mailing List


The break_lock data structure and code for spinlocks are quite nasty.
Not only do they double the size of a spinlock, they also change locking to
a potentially less optimal trylock (the generic slow path is sketched below).
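
For reference, the generic lock-break slow path in kernel/spinlock.c spins
roughly as in the sketch below (simplified and from memory; illustration
only, not part of this patch):

	for (;;) {
		preempt_disable();
		if (likely(_raw_spin_trylock(lock)))
			break;				/* got the lock */
		preempt_enable();			/* stay preemptible while waiting */

		if (!lock->break_lock)
			lock->break_lock = 1;		/* ask the holder to drop the lock */
		while (spin_is_locked(lock) && lock->break_lock)
			cpu_relax();
	}
	lock->break_lock = 0;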

Put all of that under CONFIG_GENERIC_LOCKBREAK, and introduce a
__raw_spin_is_contended that uses the lock data itself to determine whether
there are waiters on the lock, to be used if CONFIG_GENERIC_LOCKBREAK is
not set.

Rename need_lockbreak to spin_needbreak, make it use spin_is_contended to
decouple it from the spinlock implementation, and make it typesafe (rwlocks
do not have any need_lockbreak sites -- why do they even get bloated up
with that break_lock then?).
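
The caller pattern is unchanged: a long traversal under a spinlock
periodically gives way to waiters and to the scheduler. A sketch (the loop
body is hypothetical; the jbd and mm conversions below follow this shape):

	spin_lock(&lock);
	while (have_more_work()) {			/* hypothetical work loop */
		do_one_item();
		if (need_resched() || spin_needbreak(&lock)) {
			spin_unlock(&lock);		/* let waiters in, maybe reschedule */
			cond_resched();
			spin_lock(&lock);
		}
	}
	spin_unlock(&lock);

cond_resched_lock() (reworked below) combines the unlock/resched/relock step
for callers that want it in one call.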

Signed-off-by: Nick Piggin <npiggin@suse.de>

---
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -1741,26 +1741,16 @@ extern int cond_resched_softirq(void);
 
 /*
  * Does a critical section need to be broken due to another
- * task waiting?:
+ * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * but a general need for low latency)
  */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
-# define need_lockbreak(lock) ((lock)->break_lock)
+#ifdef CONFIG_PREEMPT
+# define spin_needbreak(lock)	spin_is_contended(lock)
 #else
-# define need_lockbreak(lock) 0
+# define spin_needbreak(lock)	0
 #endif
 
 /*
- * Does a critical section need to be broken due to another
- * task waiting or preemption being signalled:
- */
-static inline int lock_need_resched(spinlock_t *lock)
-{
-	if (need_lockbreak(lock) || need_resched())
-		return 1;
-	return 0;
-}
-
-/*
  * Reevaluate whether the task has signals pending delivery.
  * Wake the task if so.
  * This is required every time the blocked sigset_t changes.
Index: linux-2.6/include/linux/spinlock.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock.h
+++ linux-2.6/include/linux/spinlock.h
@@ -120,6 +120,12 @@ do {								\
 
 #define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
 
+#ifdef CONFIG_GENERIC_LOCKBREAK
+#define spin_is_contended(lock) ((lock)->break_lock)
+#else
+#define spin_is_contended(lock)	__raw_spin_is_contended(&(lock)->raw_lock)
+#endif
+
 /**
  * spin_unlock_wait - wait until the spinlock gets unlocked
  * @lock: the spinlock in question.
Index: linux-2.6/fs/jbd/checkpoint.c
===================================================================
--- linux-2.6.orig/fs/jbd/checkpoint.c
+++ linux-2.6/fs/jbd/checkpoint.c
@@ -347,7 +347,8 @@ restart:
 				break;
 			}
 			retry = __process_buffer(journal, jh, bhs,&batch_count);
-			if (!retry && lock_need_resched(&journal->j_list_lock)){
+			if (!retry && (need_resched() ||
+				spin_needbreak(&journal->j_list_lock))) {
 				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 				break;
Index: linux-2.6/fs/jbd/commit.c
===================================================================
--- linux-2.6.orig/fs/jbd/commit.c
+++ linux-2.6/fs/jbd/commit.c
@@ -265,7 +265,7 @@ write_out_data:
 			put_bh(bh);
 		}
 
-		if (lock_need_resched(&journal->j_list_lock)) {
+		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
 			spin_unlock(&journal->j_list_lock);
 			goto write_out_data;
 		}
Index: linux-2.6/fs/jbd2/checkpoint.c
===================================================================
--- linux-2.6.orig/fs/jbd2/checkpoint.c
+++ linux-2.6/fs/jbd2/checkpoint.c
@@ -347,7 +347,8 @@ restart:
 				break;
 			}
 			retry = __process_buffer(journal, jh, bhs,&batch_count);
-			if (!retry && lock_need_resched(&journal->j_list_lock)){
+			if (!retry && (need_resched() ||
+				spin_needbreak(&journal->j_list_lock))) {
 				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 				break;
Index: linux-2.6/fs/jbd2/commit.c
===================================================================
--- linux-2.6.orig/fs/jbd2/commit.c
+++ linux-2.6/fs/jbd2/commit.c
@@ -265,7 +265,7 @@ write_out_data:
 			put_bh(bh);
 		}
 
-		if (lock_need_resched(&journal->j_list_lock)) {
+		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
 			spin_unlock(&journal->j_list_lock);
 			goto write_out_data;
 		}
Index: linux-2.6/include/linux/spinlock_up.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_up.h
+++ linux-2.6/include/linux/spinlock_up.h
@@ -64,6 +64,8 @@ static inline void __raw_spin_unlock(raw
 # define __raw_spin_trylock(lock)	({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
 
+#define __raw_spin_is_contended(lock)	(((void)(lock), 0))
+
 #define __raw_read_can_lock(lock)	(((void)(lock), 1))
 #define __raw_write_can_lock(lock)	(((void)(lock), 1))
 
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -4500,19 +4500,15 @@ EXPORT_SYMBOL(cond_resched);
  */
 int cond_resched_lock(spinlock_t *lock)
 {
+	int resched = need_resched() && system_state == SYSTEM_RUNNING;
 	int ret = 0;
 
-	if (need_lockbreak(lock)) {
+	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
-		cpu_relax();
-		ret = 1;
-		spin_lock(lock);
-	}
-	if (need_resched() && system_state == SYSTEM_RUNNING) {
-		spin_release(&lock->dep_map, 1, _THIS_IP_);
-		_raw_spin_unlock(lock);
-		preempt_enable_no_resched();
-		__cond_resched();
+		if (resched && need_resched())
+			__cond_resched();
+		else
+			cpu_relax();
 		ret = 1;
 		spin_lock(lock);
 	}
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -514,8 +514,7 @@ again:
 		if (progress >= 32) {
 			progress = 0;
 			if (need_resched() ||
-			    need_lockbreak(src_ptl) ||
-			    need_lockbreak(dst_ptl))
+			    spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
 				break;
 		}
 		if (pte_none(*src_pte)) {
@@ -854,7 +853,7 @@ unsigned long unmap_vmas(struct mmu_gath
 			tlb_finish_mmu(*tlbp, tlb_start, start);
 
 			if (need_resched() ||
-				(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
+				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
 				if (i_mmap_lock) {
 					*tlbp = NULL;
 					goto out;
@@ -1860,8 +1859,7 @@ again:
 
 	restart_addr = zap_page_range(vma, start_addr,
 					end_addr - start_addr, details);
-	need_break = need_resched() ||
-			need_lockbreak(details->i_mmap_lock);
+	need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
 
 	if (restart_addr >= end_addr) {
 		/* We have now completed this vma: mark it so */
Index: linux-2.6/arch/x86_64/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86_64/Kconfig
+++ linux-2.6/arch/x86_64/Kconfig
@@ -74,6 +74,11 @@ config ISA
 config SBUS
 	bool
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
Index: linux-2.6/include/linux/spinlock_types.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_types.h
+++ linux-2.6/include/linux/spinlock_types.h
@@ -19,7 +19,7 @@
 
 typedef struct {
 	raw_spinlock_t raw_lock;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+#ifdef CONFIG_GENERIC_LOCKBREAK
 	unsigned int break_lock;
 #endif
 #ifdef CONFIG_DEBUG_SPINLOCK
@@ -35,7 +35,7 @@ typedef struct {
 
 typedef struct {
 	raw_rwlock_t raw_lock;
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+#ifdef CONFIG_GENERIC_LOCKBREAK
 	unsigned int break_lock;
 #endif
 #ifdef CONFIG_DEBUG_SPINLOCK
Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c
+++ linux-2.6/kernel/spinlock.c
@@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock);
  * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
-#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
-	defined(CONFIG_DEBUG_LOCK_ALLOC)
+#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {
Index: linux-2.6/arch/arm/Kconfig
===================================================================
--- linux-2.6.orig/arch/arm/Kconfig
+++ linux-2.6/arch/arm/Kconfig
@@ -91,6 +91,11 @@ config GENERIC_IRQ_PROBE
 	bool
 	default y
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
Index: linux-2.6/arch/i386/Kconfig
===================================================================
--- linux-2.6.orig/arch/i386/Kconfig
+++ linux-2.6/arch/i386/Kconfig
@@ -14,6 +14,11 @@ config X86_32
 	  486, 586, Pentiums, and various instruction-set-compatible chips by
 	  AMD, Cyrix, and others.
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config GENERIC_TIME
 	bool
 	default y
Index: linux-2.6/arch/ia64/Kconfig
===================================================================
--- linux-2.6.orig/arch/ia64/Kconfig
+++ linux-2.6/arch/ia64/Kconfig
@@ -42,6 +42,11 @@ config MMU
 config SWIOTLB
        bool
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_XCHGADD_ALGORITHM
 	bool
 	default y
Index: linux-2.6/arch/m32r/Kconfig
===================================================================
--- linux-2.6.orig/arch/m32r/Kconfig
+++ linux-2.6/arch/m32r/Kconfig
@@ -215,6 +215,11 @@ config IRAM_SIZE
 # Define implied options from the CPU selection here
 #
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	depends on M32R
Index: linux-2.6/arch/mips/Kconfig
===================================================================
--- linux-2.6.orig/arch/mips/Kconfig
+++ linux-2.6/arch/mips/Kconfig
@@ -647,6 +647,11 @@ source "arch/mips/philips/pnx8550/common
 
 endmenu
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
Index: linux-2.6/arch/parisc/Kconfig
===================================================================
--- linux-2.6.orig/arch/parisc/Kconfig
+++ linux-2.6/arch/parisc/Kconfig
@@ -19,6 +19,11 @@ config MMU
 config STACK_GROWSUP
 	def_bool y
 
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
 
Index: linux-2.6/arch/sparc64/Kconfig
===================================================================
--- linux-2.6.orig/arch/sparc64/Kconfig
+++ linux-2.6/arch/sparc64/Kconfig
@@ -196,6 +196,11 @@ config US2E_FREQ
 	  If in doubt, say N.
 
 # Global things across all Sun machines.
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SMP && PREEMPT
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 

* [patch 2/2] x86_64: ticket lock spinlock
  2007-08-08  4:22 [patch 1/2] spinlock: lockbreak cleanup Nick Piggin
@ 2007-08-08  4:24 ` Nick Piggin
  2007-08-08 10:26   ` Andi Kleen
  2007-08-08 17:31   ` Valdis.Kletnieks
  0 siblings, 2 replies; 7+ messages in thread
From: Nick Piggin @ 2007-08-08  4:24 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Andi Kleen, Linus Torvalds, Ingo Molnar, linux-arch,
	Linux Kernel Mailing List


Introduce ticket lock spinlocks for x86-64, which are FIFO. The implementation
is described in the comments. The straight-line lock/unlock instruction
sequence is slightly slower than the dec-based locks on modern x86 CPUs, but
the difference is quite small on Core2 and Opteron when working out of cache,
and becomes almost insignificant even on P4 when the lock misses cache.
trylock is slowed down more noticeably, but trylocks are relatively rare.

The memory ordering of the lock does conform to Intel's standards, and the
implementation has been reviewed by Intel and AMD engineers.

The algorithm also tells us how many CPUs are contending the lock, so
lockbreak becomes trivial and we no longer have to waste 4 bytes per
spinlock for it.

After this, we can no longer spin on any locks with preempt enabled,
and cannot reenable interrupts when spinning on an irq safe lock, because
at that point we have already taken a ticket and we would deadlock if
the same CPU tries to take the lock again.  These are hackish anyway: if
the lock happens to be called under a preempt or interrupt disabled section,
then it will just have the same latency problems. The real fix is to keep
critical sections short, and ensure locks are reasonably fair (which this
patch does).
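
For readers who prefer C to asm, a rough sketch of the same algorithm using
the gcc __sync_fetch_and_add builtin (illustration only: the names are made
up, little-endian x86 is assumed as in the patch, and the real implementation
is the inline asm below):

	/* the kernel's cpu_relax() */
	#define cpu_relax()	__asm__ __volatile__("rep; nop" ::: "memory")

	typedef struct {
		unsigned int slock;	/* only the low 16 bits are used:
					 * byte 0 = head (now serving),
					 * byte 1 = tail (next free ticket) */
	} ticket_lock_t;

	static inline void ticket_lock(ticket_lock_t *lock)
	{
		/* atomically take a ticket: fetch old (head,tail), then tail++ */
		unsigned short old = __sync_fetch_and_add(
					(unsigned short *)&lock->slock, 0x0100);
		unsigned char ticket = old >> 8;	/* our ticket = the old tail */

		/* wait until the head byte reaches our ticket */
		while (*(volatile unsigned char *)&lock->slock != ticket)
			cpu_relax();
	}

	static inline void ticket_unlock(ticket_lock_t *lock)
	{
		/* only the owner writes the head byte, so a plain increment suffices */
		(*(volatile unsigned char *)&lock->slock)++;
	}

	static inline int ticket_is_contended(ticket_lock_t *lock)
	{
		int tmp = *(volatile int *)&lock->slock;

		/* more than one outstanding ticket means someone else is queued */
		return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
	}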

Signed-off-by: Nick Piggin <npiggin@suse.de>

---
Index: linux-2.6/include/asm-x86_64/spinlock.h
===================================================================
--- linux-2.6.orig/include/asm-x86_64/spinlock.h
+++ linux-2.6/include/asm-x86_64/spinlock.h
@@ -12,74 +12,93 @@
  * Simple spin lock operations.  There are two variants, one clears IRQ's
  * on the local processor, one does not.
  *
- * We make no fairness assumptions. They have a cost.
+ * These are fair FIFO ticket locks, which are currently limited to 256
+ * CPUs.
  *
  * (the type definitions are in asm/spinlock_types.h)
  */
 
+#if (NR_CPUS > 256)
+#error spinlock supports a maximum of 256 CPUs
+#endif
+
 static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
 {
-	return *(volatile signed int *)(&(lock)->slock) <= 0;
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 8) & 0xff) != (tmp & 0xff));
 }
 
-static inline void __raw_spin_lock(raw_spinlock_t *lock)
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
 {
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; decl %0\n\t"
-		"jns 2f\n"
-		"3:\n"
-		"rep;nop\n\t"
-		"cmpl $0,%0\n\t"
-		"jle 3b\n\t"
-		"jmp 1b\n"
-		"2:\t" : "=m" (lock->slock) : : "memory");
+	int tmp = *(volatile signed int *)(&(lock)->slock);
+
+	return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
 }
 
-/*
- * Same as __raw_spin_lock, but reenable interrupts during spinning.
- */
-#ifndef CONFIG_PROVE_LOCKING
-static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
-	asm volatile(
-		"\n1:\t"
-		LOCK_PREFIX " ; decl %0\n\t"
-		"jns 5f\n"
-		"testl $0x200, %1\n\t"	/* interrupts were disabled? */
-		"jz 4f\n\t"
-	        "sti\n"
-		"3:\t"
-		"rep;nop\n\t"
-		"cmpl $0, %0\n\t"
-		"jle 3b\n\t"
-		"cli\n\t"
+	short inc = 0x0100;
+
+	/*
+	 * Ticket locks are conceptually two bytes, one indicating the current
+	 * head of the queue, and the other indicating the current tail. The
+	 * lock is acquired by atomically noting the tail and incrementing it
+	 * by one (thus adding ourself to the queue and noting our position),
+	 * then waiting until the head becomes equal to the initial value
+	 * of the tail.
+	 *
+	 * This uses a 16-bit xadd to increment the tail and also load the
+	 * position of the head, which takes care of memory ordering issues
+	 * and should be optimal for the uncontended case. Note the tail must
+	 * be in the high byte, otherwise the 16-bit wide increment of the low
+	 * byte would carry up and contaminate the high byte.
+	 */
+
+	__asm__ __volatile__ (
+		LOCK_PREFIX "xaddw %w0, %1\n"
+		"1:\t"
+		"cmpb %h0, %b0\n\t"
+		"je 2f\n\t"
+		"rep ; nop\n\t"
+		"movb %1, %b0\n\t"
+		"lfence\n\t"
 		"jmp 1b\n"
-		"4:\t"
-		"rep;nop\n\t"
-		"cmpl $0, %0\n\t"
-		"jg 1b\n\t"
-		"jmp 4b\n"
-		"5:\n\t"
-		: "+m" (lock->slock) : "r" ((unsigned)flags) : "memory");
+		"2:"
+		:"+Q" (inc), "+m" (lock->slock)
+		:
+		:"memory", "cc");
 }
-#endif
+
+#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
-	int oldval;
+	short tmp;
+        short oldval;
 
 	asm volatile(
-		"xchgl %0,%1"
-		:"=q" (oldval), "=m" (lock->slock)
-		:"0" (0) : "memory");
+                "movw %2,%w0\n\t"
+                "cmpb %h0, %b0\n\t"
+                "jne 1f\n\t"
+                "movw %w0,%w1\n\t"
+                "incb %h1\n\t"
+                LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
+                "1:"
+                :"=a" (oldval), "=Q" (tmp), "+m" (lock->slock)
+                :
+		: "memory", "cc");
 
-	return oldval > 0;
+	return ((oldval & 0xff) == ((oldval >> 8) & 0xff));
 }
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	asm volatile("movl $1,%0" :"=m" (lock->slock) :: "memory");
+	__asm__ __volatile__(
+		"incb %0"
+		:"+m" (lock->slock)
+		:
+		:"memory", "cc");
 }
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
Index: linux-2.6/include/asm-x86_64/spinlock_types.h
===================================================================
--- linux-2.6.orig/include/asm-x86_64/spinlock_types.h
+++ linux-2.6/include/asm-x86_64/spinlock_types.h
@@ -9,7 +9,7 @@ typedef struct {
 	unsigned int slock;
 } raw_spinlock_t;
 
-#define __RAW_SPIN_LOCK_UNLOCKED	{ 1 }
+#define __RAW_SPIN_LOCK_UNLOCKED	{ 0 }
 
 typedef struct {
 	unsigned int lock;
Index: linux-2.6/arch/x86_64/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86_64/Kconfig
+++ linux-2.6/arch/x86_64/Kconfig
@@ -76,8 +76,7 @@ config SBUS
 
 config GENERIC_LOCKBREAK
 	bool
-	default y
-	depends on SMP && PREEMPT
+	default n
 
 config RWSEM_GENERIC_SPINLOCK
 	bool

* Re: [patch 2/2] x86_64: ticket lock spinlock
  2007-08-08  4:24 ` [patch 2/2] x86_64: ticket lock spinlock Nick Piggin
@ 2007-08-08 10:26   ` Andi Kleen
  2007-08-09  1:42     ` Nick Piggin
  2007-08-08 17:31   ` Valdis.Kletnieks
  1 sibling, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2007-08-08 10:26 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, Linus Torvalds, Ingo Molnar, linux-arch,
	Linux Kernel Mailing List


>   *
>   * (the type definitions are in asm/spinlock_types.h)
>   */
>  
> +#if (NR_CPUS > 256)
> +#error spinlock supports a maximum of 256 CPUs
> +#endif
> +
>  static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
>  {
> -	return *(volatile signed int *)(&(lock)->slock) <= 0;
> +	int tmp = *(volatile signed int *)(&(lock)->slock);

Why is slock not volatile signed int in the first place? 

> -	int oldval;
> +	short tmp;
> +        short oldval;

Broken white space?


-Andi

* Re: [patch 2/2] x86_64: ticket lock spinlock
  2007-08-08  4:24 ` [patch 2/2] x86_64: ticket lock spinlock Nick Piggin
  2007-08-08 10:26   ` Andi Kleen
@ 2007-08-08 17:31   ` Valdis.Kletnieks
  2007-08-09  1:40     ` Nick Piggin
  1 sibling, 1 reply; 7+ messages in thread
From: Valdis.Kletnieks @ 2007-08-08 17:31 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, Andi Kleen, Linus Torvalds, Ingo Molnar,
	linux-arch, Linux Kernel Mailing List

On Wed, 08 Aug 2007 06:24:44 +0200, Nick Piggin said:

> After this, we can no longer spin on any locks with preempt enabled,
> and cannot reenable interrupts when spinning on an irq safe lock, because
> at that point we have already taken a ticket and we would deadlock if
> the same CPU tries to take the lock again.  These are hackish anyway: if
> the lock happens to be called under a preempt or interrupt disabled section,
> then it will just have the same latency problems. The real fix is to keep
> critical sections short, and ensure locks are reasonably fair (which this
> patch does).

Any guesstimates how often we do that sort of hackish thing currently, and
how hard it will be to debug each one?  "Deadlock if the same CPU tries to
take the lock again" is pretty easy to notice - are there more subtle failure
modes (larger loops of locks, etc)?

* Re: [patch 2/2] x86_64: ticket lock spinlock
  2007-08-08 17:31   ` Valdis.Kletnieks
@ 2007-08-09  1:40     ` Nick Piggin
  0 siblings, 0 replies; 7+ messages in thread
From: Nick Piggin @ 2007-08-09  1:40 UTC (permalink / raw)
  To: Valdis.Kletnieks
  Cc: Andrew Morton, Andi Kleen, Linus Torvalds, Ingo Molnar,
	linux-arch, Linux Kernel Mailing List

On Wed, Aug 08, 2007 at 01:31:58PM -0400, Valdis.Kletnieks@vt.edu wrote:
> On Wed, 08 Aug 2007 06:24:44 +0200, Nick Piggin said:
> 
> > After this, we can no longer spin on any locks with preempt enabled,
> > and cannot reenable interrupts when spinning on an irq safe lock, because
> > at that point we have already taken a ticket and we would deadlock if
> > the same CPU tries to take the lock again.  These are hackish anyway: if
> > the lock happens to be called under a preempt or interrupt disabled section,
> > then it will just have the same latency problems. The real fix is to keep
> > critical sections short, and ensure locks are reasonably fair (which this
> > patch does).
> 
> Any guesstimates how often we do that sort of hackish thing currently, and
> how hard it will be to debug each one?  "Deadlock if the same CPU tries to
> take the lock again" is pretty easy to notice - are there more subtle failure
> modes (larger loops of locks, etc)?

I'll try to explain better:

The old spinlocks re-enable preemption and interrupts while they spin
waiting for a held lock. This was done because people noticed some
long latencies while spinning. The problem however is that preemption
and interrupts can only be re-enabled if they were enabled before the
spin_lock call. So if you have code that perhaps takes nested locks,
or locks while interrupts are already disabled, then you get the latency
problems back.
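
A rough C rendering of the old behaviour (the __raw_spin_lock_flags() asm
that the patch removes); illustration only -- the real code used a locked
decl rather than a trylock, but the shape is the same:

	while (!__raw_spin_trylock(lock)) {
		if (flags & 0x200)		/* IF was set: irqs were on before */
			local_irq_enable();	/* sti -- take interrupts while waiting */
		while (__raw_spin_is_locked(lock))
			cpu_relax();		/* rep;nop */
		local_irq_disable();		/* cli before retrying the acquire */
	}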

So the non-hack fix is to keep critical sections short (which is what
we've been working at forever), and to have relatively fair locks
(which is what this patch does).

A side-effect of this patch is that it can no longer enable preemption
or ints while spinning, so my changelog is a rationale of why that
shouldn't be a big problem.



* Re: [patch 2/2] x86_64: ticket lock spinlock
  2007-08-08 10:26   ` Andi Kleen
@ 2007-08-09  1:42     ` Nick Piggin
  2007-08-09  9:54       ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: Nick Piggin @ 2007-08-09  1:42 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andrew Morton, Linus Torvalds, Ingo Molnar, linux-arch,
	Linux Kernel Mailing List

On Wed, Aug 08, 2007 at 12:26:55PM +0200, Andi Kleen wrote:
> 
> >   *
> >   * (the type definitions are in asm/spinlock_types.h)
> >   */
> >  
> > +#if (NR_CPUS > 256)
> > +#error spinlock supports a maximum of 256 CPUs
> > +#endif
> > +
> >  static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
> >  {
> > -	return *(volatile signed int *)(&(lock)->slock) <= 0;
> > +	int tmp = *(volatile signed int *)(&(lock)->slock);
> 
> Why is slock not volatile signed int in the first place? 

Don't know really. Why does spin_is_locked need it to be volatile?


> > -	int oldval;
> > +	short tmp;
> > +        short oldval;
> 
> Broken white space?

Hmm, I'll fix it.

Thanks,
Nick

* Re: [patch 2/2] x86_64: ticket lock spinlock
  2007-08-09  1:42     ` Nick Piggin
@ 2007-08-09  9:54       ` Andi Kleen
  0 siblings, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2007-08-09  9:54 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, Linus Torvalds, Ingo Molnar, linux-arch,
	Linux Kernel Mailing List

On Thursday 09 August 2007 03:42:54 Nick Piggin wrote:
> On Wed, Aug 08, 2007 at 12:26:55PM +0200, Andi Kleen wrote:
> > 
> > >   *
> > >   * (the type definitions are in asm/spinlock_types.h)
> > >   */
> > >  
> > > +#if (NR_CPUS > 256)
> > > +#error spinlock supports a maximum of 256 CPUs
> > > +#endif
> > > +
> > >  static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
> > >  {
> > > -	return *(volatile signed int *)(&(lock)->slock) <= 0;
> > > +	int tmp = *(volatile signed int *)(&(lock)->slock);
> > 
> > Why is slock not volatile signed int in the first place? 
> 
> Don't know really. Why does spin_is_locked need it to be volatile?

I suppose in case a caller doesn't have a memory barrier
(they should in theory, but might not). Without any barrier
or volatile gcc might optimize it away.

The other accesses in spinlocks hopefully all have barriers.
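
A hypothetical illustration (not from the kernel): a caller polling the lock
with no barrier in the loop body:

	while (__raw_spin_is_locked(&lock))
		;	/* without the volatile read, gcc may load ->slock once
			 * and spin forever on the stale value */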

Ok anyways the patches look good.

-Andi
