[RFC][PATCH] spinlock: Kill spin_unlock

public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed

* [RFC][PATCH] spinlock: Kill spin_unlock_wait()
       [not found]       ` <20110104064542.GF3402@amd>
@ 2011-01-05 19:14         ` Peter Zijlstra
  2011-01-05 19:26           ` Oleg Nesterov
  2011-01-05 19:43           ` Linus Torvalds
  0 siblings, 2 replies; 12+ messages in thread
From: Peter Zijlstra @ 2011-01-05 19:14 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Linus Torvalds, Chris Mason, Frank Rowand, Ingo Molnar,
	Thomas Gleixner, Mike Galbraith, Oleg Nesterov, Paul Turner,
	Jens Axboe, Yong Zhang, linux-kernel, Jeremy Fitzhardinge,
	Linux-Arch, Jeff Garzik, Tejun Heo

On Tue, 2011-01-04 at 17:45 +1100, Nick Piggin wrote:
> So I agree, taking it out the back and shooting it in the head would make
> the world a better place. 

There appear to be only two callsites of said horror, one in the exit
path and one in ata-eh, neither appear to be performance critical so I
replaced them with a simple lock-unlock sequence.

---
 arch/alpha/include/asm/spinlock.h          |    2 --
 arch/arm/include/asm/spinlock.h            |    2 --
 arch/blackfin/include/asm/spinlock.h       |    6 ------
 arch/cris/include/arch-v32/arch/spinlock.h |    6 ------
 arch/ia64/include/asm/spinlock.h           |   19 -------------------
 arch/m32r/include/asm/spinlock.h           |    2 --
 arch/mips/include/asm/spinlock.h           |    2 --
 arch/mn10300/include/asm/spinlock.h        |    1 -
 arch/parisc/include/asm/spinlock.h         |    2 --
 arch/powerpc/include/asm/spinlock.h        |    7 -------
 arch/powerpc/lib/locks.c                   |   11 -----------
 arch/s390/include/asm/spinlock.h           |    3 ---
 arch/sh/include/asm/spinlock.h             |    2 --
 arch/sparc/include/asm/spinlock_32.h       |    3 ---
 arch/sparc/include/asm/spinlock_64.h       |    4 ----
 arch/tile/include/asm/spinlock_32.h        |    2 --
 arch/tile/lib/spinlock_32.c                |    8 --------
 arch/x86/include/asm/spinlock.h            |    6 ------
 drivers/ata/libata-eh.c                    |    6 ++++--
 include/linux/spinlock.h                   |   11 -----------
 include/linux/spinlock_up.h                |    3 ---
 kernel/exit.c                              |    3 ++-
 22 files changed, 6 insertions(+), 105 deletions(-)

diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index d0faca1..0545bef 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -14,8 +14,6 @@
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 #define arch_spin_is_locked(x)	((x)->lock != 0)
-#define arch_spin_unlock_wait(x) \
-		do { cpu_relax(); } while ((x)->lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t * lock)
 {
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 17eb355..91ede71 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -34,8 +34,6 @@ static inline void dsb_sev(void)
  */
 
 #define arch_spin_is_locked(x)		((x)->lock != 0)
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index 1942ccf..d1630fc 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -46,12 +46,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	__raw_spin_unlock_asm(&lock->lock);
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	while (arch_spin_is_locked(lock))
-		cpu_relax();
-}
-
 static inline int arch_read_can_lock(arch_rwlock_t *rw)
 {
 	return __raw_uncached_fetch_asm(&rw->lock) > 0;
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
index f171a66..01da21c 100644
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ b/arch/cris/include/arch-v32/arch/spinlock.h
@@ -22,12 +22,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 			  : "memory");
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	while (arch_spin_is_locked(lock))
-		cpu_relax();
-}
-
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	return cris_spin_trylock((void *)&lock->slock);
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 1a91c91..7c4de8b 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -75,20 +75,6 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 	ACCESS_ONCE(*p) = (tmp + 2) & ~1;
 }
 
-static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	int	*p = (int *)&lock->lock, ticket;
-
-	ia64_invala();
-
-	for (;;) {
-		asm volatile ("ld4.c.nc %0=[%1]" : "=r"(ticket) : "r"(p) : "memory");
-		if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK))
-			return;
-		cpu_relax();
-	}
-}
-
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
 	long tmp = ACCESS_ONCE(lock->lock);
@@ -135,11 +121,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 	arch_spin_lock(lock);
 }
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	__ticket_spin_unlock_wait(lock);
-}
-
 #define arch_read_can_lock(rw)		(*(volatile int *)(rw) >= 0)
 #define arch_write_can_lock(rw)	(*(volatile int *)(rw) == 0)
 
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index 179a064..0e099cc 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -26,8 +26,6 @@
 
 #define arch_spin_is_locked(x)		(*(volatile int *)(&(x)->slock) <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-		do { cpu_relax(); } while (arch_spin_is_locked(x))
 
 /**
  * arch_spin_trylock - Try spin lock and return a result
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 396e402..711425f 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -42,8 +42,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 }
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-	while (arch_spin_is_locked(x)) { cpu_relax(); }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h
index 9342915..3460e5d 100644
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@@ -23,7 +23,6 @@
  */
 
 #define arch_spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) != 0)
-#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x))
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 74036f4..b419128 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -12,8 +12,6 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
 }
 
 #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
-#define arch_spin_unlock_wait(x) \
-		do { cpu_relax(); } while (arch_spin_is_locked(x))
 
 static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 					 unsigned long flags)
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index f9611bd..c9fd936 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -150,13 +150,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->slock = 0;
 }
 
-#ifdef CONFIG_PPC64
-extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
-#else
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
-#endif
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 9b8182e..e800d5f 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -82,14 +82,3 @@ void __rw_yield(arch_rwlock_t *rw)
 }
 #endif
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	while (lock->slock) {
-		HMT_low();
-		if (SHARED_PROCESSOR)
-			__spin_yield(lock);
-	}
-	HMT_medium();
-}
-
-EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 56612fc..cb98ce7 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -35,9 +35,6 @@ _raw_compare_and_swap(volatile unsigned int *lock,
  */
 
 #define arch_spin_is_locked(x) ((x)->owner_cpu != 0)
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) \
-		 arch_spin_relax(lock); } while (0)
 
 extern void arch_spin_lock_wait(arch_spinlock_t *);
 extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index bdc0f3b..7ac8916 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -25,8 +25,6 @@
 
 #define arch_spin_is_locked(x)		((x)->lock <= 0)
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
-	do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index 7f9b9db..f10958a 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -12,9 +12,6 @@
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__(
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 073936a..2845f6e 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -23,10 +23,6 @@
 
 #define arch_spin_is_locked(lp)	((lp)->lock != 0)
 
-#define arch_spin_unlock_wait(lp)	\
-	do {	rmb();			\
-	} while((lp)->lock)
-
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index 88efdde..3ccb768 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -61,8 +61,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 	lock->current_ticket = old_ticket + TICKET_QUANTUM;
 }
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock);
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 5cd1c40..72a6507 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -61,14 +61,6 @@ int arch_spin_trylock(arch_spinlock_t *lock)
 }
 EXPORT_SYMBOL(arch_spin_trylock);
 
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	u32 iterations = 0;
-	while (arch_spin_is_locked(lock))
-		delay_backoff(iterations++);
-}
-EXPORT_SYMBOL(arch_spin_unlock_wait);
-
 /*
  * The low byte is always reserved to be the marker for a "tns" operation
  * since the low bit is set to "1" by a tns.  The next seven bits are
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 3089f70..f23106f 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -208,12 +208,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 
 #endif	/* CONFIG_PARAVIRT_SPINLOCKS */
 
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	while (arch_spin_is_locked(lock))
-		cpu_relax();
-}
-
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 17a6378..f246965 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -668,8 +668,10 @@ void ata_scsi_error(struct Scsi_Host *host)
 
 		/* initialize eh_tries */
 		ap->eh_tries = ATA_EH_MAX_TRIES;
-	} else
-		spin_unlock_wait(ap->lock);
+	} else {
+		spin_lock_irqsave(ap->lock, flags);
+		spin_unlock_irqrestore(ap->lock, flags);
+	}
 
 	/* If we timed raced normal completion and there is nothing to
 	   recover nr_timedout == 0 why exactly are we doing error recovery ? */
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 80e5358..d7b89ef 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -122,12 +122,6 @@ do {								\
 static inline void smp_mb__after_lock(void) { smp_mb(); }
 #endif
 
-/**
- * raw_spin_unlock_wait - wait until the spinlock gets unlocked
- * @lock: the spinlock in question.
- */
-#define raw_spin_unlock_wait(lock)	arch_spin_unlock_wait(&(lock)->raw_lock)
-
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
 #define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
@@ -355,11 +349,6 @@ static inline int spin_trylock_irq(spinlock_t *lock)
 	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
 })
 
-static inline void spin_unlock_wait(spinlock_t *lock)
-{
-	raw_spin_unlock_wait(&lock->rlock);
-}
-
 static inline int spin_is_locked(spinlock_t *lock)
 {
 	return raw_spin_is_locked(&lock->rlock);
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index b14f6a9..53c7a86 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -70,7 +70,4 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 #define arch_read_can_lock(lock)	(((void)(lock), 1))
 #define arch_write_can_lock(lock)	(((void)(lock), 1))
 
-#define arch_spin_unlock_wait(lock) \
-		do { cpu_relax(); } while (arch_spin_is_locked(lock))
-
 #endif /* __LINUX_SPINLOCK_UP_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 676149a..2c6b725 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -956,7 +956,8 @@ NORET_TYPE void do_exit(long code)
 	 * an exiting task cleaning up the robust pi futexes.
 	 */
 	smp_mb();
-	raw_spin_unlock_wait(&tsk->pi_lock);
+	raw_spin_lock_irq(&tsk->pi_lock);
+	raw_spin_unlock_irq(&tsk->pi_lock);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-05 19:14         ` [RFC][PATCH] spinlock: Kill spin_unlock_wait() Peter Zijlstra
@ 2011-01-05 19:26           ` Oleg Nesterov
  2011-01-05 19:43           ` Linus Torvalds
  1 sibling, 0 replies; 12+ messages in thread
From: Oleg Nesterov @ 2011-01-05 19:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nick Piggin, Linus Torvalds, Chris Mason, Frank Rowand,
	Ingo Molnar, Thomas Gleixner, Mike Galbraith, Paul Turner,
	Jens Axboe, Yong Zhang, linux-kernel, Jeremy Fitzhardinge,
	Linux-Arch, Jeff Garzik, Tejun Heo

On 01/05, Peter Zijlstra wrote:
>
> On Tue, 2011-01-04 at 17:45 +1100, Nick Piggin wrote:
> > So I agree, taking it out the back and shooting it in the head would make
> > the world a better place.
>
> There appear to be only two callsites of said horror,

Not sure I understand why spin_unlock_wait() is really awful, but OK.

> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -956,7 +956,8 @@ NORET_TYPE void do_exit(long code)
>  	 * an exiting task cleaning up the robust pi futexes.
>  	 */
>  	smp_mb();
> -	raw_spin_unlock_wait(&tsk->pi_lock);
> +	raw_spin_lock_irq(&tsk->pi_lock);
> +	raw_spin_unlock_irq(&tsk->pi_lock);

then you can kill smp_mb() above.

Oleg.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-05 19:14         ` [RFC][PATCH] spinlock: Kill spin_unlock_wait() Peter Zijlstra
  2011-01-05 19:26           ` Oleg Nesterov
@ 2011-01-05 19:43           ` Linus Torvalds
  2011-01-05 19:43             ` Linus Torvalds
  2011-01-06  9:32             ` Peter Zijlstra
  1 sibling, 2 replies; 12+ messages in thread
From: Linus Torvalds @ 2011-01-05 19:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nick Piggin, Chris Mason, Frank Rowand, Ingo Molnar,
	Thomas Gleixner, Mike Galbraith, Oleg Nesterov, Paul Turner,
	Jens Axboe, Yong Zhang, linux-kernel, Jeremy Fitzhardinge,
	Linux-Arch, Jeff Garzik, Tejun Heo

On Wed, Jan 5, 2011 at 11:14 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> There appear to be only two callsites of said horror, one in the exit
> path and one in ata-eh, neither appear to be performance critical so I
> replaced them with a simple lock-unlock sequence.

Again, WHY?

What's the problem with the current code? Instead of generating ugly
patches to change it, and instead of removing it, just say what the
PROBLEM is.

Stop this wanking already. The exit path is certainly not unimportant.

And if you want to change the thing to be more efficient, I'm ok with
that, as long as it's done PRETTILY instead of making the damn thing
an unreadable mess.

The current "spin_unlock_wait()" is obvious. I'm perfectly happy
improving on it, but I would want to retain the "obvious" part, which
your previous patch certainly didn't do.

Some simple helper functions to extract the tail/head part of the
ticket lock to make the comparisons understandable, together with
always accessing the lock with the proper ACCESS_ONCE() would have
made your previous patch acceptable. But you ignored that feedback,
and instead you now want to do a "let's just remove it entirely patch"
that is even worse.

And in NEITHER version did you actually give any actual *REASON* for
the change in the first place.

Why? WHY?

                      Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-05 19:43           ` Linus Torvalds
@ 2011-01-05 19:43             ` Linus Torvalds
  2011-01-06  9:32             ` Peter Zijlstra
  1 sibling, 0 replies; 12+ messages in thread
From: Linus Torvalds @ 2011-01-05 19:43 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nick Piggin, Chris Mason, Frank Rowand, Ingo Molnar,
	Thomas Gleixner, Mike Galbraith, Oleg Nesterov, Paul Turner,
	Jens Axboe, Yong Zhang, linux-kernel, Jeremy Fitzhardinge,
	Linux-Arch, Jeff Garzik, Tejun Heo

On Wed, Jan 5, 2011 at 11:14 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> There appear to be only two callsites of said horror, one in the exit
> path and one in ata-eh, neither appear to be performance critical so I
> replaced them with a simple lock-unlock sequence.

Again, WHY?

What's the problem with the current code? Instead of generating ugly
patches to change it, and instead of removing it, just say what the
PROBLEM is.

Stop this wanking already. The exit path is certainly not unimportant.

And if you want to change the thing to be more efficient, I'm ok with
that, as long as it's done PRETTILY instead of making the damn thing
an unreadable mess.

The current "spin_unlock_wait()" is obvious. I'm perfectly happy
improving on it, but I would want to retain the "obvious" part, which
your previous patch certainly didn't do.

Some simple helper functions to extract the tail/head part of the
ticket lock to make the comparisons understandable, together with
always accessing the lock with the proper ACCESS_ONCE() would have
made your previous patch acceptable. But you ignored that feedback,
and instead you now want to do a "let's just remove it entirely patch"
that is even worse.

And in NEITHER version did you actually give any actual *REASON* for
the change in the first place.

Why? WHY?

                      Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-05 19:43           ` Linus Torvalds
  2011-01-05 19:43             ` Linus Torvalds
@ 2011-01-06  9:32             ` Peter Zijlstra
  2011-01-06 10:38               ` Nick Piggin
  1 sibling, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2011-01-06  9:32 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Nick Piggin, Chris Mason, Frank Rowand, Ingo Molnar,
	Thomas Gleixner, Mike Galbraith, Oleg Nesterov, Paul Turner,
	Jens Axboe, Yong Zhang, linux-kernel, Jeremy Fitzhardinge,
	Linux-Arch, Jeff Garzik, Tejun Heo

On Wed, 2011-01-05 at 11:43 -0800, Linus Torvalds wrote:
> On Wed, Jan 5, 2011 at 11:14 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > There appear to be only two callsites of said horror, one in the exit
> > path and one in ata-eh, neither appear to be performance critical so I
> > replaced them with a simple lock-unlock sequence.
> 
> Again, WHY?
> 
> What's the problem with the current code? Instead of generating ugly
> patches to change it, and instead of removing it, just say what the
> PROBLEM is.

Well, I don't care about the primitive anymore, and Nick had some
reasonable arguments on why its not a good primitive to have. So in a
brief moment I decided to see what it would take to make it go away.

Apparently you don't like it, I'm fine with that, consider the patch
discarded.

> Some simple helper functions to extract the tail/head part of the
> ticket lock to make the comparisons understandable,

Jeremy has a number of pending patches making things more pretty. If you
wish I can revisit this once that work hits your tree.

  http://lkml.org/lkml/2010/11/16/479

He makes the thing looks like:

+#if (CONFIG_NR_CPUS < 256)
+typedef u8  __ticket_t;
+#else
+typedef u16 __ticket_t;
+#endif
+
+#define TICKET_SHIFT   (sizeof(__ticket_t) * 8)
+#define TICKET_MASK    ((__ticket_t)((1 << TICKET_SHIFT) - 1))
+
 typedef struct arch_spinlock {
+       union {
+               unsigned int slock;
+               struct __raw_tickets {
+                       __ticket_t head, tail;
+               } tickets;
+       };
 } arch_spinlock_t;

>  together with
> always accessing the lock with the proper ACCESS_ONCE() would have
> made your previous patch acceptable.

I'm still not quite seeing where I was missing an ACCESS_ONCE(), the
second loop had a cpu_relax() in, which is a compiler barrier so it
forces a reload that way.

>  But you ignored that feedback,
> and instead you now want to do a "let's just remove it entirely patch"
> that is even worse.

My locking improved and became a lot more obvious by not using the
primitive, so for the work I was doing not using it seemed the better
solution.

And as said, this was inspired by Nick's comments and it was a quick
edit to see what it would take.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-06  9:32             ` Peter Zijlstra
@ 2011-01-06 10:38               ` Nick Piggin
  2011-01-06 18:26                 ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Nick Piggin @ 2011-01-06 10:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Nick Piggin, Chris Mason, Frank Rowand,
	Ingo Molnar, Thomas Gleixner, Mike Galbraith, Oleg Nesterov,
	Paul Turner, Jens Axboe, Yong Zhang, linux-kernel,
	Jeremy Fitzhardinge, Linux-Arch, Jeff Garzik, Tejun Heo

On Thu, Jan 06, 2011 at 10:32:33AM +0100, Peter Zijlstra wrote:
> On Wed, 2011-01-05 at 11:43 -0800, Linus Torvalds wrote:
> > On Wed, Jan 5, 2011 at 11:14 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > >
> > > There appear to be only two callsites of said horror, one in the exit
> > > path and one in ata-eh, neither appear to be performance critical so I
> > > replaced them with a simple lock-unlock sequence.
> > 
> > Again, WHY?
> > 
> > What's the problem with the current code? Instead of generating ugly
> > patches to change it, and instead of removing it, just say what the
> > PROBLEM is.
> 
> Well, I don't care about the primitive anymore, and Nick had some
> reasonable arguments on why its not a good primitive to have. So in a
> brief moment I decided to see what it would take to make it go away.
> 
> Apparently you don't like it, I'm fine with that, consider the patch
> discarded.
> 
> > Some simple helper functions to extract the tail/head part of the
> > ticket lock to make the comparisons understandable,
> 
> Jeremy has a number of pending patches making things more pretty. If you
> wish I can revisit this once that work hits your tree.
> 
>   http://lkml.org/lkml/2010/11/16/479
> 
> He makes the thing looks like:
> 
> +#if (CONFIG_NR_CPUS < 256)
> +typedef u8  __ticket_t;
> +#else
> +typedef u16 __ticket_t;
> +#endif
> +
> +#define TICKET_SHIFT   (sizeof(__ticket_t) * 8)
> +#define TICKET_MASK    ((__ticket_t)((1 << TICKET_SHIFT) - 1))
> +
>  typedef struct arch_spinlock {
> +       union {
> +               unsigned int slock;
> +               struct __raw_tickets {
> +                       __ticket_t head, tail;
> +               } tickets;
> +       };
>  } arch_spinlock_t;
> 
> >  together with
> > always accessing the lock with the proper ACCESS_ONCE() would have
> > made your previous patch acceptable.
> 
> I'm still not quite seeing where I was missing an ACCESS_ONCE(), the
> second loop had a cpu_relax() in, which is a compiler barrier so it
> forces a reload that way.
> 
> >  But you ignored that feedback,
> > and instead you now want to do a "let's just remove it entirely patch"
> > that is even worse.
> 
> My locking improved and became a lot more obvious by not using the
> primitive, so for the work I was doing not using it seemed the better
> solution.
> 
> And as said, this was inspired by Nick's comments and it was a quick
> edit to see what it would take.

I was hoping more that it could be removed from callers rather than
replaced with lock/unlock sequence...

I can't see how the usage in libata could be right. If we're waiting
for some modifications inside the critical section, then it seems we
could load some shared data before the lock is actually released, on
an architecture which does load/load reordering. At best it needs some
good comments and perhaps a barrier or two.

The spin_unlock_wait in do_exit seems like it shouldn't be needed --
exit_pi_state_list takes the pi_lock after the task has died anyway,
so I can't see what the problem would be. Removing the call (and
perhaps put a BUG_ON(!(curr->flags & PF_EXITING)) in exit_pi_state_list
would do the same thing, avoid the barrier, and localize fuxtex exit
protocol to futex.c.

(If the spin_unlock_wait call *was* actually required, eg. we subsequently
 checked current->pi_state_list without holding the pi_lock, then we
 would require a barrier after the spin_unlock_wait call too, to prevent
 speculative load pulling it up before the load of the lock, so I don't
 really agree Linus that it's an obvious API -- to you perhaps because
 barriers are second nature to you, but not for driver writers :)).

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-06 10:38               ` Nick Piggin
@ 2011-01-06 18:26                 ` Peter Zijlstra
  2011-01-07 21:01                   ` Tejun Heo
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2011-01-06 18:26 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Linus Torvalds, Chris Mason, Frank Rowand, Ingo Molnar,
	Thomas Gleixner, Mike Galbraith, Oleg Nesterov, Paul Turner,
	Jens Axboe, Yong Zhang, linux-kernel, Jeremy Fitzhardinge,
	Linux-Arch, Jeff Garzik, Tejun Heo

On Thu, 2011-01-06 at 21:38 +1100, Nick Piggin wrote:
> I can't see how the usage in libata could be right. If we're waiting
> for some modifications inside the critical section, then it seems we
> could load some shared data before the lock is actually released, on
> an architecture which does load/load reordering. At best it needs some
> good comments and perhaps a barrier or two.
> 
> The spin_unlock_wait in do_exit seems like it shouldn't be needed --
> exit_pi_state_list takes the pi_lock after the task has died anyway,
> so I can't see what the problem would be. Removing the call (and
> perhaps put a BUG_ON(!(curr->flags & PF_EXITING)) in exit_pi_state_list
> would do the same thing, avoid the barrier, and localize fuxtex exit
> protocol to futex.c. 

Jeff, Tejun, could you look at the ata-eh thing, then I'll put sorting
through the futex thing on my todo list.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-06 18:26                 ` Peter Zijlstra
@ 2011-01-07 21:01                   ` Tejun Heo
  2011-01-07 21:13                     ` Jeff Garzik
  0 siblings, 1 reply; 12+ messages in thread
From: Tejun Heo @ 2011-01-07 21:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Nick Piggin, Linus Torvalds, Chris Mason, Frank Rowand,
	Ingo Molnar, Thomas Gleixner, Mike Galbraith, Oleg Nesterov,
	Paul Turner, Jens Axboe, Yong Zhang, linux-kernel,
	Jeremy Fitzhardinge, Linux-Arch, Jeff Garzik

Hello,

On Thu, Jan 06, 2011 at 07:26:35PM +0100, Peter Zijlstra wrote:
> Jeff, Tejun, could you look at the ata-eh thing, then I'll put sorting
> through the futex thing on my todo list.

Hmm... I think the ->eng_timeout path is already dead.  We no longer
have any in-kernel implementation, so killing spin_unlock_wait()
should be fine.  I'll follow up with removal of the unused callback.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-07 21:01                   ` Tejun Heo
@ 2011-01-07 21:13                     ` Jeff Garzik
  2011-01-07 21:13                       ` Jeff Garzik
  2011-01-07 21:33                       ` Tejun Heo
  0 siblings, 2 replies; 12+ messages in thread
From: Jeff Garzik @ 2011-01-07 21:13 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Peter Zijlstra, Nick Piggin, Linus Torvalds, Chris Mason,
	Frank Rowand, Ingo Molnar, Thomas Gleixner, Mike Galbraith,
	Oleg Nesterov, Paul Turner, Jens Axboe, Yong Zhang, linux-kernel,
	Jeremy Fitzhardinge, Linux-Arch, linux-scsi,
	IDE/ATA development list

On Fri, Jan 7, 2011 at 4:01 PM, Tejun Heo <tj@kernel.org> wrote:
> On Thu, Jan 06, 2011 at 07:26:35PM +0100, Peter Zijlstra wrote:
>> Jeff, Tejun, could you look at the ata-eh thing, then I'll put sorting
>> through the futex thing on my todo list.
>
> Hmm... I think the ->eng_timeout path is already dead.  We no longer
> have any in-kernel implementation, so killing spin_unlock_wait()
> should be fine.  I'll follow up with removal of the unused callback.

Unfortunately...  libsas continues to avoid the new EH :(

It's a hairy mess to untangle, too.  libata does proper error handling
of ATA device errors, notably NCQ error handling, which libsas sorely
misses.  But libata new EH assumes a bit too much about "owning" the
entirety of the EH process.  These assumptions are proper for wholly
ATA drivers (drivers/ata/*) where new EH can drive the EH process, but
in the SAS situation, a phy in SATA mode is simply a subset of a
larger set of EH conditions that must be handled.

Thus libsas uses the ancient libata hook ->phy_reset and lacks ->error_handler.

I think libata's old-EH path is entirely SAS-specific at this point.

     Jeff

P.S.  Note that libsas and ipr are peers; thus ipr driver also uses old EH.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-07 21:13                     ` Jeff Garzik
@ 2011-01-07 21:13                       ` Jeff Garzik
  2011-01-07 21:33                       ` Tejun Heo
  1 sibling, 0 replies; 12+ messages in thread
From: Jeff Garzik @ 2011-01-07 21:13 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Peter Zijlstra, Nick Piggin, Linus Torvalds, Chris Mason,
	Frank Rowand, Ingo Molnar, Thomas Gleixner, Mike Galbraith,
	Oleg Nesterov, Paul Turner, Jens Axboe, Yong Zhang, linux-kernel,
	Jeremy Fitzhardinge, Linux-Arch, linux-scsi,
	IDE/ATA development list

On Fri, Jan 7, 2011 at 4:01 PM, Tejun Heo <tj@kernel.org> wrote:
> On Thu, Jan 06, 2011 at 07:26:35PM +0100, Peter Zijlstra wrote:
>> Jeff, Tejun, could you look at the ata-eh thing, then I'll put sorting
>> through the futex thing on my todo list.
>
> Hmm... I think the ->eng_timeout path is already dead.  We no longer
> have any in-kernel implementation, so killing spin_unlock_wait()
> should be fine.  I'll follow up with removal of the unused callback.

Unfortunately...  libsas continues to avoid the new EH :(

It's a hairy mess to untangle, too.  libata does proper error handling
of ATA device errors, notably NCQ error handling, which libsas sorely
misses.  But libata new EH assumes a bit too much about "owning" the
entirety of the EH process.  These assumptions are proper for wholly
ATA drivers (drivers/ata/*) where new EH can drive the EH process, but
in the SAS situation, a phy in SATA mode is simply a subset of a
larger set of EH conditions that must be handled.

Thus libsas uses the ancient libata hook ->phy_reset and lacks ->error_handler.

I think libata's old-EH path is entirely SAS-specific at this point.

     Jeff

P.S.  Note that libsas and ipr are peers; thus ipr driver also uses old EH.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-07 21:13                     ` Jeff Garzik
  2011-01-07 21:13                       ` Jeff Garzik
@ 2011-01-07 21:33                       ` Tejun Heo
  2011-01-07 21:33                         ` Tejun Heo
  1 sibling, 1 reply; 12+ messages in thread
From: Tejun Heo @ 2011-01-07 21:33 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: Peter Zijlstra, Nick Piggin, Linus Torvalds, Chris Mason,
	Frank Rowand, Ingo Molnar, Thomas Gleixner, Mike Galbraith,
	Oleg Nesterov, Paul Turner, Jens Axboe, Yong Zhang, linux-kernel,
	Jeremy Fitzhardinge, Linux-Arch, linux-scsi,
	IDE/ATA development list

Hello, Jeff.

On Fri, Jan 07, 2011 at 04:13:53PM -0500, Jeff Garzik wrote:
> > Hmm... I think the ->eng_timeout path is already dead.  We no longer
> > have any in-kernel implementation, so killing spin_unlock_wait()
> > should be fine.  I'll follow up with removal of the unused callback.
> 
> Unfortunately...  libsas continues to avoid the new EH :(
> 
> It's a hairy mess to untangle, too.  libata does proper error handling
> of ATA device errors, notably NCQ error handling, which libsas sorely
> misses.  But libata new EH assumes a bit too much about "owning" the
> entirety of the EH process.  These assumptions are proper for wholly
> ATA drivers (drivers/ata/*) where new EH can drive the EH process, but
> in the SAS situation, a phy in SATA mode is simply a subset of a
> larger set of EH conditions that must be handled.
> 
> Thus libsas uses the ancient libata hook ->phy_reset and lacks ->error_handler.
> 
> I think libata's old-EH path is entirely SAS-specific at this point.

Hmm... but they don't use ata_scsi_error() and ->eng_timeout() at all,
no?  We can't remove phy_reset() and need to keep the silly "if
(->error_handler)" tests around but should be able to remove those
from ata_scsi_error() and other EH routines, at least.  Am I missing
something?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH] spinlock: Kill spin_unlock_wait()
  2011-01-07 21:33                       ` Tejun Heo
@ 2011-01-07 21:33                         ` Tejun Heo
  0 siblings, 0 replies; 12+ messages in thread
From: Tejun Heo @ 2011-01-07 21:33 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: Peter Zijlstra, Nick Piggin, Linus Torvalds, Chris Mason,
	Frank Rowand, Ingo Molnar, Thomas Gleixner, Mike Galbraith,
	Oleg Nesterov, Paul Turner, Jens Axboe, Yong Zhang, linux-kernel,
	Jeremy Fitzhardinge, Linux-Arch, linux-scsi,
	IDE/ATA development list

Hello, Jeff.

On Fri, Jan 07, 2011 at 04:13:53PM -0500, Jeff Garzik wrote:
> > Hmm... I think the ->eng_timeout path is already dead.  We no longer
> > have any in-kernel implementation, so killing spin_unlock_wait()
> > should be fine.  I'll follow up with removal of the unused callback.
> 
> Unfortunately...  libsas continues to avoid the new EH :(
> 
> It's a hairy mess to untangle, too.  libata does proper error handling
> of ATA device errors, notably NCQ error handling, which libsas sorely
> misses.  But libata new EH assumes a bit too much about "owning" the
> entirety of the EH process.  These assumptions are proper for wholly
> ATA drivers (drivers/ata/*) where new EH can drive the EH process, but
> in the SAS situation, a phy in SATA mode is simply a subset of a
> larger set of EH conditions that must be handled.
> 
> Thus libsas uses the ancient libata hook ->phy_reset and lacks ->error_handler.
> 
> I think libata's old-EH path is entirely SAS-specific at this point.

Hmm... but they don't use ata_scsi_error() and ->eng_timeout() at all,
no?  We can't remove phy_reset() and need to keep the silly "if
(->error_handler)" tests around but should be able to remove those
from ata_scsi_error() and other EH routines, at least.  Am I missing
something?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2011-01-07 21:33 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20101224122338.172750730@chello.nl>
     [not found] ` <20101224123742.724459093@chello.nl>
     [not found]   ` <AANLkTikGBsh0-QTe9CA2gwDtwzpdMi+fbDsTEKiJAL0P@mail.gmail.com>
     [not found]     ` <1294054362.2016.74.camel@laptop>
     [not found]       ` <20110104064542.GF3402@amd>
2011-01-05 19:14         ` [RFC][PATCH] spinlock: Kill spin_unlock_wait() Peter Zijlstra
2011-01-05 19:26           ` Oleg Nesterov
2011-01-05 19:43           ` Linus Torvalds
2011-01-05 19:43             ` Linus Torvalds
2011-01-06  9:32             ` Peter Zijlstra
2011-01-06 10:38               ` Nick Piggin
2011-01-06 18:26                 ` Peter Zijlstra
2011-01-07 21:01                   ` Tejun Heo
2011-01-07 21:13                     ` Jeff Garzik
2011-01-07 21:13                       ` Jeff Garzik
2011-01-07 21:33                       ` Tejun Heo
2011-01-07 21:33                         ` Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox