public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] lockdep: spin_lock_irqsave_nested()
@ 2006-10-30  9:03 Peter Zijlstra
  2006-10-30  9:06 ` [PATCH 2/2] lockdep: annotate bcsp driver Peter Zijlstra
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: Peter Zijlstra @ 2006-10-30  9:03 UTC (permalink / raw)
  To: linux-kernel, Andrew Morton
  Cc: Arjan van de Ven, Ingo Molnar, Jiri Kosina, Marcel Holtmann,
	David Woodhouse, Peter Zijlstra


From: Arjan van de Ven <arjan@linux.intel.com>
Subject: spin_lock_irqsave_nested()

Introduce spin_lock_irqsave_nested(); implementation from:
 http://lkml.org/lkml/2006/6/1/122
Patch from:
 http://lkml.org/lkml/2006/9/13/258

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Jiri Kosina <jikos@jikos.cz>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/spinlock.h         |    5 +++++
 include/linux/spinlock_api_smp.h |    2 ++
 include/linux/spinlock_api_up.h  |    1 +
 kernel/spinlock.c                |   21 +++++++++++++++++++++
 4 files changed, 29 insertions(+)

Index: linux-2.6/include/linux/spinlock_api_smp.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_api_smp.h
+++ linux-2.6/include/linux/spinlock_api_smp.h
@@ -32,6 +32,8 @@ void __lockfunc _read_lock_irq(rwlock_t 
 void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 							__acquires(lock);
+unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
+							__acquires(spinlock_t);
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 							__acquires(lock);
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
Index: linux-2.6/include/linux/spinlock_api_up.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_api_up.h
+++ linux-2.6/include/linux/spinlock_api_up.h
@@ -59,6 +59,7 @@
 #define _read_lock_irq(lock)			__LOCK_IRQ(lock)
 #define _write_lock_irq(lock)			__LOCK_IRQ(lock)
 #define _spin_lock_irqsave(lock, flags)		__LOCK_IRQSAVE(lock, flags)
+#define _spin_lock_irqsave_nested(lock, flags, subclass) __LOCK_IRQSAVE(lock, flags, subclass)
 #define _read_lock_irqsave(lock, flags)		__LOCK_IRQSAVE(lock, flags)
 #define _write_lock_irqsave(lock, flags)	__LOCK_IRQSAVE(lock, flags)
 #define _spin_trylock(lock)			({ __LOCK(lock); 1; })
Index: linux-2.6/include/linux/spinlock.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock.h
+++ linux-2.6/include/linux/spinlock.h
@@ -186,6 +186,11 @@ do {								\
 #define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
 #define read_lock_irqsave(lock, flags)	flags = _read_lock_irqsave(lock)
 #define write_lock_irqsave(lock, flags)	flags = _write_lock_irqsave(lock)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define spin_lock_irqsave_nested(lock, flags, subclass)	flags = _spin_lock_irqsave_nested(lock, subclass)
+#else
+#define spin_lock_irqsave_nested(lock, flags, subclass)	flags = _spin_lock_irqsave(lock)
+#endif
 #else
 #define spin_lock_irqsave(lock, flags)	_spin_lock_irqsave(lock, flags)
 #define read_lock_irqsave(lock, flags)	_read_lock_irqsave(lock, flags)
Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c
+++ linux-2.6/kernel/spinlock.c
@@ -293,6 +293,27 @@ void __lockfunc _spin_lock_nested(spinlo
 }
 
 EXPORT_SYMBOL(_spin_lock_nested);
+unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_disable();
+	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	/*
+	 * On lockdep we dont want the hand-coded irq-enable of
+	 * _raw_spin_lock_flags() code, because lockdep assumes
+	 * that interrupts are not re-enabled during lock-acquire:
+	 */
+#ifdef CONFIG_PROVE_SPIN_LOCKING
+	_raw_spin_lock(lock);
+#else
+	_raw_spin_lock_flags(lock, &flags);
+#endif
+	return flags;
+}
+
+EXPORT_SYMBOL(_spin_lock_irqsave_nested);
 
 #endif
 



^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 2/2] lockdep: annotate bcsp driver
  2006-10-30  9:03 [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Peter Zijlstra
@ 2006-10-30  9:06 ` Peter Zijlstra
  2006-10-30  9:06   ` Ingo Molnar
                     ` (2 more replies)
  2006-10-30  9:07 ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Ingo Molnar
                   ` (2 subsequent siblings)
  3 siblings, 3 replies; 25+ messages in thread
From: Peter Zijlstra @ 2006-10-30  9:06 UTC (permalink / raw)
  To: linux-kernel, Andrew Morton
  Cc: Arjan van de Ven, Ingo Molnar, Jiri Kosina, Marcel Holtmann,
	David Woodhouse


=============================================
[ INFO: possible recursive locking detected ]
2.6.18-1.2699.fc6 #1
---------------------------------------------
swapper/0 is trying to acquire lock:
 (&list->lock#3){+...}, at: [<c05ad307>] skb_dequeue+0x12/0x43

but task is already holding lock:
 (&list->lock#3){+...}, at: [<df98cd79>] bcsp_dequeue+0x6a/0x11e [hci_uart]


Two different list locks nest, annotate so.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 drivers/bluetooth/hci_bcsp.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6/drivers/bluetooth/hci_bcsp.c
===================================================================
--- linux-2.6.orig/drivers/bluetooth/hci_bcsp.c
+++ linux-2.6/drivers/bluetooth/hci_bcsp.c
@@ -330,7 +330,7 @@ static struct sk_buff *bcsp_dequeue(stru
 	   reliable packet if the number of packets sent but not yet ack'ed
 	   is < than the winsize */
 
-	spin_lock_irqsave(&bcsp->unack.lock, flags);
+	spin_lock_irqsave_nested(&bcsp->unack.lock, flags, SINGLE_DEPTH_NESTING);
 
 	if (bcsp->unack.qlen < BCSP_TXWINSIZE && (skb = skb_dequeue(&bcsp->rel)) != NULL) {
 		struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type);



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] lockdep: annotate bcsp driver
  2006-10-30  9:06 ` [PATCH 2/2] lockdep: annotate bcsp driver Peter Zijlstra
@ 2006-10-30  9:06   ` Ingo Molnar
  2006-10-30  9:30   ` Marcel Holtmann
  2006-10-30  9:31   ` [PATCH 2/2] lockdep: annotate bcsp driver - v2 Peter Zijlstra
  2 siblings, 0 replies; 25+ messages in thread
From: Ingo Molnar @ 2006-10-30  9:06 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Andrew Morton, Arjan van de Ven, Jiri Kosina,
	Marcel Holtmann, David Woodhouse


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> =============================================
> [ INFO: possible recursive locking detected ]
> 2.6.18-1.2699.fc6 #1
> ---------------------------------------------
> swapper/0 is trying to acquire lock:
>  (&list->lock#3){+...}, at: [<c05ad307>] skb_dequeue+0x12/0x43
> 
> but task is already holding lock:
>  (&list->lock#3){+...}, at: [<df98cd79>] bcsp_dequeue+0x6a/0x11e [hci_uart]
> 
> 
> Two different list locks nest, annotate so.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

Acked-by: Ingo Molnar <mingo@elte.hu>

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] lockdep: spin_lock_irqsave_nested()
  2006-10-30  9:03 [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Peter Zijlstra
  2006-10-30  9:06 ` [PATCH 2/2] lockdep: annotate bcsp driver Peter Zijlstra
@ 2006-10-30  9:07 ` Ingo Molnar
  2006-10-30 13:12 ` Jarek Poplawski
  2006-10-31  6:48 ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Andrew Morton
  3 siblings, 0 replies; 25+ messages in thread
From: Ingo Molnar @ 2006-10-30  9:07 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Andrew Morton, Arjan van de Ven, Jiri Kosina,
	Marcel Holtmann, David Woodhouse


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> From: Arjan van de Ven <arjan@linux.intel.com>
> Subject: spin_lock_irqsave_nested()
> 
> Introduce spin_lock_irqsave_nested(); implementation from:
>  http://lkml.org/lkml/2006/6/1/122
> Patch from:
>  http://lkml.org/lkml/2006/9/13/258
> 
> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
> Signed-off-by: Jiri Kosina <jikos@jikos.cz>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

Acked-by: Ingo Molnar <mingo@elte.hu>

	Ingo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] lockdep: annotate bcsp driver
  2006-10-30  9:06 ` [PATCH 2/2] lockdep: annotate bcsp driver Peter Zijlstra
  2006-10-30  9:06   ` Ingo Molnar
@ 2006-10-30  9:30   ` Marcel Holtmann
  2006-10-30  9:31   ` [PATCH 2/2] lockdep: annotate bcsp driver - v2 Peter Zijlstra
  2 siblings, 0 replies; 25+ messages in thread
From: Marcel Holtmann @ 2006-10-30  9:30 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Andrew Morton, Arjan van de Ven, Ingo Molnar,
	Jiri Kosina, David Woodhouse

Hi Peter,

> =============================================
> [ INFO: possible recursive locking detected ]
> 2.6.18-1.2699.fc6 #1
> ---------------------------------------------
> swapper/0 is trying to acquire lock:
>  (&list->lock#3){+...}, at: [<c05ad307>] skb_dequeue+0x12/0x43
> 
> but task is already holding lock:
>  (&list->lock#3){+...}, at: [<df98cd79>] bcsp_dequeue+0x6a/0x11e [hci_uart]
> 
> 
> Two different list locks nest, annotate so.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

Regards

Marcel



^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 2/2] lockdep: annotate bcsp driver - v2
  2006-10-30  9:06 ` [PATCH 2/2] lockdep: annotate bcsp driver Peter Zijlstra
  2006-10-30  9:06   ` Ingo Molnar
  2006-10-30  9:30   ` Marcel Holtmann
@ 2006-10-30  9:31   ` Peter Zijlstra
  2 siblings, 0 replies; 25+ messages in thread
From: Peter Zijlstra @ 2006-10-30  9:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: Andrew Morton, Arjan van de Ven, Ingo Molnar, Jiri Kosina,
	Marcel Holtmann, David Woodhouse


=============================================
[ INFO: possible recursive locking detected ]
2.6.18-1.2699.fc6 #1
---------------------------------------------
swapper/0 is trying to acquire lock:
 (&list->lock#3){+...}, at: [<c05ad307>] skb_dequeue+0x12/0x43

but task is already holding lock:
 (&list->lock#3){+...}, at: [<df98cd79>] bcsp_dequeue+0x6a/0x11e [hci_uart]

Moving a skb from the unack'ed to the rel(iable) list nests the two list locks.
Reliable packets are never moved the other way, hence no circular dependency
exists.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 drivers/bluetooth/hci_bcsp.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Index: linux-2.6/drivers/bluetooth/hci_bcsp.c
===================================================================
--- linux-2.6.orig/drivers/bluetooth/hci_bcsp.c
+++ linux-2.6/drivers/bluetooth/hci_bcsp.c
@@ -330,7 +330,7 @@ static struct sk_buff *bcsp_dequeue(stru
 	   reliable packet if the number of packets sent but not yet ack'ed
 	   is < than the winsize */
 
-	spin_lock_irqsave(&bcsp->unack.lock, flags);
+	spin_lock_irqsave_nested(&bcsp->unack.lock, flags, SINGLE_DEPTH_NESTING);
 
 	if (bcsp->unack.qlen < BCSP_TXWINSIZE && (skb = skb_dequeue(&bcsp->rel)) != NULL) {
 		struct sk_buff *nskb = bcsp_prepare_pkt(bcsp, skb->data, skb->len, bt_cb(skb)->pkt_type);
@@ -696,7 +696,7 @@ static void bcsp_timed_event(unsigned lo
 
 	BT_DBG("hu %p retransmitting %u pkts", hu, bcsp->unack.qlen);
 
-	spin_lock_irqsave(&bcsp->unack.lock, flags);
+	spin_lock_irqsave_nested(&bcsp->unack.lock, flags, SINGLE_DEPTH_NESTING);
 
 	while ((skb = __skb_dequeue_tail(&bcsp->unack)) != NULL) {
 		bcsp->msgq_txseq = (bcsp->msgq_txseq - 1) & 0x07;



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] lockdep: spin_lock_irqsave_nested()
  2006-10-30  9:03 [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Peter Zijlstra
  2006-10-30  9:06 ` [PATCH 2/2] lockdep: annotate bcsp driver Peter Zijlstra
  2006-10-30  9:07 ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Ingo Molnar
@ 2006-10-30 13:12 ` Jarek Poplawski
  2006-10-30 13:27   ` Jarek Poplawski
  2006-10-30 13:40   ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() -v2 Peter Zijlstra
  2006-10-31  6:48 ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Andrew Morton
  3 siblings, 2 replies; 25+ messages in thread
From: Jarek Poplawski @ 2006-10-30 13:12 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Arjan van de Ven, Ingo Molnar, Jiri Kosina,
	Marcel Holtmann, David Woodhouse

Here are some doubts...

Jarek P.

On 30-10-2006 10:03, Peter Zijlstra wrote:
> From: Arjan van de Ven <arjan@linux.intel.com>
> Subject: spin_lock_irqsave_nested()
> 
> Introduce spin_lock_irqsave_nested(); implementation from:
>  http://lkml.org/lkml/2006/6/1/122
> Patch from:
>  http://lkml.org/lkml/2006/9/13/258
> 
> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
> Signed-off-by: Jiri Kosina <jikos@jikos.cz>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  include/linux/spinlock.h         |    5 +++++
>  include/linux/spinlock_api_smp.h |    2 ++
>  include/linux/spinlock_api_up.h  |    1 +
>  kernel/spinlock.c                |   21 +++++++++++++++++++++
>  4 files changed, 29 insertions(+)
> 
> Index: linux-2.6/include/linux/spinlock_api_smp.h
> ===================================================================
> --- linux-2.6.orig/include/linux/spinlock_api_smp.h
> +++ linux-2.6/include/linux/spinlock_api_smp.h
> @@ -32,6 +32,8 @@ void __lockfunc _read_lock_irq(rwlock_t 
>  void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
>  unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
>  							__acquires(lock);
> +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> +							__acquires(spinlock_t);

According to neighbours rather:
 +							__acquires(lock);

>  unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
>  							__acquires(lock);
>  unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
> Index: linux-2.6/include/linux/spinlock_api_up.h
> ===================================================================
> --- linux-2.6.orig/include/linux/spinlock_api_up.h
> +++ linux-2.6/include/linux/spinlock_api_up.h
> @@ -59,6 +59,7 @@
>  #define _read_lock_irq(lock)			__LOCK_IRQ(lock)
>  #define _write_lock_irq(lock)			__LOCK_IRQ(lock)
>  #define _spin_lock_irqsave(lock, flags)		__LOCK_IRQSAVE(lock, flags)
> +#define _spin_lock_irqsave_nested(lock, flags, subclass) __LOCK_IRQSAVE(lock, flags, subclass)

Is __LOCK_IRQSAVE() with 3 args defined?

>  #define _read_lock_irqsave(lock, flags)		__LOCK_IRQSAVE(lock, flags)
>  #define _write_lock_irqsave(lock, flags)	__LOCK_IRQSAVE(lock, flags)
>  #define _spin_trylock(lock)			({ __LOCK(lock); 1; })
> Index: linux-2.6/include/linux/spinlock.h
> ===================================================================
> --- linux-2.6.orig/include/linux/spinlock.h
> +++ linux-2.6/include/linux/spinlock.h
> @@ -186,6 +186,11 @@ do {								\
>  #define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
>  #define read_lock_irqsave(lock, flags)	flags = _read_lock_irqsave(lock)
>  #define write_lock_irqsave(lock, flags)	flags = _write_lock_irqsave(lock)
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +#define spin_lock_irqsave_nested(lock, flags, subclass)	flags = _spin_lock_irqsave_nested(lock, subclass)
> +#else
> +#define spin_lock_irqsave_nested(lock, flags, subclass)	flags = _spin_lock_irqsave(lock)
> +#endif
>  #else

Plus for api_up:

+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define spin_lock_irqsave_nested(lock, flags, subclass)	_spin_lock_irqsave_nested(lock, flags, subclass)
+#else
+#define spin_lock_irqsave_nested(lock, flags, subclass)	_spin_lock_irqsave(lock, flags)
+#endif

>  #define spin_lock_irqsave(lock, flags)	_spin_lock_irqsave(lock, flags)
>  #define read_lock_irqsave(lock, flags)	_read_lock_irqsave(lock, flags)
> Index: linux-2.6/kernel/spinlock.c
> ===================================================================
> --- linux-2.6.orig/kernel/spinlock.c
> +++ linux-2.6/kernel/spinlock.c
> @@ -293,6 +293,27 @@ void __lockfunc _spin_lock_nested(spinlo
>  }
>  
>  EXPORT_SYMBOL(_spin_lock_nested);
> +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
> +{
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	preempt_disable();
> +	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
> +	/*
> +	 * On lockdep we dont want the hand-coded irq-enable of
> +	 * _raw_spin_lock_flags() code, because lockdep assumes
> +	 * that interrupts are not re-enabled during lock-acquire:
> +	 */
> +#ifdef CONFIG_PROVE_SPIN_LOCKING
> +	_raw_spin_lock(lock);
> +#else
> +	_raw_spin_lock_flags(lock, &flags);
> +#endif
> +	return flags;
> +}
> +
> +EXPORT_SYMBOL(_spin_lock_irqsave_nested);
>  
>  #endif
>  
> 

Shouldn't this _nested locks be considered in: 
#else /* CONFIG_PREEMPT: */
part?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] lockdep: spin_lock_irqsave_nested()
  2006-10-30 13:12 ` Jarek Poplawski
@ 2006-10-30 13:27   ` Jarek Poplawski
  2006-10-30 13:40   ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() -v2 Peter Zijlstra
  1 sibling, 0 replies; 25+ messages in thread
From: Jarek Poplawski @ 2006-10-30 13:27 UTC (permalink / raw)
  To: Peter Zijlstra, linux-kernel, Arjan van de Ven, Ingo Molnar,
	Jiri Kosina, Marcel Holtmann, David Woodhouse

On Mon, Oct 30, 2006 at 02:12:41PM +0100, Jarek Poplawski wrote:
> Here are some doubts...
...
> On 30-10-2006 10:03, Peter Zijlstra wrote:
> > From: Arjan van de Ven <arjan@linux.intel.com>
> > Subject: spin_lock_irqsave_nested()
> > +EXPORT_SYMBOL(_spin_lock_irqsave_nested);
> >  
> >  #endif
> >  
> > 
> 
> Shouldn't this _nested locks be considered in: 
> #else /* CONFIG_PREEMPT: */
> part?

Sorry, this one is not my doubt anymore.

Jarek P.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/2] lockdep: spin_lock_irqsave_nested() -v2
  2006-10-30 13:12 ` Jarek Poplawski
  2006-10-30 13:27   ` Jarek Poplawski
@ 2006-10-30 13:40   ` Peter Zijlstra
  2006-10-30 14:12     ` Jarek Poplawski
  1 sibling, 1 reply; 25+ messages in thread
From: Peter Zijlstra @ 2006-10-30 13:40 UTC (permalink / raw)
  To: Jarek Poplawski
  Cc: linux-kernel, Arjan van de Ven, Ingo Molnar, Jiri Kosina,
	Marcel Holtmann, David Woodhouse

*sigh*, I truly am cursed today :-(

how about this...
---

From: Arjan van de Ven <arjan@linux.intel.com>
Subject: spin_lock_irqsave_nested()

Introduce spin_lock_irqsave_nested(); implementation from:
 http://lkml.org/lkml/2006/6/1/122
Patch from:
 http://lkml.org/lkml/2006/9/13/258

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Jiri Kosina <jikos@jikos.cz>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/spinlock.h         |    6 ++++++
 include/linux/spinlock_api_smp.h |    2 ++
 kernel/spinlock.c                |   22 ++++++++++++++++++++++
 3 files changed, 30 insertions(+)

Index: linux-2.6/include/linux/spinlock_api_smp.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock_api_smp.h
+++ linux-2.6/include/linux/spinlock_api_smp.h
@@ -32,6 +32,8 @@ void __lockfunc _read_lock_irq(rwlock_t 
 void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 							__acquires(lock);
+unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
+							__acquires(lock);
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 							__acquires(lock);
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
Index: linux-2.6/include/linux/spinlock.h
===================================================================
--- linux-2.6.orig/include/linux/spinlock.h
+++ linux-2.6/include/linux/spinlock.h
@@ -186,10 +186,16 @@ do {								\
 #define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
 #define read_lock_irqsave(lock, flags)	flags = _read_lock_irqsave(lock)
 #define write_lock_irqsave(lock, flags)	flags = _write_lock_irqsave(lock)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define spin_lock_irqsave_nested(lock, flags, subclass)	flags = _spin_lock_irqsave_nested(lock, subclass)
+#else
+#define spin_lock_irqsave_nested(lock, flags, subclass)	flags = _spin_lock_irqsave(lock)
+#endif
 #else
 #define spin_lock_irqsave(lock, flags)	_spin_lock_irqsave(lock, flags)
 #define read_lock_irqsave(lock, flags)	_read_lock_irqsave(lock, flags)
 #define write_lock_irqsave(lock, flags)	_write_lock_irqsave(lock, flags)
+#define spin_lock_irqsave_nested(lock, flags, subclass)	_spin_lock_irqsave(lock, flags)
 #endif
 
 #define spin_lock_irq(lock)		_spin_lock_irq(lock)
Index: linux-2.6/kernel/spinlock.c
===================================================================
--- linux-2.6.orig/kernel/spinlock.c
+++ linux-2.6/kernel/spinlock.c
@@ -294,6 +294,28 @@ void __lockfunc _spin_lock_nested(spinlo
 
 EXPORT_SYMBOL(_spin_lock_nested);
 
+unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	preempt_disable();
+	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	/*
+	 * On lockdep we dont want the hand-coded irq-enable of
+	 * _raw_spin_lock_flags() code, because lockdep assumes
+	 * that interrupts are not re-enabled during lock-acquire:
+	 */
+#ifdef CONFIG_PROVE_SPIN_LOCKING
+	_raw_spin_lock(lock);
+#else
+	_raw_spin_lock_flags(lock, &flags);
+#endif
+	return flags;
+}
+
+EXPORT_SYMBOL(_spin_lock_irqsave_nested);
+
 #endif
 
 void __lockfunc _spin_unlock(spinlock_t *lock)



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] lockdep: spin_lock_irqsave_nested() -v2
  2006-10-30 13:40   ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() -v2 Peter Zijlstra
@ 2006-10-30 14:12     ` Jarek Poplawski
  0 siblings, 0 replies; 25+ messages in thread
From: Jarek Poplawski @ 2006-10-30 14:12 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Arjan van de Ven, Ingo Molnar, Jiri Kosina,
	Marcel Holtmann, David Woodhouse

On Mon, Oct 30, 2006 at 02:40:48PM +0100, Peter Zijlstra wrote:
> *sigh*, I truly am cursed today :-(
> 
> how about this...
> ---

Beautiful!

Jarek P.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] lockdep: spin_lock_irqsave_nested()
  2006-10-30  9:03 [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Peter Zijlstra
                   ` (2 preceding siblings ...)
  2006-10-30 13:12 ` Jarek Poplawski
@ 2006-10-31  6:48 ` Andrew Morton
  2006-10-31  7:25   ` [PATCH] splice : two smp_mb() can be omitted Eric Dumazet
  3 siblings, 1 reply; 25+ messages in thread
From: Andrew Morton @ 2006-10-31  6:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Arjan van de Ven, Ingo Molnar, Jiri Kosina,
	Marcel Holtmann, David Woodhouse

On Mon, 30 Oct 2006 10:03:25 +0100
Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> Introduce spin_lock_irqsave_nested(); implementation from:
>  http://lkml.org/lkml/2006/6/1/122
> Patch from:
>  http://lkml.org/lkml/2006/9/13/258

Needed quite som massaging due to
enforce-unsigned-long-flags-when-spinlocking.patch.  Please check the
result.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH] splice : two smp_mb() can be omitted
  2006-10-31  6:48 ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Andrew Morton
@ 2006-10-31  7:25   ` Eric Dumazet
  2006-10-31  7:32     ` Jens Axboe
  2006-10-31  9:40     ` Nick Piggin
  0 siblings, 2 replies; 25+ messages in thread
From: Eric Dumazet @ 2006-10-31  7:25 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Ingo Molnar, Jens Axboe

[-- Attachment #1: Type: text/plain, Size: 469 bytes --]

This patch deletes two calls to smp_mb() that were done after mutex_unlock() 
that contains an implicit memory barrier.

The first one in splice_to_pipe(), where 'do_wakeup' is set to true only if 
pipe->inode is set (and in this case the
if (pipe->inode)
    mutex_unlock(&pipe->inode->i_mutex);
is done too)

The second one in link_pipe(), following inode_double_unlock() that contains 
calls to mutex_unlock() too.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

[-- Attachment #2: splice.patch --]
[-- Type: text/plain, Size: 610 bytes --]

--- linux/fs/splice.c	2006-10-31 07:49:52.000000000 +0100
+++ linux-ed/fs/splice.c	2006-10-31 08:04:58.000000000 +0100
@@ -248,7 +248,6 @@
 		mutex_unlock(&pipe->inode->i_mutex);
 
 	if (do_wakeup) {
-		smp_mb();
 		if (waitqueue_active(&pipe->wait))
 			wake_up_interruptible(&pipe->wait);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
@@ -1518,7 +1517,6 @@
 	 * If we put data in the output pipe, wakeup any potential readers.
 	 */
 	if (ret > 0) {
-		smp_mb();
 		if (waitqueue_active(&opipe->wait))
 			wake_up_interruptible(&opipe->wait);
 		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31  7:25   ` [PATCH] splice : two smp_mb() can be omitted Eric Dumazet
@ 2006-10-31  7:32     ` Jens Axboe
  2006-10-31  7:41       ` Eric Dumazet
  2006-10-31  9:40     ` Nick Piggin
  1 sibling, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2006-10-31  7:32 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andrew Morton, linux-kernel, Ingo Molnar

On Tue, Oct 31 2006, Eric Dumazet wrote:
> This patch deletes two calls to smp_mb() that were done after 
> mutex_unlock() that contains an implicit memory barrier.
> 
> The first one in splice_to_pipe(), where 'do_wakeup' is set to true only if 
> pipe->inode is set (and in this case the
> if (pipe->inode)
>    mutex_unlock(&pipe->inode->i_mutex);
> is done too)
> 
> The second one in link_pipe(), following inode_double_unlock() that 
> contains calls to mutex_unlock() too.

NAK on that patch, the smp_mb() follows the waitqueue_active(). If you
later change the code and move the locks or whatnot, you have lost that
connection.

If you change the patch to insert a comment, then it may be more
applicable.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31  7:32     ` Jens Axboe
@ 2006-10-31  7:41       ` Eric Dumazet
  2006-10-31  7:46         ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Eric Dumazet @ 2006-10-31  7:41 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Andrew Morton, linux-kernel, Ingo Molnar

Jens Axboe a écrit :
> On Tue, Oct 31 2006, Eric Dumazet wrote:
>> This patch deletes two calls to smp_mb() that were done after 
>> mutex_unlock() that contains an implicit memory barrier.
>>
>> The first one in splice_to_pipe(), where 'do_wakeup' is set to true only if 
>> pipe->inode is set (and in this case the
>> if (pipe->inode)
>>    mutex_unlock(&pipe->inode->i_mutex);
>> is done too)
>>
>> The second one in link_pipe(), following inode_double_unlock() that 
>> contains calls to mutex_unlock() too.
> 
> NAK on that patch, the smp_mb() follows the waitqueue_active(). If you
> later change the code and move the locks or whatnot, you have lost that
> connection.
> 
> If you change the patch to insert a comment, then it may be more
> applicable.
> 

Hum... I read fs/pipe.c and see no smp_mb() there, but I suspect same 
semantics are/were used.

Should we add comments on fs/pipe.c too ?

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31  7:41       ` Eric Dumazet
@ 2006-10-31  7:46         ` Jens Axboe
  0 siblings, 0 replies; 25+ messages in thread
From: Jens Axboe @ 2006-10-31  7:46 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andrew Morton, linux-kernel, Ingo Molnar

On Tue, Oct 31 2006, Eric Dumazet wrote:
> Jens Axboe a écrit :
> >On Tue, Oct 31 2006, Eric Dumazet wrote:
> >>This patch deletes two calls to smp_mb() that were done after 
> >>mutex_unlock() that contains an implicit memory barrier.
> >>
> >>The first one in splice_to_pipe(), where 'do_wakeup' is set to true only 
> >>if pipe->inode is set (and in this case the
> >>if (pipe->inode)
> >>   mutex_unlock(&pipe->inode->i_mutex);
> >>is done too)
> >>
> >>The second one in link_pipe(), following inode_double_unlock() that 
> >>contains calls to mutex_unlock() too.
> >
> >NAK on that patch, the smp_mb() follows the waitqueue_active(). If you
> >later change the code and move the locks or whatnot, you have lost that
> >connection.
> >
> >If you change the patch to insert a comment, then it may be more
> >applicable.
> >
> 
> Hum... I read fs/pipe.c and see no smp_mb() there, but I suspect same 
> semantics are/were used.
> 
> Should we add comments on fs/pipe.c too ?

fs/pipe.c looks different:

        if (do_wakeup) {
                wake_up_interruptible_sync(&pipe->wait);
                ...
        }

The smp_mb() is not needed if you call wake_up() directly, only if
checking via waitqueue_active().

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31  7:25   ` [PATCH] splice : two smp_mb() can be omitted Eric Dumazet
  2006-10-31  7:32     ` Jens Axboe
@ 2006-10-31  9:40     ` Nick Piggin
  2006-10-31  9:49       ` Jens Axboe
  2006-10-31 10:51       ` Eric Dumazet
  1 sibling, 2 replies; 25+ messages in thread
From: Nick Piggin @ 2006-10-31  9:40 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andrew Morton, linux-kernel, Ingo Molnar, Jens Axboe

Eric Dumazet wrote:
> This patch deletes two calls to smp_mb() that were done after 
> mutex_unlock() that contains an implicit memory barrier.

Uh, there is nothing that says mutex_unlock or any unlock
functions contain an implicit smp_mb(). What is given is that the
lock and unlock obey aquire and release memory ordering,
respectively.

a = x;
xxx_unlock
b = y;

In this situation, the load of y can be executed before that of x.
And some architectures will even do so (i386 can, because the
unlock is an unprefixed store; ia64 can, because it uses a release
barrier in the unlock).

Whenever you rely on orderings of things *outside* locks (even
partially outside), you do need to be very careful about barriers
and can't rely on locks to do the right thing for you.

> 
> The first one in splice_to_pipe(), where 'do_wakeup' is set to true only 
> if pipe->inode is set (and in this case the
> if (pipe->inode)
>    mutex_unlock(&pipe->inode->i_mutex);
> is done too)
> 
> The second one in link_pipe(), following inode_double_unlock() that 
> contains calls to mutex_unlock() too.

It *may* be the case that these can be removed, but not by virtue
of the fact that the smp_mb is redundant.

> 
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
> 
> 
> ------------------------------------------------------------------------
> 
> --- linux/fs/splice.c	2006-10-31 07:49:52.000000000 +0100
> +++ linux-ed/fs/splice.c	2006-10-31 08:04:58.000000000 +0100
> @@ -248,7 +248,6 @@
>  		mutex_unlock(&pipe->inode->i_mutex);
>  
>  	if (do_wakeup) {
> -		smp_mb();
>  		if (waitqueue_active(&pipe->wait))
>  			wake_up_interruptible(&pipe->wait);
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> @@ -1518,7 +1517,6 @@
>  	 * If we put data in the output pipe, wakeup any potential readers.
>  	 */
>  	if (ret > 0) {
> -		smp_mb();
>  		if (waitqueue_active(&opipe->wait))
>  			wake_up_interruptible(&opipe->wait);
>  		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);


-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31  9:40     ` Nick Piggin
@ 2006-10-31  9:49       ` Jens Axboe
  2006-10-31 10:51       ` Eric Dumazet
  1 sibling, 0 replies; 25+ messages in thread
From: Jens Axboe @ 2006-10-31  9:49 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Eric Dumazet, Andrew Morton, linux-kernel, Ingo Molnar

On Tue, Oct 31 2006, Nick Piggin wrote:
> Eric Dumazet wrote:
> >This patch deletes two calls to smp_mb() that were done after 
> >mutex_unlock() that contains an implicit memory barrier.
> 
> Uh, there is nothing that says mutex_unlock or any unlock
> functions contain an implicit smp_mb(). What is given is that the
> lock and unlock obey aquire and release memory ordering,
> respectively.
> 
> a = x;
> xxx_unlock
> b = y;
> 
> In this situation, the load of y can be executed before that of x.
> And some architectures will even do so (i386 can, because the
> unlock is an unprefixed store; ia64 can, because it uses a release
> barrier in the unlock).
> 
> Whenever you rely on orderings of things *outside* locks (even
> partially outside), you do need to be very careful about barriers
> and can't rely on locks to do the right thing for you.

Good point, we should not make any assumptions on the way the
architecture implements the mutexes.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31  9:40     ` Nick Piggin
  2006-10-31  9:49       ` Jens Axboe
@ 2006-10-31 10:51       ` Eric Dumazet
  2006-10-31 22:16         ` Nick Piggin
  2006-11-02 17:02         ` [PATCH] splice : Must fully check for fifos Eric Dumazet
  1 sibling, 2 replies; 25+ messages in thread
From: Eric Dumazet @ 2006-10-31 10:51 UTC (permalink / raw)
  To: Nick Piggin, Ingo Molnar; +Cc: Andrew Morton, linux-kernel, Jens Axboe

On Tuesday 31 October 2006 10:40, Nick Piggin wrote:

> Uh, there is nothing that says mutex_unlock or any unlock
> functions contain an implicit smp_mb(). What is given is that the
> lock and unlock obey aquire and release memory ordering,
> respectively.
>
> a = x;
> xxx_unlock
> b = y;
>
> In this situation, the load of y can be executed before that of x.
> And some architectures will even do so (i386 can, because the
> unlock is an unprefixed store; ia64 can, because it uses a release
> barrier in the unlock).

Hum... it seems your mutex_unlock() i386/x86_64 copy is not same as mine :)

Maybe we could document the fact that mutex_{lock|unlock}() has or has not an 
implicit smp_mb().

If not, delete smp_mb() calls from include/asm-generic/mutex-dec.h 

Ingo ?

Eric

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31 10:51       ` Eric Dumazet
@ 2006-10-31 22:16         ` Nick Piggin
  2006-10-31 23:08           ` Eric Dumazet
  2006-11-02 17:02         ` [PATCH] splice : Must fully check for fifos Eric Dumazet
  1 sibling, 1 reply; 25+ messages in thread
From: Nick Piggin @ 2006-10-31 22:16 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, Jens Axboe

Eric Dumazet wrote:

>On Tuesday 31 October 2006 10:40, Nick Piggin wrote:
>
>
>>Uh, there is nothing that says mutex_unlock or any unlock
>>functions contain an implicit smp_mb(). What is given is that the
>>lock and unlock obey aquire and release memory ordering,
>>respectively.
>>
>>a = x;
>>xxx_unlock
>>b = y;
>>
>>In this situation, the load of y can be executed before that of x.
>>And some architectures will even do so (i386 can, because the
>>unlock is an unprefixed store; ia64 can, because it uses a release
>>barrier in the unlock).
>>
>
>Hum... it seems your mutex_unlock() i386/x86_64 copy is not same as mine :)
>

OK, replace xxx with mutex, and what I've said still holds true for ia64.

>Maybe we could document the fact that mutex_{lock|unlock}() has or has not an 
>implicit smp_mb().
>

It does not, none of the unlock functions ever have.

>If not, delete smp_mb() calls from include/asm-generic/mutex-dec.h 
>

They should be deleted (and from mutex-xchg). NOT because there is no 
need for
a memory barrier, but because the atomic_alter_value_and_return_something
functions always provide a barrier before and after the operation, as per
Documentation/atomic_ops.txt

Again, lock / unlock operations require acquire / release consistency. 
This is a
memory ordering operation. It is not equivalent to smp_mb, though.

--

Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31 22:16         ` Nick Piggin
@ 2006-10-31 23:08           ` Eric Dumazet
  2006-10-31 23:45             ` Nick Piggin
  0 siblings, 1 reply; 25+ messages in thread
From: Eric Dumazet @ 2006-10-31 23:08 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, Jens Axboe

Nick Piggin a écrit :
> Eric Dumazet wrote:
> 
>> On Tuesday 31 October 2006 10:40, Nick Piggin wrote:
>>
>>
>>> Uh, there is nothing that says mutex_unlock or any unlock
>>> functions contain an implicit smp_mb(). What is given is that the
>>> lock and unlock obey aquire and release memory ordering,
>>> respectively.
>>>
>>> a = x;
>>> xxx_unlock
>>> b = y;
>>>
>>> In this situation, the load of y can be executed before that of x.
>>> And some architectures will even do so (i386 can, because the
>>> unlock is an unprefixed store; ia64 can, because it uses a release
>>> barrier in the unlock).
>>>
>>
>> Hum... it seems your mutex_unlock() i386/x86_64 copy is not same as 
>> mine :)
>>
> 
> OK, replace xxx with mutex, and what I've said still holds true for ia64.
> 
>> Maybe we could document the fact that mutex_{lock|unlock}() has or has 
>> not an implicit smp_mb().
>>
> 
> It does not, none of the unlock functions ever have.
> 
>> If not, delete smp_mb() calls from include/asm-generic/mutex-dec.h
> 
> They should be deleted (and from mutex-xchg). NOT because there is no 
> need for
> a memory barrier, but because the atomic_alter_value_and_return_something
> functions always provide a barrier before and after the operation, as per
> Documentation/atomic_ops.txt
> 
> Again, lock / unlock operations require acquire / release consistency. 
> This is a
> memory ordering operation. It is not equivalent to smp_mb, though.

This thread just show how difficult it is to have consistent use of all this 
stuff in all kernel. Maybe it is just me ? Should I work on IA64 to have a 
chance to learn ?


For example, Documentation/atomic_ops.txt comments about atomic_inc_return() 
and atomic_dec_return() seems in contradiction with itself.

--------------------------

Unlike the above routines, it is required that explicit memory
barriers are performed before and after the operation.  It must be
done such that all memory operations before and after the atomic
operation calls are strongly ordered with respect to the atomic
operation itself.

-------------------------

When I read this, I understand we (the user of such functions) need to add 
smp_mb(). (That is, those functions wont do it themselves)

Then following text is :

----------------------------
For example, it should behave as if a smp_mb() call existed both
before and after the atomic operation.

--------------------------

Now I understand the reverse.


Time to sleep for sure :) And find a IA64 platform tomorrow morning :)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : two smp_mb() can be omitted
  2006-10-31 23:08           ` Eric Dumazet
@ 2006-10-31 23:45             ` Nick Piggin
  0 siblings, 0 replies; 25+ messages in thread
From: Nick Piggin @ 2006-10-31 23:45 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, Jens Axboe

Eric Dumazet wrote:

> Nick Piggin a écrit :
>
>>
>> Again, lock / unlock operations require acquire / release 
>> consistency. This is a
>> memory ordering operation. It is not equivalent to smp_mb, though.
>
>
> This thread just show how difficult it is to have consistent use of 
> all this stuff in all kernel. Maybe it is just me ? Should I work on 
> IA64 to have a chance to learn ?


No need, just don't go thinking that mutex_unlock implies smp_mb.

spin_unlock has never implied an smp_rmb on i386.

> For example, Documentation/atomic_ops.txt comments about 
> atomic_inc_return() and atomic_dec_return() seems in contradiction 
> with itself.
>
> --------------------------
>
> Unlike the above routines, it is required that explicit memory
> barriers are performed before and after the operation.  It must be
> done such that all memory operations before and after the atomic
> operation calls are strongly ordered with respect to the atomic
> operation itself.
>
> -------------------------
>
> When I read this, I understand we (the user of such functions) need to 
> add smp_mb(). (That is, those functions wont do it themselves)


This is written from the point of view of the _implementor_. I agree it 
is a bit
confusing, but does the example below clear it up?

>
> Then following text is :
>
> ----------------------------
> For example, it should behave as if a smp_mb() call existed both
> before and after the atomic operation.
>
> --------------------------
>
> Now I understand the reverse.


Now you understand correctly ;)

--

Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH] splice : Must fully check for fifos
  2006-10-31 10:51       ` Eric Dumazet
  2006-10-31 22:16         ` Nick Piggin
@ 2006-11-02 17:02         ` Eric Dumazet
  2006-11-02 17:05           ` Eric Dumazet
  1 sibling, 1 reply; 25+ messages in thread
From: Eric Dumazet @ 2006-11-02 17:02 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Nick Piggin, Ingo Molnar, linux-kernel, Jens Axboe, tytso

Hi Andrew

I think this patch is necessary. It's quite easy to crash a 2.6.19-rc4 box :(

AFAIK the problem come from inode-diet (by Theodore Ts'o, (2006/Sep/27))

Thank you

[PATCH] splice : Must fully check for FIFO

It appears that i_pipe, i_cdev and i_bdev share the same memory location 
(anonymous union in struct inode) since commits 
577c4eb09d1034d0739e3135fd2cff50588024be
eaf796e7ef6014f208c409b2b14fddcfaafe7e3a

Because of that, testing i_pipe being NULL is not anymore sufficient to tell 
if an inode is a FIFO or not.

Therefore, we must use the S_ISFIFO(inode->i_mode) test before assuming i_pipe 
pointer is pointing to a struct pipe_inode_info.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH] splice : Must fully check for fifos
  2006-11-02 17:02         ` [PATCH] splice : Must fully check for fifos Eric Dumazet
@ 2006-11-02 17:05           ` Eric Dumazet
  2006-11-02 19:07             ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Eric Dumazet @ 2006-11-02 17:05 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Nick Piggin, Ingo Molnar, linux-kernel, Jens Axboe, tytso

[-- Attachment #1: Type: text/plain, Size: 754 bytes --]

With the patch this time :( Sorry guys

Hi Andrew

I think this patch is necessary. It's quite easy to crash a 2.6.19-rc4 box :(

AFAIK the problem come from inode-diet (by Theodore Ts'o, (2006/Sep/27))

Thank you

[PATCH] splice : Must fully check for FIFO

It appears that i_pipe, i_cdev and i_bdev share the same memory location 
(anonymous union in struct inode) since commits 
577c4eb09d1034d0739e3135fd2cff50588024be
eaf796e7ef6014f208c409b2b14fddcfaafe7e3a

Because of that, testing i_pipe being NULL is not anymore sufficient to tell 
if an inode is a FIFO or not.

Therefore, we must use the S_ISFIFO(inode->i_mode) test before assuming i_pipe 
pointer is pointing to a struct pipe_inode_info.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

[-- Attachment #2: splice_fix.patch --]
[-- Type: text/plain, Size: 2262 bytes --]

--- linux-2.6.19-rc4/fs/splice.c	2006-11-02 17:14:55.000000000 +0100
+++ linux-2.6.19-rc4-ed/fs/splice.c	2006-11-02 17:38:29.000000000 +0100
@@ -1115,12 +1115,14 @@
 		      struct file *out, loff_t __user *off_out,
 		      size_t len, unsigned int flags)
 {
+	struct inode *inode;
 	struct pipe_inode_info *pipe;
 	loff_t offset, *off;
 	long ret;
 
-	pipe = in->f_dentry->d_inode->i_pipe;
-	if (pipe) {
+	inode = in->f_dentry->d_inode;
+	pipe = inode->i_pipe;
+	if (pipe && S_ISFIFO(inode->i_mode)) {
 		if (off_in)
 			return -ESPIPE;
 		if (off_out) {
@@ -1140,8 +1142,9 @@
 		return ret;
 	}
 
-	pipe = out->f_dentry->d_inode->i_pipe;
-	if (pipe) {
+	inode = out->f_dentry->d_inode;
+	pipe = inode->i_pipe;
+	if (pipe && S_ISFIFO(inode->i_mode)) {
 		if (off_out)
 			return -ESPIPE;
 		if (off_in) {
@@ -1298,7 +1301,8 @@
 static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 			unsigned long nr_segs, unsigned int flags)
 {
-	struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct pipe_inode_info *pipe = inode->i_pipe;
 	struct page *pages[PIPE_BUFFERS];
 	struct partial_page partial[PIPE_BUFFERS];
 	struct splice_pipe_desc spd = {
@@ -1308,7 +1312,7 @@
 		.ops = &user_page_pipe_buf_ops,
 	};
 
-	if (unlikely(!pipe))
+	if (unlikely(!pipe || !S_ISFIFO(inode->i_mode)))
 		return -EBADF;
 	if (unlikely(nr_segs > UIO_MAXIOV))
 		return -EINVAL;
@@ -1535,11 +1539,21 @@
 static long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags)
 {
-	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
-	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+	struct inode *in_inode = in->f_dentry->d_inode;
+	struct inode *out_inode = out->f_dentry->d_inode;
+	struct pipe_inode_info *ipipe;
+	struct pipe_inode_info *opipe;
 	int ret = -EINVAL;
 
 	/*
+	 * CAUTION : As i_pipe/i_bdev/i_cdev share the same location,
+	 * we must check we deal with fifos/pipes, not cdev or bdev.
+	 */
+	if (!S_ISFIFO(in_inode->i_mode) || !S_ISFIFO(out_inode->i_mode))
+		return ret;
+	ipipe = in_inode->i_pipe;
+	opipe = out_inode->i_pipe;
+	/*
 	 * Duplicate the contents of ipipe to opipe without actually
 	 * copying the data.
 	 */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : Must fully check for fifos
  2006-11-02 17:05           ` Eric Dumazet
@ 2006-11-02 19:07             ` Jens Axboe
  2006-11-03  8:50               ` Jens Axboe
  0 siblings, 1 reply; 25+ messages in thread
From: Jens Axboe @ 2006-11-02 19:07 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andrew Morton, Nick Piggin, Ingo Molnar, linux-kernel, tytso

On Thu, Nov 02 2006, Eric Dumazet wrote:
> With the patch this time :( Sorry guys
> 
> Hi Andrew
> 
> I think this patch is necessary. It's quite easy to crash a 2.6.19-rc4 box :(
> 
> AFAIK the problem come from inode-diet (by Theodore Ts'o, (2006/Sep/27))
> 
> Thank you
> 
> [PATCH] splice : Must fully check for FIFO
> 
> It appears that i_pipe, i_cdev and i_bdev share the same memory location 
> (anonymous union in struct inode) since commits 
> 577c4eb09d1034d0739e3135fd2cff50588024be
> eaf796e7ef6014f208c409b2b14fddcfaafe7e3a
> 
> Because of that, testing i_pipe being NULL is not anymore sufficient
> to tell if an inode is a FIFO or not.
> 
> Therefore, we must use the S_ISFIFO(inode->i_mode) test before
> assuming i_pipe pointer is pointing to a struct pipe_inode_info.

Indeed, the inode slimming introduced this bug. I'll queue up a test run
of things and send it upstream, thanks for catching this.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH] splice : Must fully check for fifos
  2006-11-02 19:07             ` Jens Axboe
@ 2006-11-03  8:50               ` Jens Axboe
  0 siblings, 0 replies; 25+ messages in thread
From: Jens Axboe @ 2006-11-03  8:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Andrew Morton, Nick Piggin, Ingo Molnar, linux-kernel, tytso

On Thu, Nov 02 2006, Jens Axboe wrote:
> On Thu, Nov 02 2006, Eric Dumazet wrote:
> > With the patch this time :( Sorry guys
> > 
> > Hi Andrew
> > 
> > I think this patch is necessary. It's quite easy to crash a 2.6.19-rc4 box :(
> > 
> > AFAIK the problem come from inode-diet (by Theodore Ts'o, (2006/Sep/27))
> > 
> > Thank you
> > 
> > [PATCH] splice : Must fully check for FIFO
> > 
> > It appears that i_pipe, i_cdev and i_bdev share the same memory location 
> > (anonymous union in struct inode) since commits 
> > 577c4eb09d1034d0739e3135fd2cff50588024be
> > eaf796e7ef6014f208c409b2b14fddcfaafe7e3a
> > 
> > Because of that, testing i_pipe being NULL is not anymore sufficient
> > to tell if an inode is a FIFO or not.
> > 
> > Therefore, we must use the S_ISFIFO(inode->i_mode) test before
> > assuming i_pipe pointer is pointing to a struct pipe_inode_info.
> 
> Indeed, the inode slimming introduced this bug. I'll queue up a test run
> of things and send it upstream, thanks for catching this.

This is the version I tested and merged.

diff --git a/fs/splice.c b/fs/splice.c
index 8d70595..87694de 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1109,6 +1109,19 @@ out_release:
 EXPORT_SYMBOL(do_splice_direct);
 
 /*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+static inline int is_pipe(struct inode *inode)
+{
+	if (inode->i_pipe && S_ISFIFO(inode->i_mode))
+		return 1;
+
+	return 0;
+}
+
+/*
  * Determine where to splice to/from.
  */
 static long do_splice(struct file *in, loff_t __user *off_in,
@@ -1119,8 +1132,8 @@ static long do_splice(struct file *in, l
 	loff_t offset, *off;
 	long ret;
 
-	pipe = in->f_dentry->d_inode->i_pipe;
-	if (pipe) {
+	if (is_pipe(in->f_dentry->d_inode)) {
+		pipe = in->f_dentry->d_inode->i_pipe;
 		if (off_in)
 			return -ESPIPE;
 		if (off_out) {
@@ -1140,8 +1153,8 @@ static long do_splice(struct file *in, l
 		return ret;
 	}
 
-	pipe = out->f_dentry->d_inode->i_pipe;
-	if (pipe) {
+	if (is_pipe(out->f_dentry->d_inode)) {
+		pipe = out->f_dentry->d_inode->i_pipe;
 		if (off_out)
 			return -ESPIPE;
 		if (off_in) {
@@ -1298,7 +1311,7 @@ static int get_iovec_page_array(const st
 static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 			unsigned long nr_segs, unsigned int flags)
 {
-	struct pipe_inode_info *pipe = file->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *pipe;
 	struct page *pages[PIPE_BUFFERS];
 	struct partial_page partial[PIPE_BUFFERS];
 	struct splice_pipe_desc spd = {
@@ -1308,7 +1321,7 @@ static long do_vmsplice(struct file *fil
 		.ops = &user_page_pipe_buf_ops,
 	};
 
-	if (unlikely(!pipe))
+	if (!is_pipe(file->f_dentry->d_inode))
 		return -EBADF;
 	if (unlikely(nr_segs > UIO_MAXIOV))
 		return -EINVAL;
@@ -1320,6 +1333,7 @@ static long do_vmsplice(struct file *fil
 	if (spd.nr_pages <= 0)
 		return spd.nr_pages;
 
+	pipe = file->f_dentry->d_inode->i_pipe;
 	return splice_to_pipe(pipe, &spd);
 }
 
@@ -1535,15 +1549,20 @@ static int link_pipe(struct pipe_inode_i
 static long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags)
 {
-	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
-	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+	struct pipe_inode_info *ipipe;
+	struct pipe_inode_info *opipe;
 	int ret = -EINVAL;
 
+	if (!is_pipe(in->f_dentry->d_inode) || !is_pipe(out->f_dentry->d_inode))
+		return ret;
+
 	/*
 	 * Duplicate the contents of ipipe to opipe without actually
 	 * copying the data.
 	 */
-	if (ipipe && opipe && ipipe != opipe) {
+	ipipe = in->f_dentry->d_inode->i_pipe;
+	opipe = out->f_dentry->d_inode->i_pipe;
+	if (ipipe != opipe) {
 		/*
 		 * Keep going, unless we encounter an error. The ipipe/opipe
 		 * ordering doesn't really matter.

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2006-11-03  8:48 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-30  9:03 [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Peter Zijlstra
2006-10-30  9:06 ` [PATCH 2/2] lockdep: annotate bcsp driver Peter Zijlstra
2006-10-30  9:06   ` Ingo Molnar
2006-10-30  9:30   ` Marcel Holtmann
2006-10-30  9:31   ` [PATCH 2/2] lockdep: annotate bcsp driver - v2 Peter Zijlstra
2006-10-30  9:07 ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Ingo Molnar
2006-10-30 13:12 ` Jarek Poplawski
2006-10-30 13:27   ` Jarek Poplawski
2006-10-30 13:40   ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() -v2 Peter Zijlstra
2006-10-30 14:12     ` Jarek Poplawski
2006-10-31  6:48 ` [PATCH 1/2] lockdep: spin_lock_irqsave_nested() Andrew Morton
2006-10-31  7:25   ` [PATCH] splice : two smp_mb() can be omitted Eric Dumazet
2006-10-31  7:32     ` Jens Axboe
2006-10-31  7:41       ` Eric Dumazet
2006-10-31  7:46         ` Jens Axboe
2006-10-31  9:40     ` Nick Piggin
2006-10-31  9:49       ` Jens Axboe
2006-10-31 10:51       ` Eric Dumazet
2006-10-31 22:16         ` Nick Piggin
2006-10-31 23:08           ` Eric Dumazet
2006-10-31 23:45             ` Nick Piggin
2006-11-02 17:02         ` [PATCH] splice : Must fully check for fifos Eric Dumazet
2006-11-02 17:05           ` Eric Dumazet
2006-11-02 19:07             ` Jens Axboe
2006-11-03  8:50               ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox