linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [patch 4/4] vunmap: Fix racy use of rcu_head
       [not found] <20091006143727.868480435@polymtl.ca>
@ 2009-10-06 14:37 ` Mathieu Desnoyers
  2009-10-06 15:23   ` Mathieu Desnoyers
  2009-10-06 16:43   ` Christoph Lameter
  0 siblings, 2 replies; 4+ messages in thread
From: Mathieu Desnoyers @ 2009-10-06 14:37 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Paul E. McKenney
  Cc: Mathieu Desnoyers, cl, linux-mm

[-- Attachment #1: vunmap-fix-teardown.patch --]
[-- Type: text/plain, Size: 3839 bytes --]

Repetitive use of vmap/vunmap on the same address triggers this bug.

Simplest fix: directly kfree the data structure rather than doing it lazily.

Impact:
(-) slight slowdown on vunmap
(+) stop messing up rcu callback lists ;)

Caught it with DEBUG_RCU_HEAD, running LTTng armall/disarmall in loops.
Immediate values (with breakpoint-ipi scheme) are still using vunmap.

------------[ cut here ]------------
WARNING: at kernel/rcutree.c:1199 __call_rcu+0x181/0x190()
Hardware name: X7DAL
Modules linked in: loop ltt_statedump ltt_userspace_event ipc_tr]
Pid: 4527, comm: ltt-armall Not tainted 2.6.30.9-trace #30
Call Trace:
 [<ffffffff8027b181>] ? __call_rcu+0x181/0x190
 [<ffffffff8027b181>] ? __call_rcu+0x181/0x190
 [<ffffffff8023a6a9>] ? warn_slowpath_common+0x79/0xd0
 [<ffffffff802b4890>] ? rcu_free_va+0x0/0x10
 [<ffffffff8027b181>] ? __call_rcu+0x181/0x190
 [<ffffffff802b5298>] ? __purge_vmap_area_lazy+0x1a8/0x1e0
 [<ffffffff802b59d4>] ? free_unmap_vmap_area_noflush+0x74/0x80
 [<ffffffff802b5a0e>] ? remove_vm_area+0x2e/0x80
 [<ffffffff802b5b35>] ? __vunmap+0x45/0xf0
 [<ffffffffa002e826>] ? ltt_statedump_start+0x7b6/0x820 [ltt_sta]
 [<ffffffff8068978a>] ? arch_imv_update+0x16a/0x2f0
 [<ffffffff8027dd13>] ? imv_update_range+0x53/0xa0
 [<ffffffff8026235b>] ? _module_imv_update+0x4b/0x60
 [<ffffffff80262385>] ? module_imv_update+0x15/0x30
 [<ffffffff80280049>] ? marker_probe_register+0x149/0xb90
 [<ffffffff8040c8e0>] ? ltt_vtrace+0x0/0x8c0
 [<ffffffff80409330>] ? ltt_marker_connect+0xd0/0x150
 [<ffffffff8040f8dc>] ? marker_enable_write+0xec/0x120
 [<ffffffff802c6cb8>] ? __dentry_open+0x268/0x350
 [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170
 [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170
 [<ffffffff802c973b>] ? vfs_write+0xcb/0x170
 [<ffffffff802c9984>] ? sys_write+0x64/0x130
 [<ffffffff8020beeb>] ? system_call_fastpath+0x16/0x1b
---[ end trace ef92443e716fa199 ]---
------------[ cut here ]------------

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: cl@linux-foundation.org
CC: mingo@elte.hu
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: linux-mm@kvack.org
CC: akpm@linux-foundation.org
---
 mm/vmalloc.c |   18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

Index: linux-2.6-lttng/mm/vmalloc.c
===================================================================
--- linux-2.6-lttng.orig/mm/vmalloc.c	2009-10-06 09:37:04.000000000 -0400
+++ linux-2.6-lttng/mm/vmalloc.c	2009-10-06 09:37:56.000000000 -0400
@@ -417,13 +417,6 @@ overflow:
 	return va;
 }
 
-static void rcu_free_va(struct rcu_head *head)
-{
-	struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
-
-	kfree(va);
-}
-
 static void __free_vmap_area(struct vmap_area *va)
 {
 	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
@@ -431,7 +424,7 @@ static void __free_vmap_area(struct vmap
 	RB_CLEAR_NODE(&va->rb_node);
 	list_del_rcu(&va->list);
 
-	call_rcu(&va->rcu_head, rcu_free_va);
+	kfree(va);
 }
 
 /*
@@ -757,13 +750,6 @@ static struct vmap_block *new_vmap_block
 	return vb;
 }
 
-static void rcu_free_vb(struct rcu_head *head)
-{
-	struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
-
-	kfree(vb);
-}
-
 static void free_vmap_block(struct vmap_block *vb)
 {
 	struct vmap_block *tmp;
@@ -778,7 +764,7 @@ static void free_vmap_block(struct vmap_
 	BUG_ON(tmp != vb);
 
 	free_unmap_vmap_area_noflush(vb->va);
-	call_rcu(&vb->rcu_head, rcu_free_vb);
+	kfree(vb);
 }
 
 static void *vb_alloc(unsigned long size, gfp_t gfp_mask)

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch 4/4] vunmap: Fix racy use of rcu_head
  2009-10-06 14:37 ` [patch 4/4] vunmap: Fix racy use of rcu_head Mathieu Desnoyers
@ 2009-10-06 15:23   ` Mathieu Desnoyers
  2009-10-06 16:43   ` Christoph Lameter
  1 sibling, 0 replies; 4+ messages in thread
From: Mathieu Desnoyers @ 2009-10-06 15:23 UTC (permalink / raw)
  To: akpm, Ingo Molnar, linux-kernel, Paul E. McKenney; +Cc: cl, linux-mm

* Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) wrote:
> Repetitive use of vmap/vunmap on the same address triggers this bug.
> 
> Simplest fix: directly kfree the data structure rather than doing it lazily.
> 

I assumed that call_rcu in vunmap() is only used to postpone kfree()
from the hot path, but I ask for comfirmation/infirmation about that. I
fear delayed reclamation might be there for a reason. Are there rcu read
sides depending on this behavior ?

If yes, then this patch is wrong, and we could change it for

synchronize_rcu();
kfree(....);

Mathieu

> Impact:
> (-) slight slowdown on vunmap
> (+) stop messing up rcu callback lists ;)
> 
> Caught it with DEBUG_RCU_HEAD, running LTTng armall/disarmall in loops.
> Immediate values (with breakpoint-ipi scheme) are still using vunmap.
> 
> ------------[ cut here ]------------
> WARNING: at kernel/rcutree.c:1199 __call_rcu+0x181/0x190()
> Hardware name: X7DAL
> Modules linked in: loop ltt_statedump ltt_userspace_event ipc_tr]
> Pid: 4527, comm: ltt-armall Not tainted 2.6.30.9-trace #30
> Call Trace:
>  [<ffffffff8027b181>] ? __call_rcu+0x181/0x190
>  [<ffffffff8027b181>] ? __call_rcu+0x181/0x190
>  [<ffffffff8023a6a9>] ? warn_slowpath_common+0x79/0xd0
>  [<ffffffff802b4890>] ? rcu_free_va+0x0/0x10
>  [<ffffffff8027b181>] ? __call_rcu+0x181/0x190
>  [<ffffffff802b5298>] ? __purge_vmap_area_lazy+0x1a8/0x1e0
>  [<ffffffff802b59d4>] ? free_unmap_vmap_area_noflush+0x74/0x80
>  [<ffffffff802b5a0e>] ? remove_vm_area+0x2e/0x80
>  [<ffffffff802b5b35>] ? __vunmap+0x45/0xf0
>  [<ffffffffa002e826>] ? ltt_statedump_start+0x7b6/0x820 [ltt_sta]
>  [<ffffffff8068978a>] ? arch_imv_update+0x16a/0x2f0
>  [<ffffffff8027dd13>] ? imv_update_range+0x53/0xa0
>  [<ffffffff8026235b>] ? _module_imv_update+0x4b/0x60
>  [<ffffffff80262385>] ? module_imv_update+0x15/0x30
>  [<ffffffff80280049>] ? marker_probe_register+0x149/0xb90
>  [<ffffffff8040c8e0>] ? ltt_vtrace+0x0/0x8c0
>  [<ffffffff80409330>] ? ltt_marker_connect+0xd0/0x150
>  [<ffffffff8040f8dc>] ? marker_enable_write+0xec/0x120
>  [<ffffffff802c6cb8>] ? __dentry_open+0x268/0x350
>  [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170
>  [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170
>  [<ffffffff802c973b>] ? vfs_write+0xcb/0x170
>  [<ffffffff802c9984>] ? sys_write+0x64/0x130
>  [<ffffffff8020beeb>] ? system_call_fastpath+0x16/0x1b
> ---[ end trace ef92443e716fa199 ]---
> ------------[ cut here ]------------
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
> CC: cl@linux-foundation.org
> CC: mingo@elte.hu
> CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> CC: linux-mm@kvack.org
> CC: akpm@linux-foundation.org
> ---
>  mm/vmalloc.c |   18 ++----------------
>  1 file changed, 2 insertions(+), 16 deletions(-)
> 
> Index: linux-2.6-lttng/mm/vmalloc.c
> ===================================================================
> --- linux-2.6-lttng.orig/mm/vmalloc.c	2009-10-06 09:37:04.000000000 -0400
> +++ linux-2.6-lttng/mm/vmalloc.c	2009-10-06 09:37:56.000000000 -0400
> @@ -417,13 +417,6 @@ overflow:
>  	return va;
>  }
>  
> -static void rcu_free_va(struct rcu_head *head)
> -{
> -	struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
> -
> -	kfree(va);
> -}
> -
>  static void __free_vmap_area(struct vmap_area *va)
>  {
>  	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
> @@ -431,7 +424,7 @@ static void __free_vmap_area(struct vmap
>  	RB_CLEAR_NODE(&va->rb_node);
>  	list_del_rcu(&va->list);
>  
> -	call_rcu(&va->rcu_head, rcu_free_va);
> +	kfree(va);
>  }
>  
>  /*
> @@ -757,13 +750,6 @@ static struct vmap_block *new_vmap_block
>  	return vb;
>  }
>  
> -static void rcu_free_vb(struct rcu_head *head)
> -{
> -	struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
> -
> -	kfree(vb);
> -}
> -
>  static void free_vmap_block(struct vmap_block *vb)
>  {
>  	struct vmap_block *tmp;
> @@ -778,7 +764,7 @@ static void free_vmap_block(struct vmap_
>  	BUG_ON(tmp != vb);
>  
>  	free_unmap_vmap_area_noflush(vb->va);
> -	call_rcu(&vb->rcu_head, rcu_free_vb);
> +	kfree(vb);
>  }
>  
>  static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
> 
> -- 
> Mathieu Desnoyers
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch 4/4] vunmap: Fix racy use of rcu_head
  2009-10-06 14:37 ` [patch 4/4] vunmap: Fix racy use of rcu_head Mathieu Desnoyers
  2009-10-06 15:23   ` Mathieu Desnoyers
@ 2009-10-06 16:43   ` Christoph Lameter
  2009-10-06 17:33     ` Mathieu Desnoyers
  1 sibling, 1 reply; 4+ messages in thread
From: Christoph Lameter @ 2009-10-06 16:43 UTC (permalink / raw)
  To: nickpiggin, Mathieu Desnoyers
  Cc: akpm, Ingo Molnar, linux-kernel, Paul E. McKenney, linux-mm

On Tue, 6 Oct 2009, Mathieu Desnoyers wrote:

> Simplest fix: directly kfree the data structure rather than doing it lazily.

The delay is necessary as far as I can tell for performance reasons. But
Nick was the last one fiddling around with the subsystem as far as I
remember. CCing him. May be he has a minute to think about a fix that
preserved performance.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch 4/4] vunmap: Fix racy use of rcu_head
  2009-10-06 16:43   ` Christoph Lameter
@ 2009-10-06 17:33     ` Mathieu Desnoyers
  0 siblings, 0 replies; 4+ messages in thread
From: Mathieu Desnoyers @ 2009-10-06 17:33 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: nickpiggin, akpm, Ingo Molnar, linux-kernel, Paul E. McKenney,
	linux-mm

* Christoph Lameter (cl@linux-foundation.org) wrote:
> On Tue, 6 Oct 2009, Mathieu Desnoyers wrote:
> 
> > Simplest fix: directly kfree the data structure rather than doing it lazily.
> 
> The delay is necessary as far as I can tell for performance reasons. But
> Nick was the last one fiddling around with the subsystem as far as I
> remember. CCing him. May be he has a minute to think about a fix that
> preserved performance.
> 

Thanks,

More info on the problem:

I added a check in alloc_vmap_area() at:

        va = kmalloc_node(sizeof(struct vmap_area),
                        gfp_mask & GFP_RECLAIM_MASK, node);
        WARN_ON(va->rcu_head.debug == LIST_POISON1);

(memory allocation debugging is off in my config)

I used this along with my new call rcu debug patch to check if memory is
reallocated before the callback (doing kfree) is executed. It triggers
this:

WARNING: at mm/vmalloc.c:343 alloc_vmap_area+0x305/0x340()
Hardware name: X7DAL
Modules linked in: loop ltt_statedump ltt_userspace_event ipc_tr]
Pid: 5584, comm: ltt-disarmall Not tainted 2.6.30.9-trace #41
Call Trace:
 [<ffffffff802b8665>] ? alloc_vmap_area+0x305/0x340
 [<ffffffff802b8665>] ? alloc_vmap_area+0x305/0x340
 [<ffffffff8023cf29>] ? warn_slowpath_common+0x79/0xd0
 [<ffffffff802b8665>] ? alloc_vmap_area+0x305/0x340
 [<ffffffff80237670>] ? default_wake_function+0x0/0x10
 [<ffffffff802b8769>] ? __get_vm_area_node+0xc9/0x1d0
 [<ffffffff805ae965>] ? sys_getsockopt+0x85/0x160
 [<ffffffff8040f160>] ? ltt_vtrace+0x0/0x8d0
 [<ffffffff802b88dd>] ? get_vm_area_caller+0x2d/0x40
 [<ffffffff8068bc3e>] ? arch_imv_update+0x10e/0x2f0
 [<ffffffff802b93aa>] ? vmap+0x4a/0x80
 [<ffffffff8068bc3e>] ? arch_imv_update+0x10e/0x2f0
 [<ffffffff80283e6d>] ? get_tracepoint+0x25d/0x290
 [<ffffffff80280b93>] ? imv_update_range+0x53/0xa0
 [<ffffffff80284821>] ? tracepoint_update_probes+0x21/0x30
 [<ffffffff802848b3>] ? tracepoint_probe_update_all+0x83/0x100
 [<ffffffff80282ac1>] ? marker_update_probes+0x21/0x40
 [<ffffffff8040f160>] ? ltt_vtrace+0x0/0x8d0
 [<ffffffff80282d5d>] ? marker_probe_unregister+0xad/0x130
 [<ffffffff8040b9dc>] ? ltt_marker_disconnect+0xdc/0x120
 [<ffffffff804121d2>] ? marker_enable_write+0x112/0x120
 [<ffffffff80688dd0>] ? _spin_unlock+0x10/0x30
 [<ffffffff802e6b36>] ? mnt_drop_write+0x76/0x180
 [<ffffffff802d96dd>] ? do_filp_open+0x2cd/0xa00
 [<ffffffff804377c1>] ? tty_write+0x221/0x270
 [<ffffffff802cc6cb>] ? vfs_write+0xcb/0x170
 [<ffffffff802cc914>] ? sys_write+0x64/0x130
 [<ffffffff8020beab>] ? system_call_fastpath+0x16/0x1b
---[ end trace 7ef506680d7a9e26 ]---

Could it be that:

static void __free_vmap_area(struct vmap_area *va)
{
        BUG_ON(RB_EMPTY_NODE(&va->rb_node));
        rb_erase(&va->rb_node, &vmap_area_root);
        RB_CLEAR_NODE(&va->rb_node);
        list_del_rcu(&va->list);

        call_rcu(&va->rcu_head, rcu_free_va);
}

(especially clearing from the rb tree and va list)
allows reallocation of the node by kmalloc_node before its reclamation
(only done later by rcu_free_va) ?

Mathieu

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2009-10-06 17:33 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20091006143727.868480435@polymtl.ca>
2009-10-06 14:37 ` [patch 4/4] vunmap: Fix racy use of rcu_head Mathieu Desnoyers
2009-10-06 15:23   ` Mathieu Desnoyers
2009-10-06 16:43   ` Christoph Lameter
2009-10-06 17:33     ` Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).