* [patch 4/4] vunmap: Fix racy use of rcu_head [not found] <20091006143727.868480435@polymtl.ca> @ 2009-10-06 14:37 ` Mathieu Desnoyers 2009-10-06 15:23 ` Mathieu Desnoyers 2009-10-06 16:43 ` Christoph Lameter 0 siblings, 2 replies; 4+ messages in thread From: Mathieu Desnoyers @ 2009-10-06 14:37 UTC (permalink / raw) To: akpm, Ingo Molnar, linux-kernel, Paul E. McKenney Cc: Mathieu Desnoyers, cl, linux-mm [-- Attachment #1: vunmap-fix-teardown.patch --] [-- Type: text/plain, Size: 3839 bytes --] Repetitive use of vmap/vunmap on the same address triggers this bug. Simplest fix: directly kfree the data structure rather than doing it lazily. Impact: (-) slight slowdown on vunmap (+) stop messing up rcu callback lists ;) Caught it with DEBUG_RCU_HEAD, running LTTng armall/disarmall in loops. Immediate values (with breakpoint-ipi scheme) are still using vunmap. ------------[ cut here ]------------ WARNING: at kernel/rcutree.c:1199 __call_rcu+0x181/0x190() Hardware name: X7DAL Modules linked in: loop ltt_statedump ltt_userspace_event ipc_tr] Pid: 4527, comm: ltt-armall Not tainted 2.6.30.9-trace #30 Call Trace: [<ffffffff8027b181>] ? __call_rcu+0x181/0x190 [<ffffffff8027b181>] ? __call_rcu+0x181/0x190 [<ffffffff8023a6a9>] ? warn_slowpath_common+0x79/0xd0 [<ffffffff802b4890>] ? rcu_free_va+0x0/0x10 [<ffffffff8027b181>] ? __call_rcu+0x181/0x190 [<ffffffff802b5298>] ? __purge_vmap_area_lazy+0x1a8/0x1e0 [<ffffffff802b59d4>] ? free_unmap_vmap_area_noflush+0x74/0x80 [<ffffffff802b5a0e>] ? remove_vm_area+0x2e/0x80 [<ffffffff802b5b35>] ? __vunmap+0x45/0xf0 [<ffffffffa002e826>] ? ltt_statedump_start+0x7b6/0x820 [ltt_sta] [<ffffffff8068978a>] ? arch_imv_update+0x16a/0x2f0 [<ffffffff8027dd13>] ? imv_update_range+0x53/0xa0 [<ffffffff8026235b>] ? _module_imv_update+0x4b/0x60 [<ffffffff80262385>] ? module_imv_update+0x15/0x30 [<ffffffff80280049>] ? marker_probe_register+0x149/0xb90 [<ffffffff8040c8e0>] ? ltt_vtrace+0x0/0x8c0 [<ffffffff80409330>] ? ltt_marker_connect+0xd0/0x150 [<ffffffff8040f8dc>] ? marker_enable_write+0xec/0x120 [<ffffffff802c6cb8>] ? __dentry_open+0x268/0x350 [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170 [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170 [<ffffffff802c973b>] ? vfs_write+0xcb/0x170 [<ffffffff802c9984>] ? sys_write+0x64/0x130 [<ffffffff8020beeb>] ? system_call_fastpath+0x16/0x1b ---[ end trace ef92443e716fa199 ]--- ------------[ cut here ]------------ Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> CC: cl@linux-foundation.org CC: mingo@elte.hu CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> CC: linux-mm@kvack.org CC: akpm@linux-foundation.org --- mm/vmalloc.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) Index: linux-2.6-lttng/mm/vmalloc.c =================================================================== --- linux-2.6-lttng.orig/mm/vmalloc.c 2009-10-06 09:37:04.000000000 -0400 +++ linux-2.6-lttng/mm/vmalloc.c 2009-10-06 09:37:56.000000000 -0400 @@ -417,13 +417,6 @@ overflow: return va; } -static void rcu_free_va(struct rcu_head *head) -{ - struct vmap_area *va = container_of(head, struct vmap_area, rcu_head); - - kfree(va); -} - static void __free_vmap_area(struct vmap_area *va) { BUG_ON(RB_EMPTY_NODE(&va->rb_node)); @@ -431,7 +424,7 @@ static void __free_vmap_area(struct vmap RB_CLEAR_NODE(&va->rb_node); list_del_rcu(&va->list); - call_rcu(&va->rcu_head, rcu_free_va); + kfree(va); } /* @@ -757,13 +750,6 @@ static struct vmap_block *new_vmap_block return vb; } -static void rcu_free_vb(struct rcu_head *head) -{ - struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head); - - kfree(vb); -} - static void free_vmap_block(struct vmap_block *vb) { struct vmap_block *tmp; @@ -778,7 +764,7 @@ static void free_vmap_block(struct vmap_ BUG_ON(tmp != vb); free_unmap_vmap_area_noflush(vb->va); - call_rcu(&vb->rcu_head, rcu_free_vb); + kfree(vb); } static void *vb_alloc(unsigned long size, gfp_t gfp_mask) -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [patch 4/4] vunmap: Fix racy use of rcu_head 2009-10-06 14:37 ` [patch 4/4] vunmap: Fix racy use of rcu_head Mathieu Desnoyers @ 2009-10-06 15:23 ` Mathieu Desnoyers 2009-10-06 16:43 ` Christoph Lameter 1 sibling, 0 replies; 4+ messages in thread From: Mathieu Desnoyers @ 2009-10-06 15:23 UTC (permalink / raw) To: akpm, Ingo Molnar, linux-kernel, Paul E. McKenney; +Cc: cl, linux-mm * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca) wrote: > Repetitive use of vmap/vunmap on the same address triggers this bug. > > Simplest fix: directly kfree the data structure rather than doing it lazily. > I assumed that call_rcu in vunmap() is only used to postpone kfree() from the hot path, but I ask for comfirmation/infirmation about that. I fear delayed reclamation might be there for a reason. Are there rcu read sides depending on this behavior ? If yes, then this patch is wrong, and we could change it for synchronize_rcu(); kfree(....); Mathieu > Impact: > (-) slight slowdown on vunmap > (+) stop messing up rcu callback lists ;) > > Caught it with DEBUG_RCU_HEAD, running LTTng armall/disarmall in loops. > Immediate values (with breakpoint-ipi scheme) are still using vunmap. > > ------------[ cut here ]------------ > WARNING: at kernel/rcutree.c:1199 __call_rcu+0x181/0x190() > Hardware name: X7DAL > Modules linked in: loop ltt_statedump ltt_userspace_event ipc_tr] > Pid: 4527, comm: ltt-armall Not tainted 2.6.30.9-trace #30 > Call Trace: > [<ffffffff8027b181>] ? __call_rcu+0x181/0x190 > [<ffffffff8027b181>] ? __call_rcu+0x181/0x190 > [<ffffffff8023a6a9>] ? warn_slowpath_common+0x79/0xd0 > [<ffffffff802b4890>] ? rcu_free_va+0x0/0x10 > [<ffffffff8027b181>] ? __call_rcu+0x181/0x190 > [<ffffffff802b5298>] ? __purge_vmap_area_lazy+0x1a8/0x1e0 > [<ffffffff802b59d4>] ? free_unmap_vmap_area_noflush+0x74/0x80 > [<ffffffff802b5a0e>] ? remove_vm_area+0x2e/0x80 > [<ffffffff802b5b35>] ? __vunmap+0x45/0xf0 > [<ffffffffa002e826>] ? ltt_statedump_start+0x7b6/0x820 [ltt_sta] > [<ffffffff8068978a>] ? arch_imv_update+0x16a/0x2f0 > [<ffffffff8027dd13>] ? imv_update_range+0x53/0xa0 > [<ffffffff8026235b>] ? _module_imv_update+0x4b/0x60 > [<ffffffff80262385>] ? module_imv_update+0x15/0x30 > [<ffffffff80280049>] ? marker_probe_register+0x149/0xb90 > [<ffffffff8040c8e0>] ? ltt_vtrace+0x0/0x8c0 > [<ffffffff80409330>] ? ltt_marker_connect+0xd0/0x150 > [<ffffffff8040f8dc>] ? marker_enable_write+0xec/0x120 > [<ffffffff802c6cb8>] ? __dentry_open+0x268/0x350 > [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170 > [<ffffffff80280b2c>] ? marker_probe_cb+0x9c/0x170 > [<ffffffff802c973b>] ? vfs_write+0xcb/0x170 > [<ffffffff802c9984>] ? sys_write+0x64/0x130 > [<ffffffff8020beeb>] ? system_call_fastpath+0x16/0x1b > ---[ end trace ef92443e716fa199 ]--- > ------------[ cut here ]------------ > > Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> > CC: cl@linux-foundation.org > CC: mingo@elte.hu > CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> > CC: linux-mm@kvack.org > CC: akpm@linux-foundation.org > --- > mm/vmalloc.c | 18 ++---------------- > 1 file changed, 2 insertions(+), 16 deletions(-) > > Index: linux-2.6-lttng/mm/vmalloc.c > =================================================================== > --- linux-2.6-lttng.orig/mm/vmalloc.c 2009-10-06 09:37:04.000000000 -0400 > +++ linux-2.6-lttng/mm/vmalloc.c 2009-10-06 09:37:56.000000000 -0400 > @@ -417,13 +417,6 @@ overflow: > return va; > } > > -static void rcu_free_va(struct rcu_head *head) > -{ > - struct vmap_area *va = container_of(head, struct vmap_area, rcu_head); > - > - kfree(va); > -} > - > static void __free_vmap_area(struct vmap_area *va) > { > BUG_ON(RB_EMPTY_NODE(&va->rb_node)); > @@ -431,7 +424,7 @@ static void __free_vmap_area(struct vmap > RB_CLEAR_NODE(&va->rb_node); > list_del_rcu(&va->list); > > - call_rcu(&va->rcu_head, rcu_free_va); > + kfree(va); > } > > /* > @@ -757,13 +750,6 @@ static struct vmap_block *new_vmap_block > return vb; > } > > -static void rcu_free_vb(struct rcu_head *head) > -{ > - struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head); > - > - kfree(vb); > -} > - > static void free_vmap_block(struct vmap_block *vb) > { > struct vmap_block *tmp; > @@ -778,7 +764,7 @@ static void free_vmap_block(struct vmap_ > BUG_ON(tmp != vb); > > free_unmap_vmap_area_noflush(vb->va); > - call_rcu(&vb->rcu_head, rcu_free_vb); > + kfree(vb); > } > > static void *vb_alloc(unsigned long size, gfp_t gfp_mask) > > -- > Mathieu Desnoyers > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [patch 4/4] vunmap: Fix racy use of rcu_head 2009-10-06 14:37 ` [patch 4/4] vunmap: Fix racy use of rcu_head Mathieu Desnoyers 2009-10-06 15:23 ` Mathieu Desnoyers @ 2009-10-06 16:43 ` Christoph Lameter 2009-10-06 17:33 ` Mathieu Desnoyers 1 sibling, 1 reply; 4+ messages in thread From: Christoph Lameter @ 2009-10-06 16:43 UTC (permalink / raw) To: nickpiggin, Mathieu Desnoyers Cc: akpm, Ingo Molnar, linux-kernel, Paul E. McKenney, linux-mm On Tue, 6 Oct 2009, Mathieu Desnoyers wrote: > Simplest fix: directly kfree the data structure rather than doing it lazily. The delay is necessary as far as I can tell for performance reasons. But Nick was the last one fiddling around with the subsystem as far as I remember. CCing him. May be he has a minute to think about a fix that preserved performance. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [patch 4/4] vunmap: Fix racy use of rcu_head 2009-10-06 16:43 ` Christoph Lameter @ 2009-10-06 17:33 ` Mathieu Desnoyers 0 siblings, 0 replies; 4+ messages in thread From: Mathieu Desnoyers @ 2009-10-06 17:33 UTC (permalink / raw) To: Christoph Lameter Cc: nickpiggin, akpm, Ingo Molnar, linux-kernel, Paul E. McKenney, linux-mm * Christoph Lameter (cl@linux-foundation.org) wrote: > On Tue, 6 Oct 2009, Mathieu Desnoyers wrote: > > > Simplest fix: directly kfree the data structure rather than doing it lazily. > > The delay is necessary as far as I can tell for performance reasons. But > Nick was the last one fiddling around with the subsystem as far as I > remember. CCing him. May be he has a minute to think about a fix that > preserved performance. > Thanks, More info on the problem: I added a check in alloc_vmap_area() at: va = kmalloc_node(sizeof(struct vmap_area), gfp_mask & GFP_RECLAIM_MASK, node); WARN_ON(va->rcu_head.debug == LIST_POISON1); (memory allocation debugging is off in my config) I used this along with my new call rcu debug patch to check if memory is reallocated before the callback (doing kfree) is executed. It triggers this: WARNING: at mm/vmalloc.c:343 alloc_vmap_area+0x305/0x340() Hardware name: X7DAL Modules linked in: loop ltt_statedump ltt_userspace_event ipc_tr] Pid: 5584, comm: ltt-disarmall Not tainted 2.6.30.9-trace #41 Call Trace: [<ffffffff802b8665>] ? alloc_vmap_area+0x305/0x340 [<ffffffff802b8665>] ? alloc_vmap_area+0x305/0x340 [<ffffffff8023cf29>] ? warn_slowpath_common+0x79/0xd0 [<ffffffff802b8665>] ? alloc_vmap_area+0x305/0x340 [<ffffffff80237670>] ? default_wake_function+0x0/0x10 [<ffffffff802b8769>] ? __get_vm_area_node+0xc9/0x1d0 [<ffffffff805ae965>] ? sys_getsockopt+0x85/0x160 [<ffffffff8040f160>] ? ltt_vtrace+0x0/0x8d0 [<ffffffff802b88dd>] ? get_vm_area_caller+0x2d/0x40 [<ffffffff8068bc3e>] ? arch_imv_update+0x10e/0x2f0 [<ffffffff802b93aa>] ? vmap+0x4a/0x80 [<ffffffff8068bc3e>] ? arch_imv_update+0x10e/0x2f0 [<ffffffff80283e6d>] ? get_tracepoint+0x25d/0x290 [<ffffffff80280b93>] ? imv_update_range+0x53/0xa0 [<ffffffff80284821>] ? tracepoint_update_probes+0x21/0x30 [<ffffffff802848b3>] ? tracepoint_probe_update_all+0x83/0x100 [<ffffffff80282ac1>] ? marker_update_probes+0x21/0x40 [<ffffffff8040f160>] ? ltt_vtrace+0x0/0x8d0 [<ffffffff80282d5d>] ? marker_probe_unregister+0xad/0x130 [<ffffffff8040b9dc>] ? ltt_marker_disconnect+0xdc/0x120 [<ffffffff804121d2>] ? marker_enable_write+0x112/0x120 [<ffffffff80688dd0>] ? _spin_unlock+0x10/0x30 [<ffffffff802e6b36>] ? mnt_drop_write+0x76/0x180 [<ffffffff802d96dd>] ? do_filp_open+0x2cd/0xa00 [<ffffffff804377c1>] ? tty_write+0x221/0x270 [<ffffffff802cc6cb>] ? vfs_write+0xcb/0x170 [<ffffffff802cc914>] ? sys_write+0x64/0x130 [<ffffffff8020beab>] ? system_call_fastpath+0x16/0x1b ---[ end trace 7ef506680d7a9e26 ]--- Could it be that: static void __free_vmap_area(struct vmap_area *va) { BUG_ON(RB_EMPTY_NODE(&va->rb_node)); rb_erase(&va->rb_node, &vmap_area_root); RB_CLEAR_NODE(&va->rb_node); list_del_rcu(&va->list); call_rcu(&va->rcu_head, rcu_free_va); } (especially clearing from the rb tree and va list) allows reallocation of the node by kmalloc_node before its reclamation (only done later by rcu_free_va) ? Mathieu -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-10-06 17:33 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20091006143727.868480435@polymtl.ca>
2009-10-06 14:37 ` [patch 4/4] vunmap: Fix racy use of rcu_head Mathieu Desnoyers
2009-10-06 15:23 ` Mathieu Desnoyers
2009-10-06 16:43 ` Christoph Lameter
2009-10-06 17:33 ` Mathieu Desnoyers
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).