* [PATCH] mm: prevent concurrent unmap_mapping_range() on the same inode --to stable
@ 2011-06-24 7:02 Wengang Wang
2011-07-07 23:55 ` [stable] " Greg KH
0 siblings, 1 reply; 4+ messages in thread
From: Wengang Wang @ 2011-06-24 7:02 UTC (permalink / raw)
To: stable; +Cc: greg.marsden, joe.jin, linux-kernel
mainline commit 2aa15890f3c191326678f1bd68af61ec6b8753ec
mm: prevent concurrent unmap_mapping_range() on the same inode
Michael Leun reported that running parallel opens on a fuse filesystem
can trigger a "kernel BUG at mm/truncate.c:475"
Gurudas Pai reported the same bug on NFS.
The reason is, unmap_mapping_range() is not prepared for more than
one concurrent invocation per inode. For example:
thread1: going through a big range, stops in the middle of a vma and
stores the restart address in vm_truncate_count.
thread2: comes in with a small (e.g. single page) unmap request on
the same vma, somewhere before restart_address, finds that the
vma was already unmapped up to the restart address and happily
returns without doing anything.
Another scenario would be two big unmap requests, both having to
restart the unmapping and each one setting vm_truncate_count to its
own value. This could go on forever without any of them being able to
finish.
Truncate and hole punching already serialize with i_mutex. Other
callers of unmap_mapping_range() do not, and it's difficult to get
i_mutex protection for all callers. In particular ->d_revalidate(),
which calls invalidate_inode_pages2_range() in fuse, may be called
with or without i_mutex.
This patch adds a new mutex to 'struct address_space' to prevent
running multiple concurrent unmap_mapping_range() on the same mapping.
[ We'll hopefully get rid of all this with the upcoming mm
preemptibility series by Peter Zijlstra, the "mm: Remove i_mmap_mutex
lockbreak" patch in particular. But that is for 2.6.39 ]
Adding this patch causes Kabi breakage.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reported-by: Michael Leun <lkml20101129@newton.leun.net>
Reported-by: Gurudas Pai <gurudas.pai@oracle.com>
Tested-by: Gurudas Pai <gurudas.pai@oracle.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
fs/inode.c | 22 +++++++++++++++-------
fs/nilfs2/btnode.c | 13 -------------
fs/nilfs2/btnode.h | 1 -
fs/nilfs2/super.c | 2 +-
include/linux/fs.h | 2 ++
mm/memory.c | 2 ++
6 files changed, 20 insertions(+), 22 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 4d8e3be..8bbe005 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -256,6 +256,20 @@ void destroy_inode(struct inode *inode)
kmem_cache_free(inode_cachep, (inode));
}
+void address_space_init_once(struct address_space *mapping)
+{
+ memset(mapping, 0, sizeof(*mapping));
+ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+ spin_lock_init(&mapping->tree_lock);
+ spin_lock_init(&mapping->i_mmap_lock);
+ INIT_LIST_HEAD(&mapping->private_list);
+ spin_lock_init(&mapping->private_lock);
+ INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+ INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
+
/*
* These are initializations that only need to be done
* once, because the fields are idempotent across use
@@ -267,13 +281,7 @@ void inode_init_once(struct inode *inode)
INIT_HLIST_NODE(&inode->i_hash);
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_devices);
- INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
- spin_lock_init(&inode->i_data.tree_lock);
- spin_lock_init(&inode->i_data.i_mmap_lock);
- INIT_LIST_HEAD(&inode->i_data.private_list);
- spin_lock_init(&inode->i_data.private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
- INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+ address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
#ifdef CONFIG_INOTIFY
INIT_LIST_HEAD(&inode->inotify_watches);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 84c2538..8dff317 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,19 +34,6 @@
#include "btnode.h"
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
- memset(btnc, 0, sizeof(*btnc));
- INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC);
- spin_lock_init(&btnc->tree_lock);
- INIT_LIST_HEAD(&btnc->private_list);
- spin_lock_init(&btnc->private_lock);
-
- spin_lock_init(&btnc->i_mmap_lock);
- INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
- INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
-}
-
static const struct address_space_operations def_btnode_aops = {
.sync_page = block_sync_page,
};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 3e22751..067913e 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
-void nilfs_btnode_cache_init_once(struct address_space *);
void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 63e7b10..93c11af 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -166,7 +166,7 @@ static void init_once(void *obj)
#ifdef CONFIG_NILFS_XATTR
init_rwsem(&ii->xattr_sem);
#endif
- nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+ address_space_init_once(&ii->i_btnode_cache);
ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
inode_init_once(&ii->vfs_inode);
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ff0962..1b9a47a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -635,6 +635,7 @@ struct address_space {
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
+ struct mutex unmap_mutex; /* to protect unmapping */
} __attribute__((aligned(sizeof(long))));
/*
* On most architectures that alignment is already the case; but
@@ -2158,6 +2159,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
extern int inode_init_always(struct super_block *, struct inode *);
extern void inode_init_once(struct inode *);
+extern void address_space_init_once(struct address_space *mapping);
extern void inode_add_to_lists(struct super_block *, struct inode *);
extern void iput(struct inode *);
extern struct inode * igrab(struct inode *);
diff --git a/mm/memory.c b/mm/memory.c
index 53c1da0..6c836d3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2454,6 +2454,7 @@ void unmap_mapping_range(struct address_space *mapping,
details.last_index = ULONG_MAX;
details.i_mmap_lock = &mapping->i_mmap_lock;
+ mutex_lock(&mapping->unmap_mutex);
spin_lock(&mapping->i_mmap_lock);
/* Protect against endless unmapping loops */
@@ -2470,6 +2471,7 @@ void unmap_mapping_range(struct address_space *mapping,
if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->unmap_mutex);
}
EXPORT_SYMBOL(unmap_mapping_range);
--
1.7.5.2
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [stable] [PATCH] mm: prevent concurrent unmap_mapping_range() on the same inode --to stable
2011-06-24 7:02 [PATCH] mm: prevent concurrent unmap_mapping_range() on the same inode --to stable Wengang Wang
@ 2011-07-07 23:55 ` Greg KH
2011-07-08 1:26 ` Wengang Wang
0 siblings, 1 reply; 4+ messages in thread
From: Greg KH @ 2011-07-07 23:55 UTC (permalink / raw)
To: Wengang Wang; +Cc: stable, greg.marsden, joe.jin, linux-kernel
On Fri, Jun 24, 2011 at 03:02:08PM +0800, Wengang Wang wrote:
> mainline commit 2aa15890f3c191326678f1bd68af61ec6b8753ec
>
> mm: prevent concurrent unmap_mapping_range() on the same inode
>
> Michael Leun reported that running parallel opens on a fuse filesystem
> can trigger a "kernel BUG at mm/truncate.c:475"
>
> Gurudas Pai reported the same bug on NFS.
>
> The reason is, unmap_mapping_range() is not prepared for more than
> one concurrent invocation per inode. For example:
>
> thread1: going through a big range, stops in the middle of a vma and
> stores the restart address in vm_truncate_count.
>
> thread2: comes in with a small (e.g. single page) unmap request on
> the same vma, somewhere before restart_address, finds that the
> vma was already unmapped up to the restart address and happily
> returns without doing anything.
>
> Another scenario would be two big unmap requests, both having to
> restart the unmapping and each one setting vm_truncate_count to its
> own value. This could go on forever without any of them being able to
> finish.
>
> Truncate and hole punching already serialize with i_mutex. Other
> callers of unmap_mapping_range() do not, and it's difficult to get
> i_mutex protection for all callers. In particular ->d_revalidate(),
> which calls invalidate_inode_pages2_range() in fuse, may be called
> with or without i_mutex.
>
> This patch adds a new mutex to 'struct address_space' to prevent
> running multiple concurrent unmap_mapping_range() on the same mapping.
>
> [ We'll hopefully get rid of all this with the upcoming mm
> preemptibility series by Peter Zijlstra, the "mm: Remove i_mmap_mutex
> lockbreak" patch in particular. But that is for 2.6.39 ]
>
>
> Adding this patch causes Kabi breakage.
>
> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
> Reported-by: Michael Leun <lkml20101129@newton.leun.net>
> Reported-by: Gurudas Pai <gurudas.pai@oracle.com>
> Tested-by: Gurudas Pai <gurudas.pai@oracle.com>
> Acked-by: Hugh Dickins <hughd@google.com>
> Cc: stable@kernel.org
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
As this patch showed up in 2.6.39, I'm confused as to what you wanted me
to do with it, so I've dropped it from my queue.
greg k-h
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [stable] [PATCH] mm: prevent concurrent unmap_mapping_range() on the same inode --to stable
2011-07-07 23:55 ` [stable] " Greg KH
@ 2011-07-08 1:26 ` Wengang Wang
2011-07-08 1:41 ` Greg KH
0 siblings, 1 reply; 4+ messages in thread
From: Wengang Wang @ 2011-07-08 1:26 UTC (permalink / raw)
To: Greg KH; +Cc: Wengang Wang, stable, greg.marsden, joe.jin, linux-kernel
Hi, greg k-h
On 11-07-07 16:55, Greg KH wrote:
> On Fri, Jun 24, 2011 at 03:02:08PM +0800, Wengang Wang wrote:
> > mainline commit 2aa15890f3c191326678f1bd68af61ec6b8753ec
> >
> > mm: prevent concurrent unmap_mapping_range() on the same inode
> >
> > Michael Leun reported that running parallel opens on a fuse filesystem
> > can trigger a "kernel BUG at mm/truncate.c:475"
> >
> > Gurudas Pai reported the same bug on NFS.
> >
> > The reason is, unmap_mapping_range() is not prepared for more than
> > one concurrent invocation per inode. For example:
> >
> > thread1: going through a big range, stops in the middle of a vma and
> > stores the restart address in vm_truncate_count.
> >
> > thread2: comes in with a small (e.g. single page) unmap request on
> > the same vma, somewhere before restart_address, finds that the
> > vma was already unmapped up to the restart address and happily
> > returns without doing anything.
> >
> > Another scenario would be two big unmap requests, both having to
> > restart the unmapping and each one setting vm_truncate_count to its
> > own value. This could go on forever without any of them being able to
> > finish.
> >
> > Truncate and hole punching already serialize with i_mutex. Other
> > callers of unmap_mapping_range() do not, and it's difficult to get
> > i_mutex protection for all callers. In particular ->d_revalidate(),
> > which calls invalidate_inode_pages2_range() in fuse, may be called
> > with or without i_mutex.
> >
> > This patch adds a new mutex to 'struct address_space' to prevent
> > running multiple concurrent unmap_mapping_range() on the same mapping.
> >
> > [ We'll hopefully get rid of all this with the upcoming mm
> > preemptibility series by Peter Zijlstra, the "mm: Remove i_mmap_mutex
> > lockbreak" patch in particular. But that is for 2.6.39 ]
> >
> >
> > Adding this patch causes Kabi breakage.
> >
> > Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
> > Reported-by: Michael Leun <lkml20101129@newton.leun.net>
> > Reported-by: Gurudas Pai <gurudas.pai@oracle.com>
> > Tested-by: Gurudas Pai <gurudas.pai@oracle.com>
> > Acked-by: Hugh Dickins <hughd@google.com>
> > Cc: stable@kernel.org
> > Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> > Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
>
> As this patch showed up in 2.6.39, I'm confused as to what you wanted me
> to do with it, so I've dropped it from my queue.
I hope this committed in 2.6.32 stable tree please.
regards,
wengang.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [stable] [PATCH] mm: prevent concurrent unmap_mapping_range() on the same inode --to stable
2011-07-08 1:26 ` Wengang Wang
@ 2011-07-08 1:41 ` Greg KH
0 siblings, 0 replies; 4+ messages in thread
From: Greg KH @ 2011-07-08 1:41 UTC (permalink / raw)
To: Wengang Wang; +Cc: stable, greg.marsden, joe.jin, linux-kernel
On Fri, Jul 08, 2011 at 09:26:49AM +0800, Wengang Wang wrote:
> Hi, greg k-h
>
> On 11-07-07 16:55, Greg KH wrote:
> > On Fri, Jun 24, 2011 at 03:02:08PM +0800, Wengang Wang wrote:
> > > mainline commit 2aa15890f3c191326678f1bd68af61ec6b8753ec
> > >
> > > mm: prevent concurrent unmap_mapping_range() on the same inode
> > >
> > > Michael Leun reported that running parallel opens on a fuse filesystem
> > > can trigger a "kernel BUG at mm/truncate.c:475"
> > >
> > > Gurudas Pai reported the same bug on NFS.
> > >
> > > The reason is, unmap_mapping_range() is not prepared for more than
> > > one concurrent invocation per inode. For example:
> > >
> > > thread1: going through a big range, stops in the middle of a vma and
> > > stores the restart address in vm_truncate_count.
> > >
> > > thread2: comes in with a small (e.g. single page) unmap request on
> > > the same vma, somewhere before restart_address, finds that the
> > > vma was already unmapped up to the restart address and happily
> > > returns without doing anything.
> > >
> > > Another scenario would be two big unmap requests, both having to
> > > restart the unmapping and each one setting vm_truncate_count to its
> > > own value. This could go on forever without any of them being able to
> > > finish.
> > >
> > > Truncate and hole punching already serialize with i_mutex. Other
> > > callers of unmap_mapping_range() do not, and it's difficult to get
> > > i_mutex protection for all callers. In particular ->d_revalidate(),
> > > which calls invalidate_inode_pages2_range() in fuse, may be called
> > > with or without i_mutex.
> > >
> > > This patch adds a new mutex to 'struct address_space' to prevent
> > > running multiple concurrent unmap_mapping_range() on the same mapping.
> > >
> > > [ We'll hopefully get rid of all this with the upcoming mm
> > > preemptibility series by Peter Zijlstra, the "mm: Remove i_mmap_mutex
> > > lockbreak" patch in particular. But that is for 2.6.39 ]
> > >
> > >
> > > Adding this patch causes Kabi breakage.
> > >
> > > Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
> > > Reported-by: Michael Leun <lkml20101129@newton.leun.net>
> > > Reported-by: Gurudas Pai <gurudas.pai@oracle.com>
> > > Tested-by: Gurudas Pai <gurudas.pai@oracle.com>
> > > Acked-by: Hugh Dickins <hughd@google.com>
> > > Cc: stable@kernel.org
> > > Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> > > Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
> >
> > As this patch showed up in 2.6.39, I'm confused as to what you wanted me
> > to do with it, so I've dropped it from my queue.
>
> I hope this committed in 2.6.32 stable tree please.
Ah, ok, care to resend this, with that information in it somewhere, so I
can do that?
thanks,
greg "I get a _lot_ of email" k-h
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2011-07-08 1:52 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-24 7:02 [PATCH] mm: prevent concurrent unmap_mapping_range() on the same inode --to stable Wengang Wang
2011-07-07 23:55 ` [stable] " Greg KH
2011-07-08 1:26 ` Wengang Wang
2011-07-08 1:41 ` Greg KH
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox