* [PATCH 1/3] inotify: seperate new watch creation updating existing watches
2009-08-24 17:37 [PATCH 0/3] Inotify cleanup and idr race Eric Paris
@ 2009-08-24 17:38 ` Eric Paris
2009-08-24 17:38 ` [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction Eric Paris
2009-08-24 17:38 ` [PATCH 3/3] inotify: fix locking around inotify watching in the idr Eric Paris
2 siblings, 0 replies; 13+ messages in thread
From: Eric Paris @ 2009-08-24 17:38 UTC (permalink / raw)
To: linux-kernel, linux-fs-devel
Cc: zdenek.kabelac, torvalds, christoph.thielecke, akpm, viro,
grant.wilson, mikko.cal
There is nothing known wrong with the inotify watch addition/modification
but this patch seperates the two code paths to make them each easy to
verify as correct.
Signed-off-by: Eric Paris <eparis@redhat.com>
---
fs/notify/inotify/inotify_user.c | 172 +++++++++++++++++++++++---------------
1 files changed, 103 insertions(+), 69 deletions(-)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index dc32ed8..d8f73c2 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -431,80 +431,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry)
kmem_cache_free(inotify_inode_mark_cachep, ientry);
}
-static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+static int inotify_update_existing_watch(struct fsnotify_group *group,
+ struct inode *inode,
+ u32 arg)
{
- struct fsnotify_mark_entry *entry = NULL;
+ struct fsnotify_mark_entry *entry;
struct inotify_inode_mark_entry *ientry;
- struct inotify_inode_mark_entry *tmp_ientry;
- int ret = 0;
- int add = (arg & IN_MASK_ADD);
- __u32 mask;
__u32 old_mask, new_mask;
+ __u32 mask;
+ int add = (arg & IN_MASK_ADD);
+ int ret;
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
if (unlikely(!mask))
return -EINVAL;
- tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
- if (unlikely(!tmp_ientry))
- return -ENOMEM;
- /* we set the mask at the end after attaching it */
- fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
- tmp_ientry->wd = -1;
-
-find_entry:
spin_lock(&inode->i_lock);
entry = fsnotify_find_mark_entry(group, inode);
spin_unlock(&inode->i_lock);
- if (entry) {
- ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
- } else {
- ret = -ENOSPC;
- if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
- goto out_err;
-retry:
- ret = -ENOMEM;
- if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
- goto out_err;
-
- spin_lock(&group->inotify_data.idr_lock);
- ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
- group->inotify_data.last_wd,
- &tmp_ientry->wd);
- spin_unlock(&group->inotify_data.idr_lock);
- if (ret) {
- if (ret == -EAGAIN)
- goto retry;
- goto out_err;
- }
+ if (!entry)
+ return -ENOENT;
- ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
- if (ret) {
- inotify_remove_from_idr(group, tmp_ientry);
- if (ret == -EEXIST)
- goto find_entry;
- goto out_err;
- }
-
- /* tmp_ientry has been added to the inode, so we are all set up.
- * now we just need to make sure tmp_ientry doesn't get freed and
- * we need to set up entry and ientry so the generic code can
- * do its thing. */
- ientry = tmp_ientry;
- entry = &ientry->fsn_entry;
- tmp_ientry = NULL;
-
- atomic_inc(&group->inotify_data.user->inotify_watches);
-
- /* update the idr hint */
- group->inotify_data.last_wd = ientry->wd;
-
- /* we put the mark on the idr, take a reference */
- fsnotify_get_mark(entry);
- }
-
- ret = ientry->wd;
+ ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
spin_lock(&entry->lock);
@@ -536,18 +485,103 @@ retry:
fsnotify_recalc_group_mask(group);
}
- /* this either matches fsnotify_find_mark_entry, or init_mark_entry
- * depending on which path we took... */
+ /* return the wd */
+ ret = ientry->wd;
+
+ /* match the get from fsnotify_find_mark_entry() */
fsnotify_put_mark(entry);
+ return ret;
+}
+
+static int inotify_new_watch(struct fsnotify_group *group,
+ struct inode *inode,
+ u32 arg)
+{
+ struct inotify_inode_mark_entry *tmp_ientry;
+ __u32 mask;
+ int ret;
+
+ /* don't allow invalid bits: we don't want flags set */
+ mask = inotify_arg_to_mask(arg);
+ if (unlikely(!mask))
+ return -EINVAL;
+
+ tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+ if (unlikely(!tmp_ientry))
+ return -ENOMEM;
+
+ fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
+ tmp_ientry->fsn_entry.mask = mask;
+ tmp_ientry->wd = -1;
+
+ ret = -ENOSPC;
+ if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
+ goto out_err;
+retry:
+ ret = -ENOMEM;
+ if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
+ goto out_err;
+
+ spin_lock(&group->inotify_data.idr_lock);
+ ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
+ group->inotify_data.last_wd,
+ &tmp_ientry->wd);
+ spin_unlock(&group->inotify_data.idr_lock);
+ if (ret) {
+ /* idr was out of memory allocate and try again */
+ if (ret == -EAGAIN)
+ goto retry;
+ goto out_err;
+ }
+
+ /* we are on the idr, now get on the inode */
+ ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
+ if (ret) {
+ /* we failed to get on the inode, get off the idr */
+ inotify_remove_from_idr(group, tmp_ientry);
+ goto out_err;
+ }
+
+ /* we put the mark on the idr, take a reference */
+ fsnotify_get_mark(&tmp_ientry->fsn_entry);
+
+ /* update the idr hint, who cares about races, it's just a hint */
+ group->inotify_data.last_wd = tmp_ientry->wd;
+
+ /* increment the number of watches the user has */
+ atomic_inc(&group->inotify_data.user->inotify_watches);
+
+ /* return the watch descriptor for this new entry */
+ ret = tmp_ientry->wd;
+
+ /* match the ref from fsnotify_init_markentry() */
+ fsnotify_put_mark(&tmp_ientry->fsn_entry);
+
out_err:
- /* could be an error, could be that we found an existing mark */
- if (tmp_ientry) {
- /* on the idr but didn't make it on the inode */
- if (tmp_ientry->wd != -1)
- inotify_remove_from_idr(group, tmp_ientry);
+ if (ret < 0)
kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
- }
+
+ return ret;
+}
+
+static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
+{
+ int ret = 0;
+
+retry:
+ /* try to update and existing watch with the new arg */
+ ret = inotify_update_existing_watch(group, inode, arg);
+ /* no mark present, try to add a new one */
+ if (ret == -ENOENT)
+ ret = inotify_new_watch(group, inode, arg);
+ /*
+ * inotify_new_watch could race with another thread which did an
+ * inotify_new_watch between the update_existing and the add watch
+ * here, go back and try to update an existing mark again.
+ */
+ if (ret == -EEXIST)
+ goto retry;
return ret;
}
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-24 17:37 [PATCH 0/3] Inotify cleanup and idr race Eric Paris
2009-08-24 17:38 ` [PATCH 1/3] inotify: seperate new watch creation updating existing watches Eric Paris
@ 2009-08-24 17:38 ` Eric Paris
2009-08-24 18:34 ` Frans Pop
2009-08-24 17:38 ` [PATCH 3/3] inotify: fix locking around inotify watching in the idr Eric Paris
2 siblings, 1 reply; 13+ messages in thread
From: Eric Paris @ 2009-08-24 17:38 UTC (permalink / raw)
To: linux-kernel, linux-fs-devel
Cc: zdenek.kabelac, torvalds, christoph.thielecke, akpm, viro,
grant.wilson, mikko.cal
If an inotify watch is left in the idr when an fsnotify group is destroyed
this will lead to a BUG. This is not a dangerous situation and really
indicates a programming bug and leak of memory. This patch changes it to
use a WARN and a printk rather than killing people's boxes.
Signed-off-by: Eric Paris <eparis@redhat.com>
---
fs/notify/inotify/inotify_fsnotify.c | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 5dcbafe..cf003fc 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -107,6 +107,16 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
static int idr_callback(int id, void *p, void *data)
{
+ struct fsnotify_mark_entry *entry;
+ struct inotify_inode_mark_entry *ientry;
+
+ entry = p;
+ ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+
+ WARN(1, "inotify closing but id=%d still in idr. Probably leaking memory\n", id);
+
+ printk(KERN_WARNING "group=%p entry->group=%p inode=%p wd=%d\n",
+ data, entry->group, entry->inode, ientry->wd);
BUG();
return 0;
}
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-24 17:38 ` [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction Eric Paris
@ 2009-08-24 18:34 ` Frans Pop
2009-08-24 19:09 ` Eric Paris
0 siblings, 1 reply; 13+ messages in thread
From: Frans Pop @ 2009-08-24 18:34 UTC (permalink / raw)
To: Eric Paris
Cc: linux-kernel, linux-fs-devel, zdenek.kabelac, torvalds,
christoph.thielecke, akpm, viro, grant.wilson, mikko.cal
Eric Paris wrote:
> If an inotify watch is left in the idr when an fsnotify group is destroyed
> this will lead to a BUG. This is not a dangerous situation and really
> indicates a programming bug and leak of memory. This patch changes it to
> use a WARN and a printk rather than killing people's boxes.
>
> Signed-off-by: Eric Paris <eparis@redhat.com>
> ---
>
> --- a/fs/notify/inotify/inotify_fsnotify.c
> +++ b/fs/notify/inotify/inotify_fsnotify.c
> @@ -107,6 +107,16 @@ static bool inotify_should_send_event(struct
> fsnotify_group *group, struct inode
>
> static int idr_callback(int id, void *p, void *data)
> {
> + struct fsnotify_mark_entry *entry;
> + struct inotify_inode_mark_entry *ientry;
> +
> + entry = p;
> + ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
> +
> + WARN(1, "inotify closing but id=%d still in idr. Probably leaking memory\n", id); +
> + printk(KERN_WARNING "group=%p entry->group=%p inode=%p wd=%d\n",
> + data, entry->group, entry->inode, ientry->wd);
> BUG();
> return 0;
> }
I suspect you intended to remove the BUG?
Cheers,
FJP
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-24 18:34 ` Frans Pop
@ 2009-08-24 19:09 ` Eric Paris
2009-08-24 19:23 ` Linus Torvalds
0 siblings, 1 reply; 13+ messages in thread
From: Eric Paris @ 2009-08-24 19:09 UTC (permalink / raw)
To: Frans Pop
Cc: linux-kernel, linux-fs-devel, zdenek.kabelac, torvalds,
christoph.thielecke, akpm, viro, grant.wilson, mikko.cal
On Mon, 2009-08-24 at 20:34 +0200, Frans Pop wrote:
> Eric Paris wrote:
>
> > If an inotify watch is left in the idr when an fsnotify group is destroyed
> > this will lead to a BUG. This is not a dangerous situation and really
> > indicates a programming bug and leak of memory. This patch changes it to
> > use a WARN and a printk rather than killing people's boxes.
> >
> > Signed-off-by: Eric Paris <eparis@redhat.com>
> > ---
> >
> > --- a/fs/notify/inotify/inotify_fsnotify.c
> > +++ b/fs/notify/inotify/inotify_fsnotify.c
> > @@ -107,6 +107,16 @@ static bool inotify_should_send_event(struct
> > fsnotify_group *group, struct inode
> >
> > static int idr_callback(int id, void *p, void *data)
> > {
> > + struct fsnotify_mark_entry *entry;
> > + struct inotify_inode_mark_entry *ientry;
> > +
> > + entry = p;
> > + ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
> > +
> > + WARN(1, "inotify closing but id=%d still in idr. Probably leaking memory\n", id); +
> > + printk(KERN_WARNING "group=%p entry->group=%p inode=%p wd=%d\n",
> > + data, entry->group, entry->inode, ientry->wd);
> > BUG();
> > return 0;
> > }
>
> I suspect you intended to remove the BUG?
You suspect correctly. :( I'll fix in my tree before I request a
pull....
-Eric
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-24 19:09 ` Eric Paris
@ 2009-08-24 19:23 ` Linus Torvalds
2009-08-24 19:32 ` Eric Paris
2009-08-24 20:36 ` Eric Paris
0 siblings, 2 replies; 13+ messages in thread
From: Linus Torvalds @ 2009-08-24 19:23 UTC (permalink / raw)
To: Eric Paris
Cc: Frans Pop, linux-kernel, linux-fs-devel, zdenek.kabelac,
christoph.thielecke, akpm, viro, grant.wilson, mikko.cal
On Mon, 24 Aug 2009, Eric Paris wrote:
>
> You suspect correctly. :( I'll fix in my tree before I request a
> pull....
Can you change the WARN() to a WARN_ONCE() too.
Generally, we prefer the "ONCE" variant if there isn't some intrisic
reason why we'd want to know about each event. If there is some load that
triggers this, do you care about _how_many_times_ it happens, or about the
fact that it happens in the first place? In this case, I think it's the
"happens in the first place" case - once you see it a single time, the
rest of the reports aren't that interesting.
And using the ONCE thing means that we know it's not going to spam the
logs.
Linus
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-24 19:23 ` Linus Torvalds
@ 2009-08-24 19:32 ` Eric Paris
2009-08-24 20:36 ` Eric Paris
1 sibling, 0 replies; 13+ messages in thread
From: Eric Paris @ 2009-08-24 19:32 UTC (permalink / raw)
To: Linus Torvalds
Cc: Frans Pop, linux-kernel, zdenek.kabelac, christoph.thielecke,
akpm, viro, grant.wilson, mikko.cal
On Mon, 2009-08-24 at 12:23 -0700, Linus Torvalds wrote:
>
> On Mon, 24 Aug 2009, Eric Paris wrote:
> >
> > You suspect correctly. :( I'll fix in my tree before I request a
> > pull....
>
> Can you change the WARN() to a WARN_ONCE() too.
>
> Generally, we prefer the "ONCE" variant if there isn't some intrisic
> reason why we'd want to know about each event. If there is some load that
> triggers this, do you care about _how_many_times_ it happens, or about the
> fact that it happens in the first place? In this case, I think it's the
> "happens in the first place" case - once you see it a single time, the
> rest of the reports aren't that interesting.
>
> And using the ONCE thing means that we know it's not going to spam the
> logs.
Fixed.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-24 19:23 ` Linus Torvalds
2009-08-24 19:32 ` Eric Paris
@ 2009-08-24 20:36 ` Eric Paris
2009-08-25 11:42 ` Mikko C.
1 sibling, 1 reply; 13+ messages in thread
From: Eric Paris @ 2009-08-24 20:36 UTC (permalink / raw)
To: Linus Torvalds
Cc: Frans Pop, linux-kernel, zdenek.kabelac, christoph.thielecke,
akpm, viro, grant.wilson, mikko.cal
On Mon, 2009-08-24 at 12:23 -0700, Linus Torvalds wrote:
>
> On Mon, 24 Aug 2009, Eric Paris wrote:
> >
> > You suspect correctly. :( I'll fix in my tree before I request a
> > pull....
>
> Can you change the WARN() to a WARN_ONCE() too.
>
> Generally, we prefer the "ONCE" variant if there isn't some intrisic
> reason why we'd want to know about each event. If there is some load that
> triggers this, do you care about _how_many_times_ it happens, or about the
> fact that it happens in the first place? In this case, I think it's the
> "happens in the first place" case - once you see it a single time, the
> rest of the reports aren't that interesting.
>
> And using the ONCE thing means that we know it's not going to spam the
> logs.
If everyone having {fs,i}notify problems want to check out my latest
for-linus tree that would be great. It has his latest tree plus fixed
'versions' of the 3 patches I sent earlier.
http://git.infradead.org/users/eparis/notify.git/shortlog/refs/heads/for-linus
-Eric
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-24 20:36 ` Eric Paris
@ 2009-08-25 11:42 ` Mikko C.
2009-08-25 14:18 ` Eric Paris
2009-08-25 16:43 ` Linus Torvalds
0 siblings, 2 replies; 13+ messages in thread
From: Mikko C. @ 2009-08-25 11:42 UTC (permalink / raw)
To: Eric Paris
Cc: Linus Torvalds, Frans Pop, linux-kernel, zdenek.kabelac,
christoph.thielecke, akpm, viro, grant.wilson
>> On Mon, 24 Aug 2009, Eric Paris wrote:
>>>
>>> You suspect correctly. :( I'll fix in my tree before I request a
>>> pull....
I just got this with -rc7, but it doesn't look related to what I was
having before:
BUG: Bad page map in process kio_thumbnail pte:ffff88006cc99128
pmd:6d3b1067
addr:00007f9d4e3a5000 vm_flags:08000070 anon_vma:(null)
mapping:ffff88007abe21a0 index:200
vma->vm_ops->fault: filemap_fault+0x0/0x460
vma->vm_file->f_op->mmap: ext4_file_mmap+0x0/0x80
Pid: 28022, comm: kio_thumbnail Not tainted 2.6.31-rc7 #1
Call Trace:
[<ffffffff810afaf4>] ? print_bad_pte+0x1d4/0x2c0
[<ffffffff810afc79>] ? vm_normal_page+0x99/0xa0
[<ffffffff810b0c7d>] ? unmap_vmas+0x4cd/0x970
[<ffffffff810b6c74>] ? exit_mmap+0x104/0x1d0
[<ffffffff81043e0d>] ? mmput+0x4d/0x100
[<ffffffff81048d81>] ? exit_mm+0x101/0x150
[<ffffffff8104b240>] ? do_exit+0x6c0/0x750
[<ffffffff8104b326>] ? do_group_exit+0x56/0xd0
[<ffffffff8104b3c2>] ? sys_exit_group+0x22/0x40
[<ffffffff8100b7eb>] ? system_call_fastpath+0x16/0x1b
Disabling lock debugging due to kernel taint
No lockups or anything.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-25 11:42 ` Mikko C.
@ 2009-08-25 14:18 ` Eric Paris
2009-08-25 16:43 ` Linus Torvalds
1 sibling, 0 replies; 13+ messages in thread
From: Eric Paris @ 2009-08-25 14:18 UTC (permalink / raw)
To: Mikko C.
Cc: Linus Torvalds, Frans Pop, linux-kernel, zdenek.kabelac,
christoph.thielecke, akpm, viro, grant.wilson
On Tue, 2009-08-25 at 13:42 +0200, Mikko C. wrote:
> >> On Mon, 24 Aug 2009, Eric Paris wrote:
> >>>
> >>> You suspect correctly. :( I'll fix in my tree before I request a
> >>> pull....
>
> I just got this with -rc7, but it doesn't look related to what I was
> having before:
>
> BUG: Bad page map in process kio_thumbnail pte:ffff88006cc99128
> pmd:6d3b1067
> addr:00007f9d4e3a5000 vm_flags:08000070 anon_vma:(null)
> mapping:ffff88007abe21a0 index:200
> vma->vm_ops->fault: filemap_fault+0x0/0x460
> vma->vm_file->f_op->mmap: ext4_file_mmap+0x0/0x80
> Pid: 28022, comm: kio_thumbnail Not tainted 2.6.31-rc7 #1
> Call Trace:
> [<ffffffff810afaf4>] ? print_bad_pte+0x1d4/0x2c0
> [<ffffffff810afc79>] ? vm_normal_page+0x99/0xa0
> [<ffffffff810b0c7d>] ? unmap_vmas+0x4cd/0x970
> [<ffffffff810b6c74>] ? exit_mmap+0x104/0x1d0
> [<ffffffff81043e0d>] ? mmput+0x4d/0x100
> [<ffffffff81048d81>] ? exit_mm+0x101/0x150
> [<ffffffff8104b240>] ? do_exit+0x6c0/0x750
> [<ffffffff8104b326>] ? do_group_exit+0x56/0xd0
> [<ffffffff8104b3c2>] ? sys_exit_group+0x22/0x40
> [<ffffffff8100b7eb>] ? system_call_fastpath+0x16/0x1b
> Disabling lock debugging due to kernel taint
>
> No lockups or anything.
I can safely say that I have absolutely no idea, but I'm doubting it is
mine.
Does everyone who hit the: kernel BUG at fs/notify/notification.c:93!
have CONFIG_DEBUG_VM=N ? Mikko indicated that his problems seem to
crop up with CONFIG_DEBUG_VM=N but not when he enabled it. I'll be
testing again today with it disabled.....
-Eric
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-25 11:42 ` Mikko C.
2009-08-25 14:18 ` Eric Paris
@ 2009-08-25 16:43 ` Linus Torvalds
2009-08-25 17:17 ` Mikko C.
1 sibling, 1 reply; 13+ messages in thread
From: Linus Torvalds @ 2009-08-25 16:43 UTC (permalink / raw)
To: Mikko C.
Cc: Eric Paris, Frans Pop, linux-kernel, zdenek.kabelac,
christoph.thielecke, akpm, viro, grant.wilson
On Tue, 25 Aug 2009, Mikko C. wrote:
>
> I just got this with -rc7, but it doesn't look related to what I was having
> before:
>
> BUG: Bad page map in process kio_thumbnail pte:ffff88006cc99128 pmd:6d3b1067
> addr:00007f9d4e3a5000 vm_flags:08000070 anon_vma:(null)
> mapping:ffff88007abe21a0 index:200
> vma->vm_ops->fault: filemap_fault+0x0/0x460
> vma->vm_file->f_op->mmap: ext4_file_mmap+0x0/0x80
> Pid: 28022, comm: kio_thumbnail Not tainted 2.6.31-rc7 #1
> Call Trace:
> [<ffffffff810afaf4>] ? print_bad_pte+0x1d4/0x2c0
> [<ffffffff810afc79>] ? vm_normal_page+0x99/0xa0
> [<ffffffff810b0c7d>] ? unmap_vmas+0x4cd/0x970
> [<ffffffff810b6c74>] ? exit_mmap+0x104/0x1d0
> [<ffffffff81043e0d>] ? mmput+0x4d/0x100
> [<ffffffff81048d81>] ? exit_mm+0x101/0x150
> [<ffffffff8104b240>] ? do_exit+0x6c0/0x750
> [<ffffffff8104b326>] ? do_group_exit+0x56/0xd0
> [<ffffffff8104b3c2>] ? sys_exit_group+0x22/0x40
> [<ffffffff8100b7eb>] ? system_call_fastpath+0x16/0x1b
> Disabling lock debugging due to kernel taint
>
> No lockups or anything.
That looks like a memory corruption bug. Your page table entry is bad:
pte:ffff88006cc99128. It has the "special" bit set (one of the software
bits), in a mapping that should not have special pages.
But that pte entry is odd in other ways too - it's _PAGE_PROTNONE, which
is unusual (but not necessarily _wrong_) and _PAGE_BIT_ACCESSED. And it
has the high bits set, which is really not ok for a page table entry. The
PTE entry should look more like the pmd entry.
So it looks like the pte has been overwritten by some bogus value,
presumably by a stale pointer. And it migth be related to your inotify
problems that way.
Linus
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction
2009-08-25 16:43 ` Linus Torvalds
@ 2009-08-25 17:17 ` Mikko C.
0 siblings, 0 replies; 13+ messages in thread
From: Mikko C. @ 2009-08-25 17:17 UTC (permalink / raw)
To: Linus Torvalds
Cc: Eric Paris, Frans Pop, linux-kernel, zdenek.kabelac,
christoph.thielecke, akpm, viro, grant.wilson
on 25/08/2009 18:43 Linus Torvalds wrote:
>
> On Tue, 25 Aug 2009, Mikko C. wrote:
>>
>> I just got this with -rc7, but it doesn't look related to what I was having
>> before:
>>
>> BUG: Bad page map in process kio_thumbnail pte:ffff88006cc99128 pmd:6d3b1067
>> addr:00007f9d4e3a5000 vm_flags:08000070 anon_vma:(null)
>> mapping:ffff88007abe21a0 index:200
>> vma->vm_ops->fault: filemap_fault+0x0/0x460
>> vma->vm_file->f_op->mmap: ext4_file_mmap+0x0/0x80
>> Pid: 28022, comm: kio_thumbnail Not tainted 2.6.31-rc7 #1
>> Call Trace:
>> [<ffffffff810afaf4>] ? print_bad_pte+0x1d4/0x2c0
>> [<ffffffff810afc79>] ? vm_normal_page+0x99/0xa0
>> [<ffffffff810b0c7d>] ? unmap_vmas+0x4cd/0x970
>> [<ffffffff810b6c74>] ? exit_mmap+0x104/0x1d0
>> [<ffffffff81043e0d>] ? mmput+0x4d/0x100
>> [<ffffffff81048d81>] ? exit_mm+0x101/0x150
>> [<ffffffff8104b240>] ? do_exit+0x6c0/0x750
>> [<ffffffff8104b326>] ? do_group_exit+0x56/0xd0
>> [<ffffffff8104b3c2>] ? sys_exit_group+0x22/0x40
>> [<ffffffff8100b7eb>] ? system_call_fastpath+0x16/0x1b
>> Disabling lock debugging due to kernel taint
>>
>> No lockups or anything.
>
> That looks like a memory corruption bug. Your page table entry is bad:
> pte:ffff88006cc99128.
Now that you mention it, I remember having a couple segfaults with a
program I was testing earlier so I guess that could have screwed with
memory... I'd say it's safe to ignore it unless I get another one.
Meanwhile still no notify bug with -rc7 :)
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 3/3] inotify: fix locking around inotify watching in the idr
2009-08-24 17:37 [PATCH 0/3] Inotify cleanup and idr race Eric Paris
2009-08-24 17:38 ` [PATCH 1/3] inotify: seperate new watch creation updating existing watches Eric Paris
2009-08-24 17:38 ` [PATCH 2/3] inotify: do not BUG on idr entries at inotify destruction Eric Paris
@ 2009-08-24 17:38 ` Eric Paris
2 siblings, 0 replies; 13+ messages in thread
From: Eric Paris @ 2009-08-24 17:38 UTC (permalink / raw)
To: linux-kernel, linux-fs-devel
Cc: zdenek.kabelac, torvalds, christoph.thielecke, akpm, viro,
grant.wilson, mikko.cal
The are races around the idr storage of inotify watches. It's possible
that a watch could be found from sys_inotify_rm_watch() in the idr, but it
could be removed from the idr before that code does it's removal. Move the
locking and the refcnt'ing so that these have to happen atomically.
Signed-off-by: Eric Paris <eparis@redhat.com>
---
fs/notify/inotify/inotify_user.c | 51 +++++++++++++++++++++++++++++++-------
1 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index d8f73c2..c0ed0aa 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -364,20 +364,54 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
return error;
}
+/*
+ * Remove the mark from the idr (if present) and drop the reference
+ * on the mark because it was in the idr.
+ */
static void inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark_entry *ientry)
{
struct idr *idr;
+ struct fsnotify_mark_entry *entry;
+ struct inotify_inode_mark_entry *found_ientry;
+ int wd;
spin_lock(&group->inotify_data.idr_lock);
idr = &group->inotify_data.idr;
- idr_remove(idr, ientry->wd);
- spin_unlock(&group->inotify_data.idr_lock);
+ wd = ientry->wd;
+
+ if (wd == -1)
+ goto out;
+
+ entry = idr_find(&group->inotify_data.idr, wd);
+ if (unlikely(!entry))
+ goto out;
+
+ found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+ if (unlikely(found_ientry != ientry)) {
+ /* We found an entry in the idr with the right wd, but it's
+ * not the entry we were told to remove. eparis seriously
+ * fucked up somewhere. */
+ WARN_ON(1);
+ ientry->wd = -1;
+ goto out;
+ }
+
+ /* There better be at least one ref for being in the idr and one
+ * reference from whatever called us. */
+ BUG_ON(atomic_read(&entry->refcnt) < 2);
+
+ idr_remove(idr, wd);
ientry->wd = -1;
+
+ /* removed from the idr, drop that ref */
+ fsnotify_put_mark(entry);
+out:
+ spin_unlock(&group->inotify_data.idr_lock);
}
+
/*
- * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the
- * internal reference help on the mark because it is in the idr.
+ * Send IN_IGNORED for this wd, remove this wd from the idr.
*/
void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
struct fsnotify_group *group)
@@ -417,9 +451,6 @@ skip_send_ignore:
/* remove this entry from the idr */
inotify_remove_from_idr(group, ientry);
- /* removed from idr, drop that reference */
- fsnotify_put_mark(entry);
-
atomic_dec(&group->inotify_data.user->inotify_watches);
}
@@ -535,6 +566,9 @@ retry:
goto out_err;
}
+ /* we put the mark on the idr, take a reference */
+ fsnotify_get_mark(&tmp_ientry->fsn_entry);
+
/* we are on the idr, now get on the inode */
ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
if (ret) {
@@ -543,9 +577,6 @@ retry:
goto out_err;
}
- /* we put the mark on the idr, take a reference */
- fsnotify_get_mark(&tmp_ientry->fsn_entry);
-
/* update the idr hint, who cares about races, it's just a hint */
group->inotify_data.last_wd = tmp_ientry->wd;
^ permalink raw reply related [flat|nested] 13+ messages in thread