* [PATCH 1/4] vfs - check non-mountpoint dentry might block in __follow_mount_rcu()
2011-03-01 4:56 [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
@ 2011-03-01 4:56 ` Ian Kent
2011-03-01 4:57 ` [PATCH 2/4] autofs4 - fix rootless multi-mount race Ian Kent
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ian Kent @ 2011-03-01 4:56 UTC (permalink / raw)
To: Al Viro
Cc: Nick Piggin, David Howells, Kernel Mailing List, linux-fsdevel,
Linus Torvalds, Andrew Morton
When following a mount in rcu-walk mode we must check if the incoming dentry
is telling us it may need to block, even if it isn't actually a mountpoint.
Signed-off-by: Ian Kent <raven@themaw.net>
---
fs/namei.c | 24 ++++++++++++++++++++----
1 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9..a0f2179 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1022,6 +1022,14 @@ int follow_down_one(struct path *path)
return 0;
}
+static bool managed_dentry_might_block(struct dentry *dentry)
+{
+ if (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
+ dentry->d_op->d_manage(dentry, false, true) < 0)
+ return true;
+ return false;
+}
+
/*
* Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we
* meet a managed dentry and we're not walking to "..". True is returned to
@@ -1030,12 +1038,17 @@ int follow_down_one(struct path *path)
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
struct inode **inode, bool reverse_transit)
{
+ /*
+ * Don't forgat we might have a non-mountpoint managed dentry
+ * that wants to block transit.
+ */
+ *inode = path->dentry->d_inode;
+ if (!reverse_transit &&
+ unlikely(managed_dentry_might_block(path->dentry)))
+ return false;
+
while (d_mountpoint(path->dentry)) {
struct vfsmount *mounted;
- if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
- !reverse_transit &&
- path->dentry->d_op->d_manage(path->dentry, false, true) < 0)
- return false;
mounted = __lookup_mnt(path->mnt, path->dentry, 1);
if (!mounted)
break;
@@ -1043,6 +1056,9 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
path->dentry = mounted->mnt_root;
nd->seq = read_seqcount_begin(&path->dentry->d_seq);
*inode = path->dentry->d_inode;
+ if (!reverse_transit &&
+ unlikely(managed_dentry_might_block(path->dentry)))
+ return false;
}
if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/4] autofs4 - fix rootless multi-mount race
2011-03-01 4:56 [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
2011-03-01 4:56 ` [PATCH 1/4] vfs - check non-mountpoint dentry might block in __follow_mount_rcu() Ian Kent
@ 2011-03-01 4:57 ` Ian Kent
2011-03-01 4:57 ` [PATCH 3/4] autofs4 - fix dentry leak in autofs4_expire_direct() Ian Kent
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ian Kent @ 2011-03-01 4:57 UTC (permalink / raw)
To: Al Viro
Cc: Nick Piggin, David Howells, Kernel Mailing List, linux-fsdevel,
Linus Torvalds, Andrew Morton
When there is no mount at the base of a mount tree (rootless multi-mount)
there is a small amount of time when both DCACHE_MANAGE_TRANSIT and
DCACHE_NEED_AUTOMOUNT are clear. This occassionally alows a process
to pass instead of bock leading to an incorrect fail return. Keeping
the DCACHE_MANAGE_TRANSIT set all the time resolves the problem.
Signed-off-by: Ian Kent <raven@themaw.net>
---
fs/autofs4/expire.c | 13 ++-----------
fs/autofs4/root.c | 20 ++++----------------
2 files changed, 6 insertions(+), 27 deletions(-)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index f43100b..c896dd6 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -294,7 +294,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
spin_unlock(&sbi->fs_lock);
return NULL;
}
- managed_dentry_set_transit(root);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
struct autofs_info *ino = autofs4_dentry_ino(root);
ino->flags |= AUTOFS_INF_EXPIRING;
@@ -302,7 +301,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
spin_unlock(&sbi->fs_lock);
return root;
}
- managed_dentry_clear_transit(root);
spin_unlock(&sbi->fs_lock);
dput(root);
@@ -341,8 +339,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
ino = autofs4_dentry_ino(dentry);
/* No point expiring a pending mount */
if (ino->flags & AUTOFS_INF_PENDING)
- goto cont;
- managed_dentry_set_transit(dentry);
+ goto next;
/*
* Case 1: (i) indirect mount or top level pseudo direct mount
@@ -402,8 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
}
}
next:
- managed_dentry_clear_transit(dentry);
-cont:
spin_unlock(&sbi->fs_lock);
}
return NULL;
@@ -484,8 +479,6 @@ int autofs4_expire_run(struct super_block *sb,
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
ino->flags &= ~AUTOFS_INF_EXPIRING;
- if (!d_unhashed(dentry))
- managed_dentry_clear_transit(dentry);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -513,9 +506,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_EXPIRING;
spin_lock(&dentry->d_lock);
- if (ret)
- __managed_dentry_clear_transit(dentry);
- else {
+ if (!ret) {
if ((IS_ROOT(dentry) ||
(autofs_type_indirect(sbi->type) &&
IS_ROOT(dentry->d_parent))) &&
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 014e7ab..ad11a44 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -338,18 +338,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
DPRINTK("dentry=%p %.*s",
dentry, dentry->d_name.len, dentry->d_name.name);
- /*
- * Someone may have manually umounted this or it was a submount
- * that has gone away.
- */
- spin_lock(&dentry->d_lock);
- if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
- if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
- (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
- __managed_dentry_set_transit(path->dentry);
- }
- spin_unlock(&dentry->d_lock);
-
/* The daemon never triggers a mount. */
if (autofs4_oz_mode(sbi))
return NULL;
@@ -418,12 +406,12 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
done:
if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
/*
- * Any needed mounting has been completed and the path updated
- * so turn this into a normal dentry so we don't continually
- * call ->d_automount() and ->d_manage().
+ * Any needed mounting has been completed and the path
+ * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
+ * call ->d_automount() on rootless multi-mounts since
+ * it can lead to an incorrect ELOOP error return.
*/
spin_lock(&dentry->d_lock);
- __managed_dentry_clear_transit(dentry);
/*
* Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
* symlinks as in all other cases the dentry will be covered by
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/4] autofs4 - fix dentry leak in autofs4_expire_direct()
2011-03-01 4:56 [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
2011-03-01 4:56 ` [PATCH 1/4] vfs - check non-mountpoint dentry might block in __follow_mount_rcu() Ian Kent
2011-03-01 4:57 ` [PATCH 2/4] autofs4 - fix rootless multi-mount race Ian Kent
@ 2011-03-01 4:57 ` Ian Kent
2011-03-01 4:57 ` [PATCH 4/4] autofs4 - fix autofs4_expire_indirect() traversal Ian Kent
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ian Kent @ 2011-03-01 4:57 UTC (permalink / raw)
To: Al Viro
Cc: Nick Piggin, David Howells, Kernel Mailing List, linux-fsdevel,
Linus Torvalds, Andrew Morton
There is a missing dput() when returning from autofs4_expire_direct()
when we see that the dentry is already a pending mount.
Signed-off-by: Ian Kent <raven@themaw.net>
---
fs/autofs4/expire.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index c896dd6..c403abc 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -290,10 +290,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(root);
/* No point expiring a pending mount */
- if (ino->flags & AUTOFS_INF_PENDING) {
- spin_unlock(&sbi->fs_lock);
- return NULL;
- }
+ if (ino->flags & AUTOFS_INF_PENDING)
+ goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
struct autofs_info *ino = autofs4_dentry_ino(root);
ino->flags |= AUTOFS_INF_EXPIRING;
@@ -301,6 +299,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
spin_unlock(&sbi->fs_lock);
return root;
}
+out:
spin_unlock(&sbi->fs_lock);
dput(root);
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/4] autofs4 - fix autofs4_expire_indirect() traversal
2011-03-01 4:56 [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
` (2 preceding siblings ...)
2011-03-01 4:57 ` [PATCH 3/4] autofs4 - fix dentry leak in autofs4_expire_direct() Ian Kent
@ 2011-03-01 4:57 ` Ian Kent
2011-03-01 5:14 ` [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
2011-03-01 12:49 ` Ian Kent
5 siblings, 0 replies; 7+ messages in thread
From: Ian Kent @ 2011-03-01 4:57 UTC (permalink / raw)
To: Al Viro
Cc: Nick Piggin, David Howells, Kernel Mailing List, linux-fsdevel,
Linus Torvalds, Andrew Morton
The vfs-scale changes changed the traversal used in
autofs4_expire_indirect() from a list to a depth first tree traversal
which isn't right.
Signed-off-by: Ian Kent <raven@themaw.net>
---
fs/autofs4/expire.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 51 insertions(+), 1 deletions(-)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index c403abc..bc482e0 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -87,6 +87,56 @@ done:
}
/*
+ * Calculate and dget next entry in the subdirs list under root.
+ */
+static struct dentry *get_next_positive_subdir(struct dentry *prev,
+ struct dentry *root)
+{
+ struct list_head *next;
+ struct dentry *p, *q;
+
+ spin_lock(&autofs4_lock);
+
+ if (prev == NULL) {
+ spin_lock(&root->d_lock);
+ prev = dget_dlock(root);
+ next = prev->d_subdirs.next;
+ p = prev;
+ goto start;
+ }
+
+ p = prev;
+ spin_lock(&p->d_lock);
+again:
+ next = p->d_u.d_child.next;
+start:
+ if (next == &root->d_subdirs) {
+ spin_unlock(&p->d_lock);
+ spin_unlock(&autofs4_lock);
+ dput(prev);
+ return NULL;
+ }
+
+ q = list_entry(next, struct dentry, d_u.d_child);
+
+ spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
+ /* Negative dentry - try next */
+ if (!simple_positive(q)) {
+ spin_unlock(&p->d_lock);
+ p = q;
+ goto again;
+ }
+ dget_dlock(q);
+ spin_unlock(&q->d_lock);
+ spin_unlock(&p->d_lock);
+ spin_unlock(&autofs4_lock);
+
+ dput(prev);
+
+ return q;
+}
+
+/*
* Calculate and dget next entry in top down tree traversal.
*/
static struct dentry *get_next_positive_dentry(struct dentry *prev,
@@ -333,7 +383,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
timeout = sbi->exp_timeout;
dentry = NULL;
- while ((dentry = get_next_positive_dentry(dentry, root))) {
+ while ((dentry = get_next_positive_subdir(dentry, root))) {
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
/* No point expiring a pending mount */
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 0/4] more on vfs-scale and vfs-automount
2011-03-01 4:56 [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
` (3 preceding siblings ...)
2011-03-01 4:57 ` [PATCH 4/4] autofs4 - fix autofs4_expire_indirect() traversal Ian Kent
@ 2011-03-01 5:14 ` Ian Kent
2011-03-01 12:49 ` Ian Kent
5 siblings, 0 replies; 7+ messages in thread
From: Ian Kent @ 2011-03-01 5:14 UTC (permalink / raw)
To: Al Viro
Cc: Nick Piggin, David Howells, Kernel Mailing List, linux-fsdevel,
Linus Torvalds, Andrew Morton
On Tue, 2011-03-01 at 12:56 +0800, Ian Kent wrote:
> I've put quite a bit of time into testing 2.6.38-rc and, given the
> time frame, and update is needed. The included patch series much
> improves the behaviour of autofs under load.
Sorry Al, I didn't yet test against the namei patches you provided.
This was against rc6.
>
> I first thought the dentry leak you found was not the cause of the
> BUG() I was seeing but that appears to not be the case. I'm not
> seeing the BUG() at shutdown when umounting any more.
>
> I am still seeing occassional incorrect ENOENT returns. They must be
> comming from the VFS or the daemon as I've changed almost all the
> ENOENT returns in the autofs module to identify where it's comming
> from.
>
> Anyway, all, please review.
>
> ---
>
> Ian Kent (4):
> autofs4 - fix autofs4_expire_indirect() traversal
> autofs4 - fix dentry leak in autofs4_expire_direct()
> autofs4 - fix rootless multi-mount race
> vfs - check non-mountpoint dentry might block in __follow_mount_rcu()
>
>
> fs/autofs4/expire.c | 72 ++++++++++++++++++++++++++++++++++++++++-----------
> fs/autofs4/root.c | 20 +++-----------
> fs/namei.c | 24 ++++++++++++++---
> 3 files changed, 80 insertions(+), 36 deletions(-)
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 0/4] more on vfs-scale and vfs-automount
2011-03-01 4:56 [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
` (4 preceding siblings ...)
2011-03-01 5:14 ` [PATCH 0/4] more on vfs-scale and vfs-automount Ian Kent
@ 2011-03-01 12:49 ` Ian Kent
5 siblings, 0 replies; 7+ messages in thread
From: Ian Kent @ 2011-03-01 12:49 UTC (permalink / raw)
To: Al Viro
Cc: Nick Piggin, David Howells, Kernel Mailing List, linux-fsdevel,
Linus Torvalds, Andrew Morton
On Tue, 2011-03-01 at 12:56 +0800, Ian Kent wrote:
>
> I am still seeing occassional incorrect ENOENT returns. They must be
> comming from the VFS or the daemon as I've changed almost all the
> ENOENT returns in the autofs module to identify where it's comming
> from.
That's not quite right either.
I got another of these (after 5 runs of my test) but from the autofs
module this time which means I still have a race on blocking pending
mounts. Oh well, there's still more for me to do.
Ian
^ permalink raw reply [flat|nested] 7+ messages in thread