[RFC][PATCH 1/8] namespace: use global namespace semaphore

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [RFC][PATCH 1/8] namespace: use global namespace semaphore
@ 2005-05-30 13:19 Miklos Szeredi
  2005-05-31  2:58 ` Ian Kent
  0 siblings, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-05-30 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: mike, jamie, viro

This patch removes the per-namespace semaphore in favor of a global
semaphore.  This can have an effect on scalability, however it is
necessary for the following patches.

A similar scheme can be added later if needed.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>

Index: linux/include/linux/namespace.h
===================================================================
--- linux.orig/include/linux/namespace.h	2005-05-29 13:34:30.000000000 +0200
+++ linux/include/linux/namespace.h	2005-05-29 13:46:23.000000000 +0200
@@ -9,7 +9,6 @@ struct namespace {
 	atomic_t		count;
 	struct vfsmount *	root;
 	struct list_head	list;
-	struct rw_semaphore	sem;
 };
 
 extern int copy_namespace(int, struct task_struct *);
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c	2005-05-29 13:42:33.000000000 +0200
+++ linux/fs/namespace.c	2005-05-29 14:06:50.000000000 +0200
@@ -42,6 +42,7 @@ static inline int sysfs_init(void)
 static struct list_head *mount_hashtable;
 static int hash_mask, hash_bits;
 static kmem_cache_t *mnt_cache; 
+static struct rw_semaphore namespace_sem;
 
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -189,7 +190,7 @@ static void *m_start(struct seq_file *m,
 	struct list_head *p;
 	loff_t l = *pos;
 
-	down_read(&n->sem);
+	down_read(&namespace_sem);
 	list_for_each(p, &n->list)
 		if (!l--)
 			return list_entry(p, struct vfsmount, mnt_list);
@@ -206,8 +207,7 @@ static void *m_next(struct seq_file *m, 
 
 static void m_stop(struct seq_file *m, void *v)
 {
-	struct namespace *n = m->private;
-	up_read(&n->sem);
+	up_read(&namespace_sem);
 }
 
 static inline void mangle(struct seq_file *m, const char *s)
@@ -432,7 +432,7 @@ static int do_umount(struct vfsmount *mn
 		return retval;
 	}
 
-	down_write(&current->namespace->sem);
+	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
 
 	if (atomic_read(&sb->s_active) == 1) {
@@ -454,7 +454,7 @@ static int do_umount(struct vfsmount *mn
 	spin_unlock(&vfsmount_lock);
 	if (retval)
 		security_sb_umount_busy(mnt);
-	up_write(&current->namespace->sem);
+	up_write(&namespace_sem);
 	return retval;
 }
 
@@ -632,7 +632,7 @@ static int do_loopback(struct nameidata 
 	if (err)
 		return err;
 
-	down_write(&current->namespace->sem);
+	down_write(&namespace_sem);
 	err = -EINVAL;
 	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
 		err = -ENOMEM;
@@ -657,7 +657,7 @@ static int do_loopback(struct nameidata 
 			mntput(mnt);
 	}
 
-	up_write(&current->namespace->sem);
+	up_write(&namespace_sem);
 	path_release(&old_nd);
 	return err;
 }
@@ -706,7 +706,7 @@ static int do_move_mount(struct nameidat
 	if (err)
 		return err;
 
-	down_write(&current->namespace->sem);
+	down_write(&namespace_sem);
 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
 		;
 	err = -EINVAL;
@@ -750,7 +750,7 @@ out2:
 out1:
 	up(&nd->dentry->d_inode->i_sem);
 out:
-	up_write(&current->namespace->sem);
+	up_write(&namespace_sem);
 	if (!err)
 		path_release(&parent_nd);
 	path_release(&old_nd);
@@ -789,7 +789,7 @@ int do_add_mount(struct vfsmount *newmnt
 {
 	int err;
 
-	down_write(&current->namespace->sem);
+	down_write(&namespace_sem);
 	/* Something was mounted here while we slept */
 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
 		;
@@ -819,7 +819,7 @@ int do_add_mount(struct vfsmount *newmnt
 	}
 
 unlock:
-	up_write(&current->namespace->sem);
+	up_write(&namespace_sem);
 	mntput(newmnt);
 	return err;
 }
@@ -875,7 +875,6 @@ static void expire_mount(struct vfsmount
  */
 void mark_mounts_for_expiry(struct list_head *mounts)
 {
-	struct namespace *namespace;
 	struct vfsmount *mnt, *next;
 	LIST_HEAD(graveyard);
 
@@ -909,20 +908,12 @@ void mark_mounts_for_expiry(struct list_
 		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
 		list_del_init(&mnt->mnt_expire);
 
-		/* don't do anything if the namespace is dead - all the
-		 * vfsmounts from it are going away anyway */
-		namespace = mnt->mnt_namespace;
-		if (!namespace || !namespace->root)
-			continue;
-		get_namespace(namespace);
-
 		spin_unlock(&vfsmount_lock);
-		down_write(&namespace->sem);
+		down_write(&namespace_sem);
 		expire_mount(mnt, mounts);
-		up_write(&namespace->sem);
+		up_write(&namespace_sem);
 
 		mntput(mnt);
-		put_namespace(namespace);
 
 		spin_lock(&vfsmount_lock);
 	}
@@ -1087,14 +1078,13 @@ int copy_namespace(int flags, struct tas
 		goto out;
 
 	atomic_set(&new_ns->count, 1);
-	init_rwsem(&new_ns->sem);
 	INIT_LIST_HEAD(&new_ns->list);
 
-	down_write(&tsk->namespace->sem);
+	down_write(&namespace_sem);
 	/* First pass: copy the tree topology */
 	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
 	if (!new_ns->root) {
-		up_write(&tsk->namespace->sem);
+		up_write(&namespace_sem);
 		kfree(new_ns);
 		goto out;
 	}
@@ -1128,7 +1118,7 @@ int copy_namespace(int flags, struct tas
 		p = next_mnt(p, namespace->root);
 		q = next_mnt(q, new_ns->root);
 	}
-	up_write(&tsk->namespace->sem);
+	up_write(&namespace_sem);
 
 	tsk->namespace = new_ns;
 
@@ -1310,7 +1300,7 @@ asmlinkage long sys_pivot_root(const cha
 	user_nd.mnt = mntget(current->fs->rootmnt);
 	user_nd.dentry = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
-	down_write(&current->namespace->sem);
+	down_write(&namespace_sem);
 	down(&old_nd.dentry->d_inode->i_sem);
 	error = -EINVAL;
 	if (!check_mnt(user_nd.mnt))
@@ -1356,7 +1346,7 @@ asmlinkage long sys_pivot_root(const cha
 	path_release(&parent_nd);
 out2:
 	up(&old_nd.dentry->d_inode->i_sem);
-	up_write(&current->namespace->sem);
+	up_write(&namespace_sem);
 	path_release(&user_nd);
 	path_release(&old_nd);
 out1:
@@ -1383,7 +1373,6 @@ static void __init init_mount_tree(void)
 		panic("Can't allocate initial namespace");
 	atomic_set(&namespace->count, 1);
 	INIT_LIST_HEAD(&namespace->list);
-	init_rwsem(&namespace->sem);
 	list_add(&mnt->mnt_list, &namespace->list);
 	namespace->root = mnt;
 	mnt->mnt_namespace = namespace;
@@ -1406,6 +1395,8 @@ void __init mnt_init(unsigned long mempa
 	unsigned int nr_hash;
 	int i;
 
+	init_rwsem(&namespace_sem);
+
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
 			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
 
@@ -1454,10 +1445,10 @@ void __put_namespace(struct namespace *n
 	struct vfsmount *root = namespace->root;
 	namespace->root = NULL;
 	spin_unlock(&vfsmount_lock);
-	down_write(&namespace->sem);
+	down_write(&namespace_sem);
 	spin_lock(&vfsmount_lock);
 	umount_tree(root);
 	spin_unlock(&vfsmount_lock);
-	up_write(&namespace->sem);
+	up_write(&namespace_sem);
 	kfree(namespace);
 }

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-30 13:19 [RFC][PATCH 1/8] namespace: use global namespace semaphore Miklos Szeredi
@ 2005-05-31  2:58 ` Ian Kent
  2005-05-31  3:23   ` Mike Waychison
  0 siblings, 1 reply; 37+ messages in thread
From: Ian Kent @ 2005-05-31  2:58 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, mike, jamie, viro

On Mon, 30 May 2005, Miklos Szeredi wrote:

> This patch removes the per-namespace semaphore in favor of a global
> semaphore.  This can have an effect on scalability, however it is
> necessary for the following patches.
> 
> A similar scheme can be added later if needed.

Which, together with patch 3, effectively removes the ability for code in 
a module to safely find out about vfsmounts in its namespace.

Given the assumption in link_path_walk, that a vfsmount will not be 
followed prior to a call to follow_link this stops me from doing 
autofs direct mount via this method.

Any ideas on how I might cope with this?

> 
> Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
> 
> Index: linux/include/linux/namespace.h
> ===================================================================
> --- linux.orig/include/linux/namespace.h	2005-05-29 13:34:30.000000000 +0200
> +++ linux/include/linux/namespace.h	2005-05-29 13:46:23.000000000 +0200
> @@ -9,7 +9,6 @@ struct namespace {
>  	atomic_t		count;
>  	struct vfsmount *	root;
>  	struct list_head	list;
> -	struct rw_semaphore	sem;
>  };
>  
>  extern int copy_namespace(int, struct task_struct *);
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c	2005-05-29 13:42:33.000000000 +0200
> +++ linux/fs/namespace.c	2005-05-29 14:06:50.000000000 +0200
> @@ -42,6 +42,7 @@ static inline int sysfs_init(void)
>  static struct list_head *mount_hashtable;
>  static int hash_mask, hash_bits;
>  static kmem_cache_t *mnt_cache; 
> +static struct rw_semaphore namespace_sem;
>  
>  static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
>  {
> @@ -189,7 +190,7 @@ static void *m_start(struct seq_file *m,
>  	struct list_head *p;
>  	loff_t l = *pos;
>  
> -	down_read(&n->sem);
> +	down_read(&namespace_sem);
>  	list_for_each(p, &n->list)
>  		if (!l--)
>  			return list_entry(p, struct vfsmount, mnt_list);
> @@ -206,8 +207,7 @@ static void *m_next(struct seq_file *m, 
>  
>  static void m_stop(struct seq_file *m, void *v)
>  {
> -	struct namespace *n = m->private;
> -	up_read(&n->sem);
> +	up_read(&namespace_sem);
>  }
>  
>  static inline void mangle(struct seq_file *m, const char *s)
> @@ -432,7 +432,7 @@ static int do_umount(struct vfsmount *mn
>  		return retval;
>  	}
>  
> -	down_write(&current->namespace->sem);
> +	down_write(&namespace_sem);
>  	spin_lock(&vfsmount_lock);
>  
>  	if (atomic_read(&sb->s_active) == 1) {
> @@ -454,7 +454,7 @@ static int do_umount(struct vfsmount *mn
>  	spin_unlock(&vfsmount_lock);
>  	if (retval)
>  		security_sb_umount_busy(mnt);
> -	up_write(&current->namespace->sem);
> +	up_write(&namespace_sem);
>  	return retval;
>  }
>  
> @@ -632,7 +632,7 @@ static int do_loopback(struct nameidata 
>  	if (err)
>  		return err;
>  
> -	down_write(&current->namespace->sem);
> +	down_write(&namespace_sem);
>  	err = -EINVAL;
>  	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
>  		err = -ENOMEM;
> @@ -657,7 +657,7 @@ static int do_loopback(struct nameidata 
>  			mntput(mnt);
>  	}
>  
> -	up_write(&current->namespace->sem);
> +	up_write(&namespace_sem);
>  	path_release(&old_nd);
>  	return err;
>  }
> @@ -706,7 +706,7 @@ static int do_move_mount(struct nameidat
>  	if (err)
>  		return err;
>  
> -	down_write(&current->namespace->sem);
> +	down_write(&namespace_sem);
>  	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
>  		;
>  	err = -EINVAL;
> @@ -750,7 +750,7 @@ out2:
>  out1:
>  	up(&nd->dentry->d_inode->i_sem);
>  out:
> -	up_write(&current->namespace->sem);
> +	up_write(&namespace_sem);
>  	if (!err)
>  		path_release(&parent_nd);
>  	path_release(&old_nd);
> @@ -789,7 +789,7 @@ int do_add_mount(struct vfsmount *newmnt
>  {
>  	int err;
>  
> -	down_write(&current->namespace->sem);
> +	down_write(&namespace_sem);
>  	/* Something was mounted here while we slept */
>  	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
>  		;
> @@ -819,7 +819,7 @@ int do_add_mount(struct vfsmount *newmnt
>  	}
>  
>  unlock:
> -	up_write(&current->namespace->sem);
> +	up_write(&namespace_sem);
>  	mntput(newmnt);
>  	return err;
>  }
> @@ -875,7 +875,6 @@ static void expire_mount(struct vfsmount
>   */
>  void mark_mounts_for_expiry(struct list_head *mounts)
>  {
> -	struct namespace *namespace;
>  	struct vfsmount *mnt, *next;
>  	LIST_HEAD(graveyard);
>  
> @@ -909,20 +908,12 @@ void mark_mounts_for_expiry(struct list_
>  		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
>  		list_del_init(&mnt->mnt_expire);
>  
> -		/* don't do anything if the namespace is dead - all the
> -		 * vfsmounts from it are going away anyway */
> -		namespace = mnt->mnt_namespace;
> -		if (!namespace || !namespace->root)
> -			continue;
> -		get_namespace(namespace);
> -
>  		spin_unlock(&vfsmount_lock);
> -		down_write(&namespace->sem);
> +		down_write(&namespace_sem);
>  		expire_mount(mnt, mounts);
> -		up_write(&namespace->sem);
> +		up_write(&namespace_sem);
>  
>  		mntput(mnt);
> -		put_namespace(namespace);
>  
>  		spin_lock(&vfsmount_lock);
>  	}
> @@ -1087,14 +1078,13 @@ int copy_namespace(int flags, struct tas
>  		goto out;
>  
>  	atomic_set(&new_ns->count, 1);
> -	init_rwsem(&new_ns->sem);
>  	INIT_LIST_HEAD(&new_ns->list);
>  
> -	down_write(&tsk->namespace->sem);
> +	down_write(&namespace_sem);
>  	/* First pass: copy the tree topology */
>  	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
>  	if (!new_ns->root) {
> -		up_write(&tsk->namespace->sem);
> +		up_write(&namespace_sem);
>  		kfree(new_ns);
>  		goto out;
>  	}
> @@ -1128,7 +1118,7 @@ int copy_namespace(int flags, struct tas
>  		p = next_mnt(p, namespace->root);
>  		q = next_mnt(q, new_ns->root);
>  	}
> -	up_write(&tsk->namespace->sem);
> +	up_write(&namespace_sem);
>  
>  	tsk->namespace = new_ns;
>  
> @@ -1310,7 +1300,7 @@ asmlinkage long sys_pivot_root(const cha
>  	user_nd.mnt = mntget(current->fs->rootmnt);
>  	user_nd.dentry = dget(current->fs->root);
>  	read_unlock(&current->fs->lock);
> -	down_write(&current->namespace->sem);
> +	down_write(&namespace_sem);
>  	down(&old_nd.dentry->d_inode->i_sem);
>  	error = -EINVAL;
>  	if (!check_mnt(user_nd.mnt))
> @@ -1356,7 +1346,7 @@ asmlinkage long sys_pivot_root(const cha
>  	path_release(&parent_nd);
>  out2:
>  	up(&old_nd.dentry->d_inode->i_sem);
> -	up_write(&current->namespace->sem);
> +	up_write(&namespace_sem);
>  	path_release(&user_nd);
>  	path_release(&old_nd);
>  out1:
> @@ -1383,7 +1373,6 @@ static void __init init_mount_tree(void)
>  		panic("Can't allocate initial namespace");
>  	atomic_set(&namespace->count, 1);
>  	INIT_LIST_HEAD(&namespace->list);
> -	init_rwsem(&namespace->sem);
>  	list_add(&mnt->mnt_list, &namespace->list);
>  	namespace->root = mnt;
>  	mnt->mnt_namespace = namespace;
> @@ -1406,6 +1395,8 @@ void __init mnt_init(unsigned long mempa
>  	unsigned int nr_hash;
>  	int i;
>  
> +	init_rwsem(&namespace_sem);
> +
>  	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
>  			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
>  
> @@ -1454,10 +1445,10 @@ void __put_namespace(struct namespace *n
>  	struct vfsmount *root = namespace->root;
>  	namespace->root = NULL;
>  	spin_unlock(&vfsmount_lock);
> -	down_write(&namespace->sem);
> +	down_write(&namespace_sem);
>  	spin_lock(&vfsmount_lock);
>  	umount_tree(root);
>  	spin_unlock(&vfsmount_lock);
> -	up_write(&namespace->sem);
> +	up_write(&namespace_sem);
>  	kfree(namespace);
>  }
> -
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31  2:58 ` Ian Kent
@ 2005-05-31  3:23   ` Mike Waychison
  2005-05-31  7:02     ` Ian Kent
  0 siblings, 1 reply; 37+ messages in thread
From: Mike Waychison @ 2005-05-31  3:23 UTC (permalink / raw)
  To: Ian Kent; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Ian Kent wrote:
> On Mon, 30 May 2005, Miklos Szeredi wrote:
> 
> 
>>This patch removes the per-namespace semaphore in favor of a global
>>semaphore.  This can have an effect on scalability, however it is
>>necessary for the following patches.
>>
>>A similar scheme can be added later if needed.
> 
> 
> Which, together with patch 3, effectively removes the ability for code in 
> a module to safely find out about vfsmounts in its namespace.
> 
> Given the assumption in link_path_walk, that a vfsmount will not be 
> followed prior to a call to follow_link this stops me from doing 
> autofs direct mount via this method.
> 
> Any ideas on how I might cope with this?

No, the removal of struct namespace shouldn't keep you from walking
vfsmounts from modules.  In fact, in Miklos' 1/8 patch, he moved
namespace->sem to a global semaphore, which isn't needed by the end of
the patch set.  namespace->sem only ever protected the namespace->list
field.

If you which to walk a mounttree, you still only ever require grabbing
vfsmount_lock and use mntget/mntput.

Overall, this means autofs4 stuff will need to be updated to reflect
these changes, but they are minimal at best.

> 
> 
>>Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
>>
>>Index: linux/include/linux/namespace.h
>>===================================================================
>>--- linux.orig/include/linux/namespace.h	2005-05-29 13:34:30.000000000 +0200
>>+++ linux/include/linux/namespace.h	2005-05-29 13:46:23.000000000 +0200
>>@@ -9,7 +9,6 @@ struct namespace {
>> 	atomic_t		count;
>> 	struct vfsmount *	root;
>> 	struct list_head	list;
>>-	struct rw_semaphore	sem;
>> };
>> 
>> extern int copy_namespace(int, struct task_struct *);
>>Index: linux/fs/namespace.c
>>===================================================================
>>--- linux.orig/fs/namespace.c	2005-05-29 13:42:33.000000000 +0200
>>+++ linux/fs/namespace.c	2005-05-29 14:06:50.000000000 +0200
>>@@ -42,6 +42,7 @@ static inline int sysfs_init(void)
>> static struct list_head *mount_hashtable;
>> static int hash_mask, hash_bits;
>> static kmem_cache_t *mnt_cache; 
>>+static struct rw_semaphore namespace_sem;
>> 
>> static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
>> {
>>@@ -189,7 +190,7 @@ static void *m_start(struct seq_file *m,
>> 	struct list_head *p;
>> 	loff_t l = *pos;
>> 
>>-	down_read(&n->sem);
>>+	down_read(&namespace_sem);
>> 	list_for_each(p, &n->list)
>> 		if (!l--)
>> 			return list_entry(p, struct vfsmount, mnt_list);
>>@@ -206,8 +207,7 @@ static void *m_next(struct seq_file *m, 
>> 
>> static void m_stop(struct seq_file *m, void *v)
>> {
>>-	struct namespace *n = m->private;
>>-	up_read(&n->sem);
>>+	up_read(&namespace_sem);
>> }
>> 
>> static inline void mangle(struct seq_file *m, const char *s)
>>@@ -432,7 +432,7 @@ static int do_umount(struct vfsmount *mn
>> 		return retval;
>> 	}
>> 
>>-	down_write(&current->namespace->sem);
>>+	down_write(&namespace_sem);
>> 	spin_lock(&vfsmount_lock);
>> 
>> 	if (atomic_read(&sb->s_active) == 1) {
>>@@ -454,7 +454,7 @@ static int do_umount(struct vfsmount *mn
>> 	spin_unlock(&vfsmount_lock);
>> 	if (retval)
>> 		security_sb_umount_busy(mnt);
>>-	up_write(&current->namespace->sem);
>>+	up_write(&namespace_sem);
>> 	return retval;
>> }
>> 
>>@@ -632,7 +632,7 @@ static int do_loopback(struct nameidata 
>> 	if (err)
>> 		return err;
>> 
>>-	down_write(&current->namespace->sem);
>>+	down_write(&namespace_sem);
>> 	err = -EINVAL;
>> 	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
>> 		err = -ENOMEM;
>>@@ -657,7 +657,7 @@ static int do_loopback(struct nameidata 
>> 			mntput(mnt);
>> 	}
>> 
>>-	up_write(&current->namespace->sem);
>>+	up_write(&namespace_sem);
>> 	path_release(&old_nd);
>> 	return err;
>> }
>>@@ -706,7 +706,7 @@ static int do_move_mount(struct nameidat
>> 	if (err)
>> 		return err;
>> 
>>-	down_write(&current->namespace->sem);
>>+	down_write(&namespace_sem);
>> 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
>> 		;
>> 	err = -EINVAL;
>>@@ -750,7 +750,7 @@ out2:
>> out1:
>> 	up(&nd->dentry->d_inode->i_sem);
>> out:
>>-	up_write(&current->namespace->sem);
>>+	up_write(&namespace_sem);
>> 	if (!err)
>> 		path_release(&parent_nd);
>> 	path_release(&old_nd);
>>@@ -789,7 +789,7 @@ int do_add_mount(struct vfsmount *newmnt
>> {
>> 	int err;
>> 
>>-	down_write(&current->namespace->sem);
>>+	down_write(&namespace_sem);
>> 	/* Something was mounted here while we slept */
>> 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
>> 		;
>>@@ -819,7 +819,7 @@ int do_add_mount(struct vfsmount *newmnt
>> 	}
>> 
>> unlock:
>>-	up_write(&current->namespace->sem);
>>+	up_write(&namespace_sem);
>> 	mntput(newmnt);
>> 	return err;
>> }
>>@@ -875,7 +875,6 @@ static void expire_mount(struct vfsmount
>>  */
>> void mark_mounts_for_expiry(struct list_head *mounts)
>> {
>>-	struct namespace *namespace;
>> 	struct vfsmount *mnt, *next;
>> 	LIST_HEAD(graveyard);
>> 
>>@@ -909,20 +908,12 @@ void mark_mounts_for_expiry(struct list_
>> 		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
>> 		list_del_init(&mnt->mnt_expire);
>> 
>>-		/* don't do anything if the namespace is dead - all the
>>-		 * vfsmounts from it are going away anyway */
>>-		namespace = mnt->mnt_namespace;
>>-		if (!namespace || !namespace->root)
>>-			continue;
>>-		get_namespace(namespace);
>>-
>> 		spin_unlock(&vfsmount_lock);
>>-		down_write(&namespace->sem);
>>+		down_write(&namespace_sem);
>> 		expire_mount(mnt, mounts);
>>-		up_write(&namespace->sem);
>>+		up_write(&namespace_sem);
>> 
>> 		mntput(mnt);
>>-		put_namespace(namespace);
>> 
>> 		spin_lock(&vfsmount_lock);
>> 	}
>>@@ -1087,14 +1078,13 @@ int copy_namespace(int flags, struct tas
>> 		goto out;
>> 
>> 	atomic_set(&new_ns->count, 1);
>>-	init_rwsem(&new_ns->sem);
>> 	INIT_LIST_HEAD(&new_ns->list);
>> 
>>-	down_write(&tsk->namespace->sem);
>>+	down_write(&namespace_sem);
>> 	/* First pass: copy the tree topology */
>> 	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
>> 	if (!new_ns->root) {
>>-		up_write(&tsk->namespace->sem);
>>+		up_write(&namespace_sem);
>> 		kfree(new_ns);
>> 		goto out;
>> 	}
>>@@ -1128,7 +1118,7 @@ int copy_namespace(int flags, struct tas
>> 		p = next_mnt(p, namespace->root);
>> 		q = next_mnt(q, new_ns->root);
>> 	}
>>-	up_write(&tsk->namespace->sem);
>>+	up_write(&namespace_sem);
>> 
>> 	tsk->namespace = new_ns;
>> 
>>@@ -1310,7 +1300,7 @@ asmlinkage long sys_pivot_root(const cha
>> 	user_nd.mnt = mntget(current->fs->rootmnt);
>> 	user_nd.dentry = dget(current->fs->root);
>> 	read_unlock(&current->fs->lock);
>>-	down_write(&current->namespace->sem);
>>+	down_write(&namespace_sem);
>> 	down(&old_nd.dentry->d_inode->i_sem);
>> 	error = -EINVAL;
>> 	if (!check_mnt(user_nd.mnt))
>>@@ -1356,7 +1346,7 @@ asmlinkage long sys_pivot_root(const cha
>> 	path_release(&parent_nd);
>> out2:
>> 	up(&old_nd.dentry->d_inode->i_sem);
>>-	up_write(&current->namespace->sem);
>>+	up_write(&namespace_sem);
>> 	path_release(&user_nd);
>> 	path_release(&old_nd);
>> out1:
>>@@ -1383,7 +1373,6 @@ static void __init init_mount_tree(void)
>> 		panic("Can't allocate initial namespace");
>> 	atomic_set(&namespace->count, 1);
>> 	INIT_LIST_HEAD(&namespace->list);
>>-	init_rwsem(&namespace->sem);
>> 	list_add(&mnt->mnt_list, &namespace->list);
>> 	namespace->root = mnt;
>> 	mnt->mnt_namespace = namespace;
>>@@ -1406,6 +1395,8 @@ void __init mnt_init(unsigned long mempa
>> 	unsigned int nr_hash;
>> 	int i;
>> 
>>+	init_rwsem(&namespace_sem);
>>+
>> 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
>> 			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
>> 
>>@@ -1454,10 +1445,10 @@ void __put_namespace(struct namespace *n
>> 	struct vfsmount *root = namespace->root;
>> 	namespace->root = NULL;
>> 	spin_unlock(&vfsmount_lock);
>>-	down_write(&namespace->sem);
>>+	down_write(&namespace_sem);
>> 	spin_lock(&vfsmount_lock);
>> 	umount_tree(root);
>> 	spin_unlock(&vfsmount_lock);
>>-	up_write(&namespace->sem);
>>+	up_write(&namespace_sem);
>> 	kfree(namespace);
>> }
>>-
>>To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
>>the body of a message to majordomo@vger.kernel.org
>>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 
> 

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.0 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFCm9i3dQs4kOxk3/MRAlESAJ9rItxr1YbOfOJCWvArhlSmlNUM0QCfezPN
GyXzPaNSzLAUx/sInVOhRkU=
=v1Ix
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31  3:23   ` Mike Waychison
@ 2005-05-31  7:02     ` Ian Kent
  2005-05-31  7:56       ` Miklos Szeredi
  2005-05-31 15:55       ` Mike Waychison
  0 siblings, 2 replies; 37+ messages in thread
From: Ian Kent @ 2005-05-31  7:02 UTC (permalink / raw)
  To: Mike Waychison; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

On Mon, 30 May 2005, Mike Waychison wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> Ian Kent wrote:
> > On Mon, 30 May 2005, Miklos Szeredi wrote:
> > 
> > 
> >>This patch removes the per-namespace semaphore in favor of a global
> >>semaphore.  This can have an effect on scalability, however it is
> >>necessary for the following patches.
> >>
> >>A similar scheme can be added later if needed.
> > 
> > 
> > Which, together with patch 3, effectively removes the ability for code in 
> > a module to safely find out about vfsmounts in its namespace.
> > 
> > Given the assumption in link_path_walk, that a vfsmount will not be 
> > followed prior to a call to follow_link this stops me from doing 
> > autofs direct mount via this method.
> > 
> > Any ideas on how I might cope with this?
> 
> No, the removal of struct namespace shouldn't keep you from walking
> vfsmounts from modules.  In fact, in Miklos' 1/8 patch, he moved
> namespace->sem to a global semaphore, which isn't needed by the end of
> the patch set.  namespace->sem only ever protected the namespace->list
> field.

But my impression was that this semaphore surrounded the mount and umount 
processes. The list updates looked like they were all done when this 
semaphore was held.

> 
> If you which to walk a mounttree, you still only ever require grabbing
> vfsmount_lock and use mntget/mntput.

Which is not exported.

So do you think it should be?
Maybe lookup_mnt shouldn't be static and should be exported instead?

> 
> Overall, this means autofs4 stuff will need to be updated to reflect
> these changes, but they are minimal at best.

OK. But I'm not sure how I can get at this from within a module.

Perhaps something like updating the nameidata struct passed to 
follow_link, similar to what you did elsewhere in your autofsng 
patches, needs to be added to this lot.

> 
> > 
> > 
> >>Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
> >>
> >>Index: linux/include/linux/namespace.h
> >>===================================================================
> >>--- linux.orig/include/linux/namespace.h	2005-05-29 13:34:30.000000000 +0200
> >>+++ linux/include/linux/namespace.h	2005-05-29 13:46:23.000000000 +0200
> >>@@ -9,7 +9,6 @@ struct namespace {
> >> 	atomic_t		count;
> >> 	struct vfsmount *	root;
> >> 	struct list_head	list;
> >>-	struct rw_semaphore	sem;
> >> };
> >> 
> >> extern int copy_namespace(int, struct task_struct *);
> >>Index: linux/fs/namespace.c
> >>===================================================================
> >>--- linux.orig/fs/namespace.c	2005-05-29 13:42:33.000000000 +0200
> >>+++ linux/fs/namespace.c	2005-05-29 14:06:50.000000000 +0200
> >>@@ -42,6 +42,7 @@ static inline int sysfs_init(void)
> >> static struct list_head *mount_hashtable;
> >> static int hash_mask, hash_bits;
> >> static kmem_cache_t *mnt_cache; 
> >>+static struct rw_semaphore namespace_sem;
> >> 
> >> static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
> >> {
> >>@@ -189,7 +190,7 @@ static void *m_start(struct seq_file *m,
> >> 	struct list_head *p;
> >> 	loff_t l = *pos;
> >> 
> >>-	down_read(&n->sem);
> >>+	down_read(&namespace_sem);
> >> 	list_for_each(p, &n->list)
> >> 		if (!l--)
> >> 			return list_entry(p, struct vfsmount, mnt_list);
> >>@@ -206,8 +207,7 @@ static void *m_next(struct seq_file *m, 
> >> 
> >> static void m_stop(struct seq_file *m, void *v)
> >> {
> >>-	struct namespace *n = m->private;
> >>-	up_read(&n->sem);
> >>+	up_read(&namespace_sem);
> >> }
> >> 
> >> static inline void mangle(struct seq_file *m, const char *s)
> >>@@ -432,7 +432,7 @@ static int do_umount(struct vfsmount *mn
> >> 		return retval;
> >> 	}
> >> 
> >>-	down_write(&current->namespace->sem);
> >>+	down_write(&namespace_sem);
> >> 	spin_lock(&vfsmount_lock);
> >> 
> >> 	if (atomic_read(&sb->s_active) == 1) {
> >>@@ -454,7 +454,7 @@ static int do_umount(struct vfsmount *mn
> >> 	spin_unlock(&vfsmount_lock);
> >> 	if (retval)
> >> 		security_sb_umount_busy(mnt);
> >>-	up_write(&current->namespace->sem);
> >>+	up_write(&namespace_sem);
> >> 	return retval;
> >> }
> >> 
> >>@@ -632,7 +632,7 @@ static int do_loopback(struct nameidata 
> >> 	if (err)
> >> 		return err;
> >> 
> >>-	down_write(&current->namespace->sem);
> >>+	down_write(&namespace_sem);
> >> 	err = -EINVAL;
> >> 	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
> >> 		err = -ENOMEM;
> >>@@ -657,7 +657,7 @@ static int do_loopback(struct nameidata 
> >> 			mntput(mnt);
> >> 	}
> >> 
> >>-	up_write(&current->namespace->sem);
> >>+	up_write(&namespace_sem);
> >> 	path_release(&old_nd);
> >> 	return err;
> >> }
> >>@@ -706,7 +706,7 @@ static int do_move_mount(struct nameidat
> >> 	if (err)
> >> 		return err;
> >> 
> >>-	down_write(&current->namespace->sem);
> >>+	down_write(&namespace_sem);
> >> 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
> >> 		;
> >> 	err = -EINVAL;
> >>@@ -750,7 +750,7 @@ out2:
> >> out1:
> >> 	up(&nd->dentry->d_inode->i_sem);
> >> out:
> >>-	up_write(&current->namespace->sem);
> >>+	up_write(&namespace_sem);
> >> 	if (!err)
> >> 		path_release(&parent_nd);
> >> 	path_release(&old_nd);
> >>@@ -789,7 +789,7 @@ int do_add_mount(struct vfsmount *newmnt
> >> {
> >> 	int err;
> >> 
> >>-	down_write(&current->namespace->sem);
> >>+	down_write(&namespace_sem);
> >> 	/* Something was mounted here while we slept */
> >> 	while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
> >> 		;
> >>@@ -819,7 +819,7 @@ int do_add_mount(struct vfsmount *newmnt
> >> 	}
> >> 
> >> unlock:
> >>-	up_write(&current->namespace->sem);
> >>+	up_write(&namespace_sem);
> >> 	mntput(newmnt);
> >> 	return err;
> >> }
> >>@@ -875,7 +875,6 @@ static void expire_mount(struct vfsmount
> >>  */
> >> void mark_mounts_for_expiry(struct list_head *mounts)
> >> {
> >>-	struct namespace *namespace;
> >> 	struct vfsmount *mnt, *next;
> >> 	LIST_HEAD(graveyard);
> >> 
> >>@@ -909,20 +908,12 @@ void mark_mounts_for_expiry(struct list_
> >> 		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
> >> 		list_del_init(&mnt->mnt_expire);
> >> 
> >>-		/* don't do anything if the namespace is dead - all the
> >>-		 * vfsmounts from it are going away anyway */
> >>-		namespace = mnt->mnt_namespace;
> >>-		if (!namespace || !namespace->root)
> >>-			continue;
> >>-		get_namespace(namespace);
> >>-
> >> 		spin_unlock(&vfsmount_lock);
> >>-		down_write(&namespace->sem);
> >>+		down_write(&namespace_sem);
> >> 		expire_mount(mnt, mounts);
> >>-		up_write(&namespace->sem);
> >>+		up_write(&namespace_sem);
> >> 
> >> 		mntput(mnt);
> >>-		put_namespace(namespace);
> >> 
> >> 		spin_lock(&vfsmount_lock);
> >> 	}
> >>@@ -1087,14 +1078,13 @@ int copy_namespace(int flags, struct tas
> >> 		goto out;
> >> 
> >> 	atomic_set(&new_ns->count, 1);
> >>-	init_rwsem(&new_ns->sem);
> >> 	INIT_LIST_HEAD(&new_ns->list);
> >> 
> >>-	down_write(&tsk->namespace->sem);
> >>+	down_write(&namespace_sem);
> >> 	/* First pass: copy the tree topology */
> >> 	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
> >> 	if (!new_ns->root) {
> >>-		up_write(&tsk->namespace->sem);
> >>+		up_write(&namespace_sem);
> >> 		kfree(new_ns);
> >> 		goto out;
> >> 	}
> >>@@ -1128,7 +1118,7 @@ int copy_namespace(int flags, struct tas
> >> 		p = next_mnt(p, namespace->root);
> >> 		q = next_mnt(q, new_ns->root);
> >> 	}
> >>-	up_write(&tsk->namespace->sem);
> >>+	up_write(&namespace_sem);
> >> 
> >> 	tsk->namespace = new_ns;
> >> 
> >>@@ -1310,7 +1300,7 @@ asmlinkage long sys_pivot_root(const cha
> >> 	user_nd.mnt = mntget(current->fs->rootmnt);
> >> 	user_nd.dentry = dget(current->fs->root);
> >> 	read_unlock(&current->fs->lock);
> >>-	down_write(&current->namespace->sem);
> >>+	down_write(&namespace_sem);
> >> 	down(&old_nd.dentry->d_inode->i_sem);
> >> 	error = -EINVAL;
> >> 	if (!check_mnt(user_nd.mnt))
> >>@@ -1356,7 +1346,7 @@ asmlinkage long sys_pivot_root(const cha
> >> 	path_release(&parent_nd);
> >> out2:
> >> 	up(&old_nd.dentry->d_inode->i_sem);
> >>-	up_write(&current->namespace->sem);
> >>+	up_write(&namespace_sem);
> >> 	path_release(&user_nd);
> >> 	path_release(&old_nd);
> >> out1:
> >>@@ -1383,7 +1373,6 @@ static void __init init_mount_tree(void)
> >> 		panic("Can't allocate initial namespace");
> >> 	atomic_set(&namespace->count, 1);
> >> 	INIT_LIST_HEAD(&namespace->list);
> >>-	init_rwsem(&namespace->sem);
> >> 	list_add(&mnt->mnt_list, &namespace->list);
> >> 	namespace->root = mnt;
> >> 	mnt->mnt_namespace = namespace;
> >>@@ -1406,6 +1395,8 @@ void __init mnt_init(unsigned long mempa
> >> 	unsigned int nr_hash;
> >> 	int i;
> >> 
> >>+	init_rwsem(&namespace_sem);
> >>+
> >> 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
> >> 			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
> >> 
> >>@@ -1454,10 +1445,10 @@ void __put_namespace(struct namespace *n
> >> 	struct vfsmount *root = namespace->root;
> >> 	namespace->root = NULL;
> >> 	spin_unlock(&vfsmount_lock);
> >>-	down_write(&namespace->sem);
> >>+	down_write(&namespace_sem);
> >> 	spin_lock(&vfsmount_lock);
> >> 	umount_tree(root);
> >> 	spin_unlock(&vfsmount_lock);
> >>-	up_write(&namespace->sem);
> >>+	up_write(&namespace_sem);
> >> 	kfree(namespace);
> >> }
> >>-
> >>To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> >>the body of a message to majordomo@vger.kernel.org
> >>More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>
> > 
> > 
> 
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1.4.0 (GNU/Linux)
> Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org
> 
> iD8DBQFCm9i3dQs4kOxk3/MRAlESAJ9rItxr1YbOfOJCWvArhlSmlNUM0QCfezPN
> GyXzPaNSzLAUx/sInVOhRkU=
> =v1Ix
> -----END PGP SIGNATURE-----
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31  7:02     ` Ian Kent
@ 2005-05-31  7:56       ` Miklos Szeredi
  2005-05-31  9:34         ` Ian Kent
  2005-05-31 15:55       ` Mike Waychison
  1 sibling, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-05-31  7:56 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro


> Perhaps something like updating the nameidata struct passed to 
> follow_link, similar to what you did elsewhere in your autofsng 
> patches, needs to be added to this lot.

Can you explain, how direct mounts will work?  I understand that some
maginc symlink is involved, but I don't yet see the exact process.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31  7:56       ` Miklos Szeredi
@ 2005-05-31  9:34         ` Ian Kent
  2005-05-31 10:05           ` Miklos Szeredi
  0 siblings, 1 reply; 37+ messages in thread
From: Ian Kent @ 2005-05-31  9:34 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Tue, 31 May 2005, Miklos Szeredi wrote:

> 
> > Perhaps something like updating the nameidata struct passed to 
> > follow_link, similar to what you did elsewhere in your autofsng 
> > patches, needs to be added to this lot.
> 
> Can you explain, how direct mounts will work?  I understand that some
> maginc symlink is involved, but I don't yet see the exact process.

There's not really anything magic about using the follow link method. It's 
just a bit of a cheat as a directory can't also be a symlink and the 
callback comes at exactly the right time to make stuff happen.

The important bit is that every direct mount is an autofs mount and when 
it's walked on it mounts the corresponding map entry.

Ealier kernel versions had limitations on the number of anonymous mounts 
so this approach wasn't possible. Thanks to MikeW et. al. that's now 
fixed.

Fortuneatly I posted a few words about what I plan to do at
http://marc.theaimsgroup.com/?l=linux-fsdevel&m=110916819617914&w=2

Ian

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31  9:34         ` Ian Kent
@ 2005-05-31 10:05           ` Miklos Szeredi
  2005-05-31 10:13             ` Miklos Szeredi
  2005-06-01  4:36             ` Ian Kent
  0 siblings, 2 replies; 37+ messages in thread
From: Miklos Szeredi @ 2005-05-31 10:05 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro

> There's not really anything magic about using the follow link method. It's 
> just a bit of a cheat as a directory can't also be a symlink and the 
> callback comes at exactly the right time to make stuff happen.

So is it something like this?

  - autofs is mounted over direct mountpoint 'dm'

  - when dm is looked up, autofs's follow_link method is called

  - this triggers the actual mount in the daemon, which mounts over
    the autofs filesystem (with some trick to avoid recursion)

  - when the daemon is finished, the autofs's follow_link method
    enters the new mount (how?) and returns


Why do you need the mount topology for this?

Miklos



^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 10:05           ` Miklos Szeredi
@ 2005-05-31 10:13             ` Miklos Szeredi
  2005-05-31 15:43               ` Mike Waychison
                                 ` (2 more replies)
  2005-06-01  4:36             ` Ian Kent
  1 sibling, 3 replies; 37+ messages in thread
From: Miklos Szeredi @ 2005-05-31 10:13 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro

>   - this triggers the actual mount in the daemon, which mounts over
>     the autofs filesystem (with some trick to avoid recursion)

BTW, there's an elegant way to avoid recursion:

 - _before_ mounting the autofs, do mpfd = open(mountpoint, O_RDONLY).

 - then mount autofs

 - when mount trigger comes in do:  fchdir(mpfd), mount whatever .

This way the autofs mount won't be followed by the mount program,
since looking up '.' does not follow mounts.

So the result is two sibling mounts: the original autofs and the
actual filesystem.  But subsequent lookups will see the later mount
only, since it's placed earlier on the hash chain, and lookup_mnt() will
always return that.

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 10:13             ` Miklos Szeredi
@ 2005-05-31 15:43               ` Mike Waychison
  2005-06-01  5:04               ` Ian Kent
  2005-06-03 17:13               ` Mike Waychison
  2 siblings, 0 replies; 37+ messages in thread
From: Mike Waychison @ 2005-05-31 15:43 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: raven, linux-fsdevel, jamie, viro

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Miklos Szeredi wrote:
>>  - this triggers the actual mount in the daemon, which mounts over
>>    the autofs filesystem (with some trick to avoid recursion)
> 
> 
> BTW, there's an elegant way to avoid recursion:
> 
>  - _before_ mounting the autofs, do mpfd = open(mountpoint, O_RDONLY).
> 
>  - then mount autofs
> 
>  - when mount trigger comes in do:  fchdir(mpfd), mount whatever .
> 
> This way the autofs mount won't be followed by the mount program,
> since looking up '.' does not follow mounts.
> 
> So the result is two sibling mounts: the original autofs and the
> actual filesystem.  But subsequent lookups will see the later mount
> only, since it's placed earlier on the hash chain, and lookup_mnt() will
> always return that.

Indeed.  This is how autofsng handles direct mounts and 'browse' mounts
currently, although it currently uses mountfds to do a move in the end
(avoiding having to hijack cwd to do the final move).  This could be
better achieved by mount --move'ing to /proc/self/fd/#.

Mike Waychison
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.0 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFCnIYZdQs4kOxk3/MRAskPAJ9kAXkVsCu9GKtliB5O5otVmbVb7QCePbxy
xMst1n+X3kGvWmJrUr1Je+0=
=WKDP
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 10:13             ` Miklos Szeredi
  2005-05-31 15:43               ` Mike Waychison
@ 2005-06-01  5:04               ` Ian Kent
  2005-06-01  8:35                 ` Miklos Szeredi
  2005-06-03 17:13               ` Mike Waychison
  2 siblings, 1 reply; 37+ messages in thread
From: Ian Kent @ 2005-06-01  5:04 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Tue, 31 May 2005, Miklos Szeredi wrote:

> >   - this triggers the actual mount in the daemon, which mounts over
> >     the autofs filesystem (with some trick to avoid recursion)
> 
> BTW, there's an elegant way to avoid recursion:
> 
>  - _before_ mounting the autofs, do mpfd = open(mountpoint, O_RDONLY).
> 
>  - then mount autofs
> 
>  - when mount trigger comes in do:  fchdir(mpfd), mount whatever .
> 
> This way the autofs mount won't be followed by the mount program,
> since looking up '.' does not follow mounts.

But a path walk must be carried out in order to set pwd (or chroot), at 
which time the mount is carried out and remains busy due to the reference 
that is held.

> 
> So the result is two sibling mounts: the original autofs and the
> actual filesystem.  But subsequent lookups will see the later mount
> only, since it's placed earlier on the hash chain, and lookup_mnt() will
> always return that.

Don't know that I care about that to much.
It yields correct behaviour except for expires.
In this case the expire can only determine timeouts relative to busyness 
as walks scoot over the top. Indirect mounts (the other class of autofs 
mounts) update usage on every access (or walk).

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-01  5:04               ` Ian Kent
@ 2005-06-01  8:35                 ` Miklos Szeredi
  2005-06-03  2:10                   ` Ian Kent
  0 siblings, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-01  8:35 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro

> > 
> > This way the autofs mount won't be followed by the mount program,
> > since looking up '.' does not follow mounts.
> 
> But a path walk must be carried out in order to set pwd (or chroot), at 
> which time the mount is carried out and remains busy due to the reference 
> that is held.

Huh?  Path walk of '.' will do nothing.  It does not follow mounts or
anything.

> > So the result is two sibling mounts: the original autofs and the
> > actual filesystem.  But subsequent lookups will see the later mount
> > only, since it's placed earlier on the hash chain, and lookup_mnt() will
> > always return that.
> 
> Don't know that I care about that to much.
> It yields correct behaviour except for expires.
> In this case the expire can only determine timeouts relative to busyness 
> as walks scoot over the top. Indirect mounts (the other class of autofs 
> mounts) update usage on every access (or walk).

Yes, but with direct mounts you cannot use this to check busyness.

But there's ->mnt_expiry_mark, I think you can use that to check if
mount was recently walked over.

So for direct mounts, the sibling mount (as opposed to the overmount)
has the advantage of not having to do ugly recursion checking hacks in
autofs.

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-01  8:35                 ` Miklos Szeredi
@ 2005-06-03  2:10                   ` Ian Kent
  2005-06-03  5:28                     ` Miklos Szeredi
  2005-06-03 17:22                     ` Mike Waychison
  0 siblings, 2 replies; 37+ messages in thread
From: Ian Kent @ 2005-06-03  2:10 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Wed, 1 Jun 2005, Miklos Szeredi wrote:

> > > 
> > > This way the autofs mount won't be followed by the mount program,
> > > since looking up '.' does not follow mounts.
> > 
> > But a path walk must be carried out in order to set pwd (or chroot), at 
> > which time the mount is carried out and remains busy due to the reference 
> > that is held.
> 
> Huh?  Path walk of '.' will do nothing.  It does not follow mounts or
> anything.

Does "chdir ." make sense. Looks a bit like a noop to me.
Surely if your current working directory is "." you've had to set it with 
a provided path at some point.

> 
> > > So the result is two sibling mounts: the original autofs and the
> > > actual filesystem.  But subsequent lookups will see the later mount
> > > only, since it's placed earlier on the hash chain, and lookup_mnt() will
> > > always return that.
> > 
> > Don't know that I care about that to much.
> > It yields correct behaviour except for expires.
> > In this case the expire can only determine timeouts relative to busyness 
> > as walks scoot over the top. Indirect mounts (the other class of autofs 
> > mounts) update usage on every access (or walk).
> 
> Yes, but with direct mounts you cannot use this to check busyness.

That's not entirely right but your point is well made.

> 
> But there's ->mnt_expiry_mark, I think you can use that to check if
> mount was recently walked over.

I'll check that.

I'll probably be reluctant to go that way simply because I want to 
backport this to 2.4. But maybe that will become secondary as work 
progresses.

Never the less it may well provide the extra functionality I need.

> 
> So for direct mounts, the sibling mount (as opposed to the overmount)
> has the advantage of not having to do ugly recursion checking hacks in
> autofs.

I've found that all I really need to do is check that the follow_mount is 
successful and respond accordingly.

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03  2:10                   ` Ian Kent
@ 2005-06-03  5:28                     ` Miklos Szeredi
  2005-06-04  4:13                       ` raven
  2005-06-03 17:22                     ` Mike Waychison
  1 sibling, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-03  5:28 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro

> > > > 
> > > > This way the autofs mount won't be followed by the mount program,
> > > > since looking up '.' does not follow mounts.
> > > 
> > > But a path walk must be carried out in order to set pwd (or
> > > chroot), at which time the mount is carried out and remains busy
> > > due to the reference that is held.
> > 
> > Huh?  Path walk of '.' will do nothing.  It does not follow mounts or
> > anything.
> 
> Does "chdir ." make sense. Looks a bit like a noop to me.  Surely if
> your current working directory is "." you've had to set it with a
> provided path at some point.

OK, sorry, I misunderstood you.

But still I don't see why it is a problem if you have a reference to
the mountpoint dentry/vfsmount.  You already have a reference with the
mounted autofs, so adding another reference by opening a file, and
another by changing cwd to it won't change anything at all.

> > So for direct mounts, the sibling mount (as opposed to the overmount)
> > has the advantage of not having to do ugly recursion checking hacks in
> > autofs.
> 
> I've found that all I really need to do is check that the follow_mount is 
> successful and respond accordingly.

I don't see how that would work.

Don't you have to somehow special-case the mount program to allow that
to succeed in path lookup, but make all other lookups block until the
mount is finished?

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03  5:28                     ` Miklos Szeredi
@ 2005-06-04  4:13                       ` raven
  2005-06-04  6:32                         ` Miklos Szeredi
  0 siblings, 1 reply; 37+ messages in thread
From: raven @ 2005-06-04  4:13 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Fri, 3 Jun 2005, Miklos Szeredi wrote:

> > > > > 
> > > > > This way the autofs mount won't be followed by the mount program,
> > > > > since looking up '.' does not follow mounts.
> > > > 
> > > > But a path walk must be carried out in order to set pwd (or
> > > > chroot), at which time the mount is carried out and remains busy
> > > > due to the reference that is held.
> > > 
> > > Huh?  Path walk of '.' will do nothing.  It does not follow mounts or
> > > anything.
> > 
> > Does "chdir ." make sense. Looks a bit like a noop to me.  Surely if
> > your current working directory is "." you've had to set it with a
> > provided path at some point.
> 
> OK, sorry, I misunderstood you.
> 
> But still I don't see why it is a problem if you have a reference to
> the mountpoint dentry/vfsmount.  You already have a reference with the
> mounted autofs, so adding another reference by opening a file, and
> another by changing cwd to it won't change anything at all.

I think we`ve lost track of what we originally started discussing here.
I think we started discussing why I needed the nameidata struct to be upto 
date when the follow link method was called, it simply isn't when a mount 
is followed at the end of the do_lookup call in link_path_walk.

> 
> > > So for direct mounts, the sibling mount (as opposed to the overmount)
> > > has the advantage of not having to do ugly recursion checking hacks in
> > > autofs.
> > 
> > I've found that all I really need to do is check that the follow_mount is 
> > successful and respond accordingly.
> 
> I don't see how that would work.

Seems to work fine so far.

> 
> Don't you have to somehow special-case the mount program to allow that
> to succeed in path lookup, but make all other lookups block until the
> mount is finished?

I haven't found that to be the case. There are a couple of mechanisms for 
this.

First is a in kernel module waitq for daemon notification that has been 
well tested over time. I've done quite a bit of work on the interaction 
between expire events and lookups. There are still a couple of patches I 
need to submit but it's getting there.

Second the user space daemon is implemented as an FSM which implies a 
considerable amount of serialisation wrt kernel communication through the 
pipe.

Ian

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-04  4:13                       ` raven
@ 2005-06-04  6:32                         ` Miklos Szeredi
  2005-06-04  6:50                           ` raven
  0 siblings, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-04  6:32 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro

> > Don't you have to somehow special-case the mount program to allow that
> > to succeed in path lookup, but make all other lookups block until the
> > mount is finished?
> 
> I haven't found that to be the case. There are a couple of mechanisms for 
> this.
> 
> First is a in kernel module waitq for daemon notification that has been 
> well tested over time. I've done quite a bit of work on the interaction 
> between expire events and lookups. There are still a couple of patches I 
> need to submit but it's getting there.
> 
> Second the user space daemon is implemented as an FSM which implies a 
> considerable amount of serialisation wrt kernel communication through the 
> pipe.

No.  These are not the issues I'm worried about.  I'm interested in
how you (plan to) deal with the following situation:

There are three processes: A (autofs), B and C.  When a mount trigger
comes into A it fork/execs M (mount).  Direct autofs mountpoint is
/x/y/dm.

B: stat("/x/y/dm/z", ...) [blocks]
A: [trigger comes in]
A: system("mount xyfs /x/y/dm") [blocks]
M: [prepare for mount]
M: mount(..., "/x/y/dm", ....) [mount in progress]
C: stat("/x/y/dm/q", ...) [???]

Now what exactly happens when C tries to look up a path, that's at the
moment being mounted.  How does autofs differentiate between the
lookup done by the mount, and a lookup done by any other process?

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-04  6:32                         ` Miklos Szeredi
@ 2005-06-04  6:50                           ` raven
  2005-06-04  7:20                             ` Miklos Szeredi
  0 siblings, 1 reply; 37+ messages in thread
From: raven @ 2005-06-04  6:50 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Sat, 4 Jun 2005, Miklos Szeredi wrote:

> > > Don't you have to somehow special-case the mount program to allow that
> > > to succeed in path lookup, but make all other lookups block until the
> > > mount is finished?
> > 
> > I haven't found that to be the case. There are a couple of mechanisms for 
> > this.
> > 
> > First is a in kernel module waitq for daemon notification that has been 
> > well tested over time. I've done quite a bit of work on the interaction 
> > between expire events and lookups. There are still a couple of patches I 
> > need to submit but it's getting there.
> > 
> > Second the user space daemon is implemented as an FSM which implies a 
> > considerable amount of serialisation wrt kernel communication through the 
> > pipe.
> 
> No.  These are not the issues I'm worried about.  I'm interested in
> how you (plan to) deal with the following situation:
> 
> There are three processes: A (autofs), B and C.  When a mount trigger
> comes into A it fork/execs M (mount).  Direct autofs mountpoint is
> /x/y/dm.
> 
> B: stat("/x/y/dm/z", ...) [blocks]
> A: [trigger comes in]
> A: system("mount xyfs /x/y/dm") [blocks]
> M: [prepare for mount]
> M: mount(..., "/x/y/dm", ....) [mount in progress]
> C: stat("/x/y/dm/q", ...) [???]
> 
> Now what exactly happens when C tries to look up a path, that's at the
> moment being mounted.  How does autofs differentiate between the
> lookup done by the mount, and a lookup done by any other process?

B creates the waitq and waits for the mount.
C gets's placed on the waitq, waiting for the mount.
M & A complete due to below.
B & C are released from the waitq and find what they expect.

The autofs4 module knows the process group of userspace autofs that 
ownes the mount and provides right of passage not causing any mounts, just 
doing what the daemon needs instead of what processes requesting mounts need.

The method is basically the same as already done for indirect mounts.

Of course it's not quite as simple as this makes it sound but it is 
basically the way it happens. I'm going to use the same model for 
direct and indirect mounts. But of course there may yet be suprises for me 
as I've not quite finished the direct mount bits.

Look at autofs4_revalidate and autofs4_lookup in fs/autofs4/root.c and 
autofs4_wait in fs/autofs4/waitq.c for more detailed info.

With all these questions Miklos you must be thinking it's like getting 
blood from a stone with me.

So I appologize in advance. If I appear like that to you I'm sorry I don't 
mean to. Please continue to ask questions. I'm happy to try my best to 
answer as clearly as possible.

btw I've had the flu for a while now so my head is far from clear. 

Ian

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-04  6:50                           ` raven
@ 2005-06-04  7:20                             ` Miklos Szeredi
  2005-06-04  7:27                               ` raven
  0 siblings, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-04  7:20 UTC (permalink / raw)
  To: raven; +Cc: miklos, mike, linux-fsdevel, jamie, viro

> The autofs4 module knows the process group of userspace autofs that 
> ownes the mount and provides right of passage not causing any mounts, just 
> doing what the daemon needs instead of what processes requesting mounts need.

OK, this is the info I was missing.

And I find it quite hackish.  But if it works, OK.  The next autofs
you write should do it more cleanly (as outlined by MikeW).

> With all these questions Miklos you must be thinking it's like getting 
> blood from a stone with me.

Yes, quite :)

> So I appologize in advance. If I appear like that to you I'm sorry I don't 
> mean to. Please continue to ask questions. I'm happy to try my best to 
> answer as clearly as possible.
> 
> btw I've had the flu for a while now so my head is far from clear. 

I understand.  Get well!

Thanks
Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-04  7:20                             ` Miklos Szeredi
@ 2005-06-04  7:27                               ` raven
  0 siblings, 0 replies; 37+ messages in thread
From: raven @ 2005-06-04  7:27 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Sat, 4 Jun 2005, Miklos Szeredi wrote:

> > The autofs4 module knows the process group of userspace autofs that 
> > ownes the mount and provides right of passage not causing any mounts, just 
> > doing what the daemon needs instead of what processes requesting mounts need.
> 
> OK, this is the info I was missing.
> 
> And I find it quite hackish.  But if it works, OK.  The next autofs
> you write should do it more cleanly (as outlined by MikeW).

I've taken on maintenance of autofs and am working within the original 
design. I'm not the original author but I have to say that the method  
works quite well.

I've looked at what Mike has done and it looks very good to me, in both 
the userspace and kernelspce.

Never the less the current autofs is being maintained and is usefull to a 
bunch of people. In time, I expect that Mike's development will resolve 
some of the deeper issues in supporting recent kernel developments. 

Hopefully, I'll be able to contribute to his effort when I'm not so 
heavily loaded with the current implementation.

> 
> > With all these questions Miklos you must be thinking it's like getting 
> > blood from a stone with me.
> 
> Yes, quite :)
> 
> > So I appologize in advance. If I appear like that to you I'm sorry I don't 
> > mean to. Please continue to ask questions. I'm happy to try my best to 
> > answer as clearly as possible.
> > 
> > btw I've had the flu for a while now so my head is far from clear. 
> 
> I understand.  Get well!
> 
> Thanks
> Miklos
> 

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03  2:10                   ` Ian Kent
  2005-06-03  5:28                     ` Miklos Szeredi
@ 2005-06-03 17:22                     ` Mike Waychison
  2005-06-04  4:32                       ` raven
  1 sibling, 1 reply; 37+ messages in thread
From: Mike Waychison @ 2005-06-03 17:22 UTC (permalink / raw)
  To: Ian Kent; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

Ian Kent wrote:
> On Wed, 1 Jun 2005, Miklos Szeredi wrote:
> 
> 
>>>>This way the autofs mount won't be followed by the mount program,
>>>>since looking up '.' does not follow mounts.
>>>
>>>But a path walk must be carried out in order to set pwd (or chroot), at 
>>>which time the mount is carried out and remains busy due to the reference 
>>>that is held.
>>
>>Huh?  Path walk of '.' will do nothing.  It does not follow mounts or
>>anything.
> 
> 
> Does "chdir ." make sense. Looks a bit like a noop to me.
> Surely if your current working directory is "." you've had to set it with 
> a provided path at some point.

Just to make sure we are all on the same page:

If you use follow_link logic on a _directory_, it becomes impossible for 
userspace to open(2) that given directory.  However, if the kernel knows 
all, and can do an open(2) on it, and pass that to the userspace autofs 
handler.

Now, the userspace autofs bits can fchdir(fd) on that fd, and the kernel 
will happily change current's cwd to that magic directory.

Then, with the cwd set to that magic directory, userspace can do a 
'mount --move /tmp/temp_mountpoint .', which moves the filesystem 
mounted at /tmp/temp_mountpoint on top of the magic directory.

Now, the magic directory is covered up, and ->follow_link will never be 
called again because the VFS will directly walk over it during a 
follow_down loop.

Care has to be taken to ensure that multiple processes that 
->follow_link get put on the same wait_queue waiting for the mount to occur.

Take a look at fs/autofsng/request.c for an implementation of this.

> 
> 
>>>>So the result is two sibling mounts: the original autofs and the
>>>>actual filesystem.  But subsequent lookups will see the later mount
>>>>only, since it's placed earlier on the hash chain, and lookup_mnt() will
>>>>always return that.
>>>
>>>Don't know that I care about that to much.
>>>It yields correct behaviour except for expires.
>>>In this case the expire can only determine timeouts relative to busyness 
>>>as walks scoot over the top. Indirect mounts (the other class of autofs 
>>>mounts) update usage on every access (or walk).
>>
>>Yes, but with direct mounts you cannot use this to check busyness.
> 
> 
> That's not entirely right but your point is well made.
> 
> 
>>But there's ->mnt_expiry_mark, I think you can use that to check if
>>mount was recently walked over.
> 
> 
> I'll check that.
> 
> I'll probably be reluctant to go that way simply because I want to 
> backport this to 2.4. But maybe that will become secondary as work 
> progresses.
> 
> Never the less it may well provide the extra functionality I need.
> 

The one problem with mnt_expiry_mark is that it isn't recursive, so if 
you want to expire a tree of mountpoints (like autofs4 currently does), 
you have to feed it steroids:

http://marc.theaimsgroup.com/?l=linux-fsdevel&m=109875168418269&w=2
http://marc.theaimsgroup.com/?l=linux-kernel&m=109875147222911&w=2
http://marc.theaimsgroup.com/?l=linux-fsdevel&m=109871584122188&w=2

> 
>>So for direct mounts, the sibling mount (as opposed to the overmount)
>>has the advantage of not having to do ugly recursion checking hacks in
>>autofs.
> 
> 
> I've found that all I really need to do is check that the follow_mount is 
> successful and respond accordingly.
> 
> Ian
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03 17:22                     ` Mike Waychison
@ 2005-06-04  4:32                       ` raven
  0 siblings, 0 replies; 37+ messages in thread
From: raven @ 2005-06-04  4:32 UTC (permalink / raw)
  To: Mike Waychison; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

On Fri, 3 Jun 2005, Mike Waychison wrote:

> Ian Kent wrote:
> > On Wed, 1 Jun 2005, Miklos Szeredi wrote:
> > 
> > 
> > > > >This way the autofs mount won't be followed by the mount program,
> > > > >since looking up '.' does not follow mounts.
> > > >
> > > >But a path walk must be carried out in order to set pwd (or chroot), at
> > > >which time the mount is carried out and remains busy due to the reference
> > > >that is held.
> > >
> > >Huh?  Path walk of '.' will do nothing.  It does not follow mounts or
> > >anything.
> > 
> > 
> > Does "chdir ." make sense. Looks a bit like a noop to me.
> > Surely if your current working directory is "." you've had to set it with a
> > provided path at some point.
> 
> Just to make sure we are all on the same page:

I think we're going to confuse the hell out of Miklos here.

Each talking about fundamentally different automount implementations using 
similar approaches.

Sorry Miklos.

> 
> If you use follow_link logic on a _directory_, it becomes impossible for
> userspace to open(2) that given directory.  However, if the kernel knows all,
> and can do an open(2) on it, and pass that to the userspace autofs handler.

Indeed, analysing the implications of open(2) is where I'm upto at the 
moment. So I'm not quite on the same page here just yet.

> 
> Now, the userspace autofs bits can fchdir(fd) on that fd, and the kernel will
> happily change current's cwd to that magic directory.
> 
> Then, with the cwd set to that magic directory, userspace can do a 'mount
> --move /tmp/temp_mountpoint .', which moves the filesystem mounted at
> /tmp/temp_mountpoint on top of the magic directory.
> 
> Now, the magic directory is covered up, and ->follow_link will never be called
> again because the VFS will directly walk over it during a follow_down loop.
> 
> Care has to be taken to ensure that multiple processes that 
> ->follow_link get put on the same wait_queue waiting for the mount to occur.
> 
> Take a look at fs/autofsng/request.c for an implementation of this.
> 
> > 
> > 
> > > > >So the result is two sibling mounts: the original autofs and the
> > > > >actual filesystem.  But subsequent lookups will see the later mount
> > > > >only, since it's placed earlier on the hash chain, and lookup_mnt()
> > > > >will
> > > > >always return that.
> > > >
> > > >Don't know that I care about that to much.
> > > >It yields correct behaviour except for expires.
> > > >In this case the expire can only determine timeouts relative to busyness
> > > >as walks scoot over the top. Indirect mounts (the other class of autofs
> > > >mounts) update usage on every access (or walk).
> > >
> > >Yes, but with direct mounts you cannot use this to check busyness.
> > 
> > 
> > That's not entirely right but your point is well made.
> > 
> > 
> > >But there's ->mnt_expiry_mark, I think you can use that to check if
> > >mount was recently walked over.
> > 
> > 
> > I'll check that.
> > 
> > I'll probably be reluctant to go that way simply because I want to backport
> > this to 2.4. But maybe that will become secondary as work progresses.
> > 
> > Never the less it may well provide the extra functionality I need.
> > 
> 
> The one problem with mnt_expiry_mark is that it isn't recursive, so if you
> want to expire a tree of mountpoints (like autofs4 currently does), you have
> to feed it steroids:
> 
> http://marc.theaimsgroup.com/?l=linux-fsdevel&m=109875168418269&w=2
> http://marc.theaimsgroup.com/?l=linux-kernel&m=109875147222911&w=2
> http://marc.theaimsgroup.com/?l=linux-fsdevel&m=109871584122188&w=2
> 
> > 
> > >So for direct mounts, the sibling mount (as opposed to the overmount)
> > >has the advantage of not having to do ugly recursion checking hacks in
> > >autofs.
> > 
> > 
> > I've found that all I really need to do is check that the follow_mount is
> > successful and respond accordingly.
> > 
> > Ian
> > 
> 


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 10:13             ` Miklos Szeredi
  2005-05-31 15:43               ` Mike Waychison
  2005-06-01  5:04               ` Ian Kent
@ 2005-06-03 17:13               ` Mike Waychison
  2005-06-03 18:20                 ` Miklos Szeredi
  2 siblings, 1 reply; 37+ messages in thread
From: Mike Waychison @ 2005-06-03 17:13 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: raven, linux-fsdevel, jamie, viro

Miklos Szeredi wrote:

> So the result is two sibling mounts: the original autofs and the
> actual filesystem.  But subsequent lookups will see the later mount
> only, since it's placed earlier on the hash chain, and lookup_mnt() will
> always return that.

I misread this bit.  There's is no reason we need to have two vfsmounts 
mounted on the same <vfsmount,dentry> tuple.  Mounting on top of the 
autofs filesystem works fine, and doesn't modify the vfsmount invariant.

Mike Waychison

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03 17:13               ` Mike Waychison
@ 2005-06-03 18:20                 ` Miklos Szeredi
  0 siblings, 0 replies; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-03 18:20 UTC (permalink / raw)
  To: mike; +Cc: raven, linux-fsdevel, jamie, viro

> > So the result is two sibling mounts: the original autofs and the
> > actual filesystem.  But subsequent lookups will see the later mount
> > only, since it's placed earlier on the hash chain, and lookup_mnt() will
> > always return that.
> 
> I misread this bit.  There's is no reason we need to have two vfsmounts 
> mounted on the same <vfsmount,dentry> tuple.  Mounting on top of the 
> autofs filesystem works fine, and doesn't modify the vfsmount invariant.

OK.  I didn't think of opening the autofs root and passing that to
userspace.  I agree that this is a better way of avoiding the
recursion on follow_link.

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 10:05           ` Miklos Szeredi
  2005-05-31 10:13             ` Miklos Szeredi
@ 2005-06-01  4:36             ` Ian Kent
  2005-06-03 17:24               ` Mike Waychison
  1 sibling, 1 reply; 37+ messages in thread
From: Ian Kent @ 2005-06-01  4:36 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, Al Viro

On Tue, 31 May 2005, Miklos Szeredi wrote:

> > There's not really anything magic about using the follow link method. It's 
> > just a bit of a cheat as a directory can't also be a symlink and the 
> > callback comes at exactly the right time to make stuff happen.
> 
> So is it something like this?
> 
>   - autofs is mounted over direct mountpoint 'dm'
> 
>   - when dm is looked up, autofs's follow_link method is called
> 
>   - this triggers the actual mount in the daemon, which mounts over
>     the autofs filesystem (with some trick to avoid recursion)

Admit, I haven't paid enough attention to recursion.
Ouch! Caught by that the other day.

> 
>   - when the daemon is finished, the autofs's follow_link method
>     enters the new mount (how?) and returns

A function functionally similar to follow_mount of namei.c. Which needs 
the appropriate vfsmount.

> 
> 
> Why do you need the mount topology for this?

Don't. Actually need the nameidata upto date when the follow_link method 
is called.

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-01  4:36             ` Ian Kent
@ 2005-06-03 17:24               ` Mike Waychison
  2005-06-04  4:38                 ` raven
  0 siblings, 1 reply; 37+ messages in thread
From: Mike Waychison @ 2005-06-03 17:24 UTC (permalink / raw)
  To: Ian Kent; +Cc: Miklos Szeredi, linux-fsdevel, jamie, Al Viro

Ian Kent wrote:
> On Tue, 31 May 2005, Miklos Szeredi wrote:
> 
> 
>>>There's not really anything magic about using the follow link method. It's 
>>>just a bit of a cheat as a directory can't also be a symlink and the 
>>>callback comes at exactly the right time to make stuff happen.
>>
>>So is it something like this?
>>
>>  - autofs is mounted over direct mountpoint 'dm'
>>
>>  - when dm is looked up, autofs's follow_link method is called
>>
>>  - this triggers the actual mount in the daemon, which mounts over
>>    the autofs filesystem (with some trick to avoid recursion)
> 
> 
> Admit, I haven't paid enough attention to recursion.
> Ouch! Caught by that the other day.
> 
> 
>>  - when the daemon is finished, the autofs's follow_link method
>>    enters the new mount (how?) and returns
> 
> 
> A function functionally similar to follow_mount of namei.c. Which needs 
> the appropriate vfsmount.
> 
> 
>>
>>Why do you need the mount topology for this?
> 
> 
> Don't. Actually need the nameidata upto date when the follow_link method 
> is called.
> 

However you can't rely on this for expiry checks, as the vfsmount can 
change (--rbind, CLONE_NEWNS).

Mike Waychison

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03 17:24               ` Mike Waychison
@ 2005-06-04  4:38                 ` raven
  0 siblings, 0 replies; 37+ messages in thread
From: raven @ 2005-06-04  4:38 UTC (permalink / raw)
  To: Mike Waychison; +Cc: Miklos Szeredi, linux-fsdevel, jamie, Al Viro

On Fri, 3 Jun 2005, Mike Waychison wrote:

> Ian Kent wrote:
> > On Tue, 31 May 2005, Miklos Szeredi wrote:
> > 
> > 
> > > >There's not really anything magic about using the follow link method.
> > > >It's just a bit of a cheat as a directory can't also be a symlink and the
> > > >callback comes at exactly the right time to make stuff happen.
> > >
> > >So is it something like this?
> > >
> > >  - autofs is mounted over direct mountpoint 'dm'
> > >
> > >  - when dm is looked up, autofs's follow_link method is called
> > >
> > >  - this triggers the actual mount in the daemon, which mounts over
> > >    the autofs filesystem (with some trick to avoid recursion)
> > 
> > 
> > Admit, I haven't paid enough attention to recursion.
> > Ouch! Caught by that the other day.
> > 
> > 
> > >  - when the daemon is finished, the autofs's follow_link method
> > >    enters the new mount (how?) and returns
> > 
> > 
> > A function functionally similar to follow_mount of namei.c. Which needs the
> > appropriate vfsmount.
> > 
> > 
> > >
> > >Why do you need the mount topology for this?
> > 
> > 
> > Don't. Actually need the nameidata upto date when the follow_link method is
> > called.
> > 
> 
> However you can't rely on this for expiry checks, as the vfsmount can change
> (--rbind, CLONE_NEWNS).

No. This is a limitation of the old autofs design and will probably 
remain so for the forseable future. It's one of the issues that I hope 
autofsng will address.

I can deal with expirey otherwise.

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31  7:02     ` Ian Kent
  2005-05-31  7:56       ` Miklos Szeredi
@ 2005-05-31 15:55       ` Mike Waychison
  2005-05-31 16:08         ` Miklos Szeredi
  2005-06-01  3:57         ` Ian Kent
  1 sibling, 2 replies; 37+ messages in thread
From: Mike Waychison @ 2005-05-31 15:55 UTC (permalink / raw)
  To: Ian Kent; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Ian Kent wrote:
> On Mon, 30 May 2005, Mike Waychison wrote:
>> 
>> No, the removal of struct namespace shouldn't keep you from walking
>> vfsmounts from modules.  In fact, in Miklos' 1/8 patch, he moved
>> namespace->sem to a global semaphore, which isn't needed by the end of
>> the patch set.  namespace->sem only ever protected the namespace->list
>> field.
>  
> But my impression was that this semaphore surrounded the mount and umount 
> processes. The list updates looked like they were all done when this 
> semaphore was held.
> 

namespace->sem did indeed surround the mounting and unmounting
processes, however it never was meant to protect the integrety of the
vfsmount tree, only namespace->list.  Due to the lock nesting rules of
vfs, it did infact exclude any tree operations, however the proper lock
to have used was always vfsmount_lock

> 
>> If you which to walk a mounttree, you still only ever require grabbing
>> vfsmount_lock and use mntget/mntput.
> 
> 
> Which is not exported.
> 
> So do you think it should be?

Yes.  Even better would be for a proper interface to be exported from
fs/namespace.c though, so that as a module you don't care about the
nitty gritty details of how the tree is held together.  This is likely
to get even more complicated with time as Ram's patches get more testing.

> Maybe lookup_mnt shouldn't be static and should be exported instead?
> 

lookup_mnt is the wrong operation, as it looks for a mountpoint given a
vfsmount and dentry.  What you need is a way to walk the tree, and I'm
inclined to say that something like next_mnt is what you are looking for.

> 
>> Overall, this means autofs4 stuff will need to be updated to reflect
>> these changes, but they are minimal at best.
> 
> 
> OK. But I'm not sure how I can get at this from within a module.
> 

Right now, the quickest way is to EXPORT_SYMBOL(vfsmount_lock) and grab
it instead of namespace->sem.

> Perhaps something like updating the nameidata struct passed to 
> follow_link, similar to what you did elsewhere in your autofsng 
> patches, needs to be added to this lot.

I'm lost to what you mean.  The only place autofsng touches nameidata in
the VFS is setting ->mnt to a non-stale mount before a ->follow_link.
That symlinks even work without this change is almost coincidence.

Mike Waychison
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.0 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFCnIj5dQs4kOxk3/MRArdfAJ0Ytphl0/ly9Ds6v/WDiCTy+FBwdACfeT+r
TO/HFIluXOPYZGcZUBqrr+c=
=EMfY
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 15:55       ` Mike Waychison
@ 2005-05-31 16:08         ` Miklos Szeredi
  2005-06-01  4:18           ` Ian Kent
  2005-06-01  3:57         ` Ian Kent
  1 sibling, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-05-31 16:08 UTC (permalink / raw)
  To: mike; +Cc: raven, linux-fsdevel, jamie, viro

> namespace->sem did indeed surround the mounting and unmounting
> processes, however it never was meant to protect the integrety of the
> vfsmount tree, only namespace->list.

I'm not sure about that.  The copy_tree() function relies on the tree
not chaning underneeth, and that's why it's called with
namespace->sem.  And vfsmount_lock can't be used, because copy_tree()
can sleep on allocations.

> > Maybe lookup_mnt shouldn't be static and should be exported instead?
> > 
> 
> lookup_mnt is the wrong operation, as it looks for a mountpoint given a
> vfsmount and dentry.  What you need is a way to walk the tree, and I'm
> inclined to say that something like next_mnt is what you are looking for.

Can someone explain, why autofs needs to walk the mount tree?

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 16:08         ` Miklos Szeredi
@ 2005-06-01  4:18           ` Ian Kent
  2005-06-01  8:28             ` Miklos Szeredi
  2005-06-03 17:26             ` Mike Waychison
  0 siblings, 2 replies; 37+ messages in thread
From: Ian Kent @ 2005-06-01  4:18 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Tue, 31 May 2005, Miklos Szeredi wrote:

> > namespace->sem did indeed surround the mounting and unmounting
> > processes, however it never was meant to protect the integrety of the
> > vfsmount tree, only namespace->list.
> 
> I'm not sure about that.  The copy_tree() function relies on the tree
> not chaning underneeth, and that's why it's called with
> namespace->sem.  And vfsmount_lock can't be used, because copy_tree()
> can sleep on allocations.
> 
> > > Maybe lookup_mnt shouldn't be static and should be exported instead?
> > > 
> > 
> > lookup_mnt is the wrong operation, as it looks for a mountpoint given a
> > vfsmount and dentry.  What you need is a way to walk the tree, and I'm
> > inclined to say that something like next_mnt is what you are looking for.
> 
> Can someone explain, why autofs needs to walk the mount tree?

I generally don't need to walk the vfsmount tree except during expire (ref 
may_umount_tree in namespace.c). What I find is that I often need to 
locate a sibling vfsmount given a parent and a dentry (aka. lookup_mnt). 
This is because, after a mount is triggered (the daemon is notified and 
does the mount) I need it in order to follow it. I expect Mike has similar 
needs.

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-01  4:18           ` Ian Kent
@ 2005-06-01  8:28             ` Miklos Szeredi
  2005-06-03  2:23               ` Ian Kent
  2005-06-03 17:26             ` Mike Waychison
  1 sibling, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-01  8:28 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro

> > Can someone explain, why autofs needs to walk the mount tree?
> 
> I generally don't need to walk the vfsmount tree except during expire (ref 
> may_umount_tree in namespace.c). What I find is that I often need to 
> locate a sibling vfsmount given a parent and a dentry (aka. lookup_mnt). 
> This is because, after a mount is triggered (the daemon is notified and 
> does the mount) I need it in order to follow it. I expect Mike has similar 
> needs.

You can use follow_down(), which does just this, and is already
exported.

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-01  8:28             ` Miklos Szeredi
@ 2005-06-03  2:23               ` Ian Kent
  2005-06-03  5:18                 ` Miklos Szeredi
  0 siblings, 1 reply; 37+ messages in thread
From: Ian Kent @ 2005-06-03  2:23 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Wed, 1 Jun 2005, Miklos Szeredi wrote:

> > > Can someone explain, why autofs needs to walk the mount tree?
> > 
> > I generally don't need to walk the vfsmount tree except during expire (ref 
> > may_umount_tree in namespace.c). What I find is that I often need to 
> > locate a sibling vfsmount given a parent and a dentry (aka. lookup_mnt). 
> > This is because, after a mount is triggered (the daemon is notified and 
> > does the mount) I need it in order to follow it. I expect Mike has similar 
> > needs.
> 
> You can use follow_down(), which does just this, and is already
> exported.

Sure, but that was the subject of this thread in the begining.

The nameidata vfsmount entry is not upto date when this method is called.
Therefore I need to look it up.

I could propose a change to link_path_walk to update it prior to the 
call but then I can't have a tarball that can be used to build the module 
without rebuilding the kernel.

I was planning on doing this to facilitate testing.

Do you really think that it's OK to remove long standing structure 
fields on the strength that no other kernel code is using them?

What about the externally developed modules from other places?

Ian

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03  2:23               ` Ian Kent
@ 2005-06-03  5:18                 ` Miklos Szeredi
  2005-06-04  4:53                   ` raven
  0 siblings, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-03  5:18 UTC (permalink / raw)
  To: raven; +Cc: mike, linux-fsdevel, jamie, viro

> > > > Can someone explain, why autofs needs to walk the mount tree?
> > > 
> > > I generally don't need to walk the vfsmount tree except during
> > > expire (ref may_umount_tree in namespace.c). What I find is that
> > > I often need to locate a sibling vfsmount given a parent and a
> > > dentry (aka. lookup_mnt).  This is because, after a mount is
> > > triggered (the daemon is notified and does the mount) I need it
> > > in order to follow it. I expect Mike has similar needs.
> > 
> > You can use follow_down(), which does just this, and is already
> > exported.
> 
> Sure, but that was the subject of this thread in the begining.

I think you're mixing it with follow_link().  follow_down() is just a
nice wrapper for lookup_mnt(), and is exported to modules.

> The nameidata vfsmount entry is not upto date when this method is called.
> Therefore I need to look it up.
> 
> I could propose a change to link_path_walk to update it prior to the 
> call but then I can't have a tarball that can be used to build the module 
> without rebuilding the kernel.
> 
> I was planning on doing this to facilitate testing.
> 
> Do you really think that it's OK to remove long standing structure 
> fields on the strength that no other kernel code is using them?

Yes, if it's not part of an interface.

> What about the externally developed modules from other places?

I think it's usually understood, that you shouldn't rely on
implementation details, or otherwise have to face the consequences.

But in this case there's no such problem I think, because you can
solve this with existing exported interfaces, and don't have to bother
about mnt_list.

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-03  5:18                 ` Miklos Szeredi
@ 2005-06-04  4:53                   ` raven
  2005-06-04  6:57                     ` Miklos Szeredi
  0 siblings, 1 reply; 37+ messages in thread
From: raven @ 2005-06-04  4:53 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Fri, 3 Jun 2005, Miklos Szeredi wrote:

> > > > > Can someone explain, why autofs needs to walk the mount tree?
> > > > 
> > > > I generally don't need to walk the vfsmount tree except during
> > > > expire (ref may_umount_tree in namespace.c). What I find is that
> > > > I often need to locate a sibling vfsmount given a parent and a
> > > > dentry (aka. lookup_mnt).  This is because, after a mount is
> > > > triggered (the daemon is notified and does the mount) I need it
> > > > in order to follow it. I expect Mike has similar needs.
> > > 
> > > You can use follow_down(), which does just this, and is already
> > > exported.
> > 
> > Sure, but that was the subject of this thread in the begining.
> 
> I think you're mixing it with follow_link().  follow_down() is just a
> nice wrapper for lookup_mnt(), and is exported to modules.
> 
> > The nameidata vfsmount entry is not upto date when this method is called.
> > Therefore I need to look it up.
> > 
> > I could propose a change to link_path_walk to update it prior to the 
> > call but then I can't have a tarball that can be used to build the module 
> > without rebuilding the kernel.
> > 
> > I was planning on doing this to facilitate testing.
> > 
> > Do you really think that it's OK to remove long standing structure 
> > fields on the strength that no other kernel code is using them?
> 
> Yes, if it's not part of an interface.

What interface?

I'm not aware of any clearly defined API for access to in kernel functions 
for the VFS except for the various operations methods. Unfortuneatly, any 
struct that is passed to them should be largely imutable as well. Which 
ultimeatly covers the structs we are talking about here.

All I see is people taking the oppertunity to stop exporting functions 
without consideration to what should be considered part of an API.

Some companies do this really well to foster development.

> 
> > What about the externally developed modules from other places?
> 
> I think it's usually understood, that you shouldn't rely on
> implementation details, or otherwise have to face the consequences.

I dont think that really good enough for a viable software platform.

> 
> But in this case there's no such problem I think, because you can
> solve this with existing exported interfaces, and don't have to bother
> about mnt_list.

Yes. That would be the case except there's a reference held to the 
vfsmount in the struct path prior to point at which follow_link is called, 
which I don't think can be obtained.

In any case I'll find a way to work around it.

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-04  4:53                   ` raven
@ 2005-06-04  6:57                     ` Miklos Szeredi
  2005-06-04  7:13                       ` raven
  0 siblings, 1 reply; 37+ messages in thread
From: Miklos Szeredi @ 2005-06-04  6:57 UTC (permalink / raw)
  To: raven; +Cc: miklos, mike, linux-fsdevel, jamie, viro

> What interface?
> 
> I'm not aware of any clearly defined API for access to in kernel functions 
> for the VFS except for the various operations methods. Unfortuneatly, any 
> struct that is passed to them should be largely imutable as well. Which 
> ultimeatly covers the structs we are talking about here.

You are right that it's not an explicitly defined API.  However the
only _truly_ exported part of the mount api is mntget/mntput.  You'll
notice, that only closely related code (path lookup, dentry cache)
will ever touch anything in struct vfsmount, and even those only in a
very few places.

So struct vfsmount is _not_ part of the API, and no unrelated code (in
or out of the kernel) should touch any part of it.

> All I see is people taking the oppertunity to stop exporting functions 
> without consideration to what should be considered part of an API.
> 
> Some companies do this really well to foster development.

And generally using things which are not really part of an API just
results in messy and buggy code.  So even if it makes development
easier, it's not worth it on the long run.

> Yes. That would be the case except there's a reference held to the 
> vfsmount in the struct path prior to point at which follow_link is called, 
> which I don't think can be obtained.
> 
> In any case I'll find a way to work around it.

I don't see why you'd need to work around anything.  Following pseudo
code should work perfectly fine:

int autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	if (!is_mount_in_progress(dentry))
		send_trigger_to_userspace(dentry)

	wait_event(mountpoint_waitq(dentry), !is_mount_in_progress(dentry));
	while(follow_down(&nd->mnt, &nd->dentry));
	return 0;
}

follow_down() will be careful to update the refcounts on the vfsmount
and dentry.

Miklos

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-04  6:57                     ` Miklos Szeredi
@ 2005-06-04  7:13                       ` raven
  0 siblings, 0 replies; 37+ messages in thread
From: raven @ 2005-06-04  7:13 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: mike, linux-fsdevel, jamie, viro

On Sat, 4 Jun 2005, Miklos Szeredi wrote:

> > What interface?
> > 
> > I'm not aware of any clearly defined API for access to in kernel functions 
> > for the VFS except for the various operations methods. Unfortuneatly, any 
> > struct that is passed to them should be largely imutable as well. Which 
> > ultimeatly covers the structs we are talking about here.
> 
> You are right that it's not an explicitly defined API.  However the
> only _truly_ exported part of the mount api is mntget/mntput.  You'll
> notice, that only closely related code (path lookup, dentry cache)
> will ever touch anything in struct vfsmount, and even those only in a
> very few places.
> 
> So struct vfsmount is _not_ part of the API, and no unrelated code (in
> or out of the kernel) should touch any part of it.
> 
> > All I see is people taking the oppertunity to stop exporting functions 
> > without consideration to what should be considered part of an API.
> > 
> > Some companies do this really well to foster development.
> 
> And generally using things which are not really part of an API just
> results in messy and buggy code.  So even if it makes development
> easier, it's not worth it on the long run.

OK. Sorry to go on about this but it's is just my little hobby horse.

> 
> > Yes. That would be the case except there's a reference held to the 
> > vfsmount in the struct path prior to point at which follow_link is called, 
> > which I don't think can be obtained.
> > 
> > In any case I'll find a way to work around it.
> 
> I don't see why you'd need to work around anything.  Following pseudo
> code should work perfectly fine:
> 
> int autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
> {
> 	if (!is_mount_in_progress(dentry))
> 		send_trigger_to_userspace(dentry)
> 
> 	wait_event(mountpoint_waitq(dentry), !is_mount_in_progress(dentry));
> 	while(follow_down(&nd->mnt, &nd->dentry));
> 	return 0;
> }

Indeed, that's pretty much what I have.

And yes I partially missed (ignored) the fact that the follow down should 
do the trick because I believe that there is a follow_down (follow_mount 
actually) that will cause reference to a vfsmount (not accessible in 
follow_link) prior to the follow_link call that, at least in one case, 
is not put. I believe this is held in a struct path (next) local variable. 
For a normal link nd->mnt and next.mnt would be the same. But maybe I'm 
mistaken.

This is what I mean when I say there appears to be an assumption that a 
mount is not followed immediately before the follow_link call in 
link_path_walk.

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-01  4:18           ` Ian Kent
  2005-06-01  8:28             ` Miklos Szeredi
@ 2005-06-03 17:26             ` Mike Waychison
  1 sibling, 0 replies; 37+ messages in thread
From: Mike Waychison @ 2005-06-03 17:26 UTC (permalink / raw)
  To: Ian Kent; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

Ian Kent wrote:
> On Tue, 31 May 2005, Miklos Szeredi wrote:
> 
> 
>>>namespace->sem did indeed surround the mounting and unmounting
>>>processes, however it never was meant to protect the integrety of the
>>>vfsmount tree, only namespace->list.
>>
>>I'm not sure about that.  The copy_tree() function relies on the tree
>>not chaning underneeth, and that's why it's called with
>>namespace->sem.  And vfsmount_lock can't be used, because copy_tree()
>>can sleep on allocations.
>>
>>
>>>>Maybe lookup_mnt shouldn't be static and should be exported instead?
>>>>
>>>
>>>lookup_mnt is the wrong operation, as it looks for a mountpoint given a
>>>vfsmount and dentry.  What you need is a way to walk the tree, and I'm
>>>inclined to say that something like next_mnt is what you are looking for.
>>
>>Can someone explain, why autofs needs to walk the mount tree?
> 
> 
> I generally don't need to walk the vfsmount tree except during expire (ref 
> may_umount_tree in namespace.c). What I find is that I often need to 
> locate a sibling vfsmount given a parent and a dentry (aka. lookup_mnt). 
> This is because, after a mount is triggered (the daemon is notified and 
> does the mount) I need it in order to follow it. I expect Mike has similar 
> needs.
> 

I handle this in the vfs layer due to it's hairyness and the fact that 
other filesystems may want to use it (samba, afs, autofs4, basically 
anything that wants to automount things from kernelspace).

Mike Waychison

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-05-31 15:55       ` Mike Waychison
  2005-05-31 16:08         ` Miklos Szeredi
@ 2005-06-01  3:57         ` Ian Kent
  2005-06-03 17:07           ` Mike Waychison
  1 sibling, 1 reply; 37+ messages in thread
From: Ian Kent @ 2005-06-01  3:57 UTC (permalink / raw)
  To: Mike Waychison; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

On Tue, 31 May 2005, Mike Waychison wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> Ian Kent wrote:
> > On Mon, 30 May 2005, Mike Waychison wrote:
> >> 
> >> No, the removal of struct namespace shouldn't keep you from walking
> >> vfsmounts from modules.  In fact, in Miklos' 1/8 patch, he moved
> >> namespace->sem to a global semaphore, which isn't needed by the end of
> >> the patch set.  namespace->sem only ever protected the namespace->list
> >> field.
> >  
> > But my impression was that this semaphore surrounded the mount and umount 
> > processes. The list updates looked like they were all done when this 
> > semaphore was held.
> > 
> 
> namespace->sem did indeed surround the mounting and unmounting
> processes, however it never was meant to protect the integrety of the
> vfsmount tree, only namespace->list.  Due to the lock nesting rules of
> vfs, it did infact exclude any tree operations, however the proper lock
> to have used was always vfsmount_lock
> 
> > 
> >> If you which to walk a mounttree, you still only ever require grabbing
> >> vfsmount_lock and use mntget/mntput.
> > 
> > 
> > Which is not exported.
> > 
> > So do you think it should be?
> 
> Yes.  Even better would be for a proper interface to be exported from
> fs/namespace.c though, so that as a module you don't care about the
> nitty gritty details of how the tree is held together.  This is likely
> to get even more complicated with time as Ram's patches get more testing.

For a start it's probably a good idea to define which of the current 
functions should be exposed and leave them exposed whether they are in use 
by other clients or not.

> 
> > Maybe lookup_mnt shouldn't be static and should be exported instead?
> > 
> 
> lookup_mnt is the wrong operation, as it looks for a mountpoint given a
> vfsmount and dentry.  What you need is a way to walk the tree, and I'm
> inclined to say that something like next_mnt is what you are looking for.

As it turns out lookup_mnt would work fine in this special case. The 
nameidata should always contain the parent of the required mount. I can 
see your point though, I'll think about it for a while.

> 
> > 
> >> Overall, this means autofs4 stuff will need to be updated to reflect
> >> these changes, but they are minimal at best.
> > 
> > 
> > OK. But I'm not sure how I can get at this from within a module.
> > 
> 
> Right now, the quickest way is to EXPORT_SYMBOL(vfsmount_lock) and grab
> it instead of namespace->sem.
> 
> > Perhaps something like updating the nameidata struct passed to 
> > follow_link, similar to what you did elsewhere in your autofsng 
> > patches, needs to be added to this lot.
> 
> I'm lost to what you mean.  The only place autofsng touches nameidata in
> the VFS is setting ->mnt to a non-stale mount before a ->follow_link.
> That symlinks even work without this change is almost coincidence.

That was exactly what I was saying.

Ian


^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [RFC][PATCH 1/8] namespace: use global namespace semaphore
  2005-06-01  3:57         ` Ian Kent
@ 2005-06-03 17:07           ` Mike Waychison
  0 siblings, 0 replies; 37+ messages in thread
From: Mike Waychison @ 2005-06-03 17:07 UTC (permalink / raw)
  To: Ian Kent; +Cc: Miklos Szeredi, linux-fsdevel, jamie, viro

Ian Kent wrote:
> On Tue, 31 May 2005, Mike Waychison wrote:
> 
> 
>>-----BEGIN PGP SIGNED MESSAGE-----
>>Hash: SHA1
>>
>>Ian Kent wrote:
>>
>>>On Mon, 30 May 2005, Mike Waychison wrote:
>>>
>>>>No, the removal of struct namespace shouldn't keep you from walking
>>>>vfsmounts from modules.  In fact, in Miklos' 1/8 patch, he moved
>>>>namespace->sem to a global semaphore, which isn't needed by the end of
>>>>the patch set.  namespace->sem only ever protected the namespace->list
>>>>field.
>>>
>>> 
>>>But my impression was that this semaphore surrounded the mount and umount 
>>>processes. The list updates looked like they were all done when this 
>>>semaphore was held.
>>>
>>
>>namespace->sem did indeed surround the mounting and unmounting
>>processes, however it never was meant to protect the integrety of the
>>vfsmount tree, only namespace->list.  Due to the lock nesting rules of
>>vfs, it did infact exclude any tree operations, however the proper lock
>>to have used was always vfsmount_lock
>>
>>
>>>>If you which to walk a mounttree, you still only ever require grabbing
>>>>vfsmount_lock and use mntget/mntput.
>>>
>>>
>>>Which is not exported.
>>>
>>>So do you think it should be?
>>
>>Yes.  Even better would be for a proper interface to be exported from
>>fs/namespace.c though, so that as a module you don't care about the
>>nitty gritty details of how the tree is held together.  This is likely
>>to get even more complicated with time as Ram's patches get more testing.
> 
> 
> For a start it's probably a good idea to define which of the current 
> functions should be exposed and leave them exposed whether they are in use 
> by other clients or not.

Well, the only valid module use of this stuff is autofs.  Change the 
interface and change the in-kernel uses of it for autofs4 (and autofs3 
if need be).

> 
> 
>>>Maybe lookup_mnt shouldn't be static and should be exported instead?
>>>
>>
>>lookup_mnt is the wrong operation, as it looks for a mountpoint given a
>>vfsmount and dentry.  What you need is a way to walk the tree, and I'm
>>inclined to say that something like next_mnt is what you are looking for.
> 
> 
> As it turns out lookup_mnt would work fine in this special case. The 
> nameidata should always contain the parent of the required mount. I can 
> see your point though, I'll think about it for a while.
> 

If lookup_mnt were used, you'd have to walk the dcache for each mount 
and do a lookup_mnt for each in-memory dentry if d_mounted!=0.  Not only 
is this a huge waste of cycles, but it's also very hairy as safely 
walking the dcache is a) harder than walking vfsmounts and b) over the 
edge in terms of intruding on VFS internals.

> 
>>>>Overall, this means autofs4 stuff will need to be updated to reflect
>>>>these changes, but they are minimal at best.
>>>
>>>
>>>OK. But I'm not sure how I can get at this from within a module.
>>>
>>
>>Right now, the quickest way is to EXPORT_SYMBOL(vfsmount_lock) and grab
>>it instead of namespace->sem.
>>
>>
>>>Perhaps something like updating the nameidata struct passed to 
>>>follow_link, similar to what you did elsewhere in your autofsng 
>>>patches, needs to be added to this lot.
>>
>>I'm lost to what you mean.  The only place autofsng touches nameidata in
>>the VFS is setting ->mnt to a non-stale mount before a ->follow_link.
>>That symlinks even work without this change is almost coincidence.
> 
> 
> That was exactly what I was saying.
> 

Ah.  Well, I don't know how having the right vfsmount at ->follow_link 
time will help you expire anything (that's the only use of vfsmount tree 
walking, isn't it?).  If anything, the ->follow_link'able dentry would 
be covered and the callback wouldn't happen.

Mike Waychison

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2005-06-04  7:37 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-30 13:19 [RFC][PATCH 1/8] namespace: use global namespace semaphore Miklos Szeredi
2005-05-31  2:58 ` Ian Kent
2005-05-31  3:23   ` Mike Waychison
2005-05-31  7:02     ` Ian Kent
2005-05-31  7:56       ` Miklos Szeredi
2005-05-31  9:34         ` Ian Kent
2005-05-31 10:05           ` Miklos Szeredi
2005-05-31 10:13             ` Miklos Szeredi
2005-05-31 15:43               ` Mike Waychison
2005-06-01  5:04               ` Ian Kent
2005-06-01  8:35                 ` Miklos Szeredi
2005-06-03  2:10                   ` Ian Kent
2005-06-03  5:28                     ` Miklos Szeredi
2005-06-04  4:13                       ` raven
2005-06-04  6:32                         ` Miklos Szeredi
2005-06-04  6:50                           ` raven
2005-06-04  7:20                             ` Miklos Szeredi
2005-06-04  7:27                               ` raven
2005-06-03 17:22                     ` Mike Waychison
2005-06-04  4:32                       ` raven
2005-06-03 17:13               ` Mike Waychison
2005-06-03 18:20                 ` Miklos Szeredi
2005-06-01  4:36             ` Ian Kent
2005-06-03 17:24               ` Mike Waychison
2005-06-04  4:38                 ` raven
2005-05-31 15:55       ` Mike Waychison
2005-05-31 16:08         ` Miklos Szeredi
2005-06-01  4:18           ` Ian Kent
2005-06-01  8:28             ` Miklos Szeredi
2005-06-03  2:23               ` Ian Kent
2005-06-03  5:18                 ` Miklos Szeredi
2005-06-04  4:53                   ` raven
2005-06-04  6:57                     ` Miklos Szeredi
2005-06-04  7:13                       ` raven
2005-06-03 17:26             ` Mike Waychison
2005-06-01  3:57         ` Ian Kent
2005-06-03 17:07           ` Mike Waychison

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).