All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] VFS autmounter support v3
@ 2003-06-20 16:17 David Howells
  2003-06-20 16:37 ` viro
  0 siblings, 1 reply; 2+ messages in thread
From: David Howells @ 2003-06-20 16:17 UTC (permalink / raw)
  To: Linus Torvalds, viro; +Cc: Kernel Mailing List, dhowells


Hi Linus, Al,

Okay, it turns out I don't really need a special operation to deal with
automount points... as HPA was pointing out, the same can be done by giving a
directory a follow_link() operation...

However, would you consent to accept this patch (or something similar)? It has
the actual automounting stuff taken out, leaving two parts:

 (1) A convenience function (__do_add_mount) that a module can call to insert
     a vfsmount into the mount topology at a point described by a struct
     nameidata (as would be passed to follow_link).

     This can be taken out if you insist on my bouncing the mount parameters
     down to userspace so that it can issue a mount - provided something
     approximating fmount() is also provided so that inter-namespace mounting
     can be done under controlled circumstances.

 (2) Automatic mount point expiry. This allows any mountpoint to be given a
     timeout, such that when mntput() detects that the vfsmount is only used
     by its parent, a work chitty will be enqueued to cause the containing
     namespace to be vacuumed later for dead mounts.

I'd also like to make it so that any mount can be given a "timeout" argument,
but I'm not sure what the best way to do so is.

David


diff -uNr linux-2.5.72/fs/namespace.c linux-2.5.72-auto/fs/namespace.c
--- linux-2.5.72/fs/namespace.c	2003-06-17 15:01:51.000000000 +0100
+++ linux-2.5.72-auto/fs/namespace.c	2003-06-20 14:36:58.000000000 +0100
@@ -30,6 +30,8 @@
 static int hash_mask, hash_bits;
 static kmem_cache_t *mnt_cache; 
 
+static void process_mount_expiry(void *_data);
+
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
 	unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES);
@@ -84,13 +86,9 @@
 	return p;
 }
 
-static int check_mnt(struct vfsmount *mnt)
+static inline int check_mnt(struct vfsmount *mnt)
 {
-	spin_lock(&dcache_lock);
-	while (mnt->mnt_parent != mnt)
-		mnt = mnt->mnt_parent;
-	spin_unlock(&dcache_lock);
-	return mnt == current->namespace->root;
+	return mnt->mnt_namespace == current->namespace;
 }
 
 static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
@@ -142,6 +140,9 @@
 		mnt->mnt_root = dget(root);
 		mnt->mnt_mountpoint = mnt->mnt_root;
 		mnt->mnt_parent = mnt;
+		mnt->mnt_namespace = old->mnt_namespace;
+		mnt->mnt_expiry_timeout = old->mnt_expiry_timeout;
+		mnt->mnt_expires_at = old->mnt_expires_at;
 	}
 	return mnt;
 }
@@ -530,6 +531,7 @@
 	}
 
 	if (mnt) {
+		mnt->mnt_expiry_timeout = 0;
 		err = graft_tree(mnt, nd);
 		if (err) {
 			spin_lock(&dcache_lock);
@@ -622,6 +624,7 @@
 
 	detach_mnt(old_nd.mnt, &parent_nd);
 	attach_mnt(old_nd.mnt, nd);
+	old_nd.mnt->mnt_expiry_timeout = 0;
 out2:
 	spin_unlock(&dcache_lock);
 out1:
@@ -634,23 +637,11 @@
 	return err;
 }
 
-static int do_add_mount(struct nameidata *nd, char *type, int flags,
-			int mnt_flags, char *name, void *data)
+int __do_add_mount(struct vfsmount *newmnt, struct nameidata *nd)
 {
-	struct vfsmount *mnt;
 	int err;
 
-	if (!type || !memchr(type, 0, PAGE_SIZE))
-		return -EINVAL;
-
-	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	mnt = do_kern_mount(type, flags, name, data);
-	err = PTR_ERR(mnt);
-	if (IS_ERR(mnt))
-		goto out;
+	newmnt->mnt_expires_at = get_seconds() + newmnt->mnt_expiry_timeout;
 
 	down_write(&current->namespace->sem);
 	/* Something was mounted here while we slept */
@@ -662,18 +653,131 @@
 
 	/* Refuse the same filesystem on the same mount point */
 	err = -EBUSY;
-	if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry)
+	if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
+	    nd->mnt->mnt_root == nd->dentry)
 		goto unlock;
 
-	mnt->mnt_flags = mnt_flags;
-	err = graft_tree(mnt, nd);
+	err = graft_tree(newmnt, nd);
 unlock:
 	up_write(&current->namespace->sem);
-	mntput(mnt);
-out:
+	mntput(newmnt);
 	return err;
 }
 
+EXPORT_SYMBOL_GPL(__do_add_mount);
+
+static int do_add_mount(struct nameidata *nd, char *type, int flags,
+			int mnt_flags, char *name, void *data)
+{
+	struct vfsmount *mnt;
+	int err;
+
+	if (!type || !memchr(type, 0, PAGE_SIZE))
+		return -EINVAL;
+
+	/* we need capabilities... */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mnt = do_kern_mount(type, flags, name, data);
+	err = PTR_ERR(mnt);
+	if (IS_ERR(mnt))
+		return err;
+
+	return __do_add_mount(mnt, nd);
+}
+
+static inline void set_mount_expiry_timer(struct namespace *namespace,
+					  unsigned long timeout)
+{
+	spin_lock(&dcache_lock);
+
+	if (atomic_read(&namespace->count) > 0) {
+		get_namespace(namespace);
+
+		if (!schedule_delayed_work(&namespace->mnt_expiry_work,
+					   (timeout + 1) * HZ))
+			put_namespace(namespace);
+	}
+
+	spin_unlock(&dcache_lock);
+}
+
+void mnt_begin_expiry(struct vfsmount *mnt)
+{
+	mnt->mnt_expires_at = get_seconds() + mnt->mnt_expiry_timeout;
+
+	set_mount_expiry_timer(mnt->mnt_namespace, mnt->mnt_expiry_timeout);
+}
+
+EXPORT_SYMBOL_GPL(mnt_begin_expiry);
+
+static void process_mount_expiry(void *_data)
+{
+	struct namespace *namespace = _data;
+	struct list_head *_p, *_n, graveyard;
+	struct vfsmount *mnt;
+	time_t now;
+	int timeout = INT_MAX, tmp;
+
+	INIT_LIST_HEAD(&graveyard);
+
+	down_write(&namespace->sem);
+
+	now = get_seconds();
+
+	list_for_each_safe(_p, _n, &namespace->list) {
+		mnt = list_entry(_p, struct vfsmount, mnt_list);
+
+		if (mnt->mnt_expiry_timeout &&
+		    atomic_read(&mnt->mnt_count) == 1) {
+			spin_lock(&dcache_lock);
+
+			if (atomic_read(&mnt->mnt_count) == 1) {
+				tmp = (int) mnt->mnt_expires_at - (int) now;
+				if (tmp <= 0) {
+					list_move_tail(&mnt->mnt_list,
+						       &graveyard);
+					list_del_init(&mnt->mnt_child);
+					list_del_init(&mnt->mnt_hash);
+					mnt->mnt_mountpoint->d_mounted--;
+				} else if (tmp < timeout) {
+					timeout = tmp;
+				}
+			}
+
+			spin_unlock(&dcache_lock);
+		}
+	}
+
+	up_write(&namespace->sem);
+
+	while (!list_empty(&graveyard)) {
+		mnt = list_entry(graveyard.next, struct vfsmount, mnt_list);
+		list_del_init(&mnt->mnt_list);
+
+		dput(xchg(&mnt->mnt_mountpoint, mnt->mnt_root));
+		mntput(xchg(&mnt->mnt_parent, mnt));
+
+		if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
+			/* last instance - try to be smart */
+			lock_kernel();
+			DQUOT_OFF(mnt->mnt_sb);
+			acct_auto_close(mnt->mnt_sb);
+			unlock_kernel();
+		}
+
+		mntput(mnt);
+	}
+
+	if (timeout != INT_MAX) {
+		set_mount_expiry_timer(namespace, timeout);
+	}
+	else {
+		put_namespace(namespace);
+	}
+}
+
 static int copy_mount_options (const void __user *data, unsigned long *where)
 {
 	int i;
@@ -800,6 +904,9 @@
 	init_rwsem(&new_ns->sem);
 	new_ns->root = NULL;
 	INIT_LIST_HEAD(&new_ns->list);
+	INIT_WORK(&new_ns->mnt_expiry_work,
+		  process_mount_expiry,
+		  new_ns);
 
 	down_write(&tsk->namespace->sem);
 	/* First pass: copy the tree topology */
@@ -816,6 +923,8 @@
 		p = namespace->root;
 		q = new_ns->root;
 		while (p) {
+			q->mnt_namespace = new_ns;
+
 			if (p == fs->rootmnt) {
 				rootmnt = p;
 				fs->rootmnt = mntget(q);
@@ -844,6 +953,8 @@
 	if (altrootmnt)
 		mntput(altrootmnt);
 
+	set_mount_expiry_timer(namespace, 1);
+
 	put_namespace(namespace);
 	return 0;
 
@@ -1079,9 +1190,13 @@
 		panic("Can't allocate initial namespace");
 	atomic_set(&namespace->count, 1);
 	INIT_LIST_HEAD(&namespace->list);
+	INIT_WORK(&namespace->mnt_expiry_work,
+		  process_mount_expiry,
+		  namespace);
 	init_rwsem(&namespace->sem);
 	list_add(&mnt->mnt_list, &namespace->list);
 	namespace->root = mnt;
+	mnt->mnt_namespace = namespace;
 
 	init_task.namespace = namespace;
 	read_lock(&tasklist_lock);
diff -uNr linux-2.5.72/fs/super.c linux-2.5.72-auto/fs/super.c
--- linux-2.5.72/fs/super.c	2003-06-17 15:01:51.000000000 +0100
+++ linux-2.5.72-auto/fs/super.c	2003-06-17 15:06:42.000000000 +0100
@@ -21,6 +21,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/acct.h>
@@ -683,6 +684,7 @@
 	mnt->mnt_root = dget(sb->s_root);
 	mnt->mnt_mountpoint = sb->s_root;
 	mnt->mnt_parent = mnt;
+	mnt->mnt_namespace = current->namespace;
 	up_write(&sb->s_umount);
 	put_filesystem(type);
 	return mnt;
@@ -697,6 +699,8 @@
 	return (struct vfsmount *)sb;
 }
 
+EXPORT_SYMBOL_GPL(do_kern_mount);
+
 struct vfsmount *kern_mount(struct file_system_type *type)
 {
 	return do_kern_mount(type->name, 0, type->name, NULL);
diff -uNr linux-2.5.72/include/linux/mount.h linux-2.5.72-auto/include/linux/mount.h
--- linux-2.5.72/include/linux/mount.h	2003-06-17 15:01:35.000000000 +0100
+++ linux-2.5.72-auto/include/linux/mount.h	2003-06-20 14:38:12.000000000 +0100
@@ -31,6 +31,9 @@
 	int mnt_flags;
 	char *mnt_devname;		/* Name of device e.g. /dev/dsk/hda1 */
 	struct list_head mnt_list;
+	struct namespace *mnt_namespace; /* containing namespace */
+	time_t mnt_expires_at;		/* time at which automount expires */
+	unsigned mnt_expiry_timeout;	/* expiry timeout (in seconds) or 0 */
 };
 
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -40,11 +43,15 @@
 	return mnt;
 }
 
+extern void mnt_begin_expiry(struct vfsmount *mnt);
 extern void __mntput(struct vfsmount *mnt);
 
 static inline void mntput(struct vfsmount *mnt)
 {
 	if (mnt) {
+		if (atomic_read(&mnt->mnt_count) == 2 &&
+		    mnt->mnt_expiry_timeout)
+			mnt_begin_expiry(mnt);
 		if (atomic_dec_and_test(&mnt->mnt_count))
 			__mntput(mnt);
 	}
@@ -55,5 +62,7 @@
 extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
 				      const char *name, void *data);
 
+extern int __do_add_mount(struct vfsmount *newmnt, struct nameidata *nd);
+
 #endif
 #endif /* _LINUX_MOUNT_H */
diff -uNr linux-2.5.72/include/linux/namespace.h linux-2.5.72-auto/include/linux/namespace.h
--- linux-2.5.72/include/linux/namespace.h	2003-06-17 15:01:36.000000000 +0100
+++ linux-2.5.72-auto/include/linux/namespace.h	2003-06-17 15:06:42.000000000 +0100
@@ -10,6 +10,7 @@
 	struct vfsmount *	root;
 	struct list_head	list;
 	struct rw_semaphore	sem;
+	struct work_struct	mnt_expiry_work;
 };
 
 extern void umount_tree(struct vfsmount *);

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] VFS autmounter support v3
  2003-06-20 16:17 [PATCH] VFS autmounter support v3 David Howells
@ 2003-06-20 16:37 ` viro
  0 siblings, 0 replies; 2+ messages in thread
From: viro @ 2003-06-20 16:37 UTC (permalink / raw)
  To: David Howells; +Cc: Linus Torvalds, Kernel Mailing List

On Fri, Jun 20, 2003 at 05:17:16PM +0100, David Howells wrote:
 
>  (2) Automatic mount point expiry. This allows any mountpoint to be given a
>      timeout, such that when mntput() detects that the vfsmount is only used
>      by its parent, a work chitty will be enqueued to cause the containing
>      namespace to be vacuumed later for dead mounts.

Broken.
	a) it doesn't scale.  A single expirable mountpoint and we will be
walking a potentially very long list.
	b) the logic is wrong - you are scheduling "let's go and expire
stuff" when ->mnt_count drops far enough; put something like /usr/share
on a separate fs and observe what happens to ->mnt_count.  It will touch
the trigger value very often.  Better yet, do that with /usr/include and
start a big build.  You will have your expiry code triggered all the time,
even though the fs is in very active use.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2003-06-20 16:23 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-06-20 16:17 [PATCH] VFS autmounter support v3 David Howells
2003-06-20 16:37 ` viro

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.