All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@ftp.linux.org.uk>
To: torvalds@osdl.org
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linuxram@us.ibm.com
Subject: [PATCH 12/18] shared mount handling: bind and rbind
Date: Tue, 08 Nov 2005 02:01:31 +0000	[thread overview]
Message-ID: <E1EZInj-0001Ez-AV@ZenIV.linux.org.uk> (raw)

From: Ram Pai <linuxram@us.ibm.com>
Date: 1131401990 -0500

Handling of MS_BIND in presense of shared mounts (see
Documentation/sharedsubtree.txt in the end of patch series
for detailed description).

Signed-off-by: Ram Pai (linuxram@us.ibm.com)
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

---

 fs/namespace.c     |  126 +++++++++++++++++++++++++++++++++++++++++++---------
 fs/pnode.c         |   81 +++++++++++++++++++++++++++++++++
 fs/pnode.h         |   14 ++++++
 include/linux/fs.h |    5 ++
 4 files changed, 204 insertions(+), 22 deletions(-)

61916467f11424161a197c5aab53a6bb79619911
diff --git a/fs/namespace.c b/fs/namespace.c
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -28,8 +28,6 @@
 
 extern int __init init_rootfs(void);
 
-#define CL_EXPIRE 	0x01
-
 #ifdef CONFIG_SYSFS
 extern int __init sysfs_init(void);
 #else
@@ -145,13 +143,43 @@ static void detach_mnt(struct vfsmount *
 	old_nd->dentry->d_mounted--;
 }
 
+void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
+			struct vfsmount *child_mnt)
+{
+	child_mnt->mnt_parent = mntget(mnt);
+	child_mnt->mnt_mountpoint = dget(dentry);
+	dentry->d_mounted++;
+}
+
 static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
 {
-	mnt->mnt_parent = mntget(nd->mnt);
-	mnt->mnt_mountpoint = dget(nd->dentry);
-	list_add(&mnt->mnt_hash, mount_hashtable + hash(nd->mnt, nd->dentry));
+	mnt_set_mountpoint(nd->mnt, nd->dentry, mnt);
+	list_add_tail(&mnt->mnt_hash, mount_hashtable +
+			hash(nd->mnt, nd->dentry));
 	list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-	nd->dentry->d_mounted++;
+}
+
+/*
+ * the caller must hold vfsmount_lock
+ */
+static void commit_tree(struct vfsmount *mnt)
+{
+	struct vfsmount *parent = mnt->mnt_parent;
+	struct vfsmount *m;
+	LIST_HEAD(head);
+	struct namespace *n = parent->mnt_namespace;
+
+	BUG_ON(parent == mnt);
+
+	list_add_tail(&head, &mnt->mnt_list);
+	list_for_each_entry(m, &head, mnt_list)
+		m->mnt_namespace = n;
+	list_splice(&head, n->list.prev);
+
+	list_add_tail(&mnt->mnt_hash, mount_hashtable +
+				hash(parent, mnt->mnt_mountpoint));
+	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+	touch_namespace(n);
 }
 
 static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
@@ -183,7 +211,11 @@ static struct vfsmount *clone_mnt(struct
 		mnt->mnt_root = dget(root);
 		mnt->mnt_mountpoint = mnt->mnt_root;
 		mnt->mnt_parent = mnt;
-		mnt->mnt_namespace = current->namespace;
+
+		if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
+			list_add(&mnt->mnt_share, &old->mnt_share);
+		if (flag & CL_MAKE_SHARED)
+			set_mnt_shared(mnt);
 
 		/* stick the duplicate mount on the same expiry list
 		 * as the original if that was on one */
@@ -379,7 +411,7 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-static void release_mounts(struct list_head *head)
+void release_mounts(struct list_head *head)
 {
 	struct vfsmount *mnt;
 	while(!list_empty(head)) {
@@ -401,7 +433,7 @@ static void release_mounts(struct list_h
 	}
 }
 
-static void umount_tree(struct vfsmount *mnt, struct list_head *kill)
+void umount_tree(struct vfsmount *mnt, struct list_head *kill)
 {
 	struct vfsmount *p;
 
@@ -581,7 +613,7 @@ static int lives_below_in_same_fs(struct
 	}
 }
 
-static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
+struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
 					int flag)
 {
 	struct vfsmount *res, *p, *q, *r, *s;
@@ -626,6 +658,67 @@ Enomem:
 	return NULL;
 }
 
+/*
+ *  @source_mnt : mount tree to be attached
+ *  @nd        : place the mount tree @source_mnt is attached
+ *
+ *  NOTE: in the table below explains the semantics when a source mount
+ *  of a given type is attached to a destination mount of a given type.
+ * 	---------------------------------------------
+ * 	|         BIND MOUNT OPERATION              |
+ * 	|********************************************
+ * 	| source-->| shared        |       private  |
+ * 	| dest     |               |                |
+ * 	|   |      |               |                |
+ * 	|   v      |               |                |
+ * 	|********************************************
+ * 	|  shared  | shared (++)   |     shared (+) |
+ * 	|          |               |                |
+ * 	|non-shared| shared (+)    |      private   |
+ * 	*********************************************
+ * A bind operation clones the source mount and mounts the clone on the
+ * destination mount.
+ *
+ * (++)  the cloned mount is propagated to all the mounts in the propagation
+ * 	 tree of the destination mount and the cloned mount is added to
+ * 	 the peer group of the source mount.
+ * (+)   the cloned mount is created under the destination mount and is marked
+ *       as shared. The cloned mount is added to the peer group of the source
+ *       mount.
+ *
+ * if the source mount is a tree, the operations explained above is
+ * applied to each mount in the tree.
+ * Must be called without spinlocks held, since this function can sleep
+ * in allocations.
+ */
+static int attach_recursive_mnt(struct vfsmount *source_mnt,
+				struct nameidata *nd)
+{
+	LIST_HEAD(tree_list);
+	struct vfsmount *dest_mnt = nd->mnt;
+	struct dentry *dest_dentry = nd->dentry;
+	struct vfsmount *child, *p;
+
+	if (propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list))
+		return -EINVAL;
+
+	if (IS_MNT_SHARED(dest_mnt)) {
+		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
+			set_mnt_shared(p);
+	}
+
+	spin_lock(&vfsmount_lock);
+	mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
+	commit_tree(source_mnt);
+
+	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
+		list_del_init(&child->mnt_hash);
+		commit_tree(child);
+	}
+	spin_unlock(&vfsmount_lock);
+	return 0;
+}
+
 static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
 {
 	int err;
@@ -646,17 +739,8 @@ static int graft_tree(struct vfsmount *m
 		goto out_unlock;
 
 	err = -ENOENT;
-	spin_lock(&vfsmount_lock);
-	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
-		struct list_head head;
-
-		attach_mnt(mnt, nd);
-		list_add_tail(&head, &mnt->mnt_list);
-		list_splice(&head, current->namespace->list.prev);
-		err = 0;
-		touch_namespace(current->namespace);
-	}
-	spin_unlock(&vfsmount_lock);
+	if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry))
+		err = attach_recursive_mnt(mnt, nd);
 out_unlock:
 	up(&nd->dentry->d_inode->i_sem);
 	if (!err)
diff --git a/fs/pnode.c b/fs/pnode.c
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -20,9 +20,88 @@ static inline struct vfsmount *next_peer
 void change_mnt_propagation(struct vfsmount *mnt, int type)
 {
 	if (type == MS_SHARED) {
-		mnt->mnt_flags |= MNT_SHARED;
+		set_mnt_shared(mnt);
 	} else {
 		list_del_init(&mnt->mnt_share);
 		mnt->mnt_flags &= ~MNT_PNODE_MASK;
 	}
 }
+
+/*
+ * get the next mount in the propagation tree.
+ * @m: the mount seen last
+ * @origin: the original mount from where the tree walk initiated
+ */
+static struct vfsmount *propagation_next(struct vfsmount *m,
+					 struct vfsmount *origin)
+{
+	m = next_peer(m);
+	if (m == origin)
+		return NULL;
+	return m;
+}
+
+/*
+ * mount 'source_mnt' under the destination 'dest_mnt' at
+ * dentry 'dest_dentry'. And propagate that mount to
+ * all the peer and slave mounts of 'dest_mnt'.
+ * Link all the new mounts into a propagation tree headed at
+ * source_mnt. Also link all the new mounts using ->mnt_list
+ * headed at source_mnt's ->mnt_list
+ *
+ * @dest_mnt: destination mount.
+ * @dest_dentry: destination dentry.
+ * @source_mnt: source mount.
+ * @tree_list : list of heads of trees to be attached.
+ */
+int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
+		    struct vfsmount *source_mnt, struct list_head *tree_list)
+{
+	struct vfsmount *m, *child;
+	int ret = 0;
+	struct vfsmount *prev_dest_mnt = dest_mnt;
+	struct vfsmount *prev_src_mnt  = source_mnt;
+	LIST_HEAD(tmp_list);
+	LIST_HEAD(umount_list);
+
+	for (m = propagation_next(dest_mnt, dest_mnt); m;
+			m = propagation_next(m, dest_mnt)) {
+		int type = CL_PROPAGATION;
+
+		if (IS_MNT_NEW(m))
+			continue;
+
+		if (IS_MNT_SHARED(m))
+			type |= CL_MAKE_SHARED;
+
+		if (!(child = copy_tree(source_mnt, source_mnt->mnt_root,
+						type))) {
+			ret = -ENOMEM;
+			list_splice(tree_list, tmp_list.prev);
+			goto out;
+		}
+
+		if (is_subdir(dest_dentry, m->mnt_root)) {
+			mnt_set_mountpoint(m, dest_dentry, child);
+			list_add_tail(&child->mnt_hash, tree_list);
+		} else {
+			/*
+			 * This can happen if the parent mount was bind mounted
+			 * on some subdirectory of a shared/slave mount.
+			 */
+			list_add_tail(&child->mnt_hash, &tmp_list);
+		}
+		prev_dest_mnt = m;
+		prev_src_mnt  = child;
+	}
+out:
+	spin_lock(&vfsmount_lock);
+	while (!list_empty(&tmp_list)) {
+		child = list_entry(tmp_list.next, struct vfsmount, mnt_hash);
+		list_del_init(&child->mnt_hash);
+		umount_tree(child, &umount_list);
+	}
+	spin_unlock(&vfsmount_lock);
+	release_mounts(&umount_list);
+	return ret;
+}
diff --git a/fs/pnode.h b/fs/pnode.h
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -12,7 +12,21 @@
 #include <linux/mount.h>
 
 #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
+#define IS_MNT_NEW(mnt)  (!mnt->mnt_namespace)
 #define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
 
+#define CL_EXPIRE    		0x01
+#define CL_COPY_ALL 		0x04
+#define CL_MAKE_SHARED 		0x08
+#define CL_PROPAGATION 		0x10
+
+static inline void set_mnt_shared(struct vfsmount *mnt)
+{
+	mnt->mnt_flags &= ~MNT_PNODE_MASK;
+	mnt->mnt_flags |= MNT_SHARED;
+}
+
 void change_mnt_propagation(struct vfsmount *, int);
+int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
+		struct list_head *);
 #endif /* _LINUX_PNODE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1251,7 +1251,12 @@ extern int unregister_filesystem(struct 
 extern struct vfsmount *kern_mount(struct file_system_type *);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
+extern void umount_tree(struct vfsmount *, struct list_head *);
+extern void release_mounts(struct list_head *);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
+extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
+extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
+				  struct vfsmount *);
 
 extern int vfs_statfs(struct super_block *, struct kstatfs *);
 

             reply	other threads:[~2005-11-08  2:04 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-11-08  2:01 Al Viro [this message]
2005-11-08 14:11 ` [PATCH 12/18] shared mount handling: bind and rbind Miklos Szeredi
2005-11-08 15:48   ` Ram Pai
2005-11-08 15:55     ` Miklos Szeredi
2005-11-09 18:44       ` Ram Pai
2005-11-09 18:59         ` Linus Torvalds
2005-11-09 19:26           ` Al Viro
2005-11-09 19:28           ` Ram Pai
2005-11-16  3:29           ` Rob Landley
2005-11-16  3:53             ` Linus Torvalds
2005-11-16  5:35               ` Al Boldi
2005-11-16  8:19                 ` Miklos Szeredi
2005-11-16  9:10                   ` Rob Landley
2005-11-16 10:14                     ` Miklos Szeredi
2005-11-16 13:59                   ` Shaya Potter
2005-11-16 16:35                     ` Miklos Szeredi
2005-11-16 20:05                     ` Al Boldi
2005-11-16 20:21                       ` Shaya Potter
2005-11-16  8:47                 ` Rob Landley
2005-11-16  8:41               ` Rob Landley
2005-11-16 16:18                 ` Linus Torvalds
2005-11-09 10:54 ` Miklos Szeredi
2005-11-09 14:31   ` Al Viro
2005-11-09 15:22     ` Miklos Szeredi
2005-11-09 15:56       ` Al Viro
2005-11-09 16:33         ` Miklos Szeredi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=E1EZInj-0001Ez-AV@ZenIV.linux.org.uk \
    --to=viro@ftp.linux.org.uk \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxram@us.ibm.com \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.