* [RFC-2 PATCH 1/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 2/8] " Ram Pai
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: shared_private_slave.patch --]
[-- Type: text/x-patch; name=shared_private_slave.patch, Size: 14600 bytes --]
This patch adds the shared/private/slave support for VFS trees.
Signed by Ram Pai (linuxram@us.ibm.com)
fs/Makefile | 2
fs/dcache.c | 2
fs/namespace.c | 98 +++++++++++++++++++++++++++++++
fs/pnode.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 5 +
include/linux/mount.h | 44 ++++++++++++-
include/linux/pnode.h | 80 +++++++++++++++++++++++++
7 files changed, 385 insertions(+), 4 deletions(-)
Index: 2.6.12.work1/fs/namespace.c
===================================================================
--- 2.6.12.work1.orig/fs/namespace.c
+++ 2.6.12.work1/fs/namespace.c
@@ -22,6 +22,7 @@
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
+#include <linux/pnode.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -62,6 +63,7 @@ struct vfsmount *alloc_vfsmnt(const char
INIT_LIST_HEAD(&mnt->mnt_mounts);
INIT_LIST_HEAD(&mnt->mnt_list);
INIT_LIST_HEAD(&mnt->mnt_fslink);
+ INIT_LIST_HEAD(&mnt->mnt_pnode_mntlist);
if (name) {
int size = strlen(name)+1;
char *newname = kmalloc(size, GFP_KERNEL);
@@ -615,6 +617,100 @@ out_unlock:
return err;
}
+static int do_make_shared(struct vfsmount *mnt)
+{
+ int err=0;
+ struct vfspnode *old_pnode = NULL;
+ /*
+ * if the mount is already a slave mount,
+ * allocated a new pnode and make it
+ * a slave pnode of the original pnode.
+ */
+ if (IS_MNT_SLAVE(mnt)) {
+ old_pnode = mnt->mnt_pnode;
+ pnode_del_slave_mnt(mnt);
+ }
+ if(!IS_MNT_SHARED(mnt)) {
+ mnt->mnt_pnode = pnode_alloc();
+ if(!mnt->mnt_pnode) {
+ pnode_add_slave_mnt(old_pnode, mnt);
+ err = -ENOMEM;
+ goto out;
+ }
+ pnode_add_member_mnt(mnt->mnt_pnode, mnt);
+ }
+ if(old_pnode)
+ pnode_add_slave_pnode(old_pnode, mnt->mnt_pnode);
+ set_mnt_shared(mnt);
+out:
+ return err;
+}
+
+static int do_make_slave(struct vfsmount *mnt)
+{
+ int err=0;
+ struct vfspnode *old_pnode = NULL;
+
+ if (IS_MNT_SLAVE(mnt))
+ goto out;
+ /*
+ * only shared mounts can
+ * be made slave
+ */
+ if (!IS_MNT_SHARED(mnt)) {
+ err = -EINVAL;
+ goto out;
+ }
+ old_pnode = mnt->mnt_pnode;
+ pnode_del_member_mnt(mnt);
+ pnode_add_slave_mnt(old_pnode, mnt);
+ set_mnt_slave(mnt);
+
+out:
+ return err;
+}
+
+static int do_make_private(struct vfsmount *mnt)
+{
+ if(mnt->mnt_pnode)
+ pnode_disassociate_mnt(mnt);
+ set_mnt_private(mnt);
+ return 0;
+}
+
+/*
+ * recursively change the type of the mountpoint.
+ */
+static int do_change_type(struct nameidata *nd, int flag)
+{
+ struct vfsmount *m, *mnt = nd->mnt;
+ int err=0;
+
+ if (!(flag & MS_SHARED) && !(flag & MS_PRIVATE)
+ && !(flag & MS_SLAVE))
+ return -EINVAL;
+
+ if (nd->dentry != nd->mnt->mnt_root)
+ return -EINVAL;
+
+ spin_lock(&vfsmount_lock);
+ for (m = mnt; m; m = next_mnt(m, mnt)) {
+ switch (flag) {
+ case MS_SHARED:
+ err = do_make_shared(m);
+ break;
+ case MS_SLAVE:
+ err = do_make_slave(m);
+ break;
+ case MS_PRIVATE:
+ err = do_make_private(m);
+ break;
+ }
+ }
+ spin_unlock(&vfsmount_lock);
+ return err;
+}
+
/*
* do loopback mount.
*/
@@ -1049,6 +1145,8 @@ long do_mount(char * dev_name, char * di
data_page);
else if (flags & MS_BIND)
retval = do_loopback(&nd, dev_name, flags & MS_REC);
+ else if (flags & MS_SHARED || flags & MS_PRIVATE || flags & MS_SLAVE)
+ retval = do_change_type(&nd, flags);
else if (flags & MS_MOVE)
retval = do_move_mount(&nd, dev_name);
else
Index: 2.6.12.work1/fs/pnode.c
===================================================================
--- /dev/null
+++ 2.6.12.work1/fs/pnode.c
@@ -0,0 +1,158 @@
+/*
+ * linux/fs/pnode.c
+ *
+ * (C) Copyright IBM Corporation 2005.
+ * Released under GPL v2.
+ * Author : Ram Pai (linuxram@us.ibm.com)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/quotaops.h>
+#include <linux/acct.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/namespace.h>
+#include <linux/namei.h>
+#include <linux/security.h>
+#include <linux/mount.h>
+#include <linux/pnode.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <stdarg.h>
+
+
+static kmem_cache_t * pnode_cachep;
+
+/* spinlock for pnode related operations */
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfspnode_lock);
+
+
+void __init pnode_init(unsigned long mempages)
+{
+ pnode_cachep = kmem_cache_create("pnode_cache",
+ sizeof(struct vfspnode), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+}
+
+
+struct vfspnode * pnode_alloc(void)
+{
+ struct vfspnode *pnode = kmem_cache_alloc(pnode_cachep, GFP_KERNEL);
+ INIT_LIST_HEAD(&pnode->pnode_vfs);
+ INIT_LIST_HEAD(&pnode->pnode_slavevfs);
+ INIT_LIST_HEAD(&pnode->pnode_slavepnode);
+ INIT_LIST_HEAD(&pnode->pnode_peer_slave);
+ pnode->pnode_master = NULL;
+ pnode->pnode_flags = 0;
+ atomic_set(&pnode->pnode_count,0);
+ return pnode;
+}
+
+void pnode_free(struct vfspnode *pnode)
+{
+ kmem_cache_free(pnode_cachep, pnode);
+}
+
+/*
+ * __put_pnode() should be called with vfspnode_lock held
+ */
+void __put_pnode(struct vfspnode *pnode)
+{
+ struct vfspnode *tmp_pnode;
+ do {
+ tmp_pnode = pnode->pnode_master;
+ list_del_init(&pnode->pnode_peer_slave);
+ BUG_ON(!list_empty(&pnode->pnode_vfs));
+ BUG_ON(!list_empty(&pnode->pnode_slavevfs));
+ BUG_ON(!list_empty(&pnode->pnode_slavepnode));
+ pnode_free(pnode);
+ pnode = tmp_pnode;
+ if (!pnode || !atomic_dec_and_test(&pnode->pnode_count))
+ break;
+ } while(pnode);
+}
+
+static void inline pnode_add_mnt(struct vfspnode *pnode,
+ struct vfsmount *mnt, int slave)
+{
+ if (!pnode || !mnt)
+ return;
+ spin_lock(&vfspnode_lock);
+ mnt->mnt_pnode = pnode;
+ if (slave) {
+ set_mnt_slave(mnt);
+ list_add(&mnt->mnt_pnode_mntlist, &pnode->pnode_slavevfs);
+ } else {
+ set_mnt_shared(mnt);
+ list_add(&mnt->mnt_pnode_mntlist, &pnode->pnode_vfs);
+ }
+ get_pnode(pnode);
+ spin_unlock(&vfspnode_lock);
+}
+
+void pnode_add_member_mnt(struct vfspnode *pnode,
+ struct vfsmount *mnt)
+{
+ pnode_add_mnt(pnode, mnt, 0);
+}
+
+void pnode_add_slave_mnt(struct vfspnode *pnode,
+ struct vfsmount *mnt)
+{
+ pnode_add_mnt(pnode, mnt, 1);
+}
+
+
+void pnode_add_slave_pnode(struct vfspnode *pnode,
+ struct vfspnode *slave_pnode)
+{
+ if (!pnode || !slave_pnode)
+ return;
+ spin_lock(&vfspnode_lock);
+ slave_pnode->pnode_master = pnode;
+ slave_pnode->pnode_flags = 0;
+ list_add(&slave_pnode->pnode_peer_slave, &pnode->pnode_slavepnode);
+ get_pnode(pnode);
+ spin_unlock(&vfspnode_lock);
+}
+
+static void _pnode_disassociate_mnt(struct vfsmount *mnt)
+{
+ spin_lock(&vfspnode_lock);
+ list_del_init(&mnt->mnt_pnode_mntlist);
+ put_pnode_locked(mnt->mnt_pnode);
+ spin_unlock(&vfspnode_lock);
+ mnt->mnt_pnode = NULL;
+}
+
+void pnode_del_slave_mnt(struct vfsmount *mnt)
+{
+ if (!mnt)
+ return;
+ _pnode_disassociate_mnt(mnt);
+ CLEAR_MNT_SLAVE(mnt);
+}
+
+void pnode_del_member_mnt(struct vfsmount *mnt)
+{
+ if (!mnt)
+ return;
+ _pnode_disassociate_mnt(mnt);
+ CLEAR_MNT_SHARED(mnt);
+}
+
+
+void pnode_disassociate_mnt(struct vfsmount *mnt)
+{
+ if (!mnt)
+ return;
+ _pnode_disassociate_mnt(mnt);
+ CLEAR_MNT_SHARED(mnt);
+ CLEAR_MNT_SLAVE(mnt);
+}
Index: 2.6.12.work1/fs/dcache.c
===================================================================
--- 2.6.12.work1.orig/fs/dcache.c
+++ 2.6.12.work1/fs/dcache.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/file.h>
+#include <linux/pnode.h>
#include <asm/uaccess.h>
#include <linux/security.h>
#include <linux/seqlock.h>
@@ -1737,6 +1738,7 @@ void __init vfs_caches_init(unsigned lon
inode_init(mempages);
files_init(mempages);
mnt_init(mempages);
+ pnode_init(mempages);
bdev_cache_init();
chrdev_init();
}
Index: 2.6.12.work1/include/linux/fs.h
===================================================================
--- 2.6.12.work1.orig/include/linux/fs.h
+++ 2.6.12.work1/include/linux/fs.h
@@ -102,6 +102,9 @@ extern int dir_notify_enable;
#define MS_MOVE 8192
#define MS_REC 16384
#define MS_VERBOSE 32768
+#define MS_PRIVATE (1<<18) /* recursively change to private */
+#define MS_SLAVE (1<<19) /* recursively change to slave */
+#define MS_SHARED (1<<20) /* recursively change to shared */
#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)
@@ -232,6 +235,7 @@ extern void update_atime (struct inode *
extern void __init inode_init(unsigned long);
extern void __init inode_init_early(void);
extern void __init mnt_init(unsigned long);
+extern void __init pnode_init(unsigned long);
extern void __init files_init(unsigned long);
struct buffer_head;
@@ -1211,6 +1215,7 @@ extern struct vfsmount *kern_mount(struc
extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
+extern struct vfsmount *do_make_mounted(struct vfsmount *, struct dentry *);
extern int vfs_statfs(struct super_block *, struct kstatfs *);
Index: 2.6.12.work1/include/linux/pnode.h
===================================================================
--- /dev/null
+++ 2.6.12.work1/include/linux/pnode.h
@@ -0,0 +1,80 @@
+/*
+ * linux/fs/pnode.c
+ *
+ * (C) Copyright IBM Corporation 2005.
+ * Released under GPL v2.
+ *
+ */
+#ifndef _LINUX_PNODE_H
+#define _LINUX_PNODE_H
+
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+struct vfspnode {
+ struct list_head pnode_vfs; /* list of vfsmounts anchored here */
+ struct list_head pnode_slavevfs; /* list of slave vfsmounts */
+ struct list_head pnode_slavepnode;/* list of slave pnode */
+ struct list_head pnode_peer_slave;/* going through master's slave pnode
+ list*/
+ struct vfspnode *pnode_master; /* master pnode */
+ int pnode_flags;
+ atomic_t pnode_count;
+};
+#define PNODE_MAX_SLAVE_LEVEL 10
+#define PNODE_DELETE 0x01
+#define PNODE_SLAVE 0x02
+
+#define IS_PNODE_DELETE(pn) ((pn->pnode_flags&PNODE_DELETE)==PNODE_DELETE)
+#define IS_PNODE_SLAVE(pn) ((pn->pnode_flags&PNODE_SLAVE)==PNODE_SLAVE)
+#define SET_PNODE_DELETE(pn) pn->pnode_flags |= PNODE_DELETE
+#define SET_PNODE_SLAVE(pn) pn->pnode_flags |= PNODE_SLAVE
+
+extern spinlock_t vfspnode_lock;
+extern void __put_pnode(struct vfspnode *);
+
+static inline struct vfspnode *
+get_pnode(struct vfspnode *pnode)
+{
+ if (!pnode)
+ return NULL;
+ atomic_inc(&pnode->pnode_count);
+ return pnode;
+}
+
+static inline void
+put_pnode(struct vfspnode *pnode)
+{
+ if (!pnode)
+ return;
+ if (atomic_dec_and_lock(&pnode->pnode_count, &vfspnode_lock)) {
+ __put_pnode(pnode);
+ spin_unlock(&vfspnode_lock);
+ }
+}
+
+/*
+ * must be called holding the vfspnode_lock
+ */
+static inline void
+put_pnode_locked(struct vfspnode *pnode)
+{
+ if (!pnode)
+ return;
+ if (atomic_dec_and_test(&pnode->pnode_count)) {
+ __put_pnode(pnode);
+ }
+}
+
+void __init pnode_init(unsigned long );
+struct vfspnode * pnode_alloc(void);
+void pnode_add_slave_mnt(struct vfspnode *, struct vfsmount *);
+void pnode_add_member_mnt(struct vfspnode *, struct vfsmount *);
+void pnode_del_slave_mnt(struct vfsmount *);
+void pnode_del_member_mnt(struct vfsmount *);
+void pnode_disassociate_mnt(struct vfsmount *);
+void pnode_add_slave_pnode(struct vfspnode *, struct vfspnode *);
+struct vfsmount * pnode_make_mounted(struct vfspnode *, struct vfsmount *, struct dentry *);
+#endif /* _LINUX_PNODE_H */
Index: 2.6.12.work1/include/linux/mount.h
===================================================================
--- 2.6.12.work1.orig/include/linux/mount.h
+++ 2.6.12.work1/include/linux/mount.h
@@ -16,9 +16,21 @@
#include <linux/spinlock.h>
#include <asm/atomic.h>
-#define MNT_NOSUID 1
-#define MNT_NODEV 2
-#define MNT_NOEXEC 4
+#define MNT_NOSUID 0x01
+#define MNT_NODEV 0x02
+#define MNT_NOEXEC 0x04
+#define MNT_PRIVATE 0x10 /* if the vfsmount is private, by default it is private*/
+#define MNT_SLAVE 0x20 /* if the vfsmount is a slave mount of its pnode */
+#define MNT_SHARED 0x40 /* if the vfsmount is a slave mount of its pnode */
+#define MNT_PNODE_MASK 0xf0 /* propogation flag mask */
+
+#define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
+#define IS_MNT_SLAVE(mnt) (mnt->mnt_flags & MNT_SLAVE)
+#define IS_MNT_PRIVATE(mnt) (mnt->mnt_flags & MNT_PRIVATE)
+
+#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_SHARED))
+#define CLEAR_MNT_PRIVATE(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_PRIVATE))
+#define CLEAR_MNT_SLAVE(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_SLAVE))
struct vfsmount
{
@@ -29,6 +41,10 @@ struct vfsmount
struct super_block *mnt_sb; /* pointer to superblock */
struct list_head mnt_mounts; /* list of children, anchored here */
struct list_head mnt_child; /* and going through their mnt_child */
+ struct list_head mnt_pnode_mntlist;/* and going through their
+ pnode's vfsmount */
+ struct vfspnode *mnt_pnode; /* and going through their
+ pnode's vfsmount */
atomic_t mnt_count;
int mnt_flags;
int mnt_expiry_mark; /* true if marked for expiry */
@@ -38,6 +54,28 @@ struct vfsmount
struct namespace *mnt_namespace; /* containing namespace */
};
+static inline void set_mnt_shared(struct vfsmount *mnt)
+{
+ mnt->mnt_flags |= MNT_PNODE_MASK & MNT_SHARED;
+ CLEAR_MNT_PRIVATE(mnt);
+ CLEAR_MNT_SLAVE(mnt);
+}
+
+static inline void set_mnt_private(struct vfsmount *mnt)
+{
+ mnt->mnt_flags |= MNT_PNODE_MASK & MNT_PRIVATE;
+ CLEAR_MNT_SLAVE(mnt);
+ CLEAR_MNT_SHARED(mnt);
+ mnt->mnt_pnode = NULL;
+}
+
+static inline void set_mnt_slave(struct vfsmount *mnt)
+{
+ mnt->mnt_flags |= MNT_PNODE_MASK & MNT_SLAVE;
+ CLEAR_MNT_PRIVATE(mnt);
+ CLEAR_MNT_SHARED(mnt);
+}
+
static inline struct vfsmount *mntget(struct vfsmount *mnt)
{
if (mnt)
Index: 2.6.12.work1/fs/Makefile
===================================================================
--- 2.6.12.work1.orig/fs/Makefile
+++ 2.6.12.work1/fs/Makefile
@@ -8,7 +8,7 @@
obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \
block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
- attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
+ attr.o bad_inode.o file.o filesystems.o namespace.o pnode.o aio.o \
seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
obj-$(CONFIG_EPOLL) += eventpoll.o
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC-2 PATCH 2/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 1/8] " Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 3/8] " Ram Pai
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: unclone.patch --]
[-- Type: text/x-patch; name=unclone.patch, Size: 4306 bytes --]
Adds the ability to unclone a vfs tree. An uncloned vfs tree will not be
clonable, and hence cannot be bind/rbind-mounted at any other mountpoint.
RP
fs/namespace.c | 15 ++++++++++++++-
include/linux/fs.h | 1 +
include/linux/mount.h | 15 +++++++++++++++
3 files changed, 30 insertions(+), 1 deletion(-)
Index: 2.6.12.work1/fs/namespace.c
===================================================================
--- 2.6.12.work1.orig/fs/namespace.c
+++ 2.6.12.work1/fs/namespace.c
@@ -678,6 +678,14 @@ static int do_make_private(struct vfsmou
return 0;
}
+static int do_make_unclone(struct vfsmount *mnt)
+{
+ if(mnt->mnt_pnode)
+ pnode_disassociate_mnt(mnt);
+ set_mnt_unclone(mnt);
+ return 0;
+}
+
/*
* recursively change the type of the mountpoint.
*/
@@ -687,6 +695,7 @@ static int do_change_type(struct nameida
int err=0;
if (!(flag & MS_SHARED) && !(flag & MS_PRIVATE)
+ && !(flag & MS_UNCLONE)
&& !(flag & MS_SLAVE))
return -EINVAL;
@@ -705,6 +714,9 @@ static int do_change_type(struct nameida
case MS_PRIVATE:
err = do_make_private(m);
break;
+ case MS_UNCLONE:
+ err = do_make_unclone(m);
+ break;
}
}
spin_unlock(&vfsmount_lock);
@@ -1145,7 +1157,8 @@ long do_mount(char * dev_name, char * di
data_page);
else if (flags & MS_BIND)
retval = do_loopback(&nd, dev_name, flags & MS_REC);
- else if (flags & MS_SHARED || flags & MS_PRIVATE || flags & MS_SLAVE)
+ else if (flags & MS_SHARED || flags & MS_UNCLONE ||
+ flags & MS_PRIVATE || flags & MS_SLAVE)
retval = do_change_type(&nd, flags);
else if (flags & MS_MOVE)
retval = do_move_mount(&nd, dev_name);
Index: 2.6.12.work1/include/linux/fs.h
===================================================================
--- 2.6.12.work1.orig/include/linux/fs.h
+++ 2.6.12.work1/include/linux/fs.h
@@ -102,6 +102,7 @@ extern int dir_notify_enable;
#define MS_MOVE 8192
#define MS_REC 16384
#define MS_VERBOSE 32768
+#define MS_UNCLONE (1<<17) /* recursively change to unclonnable */
#define MS_PRIVATE (1<<18) /* recursively change to private */
#define MS_SLAVE (1<<19) /* recursively change to slave */
#define MS_SHARED (1<<20) /* recursively change to shared */
Index: 2.6.12.work1/include/linux/mount.h
===================================================================
--- 2.6.12.work1.orig/include/linux/mount.h
+++ 2.6.12.work1/include/linux/mount.h
@@ -22,15 +22,18 @@
#define MNT_PRIVATE 0x10 /* if the vfsmount is private, by default it is private*/
#define MNT_SLAVE 0x20 /* if the vfsmount is a slave mount of its pnode */
#define MNT_SHARED 0x40 /* if the vfsmount is a slave mount of its pnode */
+#define MNT_UNCLONE 0x80 /* if the vfsmount is unclonable */
#define MNT_PNODE_MASK 0xf0 /* propogation flag mask */
#define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
#define IS_MNT_SLAVE(mnt) (mnt->mnt_flags & MNT_SLAVE)
#define IS_MNT_PRIVATE(mnt) (mnt->mnt_flags & MNT_PRIVATE)
+#define IS_MNT_UNCLONE(mnt) (mnt->mnt_flags & MNT_UNCLONE)
#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_SHARED))
#define CLEAR_MNT_PRIVATE(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_PRIVATE))
#define CLEAR_MNT_SLAVE(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_SLAVE))
+#define CLEAR_MNT_UNCLONE(mnt) (mnt->mnt_flags &= ~(MNT_PNODE_MASK & MNT_UNCLONE))
struct vfsmount
{
@@ -59,6 +62,7 @@ static inline void set_mnt_shared(struct
mnt->mnt_flags |= MNT_PNODE_MASK & MNT_SHARED;
CLEAR_MNT_PRIVATE(mnt);
CLEAR_MNT_SLAVE(mnt);
+ CLEAR_MNT_UNCLONE(mnt);
}
static inline void set_mnt_private(struct vfsmount *mnt)
@@ -66,6 +70,16 @@ static inline void set_mnt_private(struc
mnt->mnt_flags |= MNT_PNODE_MASK & MNT_PRIVATE;
CLEAR_MNT_SLAVE(mnt);
CLEAR_MNT_SHARED(mnt);
+ CLEAR_MNT_UNCLONE(mnt);
+ mnt->mnt_pnode = NULL;
+}
+
+static inline void set_mnt_unclone(struct vfsmount *mnt)
+{
+ mnt->mnt_flags |= MNT_PNODE_MASK & MNT_UNCLONE;
+ CLEAR_MNT_SLAVE(mnt);
+ CLEAR_MNT_SHARED(mnt);
+ CLEAR_MNT_PRIVATE(mnt);
mnt->mnt_pnode = NULL;
}
@@ -74,6 +88,7 @@ static inline void set_mnt_slave(struct
mnt->mnt_flags |= MNT_PNODE_MASK & MNT_SLAVE;
CLEAR_MNT_PRIVATE(mnt);
CLEAR_MNT_SHARED(mnt);
+ CLEAR_MNT_UNCLONE(mnt);
}
static inline struct vfsmount *mntget(struct vfsmount *mnt)
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC-2 PATCH 3/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 1/8] " Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 2/8] " Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 4/8] " Ram Pai
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: rbind.patch --]
[-- Type: text/x-patch; name=rbind.patch, Size: 38890 bytes --]
Adds the ability to bind/rbind a shared/private/slave subtree and set up
propagation wherever needed.
RP
Signed by Ram Pai (linuxram@us.ibm.com)
fs/namespace.c | 559 ++++++++++++++++++++++++++++++++++++++++------
fs/pnode.c | 416 +++++++++++++++++++++++++++++++++-
include/linux/dcache.h | 2
include/linux/fs.h | 4
include/linux/namespace.h | 1
include/linux/pnode.h | 5
6 files changed, 906 insertions(+), 81 deletions(-)
Index: 2.6.12.work1/fs/namespace.c
===================================================================
--- 2.6.12.work1.orig/fs/namespace.c
+++ 2.6.12.work1/fs/namespace.c
@@ -42,7 +42,8 @@ static inline int sysfs_init(void)
static struct list_head *mount_hashtable;
static int hash_mask, hash_bits;
-static kmem_cache_t *mnt_cache;
+static kmem_cache_t *mnt_cache;
+static struct rw_semaphore namespace_sem;
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
@@ -54,7 +55,7 @@ static inline unsigned long hash(struct
struct vfsmount *alloc_vfsmnt(const char *name)
{
- struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
+ struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
if (mnt) {
memset(mnt, 0, sizeof(struct vfsmount));
atomic_set(&mnt->mnt_count,1);
@@ -86,7 +87,8 @@ void free_vfsmnt(struct vfsmount *mnt)
* Now, lookup_mnt increments the ref count before returning
* the vfsmount struct.
*/
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
+ struct dentry *root)
{
struct list_head * head = mount_hashtable + hash(mnt, dentry);
struct list_head * tmp = head;
@@ -99,7 +101,8 @@ struct vfsmount *lookup_mnt(struct vfsmo
if (tmp == head)
break;
p = list_entry(tmp, struct vfsmount, mnt_hash);
- if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
+ if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry &&
+ (root == NULL || p->mnt_root == root)) {
found = mntget(p);
break;
}
@@ -108,6 +111,37 @@ struct vfsmount *lookup_mnt(struct vfsmo
return found;
}
+struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+{
+ return __lookup_mnt(mnt, dentry, NULL);
+}
+
+static struct vfsmount *
+clone_mnt(struct vfsmount *old, struct dentry *root)
+{
+ struct super_block *sb = old->mnt_sb;
+ struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
+
+ if (mnt) {
+ mnt->mnt_flags = old->mnt_flags;
+ atomic_inc(&sb->s_active);
+ mnt->mnt_sb = sb;
+ mnt->mnt_root = dget(root);
+ mnt->mnt_mountpoint = mnt->mnt_root;
+ mnt->mnt_parent = mnt;
+ mnt->mnt_namespace = old->mnt_namespace;
+ mnt->mnt_pnode = get_pnode(old->mnt_pnode);
+
+ /* stick the duplicate mount on the same expiry list
+ * as the original if that was on one */
+ spin_lock(&vfsmount_lock);
+ if (!list_empty(&old->mnt_fslink))
+ list_add(&mnt->mnt_fslink, &old->mnt_fslink);
+ spin_unlock(&vfsmount_lock);
+ }
+ return mnt;
+}
+
static inline int check_mnt(struct vfsmount *mnt)
{
return mnt->mnt_namespace == current->namespace;
@@ -128,11 +162,70 @@ static void attach_mnt(struct vfsmount *
{
mnt->mnt_parent = mntget(nd->mnt);
mnt->mnt_mountpoint = dget(nd->dentry);
+ mnt->mnt_namespace = nd->mnt->mnt_namespace;
list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
nd->dentry->d_mounted++;
}
+static struct vfsmount *do_attach_mnt(struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct vfsmount *child_mnt)
+{
+ struct nameidata nd;
+ LIST_HEAD(head);
+
+ nd.mnt = mnt;
+ nd.dentry = dentry;
+ attach_mnt(child_mnt, &nd);
+ list_add_tail(&head, &child_mnt->mnt_list);
+ list_splice(&head, child_mnt->mnt_namespace->list.prev);
+ return child_mnt;
+}
+
+static void attach_prepare_mnt(struct vfsmount *mnt, struct nameidata *nd)
+{
+ mnt->mnt_parent = mntget(nd->mnt);
+ mnt->mnt_mountpoint = dget(nd->dentry);
+ nd->dentry->d_mounted++;
+}
+
+void do_attach_real_mnt(struct vfsmount *mnt)
+{
+ struct vfsmount *parent = mnt->mnt_parent;
+ BUG_ON(parent==mnt);
+ if(list_empty(&mnt->mnt_hash))
+ list_add(&mnt->mnt_hash,
+ mount_hashtable+hash(parent, mnt->mnt_mountpoint));
+ if(list_empty(&mnt->mnt_child))
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ mnt->mnt_namespace = parent->mnt_namespace;
+ list_add_tail(&mnt->mnt_list, &mnt->mnt_namespace->list);
+}
+
+struct vfsmount *do_attach_prepare_mnt(struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct vfsmount *template_mnt,
+ int clone_flag)
+{
+ struct vfsmount *child_mnt;
+ struct nameidata nd;
+
+ if (clone_flag) {
+ if(!(child_mnt = clone_mnt(template_mnt,
+ template_mnt->mnt_root)))
+ return NULL;
+ } else
+ child_mnt = template_mnt;
+
+ nd.mnt = mnt;
+ nd.dentry = dentry;
+
+ attach_prepare_mnt(child_mnt, &nd);
+
+ return child_mnt;
+}
+
static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
{
struct list_head *next = p->mnt_mounts.next;
@@ -149,29 +242,14 @@ static struct vfsmount *next_mnt(struct
return list_entry(next, struct vfsmount, mnt_child);
}
-static struct vfsmount *
-clone_mnt(struct vfsmount *old, struct dentry *root)
+static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
{
- struct super_block *sb = old->mnt_sb;
- struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
-
- if (mnt) {
- mnt->mnt_flags = old->mnt_flags;
- atomic_inc(&sb->s_active);
- mnt->mnt_sb = sb;
- mnt->mnt_root = dget(root);
- mnt->mnt_mountpoint = mnt->mnt_root;
- mnt->mnt_parent = mnt;
- mnt->mnt_namespace = old->mnt_namespace;
-
- /* stick the duplicate mount on the same expiry list
- * as the original if that was on one */
- spin_lock(&vfsmount_lock);
- if (!list_empty(&old->mnt_fslink))
- list_add(&mnt->mnt_fslink, &old->mnt_fslink);
- spin_unlock(&vfsmount_lock);
+ struct list_head *prev = p->mnt_mounts.prev;
+ while (prev != &p->mnt_mounts) {
+ p = list_entry(prev, struct vfsmount, mnt_child);
+ prev = p->mnt_mounts.prev;
}
- return mnt;
+ return p;
}
void __mntput(struct vfsmount *mnt)
@@ -191,7 +269,7 @@ static void *m_start(struct seq_file *m,
struct list_head *p;
loff_t l = *pos;
- down_read(&n->sem);
+ down_read(&namespace_sem);
list_for_each(p, &n->list)
if (!l--)
return list_entry(p, struct vfsmount, mnt_list);
@@ -208,8 +286,7 @@ static void *m_next(struct seq_file *m,
static void m_stop(struct seq_file *m, void *v)
{
- struct namespace *n = m->private;
- up_read(&n->sem);
+ up_read(&namespace_sem);
}
static inline void mangle(struct seq_file *m, const char *s)
@@ -433,7 +510,7 @@ static int do_umount(struct vfsmount *mn
return retval;
}
- down_write(¤t->namespace->sem);
+ down_write(&namespace_sem);
spin_lock(&vfsmount_lock);
if (atomic_read(&sb->s_active) == 1) {
@@ -455,7 +532,7 @@ static int do_umount(struct vfsmount *mn
spin_unlock(&vfsmount_lock);
if (retval)
security_sb_umount_busy(mnt);
- up_write(¤t->namespace->sem);
+ up_write(&namespace_sem);
return retval;
}
@@ -495,9 +572,9 @@ out:
#ifdef __ARCH_WANT_SYS_OLDUMOUNT
/*
- * The 2.0 compatible umount. No flags.
+ * The 2.0 compatible umount. No flags.
*/
-
+
asmlinkage long sys_oldumount(char __user * name)
{
return sys_umount(name,0);
@@ -541,6 +618,9 @@ static struct vfsmount *copy_tree(struct
struct list_head *h;
struct nameidata nd;
+ if (IS_MNT_UNCLONE(mnt))
+ return NULL;
+
res = q = clone_mnt(mnt, dentry);
if (!q)
goto Enomem;
@@ -549,10 +629,15 @@ static struct vfsmount *copy_tree(struct
p = mnt;
for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) {
r = list_entry(h, struct vfsmount, mnt_child);
+
if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
continue;
for (s = r; s; s = next_mnt(s, r)) {
+ if (IS_MNT_UNCLONE(s)) {
+ s = skip_mnt_tree(s);
+ continue;
+ }
while (p != s->mnt_parent) {
p = p->mnt_parent;
q = q->mnt_parent;
@@ -579,9 +664,179 @@ static struct vfsmount *copy_tree(struct
return NULL;
}
+ /*
+ * @source_mnt : mount tree to be attached
+ * @nd : place the mount tree @source_mnt is attached
+ *
+ * NOTE: in the table below explains the semantics when a source vfsmount
+ * of a given type is attached to a destination vfsmount of a give type.
+ * ---------------------------------------------------------------------
+ * | BIND MOUNT OPERATION |
+ * |*******************************************************************|
+ * | dest --> | shared | private | slave |unclonable |
+ * | source | | | | |
+ * | | | | | | |
+ * | v | | | | |
+ * |*******************************************************************|
+ * | | | | | |
+ * | shared | shared (++) | shared (+)|shared (+)| shared (+)|
+ * | | | | | |
+ * | | | | | |
+ * | private | shared (+) | private | private | private |
+ * | | | | | |
+ * | | | | | |
+ * | slave | shared (+) | private | private | private |
+ * | | | | | |
+ * | | | | | |
+ * | unclonable| - | - | - | - |
+ * | | | | | |
+ * | | | | | |
+ * ********************************************************************
+ *
+ * (++) the mount will be propogated to all the vfsmounts in the pnode tree
+ * of the destination vfsmount, and all the non-slave new mounts in
+ * destination vfsmount will be added the source vfsmount's pnode.
+ * (+) the mount will be propogated to the destination vfsmount
+ * and the new mount will be added to the source vfsmount's pnode.
+ *
+ * if the source mount is a tree, the operations explained above is
+ * applied to each
+ * vfsmount in the tree.
+ *
+ * Should be called without spinlocks held, because this function can sleep
+ * in allocations.
+ *
+ */
+static int attach_recursive_mnt(struct vfsmount *source_mnt,
+ struct nameidata *nd)
+{
+ struct vfsmount *mntpt_mnt, *m, *p;
+ struct vfspnode *src_pnode, *t_p, *dest_pnode, *tmp_pnode;
+ struct dentry *mntpt_dentry;
+ int ret;
+ LIST_HEAD(pnodehead);
+
+ mntpt_mnt = nd->mnt;
+ dest_pnode = IS_MNT_SHARED(mntpt_mnt) ? mntpt_mnt->mnt_pnode : NULL;
+ src_pnode = IS_MNT_SHARED(source_mnt) ? source_mnt->mnt_pnode : NULL;
+
+ if (!dest_pnode && !src_pnode) {
+ LIST_HEAD(head);
+ spin_lock(&vfsmount_lock);
+ do_attach_mnt(nd->mnt, nd->dentry, source_mnt);
+ spin_unlock(&vfsmount_lock);
+ goto out;
+ }
+
+ /*
+ * Ok, the source or the destination pnode exists.
+ * Get ready for pnode operations.
+ * Create a temporary pnode which shall hold all the
+ * new mounts. Merge or delete or slave that pnode
+ * later in a separate operation, depending on
+ * the type of source and destination mounts.
+ */
+ p = NULL;
+ for (m = source_mnt; m; m = next_mnt(m, source_mnt)) {
+ int unclone = IS_MNT_UNCLONE(m);
+
+ list_del_init(&m->mnt_list);
+
+ while (p && p != m->mnt_parent)
+ p = p->mnt_parent;
+
+ if (!p) {
+ mntpt_dentry = nd->dentry;
+ mntpt_mnt = nd->mnt;
+ } else {
+ mntpt_dentry = m->mnt_mountpoint;
+ mntpt_mnt = p;
+ }
+
+ p=m;
+ dest_pnode = IS_MNT_SHARED(mntpt_mnt) ?
+ mntpt_mnt->mnt_pnode : NULL;
+ src_pnode = (IS_MNT_SHARED(m))?
+ m->mnt_pnode : NULL;
+
+ m->mnt_pnode = NULL;
+ /*
+ * get a temporary pnode into which add the new vfs, and keep
+ * track of these pnodes and their real pnode.
+ */
+ if (!(tmp_pnode = pnode_alloc()))
+ return -ENOMEM;
+
+
+ if (dest_pnode && !unclone) {
+ if ((ret = pnode_prepare_mount(dest_pnode, tmp_pnode,
+ mntpt_dentry, m, mntpt_mnt)))
+ return ret;
+ } else {
+ if (m == m->mnt_parent)
+ do_attach_prepare_mnt(mntpt_mnt,
+ mntpt_dentry, m, 0);
+ pnode_add_member_mnt(tmp_pnode, m);
+ if (unclone) {
+ set_mnt_unclone(m);
+ m->mnt_pnode = tmp_pnode;
+ SET_PNODE_DELETE(tmp_pnode);
+ } else if (!src_pnode) {
+ set_mnt_private(m);
+ m->mnt_pnode = tmp_pnode;
+ SET_PNODE_DELETE(tmp_pnode);
+ }
+ /*
+ * NOTE: set_mnt_private() or
+ * set_mnt_unclone() resets the
+ * m->mnt_pnode information.
+ * reinitialize it. This is needed to
+ * decrement the refcount on the
+ * pnode when * the mount 'm' is
+ * unlinked in * pnode_real_mount().
+ */
+ }
+
+ tmp_pnode->pnode_master = src_pnode;
+ /*
+ * temporarily track the pnode with which the tmp_pnode
+ * has to merge with; in the pnode_master field.
+ */
+ list_add_tail(&tmp_pnode->pnode_peer_slave, &pnodehead);
+ }
+
+ /*
+ * new mounts. Merge or delete or slave the temporary pnode
+ */
+ spin_lock(&vfsmount_lock);
+ list_for_each_entry_safe(tmp_pnode, t_p, &pnodehead, pnode_peer_slave) {
+ int del_flag = IS_PNODE_DELETE(tmp_pnode);
+ struct vfspnode *master_pnode = tmp_pnode->pnode_master;
+ list_del_init(&tmp_pnode->pnode_peer_slave);
+ pnode_real_mount(tmp_pnode, del_flag);
+ if (!del_flag && master_pnode) {
+ tmp_pnode->pnode_master = NULL;
+ pnode_merge_pnode(tmp_pnode, master_pnode);
+ /*
+ * we don't need the extra reference to
+ * the master_pnode, that was created either
+ * (a) pnode_add_slave_pnode: when the mnt was made as
+ * a slave mnt.
+ * (b) pnode_merge_pnode: during clone_mnt().
+ */
+ put_pnode(master_pnode);
+ }
+ }
+ spin_unlock(&vfsmount_lock);
+out:
+ mntget(source_mnt);
+ return 0;
+}
+
static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
- int err;
+ int err, ret;
+
if (mnt->mnt_sb->s_flags & MS_NOUSER)
return -EINVAL;
@@ -599,17 +854,12 @@ static int graft_tree(struct vfsmount *m
goto out_unlock;
err = -ENOENT;
- spin_lock(&vfsmount_lock);
- if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
- struct list_head head;
- attach_mnt(mnt, nd);
- list_add_tail(&head, &mnt->mnt_list);
- list_splice(&head, current->namespace->list.prev);
- mntget(mnt);
- err = 0;
- }
+ spin_lock(&vfsmount_lock);
+ ret = (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry));
spin_unlock(&vfsmount_lock);
+ if (ret)
+ err = attach_recursive_mnt(mnt, nd);
out_unlock:
up(&nd->dentry->d_inode->i_sem);
if (!err)
@@ -686,6 +936,146 @@ static int do_make_unclone(struct vfsmou
return 0;
}
+ /*
+ * This operation is the equivalent of 'mount --bind dir dir':
+ * create a new mount at the dentry, and unmount all child mounts
+ * mounted on top of dentries below 'dentry', and mount them
+ * under the new mount.
+ */
+struct vfsmount *do_make_mounted(struct vfsmount *mnt, struct dentry *dentry)
+{
+ struct vfsmount *child_mnt, *next;
+ struct nameidata nd;
+ struct vfsmount *newmnt = clone_mnt(mnt, dentry);
+ LIST_HEAD(move);
+
+ /*
+ * note clone_mnt() gets a reference to the pnode.
+ * we won't use that pnode anyway. So just let it
+ * go
+ */
+ put_pnode(newmnt->mnt_pnode);
+ newmnt->mnt_pnode = NULL;
+
+ if (newmnt) {
+ /*
+ * walk through the mount list of mnt and move
+ * them under the new mount
+ */
+ spin_lock(&vfsmount_lock);
+ list_del_init(&newmnt->mnt_fslink);
+
+ list_for_each_entry_safe(child_mnt, next,
+ &mnt->mnt_mounts, mnt_child) {
+
+ if(child_mnt->mnt_mountpoint == dentry)
+ continue;
+
+ if(!is_subdir(child_mnt->mnt_mountpoint, dentry))
+ continue;
+
+ detach_mnt(child_mnt, &nd);
+ nd.mnt = newmnt;
+ attach_mnt(child_mnt, &nd);
+ }
+
+ nd.mnt = mnt;
+ nd.dentry = dentry;
+ do_attach_mnt(nd.mnt, nd.dentry, newmnt);
+ spin_unlock(&vfsmount_lock);
+ }
+ return newmnt;
+}
+
+ /*
+ * Inverse operation of do_make_mounted()
+ */
+int do_make_unmounted(struct vfsmount *mnt)
+{
+ struct vfsmount *parent_mnt, *child_mnt, *next;
+ struct nameidata nd;
+
+ /* validate if mount has a different parent */
+ parent_mnt = mnt->mnt_parent;
+ if (mnt == parent_mnt)
+ return 0;
+ /*
+ * cannot unmount a mount that is not created
+ * as an overlay mount.
+ */
+ if (mnt->mnt_mountpoint != mnt->mnt_root)
+ return -EINVAL;
+
+ /* for each submounts in the parent, put the mounts back */
+ spin_lock(&vfsmount_lock);
+ list_for_each_entry_safe(child_mnt, next, &mnt->mnt_mounts, mnt_child) {
+ detach_mnt(child_mnt, &nd);
+ nd.mnt = parent_mnt;
+ attach_mnt(child_mnt, &nd);
+ }
+ detach_mnt(mnt, &nd);
+ spin_unlock(&vfsmount_lock);
+ return 0;
+}
+
+/*
+ * @nd: contains the vfsmount and the dentry where the new mount
+ * is to be created
+ * @mnt: returns the newly created mount.
+ * Create a new mount at the location specified by 'nd' and
+ * propagate the mount to all other mounts if the mountpoint
+ * is under a shared mount.
+ */
+int make_mounted(struct nameidata *nd, struct vfsmount **mnt)
+{
+ struct vfsmount *parent_mnt;
+ struct dentry *parent_dentry;
+ int err = mount_is_safe(nd);
+ if (err)
+ return err;
+ parent_dentry = nd->dentry;
+ parent_mnt = nd->mnt;
+ /*
+ * check if dentry already has a vfsmount
+ * if it does not, create and attach
+ * a new vfsmount at that dentry.
+ * Also propagate the mount if parent_mnt
+ * is shared.
+ */
+ if(parent_dentry != parent_mnt->mnt_root) {
+ *mnt = IS_MNT_SHARED(parent_mnt) ?
+ pnode_make_mounted(parent_mnt->mnt_pnode,
+ parent_mnt, parent_dentry) :
+ do_make_mounted(parent_mnt, parent_dentry);
+ if (!*mnt)
+ err = -ENOMEM;
+ } else
+ *mnt = parent_mnt;
+ return err;
+}
+
+ /*
+ * Inverse operation of make_mounted()
+ */
+int make_unmounted(struct vfsmount *mnt)
+{
+ if (mnt == mnt->mnt_parent)
+ return 0;
+ /*
+ * cannot unmount a mount that is not created
+ * as a overlay mount.
+ */
+ if (mnt->mnt_mountpoint != mnt->mnt_root)
+ return -EINVAL;
+
+ if (IS_MNT_SHARED(mnt))
+ pnode_make_unmounted(mnt->mnt_pnode);
+ else
+ do_make_unmounted(mnt);
+
+ return 0;
+}
+
/*
* recursively change the type of the mountpoint.
*/
@@ -729,7 +1119,7 @@ static int do_change_type(struct nameida
static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
{
struct nameidata old_nd;
- struct vfsmount *mnt = NULL;
+ struct vfsmount *mnt = NULL, *overlay_mnt=NULL;
int err = mount_is_safe(nd);
if (err)
return err;
@@ -739,14 +1129,31 @@ static int do_loopback(struct nameidata
if (err)
return err;
- down_write(¤t->namespace->sem);
+ if (IS_MNT_UNCLONE(old_nd.mnt)) {
+ err = -EINVAL;
+ goto path_release;
+ }
+
+ down_write(&namespace_sem);
err = -EINVAL;
if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
+
+ /*
+ * If the dentry is not the root dentry, and if a bind
+ * from a shared subtree is attempted, create a mount
+ * at the dentry, and use the new mount as the starting
+ * point for the bind/rbind operation.
+ */
+ overlay_mnt = old_nd.mnt;
+ if(IS_MNT_SHARED(old_nd.mnt) &&
+ (err = make_mounted(&old_nd, &overlay_mnt)))
+ goto out;
+
err = -ENOMEM;
if (recurse)
- mnt = copy_tree(old_nd.mnt, old_nd.dentry);
+ mnt = copy_tree(overlay_mnt, old_nd.dentry);
else
- mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+ mnt = clone_mnt(overlay_mnt, old_nd.dentry);
}
if (mnt) {
@@ -757,15 +1164,25 @@ static int do_loopback(struct nameidata
err = graft_tree(mnt, nd);
if (err) {
- spin_lock(&vfsmount_lock);
- umount_tree(mnt);
- spin_unlock(&vfsmount_lock);
- } else
- mntput(mnt);
- }
+ spin_lock(&vfsmount_lock);
+ umount_tree(mnt);
+ spin_unlock(&vfsmount_lock);
+ /*
+ * ok we failed! so undo any overlay
+ * mount that we did earlier.
+ */
+ if (old_nd.mnt != overlay_mnt)
+ make_unmounted(overlay_mnt);
+ } else
+ mntput(mnt);
+ }
+
+ out:
+ up_write(&namespace_sem);
+
+ path_release:
+ path_release(&old_nd);
- up_write(¤t->namespace->sem);
- path_release(&old_nd);
return err;
}
@@ -813,7 +1230,7 @@ static int do_move_mount(struct nameidat
if (err)
return err;
- down_write(¤t->namespace->sem);
+ down_write(&namespace_sem);
while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
;
err = -EINVAL;
@@ -857,7 +1274,7 @@ out2:
out1:
up(&nd->dentry->d_inode->i_sem);
out:
- up_write(¤t->namespace->sem);
+ up_write(&namespace_sem);
if (!err)
path_release(&parent_nd);
path_release(&old_nd);
@@ -896,7 +1313,7 @@ int do_add_mount(struct vfsmount *newmnt
{
int err;
- down_write(¤t->namespace->sem);
+ down_write(&namespace_sem);
/* Something was mounted here while we slept */
while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
;
@@ -925,7 +1342,7 @@ int do_add_mount(struct vfsmount *newmnt
}
unlock:
- up_write(¤t->namespace->sem);
+ up_write(&namespace_sem);
mntput(newmnt);
return err;
}
@@ -981,7 +1398,7 @@ void mark_mounts_for_expiry(struct list_
get_namespace(namespace);
spin_unlock(&vfsmount_lock);
- down_write(&namespace->sem);
+ down_write(&namespace_sem);
spin_lock(&vfsmount_lock);
/* check that it is still dead: the count should now be 2 - as
@@ -1025,7 +1442,7 @@ void mark_mounts_for_expiry(struct list_
spin_unlock(&vfsmount_lock);
}
- up_write(&namespace->sem);
+ up_write(&namespace_sem);
mntput(mnt);
put_namespace(namespace);
@@ -1071,7 +1488,7 @@ int copy_mount_options(const void __user
int i;
unsigned long page;
unsigned long size;
-
+
*where = 0;
if (!data)
return 0;
@@ -1090,7 +1507,7 @@ int copy_mount_options(const void __user
i = size - exact_copy_from_user((void *)page, data, size);
if (!i) {
- free_page(page);
+ free_page(page);
return -EFAULT;
}
if (i != PAGE_SIZE)
@@ -1196,14 +1613,13 @@ int copy_namespace(int flags, struct tas
goto out;
atomic_set(&new_ns->count, 1);
- init_rwsem(&new_ns->sem);
INIT_LIST_HEAD(&new_ns->list);
- down_write(&tsk->namespace->sem);
+ down_write(&namespace_sem);
/* First pass: copy the tree topology */
new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
if (!new_ns->root) {
- up_write(&tsk->namespace->sem);
+ up_write(&namespace_sem);
kfree(new_ns);
goto out;
}
@@ -1237,7 +1653,7 @@ int copy_namespace(int flags, struct tas
p = next_mnt(p, namespace->root);
q = next_mnt(q, new_ns->root);
}
- up_write(&tsk->namespace->sem);
+ up_write(&namespace_sem);
tsk->namespace = new_ns;
@@ -1419,7 +1835,7 @@ asmlinkage long sys_pivot_root(const cha
user_nd.mnt = mntget(current->fs->rootmnt);
user_nd.dentry = dget(current->fs->root);
read_unlock(¤t->fs->lock);
- down_write(¤t->namespace->sem);
+ down_write(&namespace_sem);
down(&old_nd.dentry->d_inode->i_sem);
error = -EINVAL;
if (!check_mnt(user_nd.mnt))
@@ -1465,7 +1881,7 @@ asmlinkage long sys_pivot_root(const cha
path_release(&parent_nd);
out2:
up(&old_nd.dentry->d_inode->i_sem);
- up_write(¤t->namespace->sem);
+ up_write(&namespace_sem);
path_release(&user_nd);
path_release(&old_nd);
out1:
@@ -1492,7 +1908,6 @@ static void __init init_mount_tree(void)
panic("Can't allocate initial namespace");
atomic_set(&namespace->count, 1);
INIT_LIST_HEAD(&namespace->list);
- init_rwsem(&namespace->sem);
list_add(&mnt->mnt_list, &namespace->list);
namespace->root = mnt;
mnt->mnt_namespace = namespace;
@@ -1515,6 +1930,8 @@ void __init mnt_init(unsigned long mempa
unsigned int nr_hash;
int i;
+ init_rwsem(&namespace_sem);
+
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
@@ -1562,7 +1979,7 @@ void __put_namespace(struct namespace *n
{
struct vfsmount *mnt;
- down_write(&namespace->sem);
+ down_write(&namespace_sem);
spin_lock(&vfsmount_lock);
list_for_each_entry(mnt, &namespace->list, mnt_list) {
@@ -1571,6 +1988,6 @@ void __put_namespace(struct namespace *n
umount_tree(namespace->root);
spin_unlock(&vfsmount_lock);
- up_write(&namespace->sem);
+ up_write(&namespace_sem);
kfree(namespace);
}
Index: 2.6.12.work1/fs/pnode.c
===================================================================
--- 2.6.12.work1.orig/fs/pnode.c
+++ 2.6.12.work1/fs/pnode.c
@@ -26,6 +26,10 @@
#include <asm/unistd.h>
#include <stdarg.h>
+enum pnode_vfs_type {
+ PNODE_MEMBER_VFS = 0x01,
+ PNODE_SLAVE_VFS = 0x02
+};
static kmem_cache_t * pnode_cachep;
@@ -78,6 +82,72 @@ void __put_pnode(struct vfspnode *pnode)
} while(pnode);
}
+struct pcontext {
+ struct vfspnode *start;
+ int level;
+ struct vfspnode *master_pnode;
+ struct vfspnode *pnode;
+};
+
+/*
+ * Walk the pnode tree for each pnode encountered.
+ * @context: provides context on the state of the last walk in the pnode
+ * tree.
+ */
+static int pnode_next(struct pcontext *context)
+{
+ struct vfspnode *pnode = context->pnode;
+ struct vfspnode *master_pnode=context->master_pnode;
+ struct list_head *next;
+
+ if (!pnode) {
+ BUG_ON(!context->start);
+ get_pnode(context->start);
+ context->pnode = context->start;
+ context->master_pnode = NULL;
+ context->level = 0;
+ return 1;
+ }
+
+ spin_lock(&vfspnode_lock);
+ next = pnode->pnode_slavepnode.next;
+ if (next == &pnode->pnode_slavepnode) {
+ while (1) {
+ int flag;
+
+ if (pnode == context->start) {
+ put_pnode_locked(pnode);
+ spin_unlock(&vfspnode_lock);
+ BUG_ON(context->level != 0);
+ return 0;
+ }
+
+ next = pnode->pnode_peer_slave.next;
+ flag = (next != &pnode->pnode_master->pnode_slavepnode);
+ put_pnode_locked(pnode);
+
+ if (flag)
+ break;
+
+ pnode = master_pnode;
+ master_pnode = pnode->pnode_master;
+ context->level--;
+ }
+ } else {
+ master_pnode = pnode;
+ context->level++;
+ }
+
+ pnode = list_entry(next, struct vfspnode, pnode_peer_slave);
+ get_pnode(pnode);
+
+ context->pnode = pnode;
+ context->master_pnode = master_pnode;
+ spin_unlock(&vfspnode_lock);
+ return 1;
+}
+
+
static void inline pnode_add_mnt(struct vfspnode *pnode,
struct vfsmount *mnt, int slave)
{
@@ -87,10 +157,12 @@ static void inline pnode_add_mnt(struct
mnt->mnt_pnode = pnode;
if (slave) {
set_mnt_slave(mnt);
- list_add(&mnt->mnt_pnode_mntlist, &pnode->pnode_slavevfs);
+ list_add(&mnt->mnt_pnode_mntlist,
+ &pnode->pnode_slavevfs);
} else {
set_mnt_shared(mnt);
- list_add(&mnt->mnt_pnode_mntlist, &pnode->pnode_vfs);
+ list_add(&mnt->mnt_pnode_mntlist,
+ &pnode->pnode_vfs);
}
get_pnode(pnode);
spin_unlock(&vfspnode_lock);
@@ -108,7 +180,6 @@ void pnode_add_slave_mnt(struct vfspnode
pnode_add_mnt(pnode, mnt, 1);
}
-
void pnode_add_slave_pnode(struct vfspnode *pnode,
struct vfspnode *slave_pnode)
{
@@ -117,12 +188,13 @@ void pnode_add_slave_pnode(struct vfspno
spin_lock(&vfspnode_lock);
slave_pnode->pnode_master = pnode;
slave_pnode->pnode_flags = 0;
- list_add(&slave_pnode->pnode_peer_slave, &pnode->pnode_slavepnode);
+ list_add(&slave_pnode->pnode_peer_slave,
+ &pnode->pnode_slavepnode);
get_pnode(pnode);
spin_unlock(&vfspnode_lock);
}
-static void _pnode_disassociate_mnt(struct vfsmount *mnt)
+static inline void __pnode_disassociate_mnt(struct vfsmount *mnt)
{
spin_lock(&vfspnode_lock);
list_del_init(&mnt->mnt_pnode_mntlist);
@@ -135,7 +207,7 @@ void pnode_del_slave_mnt(struct vfsmount
{
if (!mnt)
return;
- _pnode_disassociate_mnt(mnt);
+ __pnode_disassociate_mnt(mnt);
CLEAR_MNT_SLAVE(mnt);
}
@@ -143,16 +215,342 @@ void pnode_del_member_mnt(struct vfsmoun
{
if (!mnt)
return;
- _pnode_disassociate_mnt(mnt);
+ __pnode_disassociate_mnt(mnt);
CLEAR_MNT_SHARED(mnt);
}
-
void pnode_disassociate_mnt(struct vfsmount *mnt)
{
if (!mnt)
return;
- _pnode_disassociate_mnt(mnt);
+ __pnode_disassociate_mnt(mnt);
CLEAR_MNT_SHARED(mnt);
CLEAR_MNT_SLAVE(mnt);
}
+
+// merge pnode into peer_pnode and get rid of pnode
+int pnode_merge_pnode(struct vfspnode *pnode, struct vfspnode *peer_pnode)
+{
+ struct vfspnode *slave_pnode, *pnext;
+ struct vfsmount *mnt, *slave_mnt, *next;
+ int i,count;
+
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(slave_pnode, pnext,
+ &pnode->pnode_slavepnode, pnode_peer_slave) {
+ slave_pnode->pnode_master = peer_pnode;
+ list_move(&slave_pnode->pnode_peer_slave,
+ &peer_pnode->pnode_slavepnode);
+ }
+
+ list_for_each_entry_safe(slave_mnt, next,
+ &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
+ slave_mnt->mnt_pnode = peer_pnode;
+ list_move(&slave_mnt->mnt_pnode_mntlist,
+ &peer_pnode->pnode_slavevfs);
+ }
+
+ list_for_each_entry_safe(mnt, next,
+ &pnode->pnode_vfs, mnt_pnode_mntlist) {
+ mnt->mnt_pnode = peer_pnode;
+ list_move(&mnt->mnt_pnode_mntlist,
+ &peer_pnode->pnode_vfs);
+ }
+
+ count = atomic_read(&pnode->pnode_count);
+ atomic_add(count, &peer_pnode->pnode_count);
+
+ /*
+ * delete all references to 'pnode'.
+ * A better implementation can simply
+ * call free_pnode(pnode). But this is
+ * a cleaner way of doing it. Of course,
+ * with some cost.
+ */
+ for (i=0 ; i <count; i++)
+ put_pnode_unlocked(pnode);
+ spin_unlock(&vfspnode_lock);
+ return 0;
+}
+
+/*
+ * @pnode: pnode that contains the vfsmounts, on which the
+ * new mount is created at dentry 'dentry'
+ * @dentry: the dentry on which the new mount is created
+ * @mnt: return the mount created on this vfsmount
+ * walk through all the vfsmounts belonging to this pnode
+ * as well as its slave pnodes and for each vfsmount create
+ * a new vfsmount at 'dentry'. Return the vfsmount created
+ * at 'dentry' of vfsmount 'mnt'.
+ */
+struct vfsmount *pnode_make_mounted(struct vfspnode *pnode,
+ struct vfsmount *mnt, struct dentry *dentry)
+{
+ struct vfsmount *child_mnt;
+ int ret=0, level;
+ struct vfspnode *master_pnode;
+ struct vfspnode *child_pnode, *master_child_pnode;
+ struct vfsmount *slave_mnt, *member_mnt, *t_m;
+ struct pcontext context;
+ static struct vfspnode *p_array[PNODE_MAX_SLAVE_LEVEL];
+
+ context.start = pnode;
+ context.pnode = NULL;
+
+ while (pnode_next(&context)) {
+ level = context.level;
+ pnode = context.pnode;
+ master_pnode = context.master_pnode;
+
+ if (master_pnode)
+ master_child_pnode = p_array[level-1];
+ else
+ master_child_pnode = NULL;
+
+ if (!(child_pnode = pnode_alloc())) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (child_pnode && master_child_pnode)
+ pnode_add_slave_pnode(master_child_pnode,
+ child_pnode);
+
+ p_array[level] = child_pnode;
+
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(member_mnt,
+ t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if (!(child_mnt = do_make_mounted(member_mnt,
+ dentry))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ spin_lock(&vfspnode_lock);
+ pnode_add_member_mnt(child_pnode, child_mnt);
+ }
+ list_for_each_entry_safe(slave_mnt, t_m,
+ &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if (!(child_mnt = do_make_mounted(slave_mnt,
+ dentry))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ spin_lock(&vfspnode_lock);
+ pnode_add_slave_mnt(child_pnode, child_mnt);
+ }
+ spin_unlock(&vfspnode_lock);
+ }
+out:
+ if (ret)
+ return NULL;
+
+ child_mnt = __lookup_mnt(mnt, dentry, dentry);
+ mntput(child_mnt);
+ return child_mnt;
+}
+
+int vfs_make_unmounted(struct vfsmount *mnt)
+{
+ struct vfspnode *pnode;
+ int ret=0;
+
+ if (do_make_unmounted(mnt)) {
+ ret = 1;
+ goto out;
+ }
+ pnode = mnt->mnt_pnode;
+ list_del_init(&mnt->mnt_pnode_mntlist);
+ put_pnode(pnode);
+out:
+ return ret;
+}
+
+int pnode_make_unmounted(struct vfspnode *pnode)
+{
+ int ret=0;
+ struct vfsmount *slave_mnt, *member_mnt, *t_m;
+ struct pcontext context;
+
+ context.start = pnode;
+ context.pnode = NULL;
+ while (pnode_next(&context)) {
+ pnode = context.pnode;
+ // traverse member vfsmounts
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(member_mnt,
+ t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_make_unmounted(member_mnt)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ list_for_each_entry_safe(slave_mnt, t_m,
+ &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_make_unmounted(slave_mnt)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ spin_unlock(&vfspnode_lock);
+ }
+out:
+ return ret;
+}
+
+int vfs_prepare_mount_func(struct vfsmount *mnt, enum pnode_vfs_type flag,
+ struct vfspnode *pnode,
+ struct vfsmount *source_mnt,
+ struct dentry *mountpoint_dentry,
+ struct vfsmount *p_mnt)
+
+{
+ struct vfsmount *child_mnt;
+
+ if ((p_mnt != mnt) || (source_mnt == source_mnt->mnt_parent)) {
+ child_mnt = do_attach_prepare_mnt(mnt, mountpoint_dentry,
+ source_mnt, (p_mnt != mnt));
+
+ if (!child_mnt)
+ return -ENOMEM;
+
+ if (child_mnt != source_mnt)
+ put_pnode(source_mnt->mnt_pnode);
+ } else
+ child_mnt = source_mnt;
+
+ switch (flag) {
+ case PNODE_SLAVE_VFS :
+ pnode_add_slave_mnt(pnode, child_mnt);
+ break;
+ case PNODE_MEMBER_VFS :
+ pnode_add_member_mnt(pnode, child_mnt);
+ break;
+ }
+
+ return 0;
+}
+
+int pnode_prepare_mount(struct vfspnode *pnode,
+ struct vfspnode *my_pnode,
+ struct dentry *mountpoint_dentry,
+ struct vfsmount *source_mnt,
+ struct vfsmount *mnt)
+{
+ int ret=0, level;
+ struct vfspnode *master_pnode, *child_pnode, *master_child_pnode;
+ struct vfsmount *slave_mnt, *member_mnt, *t_m;
+ struct pcontext context;
+ static struct vfspnode *p_array[PNODE_MAX_SLAVE_LEVEL];
+
+ context.start = pnode;
+ context.pnode = NULL;
+ while (pnode_next(&context)) {
+ level = context.level;
+ pnode = context.pnode;
+ master_pnode = context.master_pnode;
+
+ if (master_pnode) {
+ master_child_pnode = p_array[level];
+ child_pnode = NULL;
+ } else {
+ master_child_pnode = NULL;
+ child_pnode = my_pnode;
+ }
+
+ if (!(child_pnode = pnode_alloc()))
+ return -ENOMEM;
+
+ if (master_child_pnode && child_pnode)
+ pnode_add_slave_pnode(master_child_pnode,
+ child_pnode);
+ p_array[level] = child_pnode;
+
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(member_mnt,
+ t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if((ret=vfs_prepare_mount_func(member_mnt,
+ PNODE_MEMBER_VFS, child_pnode,
+ source_mnt, mountpoint_dentry, mnt)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ list_for_each_entry_safe(slave_mnt,
+ t_m, &pnode->pnode_slavevfs,
+ mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if((ret = vfs_prepare_mount_func(slave_mnt,
+ PNODE_SLAVE_VFS, child_pnode,
+ source_mnt, mountpoint_dentry, mnt)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ spin_unlock(&vfspnode_lock);
+ }
+out:
+ return ret;
+}
+
+int vfs_real_mount_func(struct vfsmount *mnt, int delflag)
+{
+ BUG_ON(mnt == mnt->mnt_parent);
+ do_attach_real_mnt(mnt);
+ if (delflag) {
+ spin_lock(&vfspnode_lock);
+ list_del_init(&mnt->mnt_pnode_mntlist);
+ put_pnode_locked(mnt->mnt_pnode);
+ spin_unlock(&vfspnode_lock);
+ mnt->mnt_pnode = NULL;
+ }
+ return 0;
+}
+
+/*
+ * @pnode: walk the propagation tree and complete the
+ * attachments of the child mounts to the parents
+ * correspondingly.
+ * @flag: if set, destroy the propagation tree
+ */
+int pnode_real_mount(struct vfspnode *pnode, int flag)
+{
+ int ret=0;
+ struct vfsmount *slave_mnt, *member_mnt, *t_m;
+ struct pcontext context;
+
+ context.start = pnode;
+ context.pnode = NULL;
+ while (pnode_next(&context)) {
+ pnode = context.pnode;
+ // traverse member vfsmounts
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(member_mnt,
+ t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_real_mount_func(member_mnt,
+ flag)))
+ goto out;
+ }
+ list_for_each_entry_safe(slave_mnt, t_m,
+ &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_real_mount_func(slave_mnt,
+ flag)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+
+ if (flag) {
+ BUG_ON(!list_empty(&pnode->pnode_vfs));
+ BUG_ON(!list_empty(&pnode->pnode_slavevfs));
+ BUG_ON(!list_empty(&pnode->pnode_slavepnode));
+ list_del_init(&pnode->pnode_peer_slave);
+ put_pnode_locked(pnode);
+ }
+ spin_unlock(&vfspnode_lock);
+ }
+out:
+ return ret;
+}
Index: 2.6.12.work1/include/linux/fs.h
===================================================================
--- 2.6.12.work1.orig/include/linux/fs.h
+++ 2.6.12.work1/include/linux/fs.h
@@ -1216,7 +1216,11 @@ extern struct vfsmount *kern_mount(struc
extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
+extern struct vfsmount *do_attach_prepare_mnt(struct vfsmount *,
+ struct dentry *, struct vfsmount *, int );
+extern void do_attach_real_mnt(struct vfsmount *);
extern struct vfsmount *do_make_mounted(struct vfsmount *, struct dentry *);
+extern int do_make_unmounted(struct vfsmount *);
extern int vfs_statfs(struct super_block *, struct kstatfs *);
Index: 2.6.12.work1/include/linux/pnode.h
===================================================================
--- 2.6.12.work1.orig/include/linux/pnode.h
+++ 2.6.12.work1/include/linux/pnode.h
@@ -76,5 +76,10 @@ void pnode_del_slave_mnt(struct vfsmount
void pnode_del_member_mnt(struct vfsmount *);
void pnode_disassociate_mnt(struct vfsmount *);
void pnode_add_slave_pnode(struct vfspnode *, struct vfspnode *);
+int pnode_merge_pnode(struct vfspnode *, struct vfspnode *);
struct vfsmount * pnode_make_mounted(struct vfspnode *, struct vfsmount *, struct dentry *);
+int pnode_make_unmounted(struct vfspnode *);
+int pnode_prepare_mount(struct vfspnode *, struct vfspnode *, struct dentry *,
+ struct vfsmount *, struct vfsmount *);
+int pnode_real_mount(struct vfspnode *, int);
#endif /* _LINUX_PNODE_H */
Index: 2.6.12.work1/include/linux/namespace.h
===================================================================
--- 2.6.12.work1.orig/include/linux/namespace.h
+++ 2.6.12.work1/include/linux/namespace.h
@@ -9,7 +9,6 @@ struct namespace {
atomic_t count;
struct vfsmount * root;
struct list_head list;
- struct rw_semaphore sem;
};
extern void umount_tree(struct vfsmount *);
Index: 2.6.12.work1/include/linux/dcache.h
===================================================================
--- 2.6.12.work1.orig/include/linux/dcache.h
+++ 2.6.12.work1/include/linux/dcache.h
@@ -329,6 +329,8 @@ static inline int d_mountpoint(struct de
}
extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *);
+extern struct vfsmount *__lookup_mnt(struct vfsmount *,
+ struct dentry *, struct dentry *);
extern struct dentry *lookup_create(struct nameidata *nd, int is_dir);
extern int sysctl_vfs_cache_pressure;
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC-2 PATCH 4/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
` (2 preceding siblings ...)
2005-07-18 6:53 ` [RFC-2 PATCH 3/8] " Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 5/8] " Ram Pai
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: move.patch --]
[-- Type: text/x-patch; name=move.patch, Size: 9429 bytes --]
Adds ability to move a shared/private/slave/unclone tree to any other
shared/private/slave/unclone tree. Also incorporates the same behavior
for pivot_root()
RP
Signed by Ram Pai (linuxram@us.ibm.com)
fs/namespace.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 125 insertions(+), 25 deletions(-)
Index: 2.6.12.work1/fs/namespace.c
===================================================================
--- 2.6.12.work1.orig/fs/namespace.c
+++ 2.6.12.work1/fs/namespace.c
@@ -664,9 +664,12 @@ static struct vfsmount *copy_tree(struct
return NULL;
}
+
/*
* @source_mnt : mount tree to be attached
* @nd : place the mount tree @source_mnt is attached
+ * @move : use the move semantics if set, else use normal attach semantics
+ * as explained below
*
* NOTE: in the table below explains the semantics when a source vfsmount
* of a given type is attached to a destination vfsmount of a give type.
@@ -699,16 +702,44 @@ static struct vfsmount *copy_tree(struct
* (+) the mount will be propagated to the destination vfsmount
* and the new mount will be added to the source vfsmount's pnode.
*
+ *
+ * ---------------------------------------------------------------------
+ * | MOVE MOUNT OPERATION |
+ * |*******************************************************************|
+ * | dest --> | shared | private | slave |unclonable |
+ * | source | | | | |
+ * | | | | | | |
+ * | v | | | | |
+ * |*******************************************************************|
+ * | | | | | |
+ * | shared | shared (++) | shared (+)|shared (+)| shared (+)|
+ * | | | | | |
+ * | | | | | |
+ * | private | shared (+) | private | private | private |
+ * | | | | | |
+ * | | | | | |
+ * | slave | shared (+++) | slave | slave | slave |
+ * | | | | | |
+ * | | | | | |
+ * | unclonable| unclonable | unclonable |unclonable| unclonable|
+ * | | | | | |
+ * | | | | | |
+ * ********************************************************************
+ *
+ * (+++) the mount will be propagated to all the vfsmounts in the pnode tree
+ * of the destination vfsmount, and all the new mounts will be
+ * added to a new pnode, which will be a slave pnode of the
+ * source vfsmount's pnode.
+ *
* if the source mount is a tree, the operations explained above is
- * applied to each
- * vfsmount in the tree.
+ * applied to each vfsmount in the tree.
*
* Should be called without spinlocks held, because this function can sleep
* in allocations.
*
*/
static int attach_recursive_mnt(struct vfsmount *source_mnt,
- struct nameidata *nd)
+ struct nameidata *nd, int move)
{
struct vfsmount *mntpt_mnt, *m, *p;
struct vfspnode *src_pnode, *t_p, *dest_pnode, *tmp_pnode;
@@ -718,7 +749,9 @@ static int attach_recursive_mnt(struct v
mntpt_mnt = nd->mnt;
dest_pnode = IS_MNT_SHARED(mntpt_mnt) ? mntpt_mnt->mnt_pnode : NULL;
- src_pnode = IS_MNT_SHARED(source_mnt) ? source_mnt->mnt_pnode : NULL;
+ src_pnode = IS_MNT_SHARED(source_mnt) ||
+ (move && IS_MNT_SLAVE(source_mnt)) ?
+ source_mnt->mnt_pnode : NULL;
if (!dest_pnode && !src_pnode) {
LIST_HEAD(head);
@@ -739,6 +772,7 @@ static int attach_recursive_mnt(struct v
p = NULL;
for (m = source_mnt; m; m = next_mnt(m, source_mnt)) {
int unclone = IS_MNT_UNCLONE(m);
+ int slave = IS_MNT_SLAVE(m);
list_del_init(&m->mnt_list);
@@ -756,7 +790,7 @@ static int attach_recursive_mnt(struct v
p=m;
dest_pnode = IS_MNT_SHARED(mntpt_mnt) ?
mntpt_mnt->mnt_pnode : NULL;
- src_pnode = (IS_MNT_SHARED(m))?
+ src_pnode = (IS_MNT_SHARED(m) || (move && slave))?
m->mnt_pnode : NULL;
m->mnt_pnode = NULL;
@@ -772,19 +806,35 @@ static int attach_recursive_mnt(struct v
if ((ret = pnode_prepare_mount(dest_pnode, tmp_pnode,
mntpt_dentry, m, mntpt_mnt)))
return ret;
+ if (move && dest_pnode && slave)
+ SET_PNODE_SLAVE(tmp_pnode);
} else {
if (m == m->mnt_parent)
do_attach_prepare_mnt(mntpt_mnt,
mntpt_dentry, m, 0);
- pnode_add_member_mnt(tmp_pnode, m);
- if (unclone) {
- set_mnt_unclone(m);
- m->mnt_pnode = tmp_pnode;
- SET_PNODE_DELETE(tmp_pnode);
- } else if (!src_pnode) {
- set_mnt_private(m);
- m->mnt_pnode = tmp_pnode;
- SET_PNODE_DELETE(tmp_pnode);
+ if (move && slave)
+ pnode_add_slave_mnt(tmp_pnode, m);
+ else {
+ pnode_add_member_mnt(tmp_pnode, m);
+ if (unclone) {
+ BUG_ON(!move);
+ set_mnt_unclone(m);
+ m->mnt_pnode = tmp_pnode;
+ SET_PNODE_DELETE(tmp_pnode);
+ } else if (!src_pnode) {
+ set_mnt_private(m);
+ m->mnt_pnode = tmp_pnode;
+ SET_PNODE_DELETE(tmp_pnode);
+ }
+ /*
+ * NOTE: set_mnt_private() or
+ * set_mnt_unclone() resets the
+ * m->mnt_pnode information.
+ * reinitialize it. This is needed to
+ * decrement the refcount on the
+ * pnode when the mount 'm' is
+ * unlinked in pnode_real_mount().
+ */
}
/*
* NOTE: set_mnt_private() or
@@ -809,19 +859,29 @@ static int attach_recursive_mnt(struct v
* new mounts. Merge or delete or slave the temporary pnode
*/
spin_lock(&vfsmount_lock);
- list_for_each_entry_safe(tmp_pnode, t_p, &pnodehead, pnode_peer_slave) {
+ list_for_each_entry_safe(tmp_pnode, t_p, &pnodehead,
+ pnode_peer_slave) {
+
int del_flag = IS_PNODE_DELETE(tmp_pnode);
+ int slave_flag = IS_PNODE_SLAVE(tmp_pnode);
struct vfspnode *master_pnode = tmp_pnode->pnode_master;
+
list_del_init(&tmp_pnode->pnode_peer_slave);
pnode_real_mount(tmp_pnode, del_flag);
+
if (!del_flag && master_pnode) {
tmp_pnode->pnode_master = NULL;
- pnode_merge_pnode(tmp_pnode, master_pnode);
+
+ if (slave_flag)
+ pnode_add_slave_pnode(master_pnode, tmp_pnode);
+ else
+ pnode_merge_pnode(tmp_pnode, master_pnode);
+
/*
* we don't need the extra reference to
* the master_pnode, that was created either
- * (a) pnode_add_slave_pnode: when the mnt was made as
- * a slave mnt.
+ * (a) pnode_add_slave_pnode: when the mnt
+ * was made as a slave mnt.
* (b) pnode_merge_pnode: during clone_mnt().
*/
put_pnode(master_pnode);
@@ -833,6 +893,18 @@ out:
return 0;
}
+static void
+detach_recursive_mnt(struct vfsmount *source_mnt, struct nameidata *nd)
+{
+ struct vfsmount *m;
+
+ detach_mnt(source_mnt, nd);
+ for (m = source_mnt; m; m = next_mnt(m, source_mnt)) {
+ list_del_init(&m->mnt_pnode_mntlist);
+ }
+ return;
+}
+
static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
{
int err, ret;
@@ -859,7 +931,7 @@ static int graft_tree(struct vfsmount *m
ret = (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry));
spin_unlock(&vfsmount_lock);
if (ret)
- err = attach_recursive_mnt(mnt, nd);
+ err = attach_recursive_mnt(mnt, nd, 0);
out_unlock:
up(&nd->dentry->d_inode->i_sem);
if (!err)
@@ -1256,6 +1328,12 @@ static int do_move_mount(struct nameidat
if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
S_ISDIR(old_nd.dentry->d_inode->i_mode))
goto out2;
+ /*
+ * Don't move a mount in a shared parent.
+ */
+ if(old_nd.mnt->mnt_parent &&
+ IS_MNT_SHARED(old_nd.mnt->mnt_parent))
+ goto out2;
err = -ELOOP;
for (p = nd->mnt; p->mnt_parent!=p; p = p->mnt_parent)
@@ -1263,8 +1341,11 @@ static int do_move_mount(struct nameidat
goto out2;
err = 0;
- detach_mnt(old_nd.mnt, &parent_nd);
- attach_mnt(old_nd.mnt, nd);
+ detach_recursive_mnt(old_nd.mnt, &parent_nd);
+ spin_unlock(&vfsmount_lock);
+ err = attach_recursive_mnt(old_nd.mnt, nd, 1);
+ spin_lock(&vfsmount_lock);
+ mntput(old_nd.mnt);
/* if the mount is moved, it should no longer be expire
* automatically */
@@ -1855,6 +1936,16 @@ asmlinkage long sys_pivot_root(const cha
goto out2; /* not a mountpoint */
if (new_nd.mnt->mnt_root != new_nd.dentry)
goto out2; /* not a mountpoint */
+ /*
+ * Don't move a mount in a shared parent.
+ */
+ if(user_nd.mnt->mnt_parent &&
+ IS_MNT_SHARED(user_nd.mnt->mnt_parent))
+ goto out2;
+ if(new_nd.mnt->mnt_parent &&
+ IS_MNT_SHARED(new_nd.mnt->mnt_parent))
+ goto out2;
+
tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
spin_lock(&vfsmount_lock);
if (tmp != new_nd.mnt) {
@@ -1869,10 +1960,19 @@ asmlinkage long sys_pivot_root(const cha
goto out3;
} else if (!is_subdir(old_nd.dentry, new_nd.dentry))
goto out3;
- detach_mnt(new_nd.mnt, &parent_nd);
- detach_mnt(user_nd.mnt, &root_parent);
- attach_mnt(user_nd.mnt, &old_nd); /* mount old root on put_old */
- attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
+ detach_recursive_mnt(new_nd.mnt, &parent_nd);
+ detach_recursive_mnt(user_nd.mnt, &root_parent);
+
+ spin_unlock(&vfsmount_lock);
+ error = attach_recursive_mnt(user_nd.mnt, &old_nd, 1);
+ spin_lock(&vfsmount_lock);
+ mntput(user_nd.mnt);
+
+ spin_unlock(&vfsmount_lock);
+ error = attach_recursive_mnt(new_nd.mnt, &root_parent, 1);
+ spin_lock(&vfsmount_lock);
+ mntput(new_nd.mnt);
+
spin_unlock(&vfsmount_lock);
chroot_fs_refs(&user_nd, &new_nd);
security_sb_post_pivotroot(&user_nd, &new_nd);
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC-2 PATCH 5/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
` (3 preceding siblings ...)
2005-07-18 6:53 ` [RFC-2 PATCH 4/8] " Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 6/8] " Ram Pai
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: umount.patch --]
[-- Type: text/x-patch; name=umount.patch, Size: 9039 bytes --]
Adds ability to unmount a shared/slave/unclone/private tree
RP
Signed by Ram Pai (linuxram@us.ibm.com)
fs/namespace.c | 68 +++++++++++++++++++++++++-----
fs/pnode.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 3 +
include/linux/pnode.h | 5 ++
4 files changed, 177 insertions(+), 11 deletions(-)
Index: 2.6.12.work1/fs/pnode.c
===================================================================
--- 2.6.12.work1.orig/fs/pnode.c
+++ 2.6.12.work1/fs/pnode.c
@@ -273,6 +273,117 @@ int pnode_merge_pnode(struct vfspnode *p
return 0;
}
+static int vfs_busy(struct vfsmount *mnt, struct dentry *dentry,
+ struct dentry *rootdentry, struct vfsmount *origmnt)
+{
+ struct vfsmount *child_mnt;
+ int ret=0;
+
+ spin_unlock(&vfsmount_lock);
+ child_mnt = __lookup_mnt(mnt, dentry, rootdentry);
+ spin_lock(&vfsmount_lock);
+
+ if (!child_mnt)
+ return 0;
+
+ if (list_empty(&child_mnt->mnt_mounts)) {
+ if (origmnt == child_mnt)
+ ret = do_refcount_check(child_mnt, 3);
+ else
+ ret = do_refcount_check(child_mnt, 2);
+ }
+ mntput(child_mnt);
+ return ret;
+}
+
+int pnode_mount_busy(struct vfspnode *pnode, struct dentry *mntpt,
+ struct dentry *root, struct vfsmount *mnt)
+{
+ int ret=0;
+ struct vfsmount *slave_mnt, *member_mnt, *t_m;
+ struct pcontext context;
+
+ context.start = pnode;
+ context.pnode = NULL;
+ while (pnode_next(&context)) {
+ pnode = context.pnode;
+
+ // traverse member vfsmounts
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(member_mnt,
+ t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_busy(member_mnt, mntpt,
+ root, mnt)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ list_for_each_entry_safe(slave_mnt, t_m,
+ &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_busy(slave_mnt, mntpt,
+ root, mnt)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ spin_unlock(&vfspnode_lock);
+ }
+out:
+ return ret;
+}
+
+int vfs_umount(struct vfsmount *mnt, struct dentry *dentry,
+ struct dentry *rootdentry)
+{
+ struct vfsmount *child_mnt;
+
+ spin_unlock(&vfsmount_lock);
+ child_mnt = __lookup_mnt(mnt, dentry, rootdentry);
+ spin_lock(&vfsmount_lock);
+ mntput(child_mnt);
+ if (child_mnt && list_empty(&child_mnt->mnt_mounts)) {
+ do_detach_mount(child_mnt);
+ if (child_mnt->mnt_pnode)
+ pnode_disassociate_mnt(child_mnt);
+ }
+ return 0;
+}
+
+int pnode_umount(struct vfspnode *pnode, struct dentry *dentry,
+ struct dentry *rootdentry)
+{
+ int ret=0;
+ struct vfsmount *slave_mnt, *member_mnt, *t_m;
+ struct pcontext context;
+
+ context.start = pnode;
+ context.pnode = NULL;
+ while (pnode_next(&context)) {
+ pnode = context.pnode;
+ // traverse member vfsmounts
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(member_mnt,
+ t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_umount(member_mnt,
+ dentry, rootdentry)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ list_for_each_entry_safe(slave_mnt, t_m,
+ &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
+ spin_unlock(&vfspnode_lock);
+ if ((ret = vfs_umount(slave_mnt,
+ dentry, rootdentry)))
+ goto out;
+ spin_lock(&vfspnode_lock);
+ }
+ spin_unlock(&vfspnode_lock);
+ }
+out:
+ return ret;
+}
+
/*
* @pnode: pnode that contains the vfsmounts, on which the
* new mount is created at dentry 'dentry'
@@ -532,6 +643,7 @@ int pnode_real_mount(struct vfspnode *pn
if ((ret = vfs_real_mount_func(member_mnt,
flag)))
goto out;
+ spin_lock(&vfspnode_lock);
}
list_for_each_entry_safe(slave_mnt, t_m,
&pnode->pnode_slavevfs, mnt_pnode_mntlist) {
Index: 2.6.12.work1/fs/namespace.c
===================================================================
--- 2.6.12.work1.orig/fs/namespace.c
+++ 2.6.12.work1/fs/namespace.c
@@ -352,6 +352,7 @@ struct seq_operations mounts_op = {
* open files, pwds, chroots or sub mounts that are
* busy.
*/
+//TOBEFIXED
int may_umount_tree(struct vfsmount *mnt)
{
struct list_head *next;
@@ -394,6 +395,20 @@ resume:
EXPORT_SYMBOL(may_umount_tree);
+int mount_busy(struct vfsmount *mnt)
+{
+ struct vfspnode *parent_pnode;
+
+ if (mnt == mnt->mnt_parent || !IS_MNT_SHARED(mnt->mnt_parent))
+ return do_refcount_check(mnt, 2);
+
+ parent_pnode = mnt->mnt_parent->mnt_pnode;
+ BUG_ON(!parent_pnode);
+ return pnode_mount_busy(parent_pnode,
+ mnt->mnt_mountpoint,
+ mnt->mnt_root, mnt);
+}
+
/**
* may_umount - check if a mount point is busy
* @mnt: root of mount
@@ -409,13 +424,27 @@ EXPORT_SYMBOL(may_umount_tree);
*/
int may_umount(struct vfsmount *mnt)
{
- if (atomic_read(&mnt->mnt_count) > 2)
+ if (mount_busy(mnt))
return -EBUSY;
return 0;
}
EXPORT_SYMBOL(may_umount);
+void do_detach_mount(struct vfsmount *mnt)
+{
+ struct nameidata old_nd;
+ if (mnt != mnt->mnt_parent) {
+ detach_mnt(mnt, &old_nd);
+ path_release(&old_nd);
+ }
+ list_del_init(&mnt->mnt_list);
+ list_del_init(&mnt->mnt_fslink);
+ spin_unlock(&vfsmount_lock);
+ mntput(mnt);
+ spin_lock(&vfsmount_lock);
+}
+
void umount_tree(struct vfsmount *mnt)
{
struct vfsmount *p;
@@ -430,20 +459,35 @@ void umount_tree(struct vfsmount *mnt)
mnt = list_entry(kill.next, struct vfsmount, mnt_list);
list_del_init(&mnt->mnt_list);
list_del_init(&mnt->mnt_fslink);
- if (mnt->mnt_parent == mnt) {
- spin_unlock(&vfsmount_lock);
+ if (mnt->mnt_parent != mnt &&
+ IS_MNT_SHARED(mnt->mnt_parent)) {
+ struct vfspnode *parent_pnode
+ = mnt->mnt_parent->mnt_pnode;
+ BUG_ON(!parent_pnode);
+ pnode_umount(parent_pnode,
+ mnt->mnt_mountpoint,
+ mnt->mnt_root);
} else {
- struct nameidata old_nd;
- detach_mnt(mnt, &old_nd);
- spin_unlock(&vfsmount_lock);
- path_release(&old_nd);
+ if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
+ BUG_ON(!mnt->mnt_pnode);
+ pnode_disassociate_mnt(mnt);
+ }
+ do_detach_mount(mnt);
}
- mntput(mnt);
- spin_lock(&vfsmount_lock);
}
}
-static int do_umount(struct vfsmount *mnt, int flags)
+/*
+ * return true if the refcount is greater than count
+ */
+int do_refcount_check(struct vfsmount *mnt, int count)
+{
+
+ int mycount = atomic_read(&mnt->mnt_count);
+ return (mycount > count);
+}
+
+int do_umount(struct vfsmount *mnt, int flags)
{
struct super_block * sb = mnt->mnt_sb;
int retval;
@@ -524,7 +568,7 @@ static int do_umount(struct vfsmount *mn
spin_lock(&vfsmount_lock);
}
retval = -EBUSY;
- if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
+ if (flags & MNT_DETACH || !mount_busy(mnt)) {
if (!list_empty(&mnt->mnt_list))
umount_tree(mnt);
retval = 0;
@@ -900,7 +944,9 @@ detach_recursive_mnt(struct vfsmount *so
detach_mnt(source_mnt, nd);
for (m = source_mnt; m; m = next_mnt(m, source_mnt)) {
+ spin_lock(&vfspnode_lock);
list_del_init(&m->mnt_pnode_mntlist);
+ spin_unlock(&vfspnode_lock);
}
return;
}
Index: 2.6.12.work1/include/linux/fs.h
===================================================================
--- 2.6.12.work1.orig/include/linux/fs.h
+++ 2.6.12.work1/include/linux/fs.h
@@ -1216,11 +1216,14 @@ extern struct vfsmount *kern_mount(struc
extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
+extern int do_umount(struct vfsmount *, int);
extern struct vfsmount *do_attach_prepare_mnt(struct vfsmount *,
struct dentry *, struct vfsmount *, int );
extern void do_attach_real_mnt(struct vfsmount *);
extern struct vfsmount *do_make_mounted(struct vfsmount *, struct dentry *);
extern int do_make_unmounted(struct vfsmount *);
+extern void do_detach_mount(struct vfsmount *);
+extern int do_refcount_check(struct vfsmount *, int );
extern int vfs_statfs(struct super_block *, struct kstatfs *);
Index: 2.6.12.work1/include/linux/pnode.h
===================================================================
--- 2.6.12.work1.orig/include/linux/pnode.h
+++ 2.6.12.work1/include/linux/pnode.h
@@ -70,6 +70,9 @@ put_pnode_locked(struct vfspnode *pnode)
void __init pnode_init(unsigned long );
struct vfspnode * pnode_alloc(void);
+void pnode_free(struct vfspnode *);
+int pnode_is_busy(struct vfspnode *);
+int pnode_umount_vfs(struct vfspnode *, struct dentry *, struct dentry *, int);
void pnode_add_slave_mnt(struct vfspnode *, struct vfsmount *);
void pnode_add_member_mnt(struct vfspnode *, struct vfsmount *);
void pnode_del_slave_mnt(struct vfsmount *);
@@ -82,4 +85,6 @@ int pnode_make_unmounted(struct vfspnod
int pnode_prepare_mount(struct vfspnode *, struct vfspnode *, struct dentry *,
struct vfsmount *, struct vfsmount *);
int pnode_real_mount(struct vfspnode *, int);
+int pnode_umount(struct vfspnode *, struct dentry *, struct dentry *);
+int pnode_mount_busy(struct vfspnode *, struct dentry *, struct dentry *, struct vfsmount *);
#endif /* _LINUX_PNODE_H */
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC-2 PATCH 6/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
` (4 preceding siblings ...)
2005-07-18 6:53 ` [RFC-2 PATCH 5/8] " Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 7/8] " Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 8/8] " Ram Pai
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: namespace.patch --]
[-- Type: text/x-patch; name=namespace.patch, Size: 976 bytes --]
Adds ability to clone a namespace that has shared/private/slave/unclone
subtrees in it.
RP
Signed by Ram Pai (linuxram@us.ibm.com)
fs/namespace.c | 9 +++++++++
1 files changed, 9 insertions(+)
Index: 2.6.12.work1/fs/namespace.c
===================================================================
--- 2.6.12.work1.orig/fs/namespace.c
+++ 2.6.12.work1/fs/namespace.c
@@ -1763,6 +1763,13 @@ int copy_namespace(int flags, struct tas
q = new_ns->root;
while (p) {
q->mnt_namespace = new_ns;
+
+ if (IS_MNT_SHARED(q))
+ pnode_add_member_mnt(q->mnt_pnode, q);
+ else if (IS_MNT_SLAVE(q))
+ pnode_add_slave_mnt(q->mnt_pnode, q);
+ put_pnode(q->mnt_pnode);
+
if (fs) {
if (p == fs->rootmnt) {
rootmnt = p;
@@ -2129,6 +2136,8 @@ void __put_namespace(struct namespace *n
spin_lock(&vfsmount_lock);
list_for_each_entry(mnt, &namespace->list, mnt_list) {
+ if (mnt->mnt_pnode)
+ pnode_disassociate_mnt(mnt);
mnt->mnt_namespace = NULL;
}
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC-2 PATCH 7/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
` (5 preceding siblings ...)
2005-07-18 6:53 ` [RFC-2 PATCH 6/8] " Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
2005-07-18 6:53 ` [RFC-2 PATCH 8/8] " Ram Pai
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: automount.patch --]
[-- Type: text/x-patch; name=automount.patch, Size: 9034 bytes --]
adds support for mount/umount propagation for autofs initiated operations,
RP
fs/namespace.c | 151 +++++++++++++++++---------------------------------
fs/pnode.c | 13 ++--
include/linux/pnode.h | 3
3 files changed, 61 insertions(+), 106 deletions(-)
Index: 2.6.12.work1/fs/namespace.c
===================================================================
--- 2.6.12.work1.orig/fs/namespace.c
+++ 2.6.12.work1/fs/namespace.c
@@ -215,6 +215,9 @@ struct vfsmount *do_attach_prepare_mnt(s
if(!(child_mnt = clone_mnt(template_mnt,
template_mnt->mnt_root)))
return NULL;
+ spin_lock(&vfsmount_lock);
+ list_del_init(&child_mnt->mnt_fslink);
+ spin_unlock(&vfsmount_lock);
} else
child_mnt = template_mnt;
@@ -352,38 +355,16 @@ struct seq_operations mounts_op = {
* open files, pwds, chroots or sub mounts that are
* busy.
*/
-//TOBEFIXED
int may_umount_tree(struct vfsmount *mnt)
{
- struct list_head *next;
- struct vfsmount *this_parent = mnt;
- int actual_refs;
- int minimum_refs;
+ int actual_refs=0;
+ int minimum_refs=0;
+ struct vfsmount *p;
spin_lock(&vfsmount_lock);
- actual_refs = atomic_read(&mnt->mnt_count);
- minimum_refs = 2;
-repeat:
- next = this_parent->mnt_mounts.next;
-resume:
- while (next != &this_parent->mnt_mounts) {
- struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child);
-
- next = next->next;
-
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
actual_refs += atomic_read(&p->mnt_count);
minimum_refs += 2;
-
- if (!list_empty(&p->mnt_mounts)) {
- this_parent = p;
- goto repeat;
- }
- }
-
- if (this_parent != mnt) {
- next = this_parent->mnt_child.next;
- this_parent = this_parent->mnt_parent;
- goto resume;
}
spin_unlock(&vfsmount_lock);
@@ -395,18 +376,18 @@ resume:
EXPORT_SYMBOL(may_umount_tree);
-int mount_busy(struct vfsmount *mnt)
+int mount_busy(struct vfsmount *mnt, int refcnt)
{
struct vfspnode *parent_pnode;
if (mnt == mnt->mnt_parent || !IS_MNT_SHARED(mnt->mnt_parent))
- return do_refcount_check(mnt, 2);
+ return do_refcount_check(mnt, refcnt);
parent_pnode = mnt->mnt_parent->mnt_pnode;
BUG_ON(!parent_pnode);
return pnode_mount_busy(parent_pnode,
mnt->mnt_mountpoint,
- mnt->mnt_root, mnt);
+ mnt->mnt_root, mnt, refcnt);
}
/**
@@ -424,7 +405,7 @@ int mount_busy(struct vfsmount *mnt)
*/
int may_umount(struct vfsmount *mnt)
{
- if (mount_busy(mnt))
+ if (mount_busy(mnt, 2))
return -EBUSY;
return 0;
}
@@ -445,6 +426,25 @@ void do_detach_mount(struct vfsmount *mn
spin_lock(&vfsmount_lock);
}
+void umount_mnt(struct vfsmount *mnt)
+{
+ if (mnt->mnt_parent != mnt &&
+ IS_MNT_SHARED(mnt->mnt_parent)) {
+ struct vfspnode *parent_pnode
+ = mnt->mnt_parent->mnt_pnode;
+ BUG_ON(!parent_pnode);
+ pnode_umount(parent_pnode,
+ mnt->mnt_mountpoint,
+ mnt->mnt_root);
+ } else {
+ if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
+ BUG_ON(!mnt->mnt_pnode);
+ pnode_disassociate_mnt(mnt);
+ }
+ do_detach_mount(mnt);
+ }
+}
+
void umount_tree(struct vfsmount *mnt)
{
struct vfsmount *p;
@@ -459,21 +459,7 @@ void umount_tree(struct vfsmount *mnt)
mnt = list_entry(kill.next, struct vfsmount, mnt_list);
list_del_init(&mnt->mnt_list);
list_del_init(&mnt->mnt_fslink);
- if (mnt->mnt_parent != mnt &&
- IS_MNT_SHARED(mnt->mnt_parent)) {
- struct vfspnode *parent_pnode
- = mnt->mnt_parent->mnt_pnode;
- BUG_ON(!parent_pnode);
- pnode_umount(parent_pnode,
- mnt->mnt_mountpoint,
- mnt->mnt_root);
- } else {
- if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
- BUG_ON(!mnt->mnt_pnode);
- pnode_disassociate_mnt(mnt);
- }
- do_detach_mount(mnt);
- }
+ umount_mnt(mnt);
}
}
@@ -568,7 +554,7 @@ int do_umount(struct vfsmount *mnt, int
spin_lock(&vfsmount_lock);
}
retval = -EBUSY;
- if (flags & MNT_DETACH || !mount_busy(mnt)) {
+ if (flags & MNT_DETACH || !mount_busy(mnt, 2)) {
if (!list_empty(&mnt->mnt_list))
umount_tree(mnt);
retval = 0;
@@ -1490,6 +1476,8 @@ void mark_mounts_for_expiry(struct list_
if (list_empty(mounts))
return;
+ down_write(&namespace_sem);
+
spin_lock(&vfsmount_lock);
/* extract from the expiration list every vfsmount that matches the
@@ -1499,8 +1487,7 @@ void mark_mounts_for_expiry(struct list_
* cleared by mntput())
*/
list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) {
- if (!xchg(&mnt->mnt_expiry_mark, 1) ||
- atomic_read(&mnt->mnt_count) != 1)
+ if (!xchg(&mnt->mnt_expiry_mark, 1) || mount_busy(mnt, 1))
continue;
mntget(mnt);
@@ -1508,12 +1495,13 @@ void mark_mounts_for_expiry(struct list_
}
/*
- * go through the vfsmounts we've just consigned to the graveyard to
- * - check that they're still dead
+ * go through the vfsmounts we've just consigned to the graveyard
* - delete the vfsmount from the appropriate namespace under lock
* - dispose of the corpse
*/
while (!list_empty(&graveyard)) {
+ struct super_block *sb;
+
mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink);
list_del_init(&mnt->mnt_fslink);
@@ -1524,60 +1512,25 @@ void mark_mounts_for_expiry(struct list_
continue;
get_namespace(namespace);
- spin_unlock(&vfsmount_lock);
- down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
-
- /* check that it is still dead: the count should now be 2 - as
- * contributed by the vfsmount parent and the mntget above */
- if (atomic_read(&mnt->mnt_count) == 2) {
- struct vfsmount *xdmnt;
- struct dentry *xdentry;
-
- /* delete from the namespace */
- list_del_init(&mnt->mnt_list);
- list_del_init(&mnt->mnt_child);
- list_del_init(&mnt->mnt_hash);
- mnt->mnt_mountpoint->d_mounted--;
-
- xdentry = mnt->mnt_mountpoint;
- mnt->mnt_mountpoint = mnt->mnt_root;
- xdmnt = mnt->mnt_parent;
- mnt->mnt_parent = mnt;
-
- spin_unlock(&vfsmount_lock);
-
- mntput(xdmnt);
- dput(xdentry);
-
- /* now lay it to rest if this was the last ref on the
- * superblock */
- if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
- /* last instance - try to be smart */
- lock_kernel();
- DQUOT_OFF(mnt->mnt_sb);
- acct_auto_close(mnt->mnt_sb);
- unlock_kernel();
- }
-
- mntput(mnt);
- } else {
- /* someone brought it back to life whilst we didn't
- * have any locks held so return it to the expiration
- * list */
- list_add_tail(&mnt->mnt_fslink, mounts);
- spin_unlock(&vfsmount_lock);
+ sb = mnt->mnt_sb;
+ umount_mnt(mnt);
+ /*
+ * now lay it to rest if this was the last ref on the
+ * superblock
+ */
+ if (atomic_read(&sb->s_active) == 1) {
+ /* last instance - try to be smart */
+ lock_kernel();
+ DQUOT_OFF(sb);
+ acct_auto_close(sb);
+ unlock_kernel();
}
-
- up_write(&namespace_sem);
-
mntput(mnt);
- put_namespace(namespace);
- spin_lock(&vfsmount_lock);
+ put_namespace(namespace);
}
-
spin_unlock(&vfsmount_lock);
+ up_write(&namespace_sem);
}
EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
Index: 2.6.12.work1/fs/pnode.c
===================================================================
--- 2.6.12.work1.orig/fs/pnode.c
+++ 2.6.12.work1/fs/pnode.c
@@ -34,7 +34,7 @@ enum pnode_vfs_type {
static kmem_cache_t * pnode_cachep;
/* spinlock for pnode related operations */
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfspnode_lock);
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfspnode_lock);
void __init pnode_init(unsigned long mempages)
@@ -274,7 +274,8 @@ int pnode_merge_pnode(struct vfspnode *p
}
static int vfs_busy(struct vfsmount *mnt, struct dentry *dentry,
- struct dentry *rootdentry, struct vfsmount *origmnt)
+ struct dentry *rootdentry, struct vfsmount *origmnt,
+ int refcnt)
{
struct vfsmount *child_mnt;
int ret=0;
@@ -288,16 +289,16 @@ static int vfs_busy(struct vfsmount *mnt
if (list_empty(&child_mnt->mnt_mounts)) {
if (origmnt == child_mnt)
- ret = do_refcount_check(child_mnt, 3);
- else
- ret = do_refcount_check(child_mnt, 2);
+ ret = do_refcount_check(child_mnt, refcnt+1);
+ else
+ ret = do_refcount_check(child_mnt, refcnt);
}
mntput(child_mnt);
return ret;
}
int pnode_mount_busy(struct vfspnode *pnode, struct dentry *mntpt,
- struct dentry *root, struct vfsmount *mnt)
+ struct dentry *root, struct vfsmount *mnt, int refcnt)
{
int ret=0;
struct vfsmount *slave_mnt, *member_mnt, *t_m;
Index: 2.6.12.work1/include/linux/pnode.h
===================================================================
--- 2.6.12.work1.orig/include/linux/pnode.h
+++ 2.6.12.work1/include/linux/pnode.h
@@ -86,5 +86,6 @@ int pnode_prepare_mount(struct vfspnode
struct vfsmount *, struct vfsmount *);
int pnode_real_mount(struct vfspnode *, int);
int pnode_umount(struct vfspnode *, struct dentry *, struct dentry *);
-int pnode_mount_busy(struct vfspnode *, struct dentry *, struct dentry *, struct vfsmount *);
+int pnode_mount_busy(struct vfspnode *, struct dentry *, struct dentry *,
+ struct vfsmount *, int);
#endif /* _LINUX_PNODE_H */
^ permalink raw reply [flat|nested] 9+ messages in thread* [RFC-2 PATCH 8/8] shared subtree
2005-07-18 6:53 [RFC-2 PATCH 0/8] shared subtree Ram Pai
` (6 preceding siblings ...)
2005-07-18 6:53 ` [RFC-2 PATCH 7/8] " Ram Pai
@ 2005-07-18 6:53 ` Ram Pai
7 siblings, 0 replies; 9+ messages in thread
From: Ram Pai @ 2005-07-18 6:53 UTC (permalink / raw)
To: linux-kernel, linux-fsdevel
Cc: Alexander Viro, mike, Miklos Szeredi, bfields, Andrew Morton,
penberg
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: pnode_opt.patch --]
[-- Type: text/x-patch; name=pnode_opt.patch, Size: 17225 bytes --]
Code optimization for pnode.c
fs/pnode.c | 478 ++++++++++++++++++++++++++++---------------------------------
1 files changed, 224 insertions(+), 254 deletions(-)
Index: 2.6.12.work1/fs/pnode.c
===================================================================
--- 2.6.12.work1.orig/fs/pnode.c
+++ 2.6.12.work1/fs/pnode.c
@@ -26,6 +26,7 @@
#include <asm/unistd.h>
#include <stdarg.h>
+
enum pnode_vfs_type {
PNODE_MEMBER_VFS = 0x01,
PNODE_SLAVE_VFS = 0x02
@@ -34,7 +35,7 @@ enum pnode_vfs_type {
static kmem_cache_t * pnode_cachep;
/* spinlock for pnode related operations */
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfspnode_lock);
+__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfspnode_lock);
void __init pnode_init(unsigned long mempages)
@@ -58,7 +59,7 @@ struct vfspnode * pnode_alloc(void)
return pnode;
}
-void pnode_free(struct vfspnode *pnode)
+void inline pnode_free(struct vfspnode *pnode)
{
kmem_cache_free(pnode_cachep, pnode);
}
@@ -147,7 +148,6 @@ static int pnode_next(struct pcontext *c
return 1;
}
-
static void inline pnode_add_mnt(struct vfspnode *pnode,
struct vfsmount *mnt, int slave)
{
@@ -180,6 +180,111 @@ void pnode_add_slave_mnt(struct vfspnode
pnode_add_mnt(pnode, mnt, 1);
}
+/*
+ * traverse the pnode tree and at each pnode encountered, execute the
+ * pnode_fnc(). For each vfsmount encountered call the vfs_fnc().
+ *
+ * @pnode: pnode tree to be traversed
+ * @in_data: input data
+ * @out_data: output data
+ * @pnode_func: function to be called when a new pnode is encountered.
+ * @vfs_func: function to be called on each slave and member vfs belonging
+ * to the pnode.
+ */
+static int pnode_traverse(struct vfspnode *pnode,
+ void *in_data,
+ void **out_data,
+ int (*pnode_pre_func)(struct vfspnode *,
+ void *, void **, va_list),
+ int (*pnode_post_func)(struct vfspnode *,
+ void *, va_list),
+ int (*vfs_func)(struct vfsmount *,
+ enum pnode_vfs_type, void *, va_list),
+ ...)
+{
+ va_list args;
+ int ret = 0, level;
+ void *my_data, *data_from_master;
+ struct vfspnode *master_pnode;
+ struct vfsmount *slave_mnt, *member_mnt, *t_m;
+ struct pcontext context;
+ static void *p_array[PNODE_MAX_SLAVE_LEVEL];
+
+ context.start = pnode;
+ context.pnode = NULL;
+ /*
+ * determine whether to process vfs first or the
+ * slave pnode first
+ */
+ while (pnode_next(&context)) {
+ level = context.level;
+ pnode = context.pnode;
+ master_pnode = context.master_pnode;
+
+ if (master_pnode) {
+ data_from_master = p_array[level-1];
+ my_data = NULL;
+ } else {
+ data_from_master = NULL;
+ my_data = in_data;
+ }
+
+ if (pnode_pre_func) {
+ va_start(args, vfs_func);
+ if((ret = pnode_pre_func(pnode,
+ data_from_master, &my_data, args)))
+ goto error;
+ va_end(args);
+ }
+
+ // traverse member vfsmounts
+ spin_lock(&vfspnode_lock);
+ list_for_each_entry_safe(member_mnt,
+ t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
+
+ spin_unlock(&vfspnode_lock);
+ va_start(args, vfs_func);
+ if ((ret = vfs_func(member_mnt,
+ PNODE_MEMBER_VFS, my_data, args)))
+ goto error;
+ va_end(args);
+ spin_lock(&vfspnode_lock);
+ }
+ list_for_each_entry_safe(slave_mnt, t_m,
+ &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
+
+ spin_unlock(&vfspnode_lock);
+ va_start(args, vfs_func);
+ if ((ret = vfs_func(slave_mnt, PNODE_SLAVE_VFS,
+ my_data, args)))
+ goto error;
+ va_end(args);
+ spin_lock(&vfspnode_lock);
+ }
+ spin_unlock(&vfspnode_lock);
+
+ if (pnode_post_func) {
+ va_start(args, vfs_func);
+ if((ret = pnode_post_func(pnode,
+ my_data, args)))
+ goto error;
+ va_end(args);
+ }
+
+ p_array[level] = my_data;
+ }
+out:
+ if (out_data)
+ *out_data = p_array[0];
+ return ret;
+error:
+ va_end(args);
+ if (out_data)
+ *out_data = NULL;
+ goto out;
+}
+
+
void pnode_add_slave_pnode(struct vfspnode *pnode,
struct vfspnode *slave_pnode)
{
@@ -219,6 +324,7 @@ void pnode_del_member_mnt(struct vfsmoun
CLEAR_MNT_SHARED(mnt);
}
+
void pnode_disassociate_mnt(struct vfsmount *mnt)
{
if (!mnt)
@@ -228,6 +334,7 @@ void pnode_disassociate_mnt(struct vfsmo
CLEAR_MNT_SLAVE(mnt);
}
+
// merge pnode into peer_pnode and get rid of pnode
int pnode_merge_pnode(struct vfspnode *pnode, struct vfspnode *peer_pnode)
{
@@ -268,15 +375,18 @@ int pnode_merge_pnode(struct vfspnode *p
* with some cost.
*/
for (i=0 ; i <count; i++)
- put_pnode_unlocked(pnode);
+ put_pnode_locked(pnode);
spin_unlock(&vfspnode_lock);
return 0;
}
-static int vfs_busy(struct vfsmount *mnt, struct dentry *dentry,
- struct dentry *rootdentry, struct vfsmount *origmnt,
- int refcnt)
+static int vfs_busy(struct vfsmount *mnt, enum pnode_vfs_type flag,
+ void *indata, va_list args)
{
+ struct dentry *dentry = va_arg(args, struct dentry *);
+ struct dentry *rootdentry = va_arg(args, struct dentry *);
+ struct vfsmount *origmnt = va_arg(args, struct vfsmount *);
+ int refcnt = va_arg(args, int);
struct vfsmount *child_mnt;
int ret=0;
@@ -290,7 +400,7 @@ static int vfs_busy(struct vfsmount *mnt
if (list_empty(&child_mnt->mnt_mounts)) {
if (origmnt == child_mnt)
ret = do_refcount_check(child_mnt, refcnt+1);
- else
+ else
ret = do_refcount_check(child_mnt, refcnt);
}
mntput(child_mnt);
@@ -300,52 +410,32 @@ static int vfs_busy(struct vfsmount *mnt
int pnode_mount_busy(struct vfspnode *pnode, struct dentry *mntpt,
struct dentry *root, struct vfsmount *mnt, int refcnt)
{
- int ret=0;
- struct vfsmount *slave_mnt, *member_mnt, *t_m;
- struct pcontext context;
-
- context.start = pnode;
- context.pnode = NULL;
- while (pnode_next(&context)) {
- pnode = context.pnode;
-
- // traverse member vfsmounts
- spin_lock(&vfspnode_lock);
- list_for_each_entry_safe(member_mnt,
- t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_busy(member_mnt, mntpt,
- root, mnt)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- list_for_each_entry_safe(slave_mnt, t_m,
- &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_busy(slave_mnt, mntpt,
- root, mnt)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- spin_unlock(&vfspnode_lock);
- }
-out:
- return ret;
+ return pnode_traverse(pnode, NULL, NULL,
+ NULL, NULL, vfs_busy, mntpt, root, mnt, refcnt);
}
-int vfs_umount(struct vfsmount *mnt, struct dentry *dentry,
- struct dentry *rootdentry)
+
+int vfs_umount(struct vfsmount *mnt, enum pnode_vfs_type flag,
+ void *indata, va_list args)
{
struct vfsmount *child_mnt;
+ struct dentry *dentry, *rootdentry;
+
+
+ dentry = va_arg(args, struct dentry *);
+ rootdentry = va_arg(args, struct dentry *);
spin_unlock(&vfsmount_lock);
child_mnt = __lookup_mnt(mnt, dentry, rootdentry);
spin_lock(&vfsmount_lock);
mntput(child_mnt);
if (child_mnt && list_empty(&child_mnt->mnt_mounts)) {
- do_detach_mount(child_mnt);
- if (child_mnt->mnt_pnode)
+ if (IS_MNT_SHARED(child_mnt) ||
+ IS_MNT_SLAVE(child_mnt)) {
+ BUG_ON(!child_mnt->mnt_pnode);
pnode_disassociate_mnt(child_mnt);
+ }
+ do_detach_mount(child_mnt);
}
return 0;
}
@@ -353,34 +443,54 @@ int vfs_umount(struct vfsmount *mnt, str
int pnode_umount(struct vfspnode *pnode, struct dentry *dentry,
struct dentry *rootdentry)
{
+ return pnode_traverse(pnode, NULL, (void *)NULL,
+ NULL, NULL, vfs_umount, dentry, rootdentry);
+}
+
+
+int pnode_mount_func(struct vfspnode *pnode, void *indata,
+ void **outdata, va_list args)
+{
+ struct vfspnode *pnode_slave, *pnode_master;
int ret=0;
- struct vfsmount *slave_mnt, *member_mnt, *t_m;
- struct pcontext context;
- context.start = pnode;
- context.pnode = NULL;
- while (pnode_next(&context)) {
- pnode = context.pnode;
- // traverse member vfsmounts
- spin_lock(&vfspnode_lock);
- list_for_each_entry_safe(member_mnt,
- t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_umount(member_mnt,
- dentry, rootdentry)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- list_for_each_entry_safe(slave_mnt, t_m,
- &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_umount(slave_mnt,
- dentry, rootdentry)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- spin_unlock(&vfspnode_lock);
+ pnode_master = indata;
+
+ if (*outdata)
+ pnode_slave = *outdata;
+ else if (!(pnode_slave = pnode_alloc()))
+ return -ENOMEM;
+
+ *outdata = pnode_slave;
+
+ if (pnode_slave && pnode_master)
+ pnode_add_slave_pnode(pnode_master, pnode_slave);
+ return ret;
+}
+
+int vfs_make_mounted_func(struct vfsmount *mnt, enum pnode_vfs_type flag,
+ void *indata, va_list args)
+{
+ struct dentry *target_dentry;
+ int ret=0;
+ struct vfsmount *child_mount;
+ struct vfspnode *pnode;
+
+ target_dentry = va_arg(args, struct dentry *);
+ if (!(child_mount = do_make_mounted(mnt, target_dentry))) {
+ ret = -ENOMEM;
+ goto out;
}
+ pnode = (struct vfspnode *)indata;
+ switch (flag) {
+ case PNODE_SLAVE_VFS :
+ pnode_add_slave_mnt(pnode, child_mount);
+ break;
+ case PNODE_MEMBER_VFS :
+ pnode_add_member_mnt(pnode, child_mount);
+ break;
+ }
+
out:
return ret;
}
@@ -399,72 +509,17 @@ struct vfsmount *pnode_make_mounted(stru
struct vfsmount *mnt, struct dentry *dentry)
{
struct vfsmount *child_mnt;
- int ret=0, level;
- struct vfspnode *master_pnode;
- struct vfspnode *child_pnode, *master_child_pnode;
- struct vfsmount *slave_mnt, *member_mnt, *t_m;
- struct pcontext context;
- static struct vfspnode *p_array[PNODE_MAX_SLAVE_LEVEL];
-
- context.start = pnode;
- context.pnode = NULL;
-
- while (pnode_next(&context)) {
- level = context.level;
- pnode = context.pnode;
- master_pnode = context.master_pnode;
-
- if (master_pnode)
- master_child_pnode = p_array[level-1];
- else
- master_child_pnode = NULL;
-
- if (!(child_pnode = pnode_alloc())) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (child_pnode && master_child_pnode)
- pnode_add_slave_pnode(master_child_pnode,
- child_pnode);
-
- p_array[level] = child_pnode;
-
- spin_lock(&vfspnode_lock);
- list_for_each_entry_safe(member_mnt,
- t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if (!(child_mnt = do_make_mounted(member_mnt,
- dentry))) {
- ret = -ENOMEM;
- goto out;
- }
- spin_lock(&vfspnode_lock);
- pnode_add_member_mnt(child_pnode, child_mnt);
- }
- list_for_each_entry_safe(slave_mnt, t_m,
- &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if (!(child_mnt = do_make_mounted(slave_mnt,
- dentry))) {
- ret = -ENOMEM;
- goto out;
- }
- spin_lock(&vfspnode_lock);
- pnode_add_slave_mnt(child_pnode, child_mnt);
- }
- spin_unlock(&vfspnode_lock);
- }
-out:
- if (ret)
- return NULL;
+ if(pnode_traverse(pnode, NULL, (void *)NULL,
+ pnode_mount_func, NULL, vfs_make_mounted_func,
+ (void *)dentry))
+ return NULL;
child_mnt = __lookup_mnt(mnt, dentry, dentry);
mntput(child_mnt);
return child_mnt;
}
-int vfs_make_unmounted(struct vfsmount *mnt)
+int vfs_make_unmounted_func(struct vfsmount *mnt)
{
struct vfspnode *pnode;
int ret=0;
@@ -473,58 +528,36 @@ int vfs_make_unmounted(struct vfsmount *
ret = 1;
goto out;
}
+
pnode = mnt->mnt_pnode;
+ spin_lock(&vfspnode_lock);
list_del_init(&mnt->mnt_pnode_mntlist);
- put_pnode(pnode);
+ put_pnode_locked(pnode);
+ spin_unlock(&vfspnode_lock);
out:
return ret;
}
int pnode_make_unmounted(struct vfspnode *pnode)
{
- int ret=0;
- struct vfsmount *slave_mnt, *member_mnt, *t_m;
- struct pcontext context;
-
- context.start = pnode;
- context.pnode = NULL;
- while (pnode_next(&context)) {
- pnode = context.pnode;
- // traverse member vfsmounts
- spin_lock(&vfspnode_lock);
- list_for_each_entry_safe(member_mnt,
- t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_make_unmounted(member_mnt)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- list_for_each_entry_safe(slave_mnt, t_m,
- &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_make_unmounted(slave_mnt)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- spin_unlock(&vfspnode_lock);
- }
-out:
- return ret;
+ return pnode_traverse(pnode, NULL, (void *)NULL,
+ NULL, NULL, vfs_make_unmounted_func);
}
int vfs_prepare_mount_func(struct vfsmount *mnt, enum pnode_vfs_type flag,
- struct vfspnode *pnode,
- struct vfsmount *source_mnt,
- struct dentry *mountpoint_dentry,
- struct vfsmount *p_mnt)
-
+ void *indata, va_list args)
{
- struct vfsmount *child_mnt;
+ struct vfsmount *source_mnt, *child_mnt, *p_mnt;
+ struct dentry *mountpoint_dentry;
+ struct vfspnode *pnode = (struct vfspnode *)indata;
+
+ source_mnt = va_arg(args, struct vfsmount * );
+ mountpoint_dentry = va_arg(args, struct dentry *);
+ p_mnt = va_arg(args, struct vfsmount *);
if ((p_mnt != mnt) || (source_mnt == source_mnt->mnt_parent)) {
child_mnt = do_attach_prepare_mnt(mnt, mountpoint_dentry,
source_mnt, (p_mnt != mnt));
-
if (!child_mnt)
return -ENOMEM;
@@ -546,71 +579,43 @@ int vfs_prepare_mount_func(struct vfsmou
}
int pnode_prepare_mount(struct vfspnode *pnode,
- struct vfspnode *my_pnode,
+ struct vfspnode *master_child_pnode,
struct dentry *mountpoint_dentry,
struct vfsmount *source_mnt,
struct vfsmount *mnt)
{
- int ret=0, level;
- struct vfspnode *master_pnode, *child_pnode, *master_child_pnode;
- struct vfsmount *slave_mnt, *member_mnt, *t_m;
- struct pcontext context;
- static struct vfspnode *p_array[PNODE_MAX_SLAVE_LEVEL];
-
- context.start = pnode;
- context.pnode = NULL;
- while (pnode_next(&context)) {
- level = context.level;
- pnode = context.pnode;
- master_pnode = context.master_pnode;
-
- if (master_pnode) {
- master_child_pnode = p_array[level];
- child_pnode = NULL;
- } else {
- master_child_pnode = NULL;
- child_pnode = my_pnode;
- }
-
- if (!(child_pnode = pnode_alloc()))
- return -ENOMEM;
-
- if (master_child_pnode && child_pnode)
- pnode_add_slave_pnode(master_child_pnode,
- child_pnode);
- p_array[level] = child_pnode;
+ return pnode_traverse(pnode,
+ master_child_pnode,
+ (void *)NULL,
+ pnode_mount_func,
+ NULL,
+ vfs_prepare_mount_func,
+ source_mnt,
+ mountpoint_dentry,
+ mnt);
+}
+int pnode_real_mount_post_func(struct vfspnode *pnode, void *indata,
+ va_list args)
+{
+ if (va_arg(args, int)) {
spin_lock(&vfspnode_lock);
- list_for_each_entry_safe(member_mnt,
- t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if((ret=vfs_prepare_mount_func(member_mnt,
- PNODE_MEMBER_VFS, child_pnode,
- source_mnt, mountpoint_dentry, mnt)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- list_for_each_entry_safe(slave_mnt,
- t_m, &pnode->pnode_slavevfs,
- mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if((ret = vfs_prepare_mount_func(slave_mnt,
- PNODE_SLAVE_VFS, child_pnode,
- source_mnt, mountpoint_dentry, mnt)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
+ BUG_ON(!list_empty(&pnode->pnode_vfs));
+ BUG_ON(!list_empty(&pnode->pnode_slavevfs));
+ BUG_ON(!list_empty(&pnode->pnode_slavepnode));
+ list_del_init(&pnode->pnode_peer_slave);
+ put_pnode_locked(pnode);
spin_unlock(&vfspnode_lock);
}
-out:
- return ret;
+ return 0;
}
-int vfs_real_mount_func(struct vfsmount *mnt, int delflag)
+int vfs_real_mount_func(struct vfsmount *mnt, enum pnode_vfs_type flag,
+ void *indata, va_list args)
{
BUG_ON(mnt == mnt->mnt_parent);
do_attach_real_mnt(mnt);
- if (delflag) {
+ if (va_arg(args, int)) {
spin_lock(&vfspnode_lock);
list_del_init(&mnt->mnt_pnode_mntlist);
put_pnode_locked(mnt->mnt_pnode);
@@ -628,42 +633,7 @@ int vfs_real_mount_func(struct vfsmount
*/
int pnode_real_mount(struct vfspnode *pnode, int flag)
{
- int ret=0;
- struct vfsmount *slave_mnt, *member_mnt, *t_m;
- struct pcontext context;
-
- context.start = pnode;
- context.pnode = NULL;
- while (pnode_next(&context)) {
- pnode = context.pnode;
- // traverse member vfsmounts
- spin_lock(&vfspnode_lock);
- list_for_each_entry_safe(member_mnt,
- t_m, &pnode->pnode_vfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_real_mount_func(member_mnt,
- flag)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
- list_for_each_entry_safe(slave_mnt, t_m,
- &pnode->pnode_slavevfs, mnt_pnode_mntlist) {
- spin_unlock(&vfspnode_lock);
- if ((ret = vfs_real_mount_func(slave_mnt,
- flag)))
- goto out;
- spin_lock(&vfspnode_lock);
- }
-
- if (flag) {
- BUG_ON(!list_empty(&pnode->pnode_vfs));
- BUG_ON(!list_empty(&pnode->pnode_slavevfs));
- BUG_ON(!list_empty(&pnode->pnode_slavepnode));
- list_del_init(&pnode->pnode_peer_slave);
- put_pnode_locked(pnode);
- }
- spin_unlock(&vfspnode_lock);
- }
-out:
- return ret;
+ return pnode_traverse(pnode,
+ NULL, (void *)NULL, NULL, pnode_real_mount_post_func,
+ vfs_real_mount_func, flag);
}
^ permalink raw reply [flat|nested] 9+ messages in thread