public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jan Blunck <jblunck@suse.de>
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
Subject: [RFC 16/26] union-mount: Introduce union_mount structure
Date: Mon, 30 Jul 2007 18:13:39 +0200	[thread overview]
Message-ID: <20070730161324.628215686@weierstrass.suse.de> (raw)
In-Reply-To: 20070730161323.100048969@weierstrass.suse.de

[-- Attachment #1: um/union-mount-union-stack.diff --]
[-- Type: text/plain, Size: 13313 bytes --]

This patch adds the basic structures of VFS based union mounts. It is a new
implementation based on some of my old ideas that influenced Bharata B Rao
<bharata@linux.vnet.ibm.com>, who came up with the proposal to let the
union_mount struct only point to the next layer in the union stack. I rewrote
nearly all of the central patches around lookup and the dcache interaction.

Advantages of the new implementation:
- the new union stack is no longer tied directly to one dentry
- the union stack enables dentries to be part of more than one union
  (bind mounts)
- it is unnecessary to traverse the union stack when de/referencing a dentry
- caching of union stack information is still driven by the dentry cache

Signed-off-by: Jan Blunck <jblunck@suse.de>
---
 fs/Kconfig             |    8 +
 fs/Makefile            |    2 
 fs/dcache.c            |    4 
 fs/union.c             |  335 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dcache.h |    9 +
 include/linux/union.h  |   61 ++++++++
 6 files changed, 419 insertions(+)

--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -551,6 +551,14 @@ config INOTIFY_USER
 
 	  If unsure, say Y.
 
+config UNION_MOUNT
+	bool "Union mount support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  If you say Y here, you will be able to mount file systems as
+	  union mount stacks. This is a VFS based implementation and
+	  should work with all file systems. If unsure, say N.
+
 config QUOTA
 	bool "Quota support"
 	help
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -49,6 +49,8 @@ obj-$(CONFIG_FS_POSIX_ACL)	+= posix_acl.
 obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
 obj-$(CONFIG_GENERIC_ACL)	+= generic_acl.o
 
+obj-$(CONFIG_UNION_MOUNT)	+= union.o
+
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
 obj-$(CONFIG_QFMT_V2)		+= quota_v2.o
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -985,6 +985,10 @@ struct dentry *d_alloc(struct dentry * p
 #ifdef CONFIG_PROFILING
 	dentry->d_cookie = NULL;
 #endif
+#ifdef CONFIG_UNION_MOUNT
+	INIT_LIST_HEAD(&dentry->d_unions);
+	dentry->d_unionized = 0;
+#endif
 	INIT_HLIST_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
--- /dev/null
+++ b/fs/union.c
@@ -0,0 +1,335 @@
+/*
+ * VFS based union mount for Linux
+ *
+ * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright (C) 2007 Novell Inc.
+ *
+ *   Author(s): Jan Blunck (j.blunck@tu-harburg.de)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/hash.h>
+#include <linux/fs.h>
+#include <linux/union.h>
+
+/*
+ * Lookup hashtables, borrowed from fs/inode.c. Somebody should try to make
+ * this good - I've just made it work. hash() uses union_hash_* for both.
+ */
+static unsigned int union_hash_mask __read_mostly;
+static unsigned int union_hash_shift __read_mostly;
+static struct hlist_head *union_hashtable __read_mostly;  /* by u_this */
+static unsigned int union_rhash_mask __read_mostly;
+static unsigned int union_rhash_shift __read_mostly;
+static struct hlist_head *union_rhashtable __read_mostly; /* by u_next */
+
+/*
+ * Locking Rules (in the order the locks are taken):
+ * - dcache_lock (for union_rlookup() only)
+ * - union_lock
+ */
+DEFINE_SPINLOCK(union_lock);
+
+static struct kmem_cache *union_cache __read_mostly; /* union_mount objects */
+
+static unsigned long hash(struct dentry *dentry, struct vfsmount *mnt)
+{
+	unsigned long m = (unsigned long)mnt, h;
+
+	h = (m * (unsigned long)dentry) ^
+		((GOLDEN_RATIO_PRIME + m) / L1_CACHE_BYTES);
+	h ^= (h ^ GOLDEN_RATIO_PRIME) >> union_hash_shift;
+	return h & union_hash_mask;	/* index into either hash table */
+}
+
+static __initdata unsigned long union_hash_entries; /* 0 unless set at boot */
+
+static int __init set_union_hash_entries(char *str)
+{
+	if (!str)
+		return 0;
+	union_hash_entries = simple_strtoul(str, &str, 0);
+	return 1;	/* value stored; the end pointer is not checked */
+}
+
+__setup("union_hash_entries=", set_union_hash_entries);
+
+static int __init init_union(void)
+{
+	int loop;	/* bucket index while initialising the tables */
+
+	union_cache = kmem_cache_create("union_mount",
+					sizeof(struct union_mount), 0,
+					SLAB_HWCACHE_ALIGN | SLAB_PANIC,
+					NULL, NULL);
+
+	union_hashtable = alloc_large_system_hash("Union-cache",
+						  sizeof(struct hlist_head),
+						  union_hash_entries,
+						  14,
+						  0,
+						  &union_hash_shift,
+						  &union_hash_mask,
+						  0);
+
+	for (loop = 0; loop < (1 << union_hash_shift); loop++)
+		INIT_HLIST_HEAD(&union_hashtable[loop]);
+
+	/* the reverse table (hashed by u_next) is sized exactly like the first */
+	union_rhashtable = alloc_large_system_hash("rUnion-cache",
+						  sizeof(struct hlist_head),
+						  union_hash_entries,
+						  14,
+						  0,
+						  &union_rhash_shift,
+						  &union_rhash_mask,
+						  0);
+
+	for (loop = 0; loop < (1 << union_rhash_shift); loop++)
+		INIT_HLIST_HEAD(&union_rhashtable[loop]);
+
+	return 0;
+}
+
+fs_initcall(init_union);
+
+struct union_mount *union_alloc(struct dentry *this, struct vfsmount *this_mnt,
+				struct dentry *next, struct vfsmount *next_mnt)
+{
+	struct union_mount *um;
+
+	BUG_ON(!S_ISDIR(this->d_inode->i_mode));
+	BUG_ON(!S_ISDIR(next->d_inode->i_mode));
+
+	um = kmem_cache_alloc(union_cache, GFP_ATOMIC);
+	if (!um)
+		return NULL;
+
+	atomic_set(&um->u_count, 1);
+	mutex_init(&um->u_mutex);	/* the cache has no constructor */
+	INIT_LIST_HEAD(&um->u_unions);
+	INIT_HLIST_NODE(&um->u_hash);
+	INIT_HLIST_NODE(&um->u_rhash);
+
+	um->u_this.mnt = this_mnt;	/* upper layer: not pinned here */
+	um->u_this.dentry = this;
+	um->u_next.mnt = mntget(next_mnt);	/* lower layer: pinned */
+	um->u_next.dentry = dget(next);
+	return um;	/* unhashed, u_count == 1; NULL on allocation failure */
+}
+
+struct union_mount *union_get(struct union_mount *um)
+{
+	BUG_ON(!atomic_read(&um->u_count));	/* must still be referenced */
+	atomic_inc(&um->u_count);
+	return um;	/* same pointer, now with one more reference */
+}
+
+static int __union_put(struct union_mount *um)
+{
+	/* returns 1 if this was the last reference and um has been freed */
+	if (!atomic_dec_and_test(&um->u_count))
+		return 0;
+
+	/* a union must be off both hash chains before its last put */
+	BUG_ON(!hlist_unhashed(&um->u_hash) || !hlist_unhashed(&um->u_rhash));
+	kmem_cache_free(union_cache, um);
+	return 1;
+}
+
+void union_put(struct union_mount *um)
+{
+	struct path tmp = um->u_next;	/* saved: um may be freed below */
+
+	if (__union_put(um))
+		pathput(&tmp);	/* last user gone: unpin the lower layer */
+}
+
+static void __union_hash(struct union_mount *um)
+{
+	hlist_add_head(&um->u_hash, union_hashtable +
+		       hash(um->u_this.dentry, um->u_this.mnt)); /* forward */
+	hlist_add_head(&um->u_rhash, union_rhashtable +
+		       hash(um->u_next.dentry, um->u_next.mnt)); /* reverse */
+}
+
+static void __union_unhash(struct union_mount *um)
+{
+	hlist_del_init(&um->u_hash);	/* off the forward (u_this) chain */
+	hlist_del_init(&um->u_rhash);	/* off the reverse (u_next) chain */
+}
+
+struct union_mount *union_lookup(struct dentry *dentry, struct vfsmount *mnt)
+{
+	struct hlist_head *bucket = &union_hashtable[hash(dentry, mnt)];
+	struct union_mount *um;
+	struct hlist_node *pos;
+
+	/* forward search by top layer; callers here hold union_lock */
+	hlist_for_each_entry(um, pos, bucket, u_hash) {
+		if (um->u_this.dentry != dentry || um->u_this.mnt != mnt)
+			continue;
+		return um;	/* returned without taking a reference */
+	}
+	return NULL;
+}
+
+struct union_mount *union_rlookup(struct dentry *dentry, struct vfsmount *mnt)
+{
+	struct hlist_head *bucket = &union_rhashtable[hash(dentry, mnt)];
+	struct union_mount *um;
+	struct hlist_node *pos;
+
+	/* reverse search by overlaid layer; see the locking rules above */
+	hlist_for_each_entry(um, pos, bucket, u_rhash) {
+		if (um->u_next.dentry != dentry || um->u_next.mnt != mnt)
+			continue;
+		return um;	/* returned without taking a reference */
+	}
+	return NULL;
+}
+
+/*
+ * is_unionized - check if a dentry lives on a union mounted file system
+ * Walks up the mnt_parent chain from @mnt. Returns 1 once a MNT_UNION child
+ * mount passes the lives_below_in_same_fs() test against our dentry, else 0.
+ * NOTE(review): a self-parented mnt is only scanned if it is the start @mnt.
+ */
+int is_unionized(struct dentry *dentry, struct vfsmount *mnt)
+{
+	struct path this = { .mnt = mntget(mnt),	/* our own references */
+			     .dentry = dget(dentry) };
+	struct vfsmount *tmp;
+
+	do {
+		/* check if there is a union mounted on top of us */
+		spin_lock(&vfsmount_lock);
+		list_for_each_entry(tmp, &this.mnt->mnt_mounts, mnt_child) {
+			if (!(tmp->mnt_flags & MNT_UNION))
+				continue;
+			/* Isn't this a bug? (mountpoint on another sb) */
+			if (this.dentry->d_sb != tmp->mnt_mountpoint->d_sb)
+				continue;
+			if (lives_below_in_same_fs(this.dentry,
+						   tmp->mnt_mountpoint)) {
+				spin_unlock(&vfsmount_lock);
+				pathput(&this);
+				return 1;
+			}
+		}
+		spin_unlock(&vfsmount_lock);
+
+		/* check our mountpoint next: step up into the parent mount */
+		tmp = mntget(this.mnt->mnt_parent);
+		dput(this.dentry);
+		this.dentry = dget(this.mnt->mnt_mountpoint);
+		mntput(this.mnt);
+		this.mnt = tmp;
+	} while (this.mnt != this.mnt->mnt_parent); /* self-parented */
+
+	pathput(&this);
+	return 0;
+}
+
+int append_to_union(struct vfsmount *mnt, struct dentry *dentry,
+		    struct vfsmount *dest_mnt, struct dentry *dest_dentry)
+{
+	struct union_mount *fresh, *old;
+
+	BUG_ON(!IS_MNT_UNION(mnt));
+
+	/* stack <mnt,dentry> on top of <dest_mnt,dest_dentry>: 0 or -ENOMEM */
+	fresh = union_alloc(dentry, mnt, dest_dentry, dest_mnt);
+	if (fresh == NULL)
+		return -ENOMEM;
+
+	spin_lock(&union_lock);
+	old = union_lookup(dentry, mnt);
+	if (old == NULL)
+		__union_hash(fresh);
+	else
+		BUG_ON(old->u_next.dentry != dest_dentry ||
+		       old->u_next.mnt != dest_mnt);
+	spin_unlock(&union_lock);
+	if (old != NULL)
+		union_put(fresh);	/* already present: drop the duplicate */
+	return 0;
+}
+
+/*
+ * follow_union_down - follow the union stack one layer down
+ *
+ * Called by union-aware lookup functions to move @mnt and @dentry (and the
+ * references on them) to the next overlaid layer. Returns 1 if so, else 0.
+ */
+int follow_union_down(struct vfsmount **mnt, struct dentry **dentry)
+{
+	struct union_mount *um;
+	struct path next;
+
+	if (!IS_MNT_UNION(*mnt))
+		return 0;
+
+	spin_lock(&union_lock);
+	um = union_lookup(*dentry, *mnt);
+	if (!um) {
+		spin_unlock(&union_lock);
+		return 0;
+	}
+	next = um->u_next;	/* um is only stable while union_lock is held */
+	pathget(&next);
+	spin_unlock(&union_lock);
+	dput(*dentry);
+	*dentry = next.dentry;
+	mntput(*mnt);
+	*mnt = next.mnt;
+	return 1;
+}
+
+/*
+ * follow_union_mount - follow the union stack to the topmost layer
+ *
+ * This is called to traverse the union stack to the topmost layer. This is
+ * necessary for following parent pointers in an union mount.
+ * Returns non-zero if followed to the topmost layer, zero otherwise.
+ */
+int follow_union_mount(struct vfsmount **mnt, struct dentry **dentry)
+{
+	struct union_mount *um;
+	struct path top;
+	int res = 0;
+
+	while (IS_UNION(*dentry)) {
+		spin_lock(&dcache_lock);
+		spin_lock(&union_lock);
+		um = union_rlookup(*dentry, *mnt);
+		if (um) {
+			top = um->u_this;
+			pathget(&top);
+		}
+		spin_unlock(&union_lock);
+		spin_unlock(&dcache_lock);
+
+		/*
+		 * Validating the topmost dentry is easy: the union is
+		 * unhashed under dcache_lock too, so no dput'ing could free it
+		 * before we pinned our private copy of the path above.
+		 */
+		if (!um)
+			break;
+		dput(*dentry);
+		*dentry = top.dentry;
+		mntput(*mnt);
+		*mnt = top.mnt;
+		res = 1;
+	}
+
+	return res;
+}
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -93,6 +93,15 @@ struct dentry {
 	struct dentry *d_parent;	/* parent directory */
 	struct qstr d_name;
 
+#ifdef CONFIG_UNION_MOUNT
+	/*
+	 * The following fields are used by the VFS based union mount
+	 * implementation. Both are protected by union_lock!
+	 */
+	struct list_head d_unions;	/* list of union_mount's */
+	unsigned int d_unionized;	/* unions referencing this dentry */
+#endif
+
 	struct list_head d_lru;		/* LRU list */
 	/*
 	 * d_child and d_rcu can share memory
--- /dev/null
+++ b/include/linux/union.h
@@ -0,0 +1,61 @@
+/*
+ * VFS based union mount for Linux
+ *
+ * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright (C) 2007 Novell Inc.
+ *   Author(s): Jan Blunck (j.blunck@tu-harburg.de)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef __LINUX_UNION_H
+#define __LINUX_UNION_H
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <asm/atomic.h>
+
+struct dentry;
+struct vfsmount;
+
+#ifdef CONFIG_UNION_MOUNT
+
+/*
+ * The new union mount structure: links one layer to the layer it overlays.
+ */
+struct union_mount {
+	atomic_t u_count;		/* reference count */
+	struct mutex u_mutex;		/* NOTE(review): unused so far? */
+	struct list_head u_unions;	/* list head for d_unions */
+	struct hlist_node u_hash;	/* hash node for searching by u_this */
+	struct hlist_node u_rhash;	/* hash node for searching by u_next */
+
+	struct path u_this;		/* this is me */
+	struct path u_next;		/* this is what I overlay */
+};
+
+#define IS_UNION(dentry)	(!list_empty(&(dentry)->d_unions) || \
+				 (dentry)->d_unionized)	/* evaluates dentry twice */
+#define IS_MNT_UNION(mnt)	((mnt)->mnt_flags & MNT_UNION)
+
+extern int is_unionized(struct dentry *, struct vfsmount *);
+extern int append_to_union(struct vfsmount *, struct dentry *,
+			   struct vfsmount *, struct dentry *);
+extern int follow_union_down(struct vfsmount **, struct dentry **);
+extern int follow_union_mount(struct vfsmount **, struct dentry **);
+
+#else /* !CONFIG_UNION_MOUNT: constant stubs, arguments are not evaluated */
+
+#define IS_UNION(x)			(0)
+#define IS_MNT_UNION(x)			(0)
+#define is_unionized(x, y)		(0)
+#define append_to_union(x1, y1, x2, y2)	({ BUG(); (0); })
+#define follow_union_down(x, y)		({ (0); })
+#define follow_union_mount(x, y)	({ (0); })
+
+#endif	/* CONFIG_UNION_MOUNT */
+#endif	/* __KERNEL__ */
+#endif	/* __LINUX_UNION_H */

-- 


  parent reply	other threads:[~2007-07-30 16:17 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-07-30 16:13 [RFC 00/26] VFS based Union Mount (V2) Jan Blunck
2007-07-30 16:13 ` [RFC 01/26] [PATCH 14/18] shmem: convert to using splice instead of sendfile() Jan Blunck
2007-07-30 16:13 ` [RFC 02/26] VFS: Export dput_path() and path_to_nameidata() Jan Blunck
2007-07-30 16:13 ` [RFC 03/26] VFS: Make lookup_hash() return a struct path Jan Blunck
2007-07-30 16:13 ` [RFC 04/26] VFS: Make lookup_create() " Jan Blunck
2007-07-30 16:13 ` [RFC 05/26] VFS: cache_lookup() cleanup Jan Blunck
2007-07-30 16:13 ` [RFC 06/26] VFS: Make real_lookup() return a struct path Jan Blunck
2007-07-30 16:13 ` [RFC 07/26] VFS: Introduce dput() variante that maintains a kill-list Jan Blunck
2007-07-30 16:13 ` [RFC 08/26] VFS: Export lives_below_in_same_fs() Jan Blunck
2007-07-30 16:13 ` [RFC 09/26] linux/stat.h: Add the filetype white-out Jan Blunck
2007-07-30 16:13 ` [RFC 10/26] VFS white-out handling Jan Blunck
2007-07-30 16:13 ` [RFC 11/26] tmpfs white-out support Jan Blunck
2007-08-01 15:13   ` Hugh Dickins
2007-08-02  2:48     ` Matt Mackall
2007-07-30 16:13 ` [RFC 12/26] ext2 " Jan Blunck
2007-07-31  3:45   ` Theodore Tso
2007-07-31  7:44     ` Jan Blunck
2007-07-31  8:32       ` Andreas Dilger
2007-07-31  9:08         ` Jan Blunck
2007-07-31 10:53       ` Theodore Tso
2007-08-02 19:31         ` Pavel Machek
2007-07-31 16:36   ` Josef Sipek
2007-07-31 17:00     ` Jan Blunck
2007-07-31 17:11       ` Josef Sipek
2007-08-01 15:23         ` Dave Kleikamp
2007-08-01 18:44           ` Josef Sipek
2007-08-01 19:10             ` Dave Kleikamp
2007-08-01 19:33               ` Josef Sipek
2007-08-01 19:52                 ` Dave Kleikamp
2007-08-01 22:06                   ` Erez Zadok
2007-08-02 12:05                     ` Jan Blunck
2007-08-02 11:55                 ` Jan Blunck
2007-08-02 17:50                 ` Jörn Engel
2007-08-02 18:15                   ` Jeremy Maitin-Shepard
2007-08-02  5:24             ` Ph. Marek
2007-08-02 12:12               ` Jan Blunck
2007-08-02 10:26         ` Jan Blunck
2007-08-01 10:00       ` Hans-Peter Jansen
2007-08-01 11:43         ` Josef Sipek
2007-08-01 18:01         ` Jan Engelhardt
2007-07-31 17:03     ` Mark Williamson
2007-07-31 17:16       ` Josef Sipek
2007-08-01 17:58     ` Jan Engelhardt
2007-08-01 18:03       ` Josef Sipek
2007-07-30 16:13 ` [RFC 13/26] ext3 whiteout support Jan Blunck
2007-07-30 16:13 ` [RFC 14/26] union-mount: Documentation Jan Blunck
2007-07-30 16:13 ` [RFC 15/26] union-mount: Add union-mount mount flag Jan Blunck
2007-07-30 16:13 ` Jan Blunck [this message]
2007-08-06  5:57   ` [RFC 16/26] union-mount: Introduce union_mount structure Bharata B Rao
2007-07-30 16:13 ` [RFC 17/26] union-mount: Drive the union cache via dcache Jan Blunck
2007-07-30 16:13 ` [RFC 18/26] union-mount: Changes to the namespace handling Jan Blunck
2007-08-08 10:10   ` Bharata B Rao
2007-07-30 16:13 ` [RFC 19/26] union-mount: Make lookup work for union-mounted file systems Jan Blunck
2007-08-09  5:42   ` Bharata B Rao
2007-07-30 16:13 ` [RFC 20/26] union-mount: Simple union-mount readdir implementation Jan Blunck
2007-08-06 11:08   ` Bharata B Rao
2007-07-30 16:13 ` [RFC 21/26] union-mount: in-kernel file copy between union mounted filesystems Jan Blunck
2007-07-30 16:13 ` [RFC 22/26] union-mount: white-out changes for copy-on-open Jan Blunck
2007-07-30 16:13 ` [RFC 23/26] union-mount: copyup on rename Jan Blunck
2007-07-30 16:13 ` [RFC 24/26] union-mount: dont report EROFS for union mounts Jan Blunck
2007-07-30 16:13 ` [RFC 25/26] union-mount: Debug Infrastructure Jan Blunck
2007-07-30 16:13 ` [RFC 26/26] union-mount: Debug code Jan Blunck
2007-07-30 18:23 ` [RFC 00/26] VFS based Union Mount (V2) Al Boldi
2007-08-02  6:49 ` Bharata B Rao
2007-08-02 10:17   ` Jan Blunck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070730161324.628215686@weierstrass.suse.de \
    --to=jblunck@suse.de \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox