linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mike Waychison <Michael.Waychison@Sun.COM>
To: Christoph Hellwig <hch@infradead.org>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: raven@themaw.net
Subject: Re: [PATCH 14/28] VFS: Introduce Mountpoint file descriptors (resend)
Date: Mon, 25 Oct 2004 11:35:45 -0400	[thread overview]
Message-ID: <417D1D51.8060901@sun.com> (raw)
In-Reply-To: <20041025152521.GA1959@infradead.org>

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Christoph Hellwig wrote:

> You haven't explained why you actually need it, though.
> 

Apparently I used the wrong server and a couple patches bounced :\

I'll try to make the next series more 'forward self-describing' :)
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.5 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFBfR1RdQs4kOxk3/MRAkVnAKCJXfCmyzk1UaL0GcuPwQsdexHMhgCeMbGl
Wz9LPh+FJbdMpyPPYiVrtoY=
=+WQa
-----END PGP SIGNATURE-----

[-- Attachment #2: 14-introduce_mountfd.diff --]
[-- Type: text/x-patch, Size: 11235 bytes --]

This patch introduces the concept of a mountpoint file descriptor (mountfd).
All interfacing with mountfds is done through the mountfd(2) syscall (which
currently uses a command op argument to determine what you really are trying
to do).

This patch only adds the following abilities:
 - grab a reference to a mountfd given a directory fd (of the root of the
   mountpoint).
 - get the directory fd of the root of the mountpoint given a mountfd.

NOTE: the entire interface is highly likely to change and
comments/suggestions are more than welcome as the provided api is mostly a
prototype at this stage and far from complete.

Further patches add the following functionalities:
 - attach a mountpoint given a mountfd to a directory (given a dirfd)
 - detach/umount a mountpoint given a mountfd (umount if not busy, forced
   umount, lazy umount)

NOTE AGAIN: most of the interface is half-baked, and really does require
input from others.  Most of this functionality is not necessarily needed for
autofsng, however I thought I'd start the ball rolling for _some_ interface.

Signed-off-by: Mike Waychison <michael.waychison@sun.com>
---

 Documentation/ioctl-number.txt |    1 
 arch/i386/kernel/entry.S       |   17 ++
 fs/Makefile                    |    2 
 fs/mountfd.c                   |  256 +++++++++++++++++++++++++++++++++++++++++
 fs/namespace.c                 |    2 
 include/asm-i386/unistd.h      |    3 
 include/linux/fs.h             |    2 
 7 files changed, 281 insertions(+), 2 deletions(-)

Index: linux-2.6.9-quilt/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.9-quilt.orig/arch/i386/kernel/entry.S	2004-08-14 01:36:32.000000000 -0400
+++ linux-2.6.9-quilt/arch/i386/kernel/entry.S	2004-10-22 17:17:40.735271440 -0400
@@ -886,5 +886,22 @@ ENTRY(sys_call_table)
 	.long sys_mq_notify
 	.long sys_mq_getsetattr
 	.long sys_ni_syscall		/* reserved for kexec */
+	.long sys_ni_syscall
+	.long sys_ni_syscall		/* 285 */
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall		/* 290 */
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall		/* 295 */
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_mountfd		/* 300 */
 
 syscall_table_size=(.-sys_call_table)
Index: linux-2.6.9-quilt/include/asm-i386/unistd.h
===================================================================
--- linux-2.6.9-quilt.orig/include/asm-i386/unistd.h	2004-08-14 01:37:25.000000000 -0400
+++ linux-2.6.9-quilt/include/asm-i386/unistd.h	2004-10-22 17:17:40.735271440 -0400
@@ -289,8 +289,9 @@
 #define __NR_mq_notify		(__NR_mq_open+4)
 #define __NR_mq_getsetattr	(__NR_mq_open+5)
 #define __NR_sys_kexec_load	283
+#define __NR_mountfd		300
 
-#define NR_syscalls 284
+#define NR_syscalls 301
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
Index: linux-2.6.9-quilt/fs/mountfd.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.9-quilt/fs/mountfd.c	2004-10-22 17:17:40.736271288 -0400
@@ -0,0 +1,256 @@
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/security.h>
+
+#define MFDFS_MAGIC 0x4A9F2E43
+#define MFDFS_ROOT_INO 1
+
+#define VFSMOUNT(filp) ((struct vfsmount *)((filp)->private_data))
+
+static struct vfsmount *mfdfs_mnt;
+
+static void mfdfs_read_inode(struct inode *inode);
+static struct super_operations mfdfs_ops = {
+	.read_inode    = mfdfs_read_inode,	/* fills in each mountfd inode */
+	.statfs        = simple_statfs,
+};
+
+static struct super_block *mfdfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)	/* last three are unused */
+{
+	return get_sb_pseudo(fs_type, "mountfd:", &mfdfs_ops, MFDFS_MAGIC);
+}
+
+static struct file_system_type mfd_fs_type = {
+	.name    =	"mfdfs",
+	.get_sb  = 	mfdfs_get_sb,	/* superblock comes from get_sb_pseudo() */
+	.kill_sb = 	kill_anon_super,
+};
+
+void __init mfdfs_init(void)
+{
+	register_filesystem(&mfd_fs_type);	/* NOTE(review): result ignored */
+	mfdfs_mnt = kern_mount(&mfd_fs_type);	/* NOTE(review): no IS_ERR() check */
+}
+
+/* TODO: change this list into a hashtable */
+spinlock_t mfd_lock = SPIN_LOCK_UNLOCKED;	/* held around every mfd_list walk */
+LIST_HEAD(mfd_list);	/* NOTE(review): neither global is static - intended? */
+struct mountfd {
+	struct list_head list;	/* link in mfd_list */
+	struct vfsmount *mnt;	/* mount this entry stands for; the lookup key */
+	struct dentry   *dentry;	/* mfdfs dentry shared by files opened on mnt */
+	atomic_t count;		/* open files currently using this entry */
+};
+
+/* ->release: drop this file's use of its mountfd; the last user frees it. */
+static int mfd_release(struct inode *inode, struct file *file)
+{
+	struct mountfd *mfd, *tofree = NULL;
+
+	spin_lock(&mfd_lock);
+	list_for_each_entry(mfd, &mfd_list, list) {
+		if (mfd->mnt != VFSMOUNT(file))
+			continue;
+		if (atomic_dec_and_test(&mfd->count)) {
+			list_del_init(&mfd->list);
+			tofree = mfd;
+		}
+		break;
+	}
+	spin_unlock(&mfd_lock);
+	if (!tofree)
+		return 0;
+	d_delete(tofree->dentry);
+	dput(tofree->dentry);	/* d_delete() does not drop the entry's ref */
+	mntsoftput(tofree->mnt);
+	kfree(tofree);
+	return 0;
+}
+
+static int mfd_ioctl(struct inode *inode, struct file *filp,
+		     unsigned int cmd, unsigned long arg);
+static struct file_operations mfd_file_ops = {
+	.release  =	mfd_release,	/* drops one use of the per-mount entry */
+	.ioctl    =	mfd_ioctl,	/* handles MOUNTFD_IOC_GETDIRFD */
+};
+
+/* ->read_inode: set up a fresh mfdfs inode as a read/exec-only regular file. */
+static void mfdfs_read_inode(struct inode *inode)
+{
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+	inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
+	inode->i_fop = &mfd_file_ops;
+	inode->i_nlink = 1;
+	inode->i_blksize = 1024;
+	inode->i_blocks = 0;
+}
+
+/*
+ * Return a referenced mfdfs dentry for @mnt, creating and registering the
+ * per-mount entry on first use.  Returns ERR_PTR(-ENOMEM) on failure.
+ */
+static struct dentry *get_mfd_dentry(struct vfsmount *mnt)
+{
+	struct qstr qstr = { .name = "DUMMY", .len = 5 };
+	struct mountfd *newmfd, *mfd;
+	struct dentry *dentry;
+	struct inode *inode;
+
+	if (!mfdfs_mnt->mnt_root)
+		return ERR_PTR(-ENOMEM);
+	/* create a new mfd before we lock */
+	newmfd = kmalloc(sizeof(*newmfd), GFP_KERNEL);
+	if (!newmfd)
+		return ERR_PTR(-ENOMEM);
+	/* TODO: how to make this context dependent in proc/<pid>/fd */
+	newmfd->dentry = d_alloc(mfdfs_mnt->mnt_root, &qstr);
+	if (!newmfd->dentry)	/* d_alloc() fails with NULL, not ERR_PTR() */
+		goto out_free;
+	inode = iget(mfdfs_mnt->mnt_sb, iunique(mfdfs_mnt->mnt_sb, MFDFS_ROOT_INO));
+	if (!inode)		/* so does iget() */
+		goto out_dput;
+	d_add(newmfd->dentry, inode);
+
+	spin_lock(&mfd_lock);
+	list_for_each_entry(mfd, &mfd_list, list) {
+		if (mfd->mnt != mnt)
+			continue;
+		/* @mnt is already registered: share that entry, discard ours. */
+		dentry = dget(mfd->dentry);
+		atomic_inc(&mfd->count);
+		spin_unlock(&mfd_lock);
+		d_delete(newmfd->dentry);
+		dput(newmfd->dentry);	/* d_delete() does not drop our ref */
+		kfree(newmfd);
+		return dentry;
+	}
+
+	/* First opener of @mnt: publish our entry. */
+	newmfd->mnt = mntsoftget(mnt);
+	atomic_set(&newmfd->count, 1);
+	list_add(&newmfd->list, &mfd_list);
+	dentry = dget(newmfd->dentry);	/* extra reference for the caller */
+	spin_unlock(&mfd_lock);
+	return dentry;
+
+out_dput:
+	dput(newmfd->dentry);
+out_free:
+	kfree(newmfd);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Open a new mountfd file for @mnt and install it in the caller's fd table.
+ * Returns the new fd, or -ENFILE / the errno reported by a failing helper.
+ */
+static long open_mfd(struct vfsmount *mnt)
+{
+	struct dentry *dentry;
+	struct file *file;
+	long error;
+	int fd;
+
+	file = get_empty_filp();
+	if (!file)
+		return -ENFILE;
+	error = fd = get_unused_fd();
+	if (fd < 0)
+		goto out_putfilp;
+
+	dentry = get_mfd_dentry(mnt);
+	error = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		goto out_putfd;
+
+	file->private_data = mnt;
+	file->f_dentry = dentry;
+	file->f_vfsmnt = mntget(mfdfs_mnt);
+	file->f_op = &mfd_file_ops;
+	file->f_mode = FMODE_READ | FMODE_WRITE;
+	file->f_flags = O_RDWR;
+	file->f_pos = 0;
+	fd_install(fd, file);
+	return fd;
+
+out_putfd:
+	put_unused_fd(fd);
+out_putfilp:
+	put_filp(file);
+	return error;
+}
+
+/*
+ * Hand out a mountfd for the mount whose root directory is open as @dirfd.
+ * Returns the result of open_mfd(), -EBADF if @dirfd is not an open file,
+ * -ENOTDIR if it is not a directory, -EINVAL if it is not a mount root.
+ */
+static long mfd_getmfd(int dirfd)
+{
+	struct file *dirfilp = fget(dirfd);
+	struct dentry *dentry;
+	int ret;
+
+	if (!dirfilp)
+		return -EBADF;
+
+	dentry = dirfilp->f_dentry;
+	if (!S_ISDIR(dentry->d_inode->i_mode))
+		ret = -ENOTDIR;
+	else if (dentry != dirfilp->f_vfsmnt->mnt_root)
+		ret = -EINVAL;
+	else
+		ret = open_mfd(dirfilp->f_vfsmnt);
+
+	fput(dirfilp);
+	return ret;
+}
+
+/*
+ * Open the root of the mount behind @mountfilp; return a new fd or -errno.
+ */
+long mfd_getdirfd(struct file *mountfilp)
+{
+	struct vfsmount *mnt = VFSMOUNT(mountfilp);
+	struct file *filp;
+	int dirfd;
+
+	dirfd = get_unused_fd();
+	if (dirfd < 0)
+		return dirfd;
+
+	/*
+	 * dentry_open() takes over both references and drops them itself on
+	 * failure, so no mntput()/dput() is owed here on either path.
+	 */
+	filp = dentry_open(dget(mnt->mnt_root), mntget(mnt), O_DIRECTORY | O_RDONLY);
+	if (IS_ERR(filp)) {
+		put_unused_fd(dirfd);
+		return PTR_ERR(filp);
+	}
+	fd_install(dirfd, filp);
+	return dirfd;
+}
+
+/* ->ioctl: MOUNTFD_IOC_GETDIRFD goes to mfd_getdirfd(); all else is -ENOTTY. */
+static int mfd_ioctl(struct inode *inode, struct file *filp,
+		     unsigned int cmd, unsigned long arg)
+{
+	if (cmd == MOUNTFD_IOC_GETDIRFD)
+		return mfd_getdirfd(filp);
+
+	return -ENOTTY;
+}
+
+/* mountfd syscall: needs CAP_SYS_ADMIN, then defers to mfd_getmfd(@dirfd). */
+asmlinkage long sys_mountfd(int dirfd)
+{
+	if (capable(CAP_SYS_ADMIN))
+		return mfd_getmfd(dirfd);
+	return -EPERM;
+}
Index: linux-2.6.9-quilt/fs/Makefile
===================================================================
--- linux-2.6.9-quilt.orig/fs/Makefile	2004-08-14 01:37:14.000000000 -0400
+++ linux-2.6.9-quilt/fs/Makefile	2004-10-22 17:17:40.736271288 -0400
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.
 		namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
 		dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \
 		filesystems.o namespace.o seq_file.o xattr.o libfs.o \
-		fs-writeback.o mpage.o direct-io.o aio.o
+		fs-writeback.o mpage.o direct-io.o aio.o mountfd.o
 
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o
Index: linux-2.6.9-quilt/fs/namespace.c
===================================================================
--- linux-2.6.9-quilt.orig/fs/namespace.c	2004-10-22 17:17:40.187354736 -0400
+++ linux-2.6.9-quilt/fs/namespace.c	2004-10-22 17:17:40.738270984 -0400
@@ -34,6 +34,7 @@ static inline int sysfs_init(void)
 	return 0;
 }
 #endif
+extern void __init mfdfs_init(void);
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
@@ -1711,6 +1712,7 @@ void __init mnt_init(unsigned long mempa
 		d++;
 		i--;
 	} while (i);
+	mfdfs_init();
 	sysfs_init();
 	init_rootfs();
 	init_mount_tree();
Index: linux-2.6.9-quilt/Documentation/ioctl-number.txt
===================================================================
--- linux-2.6.9-quilt.orig/Documentation/ioctl-number.txt	2004-08-14 01:38:04.000000000 -0400
+++ linux-2.6.9-quilt/Documentation/ioctl-number.txt	2004-10-22 17:17:40.738270984 -0400
@@ -144,6 +144,7 @@ Code	Seq#	Include File		Comments
 'p'	40-7F	linux/nvram.h
 'p'	80-9F				user-space parport
 					<mailto:tim@cyberelk.net>
+'p'	A0-BF				mountpoint file descriptors
 'q'	00-1F	linux/videotext.h	conflict!
 'q'	80-FF				Internet PhoneJACK, Internet LineJACK
 					<http://www.quicknet.net>
Index: linux-2.6.9-quilt/include/linux/fs.h
===================================================================
--- linux-2.6.9-quilt.orig/include/linux/fs.h	2004-10-22 17:17:37.120820920 -0400
+++ linux-2.6.9-quilt/include/linux/fs.h	2004-10-22 17:17:40.739270832 -0400
@@ -214,6 +214,8 @@ extern int leases_enable, dir_notify_ena
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 
+#define MOUNTFD_IOC_GETDIRFD _IO('p', 0xa0)
+
 #ifdef __KERNEL__
 
 #include <linux/list.h>

  reply	other threads:[~2004-10-25 15:36 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-10-25 14:38 [PATCH 0/28] Autofs NG Patchset 0.2 Mike Waychison
2004-10-25 14:39 ` [PATCH 1/28] VFS: Unexport umount_tree Mike Waychison
2004-10-25 14:39   ` [PATCH 2/28] VFS: mnt_fslink -> mnt_expire Mike Waychison
2004-10-25 14:40     ` [PATCH 3/28] VFS: Move expiry into vfs Mike Waychison
2004-10-25 14:40       ` [PATCH 4/28] VFS: Stat shouldn't stop expire Mike Waychison
2004-10-25 14:41         ` [PATCH 5/28] VFS: Make expiry timeout configurable Mike Waychison
2004-10-25 14:41           ` [PATCH 6/28] VFS: Make expiry recursive Mike Waychison
2004-10-25 14:42             ` [PATCH 7/28] AFS: Update AFS to use new expiry interface Mike Waychison
2004-10-25 14:42               ` [PATCH 8/28] VFS: Remove MNT_EXPIRE support Mike Waychison
2004-10-25 14:43                 ` [PATCH 9/28] VFS: Give sane expiry semantics Mike Waychison
2004-10-25 14:43                   ` [PATCH 10/28] VFS: Move next_mnt() Mike Waychison
2004-10-25 14:44                     ` [PATCH 11/28] VFS: Allow for detachable subtrees Mike Waychison
2004-10-25 14:44                       ` [PATCH 12/28] VFS: Remove (now bogus) check_mnt Mike Waychison
2004-10-25 14:45                         ` [PATCH 13/28] VFS: Introduce soft reference counts Mike Waychison
2004-10-25 15:25                           ` Christoph Hellwig
2004-10-25 15:35                             ` Mike Waychison [this message]
2004-10-25 17:20                           ` Mika Penttilä
2004-10-25 17:25                             ` Mike Waychison
2004-10-25 17:52                               ` Mika Penttilä
2004-10-25 17:56                                 ` [PATCH 11/28] VFS: Allow for detachable subtrees (resend) Mike Waychison
2004-10-25 15:09                         ` [PATCH 12/28] VFS: Remove (now bogus) check_mnt Christoph Hellwig
2004-10-25 15:15                           ` Mike Waychison
2004-10-25 15:04                 ` [PATCH 8/28] VFS: Remove MNT_EXPIRE support Christoph Hellwig
2004-10-25 15:12                   ` Mike Waychison
2004-10-25 15:16                     ` Christoph Hellwig
2004-10-25 15:30                       ` Mike Waychison
2004-10-25 17:16                   ` Mike Waychison
2004-10-25 17:29                     ` Mike Waychison
2004-10-25 15:04             ` [PATCH 6/28] VFS: Make expiry recursive Christoph Hellwig
2004-10-26 10:27         ` [PATCH 4/28] VFS: Stat shouldn't stop expire Christoph Hellwig
2004-10-27 18:36           ` Mike Waychison
2004-10-25 14:59       ` [PATCH 3/28] VFS: Move expiry into vfs Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=417D1D51.8060901@sun.com \
    --to=michael.waychison@sun.com \
    --cc=hch@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=raven@themaw.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).