All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Waychison <Michael.Waychison@Sun.COM>
To: Christoph Hellwig <hch@infradead.org>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: raven@themaw.net
Subject: Re: [PATCH 14/28] VFS: Introduce Mountpoint file descriptors (resend)
Date: Mon, 25 Oct 2004 11:35:45 -0400	[thread overview]
Message-ID: <417D1D51.8060901@sun.com> (raw)
In-Reply-To: <20041025152521.GA1959@infradead.org>

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Christoph Hellwig wrote:

> You haven't explained why you actually need it, though.
> 

Apparently I used the wrong server and a couple patches bounced :\

I'll try to make the next series more 'forward self-describing' :)
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.5 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFBfR1RdQs4kOxk3/MRAkVnAKCJXfCmyzk1UaL0GcuPwQsdexHMhgCeMbGl
Wz9LPh+FJbdMpyPPYiVrtoY=
=+WQa
-----END PGP SIGNATURE-----

[-- Attachment #2: 14-introduce_mountfd.diff --]
[-- Type: text/x-patch, Size: 11235 bytes --]

This patch introduces the concept of a mountpoint file descriptor (mountfd).
All interfacing with mountfds is done through the mountfd(2) syscall (which
currently uses a command op argument to determine what you are really trying
to do).

This patch only adds the following abilities:
 - grab a reference to a mountfd given a directory fd (of the root of the
   mountpoint).
 - get the directory fd of the root of the mountpoint given a mountfd.

NOTE: the entire interface is highly likely to change and
comments/suggestions are more than welcome as the provided api is mostly a
prototype at this stage and far from complete.

Further patches add the following functionalities:
 - attach a mountpoint given a mountfd to a directory (given a dirfd)
 - detach/umount a mountpoint given a mountfd (umount if not busy, forced
   umount, lazy umount)

NOTE AGAIN: most of the interface is half-baked, and really does require
input from others.  Most of this functionality is not necessarily needed for
autofsng, however I thought I'd start the ball rolling for _some_ interface.

Signed-off-by: Mike Waychison <michael.waychison@sun.com>
---

 Documentation/ioctl-number.txt |    1 
 arch/i386/kernel/entry.S       |   17 ++
 fs/Makefile                    |    2 
 fs/mountfd.c                   |  256 +++++++++++++++++++++++++++++++++++++++++
 fs/namespace.c                 |    2 
 include/asm-i386/unistd.h      |    3 
 include/linux/fs.h             |    2 
 7 files changed, 281 insertions(+), 2 deletions(-)

Index: linux-2.6.9-quilt/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.9-quilt.orig/arch/i386/kernel/entry.S	2004-08-14 01:36:32.000000000 -0400
+++ linux-2.6.9-quilt/arch/i386/kernel/entry.S	2004-10-22 17:17:40.735271440 -0400
@@ -886,5 +886,22 @@ ENTRY(sys_call_table)
 	.long sys_mq_notify
 	.long sys_mq_getsetattr
 	.long sys_ni_syscall		/* reserved for kexec */
+	.long sys_ni_syscall
+	.long sys_ni_syscall		/* 285 */
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall		/* 290 */
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall		/* 295 */
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_ni_syscall
+	.long sys_mountfd		/* 300 */
 
 syscall_table_size=(.-sys_call_table)
Index: linux-2.6.9-quilt/include/asm-i386/unistd.h
===================================================================
--- linux-2.6.9-quilt.orig/include/asm-i386/unistd.h	2004-08-14 01:37:25.000000000 -0400
+++ linux-2.6.9-quilt/include/asm-i386/unistd.h	2004-10-22 17:17:40.735271440 -0400
@@ -289,8 +289,9 @@
 #define __NR_mq_notify		(__NR_mq_open+4)
 #define __NR_mq_getsetattr	(__NR_mq_open+5)
 #define __NR_sys_kexec_load	283
+#define __NR_mountfd		300
 
-#define NR_syscalls 284
+#define NR_syscalls 301
 
 /* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
 
Index: linux-2.6.9-quilt/fs/mountfd.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.9-quilt/fs/mountfd.c	2004-10-22 17:17:40.736271288 -0400
@@ -0,0 +1,256 @@
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+
+#define MFDFS_MAGIC 0x4A9F2E43	/* magic handed to get_sb_pseudo() */
+#define MFDFS_ROOT_INO 1	/* second argument to iunique() in get_mfd_dentry() */
+
+#define VFSMOUNT(filp) ((struct vfsmount *)((filp)->private_data))	/* vfsmount stored in a mountfd file */
+
+static struct vfsmount *mfdfs_mnt;	/* result of kern_mount() in mfdfs_init() */
+
+static void mfdfs_read_inode(struct inode *inode);
+static struct super_operations mfdfs_ops = {	/* superblock ops given to get_sb_pseudo() */
+	.read_inode    = mfdfs_read_inode,
+	.statfs        = simple_statfs,
+};
+
+/*
+ * ->get_sb hook for mfdfs: delegate to get_sb_pseudo() using the name
+ * "mountfd:", the mfdfs superblock operations and MFDFS_MAGIC.
+ * @flags, @dev_name and @data are not used.
+ */
+static struct super_block *mfdfs_get_sb(struct file_system_type *fs_type,
+					int flags, const char *dev_name,
+					void *data)
+{
+	struct super_block *sb;
+
+	sb = get_sb_pseudo(fs_type, "mountfd:", &mfdfs_ops, MFDFS_MAGIC);
+	return sb;
+}
+
+static struct file_system_type mfd_fs_type = {	/* registered and kern_mount()ed by mfdfs_init() */
+	.name    =	"mfdfs",
+	.get_sb  = 	mfdfs_get_sb,	/* builds the superblock via get_sb_pseudo() */
+	.kill_sb = 	kill_anon_super,
+};
+
+/*
+ * mfdfs_init - register mfdfs and create its kernel-internal mount
+ *
+ * Called once from mnt_init().  Every mountfd file lives on mfdfs_mnt and
+ * the rest of this file dereferences it unconditionally, so a failure here
+ * cannot be recovered from later; stop the boot instead of leaving an
+ * ERR_PTR() value in mfdfs_mnt.
+ */
+void __init mfdfs_init(void)
+{
+	int err;
+
+	err = register_filesystem(&mfd_fs_type);
+	if (err)
+		panic("Cannot register mfdfs pseudo-fs");
+
+	mfdfs_mnt = kern_mount(&mfd_fs_type);
+	if (IS_ERR(mfdfs_mnt))
+		panic("Cannot create mfdfs pseudo-fs");
+}
+
+/* TODO: change this list into a hashtable
+ *
+ * mfd_list holds at most one struct mountfd per vfsmount: get_mfd_dentry()
+ * reuses an entry whose ->mnt matches and only links a new one otherwise,
+ * and mfd_release() unlinks an entry once its count drops to zero.
+ * mfd_lock is held around every walk and modification of the list in this
+ * file.
+ */
+spinlock_t mfd_lock = SPIN_LOCK_UNLOCKED;
+LIST_HEAD(mfd_list);
+struct mountfd {
+	struct list_head list;		/* link in mfd_list */
+	struct vfsmount *mnt;		/* tracked mount, obtained with mntsoftget() */
+	struct dentry   *dentry;	/* mfdfs dentry shared by all files on this mount */
+	atomic_t count;			/* +1 per get_mfd_dentry(), -1 per mfd_release() */
+};
+
+/*
+ * mfd_release - ->release hook for mountfd files
+ * @inode: unused
+ * @file:  mountfd file being released; its vfsmount is in private_data
+ *
+ * Drops this file's count on the struct mountfd that tracks its vfsmount.
+ * When the count reaches zero the entry is unlinked from mfd_list under
+ * mfd_lock and then torn down with the lock released.
+ *
+ * Always returns 0.
+ */
+static int mfd_release(struct inode *inode, struct file *file)
+{
+	struct vfsmount *mnt = VFSMOUNT(file);
+	struct mountfd *mfd, *tofree = NULL;
+
+	spin_lock(&mfd_lock);
+	list_for_each_entry(mfd, &mfd_list, list) {
+		if (mfd->mnt != mnt)
+			continue;
+		if (atomic_dec_and_test(&mfd->count)) {
+			tofree = mfd;
+			list_del_init(&tofree->list);
+		}
+		break;
+	}
+	spin_unlock(&mfd_lock);
+
+	if (tofree) {
+		d_delete(tofree->dentry);
+		/*
+		 * The entry owns the reference d_alloc() returned in
+		 * get_mfd_dentry() (each file holds a separate dget()
+		 * reference).  Drop it here, otherwise the dentry is
+		 * leaked when the entry is freed.
+		 */
+		dput(tofree->dentry);
+		mntsoftput(tofree->mnt);
+		kfree(tofree);
+	}
+	return 0;
+}
+
+static int mfd_ioctl(struct inode *inode, struct file *filp,
+		     unsigned int cmd, unsigned long arg);
+static struct file_operations mfd_file_ops = {	/* installed on mountfd files and mfdfs inodes */
+	.release  =	mfd_release,	/* drops the per-mount count */
+	.ioctl    =	mfd_ioctl,	/* handles MOUNTFD_IOC_GETDIRFD */
+};
+
+/*
+ * ->read_inode hook for mfdfs.
+ *
+ * Every mfdfs inode is set up the same way: a regular file that is
+ * readable and executable by everyone, with a single link, no blocks,
+ * all three timestamps set to the current time, and mfd_file_ops as its
+ * file operations.
+ */
+static void mfdfs_read_inode(struct inode *inode)
+{
+	inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
+	inode->i_nlink = 1;
+
+	inode->i_blksize = 1024;
+	inode->i_blocks = 0;
+
+	inode->i_ctime = inode->i_atime = inode->i_mtime = CURRENT_TIME;
+	inode->i_fop = &mfd_file_ops;
+}
+
+/*
+ * get_mfd_dentry - find or create the mfdfs dentry that stands for @mnt
+ * @mnt: vfsmount a mountfd is being opened on
+ *
+ * A candidate struct mountfd (with its own dentry and inode) is built
+ * before mfd_lock is taken, because the allocations may sleep.  If
+ * mfd_list already tracks @mnt, that entry's count is bumped and the
+ * candidate is discarded; otherwise the candidate is linked into the list
+ * with a count of one and a soft reference on @mnt.
+ *
+ * Returns the dentry with an extra reference for the caller, or
+ * ERR_PTR(-ENOMEM) on failure.
+ */
+static struct dentry *get_mfd_dentry(struct vfsmount *mnt)
+{
+	struct mountfd *newmfd, *mfd;
+	struct dentry *dentry;
+	struct qstr qstr;
+	struct inode *inode;
+
+	/* check this before allocating so there is nothing to unwind */
+	if (!mfdfs_mnt->mnt_root)
+		return ERR_PTR(-ENOMEM);
+
+	/* create a new mfd before we lock */
+	newmfd = kmalloc(sizeof(*newmfd), GFP_KERNEL);
+	if (!newmfd)
+		return ERR_PTR(-ENOMEM);
+
+	/* TODO: how to make this context dependent in proc/<pid>/fd */
+	qstr.name = "DUMMY";
+	qstr.len  = 5;
+	qstr.hash = 0;	/* d_alloc() copies the hash; don't hand it stack garbage */
+	newmfd->dentry = d_alloc(mfdfs_mnt->mnt_root, &qstr);
+	if (!newmfd->dentry) {
+		/* d_alloc() reports failure with NULL, not an ERR_PTR() */
+		kfree(newmfd);
+		return ERR_PTR(-ENOMEM);
+	}
+	inode = iget(mfdfs_mnt->mnt_sb, iunique(mfdfs_mnt->mnt_sb, MFDFS_ROOT_INO));
+	if (!inode) {
+		/* iget() reports failure with NULL as well */
+		dput(newmfd->dentry);
+		kfree(newmfd);
+		return ERR_PTR(-ENOMEM);
+	}
+	d_add(newmfd->dentry, inode);
+
+	spin_lock(&mfd_lock);
+	list_for_each_entry(mfd, &mfd_list, list) {
+		if (mfd->mnt == mnt) {
+			dentry = dget(mfd->dentry);
+			atomic_inc(&mfd->count);
+			spin_unlock(&mfd_lock);
+
+			/*
+			 * @mnt is already tracked: throw the candidate
+			 * away, including the reference d_alloc() gave us.
+			 */
+			d_delete(newmfd->dentry);
+			dput(newmfd->dentry);
+			kfree(newmfd);
+
+			return dentry;
+		}
+	}
+
+	newmfd->mnt = mntsoftget(mnt);
+	dentry = newmfd->dentry;
+	INIT_LIST_HEAD(&newmfd->list);
+	list_add(&newmfd->list, &mfd_list);
+	atomic_set(&newmfd->count, 1);
+	spin_unlock(&mfd_lock);
+
+	/* the list entry keeps d_alloc()'s reference; the caller gets its own */
+	return dget(dentry);
+}
+
+/*
+ * open_mfd - create a mountfd file for @mnt and install it in the fd table
+ * @mnt: vfsmount the new file descriptor refers to (kept in private_data)
+ *
+ * Returns the new file descriptor number, or a negative errno:
+ * -ENFILE if no struct file could be obtained, the error from
+ * get_unused_fd() if no descriptor is free, or the error from
+ * get_mfd_dentry().
+ */
+static long open_mfd(struct vfsmount *mnt)
+{
+	struct dentry *dentry;
+	struct file *file;
+	int error;
+	int fd;
+
+	file = get_empty_filp();
+	if (!file)
+		return -ENFILE;
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		/* pass on the real reason instead of overwriting it */
+		error = fd;
+		goto out_putfilp;
+	}
+
+	/* keep the result local so the file never carries an ERR_PTR() */
+	dentry = get_mfd_dentry(mnt);
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
+		goto out_putfd;
+	}
+
+	file->private_data = mnt;
+	file->f_dentry = dentry;
+	file->f_vfsmnt = mntget(mfdfs_mnt);
+
+	file->f_op    = &mfd_file_ops;
+	file->f_mode  = FMODE_READ | FMODE_WRITE;
+	file->f_flags = O_RDWR;
+	file->f_pos   = 0;
+
+	fd_install(fd, file);
+	return fd;
+
+out_putfd:
+	put_unused_fd(fd);
+out_putfilp:
+	put_filp(file);
+	return error;
+}
+
+/*
+ * mfd_getmfd - open a mountfd for the mount whose root @dirfd refers to
+ * @dirfd: open file descriptor of a mountpoint's root directory
+ *
+ * Returns the new mountfd, or -EBADF if @dirfd is not an open file,
+ * -ENOTDIR if it is not a directory, -EINVAL if it is not the root dentry
+ * of its vfsmount, or whatever error open_mfd() reports.
+ */
+static long mfd_getmfd(int dirfd)
+{
+	struct file *dir = fget(dirfd);
+	int error;
+
+	if (!dir)
+		return -EBADF;
+
+	if (!S_ISDIR(dir->f_dentry->d_inode->i_mode))
+		error = -ENOTDIR;
+	else if (dir->f_dentry != dir->f_vfsmnt->mnt_root)
+		error = -EINVAL;
+	else
+		error = open_mfd(dir->f_vfsmnt);
+
+	fput(dir);
+	return error;
+}
+
+/*
+ * mfd_getdirfd - open the root directory of the mount behind a mountfd
+ * @mountfilp: mountfd file; its vfsmount is taken from private_data
+ *
+ * Returns a new read-only directory file descriptor for the mount's root,
+ * or a negative errno from get_unused_fd() or dentry_open().
+ */
+long mfd_getdirfd(struct file *mountfilp)
+{
+	long error, dirfd;
+	struct file *filp;
+	struct vfsmount *mnt;
+
+	/* pin the mount for the duration of this call */
+	mnt = mntget(VFSMOUNT(mountfilp));
+
+	dirfd = get_unused_fd();
+	if (dirfd < 0) {
+		error = dirfd;
+		goto out_mntput;
+	}
+
+	/*
+	 * dentry_open() takes over the dentry and vfsmount references it is
+	 * given, on success and on failure alike, so hand it references of
+	 * its own and keep ours for the mntput() below.
+	 */
+	filp = dentry_open(dget(mnt->mnt_root), mntget(mnt),
+			   O_DIRECTORY | O_RDONLY);
+	if (IS_ERR(filp)) {
+		error = PTR_ERR(filp);
+		/* give the reserved descriptor back, it was never installed */
+		put_unused_fd(dirfd);
+		goto out_mntput;
+	}
+
+	fd_install(dirfd, filp);
+	error = dirfd;
+
+out_mntput:
+	mntput(mnt);
+	return error;
+}
+
+/*
+ * ->ioctl hook for mountfd files.
+ *
+ * MOUNTFD_IOC_GETDIRFD is forwarded to mfd_getdirfd(); every other command
+ * is rejected with -ENOTTY.  @inode and @arg are not used.
+ */
+static int mfd_ioctl(struct inode *inode, struct file *filp,
+		     unsigned int cmd, unsigned long arg)
+{
+	if (cmd == MOUNTFD_IOC_GETDIRFD)
+		return mfd_getdirfd(filp);
+
+	return -ENOTTY;
+}
+
+/*
+ * mountfd(2) entry point: return a mountfd for the mount rooted at @dirfd.
+ * Callers without CAP_SYS_ADMIN get -EPERM; otherwise the result of
+ * mfd_getmfd() is returned unchanged.
+ */
+asmlinkage long sys_mountfd(int dirfd)
+{
+	if (capable(CAP_SYS_ADMIN))
+		return mfd_getmfd(dirfd);
+
+	return -EPERM;
+}
Index: linux-2.6.9-quilt/fs/Makefile
===================================================================
--- linux-2.6.9-quilt.orig/fs/Makefile	2004-08-14 01:37:14.000000000 -0400
+++ linux-2.6.9-quilt/fs/Makefile	2004-10-22 17:17:40.736271288 -0400
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.
 		namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
 		dcache.o inode.o attr.o bad_inode.o file.o dnotify.o \
 		filesystems.o namespace.o seq_file.o xattr.o libfs.o \
-		fs-writeback.o mpage.o direct-io.o aio.o
+		fs-writeback.o mpage.o direct-io.o aio.o mountfd.o
 
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o
Index: linux-2.6.9-quilt/fs/namespace.c
===================================================================
--- linux-2.6.9-quilt.orig/fs/namespace.c	2004-10-22 17:17:40.187354736 -0400
+++ linux-2.6.9-quilt/fs/namespace.c	2004-10-22 17:17:40.738270984 -0400
@@ -34,6 +34,7 @@ static inline int sysfs_init(void)
 	return 0;
 }
 #endif
+extern void __init mfdfs_init(void);
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
@@ -1711,6 +1712,7 @@ void __init mnt_init(unsigned long mempa
 		d++;
 		i--;
 	} while (i);
+	mfdfs_init();
 	sysfs_init();
 	init_rootfs();
 	init_mount_tree();
Index: linux-2.6.9-quilt/Documentation/ioctl-number.txt
===================================================================
--- linux-2.6.9-quilt.orig/Documentation/ioctl-number.txt	2004-08-14 01:38:04.000000000 -0400
+++ linux-2.6.9-quilt/Documentation/ioctl-number.txt	2004-10-22 17:17:40.738270984 -0400
@@ -144,6 +144,7 @@ Code	Seq#	Include File		Comments
 'p'	40-7F	linux/nvram.h
 'p'	80-9F				user-space parport
 					<mailto:tim@cyberelk.net>
+'p'	A0-BF				mountpoint file descriptors
 'q'	00-1F	linux/videotext.h	conflict!
 'q'	80-FF				Internet PhoneJACK, Internet LineJACK
 					<http://www.quicknet.net>
Index: linux-2.6.9-quilt/include/linux/fs.h
===================================================================
--- linux-2.6.9-quilt.orig/include/linux/fs.h	2004-10-22 17:17:37.120820920 -0400
+++ linux-2.6.9-quilt/include/linux/fs.h	2004-10-22 17:17:40.739270832 -0400
@@ -214,6 +214,8 @@ extern int leases_enable, dir_notify_ena
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 
+#define MOUNTFD_IOC_GETDIRFD _IO('p', 0xa0)
+
 #ifdef __KERNEL__
 
 #include <linux/list.h>

  reply	other threads:[~2004-10-25 15:44 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-10-25 14:38 [PATCH 0/28] Autofs NG Patchset 0.2 Mike Waychison
2004-10-25 14:39 ` [PATCH 1/28] VFS: Unexport umount_tree Mike Waychison
2004-10-25 14:39   ` [PATCH 2/28] VFS: mnt_fslink -> mnt_expire Mike Waychison
2004-10-25 14:40     ` [PATCH 3/28] VFS: Move expiry into vfs Mike Waychison
2004-10-25 14:40       ` [PATCH 4/28] VFS: Stat shouldn't stop expire Mike Waychison
2004-10-25 14:41         ` [PATCH 5/28] VFS: Make expiry timeout configurable Mike Waychison
2004-10-25 14:41           ` [PATCH 6/28] VFS: Make expiry recursive Mike Waychison
2004-10-25 14:42             ` [PATCH 7/28] AFS: Update AFS to use new expiry interface Mike Waychison
2004-10-25 14:42               ` [PATCH 8/28] VFS: Remove MNT_EXPIRE support Mike Waychison
2004-10-25 14:43                 ` [PATCH 9/28] VFS: Give sane expiry semantics Mike Waychison
2004-10-25 14:43                   ` [PATCH 10/28] VFS: Move next_mnt() Mike Waychison
2004-10-25 14:44                     ` [PATCH 11/28] VFS: Allow for detachable subtrees Mike Waychison
2004-10-25 14:44                       ` [PATCH 12/28] VFS: Remove (now bogus) check_mnt Mike Waychison
2004-10-25 14:45                         ` [PATCH 13/28] VFS: Introduce soft reference counts Mike Waychison
2004-10-25 15:25                           ` Christoph Hellwig
2004-10-25 15:35                             ` Mike Waychison [this message]
2004-10-25 17:20                           ` Mika Penttilä
2004-10-25 17:25                             ` Mike Waychison
2004-10-25 17:25                               ` Mike Waychison
2004-10-25 17:52                               ` Mika Penttilä
2004-10-25 17:52                                 ` Mika Penttilä
2004-10-25 17:56                                 ` [PATCH 11/28] VFS: Allow for detachable subtrees (resend) Mike Waychison
2004-10-25 15:09                         ` [PATCH 12/28] VFS: Remove (now bogus) check_mnt Christoph Hellwig
2004-10-25 15:15                           ` Mike Waychison
2004-10-25 15:04                 ` [PATCH 8/28] VFS: Remove MNT_EXPIRE support Christoph Hellwig
2004-10-25 15:12                   ` Mike Waychison
2004-10-25 15:16                     ` Christoph Hellwig
2004-10-25 15:30                       ` Mike Waychison
2004-10-25 17:16                   ` Mike Waychison
2004-10-25 17:29                     ` Mike Waychison
2004-10-25 15:04             ` [PATCH 6/28] VFS: Make expiry recursive Christoph Hellwig
2004-10-26 10:27         ` [PATCH 4/28] VFS: Stat shouldn't stop expire Christoph Hellwig
2004-10-27 18:36           ` Mike Waychison
2004-10-25 14:59       ` [PATCH 3/28] VFS: Move expiry into vfs Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=417D1D51.8060901@sun.com \
    --to=michael.waychison@sun.com \
    --cc=hch@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=raven@themaw.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.