[PATCH -V3 3/5] vfs: Add open by file handle support

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: hch@infradead.org, viro@zeniv.linux.org.uk, adilger@sun.com,
	corbet@lwn.net
Cc: linux-fsdevel@vger.kernel.org, sfrench@us.ibm.com,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V3 3/5] vfs: Add open by file handle support
Date: Thu, 22 Apr 2010 23:45:31 +0530	[thread overview]
Message-ID: <1271960133-16414-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1271960133-16414-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/filesystems.c              |   32 ++++++++-
 fs/namei.c                    |   24 -------
 fs/namespace.c                |   38 ++++++++++
 fs/open.c                     |  154 +++++++++++++++++++++++++++++++++++++++++
 fs/pnode.c                    |    2 +-
 include/linux/mnt_namespace.h |    2 +
 include/linux/namei.h         |   24 +++++++
 7 files changed, 250 insertions(+), 26 deletions(-)

diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492..743d36e 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -281,5 +281,35 @@ struct file_system_type *get_fs_type(const char *name)
 	}
 	return fs;
 }
-
 EXPORT_SYMBOL(get_fs_type);
+
+/*
+ * fs_get_sb - find the super_block whose get_fsid() uuid equals @fsid.
+ * Returns it with an extra s_active reference, or NULL if none matches.
+ */
+struct super_block *fs_get_sb(struct uuid *fsid)
+{
+	struct uuid *id;
+	struct file_system_type *fst;
+	struct super_block *sb, *found_sb = NULL;
+
+	read_lock(&file_systems_lock);
+	for (fst = file_systems; fst && !found_sb; fst = fst->next) {
+		spin_lock(&sb_lock);
+		list_for_each_entry(sb, &fst->fs_supers, s_instances) {
+			if (!sb->s_op->get_fsid)
+				continue;
+			id = sb->s_op->get_fsid(sb);
+			if (memcmp(fsid->uuid, id->uuid, sizeof(id->uuid)))
+				continue;
+			/* skip a super_block already being shut down */
+			if (atomic_inc_not_zero(&sb->s_active)) {
+				found_sb = sb;
+				break;
+			}
+		}
+		spin_unlock(&sb_lock);
+	}
+	read_unlock(&file_systems_lock);
+	return found_sb;
+}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91..a18711e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1521,30 +1521,6 @@ out_unlock:
 	return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *	00 - read-only
- *	01 - write-only
- *	10 - read-write
- *	11 - special
- * it is changed into
- *	00 - no permissions needed
- *	01 - read-permission
- *	10 - write-permission
- *	11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
-static inline int open_to_namei_flags(int flag)
-{
-	if ((flag+1) & O_ACCMODE)
-		flag++;
-	return flag;
-}
-
 static int open_will_truncate(int flag, struct inode *inode)
 {
 	/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8a..6168526 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2364,3 +2364,41 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	kfree(ns);
 }
 EXPORT_SYMBOL(put_mnt_ns);
+
+/*
+ * fs_get_vfsmount - look up a mount of @sb in @task's mount namespace.
+ * The namespace is pinned with get_mnt_ns() inside an RCU read section
+ * and its mount list is walked under namespace_sem. The first vfsmount
+ * whose mnt_sb is @sb is returned with a reference from mntget(); NULL
+ * means the task has no nsproxy/mnt_ns or no such mount is listed.
+ */
+struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+				struct super_block *sb)
+{
+	struct nsproxy *proxy;
+	struct mnt_namespace *mnt_ns = NULL;
+	struct vfsmount *cur, *match = NULL;
+
+	rcu_read_lock();
+	proxy = task_nsproxy(task);
+	if (proxy)
+		mnt_ns = proxy->mnt_ns;
+	if (mnt_ns)
+		get_mnt_ns(mnt_ns);
+	rcu_read_unlock();
+	if (!mnt_ns)
+		return NULL;
+
+	down_read(&namespace_sem);
+	list_for_each_entry(cur, &mnt_ns->list, mnt_list) {
+		if (cur->mnt_sb != sb)
+			continue;
+		/* first hit wins; the caller gets its own reference */
+		match = mntget(cur);
+		break;
+	}
+	up_read(&namespace_sem);
+
+	put_mnt_ns(mnt_ns);
+	return match;
+}
diff --git a/fs/open.c b/fs/open.c
index 2d9a92b..4e8a8f4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1301,3 +1301,157 @@ err_out:
 	asmlinkage_protect(2, ret, name, handle);
 	return ret;
 }
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+	return 1;	/* no filtering: any dentry passed in is accepted */
+}
+
+/*
+ * handle_to_dentry - decode the file handle described by @fh on @mnt.
+ * A handle_size outside 1..MAX_HANDLE_SZ yields -EINVAL, a failed
+ * allocation -ENOMEM and a failed copy_from_user() -EFAULT, each as an
+ * ERR_PTR(); otherwise whatever exportfs_decode_fh() returns is passed on.
+ */
+static struct dentry *handle_to_dentry(struct vfsmount *mnt,
+				struct file_handle *fh)
+{
+	void *buf;
+	struct dentry *result;
+
+	if ((fh->handle_size <= 0) || (fh->handle_size > MAX_HANDLE_SZ))
+		return ERR_PTR(-EINVAL);
+
+	buf = kmalloc(fh->handle_size, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+	/*
+	 * NOTE(review): @fh is the in-kernel copy made by the syscall entry;
+	 * confirm fh->f_handle still designates user memory at this point.
+	 */
+	if (copy_from_user(buf, fh->f_handle, fh->handle_size)) {
+		result = ERR_PTR(-EFAULT);
+		goto out_free;
+	}
+	/* size is passed in sizeof(u32) units; >> 2 drops any remainder */
+	result = exportfs_decode_fh(mnt, (struct fid *)buf,
+					fh->handle_size >> 2, fh->handle_type,
+					vfs_dentry_acceptable, NULL);
+out_free:
+	kfree(buf);
+	return result;
+}
+
+/*
+ * do_sys_open_by_handle - open the file or directory named by handle @fh.
+ * Requires CAP_DAC_OVERRIDE. The handle's fsid picks the super_block, and
+ * a mount of it must exist in the caller's namespace (-ESTALE otherwise).
+ * Returns a new file descriptor, or a negative errno.
+ */
+long do_sys_open_by_handle(struct file_handle *fh, int flags)
+{
+	int fd;
+	int retval;
+	int d_flags  = flags;	/* dentry_open() gets the unconverted flags */
+	struct file *filp;
+	struct vfsmount *mnt;
+	struct inode *inode;
+	struct dentry *dentry;
+	struct super_block *sb;
+
+	if (!capable(CAP_DAC_OVERRIDE))
+		return -EPERM;
+
+	sb = fs_get_sb(&fh->fsid);
+	if (!sb)
+		return -ESTALE;
+	/* Find the vfsmount for this superblock in the current namespace */
+	mnt = fs_get_vfsmount(current, sb);
+	if (!mnt) {
+		retval = -ESTALE;
+		goto out_sb;
+	}
+
+	dentry = handle_to_dentry(mnt, fh);
+	if (IS_ERR(dentry)) {
+		retval = PTR_ERR(dentry);
+		goto out_mnt;
+	}
+
+	inode = dentry->d_inode;
+	/* Restrict open_by_handle to directories & regular files. */
+	retval = -EINVAL;
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+		goto out_dentry;
+
+	flags  = open_to_namei_flags(flags);
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flags & O_TRUNC)
+		flags |= MAY_WRITE;
+
+	retval = -EPERM;
+	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
+		(flags & FMODE_WRITE) && IS_APPEND(inode))
+		goto out_dentry;
+
+	retval = -EACCES;
+	if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode))
+		goto out_dentry;
+
+	/* Can't write directories. */
+	retval = -EISDIR;
+	if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE))
+		goto out_dentry;
+
+	fd = get_unused_fd();
+	retval = fd;
+	if (fd < 0)
+		goto out_dentry;
+
+	/*
+	 * dentry_open() takes over the dentry and mntget() references and
+	 * releases them itself on failure, so no dput() on that path.
+	 */
+	filp = dentry_open(dentry, mntget(mnt), d_flags, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		retval = PTR_ERR(filp);
+		goto out_mnt;
+	}
+
+	if (S_ISREG(inode->i_mode)) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+	fsnotify_open(filp->f_path.dentry);
+	fd_install(fd, filp);
+	goto out_mnt;	/* retval == fd; filp now owns the dentry */
+
+out_dentry:
+	dput(dentry);
+out_mnt:
+	mntput(mnt);
+out_sb:
+	deactivate_super(sb);
+	return retval;
+}
+
+/*
+ * open_by_handle_at - copy the struct file_handle in from user space and
+ * pass the kernel copy to do_sys_open_by_handle(); -EFAULT if that fails.
+ */
+SYSCALL_DEFINE2(open_by_handle_at, struct file_handle __user *, handle,
+		int, flags)
+{
+	long ret = -EFAULT;
+	struct file_handle f_handle;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	if (!copy_from_user(&f_handle, handle, sizeof(f_handle)))
+		ret = do_sys_open_by_handle(&f_handle, flags);
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(2, ret, handle, flags);
+	return ret;
+}
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a..9f6d12d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -6,9 +6,9 @@
  *	Author : Ram Pai (linuxram@us.ibm.com)
  *
  */
+#include <linux/fs.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
-#include <linux/fs.h>
 #include "internal.h"
 #include "pnode.h"
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 0b89efc..d363ecc 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -36,6 +36,8 @@ extern const struct seq_operations mounts_op;
 extern const struct seq_operations mountinfo_op;
 extern const struct seq_operations mountstats_op;
 extern int mnt_had_events(struct proc_mounts *);
+extern struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+					struct super_block *sb);
 
 #endif
 #endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..a853aa0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -4,6 +4,7 @@
 #include <linux/dcache.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
+#include <asm-generic/fcntl.h>
 
 struct vfsmount;
 
@@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/*
+ * open_to_namei_flags - convert sys_open() access bits for the lookup code.
+ *
+ * The low two bits of the flag value passed to sys_open mean:
+ *	00 - read-only
+ *	01 - write-only
+ *	10 - read-write
+ *	11 - special
+ * whereas the internal routines (ie open_namei()/follow_link() etc)
+ * read the same two bits as a permission mask:
+ *	00 - no permissions needed
+ *	01 - read-permission
+ *	10 - write-permission
+ *	11 - read-write
+ * Adding one maps the first three encodings onto the mask; "special" is
+ * returned as is, because incrementing it would clear both bits. The 00
+ * mask value thereby stays free for symlinks, whose permissions are
+ * checked later. Bits above the access mode are not modified.
+ */
+static inline int open_to_namei_flags(int flag)
+{
+	return ((flag + 1) & O_ACCMODE) ? flag + 1 : flag;
+}
 #endif /* _LINUX_NAMEI_H */
-- 
1.7.0.4.360.g11766c

next prev parent reply	other threads:[~2010-04-22 18:15 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-22 18:15 [PATCH -V3] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
2010-04-22 18:15 ` [PATCH -V3 1/5] exportfs: Return the minimum required handle size Aneesh Kumar K.V
2010-04-22 18:15 ` [PATCH -V3 2/5] vfs: Add name to file handle conversion support Aneesh Kumar K.V
2010-04-22 18:15 ` Aneesh Kumar K.V [this message]
2010-04-22 19:22   ` [PATCH -V3 3/5] vfs: Add open by file handle support Andreas Dilger
2010-04-23 11:40     ` Aneesh Kumar K. V
2010-04-22 18:15 ` [PATCH -V3 4/5] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
2010-04-22 18:15 ` [PATCH -V3 5/5] ext4: Add get_fsid callback Aneesh Kumar K.V
2010-04-22 19:07 ` [PATCH -V3] Generic name to handle and open by handle syscalls Andreas Dilger
2010-04-22 22:49 ` Serge E. Hallyn
2010-04-23 11:45   ` Aneesh Kumar K. V
2010-04-23 13:49     ` Serge E. Hallyn
2010-04-23 13:23 ` Theodore Tso
2010-04-24  0:19   ` Andreas Dilger
2010-04-24  1:08     ` Neil Brown
2010-04-25 18:21       ` Aneesh Kumar K. V
2010-04-26  9:56       ` Christoph Hellwig
2010-04-26 10:16         ` Neil Brown
2010-04-26 10:28           ` Christoph Hellwig
2010-04-26 11:16             ` Neil Brown
2010-04-26 14:53               ` Theodore Tso
2010-04-26 14:56                 ` Christoph Hellwig
2010-04-25 18:07     ` Aneesh Kumar K. V

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:68ba492 dfblob:743d36e dfblob:a7dce91 dfblob:a18711e
dfblob:8174c8a dfblob:6168526 dfblob:2d9a92b dfblob:4e8a8f4
dfblob:5cc564a dfblob:9f6d12d dfblob:0b89efc dfblob:d363ecc
dfblob:05b441d dfblob:a853aa0 )
 OR (
bs:"[PATCH -V3 3/5] vfs: Add open by file handle support" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1271960133-16414-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=adilger@sun.com \
    --cc=corbet@lwn.net \
    --cc=hch@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=sfrench@us.ibm.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.