[PATCH -V21 03/12] vfs: Add open by file handle support

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: hch@infradead.org, viro@zeniv.linux.org.uk, adilger@sun.com,
	corbet@lwn.net, neilb@suse.de, npiggin@kernel.dk,
	hooanon05@yahoo.co.jp, bfields@fieldses.org, miklos@szeredi.hu
Cc: linux-fsdevel@vger.kernel.org, sfrench@us.ibm.com,
	philippe.deniel@CEA.FR, linux-kernel@vger.kernel.org,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V21 03/12] vfs: Add open by file handle support
Date: Tue,  5 Oct 2010 15:52:11 +0530	[thread overview]
Message-ID: <1286274140-26533-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1286274140-26533-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/compat.c              |   11 +++
 fs/exportfs/expfs.c      |    2 +
 fs/namei.c               |  223 +++++++++++++++++++++++++++++++++++++++++++---
 fs/open.c                |   32 ++++++-
 include/linux/fs.h       |   10 ++-
 include/linux/namei.h    |    1 +
 include/linux/syscalls.h |    3 +
 7 files changed, 263 insertions(+), 19 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index 0644a15..4a423fa 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2334,3 +2334,14 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
 }
 
 #endif /* CONFIG_TIMERFD */
+
+/*
+ * Compat entry point: same as fs/open.c:sys_open_by_handle_at(), except
+ * that O_LARGEFILE is not forced on; flags reach do_handle_open() as is.
+ */
+asmlinkage long
+compat_sys_open_by_handle_at(int mountdirfd,
+			     struct file_handle __user *handle, int flags)
+{
+	return do_handle_open(mountdirfd, handle, flags);
+}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index cfee0f0..05a1179 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -373,6 +373,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
 	/*
 	 * Try to get any dentry for the given file handle from the filesystem.
 	 */
+	if (!nop || !nop->fh_to_dentry)
+		return ERR_PTR(-ESTALE);
 	result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
 	if (!result)
 		result = ERR_PTR(-ESTALE);
diff --git a/fs/namei.c b/fs/namei.c
index 24896e8..c52a9d5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -32,6 +32,7 @@
 #include <linux/fcntl.h>
 #include <linux/device_cgroup.h>
 #include <linux/fs_struct.h>
+#include <linux/exportfs.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -1050,6 +1051,29 @@ out_fail:
 	return retval;
 }
 
+struct vfsmount *get_vfsmount_from_fd(int fd)
+{
+	int fput_needed;
+	struct path path;
+	struct file *filep;
+
+	if (fd == AT_FDCWD) {	/* use the current working directory */
+		struct fs_struct *fs = current->fs;
+		spin_lock(&fs->lock);
+		path = fs->pwd;
+		mntget(path.mnt);	/* reference handed to the caller */
+		spin_unlock(&fs->lock);
+	} else {
+		filep = fget_light(fd, &fput_needed);
+		if (!filep)
+			return ERR_PTR(-EBADF);
+		path = filep->f_path;
+		mntget(path.mnt);	/* pin the mount before dropping the file */
+		fput_light(filep, fput_needed);
+	}
+	return path.mnt;	/* caller must mntput() this */
+}
+
 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
 static int do_path_lookup(int dfd, const char *name,
 				unsigned int flags, struct nameidata *nd)
@@ -1537,26 +1561,30 @@ static int open_will_truncate(int flag, struct inode *inode)
 	return (flag & O_TRUNC);
 }
 
-static struct file *finish_open(struct nameidata *nd,
+static struct file *finish_open(struct file *filp, struct path *path,
 				int open_flag, int acc_mode)
 {
-	struct file *filp;
-	int will_truncate;
 	int error;
+	int will_truncate;
 
-	will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
+	will_truncate = open_will_truncate(open_flag, path->dentry->d_inode);
 	if (will_truncate) {
-		error = mnt_want_write(nd->path.mnt);
+		error = mnt_want_write(path->mnt);
 		if (error)
 			goto exit;
 	}
-	error = may_open(&nd->path, acc_mode, open_flag);
+	error = may_open(path, acc_mode, open_flag);
 	if (error) {
 		if (will_truncate)
-			mnt_drop_write(nd->path.mnt);
+			mnt_drop_write(path->mnt);
 		goto exit;
 	}
-	filp = nameidata_to_filp(nd);
+	/* Has the filesystem initialised the file for us? */
+	if (filp->f_path.dentry == NULL)
+		filp = __dentry_open(path->dentry, path->mnt, filp,
+				     NULL, current_cred());
+	else
+		path_put(path);
 	if (!IS_ERR(filp)) {
 		error = ima_file_check(filp, acc_mode);
 		if (error) {
@@ -1566,7 +1594,7 @@ static struct file *finish_open(struct nameidata *nd,
 	}
 	if (!IS_ERR(filp)) {
 		if (will_truncate) {
-			error = handle_truncate(&nd->path);
+			error = handle_truncate(path);
 			if (error) {
 				fput(filp);
 				filp = ERR_PTR(error);
@@ -1579,13 +1607,17 @@ static struct file *finish_open(struct nameidata *nd,
 	 * on its behalf.
 	 */
 	if (will_truncate)
-		mnt_drop_write(nd->path.mnt);
+		mnt_drop_write(path->mnt);
 	return filp;
 
 exit:
-	if (!IS_ERR(nd->intent.open.file))
-		release_open_intent(nd);
-	path_put(&nd->path);
+	if (!IS_ERR(filp)) {
+		if (filp->f_path.dentry == NULL)
+			put_filp(filp);
+		else
+			fput(filp);
+	}
+	path_put(path);
 	return ERR_PTR(error);
 }
 
@@ -1719,7 +1751,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (S_ISDIR(path->dentry->d_inode->i_mode))
 		goto exit;
 ok:
-	filp = finish_open(nd, open_flag, acc_mode);
+	filp = finish_open(nd->intent.open.file, &nd->path,
+			   open_flag, acc_mode);
+
 	return filp;
 
 exit_mutex_unlock:
@@ -1892,6 +1926,167 @@ struct file *filp_open(const char *filename, int flags, int mode)
 }
 EXPORT_SYMBOL(filp_open);
 
+#ifdef CONFIG_EXPORTFS
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+	return 1;	/* always 1: presumably "accept any dentry" */
+}
+
+static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
+			     struct path *path)
+{
+	int retval = 0;
+	int handle_dwords;
+
+	path->mnt = get_vfsmount_from_fd(mountdirfd);
+	if (IS_ERR(path->mnt)) {
+		retval = PTR_ERR(path->mnt);
+		goto out_err;
+	}
+	/* convert the handle size from bytes to 32-bit words */
+	handle_dwords = handle->handle_bytes >> 2;
+	path->dentry = exportfs_decode_fh(path->mnt,
+					  (struct fid *)handle->f_handle,
+					  handle_dwords, handle->handle_type,
+					  vfs_dentry_acceptable, NULL);
+	if (IS_ERR(path->dentry)) {
+		retval = PTR_ERR(path->dentry);
+		goto out_mnt;
+	}
+	return 0;
+out_mnt:
+	mntput(path->mnt);	/* decode failed: drop the mount reference */
+out_err:
+	return retval;
+}
+
+/* Copy in user handle @ufh and decode it into @path: 0 or -errno. */
+int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+		   struct path *path)
+{
+	int retval = 0;
+	struct file_handle f_handle;
+	struct file_handle *handle = NULL;
+
+	/*
+	 * With a handle we don't look at the execute bit on the
+	 * directory. Ideally we would like CAP_DAC_SEARCH, but
+	 * that does not exist, so require CAP_DAC_READ_SEARCH.
+	 */
+	if (!capable(CAP_DAC_READ_SEARCH)) {
+		retval = -EPERM;
+		goto out_err;
+	}
+	if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
+		retval = -EFAULT;
+		goto out_err;
+	}
+	if (!f_handle.handle_bytes || f_handle.handle_bytes > MAX_HANDLE_SZ) {
+		retval = -EINVAL;
+		goto out_err;
+	}
+	handle = kmalloc(sizeof(*handle) + f_handle.handle_bytes, GFP_KERNEL);
+	if (!handle) {
+		retval = -ENOMEM;
+		goto out_err;
+	}
+	/*
+	 * Reuse the header validated above and copy only the payload, so a
+	 * racing writer cannot change handle_bytes after the size check.
+	 */
+	*handle = f_handle;
+	if (copy_from_user(&handle->f_handle, &ufh->f_handle,
+			   f_handle.handle_bytes)) {
+		retval = -EFAULT;
+		goto out_handle;
+	}
+	retval = do_handle_to_path(mountdirfd, handle, path);
+out_handle:
+	kfree(handle);
+out_err:
+	return retval;
+}
+#else
+int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+		   struct path *path)
+{
+	return -ENOSYS;	/* built without CONFIG_EXPORTFS */
+}
+#endif
+
+long do_handle_open(int mountdirfd,
+		    struct file_handle __user *ufh, int open_flag)
+{
+	long retval = 0;
+	int fd, acc_mode;
+	struct path path;
+	struct file *filp;
+
+	/* O_CREAT cannot be used with open_by_handle */
+	if (open_flag & O_CREAT) {
+		retval = -EINVAL;
+		goto out_err;
+	}
+	retval = handle_to_path(mountdirfd, ufh, &path);
+	if (retval)
+		goto out_err;
+
+	if ((open_flag & O_DIRECTORY) &&
+	    !S_ISDIR(path.dentry->d_inode->i_mode)) {
+		retval = -ENOTDIR;
+		goto out_path;
+	}
+	/*
+	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
+	 * check for O_DSYNC if they need any syncing at all, we enforce it is
+	 * always set instead of having to deal with possibly weird behaviour
+	 * for malicious applications setting only __O_SYNC.
+	 */
+	if (open_flag & __O_SYNC)
+		open_flag |= O_DSYNC;
+
+	acc_mode = MAY_OPEN | ACC_MODE(open_flag);
+
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (open_flag & O_TRUNC)
+		acc_mode |= MAY_WRITE;
+	/*
+	 * Allow the LSM permission hook to distinguish append
+	 * access from general write access.
+	 */
+	if (open_flag & O_APPEND)
+		acc_mode |= MAY_APPEND;
+
+	fd = get_unused_fd_flags(open_flag);
+	if (fd < 0) {
+		retval = fd;
+		goto out_path;
+	}
+	filp = get_empty_filp();
+	if (!filp) {
+		retval = -ENFILE;
+		goto out_free_fd;
+	}
+	filp->f_flags = open_flag;
+	filp = finish_open(filp, &path, open_flag, acc_mode);
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		retval =  PTR_ERR(filp);
+	} else {
+		retval = fd;
+		fsnotify_open(filp);
+		fd_install(fd, filp);
+	}
+	return retval;
+
+out_free_fd:
+	put_unused_fd(fd);
+out_path:
+	path_put(&path);
+out_err:
+	return retval;
+}
+
 /**
  * lookup_create - lookup a dentry, creating it if it doesn't exist
  * @nd: nameidata info
diff --git a/fs/open.c b/fs/open.c
index 0475a35..5447d4d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -652,10 +652,10 @@ static inline int __get_file_write_access(struct inode *inode,
 	return error;
 }
 
-static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-					struct file *f,
-					int (*open)(struct inode *, struct file *),
-					const struct cred *cred)
+struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+			   struct file *f,
+			   int (*open)(struct inode *, struct file *),
+			   const struct cred *cred)
 {
 	struct inode *inode;
 	int error;
@@ -1177,3 +1177,27 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
 	return -ENOSYS;
 }
 #endif
+
+/**
+ * sys_open_by_handle_at: Open the file handle
+ * @mountdirfd: directory file descriptor
+ * @handle: file handle to be opened
+ * @flags: open flags.
+ *
+ * @mountdirfd indicates the directory file descriptor
+ * of the mount point. The file handle is decoded relative
+ * to the vfsmount pointed to by @mountdirfd. The @flags
+ * value is the same as the open(2) flags.
+ */
+SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
+		struct file_handle __user *, handle,
+		int, flags)
+{
+	long ret;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	ret = do_handle_open(mountdirfd, handle, flags);
+	return ret;
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1c7c6ca..4421f15 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1937,6 +1937,10 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset,
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
 			int mode);
 extern struct file *filp_open(const char *, int, int);
+struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+			   struct file *f,
+			   int (*open)(struct inode *, struct file *),
+			   const struct cred *cred);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
 				 const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
@@ -2148,11 +2152,15 @@ extern void free_write_pipe(struct file *);
 
 extern struct file *do_filp_open(int dfd, const char *pathname,
 		int open_flag, int mode, int acc_mode);
+extern int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+			  struct path *path);
+extern long do_handle_open(int mountdirfd,
+			   struct file_handle __user *ufh, int open_flag);
 extern int may_open(struct path *, int, int);
 
 extern int kernel_read(struct file *, loff_t, char *, unsigned long);
 extern struct file * open_exec(const char *);
- 
+
 /* fs/dcache.c -- generic fs support functions */
 extern int is_subdir(struct dentry *, struct dentry *);
 extern int path_is_under(struct path *, struct path *);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..827aef0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -64,6 +64,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
 #define user_path_dir(name, path) \
 	user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path)
 
+extern struct vfsmount *get_vfsmount_from_fd(int);
 extern int kern_path(const char *, unsigned, struct path *);
 
 extern int path_lookup(const char *, unsigned, struct nameidata *);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 6ab4d07..89a0ade 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -835,4 +835,7 @@ asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
 asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
 				      struct file_handle __user *handle,
 				      int __user *mnt_id, int flag);
+asmlinkage long sys_open_by_handle_at(int mountdirfd,
+				      struct file_handle __user *handle,
+				      int flags);
 #endif
-- 
1.7.0.4

next prev parent reply	other threads:[~2010-10-05 10:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-05 10:22 [PATCH -V21 00/12] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 01/12] exportfs: Return the minimum required handle size Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 02/12] vfs: Add name to file handle conversion support Aneesh Kumar K.V
2010-10-05 10:22 ` Aneesh Kumar K.V [this message]
2010-10-05 10:22 ` [PATCH -V21 04/12] vfs: Add handle based readlink syscall Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 05/12] vfs: Add handle based stat syscall Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 06/12] vfs: Add handle based link syscall Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 07/12] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 08/12] x86: Add new syscalls for x86_64 Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 09/12] unistd.h: Add new syscalls numbers to asm-generic Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 10/12] vfs: Export file system uuid via /proc/<pid>/mountinfo Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 11/12] ext3: Copy fs UUID to superblock Aneesh Kumar K.V
2010-10-05 10:22 ` [PATCH -V21 12/12] ext4: " Aneesh Kumar K.V
2010-10-11 23:34 ` [PATCH -V21 00/12] Generic name to handle and open by handle syscalls J. Bruce Fields
2010-10-24 15:03 ` Aneesh Kumar K. V

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0644a15 dfblob:4a423fa dfblob:cfee0f0 dfblob:05a1179
dfblob:24896e8 dfblob:c52a9d5 dfblob:0475a35 dfblob:5447d4d
dfblob:1c7c6ca dfblob:4421f15 dfblob:05b441d dfblob:827aef0
dfblob:6ab4d07 dfblob:89a0ade )
 OR (
bs:"[PATCH -V21 03/12] vfs: Add open by file handle support" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1286274140-26533-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=adilger@sun.com \
    --cc=bfields@fieldses.org \
    --cc=corbet@lwn.net \
    --cc=hch@infradead.org \
    --cc=hooanon05@yahoo.co.jp \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=neilb@suse.de \
    --cc=npiggin@kernel.dk \
    --cc=philippe.deniel@CEA.FR \
    --cc=sfrench@us.ibm.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.