linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: hch@infradead.org, viro@zeniv.linux.org.uk, adilger@sun.com,
	corbet@lwn.net, serue@us.ibm.com
Cc: linux-fsdevel@vger.kernel.org, sfrench@us.ibm.com,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V4 3/6] vfs: Add open by file handle support
Date: Fri, 23 Apr 2010 17:08:32 +0530	[thread overview]
Message-ID: <1272022715-11716-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1272022715-11716-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/filesystems.c              |   32 ++++++++-
 fs/namei.c                    |   24 ------
 fs/namespace.c                |   38 ++++++++++
 fs/open.c                     |  155 +++++++++++++++++++++++++++++++++++++++++
 fs/pnode.c                    |    2 +-
 include/linux/fs.h            |    1 +
 include/linux/mnt_namespace.h |    2 +
 include/linux/namei.h         |   24 ++++++
 8 files changed, 252 insertions(+), 26 deletions(-)

diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492..743d36e 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -281,5 +281,35 @@ struct file_system_type *get_fs_type(const char *name)
 	}
 	return fs;
 }
-
 EXPORT_SYMBOL(get_fs_type);
+
+struct super_block *fs_get_sb(struct uuid *fsid)
+{
+	struct uuid *this_fsid;
+	struct file_system_type *fs_type;
+	struct super_block *sb, *found_sb = NULL;
+
+	read_lock(&file_systems_lock);
+	fs_type = file_systems;
+	while (fs_type) {
+		spin_lock(&sb_lock);
+		list_for_each_entry(sb, &fs_type->fs_supers, s_instances) {
+			if (!sb->s_op->get_fsid)
+				continue;
+			this_fsid = sb->s_op->get_fsid(sb);
+			if (!memcmp(fsid->uuid, this_fsid->uuid,
+					sizeof(this_fsid->uuid))) {
+				/* found the matching super_block */
+				atomic_inc(&sb->s_active);
+				found_sb = sb;
+				spin_unlock(&sb_lock);
+				goto out;
+			}
+		}
+		spin_unlock(&sb_lock);
+		fs_type = fs_type->next;
+	}
+out:
+	read_unlock(&file_systems_lock);
+	return found_sb;
+}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91..a18711e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1521,30 +1521,6 @@ out_unlock:
 	return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *	00 - read-only
- *	01 - write-only
- *	10 - read-write
- *	11 - special
- * it is changed into
- *	00 - no permissions needed
- *	01 - read-permission
- *	10 - write-permission
- *	11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
-static inline int open_to_namei_flags(int flag)
-{
-	if ((flag+1) & O_ACCMODE)
-		flag++;
-	return flag;
-}
-
 static int open_will_truncate(int flag, struct inode *inode)
 {
 	/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8a..6168526 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2364,3 +2364,41 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	kfree(ns);
 }
 EXPORT_SYMBOL(put_mnt_ns);
+
+/*
+ * Get any vfsmount mapping the superblock in the
+ * task namespace
+ */
+struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+				struct super_block *sb)
+{
+	struct nsproxy *nsp;
+	struct list_head *mount_list;
+	struct mnt_namespace *ns = NULL;
+	struct vfsmount *mnt, *sb_mnt = NULL;
+
+	rcu_read_lock();
+	nsp = task_nsproxy(task);
+	if (nsp) {
+		ns = nsp->mnt_ns;
+		if (ns)
+			get_mnt_ns(ns);
+	}
+	rcu_read_unlock();
+	if (!ns)
+		return NULL;
+	down_read(&namespace_sem);
+	list_for_each(mount_list, &ns->list) {
+		mnt = list_entry(mount_list, struct vfsmount, mnt_list);
+		if (mnt->mnt_sb == sb) {
+			/* found the matching super block */
+			sb_mnt = mnt;
+			mntget(sb_mnt);
+			break;
+		}
+	}
+	up_read(&namespace_sem);
+
+	put_mnt_ns(ns);
+	return sb_mnt;
+}
diff --git a/fs/open.c b/fs/open.c
index dd27cbd..ca09c2d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1301,3 +1301,158 @@ err_out:
 	asmlinkage_protect(2, ret, name, handle);
 	return ret;
 }
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+	return 1;
+}
+
+static struct dentry *handle_to_dentry(struct vfsmount *mnt,
+				struct file_handle *fh)
+{
+	int retval = 0;
+	int handle_size;
+	void *handle = NULL;
+	struct dentry *dentry;
+
+	if ((fh->handle_size > MAX_HANDLE_SZ) ||
+		(fh->handle_size <= 0)) {
+		retval = -EINVAL;
+		goto err_out;
+	}
+	handle = kmalloc(fh->handle_size, GFP_KERNEL);
+	if (!handle) {
+		retval =  -ENOMEM;
+		goto err_out;
+	}
+	if (copy_from_user(handle, fh->f_handle, fh->handle_size)) {
+		retval = -EFAULT;
+		goto err_out;
+	}
+	/* change the handle size to multiple of sizeof(u32) */
+	handle_size = fh->handle_size >> 2;
+	dentry = exportfs_decode_fh(mnt, (struct fid *)handle,
+					handle_size, fh->handle_type,
+					vfs_dentry_acceptable, NULL);
+	kfree(handle);
+	return dentry;
+
+err_out:
+	kfree(handle);
+	return ERR_PTR(retval);
+}
+
+long do_sys_open_by_handle(struct file_handle *fh, int flags)
+{
+	int fd;
+	int retval = 0;
+	int d_flags  = flags;
+	struct file *filp;
+	struct vfsmount *mnt;
+	struct inode *inode;
+	struct dentry *dentry;
+	struct super_block *sb;
+
+	if (!capable(CAP_DAC_OVERRIDE))
+		return -EPERM;
+
+	sb = fs_get_sb(&fh->fsid);
+	if (!sb)
+		return -ESTALE;
+	/*
+	 * Find the vfsmount for this superblock in the
+	 * current namespace
+	 */
+	mnt = fs_get_vfsmount(current, sb);
+	if (!mnt) {
+		retval = -ESTALE;
+		goto out_sb;
+	}
+
+	dentry = handle_to_dentry(mnt, fh);
+	if (IS_ERR(dentry)) {
+		retval = PTR_ERR(dentry);
+		goto out_mnt;
+	}
+
+	inode = dentry->d_inode;
+	/* Restrict open_by_handle to directories & regular files. */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+		retval = -EINVAL;
+		goto out_err;
+	}
+
+	flags  = open_to_namei_flags(flags);
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flags & O_TRUNC)
+		flags |= MAY_WRITE;
+
+	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
+		(flags & FMODE_WRITE) && IS_APPEND(inode)) {
+		retval = -EPERM;
+		goto out_err;
+	}
+
+	if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+		retval = -EACCES;
+		goto out_err;
+	}
+
+	/* Can't write directories. */
+	if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE)) {
+		retval = -EISDIR;
+		goto out_err;
+	}
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		retval = fd;
+		goto out_err;
+	}
+
+	filp = dentry_open(dentry, mntget(mnt),
+			d_flags, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		retval =  PTR_ERR(filp);
+		goto out_err;
+	}
+
+	if (inode->i_mode & S_IFREG) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+	fsnotify_open(filp->f_path.dentry);
+	fd_install(fd, filp);
+	retval = fd;
+	goto out_mnt;
+
+out_err:
+	dput(dentry);
+out_mnt:
+	mntput(mnt);
+out_sb:
+	deactivate_super(sb);
+
+	return retval;
+}
+
+SYSCALL_DEFINE2(open_by_handle, struct file_handle __user *, handle,
+		int, flags)
+{
+	long ret;
+	struct file_handle f_handle;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	if (copy_from_user(&f_handle, handle, sizeof(struct file_handle))) {
+		ret = -EFAULT;
+		goto err_out;
+	}
+	ret = do_sys_open_by_handle(&f_handle, flags);
+err_out:
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(2, ret, handle, flags);
+	return ret;
+}
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a..9f6d12d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -6,9 +6,9 @@
  *	Author : Ram Pai (linuxram@us.ibm.com)
  *
  */
+#include <linux/fs.h>
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
-#include <linux/fs.h>
 #include "internal.h"
 #include "pnode.h"
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 157ed57..2116b00 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1935,6 +1935,7 @@ extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 extern long do_sys_name_to_handle(const char __user *name,
 				struct file_handle *fh);
+extern long do_sys_open_by_handle(struct file_handle *fh, int flags);
 
 /* fs/ioctl.c */
 
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 0b89efc..d363ecc 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -36,6 +36,8 @@ extern const struct seq_operations mounts_op;
 extern const struct seq_operations mountinfo_op;
 extern const struct seq_operations mountstats_op;
 extern int mnt_had_events(struct proc_mounts *);
+extern struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+					struct super_block *sb);
 
 #endif
 #endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..a853aa0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -4,6 +4,7 @@
 #include <linux/dcache.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
+#include <asm-generic/fcntl.h>
 
 struct vfsmount;
 
@@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ *	00 - read-only
+ *	01 - write-only
+ *	10 - read-write
+ *	11 - special
+ * it is changed into
+ *	00 - no permissions needed
+ *	01 - read-permission
+ *	10 - write-permission
+ *	11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc)
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
+ *
+*/
+static inline int open_to_namei_flags(int flag)
+{
+	if ((flag+1) & O_ACCMODE)
+		flag++;
+	return flag;
+}
 #endif /* _LINUX_NAMEI_H */
-- 
1.7.0.4.360.g11766c


  parent reply	other threads:[~2010-04-23 11:38 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-23 11:38 [PATCH -V4] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
2010-04-23 11:38 ` [PATCH -V4 1/6] exportfs: Return the minimum required handle size Aneesh Kumar K.V
2010-04-23 11:38 ` [PATCH -V4 2/6] vfs: Add name to file handle conversion support Aneesh Kumar K.V
2010-04-23 22:02   ` Andreas Dilger
2010-04-26  9:52     ` Christoph Hellwig
2010-04-26 10:14       ` Aneesh Kumar K. V
2010-04-26 10:12     ` Aneesh Kumar K. V
2010-04-26 17:57       ` J. Bruce Fields
2010-04-27  6:13         ` Aneesh Kumar K. V
2010-04-27 13:28           ` J. Bruce Fields
2010-04-23 11:38 ` Aneesh Kumar K.V [this message]
2010-04-23 11:38 ` [PATCH -V4 4/6] ext4: Add get_fsid callback Aneesh Kumar K.V
2010-04-23 11:38 ` [PATCH -V4 5/6] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
2010-04-23 11:38 ` [PATCH -V4 6/6] x86: Add new syscalls for x86_64 Aneesh Kumar K.V
2010-04-23 22:09   ` Andreas Dilger
2010-04-25 18:28     ` Aneesh Kumar K. V
2010-04-26  9:53       ` Christoph Hellwig
2010-04-26 17:59         ` Andreas Dilger
2010-04-27  6:25         ` Aneesh Kumar K. V

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1272022715-11716-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
    --to=aneesh.kumar@linux.vnet.ibm.com \
    --cc=adilger@sun.com \
    --cc=corbet@lwn.net \
    --cc=hch@infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=serue@us.ibm.com \
    --cc=sfrench@us.ibm.com \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).