From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
To: hch@infradead.org, viro@zeniv.linux.org.uk, adilger@sun.com,
corbet@lwn.net, serue@us.ibm.com, neilb@suse.de
Cc: linux-fsdevel@vger.kernel.org, sfrench@us.ibm.com,
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Subject: [PATCH -V6 3/8] vfs: Add open by file handle support
Date: Tue, 27 Apr 2010 21:43:45 +0530 [thread overview]
Message-ID: <1272384830-22670-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com> (raw)
In-Reply-To: <1272384830-22670-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
fs/filesystems.c | 33 +++++++++-
fs/namei.c | 24 -------
fs/namespace.c | 38 +++++++++++
fs/open.c | 142 +++++++++++++++++++++++++++++++++++++++++
fs/pnode.c | 2 +-
include/linux/fs.h | 1 +
include/linux/mnt_namespace.h | 2 +
include/linux/namei.h | 24 +++++++
8 files changed, 240 insertions(+), 26 deletions(-)
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492..a424691 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -281,5 +281,36 @@ struct file_system_type *get_fs_type(const char *name)
}
return fs;
}
-
EXPORT_SYMBOL(get_fs_type);
+
+/*
+ * fs_get_sb - find a mounted super_block by filesystem UUID
+ * @fsid: UUID to look for
+ *
+ * Walks every registered filesystem type and each of its super_blocks,
+ * asking the filesystem for its fsid through the optional
+ * s_op->get_fsid() callback. Super_blocks without the callback, or whose
+ * callback returns an error, are skipped.
+ *
+ * Returns the first matching super_block with an extra s_active reference
+ * held, or NULL if nothing matches. The caller owns that reference and
+ * must drop it (e.g. with deactivate_super()).
+ *
+ * NOTE(review): get_fsid() is invoked with sb_lock (a spinlock) held, so
+ * implementations must not sleep - confirm for every filesystem that
+ * provides the callback.
+ */
+struct super_block *fs_get_sb(struct uuid *fsid)
+{
+	int error;
+	struct uuid this_fsid;
+	struct file_system_type *fs_type;
+	struct super_block *sb, *found_sb = NULL;
+
+	read_lock(&file_systems_lock);
+	for (fs_type = file_systems; fs_type; fs_type = fs_type->next) {
+		spin_lock(&sb_lock);
+		list_for_each_entry(sb, &fs_type->fs_supers, s_instances) {
+			if (!sb->s_op->get_fsid)
+				continue;
+			error = sb->s_op->get_fsid(sb, &this_fsid);
+			if (error)
+				continue;
+			if (memcmp(fsid->uuid, this_fsid.uuid,
+				   sizeof(this_fsid.uuid)))
+				continue;
+			/*
+			 * Matching fsid. Only take an active reference if the
+			 * super_block still has one: a count of zero means it
+			 * is already being shut down, and bumping it would
+			 * hand the caller a dying super_block that gets torn
+			 * down again when the caller drops its reference.
+			 */
+			if (!atomic_inc_not_zero(&sb->s_active))
+				continue;
+			found_sb = sb;
+			break;
+		}
+		spin_unlock(&sb_lock);
+		if (found_sb)
+			break;
+	}
+	read_unlock(&file_systems_lock);
+	return found_sb;
+}
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91..a18711e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1521,30 +1521,6 @@ out_unlock:
return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
}
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- * 00 - read-only
- * 01 - write-only
- * 10 - read-write
- * 11 - special
- * it is changed into
- * 00 - no permissions needed
- * 01 - read-permission
- * 10 - write-permission
- * 11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
-static inline int open_to_namei_flags(int flag)
-{
- if ((flag+1) & O_ACCMODE)
- flag++;
- return flag;
-}
-
static int open_will_truncate(int flag, struct inode *inode)
{
/*
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8a..6168526 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2364,3 +2364,41 @@ void put_mnt_ns(struct mnt_namespace *ns)
kfree(ns);
}
EXPORT_SYMBOL(put_mnt_ns);
+
+/*
+ * Look up a vfsmount of @sb in the mount namespace of @task.
+ *
+ * The first mount on the namespace's list whose mnt_sb is @sb is returned
+ * with a reference taken via mntget(). NULL is returned when the task has
+ * no nsproxy or no mount namespace, or when no mount matches.
+ */
+struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+				 struct super_block *sb)
+{
+	struct nsproxy *nsp;
+	struct mnt_namespace *ns = NULL;
+	struct vfsmount *cur, *sb_mnt = NULL;
+
+	/* Take the namespace reference while still inside the RCU section. */
+	rcu_read_lock();
+	nsp = task_nsproxy(task);
+	if (nsp)
+		ns = nsp->mnt_ns;
+	if (ns)
+		get_mnt_ns(ns);
+	rcu_read_unlock();
+
+	if (!ns)
+		return NULL;
+
+	down_read(&namespace_sem);
+	list_for_each_entry(cur, &ns->list, mnt_list) {
+		if (cur->mnt_sb != sb)
+			continue;
+		/* first mount of this super block wins */
+		sb_mnt = mntget(cur);
+		break;
+	}
+	up_read(&namespace_sem);
+
+	put_mnt_ns(ns);
+	return sb_mnt;
+}
diff --git a/fs/open.c b/fs/open.c
index 5d0f87b..e0a0cb1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1296,3 +1296,145 @@ err_out:
asmlinkage_protect(4, ret, dfd, name, handle, flag);
return ret;
}
+
+/*
+ * Acceptance callback for exportfs_decode_fh(): ignores both arguments
+ * and returns 1 unconditionally.
+ */
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+ return 1;
+}
+
+/*
+ * Decode @handle into a dentry on @mnt through exportfs_decode_fh().
+ * The handle size is converted to a count of u32 words first, and
+ * vfs_dentry_acceptable is passed as the acceptance callback with a NULL
+ * context. Returns whatever exportfs_decode_fh() returns.
+ */
+static struct dentry *handle_to_dentry(struct vfsmount *mnt,
+				       struct file_handle *handle)
+{
+	/* exportfs takes the length in units of sizeof(u32) */
+	int fh_words = handle->handle_size >> 2;
+	struct fid *fid = (struct fid *)handle->f_handle;
+
+	return exportfs_decode_fh(mnt, fid, fh_words, handle->handle_type,
+				  vfs_dentry_acceptable, NULL);
+}
+
+/*
+ * do_sys_open_by_handle - open the object a file handle refers to
+ * @ufh:   user pointer to the file handle (header followed by
+ *         handle_size bytes of handle data)
+ * @flags: open flags as passed to the syscall
+ *
+ * Requires CAP_DAC_OVERRIDE. The handle's fsid selects the super_block,
+ * a vfsmount of that super_block is looked up in the caller's mount
+ * namespace, and the handle is decoded to a dentry which is then opened.
+ *
+ * Returns the new file descriptor, or a negative errno:
+ *   -EFAULT  the handle could not be copied from user space
+ *   -EINVAL  handle_size is zero/negative or larger than MAX_HANDLE_SZ
+ *   -EPERM   missing capability, or a write that is not a pure append
+ *            on an append-only inode
+ *   -ESTALE  no matching super_block, or no mount of it in this namespace
+ *   -ENOMEM  allocation failure
+ *   -EACCES  write access to an immutable inode
+ *   -EISDIR  write access to a directory
+ * plus any error from handle decoding, fd allocation or dentry_open().
+ */
+static long do_sys_open_by_handle(struct file_handle __user *ufh, int flags)
+{
+	int fd;
+	int retval = 0;
+	int d_flags = flags;
+	struct file *filp;
+	struct vfsmount *mnt;
+	struct inode *inode;
+	struct dentry *dentry;
+	struct super_block *sb;
+	struct file_handle f_handle;
+	struct file_handle *handle = NULL;
+
+	/* Fetch and validate the fixed-size header first. */
+	if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
+		retval = -EFAULT;
+		goto out_err;
+	}
+	if ((f_handle.handle_size > MAX_HANDLE_SZ) ||
+	    (f_handle.handle_size <= 0)) {
+		retval = -EINVAL;
+		goto out_err;
+	}
+	if (!capable(CAP_DAC_OVERRIDE)) {
+		retval = -EPERM;
+		goto out_err;
+	}
+	sb = fs_get_sb(&f_handle.fsid);
+	if (!sb) {
+		retval = -ESTALE;
+		goto out_err;
+	}
+	/*
+	 * Find the vfsmount for this superblock in the
+	 * current namespace
+	 */
+	mnt = fs_get_vfsmount(current, sb);
+	if (!mnt) {
+		retval = -ESTALE;
+		goto out_sb;
+	}
+
+	handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_size,
+			 GFP_KERNEL);
+	if (!handle) {
+		retval = -ENOMEM;
+		goto out_mnt;
+	}
+	/*
+	 * Reuse the header that was already fetched and validated and pull
+	 * in only the payload. Copying the whole handle from user space a
+	 * second time would let a concurrent writer change handle_size (or
+	 * handle_type/fsid) after the checks above.
+	 */
+	*handle = f_handle;
+	if (copy_from_user(&handle->f_handle, &ufh->f_handle,
+			   f_handle.handle_size)) {
+		retval = -EFAULT;
+		goto out_mnt;
+	}
+	dentry = handle_to_dentry(mnt, handle);
+	if (IS_ERR(dentry)) {
+		retval = PTR_ERR(dentry);
+		goto out_mnt;
+	}
+	inode = dentry->d_inode;
+	flags = open_to_namei_flags(flags);
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flags & O_TRUNC)
+		flags |= MAY_WRITE;
+
+	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
+	    (flags & FMODE_WRITE) && IS_APPEND(inode)) {
+		retval = -EPERM;
+		goto out_dentry;
+	}
+	if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+		retval = -EACCES;
+		goto out_dentry;
+	}
+	/* Can't write directories. */
+	if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE)) {
+		retval = -EISDIR;
+		goto out_dentry;
+	}
+	/* The fd and the file are created with the caller's original flags. */
+	fd = get_unused_fd_flags(d_flags);
+	if (fd < 0) {
+		retval = fd;
+		goto out_dentry;
+	}
+	/* dentry_open() gets its own references; ours are dropped below. */
+	filp = dentry_open(dget(dentry), mntget(mnt),
+			   d_flags, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		retval = PTR_ERR(filp);
+		goto out_dentry;
+	}
+	/*
+	 * S_IFREG is a file-type value, not a flag bit: testing it with '&'
+	 * is also true for symlinks and sockets. Use S_ISREG() so only
+	 * regular files get O_NOATIME/FMODE_NOCMTIME.
+	 */
+	if (S_ISREG(inode->i_mode)) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+	fsnotify_open(filp->f_path.dentry);
+	fd_install(fd, filp);
+	retval = fd;
+
+out_dentry:
+	dput(dentry);
+out_mnt:
+	kfree(handle);	/* NULL when the allocation itself failed */
+	mntput(mnt);
+out_sb:
+	deactivate_super(sb);
+out_err:
+	return retval;
+}
+
+/*
+ * open_by_handle system call: thin wrapper around
+ * do_sys_open_by_handle(), whose return value is passed back unchanged.
+ */
+SYSCALL_DEFINE2(open_by_handle, struct file_handle __user *, handle,
+ int, flags)
+{
+ long ret;
+
+ /* add O_LARGEFILE whenever force_o_largefile() is true */
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
+
+ ret = do_sys_open_by_handle(handle, flags);
+
+ /* avoid REGPARM breakage on x86: */
+ asmlinkage_protect(2, ret, handle, flags);
+ return ret;
+}
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a..9f6d12d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -6,9 +6,9 @@
* Author : Ram Pai (linuxram@us.ibm.com)
*
*/
+#include <linux/fs.h>
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
-#include <linux/fs.h>
#include "internal.h"
#include "pnode.h"
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 055734c..da6d297 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2344,6 +2344,7 @@ extern struct super_block *get_super(struct block_device *);
extern struct super_block *get_active_super(struct block_device *bdev);
extern struct super_block *user_get_super(dev_t);
extern void drop_super(struct super_block *sb);
+extern struct super_block *fs_get_sb(struct uuid *fsid);
extern int dcache_dir_open(struct inode *, struct file *);
extern int dcache_dir_close(struct inode *, struct file *);
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 0b89efc..d363ecc 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -36,6 +36,8 @@ extern const struct seq_operations mounts_op;
extern const struct seq_operations mountinfo_op;
extern const struct seq_operations mountstats_op;
extern int mnt_had_events(struct proc_mounts *);
+extern struct vfsmount *fs_get_vfsmount(struct task_struct *task,
+ struct super_block *sb);
#endif
#endif
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..a853aa0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -4,6 +4,7 @@
#include <linux/dcache.h>
#include <linux/linkage.h>
#include <linux/path.h>
+#include <asm-generic/fcntl.h>
struct vfsmount;
@@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
((char *) name)[min(len, maxlen)] = '\0';
}
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ * 00 - read-only
+ * 01 - write-only
+ * 10 - read-write
+ * 11 - special
+ * it is changed into
+ * 00 - no permissions needed
+ * 01 - read-permission
+ * 10 - write-permission
+ * 11 - read-write
+ * for the internal routines (i.e. open_namei()/follow_link() etc.).
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
+ * The flag is incremented unless (flag + 1) & O_ACCMODE would be zero.
+*/
+static inline int open_to_namei_flags(int flag)
+{
+ if ((flag+1) & O_ACCMODE)
+ flag++;
+ return flag;
+}
#endif /* _LINUX_NAMEI_H */
--
1.7.0.4.360.g11766c
next prev parent reply other threads:[~2010-04-27 16:14 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-04-27 16:13 [PATCH -V6 0/8] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
2010-04-27 16:13 ` [PATCH -V6 1/8] exportfs: Return the minimum required handle size Aneesh Kumar K.V
2010-04-27 16:13 ` [PATCH -V6 2/8] vfs: Add name to file handle conversion support Aneesh Kumar K.V
2010-04-27 16:13 ` Aneesh Kumar K.V [this message]
2010-04-27 16:13 ` [PATCH -V6 4/8] vfs: Add freadlink syscall Aneesh Kumar K.V
2010-04-27 16:13 ` [PATCH -V6 5/8] ext4: Add get_fsid callback Aneesh Kumar K.V
2010-04-27 16:13 ` [PATCH -V6 6/8] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
2010-04-27 16:45 ` Aneesh Kumar K. V
2010-04-27 16:13 ` [PATCH -V6 7/8] x86: Add new syscalls for x86_64 Aneesh Kumar K.V
2010-04-27 16:13 ` [PATCH -V6 8/8] ext3: Add get_fsid callback Aneesh Kumar K.V
2010-04-27 21:13 ` [PATCH -V6 0/8] Generic name to handle and open by handle syscalls Andreas Dilger
2010-04-28 5:09 ` Aneesh Kumar K. V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1272384830-22670-4-git-send-email-aneesh.kumar@linux.vnet.ibm.com \
--to=aneesh.kumar@linux.vnet.ibm.com \
--cc=adilger@sun.com \
--cc=corbet@lwn.net \
--cc=hch@infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=neilb@suse.de \
--cc=serue@us.ibm.com \
--cc=sfrench@us.ibm.com \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).