[PATCH -V2] Generic name to handle and open by handle syscalls

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH -V2] Generic name to handle and open by handle syscalls
@ 2010-03-18 17:09 Aneesh Kumar K.V
  2010-03-18 17:09 ` [PATCH -V2 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2010-03-18 17:09 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel, adilger, corbet

The patch set below implements open-by-handle support using the exportfs
operations. This allows a user space application to map a file name to a file
handle and later open the file using that handle. This should be usable
for userspace NFS [1] and 9P servers [2]. XFS already supports this with the ioctls
XFS_IOC_PATH_TO_HANDLE and XFS_IOC_OPEN_BY_HANDLE.

[1] http://nfs-ganesha.sourceforge.net/
[2] http://lists.gnu.org/archive/html/qemu-devel/2010-03/msg01087.html

Changes from v1:
a) handle size is now specified in bytes
b) returns -EOVERFLOW if the supplied handle size is too small
c) dropped open_handle syscall and added open_by_handle_at syscall
   open_by_handle_at takes mount_fd as the directory fd of the mount point
   containing the file
e) a handle is only unique within a given file system. So an NFS server
   exporting multiple file systems will have to internally track the
   mount point to which each file handle belongs. That should be much easier
   than expecting the kernel to provide a system-wide unique file handle. A
   system-wide unique file handle would need much larger changes to the exportfs or VFS
   interface, and I was not sure whether we really need to do that in the kernel or
   in user space.
f) open_by_handle_at now only checks for the CAP_DAC_OVERRIDE capability

Example program:
-------------
cc  -D_GNU_SOURCE  <src.c>
----------------
#include <stdio.h>
#include <stdlib.h>

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>

/*
 * Userspace copy of the struct file_handle that this patch series adds to
 * include/linux/fs.h. The kernel copies the structure in and out as a whole,
 * so the field order and types here must match the kernel definition.
 */
struct file_handle {
        int handle_size;        /* size of the handle buffer, in bytes */
        int handle_type;        /* written back by the name_to_handle syscall */
        void *handle;           /* user buffer the encoded handle is copied to/from */
};
/*
 * Example: convert the path given in argv[1] to a file handle with the
 * name_to_handle syscall (338), reopen the file through that handle with
 * open_by_handle_at (339) relative to "/", and copy its contents to stdout.
 *
 * Exits with status 1 and a message on stderr if any step fails.
 */
int main(int argc, char *argv[])
{
        long ret;
        int fd, dirfd;
        ssize_t nread;
        char buf[100];
        struct file_handle fh;

        if (argc < 2) {
                fprintf(stderr, "usage: %s <file>\n",
                        argc > 0 ? argv[0] : "prog");
                exit(1);
        }

        fh.handle_type = 0;
        fh.handle_size = 100;
        fh.handle = malloc(fh.handle_size);
        if (!fh.handle) {
                perror("Error:");
                exit(1);
        }

        errno  = 0;
        ret = syscall(338, argv[1], &fh);
        if (ret) {
                perror("Error:");
                exit(1);
        }

        dirfd = open("/", O_RDONLY|O_DIRECTORY);
        if (dirfd < 0) {
                perror("Error:");
                exit(1);
        }

        fd = syscall(339, dirfd, &fh, O_RDONLY);
        /* 0 is a valid descriptor; only negative values signal failure */
        if (fd < 0) {
                perror("Error:");
                exit(1);
        }

        /*
         * Write exactly the bytes that were read. Printing the buffer with
         * "%s" would run past its end whenever read() fills all of it,
         * because no terminating NUL is left.
         */
        while ((nread = read(fd, buf, sizeof(buf))) > 0) {
                if (fwrite(buf, 1, (size_t)nread, stdout) != (size_t)nread) {
                        perror("Error:");
                        exit(1);
                }
        }
        if (nread < 0) {
                perror("Error:");
                exit(1);
        }

        close(fd);
        close(dirfd);
        free(fh.handle);
        return 0;
}

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH -V2 1/3] vfs: Add name to file handle conversion support
  2010-03-18 17:09 [PATCH -V2] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
@ 2010-03-18 17:09 ` Aneesh Kumar K.V
  2010-03-18 17:09 ` [PATCH -V2 2/3] vfs: Add open by file handle support Aneesh Kumar K.V
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2010-03-18 17:09 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel, adilger, corbet, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/open.c          |   80 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |    7 ++++
 2 files changed, 87 insertions(+), 0 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index e17f544..7012cf9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,7 @@
 #include <linux/falloc.h>
 #include <linux/fs_struct.h>
 #include <linux/ima.h>
+#include <linux/exportfs.h>
 
 #include "internal.h"
 
@@ -1206,3 +1207,82 @@ int nonseekable_open(struct inode *inode, struct file *filp)
 }
 
 EXPORT_SYMBOL(nonseekable_open);
+
+/* limit the handle size to some value */
+#define MAX_HANDLE_SZ 4096
+static int do_sys_name_to_handle(const char __user *name,
+				struct file_handle *fh)
+{
+	int retval;
+	int handle_size;
+	struct path path;
+	struct inode *inode;
+	void *handle = NULL;
+
+	if (fh->handle_size > MAX_HANDLE_SZ)
+		return -EINVAL;
+
+	retval = user_lpath(name, &path);
+	if (retval)
+		return retval;
+
+	inode = path.dentry->d_inode;
+	/*
+	 * name to handle conversion only done for regular files
+	 * directories and symbolic links
+	 */
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
+		!S_ISLNK(inode->i_mode)) {
+		retval =  -EINVAL;
+		goto err_out;
+	}
+	handle = kmalloc(fh->handle_size, GFP_KERNEL);
+	if (!handle) {
+		retval = -ENOMEM;
+		goto err_out;
+	}
+
+	handle_size = fh->handle_size;
+	/* we ask for a non connected handle */
+	retval = exportfs_encode_fh(path.dentry, (struct fid *)handle,
+				&handle_size,  0);
+	/* convert handle size to bytes */
+	handle_size *= sizeof(u32);
+	fh->handle_type = retval;
+	if (handle_size <= fh->handle_size) {
+		if (copy_to_user(fh->f_handle, handle,
+					handle_size))
+			retval = -EFAULT;
+		else
+			retval = 0;
+	} else
+		retval = -EOVERFLOW;
+	fh->handle_size = handle_size;
+	kfree(handle);
+
+err_out:
+	path_put(&path);
+	return retval;
+}
+
+SYSCALL_DEFINE2(name_to_handle, const char __user *, name,
+		struct file_handle __user *, handle)
+{
+	long ret;
+	struct file_handle f_handle;
+	if (copy_from_user(&f_handle, handle, sizeof(struct file_handle))) {
+		ret = -EFAULT;
+		goto err_out;
+	}
+	ret = do_sys_name_to_handle(name, &f_handle);
+	if (copy_to_user(&handle->handle_type,
+			&f_handle.handle_type, sizeof(f_handle.handle_type)) ||
+		copy_to_user(&handle->handle_size,
+			&f_handle.handle_size, sizeof(f_handle.handle_size))) {
+		ret = -EFAULT;
+	}
+err_out:
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(2, ret, name, handle);
+	return ret;
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 10b8ded..e618cac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -948,6 +948,13 @@ struct file {
 	unsigned long f_mnt_write_state;
 #endif
 };
+
+struct file_handle {
+	int handle_size;
+	int handle_type;
+	void *f_handle;
+};
+
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
 #define file_list_unlock() spin_unlock(&files_lock);
-- 
1.7.0.2.273.gc2413


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH -V2 2/3] vfs: Add open by file handle support
  2010-03-18 17:09 [PATCH -V2] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
  2010-03-18 17:09 ` [PATCH -V2 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
@ 2010-03-18 17:09 ` Aneesh Kumar K.V
  2010-03-18 17:09 ` [PATCH -V2 3/3] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2010-03-18 17:09 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel, adilger, corbet, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/namei.c            |   24 --------
 fs/open.c             |  154 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/namei.h |   24 ++++++++
 3 files changed, 178 insertions(+), 24 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 48e60a1..44c2437 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1521,30 +1521,6 @@ out_unlock:
 	return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
 }
 
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *	00 - read-only
- *	01 - write-only
- *	10 - read-write
- *	11 - special
- * it is changed into
- *	00 - no permissions needed
- *	01 - read-permission
- *	10 - write-permission
- *	11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
-static inline int open_to_namei_flags(int flag)
-{
-	if ((flag+1) & O_ACCMODE)
-		flag++;
-	return flag;
-}
-
 static int open_will_truncate(int flag, struct inode *inode)
 {
 	/*
diff --git a/fs/open.c b/fs/open.c
index 7012cf9..98f6433 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1286,3 +1286,157 @@ err_out:
 	asmlinkage_protect(2, ret, name, handle);
 	return ret;
 }
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+	return 1;
+}
+
+static struct path *path_from_fd(int fd)
+{
+	struct path *path;
+	struct file *filep;
+
+	filep = fget(fd);
+	if (!filep)
+		return ERR_PTR(-EBADF);
+	path = &filep->f_path;
+	path_get(path);
+	fput(filep);
+	return path;
+}
+
+
+static struct dentry *handle_to_dentry(struct path *path,
+				struct file_handle *fh)
+{
+	int retval = 0;
+	int handle_size;
+	void *handle = NULL;
+	struct dentry *dentry;
+
+	if (fh->handle_size > MAX_HANDLE_SZ) {
+		retval = -EINVAL;
+		goto err_out;
+	}
+	handle = kmalloc(fh->handle_size, GFP_KERNEL);
+	if (!handle) {
+		retval =  -ENOMEM;
+		goto err_out;
+	}
+	if (copy_from_user(handle, fh->f_handle, fh->handle_size)) {
+		retval = -EFAULT;
+		goto err_out;
+	}
+	/* change the handle size to multiple of sizeof(u32) */
+	handle_size = fh->handle_size >> 2;
+	dentry = exportfs_decode_fh(path->mnt, (struct fid *)handle,
+					handle_size, fh->handle_type,
+					vfs_dentry_acceptable, NULL);
+	kfree(handle);
+	return dentry;
+
+err_out:
+	kfree(handle);
+	return ERR_PTR(retval);
+}
+
+long do_sys_open_by_handle(int mount_fd, struct file_handle *fh, int flags)
+{
+	int fd;
+	int retval = 0;
+	int d_flags  = flags;
+	struct path *path;
+	struct file *filp;
+	struct inode *inode;
+	struct dentry *dentry;
+
+	if (!capable(CAP_DAC_OVERRIDE))
+		return -EPERM;
+
+	path = path_from_fd(mount_fd);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	dentry = handle_to_dentry(path, fh);
+	if (IS_ERR(dentry)) {
+		path_put(path);
+		return PTR_ERR(dentry);
+	}
+
+	inode = dentry->d_inode;
+	/* Restrict open_by_handle to directories & regular files. */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+		retval = -EINVAL;
+		goto err_out;
+	}
+
+	flags  = open_to_namei_flags(flags);
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flags & O_TRUNC)
+		flags |= MAY_WRITE;
+
+	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
+		(flags & FMODE_WRITE) && IS_APPEND(inode)) {
+		retval = -EPERM;
+		goto err_out;
+	}
+
+	if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+		retval = -EACCES;
+		goto err_out;
+	}
+
+	/* Can't write directories. */
+	if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE)) {
+		retval = -EISDIR;
+		goto err_out;
+	}
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		retval = fd;
+		goto err_out;
+	}
+
+	filp = dentry_open(dentry, mntget(path->mnt),
+			d_flags, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	if (inode->i_mode & S_IFREG) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+	fsnotify_open(filp->f_path.dentry);
+	fd_install(fd, filp);
+	path_put(path);
+	return fd;
+
+err_out:
+	path_put(path);
+	dput(dentry);
+	return retval;
+}
+
+SYSCALL_DEFINE3(open_by_handle_at, int, mountfd,
+		struct file_handle __user *, handle, int, flags)
+{
+	long ret;
+	struct file_handle f_handle;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	if (copy_from_user(&f_handle, handle, sizeof(struct file_handle))) {
+		ret = -EFAULT;
+		goto err_out;
+	}
+	ret = do_sys_open_by_handle(mountfd, &f_handle, flags);
+err_out:
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(3, ret, mountfd, handle, flags);
+	return ret;
+}
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..a853aa0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -4,6 +4,7 @@
 #include <linux/dcache.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
+#include <asm-generic/fcntl.h>
 
 struct vfsmount;
 
@@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ *	00 - read-only
+ *	01 - write-only
+ *	10 - read-write
+ *	11 - special
+ * it is changed into
+ *	00 - no permissions needed
+ *	01 - read-permission
+ *	10 - write-permission
+ *	11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc)
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
+ *
+*/
+static inline int open_to_namei_flags(int flag)
+{
+	if ((flag+1) & O_ACCMODE)
+		flag++;
+	return flag;
+}
 #endif /* _LINUX_NAMEI_H */
-- 
1.7.0.2.273.gc2413


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH -V2 3/3] x86: Add new syscalls for x86_32
  2010-03-18 17:09 [PATCH -V2] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
  2010-03-18 17:09 ` [PATCH -V2 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
  2010-03-18 17:09 ` [PATCH -V2 2/3] vfs: Add open by file handle support Aneesh Kumar K.V
@ 2010-03-18 17:09 ` Aneesh Kumar K.V
  2010-03-18 17:31 ` [PATCH -V2] Generic name to handle and open by handle syscalls Christoph Hellwig
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K.V @ 2010-03-18 17:09 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel, adilger, corbet, Aneesh Kumar K.V

This patch adds the sys_name_to_handle and sys_open_by_handle_at
syscalls to x86_32.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/unistd_32.h   |    4 +++-
 arch/x86/kernel/syscall_table_32.S |    2 ++
 2 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 3baf379..f8a7511 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -343,10 +343,12 @@
 #define __NR_rt_tgsigqueueinfo	335
 #define __NR_perf_event_open	336
 #define __NR_recvmmsg		337
+#define __NR_name_to_handle	338
+#define __NR_open_by_handle_at  339
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 338
+#define NR_syscalls 340
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 15228b5..3252573 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -337,3 +337,5 @@ ENTRY(sys_call_table)
 	.long sys_rt_tgsigqueueinfo	/* 335 */
 	.long sys_perf_event_open
 	.long sys_recvmmsg
+	.long sys_name_to_handle
+	.long sys_open_by_handle_at	/* 339 */
-- 
1.7.0.2.273.gc2413


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-18 17:09 [PATCH -V2] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2010-03-18 17:09 ` [PATCH -V2 3/3] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
@ 2010-03-18 17:31 ` Christoph Hellwig
  2010-03-18 17:54   ` J. Bruce Fields
  2010-03-29  7:12   ` Aneesh Kumar K. V
  2010-03-18 22:26 ` Andreas Dilger
  2010-03-26  0:42 ` Ben Hutchings
  5 siblings, 2 replies; 12+ messages in thread
From: Christoph Hellwig @ 2010-03-18 17:31 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: hch, viro, linux-fsdevel, adilger, corbet

I haven't looked at the code in detail yet, but the real value add for
userspace nfs servers and similar would be globally unique handles.
This of course requires filesystem support, but having a way to export
the filesystem handle would also simplify nfsd a lot so that it doesn't
have to rely on hacked support to pass this in from userspace which
gets it from statfs, assuming f_fsid has parts of a uuid in it, or
blkid.

This might be as simple as adding an s_uuid array to the superblock and
having some simple routines to iterate it, or we might make that
an export operation to allow a bit more flexibility.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-18 17:31 ` [PATCH -V2] Generic name to handle and open by handle syscalls Christoph Hellwig
@ 2010-03-18 17:54   ` J. Bruce Fields
  2010-03-29  7:12   ` Aneesh Kumar K. V
  1 sibling, 0 replies; 12+ messages in thread
From: J. Bruce Fields @ 2010-03-18 17:54 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Aneesh Kumar K.V, viro, linux-fsdevel, adilger, corbet,
	Neil Brown

On Thu, Mar 18, 2010 at 01:31:45PM -0400, Christoph Hellwig wrote:
> I haven't looked at the code in detail yet, but the real value add for
> userspace nfs servers and similar would be globally unique handles.
> This of course requires filesystem support, but having a way to export
> the filesystem handle would also simplify nfsd a lot so that it doesn't
> have to rely on hacked support to pass this in from userspace which
> gets it from statfs, assuming f_fsid has parts of a uuid in it, or
> blkid.
> 
> This might be as simple as adding an s_uuid array to the superblock and
> having some simple routines to iterate it, or we might make that
> an export operation to allow a bit more flexibility.

That might be a good thing.  But the need for backwards-compatibility
with the existing mechanisms may prevent any simplification, at least in
the near term.

--b.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-18 17:09 [PATCH -V2] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2010-03-18 17:31 ` [PATCH -V2] Generic name to handle and open by handle syscalls Christoph Hellwig
@ 2010-03-18 22:26 ` Andreas Dilger
  2010-03-19  6:00   ` Aneesh Kumar K. V
  2010-03-26  0:42 ` Ben Hutchings
  5 siblings, 1 reply; 12+ messages in thread
From: Andreas Dilger @ 2010-03-18 22:26 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: hch, viro, linux-fsdevel, corbet

On 2010-03-18, at 11:09, Aneesh Kumar K.V wrote:
> Example program:
> int main(int argc, char *argv[])
> {
>        ret = syscall(338, argv[1], &fh);
>        if (ret) {
>                perror("Error:");
>                exit(1);
>        }
>        dirfd = open("/", O_RDONLY|O_DIRECTORY);
>        fd = syscall(339, dirfd, &fh, O_RDONLY);


For your example program, it would be useful to include the
system calls so it is easier to see what is going on, like:

#ifndef HAVE_NAME_TO_HANDLE
static inline int name_to_handle(const char *name, struct file_handle  
*fh)
{
         return syscall(338, name, fh);
}

static inline int open_by_handle(int dirfd, struct file_handle *fh,  
int flags)
{
         return syscall(339, dirfd, fh, flags);
}
#endif

int main(...)
{
         ret = name_to_handle(argv[1], &fh);
         :

         dirfd = open("/", O_RDONLY|O_DIRECTORY);
         fd = open_by_handle(dirfd, &fh, O_RDONLY);
         :
         :
}

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-18 22:26 ` Andreas Dilger
@ 2010-03-19  6:00   ` Aneesh Kumar K. V
  0 siblings, 0 replies; 12+ messages in thread
From: Aneesh Kumar K. V @ 2010-03-19  6:00 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: hch, viro, linux-fsdevel, corbet

On Thu, 18 Mar 2010 16:26:42 -0600, Andreas Dilger <adilger@sun.com> wrote:
> On 2010-03-18, at 11:09, Aneesh Kumar K.V wrote:
> > Example program:
> > int main(int argc, char *argv[])
> > {
> >        ret = syscall(338, argv[1], &fh);
> >        if (ret) {
> >                perror("Error:");
> >                exit(1);
> >        }
> >        dirfd = open("/", O_RDONLY|O_DIRECTORY);
> >        fd = syscall(339, dirfd, &fh, O_RDONLY);
> 
> 
> For your example program, it would be useful to include the
> system calls so it is easier to see what is going on, like:
> 
> #ifndef HAVE_NAME_TO_HANDLE
> static inline int name_to_handle(const char *name, struct file_handle  
> *fh)
> {
>          return syscall(338, name, fh);
> }
> 
> static inline int open_by_handle(int dirfd, struct file_handle *fh,  
> int flags)
> {
>          return syscall(339, dirfd, fh, flags);
> }
> #endif
> 
> int main(...)
> {
>          ret = name_to_handle(argv[1], &fh);
>          :
> 
>          dirfd = open("/", O_RDONLY|O_DIRECTORY);
>          fd = open_by_handle(dirfd, &fh, O_RDONLY);
>          :
>          :
> }
> 
I updated the sample program as shown below. I also needed an additional
kernel patch, which is included here as well.

#include <stdio.h>
#include <stdlib.h>

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>


/*
 * Userspace copy of the struct file_handle that this patch series adds to
 * include/linux/fs.h. The kernel copies the structure in and out as a whole,
 * so the field order and types here must match the kernel definition.
 */
struct file_handle {
    int handle_size;    /* size of the handle buffer, in bytes */
    int handle_type;    /* written back by the name_to_handle syscall */
    void *handle;       /* user buffer the encoded handle is copied to/from */
};

/*
 * Invoke the name_to_handle syscall (number 338 in this patch series) for
 * the given path, passing fh through to the kernel unchanged.
 * Returns whatever syscall() returns, narrowed to int.
 */
static int name_to_handle(const char *name, struct file_handle  *fh)
{
    long rc = syscall(338, name, fh);

    return rc;
}

/*
 * Invoke the open_by_handle_at syscall (number 339 in this patch series)
 * with the mount directory fd, the handle and the open flags.
 * Returns whatever syscall() returns, narrowed to int.
 */
static int open_by_handle(int dirfd, struct file_handle *fh,  int flags)
{
    long rc = syscall(339, dirfd, fh, flags);

    return rc;
}

/*
 * Example: look up the handle for the path in argv[1], discovering the
 * required handle size first, then reopen the file through the handle
 * relative to "/" and copy its contents to stdout.
 *
 * The first name_to_handle() call passes a zero-sized buffer; with the
 * exportfs change in this mail the kernel then fails with EOVERFLOW and
 * writes the required size back into fh.handle_size. The call is repeated
 * once with a buffer of that size. Any other failure is fatal, so a missing
 * file no longer retries forever.
 *
 * Exits with status 1 and a message on stderr if any step fails.
 */
int main(int argc, char *argv[])
{
    int ret;
    int fd, dirfd;
    ssize_t nread;
    char buf[100];
    struct file_handle fh;

    if (argc < 2) {
	fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "prog");
	exit(1);
    }

    /* Probe call: no buffer yet, only ask for the required size. */
    fh.handle_size = 0;
    fh.handle_type = 0;
    fh.handle = NULL;
    errno  = 0;
    ret = name_to_handle(argv[1], &fh);
    if (ret && errno == EOVERFLOW && fh.handle_size > 0) {
	printf("Found the handle size needed to be %d\n", fh.handle_size);
	printf("Trying again..\n");
	fh.handle = malloc(fh.handle_size);
	if (!fh.handle) {
	    perror("Error:");
	    exit(1);
	}
	fh.handle_type = 0;
	errno  = 0;
	ret = name_to_handle(argv[1], &fh);
    }
    if (ret) {
	perror("Error:");
	exit(1);
    }

    dirfd = open("/", O_RDONLY|O_DIRECTORY);
    if (dirfd < 0) {
	perror("Error:");
	exit(1);
    }

    fd = open_by_handle(dirfd, &fh, O_RDONLY);
    /* 0 is a valid descriptor; only negative values signal failure */
    if (fd < 0) {
	perror("Error:");
	exit(1);
    }

    /*
     * Write exactly the bytes that were read. Printing the buffer with
     * "%s" would run past its end whenever read() fills all of it,
     * because no terminating NUL is left.
     */
    while ((nread = read(fd, buf, sizeof(buf))) > 0) {
	if (fwrite(buf, 1, (size_t)nread, stdout) != (size_t)nread) {
	    perror("Error:");
	    exit(1);
	}
    }
    if (nread < 0) {
	perror("Error:");
	exit(1);
    }

    close(fd);
    close(dirfd);
    free(fh.handle);
    return 0;
}


commit 1a1fe3f95295857e8406fcc3943ba6ad4cc2792c
Author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Date:   Fri Mar 19 10:19:17 2010 +0530

    exportfs: Return the minimum required handle size
    
    The exportfs encode handle function should return the minimum required
    handle size. This lets the user find out the handle size by passing a
    handle size of 0 in the first call and then repeating the call with
    the returned handle size value.
    
    Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ba5c3fd..db22392 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 	int len = *max_len;
 	int type;
 
-	if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
-	    (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+	if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+		*max_len = BTRFS_FID_SIZE_CONNECTABLE;
 		return 255;
+	} else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
+		*max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+		return 255;
+	}
 
 	len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
 	type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index e9e1759..cfee0f0 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -319,9 +319,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
 	struct inode * inode = dentry->d_inode;
 	int len = *max_len;
 	int type = FILEID_INO32_GEN;
-	
-	if (len < 2 || (connectable && len < 4))
+
+	if (connectable && (len < 4)) {
+		*max_len = 4;
+		return 255;
+	} else if (len < 2) {
+		*max_len = 2;
 		return 255;
+	}
 
 	len = 2;
 	fid->i32.ino = inode->i_ino;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index fbeecdc..b4c3839 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -738,8 +738,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
 	struct inode *inode =  de->d_inode;
 	u32 ipos_h, ipos_m, ipos_l;
 
-	if (len < 5)
+	if (len < 5) {
+		*lenp = 5;
 		return 255; /* no room */
+	}
 
 	ipos_h = MSDOS_I(inode)->i_pos >> 8;
 	ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a822ce..a0e90c3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -638,8 +638,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 	u64 nodeid;
 	u32 generation;
 
-	if (*max_len < len)
+	if (*max_len < len) {
+		*max_len = len;
 		return  255;
+	}
 
 	nodeid = get_fuse_inode(inode)->nodeid;
 	generation = inode->i_generation;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index d15876e..4a85b36 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -37,9 +37,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 	struct super_block *sb = inode->i_sb;
 	struct gfs2_inode *ip = GFS2_I(inode);
 
-	if (*len < GFS2_SMALL_FH_SIZE ||
-	    (connectable && *len < GFS2_LARGE_FH_SIZE))
+	if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
+		*len = GFS2_LARGE_FH_SIZE;
 		return 255;
+	} else if (*len < GFS2_SMALL_FH_SIZE) {
+		*len = GFS2_SMALL_FH_SIZE;
+		return 255;
+	}
 
 	fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
 	fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb..dd4687f 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
 	 * offset of the inode and the upper 16 bits of fh32[1] to
 	 * hold the offset of the parent.
 	 */
-
-	if (len < 3 || (connectable && len < 5))
+	if (connectable && (len < 5)) {
+		*max_len = 5;
+		return 255;
+	} else if (len < 3) {
+		*max_len = 3;
 		return 255;
+	}
 
 	len = 3;
 	fh32[0] = ei->i_iget5_block;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145..250a347 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -201,8 +201,14 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 		   dentry->d_name.len, dentry->d_name.name,
 		   fh, len, connectable);
 
-	if (len < 3 || (connectable && len < 6)) {
+	if (connectable && (len < 6)) {
 		mlog(ML_ERROR, "fh buffer is too small for encoding\n");
+		*max_len = 6;
+		type = 255;
+		goto bail;
+	} else if (len < 3) {
+		mlog(ML_ERROR, "fh buffer is too small for encoding\n");
+		*max_len = 3;
 		type = 255;
 		goto bail;
 	}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d1da94b..fe1600c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1587,8 +1587,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
 	struct inode *inode = dentry->d_inode;
 	int maxlen = *lenp;
 
-	if (maxlen < 3)
+	if (need_parent && (maxlen < 5)) {
+		*lenp = 5;
 		return 255;
+	} else if (maxlen < 3) {
+		*lenp = 3;
+		return 255;
+	}
 
 	data[0] = inode->i_ino;
 	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index db423ab..7f6fc85 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1360,8 +1360,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
 	struct fid *fid = (struct fid *)fh;
 	int type = FILEID_UDF_WITHOUT_PARENT;
 
-	if (len < 3 || (connectable && len < 5))
+	if (connectable && (len < 5)) {
+		*lenp = 5;
+		return 255;
+	} else if (len < 3) {
+		*lenp = 3;
 		return 255;
+	}
 
 	*lenp = 3;
 	fid->udf.block = location.logicalBlockNum;
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75a..82c0553 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -81,8 +81,10 @@ xfs_fs_encode_fh(
 	 * seven combinations work.  The real answer is "don't use v2".
 	 */
 	len = xfs_fileid_length(fileid_type);
-	if (*max_len < len)
+	if (*max_len < len) {
+		*max_len = len;	/* report the required size to the caller */
 		return 255;
+	}
 	*max_len = len;
 
 	switch (fileid_type) {
diff --git a/mm/shmem.c b/mm/shmem.c
index eef4ebe..bbeda1c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2125,8 +2125,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
 {
 	struct inode *inode = dentry->d_inode;
 
-	if (*len < 3)
+	if (*len < 3) {
+		*len = 3;
 		return 255;
+	}
 
 	if (hlist_unhashed(&inode->i_hash)) {
 		/* Unfortunately insert_inode_hash is not idempotent,

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-18 17:09 [PATCH -V2] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
                   ` (4 preceding siblings ...)
  2010-03-18 22:26 ` Andreas Dilger
@ 2010-03-26  0:42 ` Ben Hutchings
  2010-03-26  6:43   ` Andreas Dilger
  5 siblings, 1 reply; 12+ messages in thread
From: Ben Hutchings @ 2010-03-26  0:42 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: hch, viro, linux-fsdevel, adilger, corbet

[-- Attachment #1: Type: text/plain, Size: 998 bytes --]

On Thu, 2010-03-18 at 22:39 +0530, Aneesh Kumar K.V wrote:
> The below set of patches implement open by handle support using exportfs
> operations. This allows user space application to map a file name to file 
> handle and later open the file using handle. This should be usable
> for userspace NFS [1] and 9P server [2]. XFS already support this with the ioctls
> XFS_IOC_PATH_TO_HANDLE and XFS_IOC_OPEN_BY_HANDLE.
[...]

I think this is quite a poor choice of name.  A 'handle' is normally a
capability and a counted reference to some resource.  In a Linux context
'file handle' suggests to me a pointer to struct file.  But as I
understand it you are trying to provide an uncounted reference that is
still subject to later permission checks.

I would suggest something like 'file id', 'file number' (though that
could be confused with file descriptor numbers) or 'file cookie'.

Ben.

-- 
Ben Hutchings
Once a job is fouled up, anything done to improve it makes it worse.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-26  0:42 ` Ben Hutchings
@ 2010-03-26  6:43   ` Andreas Dilger
  0 siblings, 0 replies; 12+ messages in thread
From: Andreas Dilger @ 2010-03-26  6:43 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Aneesh Kumar K.V, hch, viro, linux-fsdevel, corbet

On 2010-03-25, at 18:42, Ben Hutchings wrote:
> On Thu, 2010-03-18 at 22:39 +0530, Aneesh Kumar K.V wrote:
>> The below set of patches implement open by handle support using  
>> exportfs
>> operations. This allows user space application to map a file name  
>> to file
>> handle and later open the file using handle. This should be usable
>> for userspace NFS [1] and 9P server [2]. XFS already support this  
>> with the ioctls
>> XFS_IOC_PATH_TO_HANDLE and XFS_IOC_OPEN_BY_HANDLE.
>
> I think this is quite a poor choice of name.  A 'handle' is normally a
> capability and a counted reference to some resource.  In a Linux  
> context
> 'file handle' suggests to me a pointer to struct file.  But as I
> understand it you are trying to provide an uncounted reference that is
> still subject to later permission checks.


In NFS this is currently called a file handle (i.e. an encoded  
representation of a specific file/inode), so the name is consistent  
with current usage.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-18 17:31 ` [PATCH -V2] Generic name to handle and open by handle syscalls Christoph Hellwig
  2010-03-18 17:54   ` J. Bruce Fields
@ 2010-03-29  7:12   ` Aneesh Kumar K. V
  2010-03-30 19:36     ` Andreas Dilger
  1 sibling, 1 reply; 12+ messages in thread
From: Aneesh Kumar K. V @ 2010-03-29  7:12 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: hch, viro, linux-fsdevel, adilger, corbet

On Thu, 18 Mar 2010 13:31:45 -0400, Christoph Hellwig <hch@infradead.org> wrote:

Hi Christoph,

> I haven't looked at the code in detail yet, but the real value add for
> userspace nfs servers and similar would be globally unique handles.
> This of course requires filesystem support, but having a way to export
> the filesystem handle would also simplify nfsd a lot so that it doesn't
> have to rely on hacked support to pass this in from userspace which
> gets it from statfs, assuming f_fsid has parts of a uuid in it, or
> blkid.
> 
> This might be as simple as adding an s_uuid array to the superblock and
> having some simple routines to iterate it, or we might make that
> an export operation to allow a bit more flexibility.

As per the private email exchanges we had on this, do you agree that we
would need the vfsmount to successfully open a file by handle? If yes,
we would need to specify the mount point via mountfd. In that case, do we
need the handle returned by the kernel to be system-wide unique?

We can build uniqueness in userspace based on the mountfd, and that
also enables us to use a different field width for the file system
identifier. So rather than forcing the usage of a uuid, userspace can now
decide to use an fsid that is smaller and that uniquely identifies only the
vfsmounts that the NFS server is exporting?

If you agree with the above, do you see anything missing in the set of
patches posted? Are they merge-ready?


NOTE: Inlining below the patch that shows why we would need a vfsmount
for open-by-handle.

-aneesh

I actually did a patch with no acceptable check. Where I am stuck is
the reconnect_path call for directories. That would need a vfsmount. I
can do a variant with struct super_block, but then exportfs_get_name
needs a vfsmount for opening the parent directory using dentry_open
(get_name). We also need a vfsmount to do the final dentry_open after
we do the handle to dentry conversion in the open_by_handle syscall.


diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index e9e1759..97f04fa 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -486,4 +486,74 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
 }
 EXPORT_SYMBOL_GPL(exportfs_decode_fh);

+static struct super_block *fs_get_sb(__kernel_fsid_t *fsid)
+{
+	struct super_block *sb, *found_sb = NULL;
+	struct file_system_type *fs_type;
+
+	read_lock(&file_systems_lock);
+retry:
+	fs_type = file_systems;
+	while (fs_type) {
+		list_for_each_entry(sb, &fs_type->fs_supers, s_instances) {
+			this_fsid = sb->get_fsid();
+			if (!memcmp(fsid, this_fsid, sizeof(__kernel_fsid_t))) {
+				/* found the matching super_block */
+				if (!grab_super(sb))
+					goto retry;
+				else
+					found_sb = sb;
+			}
+		}
+		fs = fs->next;
+	}
+	read_unlock(&file_systems_lock);
+	return found_sb;
+
+}
+
+struct dentry *exportfs_decode_unique_fh(__kernel_fsid_t *fsid, struct fid *fid,
+					int fh_len, int fileid_type)
+{
+	int err;
+	struct dentry *result;
+	char nbuf[NAME_MAX+1];
+	const struct export_operations *nop;
+	struct super_block  *sb = fs_get_sb(fsid);
+
+	if (!sb)
+		return ERR_PTR(-ESTALE);
+	nop = sb->s_export_op;
+	/*
+	 * Try to get any dentry for the given file handle from the filesystem.
+	 */
+	result = nop->fh_to_dentry(sb, fid, fh_len, fileid_type);
+	if (!result)
+		result = ERR_PTR(-ESTALE);
+	if (IS_ERR(result))
+		return result;
+
+	if (S_ISDIR(result->d_inode->i_mode)) {
+		/*
+		 * This request is for a directory.
+		 *
+		 * On the positive side there is only one dentry for each
+		 * directory inode.  On the negative side this implies that we
+		 * to ensure our dentry is connected all the way up to the
+		 * filesystem root.
+		 */
+		if (result->d_flags & DCACHE_DISCONNECTED) {
+                       /*FIXME!!!! We need vfsmount here */
+			err = reconnect_path(mnt, result, nbuf);
+			if (err)
+				goto err_result;
+		}
+		return result;
+	}
+	return result;
+err_result:
+	dput(result);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(exportfs_decode_unique_fh);
+
 MODULE_LICENSE("GPL");


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH -V2] Generic name to handle and open by handle syscalls
  2010-03-29  7:12   ` Aneesh Kumar K. V
@ 2010-03-30 19:36     ` Andreas Dilger
  0 siblings, 0 replies; 12+ messages in thread
From: Andreas Dilger @ 2010-03-30 19:36 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: Christoph Hellwig, viro, linux-fsdevel, corbet

On 2010-03-29, at 01:12, Aneesh Kumar K. V wrote:
> As per the private email exchanges we had on this, do you agree that  
> we
> would need the vfsmount to successfully open a file by handle ? If yes
> we would need to specify the mount point via mountfd. In that case  
> do we
> need the handle returned by kernel to be system wide unique ?
>
> We can build uniqueness in the userspace based on the mountfd and that
> also enables us to use different field width for file system
> identifier. So rather than forcing the usage of uuid, userspace can  
> now
> decided to use a fsid that is smaller and that uniquely identify  
> only the
> vfsmounts that the NFS server is exporting  ?

To my thinking, forcing userspace to distinguish this is the wrong  
place to do it.  There are a number of uses for this, such as the  
checkpoint/restart that was discussed on another thread, that could  
benefit from having consistent filehandles.

The other thing I've long wanted in-kernel identifiers for is jbd/jbd2  
being able to share a single journal among multiple filesystems.  At  
one time I was going to make a special-case jbd2 filesystem that  
contains these identifiers, but having globally-unique filehandles  
available in the kernel would be even better.  That would allow an  
ext4 instance to get the journal UUID + an agreed-upon filehandle to  
locate the journal at mount time.

> +static struct super_block *fs_get_sb(__kernel_fsid_t *fsid)
> +{
> +	while (fs_type) {
> +		list_for_each_entry(sb, &fs_type->fs_supers, s_instances) {
> +			this_fsid = sb->get_fsid();

Rather than walking all of the filesystems, this seems exactly the  
place to use a hash table to avoid O(n^2) behaviour when mounting  
hundreds/thousands of filesystems (which happens in some cases).

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2010-03-30 19:36 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-18 17:09 [PATCH -V2] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
2010-03-18 17:09 ` [PATCH -V2 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
2010-03-18 17:09 ` [PATCH -V2 2/3] vfs: Add open by file handle support Aneesh Kumar K.V
2010-03-18 17:09 ` [PATCH -V2 3/3] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
2010-03-18 17:31 ` [PATCH -V2] Generic name to handle and open by handle syscalls Christoph Hellwig
2010-03-18 17:54   ` J. Bruce Fields
2010-03-29  7:12   ` Aneesh Kumar K. V
2010-03-30 19:36     ` Andreas Dilger
2010-03-18 22:26 ` Andreas Dilger
2010-03-19  6:00   ` Aneesh Kumar K. V
2010-03-26  0:42 ` Ben Hutchings
2010-03-26  6:43   ` Andreas Dilger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).