[RFC PATCH] Generic name to handle and open by handle syscalls

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [RFC PATCH] Generic name to handle and open by handle syscalls
@ 2010-02-19  5:42 Aneesh Kumar K.V
  2010-02-19  5:42 ` [RFC PATCH 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
                   ` (4 more replies)
  0 siblings, 5 replies; 36+ messages in thread
From: Aneesh Kumar K.V @ 2010-02-19  5:42 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel

The below set of patches implement open by handle support using exportfs
operations. This allows user space application to map a file name to file 
handle and later open the file using handle. This should be usable
for userspace NFS and 9P server. XFS already support this with the ioctls
XFS_IOC_PATH_TO_HANDLE and XFS_IOC_OPEN_BY_HANDLE.

Example program:
-------------
#include <stdio.h>
#include <stdlib.h>

#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

struct file_handle {
	int handle_size;
	int handle_type;
	void *f_handle;
};
int main(int argc, char *argv[])
{
	int ret;
	int fd;
	char buf[100];
	struct file_handle fh;
	fh.handle_type = 0;
	fh.handle = malloc(100);
	fh.handle_size = 100/sizeof(int);
	errno  = 0;
	ret = syscall(338, argv[1], &fh);
	if (ret) {
		perror("Error:");
		exit(1);
	}
	printf("%d\n",fh.handle_size);
	fd = syscall(339, &fh, O_RDONLY);
	if (fd <= 0 ) {
		perror("Error:");
		exit(1);
	}
	printf("fd = %d\n", fd);
	memset(buf, 0 , 100);
	while (read(fd, buf, 100) > 0) {
		printf("%s", buf);
		memset(buf, 0 , 100);
	}
	return 0;
}

-aneesh


^ permalink raw reply	[flat|nested] 36+ messages in thread

* [RFC PATCH 1/3] vfs: Add name to file handle conversion support
  2010-02-19  5:42 [RFC PATCH] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
@ 2010-02-19  5:42 ` Aneesh Kumar K.V
  2010-02-20 18:15   ` Andreas Dilger
  2010-02-19  5:42 ` [RFC PATCH 2/3] vfs: Add open by file handle support Aneesh Kumar K.V
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2010-02-19  5:42 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/open.c          |   64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |    7 +++++
 2 files changed, 71 insertions(+), 0 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 040cef7..f5a8420 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -31,6 +31,7 @@
 #include <linux/falloc.h>
 #include <linux/fs_struct.h>
 #include <linux/ima.h>
+#include <linux/exportfs.h>
 
 #include "internal.h"
 
@@ -1079,6 +1080,69 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
 	return ret;
 }
 
+static int do_sys_name_to_handle(const char __user *name,
+				struct file_handle *handle)
+{
+	int retval;
+	struct path path;
+	struct inode *inode;
+	void *f_handle = NULL;
+	int handle_size = handle->handle_size;
+
+	retval = user_lpath(name, &path);
+	if (retval)
+		return retval;
+
+	inode = path.dentry->d_inode;
+
+	f_handle = kmalloc(handle_size, GFP_KERNEL);
+	if (!f_handle) {
+		retval = -ENOMEM;
+		goto err_out;
+	}
+
+	/* we ask for a non connected handle */
+	retval = exportfs_encode_fh(path.dentry, (struct fid *)f_handle,
+				&handle_size,  0);
+	handle->handle_type = retval;
+	if (handle_size < handle->handle_size) {
+		if (copy_to_user(handle->f_handle, f_handle,
+					handle_size*sizeof(u32)))
+			retval = -EFAULT;
+		retval = 0;
+	} else
+		retval = -EAGAIN;
+	handle->handle_size = handle_size;
+	kfree(f_handle);
+
+err_out:
+	path_put(&path);
+	return retval;
+}
+
+SYSCALL_DEFINE2(name_to_handle, const char __user *, name,
+		struct file_handle __user *, handle)
+{
+	long ret;
+	struct file_handle f_handle;
+	if (copy_from_user(&f_handle, handle, sizeof(struct file_handle))) {
+		ret = -EFAULT;
+		goto err_out;
+	}
+	ret = do_sys_name_to_handle(name, &f_handle);
+	if (copy_to_user(&handle->handle_type,
+			&f_handle.handle_type, sizeof(f_handle.handle_type)) ||
+		copy_to_user(&handle->handle_size,
+			&f_handle.handle_size, sizeof(f_handle.handle_size))) {
+		ret = -EFAULT;
+	}
+err_out:
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(2, ret, name, handle);
+	return ret;
+}
+
+
 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
 		int, mode)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b1bcb27..5bef1e9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -945,6 +945,13 @@ struct file {
 	unsigned long f_mnt_write_state;
 #endif
 };
+
+struct file_handle {
+	int handle_size;
+	int handle_type;
+	void *f_handle;
+};
+
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
 #define file_list_unlock() spin_unlock(&files_lock);
-- 
1.7.0.31.g1df487


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-19  5:42 [RFC PATCH] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
  2010-02-19  5:42 ` [RFC PATCH 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
@ 2010-02-19  5:42 ` Aneesh Kumar K.V
  2010-02-20 18:58   ` Andreas Dilger
  2010-02-19  5:42 ` [RFC PATCH 3/3] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K.V @ 2010-02-19  5:42 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 fs/namei.c            |   24 -------
 fs/open.c             |  166 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/namei.h |   24 +++++++
 3 files changed, 190 insertions(+), 24 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index d62fdc8..7ffdfe9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1558,30 +1558,6 @@ out_unlock:
 	return may_open(&nd->path, 0, flag & ~O_TRUNC);
 }
 
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *	00 - read-only
- *	01 - write-only
- *	10 - read-write
- *	11 - special
- * it is changed into
- *	00 - no permissions needed
- *	01 - read-permission
- *	10 - write-permission
- *	11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc)
- * This is more logical, and also allows the 00 "no perm needed"
- * to be used for symlinks (where the permissions are checked
- * later).
- *
-*/
-static inline int open_to_namei_flags(int flag)
-{
-	if ((flag+1) & O_ACCMODE)
-		flag++;
-	return flag;
-}
-
 static int open_will_truncate(int flag, struct inode *inode)
 {
 	/*
diff --git a/fs/open.c b/fs/open.c
index f5a8420..4187651 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1273,3 +1273,169 @@ int nonseekable_open(struct inode *inode, struct file *filp)
 }
 
 EXPORT_SYMBOL(nonseekable_open);
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+	return 1;
+}
+
+static struct path *path_from_fd(int fd)
+{
+	struct path *path;
+	struct file *filep;
+
+	if (fd == AT_FDCWD) {
+		struct fs_struct *fs = current->fs;
+		read_lock(&fs->lock);
+		path = &fs->pwd;
+		path_get(path);
+		read_unlock(&fs->lock);
+	} else {
+		filep = fget(fd);
+		if (!filep)
+			return ERR_PTR(-EBADF);
+		path = &filep->f_path;
+		path_get(path);
+		fput(filep);
+	}
+	return path;
+}
+
+
+static struct dentry *handle_to_dentry(struct path *path,
+				struct file_handle *fh)
+{
+	int retval = 0;
+	void *handle = NULL;
+	struct inode *inode;
+	struct dentry *dentry;
+	int handle_size = fh->handle_size;
+
+	inode = path->dentry->d_inode;
+	if (!S_ISREG(inode->i_mode) &&
+		!S_ISDIR(inode->i_mode) &&
+		!S_ISLNK(inode->i_mode)) {
+		retval =  -EINVAL;
+		goto err_out;
+	}
+	/* should we do some check on handle size ?*/
+	handle = kmalloc(handle_size, GFP_KERNEL);
+	if (!handle) {
+		retval =  -ENOMEM;
+		goto err_out;
+	}
+	if (copy_from_user(handle, fh->f_handle,
+				handle_size*sizeof(u32))) {
+		retval = -EFAULT;
+		goto err_out;
+	}
+	dentry = exportfs_decode_fh(path->mnt, (struct fid *)handle,
+					handle_size, fh->handle_type,
+					vfs_dentry_acceptable, NULL);
+	kfree(handle);
+	return dentry;
+
+err_out:
+	kfree(handle);
+	return ERR_PTR(retval);
+}
+
+long do_sys_open_by_handle(int dfd, struct file_handle *fh, int flags)
+{
+	int fd;
+	int retval = 0;
+	int d_flags  = flags;
+	struct path *path;
+	struct file *filp;
+	struct inode *inode;
+	struct dentry *dentry;
+
+	if (!capable(CAP_SYS_ADMIN))
+		/* Allow open by handle only by sysadmin */
+		return -EPERM;
+
+	path = path_from_fd(dfd);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	dentry = handle_to_dentry(path, fh);
+	if (IS_ERR(dentry)) {
+		path_put(path);
+		return PTR_ERR(dentry);
+	}
+
+	inode = dentry->d_inode;
+	/* Restrict open_by_handle to directories & regular files. */
+	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+		retval = -EINVAL;
+		goto err_out;
+	}
+
+	flags  = open_to_namei_flags(flags);
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flags & O_TRUNC)
+		flags |= MAY_WRITE;
+
+	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
+		(flags & FMODE_WRITE) && IS_APPEND(inode)) {
+		retval = -EPERM;
+		goto err_out;
+	}
+
+	if ((flags & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+		retval = -EACCES;
+		goto err_out;
+	}
+
+	/* Can't write directories. */
+	if (S_ISDIR(inode->i_mode) && (flags & FMODE_WRITE)) {
+		retval = -EISDIR;
+		goto err_out;
+	}
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		retval = fd;
+		goto err_out;
+	}
+
+	filp = dentry_open(dentry, mntget(path->mnt),
+			d_flags, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	if (inode->i_mode & S_IFREG) {
+		filp->f_flags |= O_NOATIME;
+		filp->f_mode |= FMODE_NOCMTIME;
+	}
+	fsnotify_open(filp->f_path.dentry);
+	fd_install(fd, filp);
+	path_put(path);
+	return fd;
+
+err_out:
+	path_put(path);
+	dput(dentry);
+	return retval;
+}
+
+SYSCALL_DEFINE2(open_by_handle, struct file_handle __user *, handle, int, flags)
+{
+	long ret;
+	struct file_handle f_handle;
+
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
+	if (copy_from_user(&f_handle, handle, sizeof(struct file_handle))) {
+		ret = -EFAULT;
+		goto err_out;
+	}
+	ret = do_sys_open_by_handle(AT_FDCWD, &f_handle, flags);
+err_out:
+	/* avoid REGPARM breakage on x86: */
+	asmlinkage_protect(2, ret, handle, flags);
+	return ret;
+}
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 05b441d..a853aa0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -4,6 +4,7 @@
 #include <linux/dcache.h>
 #include <linux/linkage.h>
 #include <linux/path.h>
+#include <asm-generic/fcntl.h>
 
 struct vfsmount;
 
@@ -96,4 +97,27 @@ static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)
 	((char *) name)[min(len, maxlen)] = '\0';
 }
 
+/*
+ * Note that while the flag value (low two bits) for sys_open means:
+ *	00 - read-only
+ *	01 - write-only
+ *	10 - read-write
+ *	11 - special
+ * it is changed into
+ *	00 - no permissions needed
+ *	01 - read-permission
+ *	10 - write-permission
+ *	11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc)
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
+ *
+*/
+static inline int open_to_namei_flags(int flag)
+{
+	if ((flag+1) & O_ACCMODE)
+		flag++;
+	return flag;
+}
 #endif /* _LINUX_NAMEI_H */
-- 
1.7.0.31.g1df487


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [RFC PATCH 3/3] x86: Add new syscalls for x86_32
  2010-02-19  5:42 [RFC PATCH] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
  2010-02-19  5:42 ` [RFC PATCH 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
  2010-02-19  5:42 ` [RFC PATCH 2/3] vfs: Add open by file handle support Aneesh Kumar K.V
@ 2010-02-19  5:42 ` Aneesh Kumar K.V
  2010-02-19  9:34 ` [RFC PATCH] Generic name to handle and open by handle syscalls Andreas Dilger
  2010-02-22 23:06 ` Jonathan Corbet
  4 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K.V @ 2010-02-19  5:42 UTC (permalink / raw)
  To: hch, viro; +Cc: linux-fsdevel, Aneesh Kumar K.V

This patch adds sys_name_to_handle and sys_open_by_handle
syscalls to x86_32

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/unistd_32.h   |    4 +++-
 arch/x86/kernel/syscall_table_32.S |    2 ++
 2 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 3baf379..faf5b94 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -343,10 +343,12 @@
 #define __NR_rt_tgsigqueueinfo	335
 #define __NR_perf_event_open	336
 #define __NR_recvmmsg		337
+#define __NR_name_to_handle	338
+#define __NR_open_by_handle	339
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 338
+#define NR_syscalls 340
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 15228b5..1b62fa3 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -337,3 +337,5 @@ ENTRY(sys_call_table)
 	.long sys_rt_tgsigqueueinfo	/* 335 */
 	.long sys_perf_event_open
 	.long sys_recvmmsg
+	.long sys_name_to_handle
+	.long sys_open_by_handle	/* 339 */
-- 
1.7.0.31.g1df487


^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-19  5:42 [RFC PATCH] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2010-02-19  5:42 ` [RFC PATCH 3/3] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
@ 2010-02-19  9:34 ` Andreas Dilger
  2010-02-19  9:49   ` Aneesh Kumar K. V
  2010-02-22 23:06 ` Jonathan Corbet
  4 siblings, 1 reply; 36+ messages in thread
From: Andreas Dilger @ 2010-02-19  9:34 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: hch, viro, linux-fsdevel

On 2010-02-18, at 22:42, Aneesh Kumar K.V wrote:
> The below set of patches implement open by handle support using  
> exportfs
> operations. This allows user space application to map a file name to  
> file
> handle and later open the file using handle. This should be usable
> for userspace NFS and 9P server. XFS already support this with the  
> ioctls
> XFS_IOC_PATH_TO_HANDLE and XFS_IOC_OPEN_BY_HANDLE.
>
> struct file_handle {
> 	int handle_size;
> 	int handle_type;
> 	void *f_handle;
> };

What is the required size of the f_handle field?  It seems that either  
this
should be a well-defined structure size in the header, or the syscall  
will return an error like EOVERFLOW if the handle will not fit into  
the supplied handle_size, and it returns the required size in  
handle_size.

> 	fh.handle_type = 0;
> 	fh.handle = malloc(100);
> 	fh.handle_size = 100/sizeof(int);

It seems strange to define the handle_size in terms of ints instead of  
bytes?

> 	ret = syscall(338, argv[1], &fh);
> 	fd = syscall(339, &fh, O_RDONLY);

What is the expected lifespan of "fh" to remain valid in the kernel?   
Presumably this is not just the encoding of the inode number into a  
buffer, or it would be too easy to forge from userspace.  That means  
there needs to be some unguessable state in the kernel for each file  
handle, that may potentially need to be kept indefinitely if there is  
no expiry.

Will "fh" be portable between processes?  I assume that is the intent,  
but good to declare the actual semantics of the syscalls before they  
go into the kernel.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-19  9:34 ` [RFC PATCH] Generic name to handle and open by handle syscalls Andreas Dilger
@ 2010-02-19  9:49   ` Aneesh Kumar K. V
  2010-02-20 19:01     ` Andreas Dilger
  0 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2010-02-19  9:49 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: hch, viro, linux-fsdevel

On Fri, 19 Feb 2010 02:34:29 -0700, Andreas Dilger <adilger@sun.com> wrote:
> On 2010-02-18, at 22:42, Aneesh Kumar K.V wrote:
> > The below set of patches implement open by handle support using  
> > exportfs
> > operations. This allows user space application to map a file name to  
> > file
> > handle and later open the file using handle. This should be usable
> > for userspace NFS and 9P server. XFS already support this with the  
> > ioctls
> > XFS_IOC_PATH_TO_HANDLE and XFS_IOC_OPEN_BY_HANDLE.
> >
> > struct file_handle {
> > 	int handle_size;
> > 	int handle_type;
> > 	void *f_handle;
> > };
> 
> What is the required size of the f_handle field?  It seems that either  
> this
> should be a well-defined structure size in the header, or the syscall  
> will return an error like EOVERFLOW if the handle will not fit into  
> the supplied handle_size, and it returns the required size in  
> handle_size.


The patch already does that. I have made the syscall return with
-EAGAIN with handle_size containing the required value. May be
EOVERFLOW is the right error value. I will update in the next iteration.



> 
> > 	fh.handle_type = 0;
> > 	fh.handle = malloc(100);
> > 	fh.handle_size = 100/sizeof(int);
> 
> It seems strange to define the handle_size in terms of ints instead of  
> bytes?

If we agree with size in bytes. I can update the kernel to work in bytes.

> 
> > 	ret = syscall(338, argv[1], &fh);
> > 	fd = syscall(339, &fh, O_RDONLY);
> 
> What is the expected lifespan of "fh" to remain valid in the kernel?   
> Presumably this is not just the encoding of the inode number into a  
> buffer, or it would be too easy to forge from userspace.  That means  
> there needs to be some unguessable state in the kernel for each file  
> handle, that may potentially need to be kept indefinitely if there is  
> no expiry.
> 
> Will "fh" be portable between processes?  I assume that is the intent,  
> but good to declare the actual semantics of the syscalls before they  
> go into the kernel.

Life time rule and uniqueness rule should be same as the NFS file
handle. My understanding is there is no expiry. I am not sure what
would be the issues if one could guess the file handle ? It should
be looked at as another way to identify a file. The open call takes
the mode and normal permissions checks are done during open. 

If you are worried about limiting file access by controlling the
permissions of directories i guess the below mail explained that
we cannot depend on directory permissions for such access restrictions
http://article.gmane.org/gmane.linux.file-systems/37419 ?

-aneesh
 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 1/3] vfs: Add name to file handle conversion support
  2010-02-19  5:42 ` [RFC PATCH 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
@ 2010-02-20 18:15   ` Andreas Dilger
  2010-02-22  5:15     ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Andreas Dilger @ 2010-02-20 18:15 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: hch, viro, linux-fsdevel

On 2010-02-18, at 22:42, Aneesh Kumar K.V wrote:
> +static int do_sys_name_to_handle(const char __user *name,
> +				struct file_handle *handle)
> +{
> +	/* we ask for a non connected handle */
> +	retval = exportfs_encode_fh(path.dentry, (struct fid *)f_handle,
> +				&handle_size,  0);
> +	if (handle_size < handle->handle_size) {
> +		if (copy_to_user(handle->f_handle, f_handle,
> +					handle_size*sizeof(u32)))
> +			retval = -EFAULT;

Shouldn't this be "handle_size <= handle->handle_size"?

> +SYSCALL_DEFINE2(name_to_handle, const char __user *, name,
> +		struct file_handle __user *, handle)
> +{
> +	ret = do_sys_name_to_handle(name, &f_handle);
> +	if (copy_to_user(&handle->handle_type,
> +			&f_handle.handle_type, sizeof(f_handle.handle_type)) ||
> +		copy_to_user(&handle->handle_size,
> +			&f_handle.handle_size, sizeof(f_handle.handle_size)))

It seems strange to do the copy_to_user() of f_handle in  
do_sys_name_to_handle(), but the handle_size and handle_type in  
name_to_handle()?  Is there a reason it was split this way?

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-19  5:42 ` [RFC PATCH 2/3] vfs: Add open by file handle support Aneesh Kumar K.V
@ 2010-02-20 18:58   ` Andreas Dilger
  2010-02-20 20:13     ` Brad Boyer
                       ` (2 more replies)
  0 siblings, 3 replies; 36+ messages in thread
From: Andreas Dilger @ 2010-02-20 18:58 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: hch, viro, linux-fsdevel

On 2010-02-18, at 22:42, Aneesh Kumar K.V wrote:
> +static struct dentry *handle_to_dentry(struct path *path,
> +				struct file_handle *fh)
> +{
> +	inode = path->dentry->d_inode;
> +	if (!S_ISREG(inode->i_mode) &&
> +		!S_ISDIR(inode->i_mode) &&
> +		!S_ISLNK(inode->i_mode)) {

Please follow normal indenting rules, like:

	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
	    !S_ISLNK(inode->i_mode)) {

> +	/* should we do some check on handle size ?*/
> +	handle = kmalloc(handle_size, GFP_KERNEL);

Good question.  kmalloc() allows up to 32MB allocations, so a user  
running 1024 threads calling this with bad handles could OOM most  
systems today due to unswappable kernel allocations.  Should there be  
an upper limit on the handle size, like 4096 bytes or so?  Should some  
filesystem legitimately need a larger size the kernel can always be  
changed without problems since the size isn't part of the ABI (it  
should be an internal sanity check), but I can't imagine what needs to  
be put into a file handle that would exceed this size.

> +	dentry = exportfs_decode_fh(path->mnt, (struct fid *)handle,
> +					handle_size, fh->handle_type,
> +					vfs_dentry_acceptable, NULL);

One serious problem I can see with this is that the handle only  
encodes the ino+generation of the inode, and nothing about the device  
itself.  NFS handles this by using the fsid to identify the  
filesystem, but it would be highly confusing to applications if a  
process with a different CWD gets a different file or an error trying  
to open the file handle, or if a new filesystem gets mounted and the  
file handle stops working.  Having the file handle able to positively  
identify a single inode in the system is the most important property  
it has, I think.

Probably one of the important use cases of open_by_handle() is to have  
one process doing pathname resolution and slave threads doing work on  
those handles.  Usually daemon threads change their working directory  
to "/" to avoid pinning some directory.  Even without that, it would  
be impossible for a process to create handles in 2 different  
filesystems, which is totally broken:

          (CWD is /home/adilger)
          name_to_handle("/usr/src/linux/COPYING", &fh);

          fd = open_by_handle(&fh, O_RDONLY);
          (fd = -1; errno = ENOENT or EBADF or whatever)

Without the handle identifying the filesystem in some way this is  
mostly useless.  Encoding the device number would be a simple (though  
non-robust) way to do this, a better way would be with the filesystem  
UUID and adding an in-kernel UUID->sb mapping function (which is  
useful for other things as well).

> +long do_sys_open_by_handle(int dfd, struct file_handle *fh, int  
> flags)
> +{
> +	if (!capable(CAP_SYS_ADMIN))
> +		/* Allow open by handle only by sysadmin */
> +		return -EPERM;

Hmm, I guess this avoids some of the security concerns, but makes it a  
lot less useful.  I was thinking this could be used for e.g. user NFS  
serving or such, but if it is limited to root only then you might as  
well just set up the in-kernel NFSd.  By making the handle hard to  
forge (e.g. generate random key per superblock, sha1(ino+gen+key) and  
store that into fh; someone with more security experience can think of  
a better scheme) then you can reasonably safely dispense with the  
CAP_SYS_ADMIN check because you can be sure that the proper path  
traversal has been done by a trusted process and there is no more  
exposure than unix socket fd passing.

> +	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
> +		(flags & FMODE_WRITE) && IS_APPEND(inode)) {

Please use normal indenting style, aligning on the parenthesis.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-19  9:49   ` Aneesh Kumar K. V
@ 2010-02-20 19:01     ` Andreas Dilger
  2010-02-22  6:27       ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Andreas Dilger @ 2010-02-20 19:01 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: hch, viro, linux-fsdevel

On 2010-02-19, at 02:49, Aneesh Kumar K. V wrote:
> On Fri, 19 Feb 2010 02:34:29 -0700, Andreas Dilger <adilger@sun.com>  
> wrote:
>>
>> What is the expected lifespan of "fh" to remain valid in the kernel?
>> Presumably this is not just the encoding of the inode number into a
>> buffer, or it would be too easy to forge from userspace.  That means
>> there needs to be some unguessable state in the kernel for each file
>> handle, that may potentially need to be kept indefinitely if there is
>> no expiry.
>>
>> Will "fh" be portable between processes?  I assume that is the  
>> intent,
>> but good to declare the actual semantics of the syscalls before they
>> go into the kernel.
>
> Life time rule and uniqueness rule should be same as the NFS file  
> handle. My understanding is there is no expiry.

Yes, and NFS file handles can be a pain in the ass, because it means  
inode numbers/devices/fsid should never be changed for fear of  
breaking NFS.  I'd much rather advertise that handles have a limited  
lifespan (e.g. at most until the server reboots, possibly sooner) so  
that there isn't an expectation of them lasting forever.

> I am not sure what would be the issues if one could guess the file  
> handle?  It should be looked at as another way to identify a file.  
> The open call takes the mode and normal permissions checks are done  
> during open.
>
> If you are worried about limiting file access by controlling the  
> permissions of directories i guess the below mail explained that we  
> cannot depend on directory permissions for such access restrictions http://article.gmane.org/gmane.linux.file-systems/37419 
>  ?

I don't think one person's opinion circumvents many years of POSIX  
behaviour that the only way to access a file is to do path traversal  
and have permission all the way down.  It is one thing for a process  
with access to the file to get a file handle and pass it to another  
process (that can happen today already), but it definitely shouldn't  
be possible for an arbitrary process to invent a file handle and get  
access to a file to which it cannot do path traversal through any  
existing path.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-20 18:58   ` Andreas Dilger
@ 2010-02-20 20:13     ` Brad Boyer
       [not found]       ` <FB88A140-C2EB-4E62-9769-D2524C874C8C@sun.com>
  2010-02-22  6:13     ` Aneesh Kumar K. V
  2010-02-26 19:24     ` J. Bruce Fields
  2 siblings, 1 reply; 36+ messages in thread
From: Brad Boyer @ 2010-02-20 20:13 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Aneesh Kumar K.V, hch, viro, linux-fsdevel

On Sat, Feb 20, 2010 at 11:58:34AM -0700, Andreas Dilger wrote:
> Without the handle identifying the filesystem in some way this is  
> mostly useless.  Encoding the device number would be a simple (though  
> non-robust) way to do this, a better way would be with the filesystem  
> UUID and adding an in-kernel UUID->sb mapping function (which is  
> useful for other things as well).

I certainly have one use in mind for having a static sb->ID mapping
inside the FS driver. It seems like a more reliable way to do NFS
serving of anything more complicated than a standard disk-based
local filesystem. In particular, it would be nice to be able to
serve some synthetic or network/cluster based FS over NFS and have
the file handles be consistent across multiple machines for failover
purposes. Currently specifying the fsid in the exports table is the
only logical way to force this, but that's extra manual work.

I haven't started writing any code to do this, but I have looked
at the current NFS FH code and it seems like it should be reasonable
to add a new set of methods to allow another FS specific field. The
hardest part seems to be holding the size down to the old NFS v2
protocol limits.

	Brad Boyer
	flar@allandria.com

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
       [not found]       ` <FB88A140-C2EB-4E62-9769-D2524C874C8C@sun.com>
@ 2010-02-22  2:46         ` Brad Boyer
  2010-02-26 19:21         ` J. Bruce Fields
  1 sibling, 0 replies; 36+ messages in thread
From: Brad Boyer @ 2010-02-22  2:46 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Aneesh Kumar K.V, hch, viro, linux-fsdevel

On Sun, Feb 21, 2010 at 11:42:45AM -0700, Andreas Dilger wrote:
> On 2010-02-20, at 13:13, Brad Boyer wrote:
> >On Sat, Feb 20, 2010 at 11:58:34AM -0700, Andreas Dilger wrote:
> >>Without the handle identifying the filesystem in some way this is
> >>mostly useless.  Encoding the device number would be a simple (though
> >>non-robust) way to do this, a better way would be with the filesystem
> >>UUID and adding an in-kernel UUID->sb mapping function (which is
> >>useful for other things as well).
> >
> >I certainly have one use in mind for having a static sb->ID mapping
> >inside the FS driver.
> 
> One use I've had for this in the past, then never managed to finish  
> the work is for in-kernel identification of multi-device filesystems.   
> For example, ext3/4 can have an external journal, and currently the  
> journal device is identified by the UUID but we also have to add a  
> "hint" for the kernel for finding it by devno.  This can fail on  
> occasion when devices are renamed.

I'm trying to understand your expectations here. Do you mean that you
want to be able to search for a block device object in the kernel by
UUID?  I'm not saying it wouldn't be useful to have that, but I do
want a feel for what would be involved. Wouldn't it be easier to have
a custom mount.ext3 and mount.ext4 that use libblkid to find the
journal device?

> >It seems like a more reliable way to do NFS
> >serving of anything more complicated than a standard disk-based
> >local filesystem. In particular, it would be nice to be able to
> >serve some synthetic or network/cluster based FS over NFS and have
> >the file handles be consistent across multiple machines for failover
> >purposes. Currently specifying the fsid in the exports table is the
> >only logical way to force this, but that's extra manual work.
> 
> Yes, we looked at this in the past for Lustre as well, and while we  
> had proposed a patch for the NFSd code to extract the FSID from the  
> filesystem, it was turned down because "setting the FSID via a  
> userspace file is the right thing to do".  I have enough on my plate  
> not to wage an uphill battle for this.

It's good to see that someone else already thought of this and that
I'm not crazy for wanting it.

> I don't necessarily agree, because that means administering yet  
> another config parameter outside the filesystem on all of the  
> servers.  I don't mind allowing userspace to override the fs UUID- 
> based FSID, but why not let this be used by default instead of the  
> devno (which is increasingly dynamic these days).
> 
> Maybe with more clustered filesystems in the kernel these days this  
> idea can gain more traction.

It does seem like a logical use case to have a cluster FS supply its
own FSID, since it probably knows better than the kernel how to actually
specify uniqueness. The only complexity is making sure it's possible
to tell the difference between that and one the kernel generates.

Having to set the FSID in the exports table on a large cluster for
a large number of exports is a pain. This is doubly true if any
of it was supposed to be automated in terms of adding new exports.
I know some people working on a NAS-type box with built-in failover
based on Linux and they ran into this problem, and they have to
generate fsid values in their management scripts because of this.

> >I haven't started writing any code to do this, but I have looked
> >at the current NFS FH code and it seems like it should be reasonable
> >to add a new set of methods to allow another FS specific field.
> 
> First would be a method to extract the FSID/UUID from the filesystem.   
> That is immediately useful for even local configs.

Seems like a simple thing to add, but what would be a simple usage that
would be likely to be considered useful? A new feature without any
users is not going to be welcomed.

> >The hardest part seems to be holding the size down to the old NFS v2
> >protocol limits.
> 
> 
> Would it not be possible to encode the FSID/UUID only into NFSv3/v4  
> handles, and leave the NFSv2 handles without this information?  I  
> imagine the usage of NFSv2 is growing ever smaller, and there isn't a  
> requirement to add this extra field there.

It's certainly an option, but it would be nice to support the older
protocol as well if it's possible.

	Brad Boyer
	flar@allandria.com


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 1/3] vfs: Add name to file handle conversion support
  2010-02-20 18:15   ` Andreas Dilger
@ 2010-02-22  5:15     ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2010-02-22  5:15 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: hch, viro, linux-fsdevel

On Sat, 20 Feb 2010 11:15:33 -0700, Andreas Dilger <adilger@sun.com> wrote:
> On 2010-02-18, at 22:42, Aneesh Kumar K.V wrote:
> > +static int do_sys_name_to_handle(const char __user *name,
> > +				struct file_handle *handle)
> > +{
> > +	/* we ask for a non connected handle */
> > +	retval = exportfs_encode_fh(path.dentry, (struct fid *)f_handle,
> > +				&handle_size,  0);
> > +	if (handle_size < handle->handle_size) {
> > +		if (copy_to_user(handle->f_handle, f_handle,
> > +					handle_size*sizeof(u32)))
> > +			retval = -EFAULT;
> 
> Shouldn't this be "handle_size <= handle->handle_size"?


Yes. Will fix in the next iteration.

> 
> > +SYSCALL_DEFINE2(name_to_handle, const char __user *, name,
> > +		struct file_handle __user *, handle)
> > +{
> > +	ret = do_sys_name_to_handle(name, &f_handle);
> > +	if (copy_to_user(&handle->handle_type,
> > +			&f_handle.handle_type, sizeof(f_handle.handle_type)) ||
> > +		copy_to_user(&handle->handle_size,
> > +			&f_handle.handle_size, sizeof(f_handle.handle_size)))
> 
> It seems strange to do the copy_to_user() of f_handle in  
> do_sys_name_to_handle(), but the handle_size and handle_type in  
> name_to_handle()?  Is there a reason it was split this way?
> 

No specific reason.

-aneesh


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-20 18:58   ` Andreas Dilger
  2010-02-20 20:13     ` Brad Boyer
@ 2010-02-22  6:13     ` Aneesh Kumar K. V
  2010-02-22  6:31       ` Dave Chinner
  2010-02-26 19:24     ` J. Bruce Fields
  2 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2010-02-22  6:13 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: hch, viro, linux-fsdevel

On Sat, 20 Feb 2010 11:58:34 -0700, Andreas Dilger <adilger@sun.com> wrote:
> On 2010-02-18, at 22:42, Aneesh Kumar K.V wrote:
> > +static struct dentry *handle_to_dentry(struct path *path,
> > +				struct file_handle *fh)
> > +{
> > +	inode = path->dentry->d_inode;
> > +	if (!S_ISREG(inode->i_mode) &&
> > +		!S_ISDIR(inode->i_mode) &&
> > +		!S_ISLNK(inode->i_mode)) {
> 
> Please follow normal indenting rules, like:
> 
> 	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
> 	    !S_ISLNK(inode->i_mode)) {
> 
> > +	/* should we do some check on handle size ?*/
> > +	handle = kmalloc(handle_size, GFP_KERNEL);
> 
> Good question.  kmalloc() allows up to 32MB allocations, so a user  
> running 1024 threads calling this with bad handles could OOM most  
> systems today due to unswappable kernel allocations.  Should there be  
> an upper limit on the handle size, like 4096 bytes or so?  Should some  
> filesystem legitimately need a larger size the kernel can always be  
> changed without problems since the size isn't part of the ABI (it  
> should be an internal sanity check), but I can't imagine what needs to  
> be put into a file handle that would exceed this size.

At this point i will limit it to 4096 bytes.


> 
> > +	dentry = exportfs_decode_fh(path->mnt, (struct fid *)handle,
> > +					handle_size, fh->handle_type,
> > +					vfs_dentry_acceptable, NULL);
> 
> One serious problem I can see with this is that the handle only  
> encodes the ino+generation of the inode, and nothing about the device  
> itself.  NFS handles this by using the fsid to identify the  
> filesystem, but it would be highly confusing to applications if a  
> process with a different CWD gets a different file or an error trying  
> to open the file handle, or if a new filesystem gets mounted and the  
> file handle stops working.  Having the file handle able to positively  
> identify a single inode in the system is the most important property  
> it has, I think.
> 
> Probably one of the important use cases of open_by_handle() is to have  
> one process doing pathname resolution and slave threads doing work on  
> those handles.  Usually daemon threads change their working directory  
> to "/" to avoid pinning some directory.  Even without that, it would  
> be impossible for a process to create handles in 2 different  
> filesystems, which is totally broken:
> 
>           (CWD is /home/adilger)
>           name_to_handle("/usr/src/linux/COPYING", &fh);
> 
>           fd = open_by_handle(&fh, O_RDONLY);
>           (fd = -1; errno = ENOENT or EBADF or whatever)
> 

do_sys_open_by_handle actually takes an fd. I was planning to do an openat
style syscall also. That should handle the above problem. In case of the
new syscall dirfd will be a file descriptor for a file in the
particular filesystem.


> Without the handle identifying the filesystem in some way this is  
> mostly useless.  Encoding the device number would be a simple (though  
> non-robust) way to do this, a better way would be with the filesystem  
> UUID and adding an in-kernel UUID->sb mapping function (which is  
> useful for other things as well).
> 

But i understand this is an good requirement. I will see what best I
can do.

> > +long do_sys_open_by_handle(int dfd, struct file_handle *fh, int  
> > flags)
> > +{
> > +	if (!capable(CAP_SYS_ADMIN))
> > +		/* Allow open by handle only by sysadmin */
> > +		return -EPERM;
> 
> Hmm, I guess this avoids some of the security concerns, but makes it a  
> lot less useful.  I was thinking this could be used for e.g. user NFS  
> serving or such, but if it is limited to root only then you might as  
> well just set up the in-kernel NFSd.  By making the handle hard to  
> forge (e.g. generate random key per superblock, sha1(ino+gen+key) and  
> store that into fh; someone with more security experience can think of  
> a better scheme) then you can reasonably safely dispense with the  
> CAP_SYS_ADMIN check because you can be sure that the proper path  
> traversal has been done by a trusted process and there is no more  
> exposure than unix socket fd passing.

If we make the handle depend on the random key, how do we make sure
that after a server crash when get the same inode with the same handle.
That would be a requirement for an NFS server right? We can definitely
encode device details or UUID details the same way nfs server does in
fh_compose. But i guess making it random. 

What are the issues of being able to guess the file handle ? May be
there are other ways to handle those.


> 
> > +	if ((!(flags & O_APPEND) || (flags & O_TRUNC)) &&
> > +		(flags & FMODE_WRITE) && IS_APPEND(inode)) {
> 
> Please use normal indenting style, aligning on the parenthesis.


-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-20 19:01     ` Andreas Dilger
@ 2010-02-22  6:27       ` Aneesh Kumar K. V
  0 siblings, 0 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2010-02-22  6:27 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: hch, viro, linux-fsdevel

On Sat, 20 Feb 2010 12:01:38 -0700, Andreas Dilger <adilger@sun.com> wrote:
> On 2010-02-19, at 02:49, Aneesh Kumar K. V wrote:
> > On Fri, 19 Feb 2010 02:34:29 -0700, Andreas Dilger <adilger@sun.com>  
> > wrote:
> >>
> >> What is the expected lifespan of "fh" to remain valid in the kernel?
> >> Presumably this is not just the encoding of the inode number into a
> >> buffer, or it would be too easy to forge from userspace.  That means
> >> there needs to be some unguessable state in the kernel for each file
> >> handle, that may potentially need to be kept indefinitely if there is
> >> no expiry.
> >>
> >> Will "fh" be portable between processes?  I assume that is the  
> >> intent,
> >> but good to declare the actual semantics of the syscalls before they
> >> go into the kernel.
> >
> > Life time rule and uniqueness rule should be same as the NFS file  
> > handle. My understanding is there is no expiry.
> 
> Yes, and NFS file handles can be a pain in the ass, because it means  
> inode numbers/devices/fsid should never be changed for fear of  
> breaking NFS.  I'd much rather advertise that handles have a limited  
> lifespan (e.g. at most until the server reboots, possibly sooner) so  
> that there isn't an expectation of them lasting forever.

That will make the interface not useful for applications like user
space NFS server right ?  Most of them want server to map the handle
to an inode even after a server crash right. The crash recovery might
be done in case of these network file system with the above assumptions.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-22  6:13     ` Aneesh Kumar K. V
@ 2010-02-22  6:31       ` Dave Chinner
  0 siblings, 0 replies; 36+ messages in thread
From: Dave Chinner @ 2010-02-22  6:31 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: Andreas Dilger, hch, viro, linux-fsdevel

On Mon, Feb 22, 2010 at 11:43:43AM +0530, Aneesh Kumar K. V wrote:
> On Sat, 20 Feb 2010 11:58:34 -0700, Andreas Dilger <adilger@sun.com> wrote:
> > Without the handle identifying the filesystem in some way this is  
> > mostly useless.  Encoding the device number would be a simple (though  
> > non-robust) way to do this, a better way would be with the filesystem  
> > UUID and adding an in-kernel UUID->sb mapping function (which is  
> > useful for other things as well).
> > 
> 
> But i understand this is an good requirement. I will see what best I
> can do.

For better or worse, the xfs_handle_t contains a fsid derived from
the UUID if the filesystem.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-19  5:42 [RFC PATCH] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
                   ` (3 preceding siblings ...)
  2010-02-19  9:34 ` [RFC PATCH] Generic name to handle and open by handle syscalls Andreas Dilger
@ 2010-02-22 23:06 ` Jonathan Corbet
  2010-02-23  0:56   ` James Morris
  2010-02-23  8:58   ` Aneesh Kumar K. V
  4 siblings, 2 replies; 36+ messages in thread
From: Jonathan Corbet @ 2010-02-22 23:06 UTC (permalink / raw)
  To: Aneesh Kumar K.V; +Cc: hch, viro, linux-fsdevel

On Fri, 19 Feb 2010 11:12:26 +0530
"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> The below set of patches implement open by handle support using exportfs
> operations.

I have a couple of questions...starting with: what is the use case for
this functionality?  There must, clearly, be some kind of application
which needs to be able to open by file handle, but I'm not sure what
that would be.

I agree with Andreas that the handle length looks like a bit of a
crapshoot.  How should an application developer know how much memory to
dedicate to this?

In do_sys_name_to_handle() you have:

> +	f_handle = kmalloc(handle_size, GFP_KERNEL);

handle_size comes directly from user space.  Perhaps it should be
sanity-checked?  (I would also just use handle->handle_size; later
handle_size contains the *real* handle size, and I found that confusing.
Yes, I'm easily confused, but...)

> +	handle->handle_type = retval;
> +	if (handle_size < handle->handle_size) {
> +		if (copy_to_user(handle->f_handle, f_handle,
> +					handle_size*sizeof(u32)))
> +			retval = -EFAULT;
> +		retval = 0;
> +	} else
> +		retval = -EAGAIN;
> +	handle->handle_size = handle_size;

EAGAIN seems like a strange thing to return here.  ENOSPC maybe?

Are you missing an "else" before the "retval = 0;" line?  You'll never
return -EFAULT here.

In do_sys_open_by_handle():

> +	if (!capable(CAP_SYS_ADMIN))
> +		/* Allow open by handle only by sysadmin */
> +		return -EPERM;

I assume this is to avoid access to readable files within unreadable
directories?   Otherwise you could check the permissions of the target
dentry.

CAP_SYS_ADMIN seems like the wrong capability, though.  CAP_DAC_OVERRIDE
might make more sense?

Is there any sense in allowing anybody to call name_to_handle() if
open_by_handle() is restricted?

Thanks,

jon

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-22 23:06 ` Jonathan Corbet
@ 2010-02-23  0:56   ` James Morris
  2010-02-23  8:58   ` Aneesh Kumar K. V
  1 sibling, 0 replies; 36+ messages in thread
From: James Morris @ 2010-02-23  0:56 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Aneesh Kumar K.V, Christoph Hellwig, viro, linux-fsdevel,
	linux-security-module

[Adding LSM to the cc list...]


On Mon, 22 Feb 2010, Jonathan Corbet wrote:

> On Fri, 19 Feb 2010 11:12:26 +0530
> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
> 
> > The below set of patches implement open by handle support using exportfs
> > operations.
> 
> I have a couple of questions...starting with: what is the use case for
> this functionality?  There must, clearly, be some kind of application
> which needs to be able to open by file handle, but I'm not sure what
> that would be.
> 
> I agree with Andreas that the handle length looks like a bit of a
> crapshoot.  How should an application developer know how much memory to
> dedicate to this?
> 
> In do_sys_name_to_handle() you have:
> 
> > +	f_handle = kmalloc(handle_size, GFP_KERNEL);
> 
> handle_size comes directly from user space.  Perhaps it should be
> sanity-checked?  (I would also just use handle->handle_size; later
> handle_size contains the *real* handle size, and I found that confusing.
> Yes, I'm easily confused, but...)
> 
> > +	handle->handle_type = retval;
> > +	if (handle_size < handle->handle_size) {
> > +		if (copy_to_user(handle->f_handle, f_handle,
> > +					handle_size*sizeof(u32)))
> > +			retval = -EFAULT;
> > +		retval = 0;
> > +	} else
> > +		retval = -EAGAIN;
> > +	handle->handle_size = handle_size;
> 
> EAGAIN seems like a strange thing to return here.  ENOSPC maybe?
> 
> Are you missing an "else" before the "retval = 0;" line?  You'll never
> return -EFAULT here.
> 
> In do_sys_open_by_handle():
> 
> > +	if (!capable(CAP_SYS_ADMIN))
> > +		/* Allow open by handle only by sysadmin */
> > +		return -EPERM;
> 
> I assume this is to avoid access to readable files within unreadable
> directories?   Otherwise you could check the permissions of the target
> dentry.
> 
> CAP_SYS_ADMIN seems like the wrong capability, though.  CAP_DAC_OVERRIDE
> might make more sense?
> 
> Is there any sense in allowing anybody to call name_to_handle() if
> open_by_handle() is restricted?
> 
> Thanks,
> 
> jon
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

-- 
James Morris
<jmorris@namei.org>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-22 23:06 ` Jonathan Corbet
  2010-02-23  0:56   ` James Morris
@ 2010-02-23  8:58   ` Aneesh Kumar K. V
  2010-02-23 19:46     ` Jonathan Corbet
                       ` (2 more replies)
  1 sibling, 3 replies; 36+ messages in thread
From: Aneesh Kumar K. V @ 2010-02-23  8:58 UTC (permalink / raw)
  To: Jonathan Corbet; +Cc: hch, viro, linux-fsdevel

On Mon, 22 Feb 2010 16:06:59 -0700, Jonathan Corbet <corbet@lwn.net> wrote:
> On Fri, 19 Feb 2010 11:12:26 +0530
> "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
> 
> > The below set of patches implement open by handle support using exportfs
> > operations.
> 
> I have a couple of questions...starting with: what is the use case for
> this functionality?  There must, clearly, be some kind of application
> which needs to be able to open by file handle, but I'm not sure what
> that would be.
>

User space NFS server would be one example. Also if we want to NFS
export another network file system which have a user space server, that
would be another reason.

> I agree with Andreas that the handle length looks like a bit of a
> crapshoot.  How should an application developer know how much memory to
> dedicate to this?
> 
> In do_sys_name_to_handle() you have:
> 
> > +	f_handle = kmalloc(handle_size, GFP_KERNEL);
> 
> handle_size comes directly from user space.  Perhaps it should be
> sanity-checked?  (I would also just use handle->handle_size; later
> handle_size contains the *real* handle size, and I found that confusing.
> Yes, I'm easily confused, but...)

I already had a FIXME is another patch to sanity check the handle
size. I was not sure what should be maximum size. Andreas suggested 4096
So i will update the patch in the next iteration with that value. I will
also update the variable names as you suggested above.

> 
> > +	handle->handle_type = retval;
> > +	if (handle_size < handle->handle_size) {
> > +		if (copy_to_user(handle->f_handle, f_handle,
> > +					handle_size*sizeof(u32)))
> > +			retval = -EFAULT;
> > +		retval = 0;
> > +	} else
> > +		retval = -EAGAIN;
> > +	handle->handle_size = handle_size;
> 
> EAGAIN seems like a strange thing to return here.  ENOSPC maybe?

The reason for me using EAGAIN was to give a hint that if the user
reissue the call with new returned handle_size we should be ok. But
Andreas suggested EOVERFLOW. So i will use EOVERFLOW. Do you think
ENOSPC is he right error value here ?

> 
> Are you missing an "else" before the "retval = 0;" line?  You'll never
> return -EFAULT here.
> 

good catch. Will fix in the next iteration


> In do_sys_open_by_handle():
> 
> > +	if (!capable(CAP_SYS_ADMIN))
> > +		/* Allow open by handle only by sysadmin */
> > +		return -EPERM;
> 
> I assume this is to avoid access to readable files within unreadable
> directories?   Otherwise you could check the permissions of the target
> dentry.
> 
> CAP_SYS_ADMIN seems like the wrong capability, though.  CAP_DAC_OVERRIDE
> might make more sense?

I guess CAP_DAC_OVERRIDE should be enough here. The CAP_SYS_ADMIN
restriction came from the xfs ioctl version. 

> 
> Is there any sense in allowing anybody to call name_to_handle() if
> open_by_handle() is restricted?
> 

I guess it should still be useful because handle can be passed around
and later given to a process that have enough permission to open by handle.

-aneesh


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-23  8:58   ` Aneesh Kumar K. V
@ 2010-02-23 19:46     ` Jonathan Corbet
  2010-02-24  0:49     ` Dave Chinner
  2010-02-25  4:53     ` Serge E. Hallyn
  2 siblings, 0 replies; 36+ messages in thread
From: Jonathan Corbet @ 2010-02-23 19:46 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: hch, viro, linux-fsdevel

On Tue, 23 Feb 2010 14:28:36 +0530
"Aneesh Kumar K. V" <aneesh.kumar@linux.vnet.ibm.com> wrote:

> > EAGAIN seems like a strange thing to return here.  ENOSPC maybe?  
> 
> The reason for me using EAGAIN was to give a hint that if the user
> reissue the call with new returned handle_size we should be ok. But
> Andreas suggested EOVERFLOW. So i will use EOVERFLOW. Do you think
> ENOSPC is he right error value here ?

EOVERFLOW is probably better than ENOSPC, I'd go with that.

jon

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-23  8:58   ` Aneesh Kumar K. V
  2010-02-23 19:46     ` Jonathan Corbet
@ 2010-02-24  0:49     ` Dave Chinner
  2010-02-25  4:53     ` Serge E. Hallyn
  2 siblings, 0 replies; 36+ messages in thread
From: Dave Chinner @ 2010-02-24  0:49 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: Jonathan Corbet, hch, viro, linux-fsdevel

On Tue, Feb 23, 2010 at 02:28:36PM +0530, Aneesh Kumar K. V wrote:
> On Mon, 22 Feb 2010 16:06:59 -0700, Jonathan Corbet <corbet@lwn.net> wrote:
> > On Fri, 19 Feb 2010 11:12:26 +0530
> > "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > 
> > > The below set of patches implement open by handle support using exportfs
> > > operations.
> > 
> > I have a couple of questions...starting with: what is the use case for
> > this functionality?  There must, clearly, be some kind of application
> > which needs to be able to open by file handle, but I'm not sure what
> > that would be.
> >
> 
> User space NFS server would be one example. Also if we want to NFS
> export another network file system which have a user space server, that
> would be another reason.

Some history - the XFS handle interface was once used for a
userspace NFS server on Irix. IIRC it was replaced by a kernel based
server in about 1995 because the syscall overhead was a
performance limiting factor and since then only XFS specific
applications have used the interface. OOC, does an up-to-date
userspace NFS server that could make use of this even exist today?

As it is, these days the XFS handle interface is not intended for
such a use. From the XFS libhandle manpage:

DESCRIPTION
       These functions provide a way to perform certain filesystem
       operations without using  a  file  descriptor  to  access
       filesystem objects.  They  are intended for use by a limited
       set of system utilities such as backup programs. They are
       supported only by the XFS filesystem.  Link with the
       libhandle library to access these functions.

i.e. it is intended to be used for tight integration of userspace
filesystem utilities into the filesystem.

As for example uses, the first is xfsdump and xfsrestore - XFS's
optimised backup and restore programs.  The second is DMF - SGI's
HSM that is built on top of XFS. Both of these could not do what
they do without the handle interface...

FWIW, These applications both require XFS's handles to be stable for the
life of an inode as well as unique across the system. Hence the
XFS handle contains a fsid and is stable across reboots.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-23  8:58   ` Aneesh Kumar K. V
  2010-02-23 19:46     ` Jonathan Corbet
  2010-02-24  0:49     ` Dave Chinner
@ 2010-02-25  4:53     ` Serge E. Hallyn
  2010-02-25 14:30       ` Jonathan Corbet
  2 siblings, 1 reply; 36+ messages in thread
From: Serge E. Hallyn @ 2010-02-25  4:53 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: Jonathan Corbet, hch, viro, linux-fsdevel

Quoting Aneesh Kumar K. V (aneesh.kumar@linux.vnet.ibm.com):
> On Mon, 22 Feb 2010 16:06:59 -0700, Jonathan Corbet <corbet@lwn.net> wrote:
> > On Fri, 19 Feb 2010 11:12:26 +0530
> > "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> wrote:
> > 
> > > The below set of patches implement open by handle support using exportfs
> > > operations.
> > 
> > I have a couple of questions...starting with: what is the use case for
> > this functionality?  There must, clearly, be some kind of application
> > which needs to be able to open by file handle, but I'm not sure what
> > that would be.
> >
> 
> User space NFS server would be one example. Also if we want to NFS
> export another network file system which have a user space server, that
> would be another reason.
> 
> > I agree with Andreas that the handle length looks like a bit of a
> > crapshoot.  How should an application developer know how much memory to
> > dedicate to this?
> > 
> > In do_sys_name_to_handle() you have:
> > 
> > > +	f_handle = kmalloc(handle_size, GFP_KERNEL);
> > 
> > handle_size comes directly from user space.  Perhaps it should be
> > sanity-checked?  (I would also just use handle->handle_size; later
> > handle_size contains the *real* handle size, and I found that confusing.
> > Yes, I'm easily confused, but...)
> 
> I already had a FIXME is another patch to sanity check the handle
> size. I was not sure what should be maximum size. Andreas suggested 4096
> So i will update the patch in the next iteration with that value. I will
> also update the variable names as you suggested above.
> 
> > 
> > > +	handle->handle_type = retval;
> > > +	if (handle_size < handle->handle_size) {
> > > +		if (copy_to_user(handle->f_handle, f_handle,
> > > +					handle_size*sizeof(u32)))
> > > +			retval = -EFAULT;
> > > +		retval = 0;
> > > +	} else
> > > +		retval = -EAGAIN;
> > > +	handle->handle_size = handle_size;
> > 
> > EAGAIN seems like a strange thing to return here.  ENOSPC maybe?
> 
> The reason for me using EAGAIN was to give a hint that if the user
> reissue the call with new returned handle_size we should be ok. But
> Andreas suggested EOVERFLOW. So i will use EOVERFLOW. Do you think
> ENOSPC is he right error value here ?
> 
> > 
> > Are you missing an "else" before the "retval = 0;" line?  You'll never
> > return -EFAULT here.
> > 
> 
> good catch. Will fix in the next iteration
> 
> 
> > In do_sys_open_by_handle():
> > 
> > > +	if (!capable(CAP_SYS_ADMIN))
> > > +		/* Allow open by handle only by sysadmin */
> > > +		return -EPERM;
> > 
> > I assume this is to avoid access to readable files within unreadable
> > directories?   Otherwise you could check the permissions of the target
> > dentry.
> > 
> > CAP_SYS_ADMIN seems like the wrong capability, though.  CAP_DAC_OVERRIDE
> > might make more sense?
> 
> I guess CAP_DAC_OVERRIDE should be enough here. The CAP_SYS_ADMIN
> restriction came from the xfs ioctl version. 

I'd be curious to see the reasons for requiring it in the xfs version.
Do you have any docs about it?  You're still doing a dentry_open, and
you got the filename fd somehow so the name shouldn't be a secret...
An LSM hook - specifically to make sure that selinux still allows you
to read the path (access to file->f_security) - might belong here, but
I dunno about the capable check.

thanks,
-serge

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-25  4:53     ` Serge E. Hallyn
@ 2010-02-25 14:30       ` Jonathan Corbet
  2010-02-25 15:19         ` Serge E. Hallyn
  0 siblings, 1 reply; 36+ messages in thread
From: Jonathan Corbet @ 2010-02-25 14:30 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Aneesh Kumar K. V, hch, viro, linux-fsdevel

On Wed, 24 Feb 2010 22:53:23 -0600
"Serge E. Hallyn" <serue@us.ibm.com> wrote:

> I'd be curious to see the reasons for requiring it in the xfs version.
> Do you have any docs about it?  You're still doing a dentry_open, and
> you got the filename fd somehow so the name shouldn't be a secret...
> An LSM hook - specifically to make sure that selinux still allows you
> to read the path (access to file->f_security) - might belong here,

I had assumed it was the path that was the issue; a file handle is
divorced from that path, so there's no way to know if a process can
search its way down to the file or not.  That would leave the system
open to the same "open the file after path permissions have changed"
problem that people have complained about in other contexts.  It seems
like you could also fish for files by opening random file handles; I
don't know how large the search space is, so it's hard for me to say
how practical that would be.

jon

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-25 14:30       ` Jonathan Corbet
@ 2010-02-25 15:19         ` Serge E. Hallyn
  2010-02-25 17:55           ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Serge E. Hallyn @ 2010-02-25 15:19 UTC (permalink / raw)
  To: Jonathan Corbet; +Cc: Aneesh Kumar K. V, hch, viro, linux-fsdevel

Quoting Jonathan Corbet (corbet@lwn.net):
> On Wed, 24 Feb 2010 22:53:23 -0600
> "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> 
> > I'd be curious to see the reasons for requiring it in the xfs version.
> > Do you have any docs about it?  You're still doing a dentry_open, and
> > you got the filename fd somehow so the name shouldn't be a secret...
> > An LSM hook - specifically to make sure that selinux still allows you
> > to read the path (access to file->f_security) - might belong here,
> 
> I had assumed it was the path that was the issue; a file handle is
> divorced from that path, so there's no way to know if a process can
> search its way down to the file or not.  That would leave the system
> open to the same "open the file after path permissions have changed"
> problem that people have complained about in other contexts.  It seems
> like you could also fish for files by opening random file handles; I
> don't know how large the search space is, so it's hard for me to say
> how practical that would be.

Right, and so I think what is really needed is some DAC checks at the
newly-introduced sys_name_to_handle(), which near as I could tell are
not there at all.

Then, if process X is going to sys_open_by_handle() using pathname
fd 4, then fd 4 had to be created using sys_name_to_handle() either
by X or by some process Y which handed fd 4 over to X.  In either
case, it's basically no different from a open_at() where the
directory fd was handed to X by Y at that point, right?

So, if do_sys_name_to_handle() actually does DAC checks (somewhere
in the depths of the exportfs code?) then all should be fine now.
But I don't see any...

-serge

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-25 15:19         ` Serge E. Hallyn
@ 2010-02-25 17:55           ` Aneesh Kumar K. V
  2010-02-25 18:11             ` Serge E. Hallyn
  0 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2010-02-25 17:55 UTC (permalink / raw)
  To: Serge E. Hallyn, Jonathan Corbet; +Cc: hch, viro, linux-fsdevel

On Thu, 25 Feb 2010 09:19:09 -0600, "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> Quoting Jonathan Corbet (corbet@lwn.net):
> > On Wed, 24 Feb 2010 22:53:23 -0600
> > "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > 
> > > I'd be curious to see the reasons for requiring it in the xfs version.
> > > Do you have any docs about it?  You're still doing a dentry_open, and
> > > you got the filename fd somehow so the name shouldn't be a secret...
> > > An LSM hook - specifically to make sure that selinux still allows you
> > > to read the path (access to file->f_security) - might belong here,
> > 
> > I had assumed it was the path that was the issue; a file handle is
> > divorced from that path, so there's no way to know if a process can
> > search its way down to the file or not.  That would leave the system
> > open to the same "open the file after path permissions have changed"
> > problem that people have complained about in other contexts.  It seems
> > like you could also fish for files by opening random file handles; I
> > don't know how large the search space is, so it's hard for me to say
> > how practical that would be.
> 
> Right, and so I think what is really needed is some DAC checks at the
> newly-introduced sys_name_to_handle(), which near as I could tell are
> not there at all.
> 
> Then, if process X is going to sys_open_by_handle() using pathname
> fd 4, then fd 4 had to be created using sys_name_to_handle() either
> by X or by some process Y which handed fd 4 over to X.  In either
> case, it's basically no different from a open_at() where the
> directory fd was handed to X by Y at that point, right?
> 
> So, if do_sys_name_to_handle() actually does DAC checks (somewhere
> in the depths of the exportfs code?) then all should be fine now.
> But I don't see any...


user_lpath(..) used to convert name to path does path look using normal
permission check. So we do check for permissions when converting a path
to handle. But the problem still remain with respect to somebody being
able to guess the file handle and use that in open_by_handle. Currently
open_by handle() is limited to CAP_SYS_ADMIN (which i am updating to
CAP_DAC_OVERRIDE). 

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-25 17:55           ` Aneesh Kumar K. V
@ 2010-02-25 18:11             ` Serge E. Hallyn
  2010-02-25 18:20               ` Aneesh Kumar K. V
  0 siblings, 1 reply; 36+ messages in thread
From: Serge E. Hallyn @ 2010-02-25 18:11 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: Jonathan Corbet, hch, viro, linux-fsdevel

Quoting Aneesh Kumar K. V (aneesh.kumar@linux.vnet.ibm.com):
> On Thu, 25 Feb 2010 09:19:09 -0600, "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > Quoting Jonathan Corbet (corbet@lwn.net):
> > > On Wed, 24 Feb 2010 22:53:23 -0600
> > > "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > > 
> > > > I'd be curious to see the reasons for requiring it in the xfs version.
> > > > Do you have any docs about it?  You're still doing a dentry_open, and
> > > > you got the filename fd somehow so the name shouldn't be a secret...
> > > > An LSM hook - specifically to make sure that selinux still allows you
> > > > to read the path (access to file->f_security) - might belong here,
> > > 
> > > I had assumed it was the path that was the issue; a file handle is
> > > divorced from that path, so there's no way to know if a process can
> > > search its way down to the file or not.  That would leave the system
> > > open to the same "open the file after path permissions have changed"
> > > problem that people have complained about in other contexts.  It seems
> > > like you could also fish for files by opening random file handles; I
> > > don't know how large the search space is, so it's hard for me to say
> > > how practical that would be.
> > 
> > Right, and so I think what is really needed is some DAC checks at the
> > newly-introduced sys_name_to_handle(), which near as I could tell are
> > not there at all.
> > 
> > Then, if process X is going to sys_open_by_handle() using pathname
> > fd 4, then fd 4 had to be created using sys_name_to_handle() either
> > by X or by some process Y which handed fd 4 over to X.  In either
> > case, it's basically no different from a open_at() where the
> > directory fd was handed to X by Y at that point, right?
> > 
> > So, if do_sys_name_to_handle() actually does DAC checks (somewhere
> > in the depths of the exportfs code?) then all should be fine now.
> > But I don't see any...
> 
> 
> user_lpath(..) used to convert name to path does path look using normal

Ah, there it is, thanks.

In that case I don't see where there is any reason for special
privilege on the part of a process who receives that fd.

Let me put it another way:  if task Y does sys_name_to_handle() to
create an fd, then we should not absolve Y of the responsibility of
not passing that fd around willy nilly by papering over
sys_open_by_handle() with a requirement for superuser privileges.

And if Y were intentionally misbehaving, then it could just share
the pathname in countless already-existing ways, and then just for
the heck of it open the file itself and pass that fd to the client.
So not allowing the pathname fd to be transferred seems worthless.

> permission check. So we do check for permissions when converting a path
> to handle. But the problem still remain with respect to somebody being
> able to guess the file handle and use that in open_by_handle. Currently

But why is that a problem?  I don't see how it can be abused by a user,
unless it is the specific intent for some server Y to create a pathname
fd FD and pass that to a client X, where X should be able to see the file
contents but not the pathname?  And if that were the case then you
wouldn't require CAP_SYS_ADMIN for the client.

I'm obviously missing something - can you give a specific example?

> open_by handle() is limited to CAP_SYS_ADMIN (which i am updating to
> CAP_DAC_OVERRIDE). 

-serge

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-25 18:11             ` Serge E. Hallyn
@ 2010-02-25 18:20               ` Aneesh Kumar K. V
  2010-02-25 19:05                 ` Serge E. Hallyn
  0 siblings, 1 reply; 36+ messages in thread
From: Aneesh Kumar K. V @ 2010-02-25 18:20 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Jonathan Corbet, hch, viro, linux-fsdevel

On Thu, 25 Feb 2010 12:11:13 -0600, "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> Quoting Aneesh Kumar K. V (aneesh.kumar@linux.vnet.ibm.com):
> > On Thu, 25 Feb 2010 09:19:09 -0600, "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > > Quoting Jonathan Corbet (corbet@lwn.net):
> > > > On Wed, 24 Feb 2010 22:53:23 -0600
> > > > "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > > > 
> > > > > I'd be curious to see the reasons for requiring it in the xfs version.
> > > > > Do you have any docs about it?  You're still doing a dentry_open, and
> > > > > you got the filename fd somehow so the name shouldn't be a secret...
> > > > > An LSM hook - specifically to make sure that selinux still allows you
> > > > > to read the path (access to file->f_security) - might belong here,
> > > > 
> > > > I had assumed it was the path that was the issue; a file handle is
> > > > divorced from that path, so there's no way to know if a process can
> > > > search its way down to the file or not.  That would leave the system
> > > > open to the same "open the file after path permissions have changed"
> > > > problem that people have complained about in other contexts.  It seems
> > > > like you could also fish for files by opening random file handles; I
> > > > don't know how large the search space is, so it's hard for me to say
> > > > how practical that would be.
> > > 
> > > Right, and so I think what is really needed is some DAC checks at the
> > > newly-introduced sys_name_to_handle(), which near as I could tell are
> > > not there at all.
> > > 
> > > Then, if process X is going to sys_open_by_handle() using pathname
> > > fd 4, then fd 4 had to be created using sys_name_to_handle() either
> > > by X or by some process Y which handed fd 4 over to X.  In either
> > > case, it's basically no different from a open_at() where the
> > > directory fd was handed to X by Y at that point, right?
> > > 
> > > So, if do_sys_name_to_handle() actually does DAC checks (somewhere
> > > in the depths of the exportfs code?) then all should be fine now.
> > > But I don't see any...
> > 
> > 
> > user_lpath(..) used to convert name to path does path look using normal
> 
> Ah, there it is, thanks.
> 
> In that case I don't see where there is any reason for special
> privilege on the part of a process who receives that fd.
> 
> Let me put it another way:  if task Y does sys_name_to_handle() to
> create an fd, then we should not absolve Y of the responsibility of
> not passing that fd around willy nilly by papering over
> sys_open_by_handle() with a requirement for superuser privileges.
> 
> And if Y were intentionally misbehaving, then it could just share
> the pathname in countless already-existing ways, and then just for
> the heck of it open the file itself and pass that fd to the client.
> So not allowing the pathname fd to be transferred seems worthless.
> 
> > permission check. So we do check for permissions when converting a path
> > to handle. But the problem still remain with respect to somebody being
> > able to guess the file handle and use that in open_by_handle. Currently
> 
> But why is that a problem?  I don't see how it can be abused by a user,
> unless it is the specific intent for some server Y to create a pathname
> fd FD and pass that to a client X, where X should be able to see the file
> contents but not the pathname?  And if that were the case then you
> wouldn't require CAP_SYS_ADMIN for the client.
> 
> I'm obviously missing something - can you give a specific example?

Currently whether a user is allowed to open a file is also determined by
the permission of the directory components of the path. That's denying
execute bits on the directory prevent the lookup and so user can't open
the file. (Whether we can depend on this behaviour is debated before).
With file handle being valid for the life of the file, if a user is able
to guess handle for file /a/b/c then he will be able to open 'c' without
looking at the execute bits of 'a' or 'b'.

-aneesh

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-25 18:20               ` Aneesh Kumar K. V
@ 2010-02-25 19:05                 ` Serge E. Hallyn
  2010-02-26  9:12                   ` Andreas Dilger
  0 siblings, 1 reply; 36+ messages in thread
From: Serge E. Hallyn @ 2010-02-25 19:05 UTC (permalink / raw)
  To: Aneesh Kumar K. V; +Cc: Jonathan Corbet, hch, viro, linux-fsdevel

Quoting Aneesh Kumar K. V (aneesh.kumar@linux.vnet.ibm.com):
> On Thu, 25 Feb 2010 12:11:13 -0600, "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > Quoting Aneesh Kumar K. V (aneesh.kumar@linux.vnet.ibm.com):
> > > On Thu, 25 Feb 2010 09:19:09 -0600, "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > > > Quoting Jonathan Corbet (corbet@lwn.net):
> > > > > On Wed, 24 Feb 2010 22:53:23 -0600
> > > > > "Serge E. Hallyn" <serue@us.ibm.com> wrote:
> > > > > 
> > > > > > I'd be curious to see the reasons for requiring it in the xfs version.
> > > > > > Do you have any docs about it?  You're still doing a dentry_open, and
> > > > > > you got the filename fd somehow so the name shouldn't be a secret...
> > > > > > An LSM hook - specifically to make sure that selinux still allows you
> > > > > > to read the path (access to file->f_security) - might belong here,
> > > > > 
> > > > > I had assumed it was the path that was the issue; a file handle is
> > > > > divorced from that path, so there's no way to know if a process can
> > > > > search its way down to the file or not.  That would leave the system
> > > > > open to the same "open the file after path permissions have changed"
> > > > > problem that people have complained about in other contexts.  It seems
> > > > > like you could also fish for files by opening random file handles; I
> > > > > don't know how large the search space is, so it's hard for me to say
> > > > > how practical that would be.
> > > > 
> > > > Right, and so I think what is really needed is some DAC checks at the
> > > > newly-introduced sys_name_to_handle(), which near as I could tell are
> > > > not there at all.
> > > > 
> > > > Then, if process X is going to sys_open_by_handle() using pathname
> > > > fd 4, then fd 4 had to be created using sys_name_to_handle() either
> > > > by X or by some process Y which handed fd 4 over to X.  In either
> > > > case, it's basically no different from a open_at() where the
> > > > directory fd was handed to X by Y at that point, right?
> > > > 
> > > > So, if do_sys_name_to_handle() actually does DAC checks (somewhere
> > > > in the depths of the exportfs code?) then all should be fine now.
> > > > But I don't see any...
> > > 
> > > 
> > > user_lpath(..) used to convert name to path does path look using normal
> > 
> > Ah, there it is, thanks.
> > 
> > In that case I don't see where there is any reason for special
> > privilege on the part of a process who receives that fd.
> > 
> > Let me put it another way:  if task Y does sys_name_to_handle() to
> > create an fd, then we should not absolve Y of the responsibility of
> > not passing that fd around willy nilly by papering over
> > sys_open_by_handle() with a requirement for superuser privileges.
> > 
> > And if Y were intentionally misbehaving, then it could just share
> > the pathname in countless already-existing ways, and then just for
> > the heck of it open the file itself and pass that fd to the client.
> > So not allowing the pathname fd to be transferred seems worthless.
> > 
> > > permission check. So we do check for permissions when converting a path
> > > to handle. But the problem still remain with respect to somebody being
> > > able to guess the file handle and use that in open_by_handle. Currently
> > 
> > But why is that a problem?  I don't see how it can be abused by a user,
> > unless it is the specific intent for some server Y to create a pathname
> > fd FD and pass that to a client X, where X should be able to see the file
> > contents but not the pathname?  And if that were the case then you
> > wouldn't require CAP_SYS_ADMIN for the client.
> > 
> > I'm obviously missing something - can you give a specific example?
> 
> Currently whether a user is allowed to open a file is also determined by
> the permission of the directory components of the path. That's denying
> execute bits on the directory prevent the lookup and so user can't open
> the file. (Whether we can depend on this behaviour is debated before).
> With file handle being valid for the life of the file, if a user is able
> to guess handle for file /a/b/c then he will be able to open 'c' without

Jipes!  I was misunderstanding what you were doing with the struct
file_handle.  Your use of the phrase 'guess handle for file' set me
straight.  I thought you were encoding a file_handle into an fd using
sys_name_to_handle(), and passing that fd along over a unix sock - so a
client would have to receive a validly opened fd to use it.  If it can
just guess at a string, then yeah, please do hide that behind as much
privilege as you can!

I take it then that the file_handles must be communicated over something
other than unix socks (else you could just pass an fd and let the client
either use the fd, or re-open /proc/self/fd/<fd>)?  Would you be able to
at least add a touch of randomness and hashing to make this sa[fn]er and
turn this into a single-use capability?  Or does that not fit your usage
model?  So the server would come up with some random bytes B, calculate
H = hash(F|B) (hash of filename concatenated with random bytes), pass F
and H along to client while storing F and B, so client can pass F and H
to sys_open_by_handle() which confirms that that was a valid file
handle?

thanks,
-serge

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-25 19:05                 ` Serge E. Hallyn
@ 2010-02-26  9:12                   ` Andreas Dilger
  2010-02-26 19:56                     ` Serge E. Hallyn
  0 siblings, 1 reply; 36+ messages in thread
From: Andreas Dilger @ 2010-02-26  9:12 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Aneesh Kumar K. V, Jonathan Corbet, hch, viro, linux-fsdevel

On 2010-02-25, at 12:05, Serge E. Hallyn wrote:
> Jipes!  I was misunderstanding what you were doing with the struct
> file_handle.  Your use of the phrase 'guess handle for file' set me
> straight.  I thought you were encoding a file_handle into an fd using
> sys_name_to_handle(), and passing that fd along over a unix sock -  
> so a
> client would have to receive a validly opened fd to use it.  If it can
> just guess at a string, then yeah, please do hide that behind as much
> privilege as you can!

It seems to me that there are two different, though related, use  
cases.  In some cases it would be desirable to allow "short lived"  
handles to be passed between processes, and in other cases it is  
necessary to have "long lived" handles.

For "short lived" or "strong" handles they should contain a capability  
that prevents arbitrary handles from being guessed, so such a handle  
could be used by any process, possibly for at most a limited duration  
or use count.  For "long lived" handles, either the capability needs  
to be stored persistently so that it can be validated even after a  
server reboot, or the "weak" handles (without capabilities) that can  
be guessed should only be usable with CAP_DAC_OVERRIDE.

> I take it then that the file_handles must be communicated over  
> something
> other than unix socks (else you could just pass an fd and let the  
> client
> either use the fd, or re-open /proc/self/fd/<fd>)?  Would you be  
> able to
> at least add a touch of randomness and hashing to make this sa[fn]er  
> and
> turn this into a single-use capability?  Or does that not fit your  
> usage
> model?  So the server would come up with some random bytes B,  
> calculate
> H = hash(F|B) (hash of filename concatenated with random bytes),  
> pass F
> and H along to client while storing F and B, so client can pass F  
> and H
> to sys_open_by_handle() which confirms that that was a valid file
> handle?

Something like that.  I'm not a security expert, and capability  
designs exist and I'd suggest we don't try to invent anything here.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
       [not found]       ` <FB88A140-C2EB-4E62-9769-D2524C874C8C@sun.com>
  2010-02-22  2:46         ` Brad Boyer
@ 2010-02-26 19:21         ` J. Bruce Fields
  2010-02-28 17:55           ` Andreas Dilger
  1 sibling, 1 reply; 36+ messages in thread
From: J. Bruce Fields @ 2010-02-26 19:21 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Brad Boyer, Aneesh Kumar K.V, hch, viro, linux-fsdevel

On Sun, Feb 21, 2010 at 11:42:45AM -0700, Andreas Dilger wrote:
> Yes, we looked at this in the past for Lustre as well, and while we had 
> proposed a patch for the NFSd code to extract the FSID from the  
> filesystem, it was turned down because "setting the FSID via a userspace 
> file is the right thing to do".  I have enough on my plate not to wage an 
> uphill battle for this.
>
> I don't necessarily agree, because that means administering yet another 
> config parameter outside the filesystem on all of the servers.  I don't 
> mind allowing userspace to override the fs UUID-based FSID, but why not 
> let this be used by default instead of the devno (which is increasingly 
> dynamic these days).
>
> Maybe with more clustered filesystems in the kernel these days this idea 
> can gain more traction.

I agree that a cluster filesystem shouldn't need fsid= set right across
all servers.

But doesn't the libblkid uuid stuff as it's now implemented give you
what you need?

>> I haven't started writing any code to do this, but I have looked
>> at the current NFS FH code and it seems like it should be reasonable
>> to add a new set of methods to allow another FS specific field.
>
> First would be a method to extract the FSID/UUID from the filesystem.   
> That is immediately useful for even local configs.
>
>> The hardest part seems to be holding the size down to the old NFS v2
>> protocol limits.
>
> Would it not be possible to encode the FSID/UUID only into NFSv3/v4  
> handles, and leave the NFSv2 handles without this information?  I  
> imagine the usage of NFSv2 is growing ever smaller, and there isn't a  
> requirement to add this extra field there.

Yeah, I don't think NFSv2 support should be a high priority for new
features.

--b.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-20 18:58   ` Andreas Dilger
  2010-02-20 20:13     ` Brad Boyer
  2010-02-22  6:13     ` Aneesh Kumar K. V
@ 2010-02-26 19:24     ` J. Bruce Fields
  2 siblings, 0 replies; 36+ messages in thread
From: J. Bruce Fields @ 2010-02-26 19:24 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Aneesh Kumar K.V, hch, viro, linux-fsdevel

On Sat, Feb 20, 2010 at 11:58:34AM -0700, Andreas Dilger wrote:
> On 2010-02-18, at 22:42, Aneesh Kumar K.V wrote:
>> +long do_sys_open_by_handle(int dfd, struct file_handle *fh, int  
>> flags)
>> +{
>> +	if (!capable(CAP_SYS_ADMIN))
>> +		/* Allow open by handle only by sysadmin */
>> +		return -EPERM;
>
> Hmm, I guess this avoids some of the security concerns, but makes it a  
> lot less useful.  I was thinking this could be used for e.g. user NFS  
> serving or such, but if it is limited to root only then you might as  
> well just set up the in-kernel NFSd.  By making the handle hard to forge 
> (e.g. generate random key per superblock, sha1(ino+gen+key) and store 
> that into fh; someone with more security experience can think of a better 
> scheme) then you can reasonably safely dispense with the CAP_SYS_ADMIN 
> check because you can be sure that the proper path traversal has been 
> done by a trusted process and there is no more exposure than unix socket 
> fd passing.

The problem with filehandles is that they never die; they have to
survive essentially indefinitely, even across server reboots.

A file descriptor has a better-defined lifetime.

A "secret" that can never be expired doesn't strike me as a very good
secret.

--b.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
  2010-02-26  9:12                   ` Andreas Dilger
@ 2010-02-26 19:56                     ` Serge E. Hallyn
  0 siblings, 0 replies; 36+ messages in thread
From: Serge E. Hallyn @ 2010-02-26 19:56 UTC (permalink / raw)
  To: Andreas Dilger
  Cc: Aneesh Kumar K. V, Jonathan Corbet, hch, viro, linux-fsdevel

Quoting Andreas Dilger (adilger@sun.com):
> On 2010-02-25, at 12:05, Serge E. Hallyn wrote:
> >Jipes!  I was misunderstanding what you were doing with the struct
> >file_handle.  Your use of the phrase 'guess handle for file' set me
> >straight.  I thought you were encoding a file_handle into an fd using
> >sys_name_to_handle(), and passing that fd along over a unix sock -
> >so a
> >client would have to receive a validly opened fd to use it.  If it can
> >just guess at a string, then yeah, please do hide that behind as much
> >privilege as you can!
> 
> It seems to me that there are two different, though related, use
> cases.  In some cases it would be desirable to allow "short lived"
> handles to be passed between processes, and in other cases it is
> necessary to have "long lived" handles.

Yes, but what do the client and server sides look like?  Can this
be done using fds instead of file_handles?  I.e. is requiring they
be sent over a unix sock ok?  Probably not, but it seems worth
checking.

> For "short lived" or "strong" handles they should contain a
> capability that prevents arbitrary handles from being guessed, so
> such a handle could be used by any process, possibly for at most a
> limited duration or use count.  For "long lived" handles, either the
> capability needs to be stored persistently so that it can be
> validated even after a server reboot, or the "weak" handles (without
> capabilities) that can be guessed should only be usable with
> CAP_DAC_OVERRIDE.
> 
> >I take it then that the file_handles must be communicated over
> >something
> >other than unix socks (else you could just pass an fd and let the
> >client
> >either use the fd, or re-open /proc/self/fd/<fd>)?  Would you be
> >able to
> >at least add a touch of randomness and hashing to make this
> >sa[fn]er and
> >turn this into a single-use capability?  Or does that not fit your
> >usage
> >model?  So the server would come up with some random bytes B,
> >calculate
> >H = hash(F|B) (hash of filename concatenated with random bytes),
> >pass F
> >and H along to client while storing F and B, so client can pass F
> >and H
> >to sys_open_by_handle() which confirms that that was a valid file
> >handle?
> 
> 
> Something like that.  I'm not a security expert, and capability
> designs exist and I'd suggest we don't try to invent anything here.

All the better  :)

-serge

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-26 19:21         ` J. Bruce Fields
@ 2010-02-28 17:55           ` Andreas Dilger
  2010-02-28 19:00             ` J. Bruce Fields
  0 siblings, 1 reply; 36+ messages in thread
From: Andreas Dilger @ 2010-02-28 17:55 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Brad Boyer, Aneesh Kumar K.V, hch, viro, linux-fsdevel

On 2010-02-26, at 12:21, J. Bruce Fields wrote:
> On Sun, Feb 21, 2010 at 11:42:45AM -0700, Andreas Dilger wrote:
>> Yes, we looked at this in the past for Lustre as well, and while we  
>> had
>> proposed a patch for the NFSd code to extract the FSID from the
>> filesystem, it was turned down because "setting the FSID via a  
>> userspace
>> file is the right thing to do".  I have enough on my plate not to  
>> wage an
>> uphill battle for this.
>
> I agree that a cluster filesystem shouldn't need fsid= set right  
> across
> all servers.
>
> But doesn't the libblkid uuid stuff as it's now implemented give you
> what you need?

I'm not sure what you mean?  On the clients (where the NFS servers are  
running) there are no block devices, so I don't think libblkid is  
relevant.  Lustre itself already can provide a UUID/fsid that is the  
same on all clients, but there is no way to pass this to NFSd.

If there is interest to revive this idea, I'll try to dig up the old  
patches we had. I believe that they set a FS_NFS_FSID (or similarly  
named) flag in the file_system_type, and possibly a method that  
extracted this information for NFSd.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-28 17:55           ` Andreas Dilger
@ 2010-02-28 19:00             ` J. Bruce Fields
  2010-03-01 18:25               ` Oleg Drokin
  0 siblings, 1 reply; 36+ messages in thread
From: J. Bruce Fields @ 2010-02-28 19:00 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Brad Boyer, Aneesh Kumar K.V, hch, viro, linux-fsdevel

On Sun, Feb 28, 2010 at 10:55:34AM -0700, Andreas Dilger wrote:
> On 2010-02-26, at 12:21, J. Bruce Fields wrote:
>> On Sun, Feb 21, 2010 at 11:42:45AM -0700, Andreas Dilger wrote:
>>> Yes, we looked at this in the past for Lustre as well, and while we  
>>> had
>>> proposed a patch for the NFSd code to extract the FSID from the
>>> filesystem, it was turned down because "setting the FSID via a  
>>> userspace
>>> file is the right thing to do".  I have enough on my plate not to  
>>> wage an
>>> uphill battle for this.
>>
>> I agree that a cluster filesystem shouldn't need fsid= set right  
>> across
>> all servers.
>>
>> But doesn't the libblkid uuid stuff as it's now implemented give you
>> what you need?
>
> I'm not sure what you mean?

With recent mountd and kernel, knfsd generates filehandles using a uuid
passed down from mountd, which mountd gets from libblkid.

> On the clients (where the NFS servers are  
> running) there are no block devices, so I don't think libblkid is  
> relevant.

Oh, OK.  (Though for a shared-disk cluster-filesystem it should be
enough, yes?)

> Lustre itself already can provide a UUID/fsid that is the  
> same on all clients, but there is no way to pass this to NFSd.
>
> If there is interest to revive this idea, I'll try to dig up the old  
> patches we had. I believe that they set a FS_NFS_FSID (or similarly  
> named) flag in the file_system_type, and possibly a method that  
> extracted this information for NFSd.

OK, sure, but if it's only of use to lustre than I don't see how to
justify a kernel patch.

Another option would be to provide an alternative to
nfs-utils/utils/mountd/cache.c's get_uuid() that can request the
filesystem's uuid (assuming you've got an easy way to get it from
userspace).  That might also save having to add yet another case to e.g.
fs/nfsd/nfsfh.c:set_version_and_fsid_type().

--b.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-02-28 19:00             ` J. Bruce Fields
@ 2010-03-01 18:25               ` Oleg Drokin
  2010-03-01 21:25                 ` J. Bruce Fields
  0 siblings, 1 reply; 36+ messages in thread
From: Oleg Drokin @ 2010-03-01 18:25 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: Andreas Dilger, Brad Boyer, Aneesh Kumar K.V, hch, viro,
	linux-fsdevel

Hello!

On Feb 28, 2010, at 2:00 PM, J. Bruce Fields wrote:

>> If there is interest to revive this idea, I'll try to dig up the old  
>> patches we had. I believe that they set a FS_NFS_FSID (or similarly  
>> named) flag in the file_system_type, and possibly a method that  
>> extracted this information for NFSd.
> OK, sure, but if it's only of use to lustre than I don't see how to
> justify a kernel patch.

I wonder how does GFS/OCFS2 do this? They cannot depend on the
SAN block device to have same major:minor on every exporting node, right?

> Another option would be to provide an alternative to
> nfs-utils/utils/mountd/cache.c's get_uuid() that can request the
> filesystem's uuid (assuming you've got an easy way to get it from
> userspace).  That might also save having to add yet another case to e.g.
> fs/nfsd/nfsfh.c:set_version_and_fsid_type().

Hopefully there would be a standard way to request the uuid, not
just a huge switch with separate code for every fstype.

Bye,
    Oleg

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH 2/3] vfs: Add open by file handle support
  2010-03-01 18:25               ` Oleg Drokin
@ 2010-03-01 21:25                 ` J. Bruce Fields
  0 siblings, 0 replies; 36+ messages in thread
From: J. Bruce Fields @ 2010-03-01 21:25 UTC (permalink / raw)
  To: Oleg Drokin
  Cc: Andreas Dilger, Brad Boyer, Aneesh Kumar K.V, hch, viro,
	linux-fsdevel

On Mon, Mar 01, 2010 at 01:25:56PM -0500, Oleg Drokin wrote:
> Hello!
> 
> On Feb 28, 2010, at 2:00 PM, J. Bruce Fields wrote:
> 
> >> If there is interest to revive this idea, I'll try to dig up the old  
> >> patches we had. I believe that they set a FS_NFS_FSID (or similarly  
> >> named) flag in the file_system_type, and possibly a method that  
> >> extracted this information for NFSd.
> > OK, sure, but if it's only of use to lustre than I don't see how to
> > justify a kernel patch.
> 
> I wonder how does GFS/OCFS2 do this? They cannot depend on the
> SAN block device to have same major:minor on every exporting node, right?

Right, libblkid calculates a uuid from the contents somehow.

> > Another option would be to provide an alternative to
> > nfs-utils/utils/mountd/cache.c's get_uuid() that can request the
> > filesystem's uuid (assuming you've got an easy way to get it from
> > userspace).  That might also save having to add yet another case to e.g.
> > fs/nfsd/nfsfh.c:set_version_and_fsid_type().
> 
> Hopefully there would be a standard way to request the uuid, not
> just a huge switch with separate code for every fstype.

Sure.

--b.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [RFC PATCH] Generic name to handle and open by handle syscalls
@ 2010-03-11 13:14 DENIEL Philippe
  0 siblings, 0 replies; 36+ messages in thread
From: DENIEL Philippe @ 2010-03-11 13:14 UTC (permalink / raw)
  To: linux-fsdevel

Hi,

I just found this message in this mailing list archive. This follows a
search I did after having a look at this thread on LWN.net
(http://lwn.net/Articles/375888/).

> The below set of patches implement open by handle support using 
exportfs > operations. This allows user space application to map a file
name to file > handle and later open the file using handle. This should
be usable > for userspace NFS and 9P server. XFS already support this
with the ioctls > XFS_IOC_PATH_TO_HANDLE and XFS_IOC_OPEN_BY_HANDLE.

After reading the mailing-list archive, it sounds like many people do
not see the benefit in having a way to address files from user space via
their fh and not only through the use of their name. I am leading a
project (see http://nfs-ganesha.sourceforge.net) that implements NFS
functionality from User Space. My server has various backends modules
that make it possible to address different kind of namespaces. When we
did the one dedicated to POSIX, we were strongly limited by the POSIX
functions that reference the files only by their names. We implemented
kind of "name <-> fh" tracking feature via the use of a db, but it is a
bit heavy and not performant. Having a way to translate name to fh, or
open file by fh would be great : it would make it possible to get rid of
all this heavy plumber we did with the db. In fact, half of the work is
already done in my project : we use LUSTRE a lot on our site, and we had
a need for exporting LUSTRE via NFS. There is a user space LUSTRE-API
was provide this "name to fid" and "open by fid" feature. We used it to
build a LUSTRE dedicated backend, and this fit perfectly our needs. So,
if patches are made to perform name-to-handle and open-by-handle
operation, this would be great. Of course, this is my personal point of
view, but personally I have a clear need of this feature.

Regards

Philippe

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2010-03-11 14:27 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-02-19  5:42 [RFC PATCH] Generic name to handle and open by handle syscalls Aneesh Kumar K.V
2010-02-19  5:42 ` [RFC PATCH 1/3] vfs: Add name to file handle conversion support Aneesh Kumar K.V
2010-02-20 18:15   ` Andreas Dilger
2010-02-22  5:15     ` Aneesh Kumar K. V
2010-02-19  5:42 ` [RFC PATCH 2/3] vfs: Add open by file handle support Aneesh Kumar K.V
2010-02-20 18:58   ` Andreas Dilger
2010-02-20 20:13     ` Brad Boyer
     [not found]       ` <FB88A140-C2EB-4E62-9769-D2524C874C8C@sun.com>
2010-02-22  2:46         ` Brad Boyer
2010-02-26 19:21         ` J. Bruce Fields
2010-02-28 17:55           ` Andreas Dilger
2010-02-28 19:00             ` J. Bruce Fields
2010-03-01 18:25               ` Oleg Drokin
2010-03-01 21:25                 ` J. Bruce Fields
2010-02-22  6:13     ` Aneesh Kumar K. V
2010-02-22  6:31       ` Dave Chinner
2010-02-26 19:24     ` J. Bruce Fields
2010-02-19  5:42 ` [RFC PATCH 3/3] x86: Add new syscalls for x86_32 Aneesh Kumar K.V
2010-02-19  9:34 ` [RFC PATCH] Generic name to handle and open by handle syscalls Andreas Dilger
2010-02-19  9:49   ` Aneesh Kumar K. V
2010-02-20 19:01     ` Andreas Dilger
2010-02-22  6:27       ` Aneesh Kumar K. V
2010-02-22 23:06 ` Jonathan Corbet
2010-02-23  0:56   ` James Morris
2010-02-23  8:58   ` Aneesh Kumar K. V
2010-02-23 19:46     ` Jonathan Corbet
2010-02-24  0:49     ` Dave Chinner
2010-02-25  4:53     ` Serge E. Hallyn
2010-02-25 14:30       ` Jonathan Corbet
2010-02-25 15:19         ` Serge E. Hallyn
2010-02-25 17:55           ` Aneesh Kumar K. V
2010-02-25 18:11             ` Serge E. Hallyn
2010-02-25 18:20               ` Aneesh Kumar K. V
2010-02-25 19:05                 ` Serge E. Hallyn
2010-02-26  9:12                   ` Andreas Dilger
2010-02-26 19:56                     ` Serge E. Hallyn
  -- strict thread matches above, loose matches on Subject: below --
2010-03-11 13:14 DENIEL Philippe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).