* [RFC v0 1/4] vfs: add copy_range syscall and vfs entry point
2013-05-14 21:15 [RFC v0 0/4] sys_copy_range() rough draft Zach Brown
@ 2013-05-14 21:15 ` Zach Brown
2013-05-15 19:44 ` Eric Wong
2013-05-14 21:15 ` [RFC v0 2/4] x86: add sys_copy_range to syscall tables Zach Brown
` (3 subsequent siblings)
4 siblings, 1 reply; 16+ messages in thread
From: Zach Brown @ 2013-05-14 21:15 UTC (permalink / raw)
To: Martin K. Petersen, Trond Myklebust, linux-kernel, linux-fsdevel,
linux-btrfs, linux-nfs
This adds a syscall and vfs entry point for copy_range which offloads
data copying between existing files.
The syscall is a thin wrapper around the vfs entry point. Its arguments
are inspired by sys_splice().
The behaviour of the vfs helper is derived from the current btrfs
CLONE_RANGE ioctl.
---
fs/Makefile | 2 +-
fs/copy_range.c | 127 ++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 3 +
include/uapi/asm-generic/unistd.h | 4 +-
kernel/sys_ni.c | 1 +
5 files changed, 135 insertions(+), 2 deletions(-)
create mode 100644 fs/copy_range.c
diff --git a/fs/Makefile b/fs/Makefile
index 4fe6df3..1be83b3 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o
+ stack.o fs_struct.o statfs.o copy_range.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/copy_range.c b/fs/copy_range.c
new file mode 100644
index 0000000..3000b9f
--- /dev/null
+++ b/fs/copy_range.c
@@ -0,0 +1,127 @@
+/*
+ * "copy_range": offload data copying between existing files
+ *
+ * Copyright (C) 2013 Zach Brown <zab@redhat.com>
+ */
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/fsnotify.h>
+
+/**
+ * vfs_copy_range - ask the filesystem to copy bytes between two open files
+ * @file_in: regular file to read from
+ * @pos_in: byte offset in @file_in where the copy starts
+ * @file_out: regular file to write to
+ * @pos_out: byte offset in @file_out where the copied bytes land
+ * @count: how many bytes to copy
+ *
+ * The copy itself is delegated to the ->copy_range() file operation of
+ * @file_in.
+ *
+ * Returns the number of bytes copied, counted from the start of the range,
+ * or a negative errno.  The result can be shorter than @count when an
+ * error is hit part way through; in that case the destination bytes that
+ * follow the copied ones may hold old data, source data or zeros,
+ * depending on the implementation.
+ *
+ * The whole source range must lie inside i_size of @file_in.  A
+ * destination range that reaches past the end of @file_out extends it.
+ */
+ssize_t vfs_copy_range(struct file *file_in, loff_t pos_in,
+		       struct file *file_out, loff_t pos_out,
+		       size_t count)
+{
+	struct inode *src;
+	struct inode *dst;
+	ssize_t copied;
+
+	if (!count)
+		return 0;
+
+	/* only failures matter: the full count is used, MAX_RW_COUNT ignored */
+	copied = rw_verify_area(READ, file_in, &pos_in, count);
+	if (copied < 0)
+		return copied;
+	copied = rw_verify_area(WRITE, file_out, &pos_out, count);
+	if (copied < 0)
+		return copied;
+
+	/* source readable, destination writable and not append-only */
+	if (!(file_in->f_mode & FMODE_READ))
+		return -EINVAL;
+	if (!(file_out->f_mode & FMODE_WRITE))
+		return -EINVAL;
+	if (file_out->f_flags & O_APPEND)
+		return -EINVAL;
+	if (!file_in->f_op || !file_in->f_op->copy_range)
+		return -EINVAL;
+
+	src = file_inode(file_in);
+	dst = file_inode(file_out);
+
+	/* reject wrapped offsets and source ranges that run past i_size */
+	if (pos_in + count < pos_in)
+		return -EINVAL;
+	if (pos_out + count < pos_out)
+		return -EINVAL;
+	if (pos_in + count > i_size_read(src))
+		return -EINVAL;
+
+	/* XXX do we want this test? btrfs_ioctl_clone_range() */
+	if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode))
+		return -EISDIR;
+
+	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
+		return -EINVAL;
+
+	/* both files must live on the same superblock and the same mount */
+	if (src->i_sb != dst->i_sb)
+		return -EXDEV;
+	if (file_in->f_path.mnt != file_out->f_path.mnt)
+		return -EXDEV;
+
+	/* copying within a single file is not supported for now */
+	if (src == dst)
+		return -EINVAL;
+
+	copied = mnt_want_write_file(file_out);
+	if (copied)
+		return copied;
+
+	copied = file_in->f_op->copy_range(file_in, pos_in, file_out, pos_out,
+					   count);
+	if (copied > 0) {
+		fsnotify_access(file_in);
+		add_rchar(current, copied);
+		fsnotify_modify(file_out);
+		add_wchar(current, copied);
+	}
+	inc_syscr(current);
+	inc_syscw(current);
+
+	mnt_drop_write_file(file_out);
+
+	return copied;
+}
+EXPORT_SYMBOL(vfs_copy_range);
+
+/*
+ * copy_range(2): read both starting offsets from user memory, resolve the
+ * two descriptors and hand everything to vfs_copy_range().  Returns what
+ * vfs_copy_range() returns, -EFAULT if an offset cannot be read, or -EBADF
+ * if either descriptor does not resolve to an open file.
+ */
+SYSCALL_DEFINE5(copy_range, int, fd_in, loff_t __user *, upos_in,
+		int, fd_out, loff_t __user *, upos_out, size_t, count)
+{
+	struct fd src;
+	struct fd dst;
+	loff_t off_in;
+	loff_t off_out;
+	ssize_t copied = -EBADF;
+
+	if (get_user(off_in, upos_in))
+		return -EFAULT;
+	if (get_user(off_out, upos_out))
+		return -EFAULT;
+
+	src = fdget(fd_in);
+	dst = fdget(fd_out);
+
+	if (src.file && dst.file)
+		copied = vfs_copy_range(src.file, off_in, dst.file, off_out,
+					count);
+
+	/* both references are dropped whether or not the lookups succeeded */
+	fdput(src);
+	fdput(dst);
+
+	return copied;
+}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 43db02e..6214893 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1543,6 +1543,7 @@ struct file_operations {
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
int (*show_fdinfo)(struct seq_file *m, struct file *f);
+ ssize_t (*copy_range)(struct file *, loff_t, struct file *, loff_t, size_t);
};
struct inode_operations {
@@ -1588,6 +1589,8 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
unsigned long, loff_t *);
extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
unsigned long, loff_t *);
+extern ssize_t vfs_copy_range(struct file *, loff_t , struct file *, loff_t,
+ size_t);
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 0cc74c4..3935d1c 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -692,9 +692,11 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
__SYSCALL(__NR_finit_module, sys_finit_module)
+#define __NR_copy_range 274
+__SYSCALL(__NR_copy_range, sys_copy_range)
#undef __NR_syscalls
-#define __NR_syscalls 274
+#define __NR_syscalls 275
/*
* All syscalls below here should go away really,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7078052..af7808a 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -151,6 +151,7 @@ cond_syscall(sys_process_vm_readv);
cond_syscall(sys_process_vm_writev);
cond_syscall(compat_sys_process_vm_readv);
cond_syscall(compat_sys_process_vm_writev);
+cond_syscall(sys_copy_range);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
--
1.7.11.7
^ permalink raw reply related [flat|nested] 16+ messages in thread* [RFC v0 3/4] btrfs: add .copy_range file operation
2013-05-14 21:15 [RFC v0 0/4] sys_copy_range() rough draft Zach Brown
2013-05-14 21:15 ` [RFC v0 1/4] vfs: add copy_range syscall and vfs entry point Zach Brown
2013-05-14 21:15 ` [RFC v0 2/4] x86: add sys_copy_range to syscall tables Zach Brown
@ 2013-05-14 21:15 ` Zach Brown
2013-05-14 21:15 ` [RFC v0 4/4] nfs, nfsd: rough sys_copy_range and COPY support Zach Brown
2013-05-14 21:42 ` [RFC v0 0/4] sys_copy_range() rough draft Dave Chinner
4 siblings, 0 replies; 16+ messages in thread
From: Zach Brown @ 2013-05-14 21:15 UTC (permalink / raw)
To: Martin K. Petersen, Trond Myklebust, linux-kernel, linux-fsdevel,
linux-btrfs, linux-nfs
This rearranges the existing CLONE_RANGE ioctl implementation so that the
.copy_range file operation can call the core loop that copies file data
extent items.
The extent copying loop is lifted up into its own function. It retains
the core btrfs error checks that should be shared between the
CLONE_RANGE ioctl and copy_range syscall.
Signed-off-by: Zach Brown <zab@redhat.com>
---
fs/btrfs/ctree.h | 3 ++
fs/btrfs/file.c | 1 +
fs/btrfs/ioctl.c | 122 +++++++++++++++++++++++++++++++++----------------------
3 files changed, 77 insertions(+), 49 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 63c328a..bf9555c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3612,6 +3612,9 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
struct page **pages, size_t num_pages,
loff_t pos, size_t write_bytes,
struct extent_state **cached);
+ssize_t btrfs_copy_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4205ba7..d75cc07 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2460,6 +2460,7 @@ const struct file_operations btrfs_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl,
#endif
+ .copy_range = btrfs_copy_range,
};
void btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0de4a2f..ac035d8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2463,13 +2463,10 @@ out:
return ret;
}
-static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- u64 off, u64 olen, u64 destoff)
+static noinline int btrfs_clone_extents(struct inode *inode, struct inode *src,
+ u64 off, u64 olen, u64 destoff)
{
- struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct fd src_file;
- struct inode *src;
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
struct extent_buffer *leaf;
@@ -2491,59 +2488,22 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
* they don't overlap)?
*/
- /* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
- return -EINVAL;
-
if (btrfs_root_readonly(root))
return -EROFS;
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- src_file = fdget(srcfd);
- if (!src_file.file) {
- ret = -EBADF;
- goto out_drop_write;
- }
-
- ret = -EXDEV;
- if (src_file.file->f_path.mnt != file->f_path.mnt)
- goto out_fput;
-
- src = file_inode(src_file.file);
-
- ret = -EINVAL;
- if (src == inode)
- goto out_fput;
-
- /* the src must be open for reading */
- if (!(src_file.file->f_mode & FMODE_READ))
- goto out_fput;
-
/* don't make the dst file partly checksummed */
if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- goto out_fput;
-
- ret = -EISDIR;
- if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- goto out_fput;
-
- ret = -EXDEV;
- if (src->i_sb != inode->i_sb)
- goto out_fput;
+ return -EINVAL;
- ret = -ENOMEM;
buf = vmalloc(btrfs_level_size(root, 0));
if (!buf)
- goto out_fput;
+ return -ENOMEM;
path = btrfs_alloc_path();
if (!path) {
vfree(buf);
- goto out_fput;
+ return -ENOMEM;
}
path->reada = 2;
@@ -2555,10 +2515,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
}
- /* determine range to clone */
- ret = -EINVAL;
- if (off + len > src->i_size || off + len < off)
- goto out_unlock;
+ /* CLONE_RANGE can have len == 0, copy_range won't */
if (len == 0)
olen = len = src->i_size - off;
/* if we extend to eof, continue to block boundary */
@@ -2566,6 +2523,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
len = ALIGN(src->i_size, bs) - off;
/* verify the end result is block aligned */
+ ret = -EINVAL;
if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
!IS_ALIGNED(destoff, bs))
goto out_unlock;
@@ -2849,6 +2807,72 @@ out_unlock:
mutex_unlock(&inode->i_mutex);
vfree(buf);
btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * .copy_range file operation: clone the extents of the requested source
+ * range into the destination file.  btrfs_clone_extents() reports 0 on
+ * success, which is turned into the byte count that was asked for; any
+ * other result is passed through unchanged.
+ */
+ssize_t btrfs_copy_range(struct file *file_in, loff_t pos_in,
+			 struct file *file_out, loff_t pos_out,
+			 size_t count)
+{
+	struct inode *dst = file_inode(file_out);
+	struct inode *src = file_inode(file_in);
+	ssize_t err;
+
+	err = btrfs_clone_extents(dst, src, pos_in, count, pos_out);
+	if (err != 0)
+		return err;
+
+	return count;
+}
+
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+ u64 off, u64 len, u64 destoff)
+{
+ struct inode *inode = file_inode(file);
+ struct fd src_file;
+ struct inode *src;
+ long ret;
+
+ /* the destination must be opened for writing */
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
+ return -EINVAL;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ src_file = fdget(srcfd);
+ if (!src_file.file) {
+ ret = -EBADF;
+ goto out_drop_write;
+ }
+
+ ret = -EXDEV;
+ if (src_file.file->f_path.mnt != file->f_path.mnt)
+ goto out_fput;
+
+ src = file_inode(src_file.file);
+
+ ret = -EINVAL;
+ if (src == inode)
+ goto out_fput;
+
+ /* the src must be open for reading */
+ if (!(src_file.file->f_mode & FMODE_READ))
+ goto out_fput;
+
+ ret = -EISDIR;
+ if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
+ goto out_fput;
+
+ ret = -EXDEV;
+ if (src->i_sb != inode->i_sb)
+ goto out_fput;
+
+ ret = -EINVAL;
+ if (off + len > src->i_size || off + len < off)
+ goto out_fput;
+
+ ret = btrfs_clone_extents(inode, src, off, len, destoff);
+
out_fput:
fdput(src_file);
out_drop_write:
--
1.7.11.7
^ permalink raw reply related [flat|nested] 16+ messages in thread* [RFC v0 4/4] nfs, nfsd: rough sys_copy_range and COPY support
2013-05-14 21:15 [RFC v0 0/4] sys_copy_range() rough draft Zach Brown
` (2 preceding siblings ...)
2013-05-14 21:15 ` [RFC v0 3/4] btrfs: add .copy_range file operation Zach Brown
@ 2013-05-14 21:15 ` Zach Brown
2013-05-15 20:19 ` J. Bruce Fields
2013-05-14 21:42 ` [RFC v0 0/4] sys_copy_range() rough draft Dave Chinner
4 siblings, 1 reply; 16+ messages in thread
From: Zach Brown @ 2013-05-14 21:15 UTC (permalink / raw)
To: Martin K. Petersen, Trond Myklebust, linux-kernel, linux-fsdevel,
linux-btrfs, linux-nfs
This crude patch illustrates the simplest plumbing involved in
supporting sys_copy_range with the NFS COPY operation that's pending in
the 4.2 draft spec.
The patch is based on a previous prototype that used the COPY op to
implement sys_copyfileat which created a new file (based on the ocfs2
reflink ioctl). By contrast, this copies file contents between existing
files.
There's still a lot of implementation and testing to do, but this can
get discussion going.
---
fs/nfs/file.c | 25 +++++++++
fs/nfs/nfs4proc.c | 72 ++++++++++++++++++++++++++
fs/nfs/nfs4xdr.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4proc.c | 35 +++++++++++++
fs/nfsd/nfs4xdr.c | 43 ++++++++++++++++
fs/nfsd/vfs.c | 41 +++++++++++++++
fs/nfsd/vfs.h | 3 ++
fs/nfsd/xdr4.h | 21 ++++++++
include/linux/nfs4.h | 6 ++-
include/linux/nfs_xdr.h | 24 +++++++++
10 files changed, 401 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a87a44f..7d7bedf 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -917,6 +917,30 @@ int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
}
EXPORT_SYMBOL_GPL(nfs_setlease);
+/*
+ * .copy_range file operation for NFS: log the request and forward it to
+ * the protocol version's ->copy() method.  Returns -EOPNOTSUPP when the
+ * protocol version in use provides no ->copy() method.
+ */
+ssize_t nfs_copy_range(struct file *file_in, loff_t pos_in,
+		       struct file *file_out, loff_t pos_out,
+		       size_t count)
+{
+	struct dentry *src = file_in->f_path.dentry;
+	struct dentry *dst = file_out->f_path.dentry;
+
+	dprintk("NFS copy_range(%s/%s@%llu, %s/%s@%llu, %zd)\n",
+		src->d_parent->d_name.name, src->d_name.name,
+		(unsigned long long)pos_in,
+		dst->d_parent->d_name.name, dst->d_name.name,
+		(unsigned long long)pos_out, count);
+
+	if (!NFS_PROTO(src->d_inode)->copy)
+		return -EOPNOTSUPP;
+
+	/* no destination name: the copy targets the existing file itself */
+	return NFS_PROTO(src->d_inode)->copy(src->d_inode, dst->d_inode, NULL,
+					     0, count, pos_in, pos_out);
+}
+
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
.read = do_sync_read,
@@ -934,5 +958,6 @@ const struct file_operations nfs_file_operations = {
.splice_write = nfs_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = nfs_setlease,
+ .copy_range = nfs_copy_range,
};
EXPORT_SYMBOL_GPL(nfs_file_operations);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8fbc100..1586b3e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5405,6 +5405,75 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
}
#ifdef CONFIG_NFS_V4_1
+/*
+ * Issue one synchronous COPY compound.
+ *
+ * @inode supplies the filehandle that is put first and saved (the copy
+ * source); @dir supplies the filehandle that is current when COPY runs
+ * (the copy destination).  @name and @flags are passed through to the
+ * COPY arguments.
+ *
+ * Returns the number of bytes copied or a negative errno.  A successful
+ * COPY reply carries no byte count, so success is reported as @nbyte,
+ * matching what btrfs_copy_range() does.  On failure the byte count from
+ * the reply is returned when the server supplied a plausible one.
+ */
+static loff_t _nfs4_proc_copy(struct inode *inode,
+			      struct inode *dir,
+			      struct qstr *name,
+			      int flags,
+			      loff_t nbyte,
+			      loff_t src_offset,
+			      loff_t dst_offset)
+{
+	struct nfs_server *server = NFS_SERVER(inode);
+	int status;
+	loff_t ret;
+	struct nfs_copy_args arg = {
+		.fh = NFS_FH(inode),
+		.dir_fh = NFS_FH(dir),
+		.src_offset = src_offset,
+		.dst_offset = dst_offset,
+		.count = nbyte,
+		.flags = flags,
+		.destination = name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_copy_res res = {
+		.fh = NFS_FH(inode),
+		.callback_id_length = 0,
+		.callback_id = NULL,
+		.bytes_copied = 0,
+		.server = server,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
+		.rpc_argp = &arg,
+		.rpc_resp = &res,
+	};
+
+	res.fattr = nfs_alloc_fattr();
+	if (res.fattr == NULL)
+		return -ENOMEM;
+
+	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
+				&res.seq_res, 1);
+
+	/*
+	 * Keep the result in a loff_t: squeezing a 64-bit byte count through
+	 * "int status" would truncate large copies.  The server-provided
+	 * count is only trusted when it does not exceed what was requested.
+	 */
+	if (status == 0)
+		ret = nbyte;
+	else if (res.bytes_copied && res.bytes_copied <= (u64)nbyte)
+		ret = res.bytes_copied;
+	else
+		ret = status;
+
+	nfs_free_fattr(res.fattr);
+	return ret;
+}
+
+/*
+ * ->copy() method for NFSv4: run _nfs4_proc_copy(), pass any negative
+ * result through nfs4_handle_exception() and go around again for as long
+ * as the exception state asks for a retry.
+ */
+static loff_t nfs4_proc_copy(struct inode *inode,
+			     struct inode *dir,
+			     struct qstr *name,
+			     int flags,
+			     loff_t nbyte,
+			     loff_t src_offset,
+			     loff_t dst_offset)
+{
+	struct nfs4_exception exc = {0, };
+	loff_t copied;
+
+	for (;;) {
+		copied = _nfs4_proc_copy(inode, dir, name, flags, nbyte,
+					 src_offset, dst_offset);
+		if (copied < 0)
+			copied = nfs4_handle_exception(NFS_SERVER(inode),
+						       copied, &exc);
+		if (!exc.retry)
+			break;
+	}
+
+	return copied;
+}
+
+
/*
* Check the exchange flags returned by the server for invalid flags, having
* both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
@@ -7097,6 +7166,9 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.free_client = nfs4_free_client,
.create_server = nfs4_create_server,
.clone_server = nfs_clone_server,
+#ifdef CONFIG_NFS_V4_1
+ .copy = nfs4_proc_copy,
+#endif
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4be8d13..28598b0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -397,6 +397,8 @@ static int nfs4_stat_to_errno(int);
#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \
XDR_QUADLEN(NFS4_STATEID_SIZE))
#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1)
+#define encode_copy_maxsz (op_encode_hdr_maxsz + 8 + nfs4_name_maxsz)
+#define decode_copy_maxsz (op_decode_hdr_maxsz + 1 + decode_stateid_maxsz)
#else /* CONFIG_NFS_V4_1 */
#define encode_sequence_maxsz 0
#define decode_sequence_maxsz 0
@@ -840,6 +842,22 @@ static int nfs4_stat_to_errno(int);
#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_free_stateid_maxsz)
+#define NFS4_enc_copy_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_savefh_maxsz + \
+ encode_putfh_maxsz + \
+ encode_copy_maxsz + \
+ encode_getfh_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_savefh_maxsz + \
+ decode_putfh_maxsz + \
+ decode_copy_maxsz + \
+ decode_getfh_maxsz + \
+ decode_getattr_maxsz)
const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
compound_encode_hdr_maxsz +
@@ -1817,6 +1835,23 @@ static void encode_reclaim_complete(struct xdr_stream *xdr,
encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr);
encode_uint32(xdr, args->one_fs);
}
+
+/*
+ * Encode the COPY operation: opcode, source offset, destination offset,
+ * byte count, flags and the destination name as a variable-length opaque.
+ *
+ * args->destination may be NULL (nfs_copy_range() passes no name); that is
+ * encoded as a zero-length name instead of being dereferenced.
+ */
+static void encode_copy(struct xdr_stream *xdr,
+			const struct nfs_copy_args *args,
+			struct compound_hdr *hdr)
+{
+	const struct qstr *dest = args->destination;
+	const unsigned char *name = dest ? dest->name : NULL;
+	u32 len = dest ? dest->len : 0;
+	__be32 *p;
+
+	/*
+	 * 36 fixed bytes: opcode (4) + three hypers (24) + flags (4) +
+	 * name length (4).  The name itself is padded to a 4-byte boundary
+	 * by xdr_encode_opaque(), so reserve the padded size.
+	 */
+	p = reserve_space(xdr, 36 + (XDR_QUADLEN(len) << 2));
+	*p++ = cpu_to_be32(OP_COPY);
+	p = xdr_encode_hyper(p, args->src_offset);
+	p = xdr_encode_hyper(p, args->dst_offset);
+	p = xdr_encode_hyper(p, args->count);
+	*p++ = cpu_to_be32(args->flags);
+	xdr_encode_opaque(p, name, len);
+	hdr->nops++;
+	hdr->replen += decode_copy_maxsz;
+}
#endif /* CONFIG_NFS_V4_1 */
static void encode_sequence(struct xdr_stream *xdr,
@@ -2761,6 +2796,30 @@ static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * Encode a COPY request
+ */
+static void nfs4_xdr_enc_copy(struct rpc_rqst *req, struct xdr_stream *xdr,
+			      struct nfs_copy_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	/*
+	 * Same calling convention as the sibling encoders in this file
+	 * (e.g. nfs4_xdr_enc_sequence): the caller supplies an already
+	 * initialised xdr_stream, so it must not be treated as a raw
+	 * buffer pointer and re-initialised here.
+	 */
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	/* args->fh becomes the saved filehandle, args->dir_fh the current one */
+	encode_putfh(xdr, args->fh, &hdr);
+	encode_savefh(xdr, &hdr);
+	encode_putfh(xdr, args->dir_fh, &hdr);
+	encode_copy(xdr, args, &hdr);
+	encode_getfh(xdr, &hdr);
+	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_nops(&hdr);
+}
+
+/*
* a GET_LEASE_TIME request
*/
static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req,
@@ -4688,6 +4747,41 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
return decode_change_info(xdr, cinfo);
}
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * Decode the COPY result.
+ *
+ * When the operation succeeded the body is an array of at most one
+ * callback stateid.  When it failed the body is the 64-bit count of bytes
+ * copied before the failure; that count is stored in res->bytes_copied and
+ * the error from the operation header is returned.
+ *
+ * Returns 0, the error mapped by decode_op_hdr(), or -EIO on a malformed
+ * reply.
+ */
+static int decode_copy(struct xdr_stream *xdr, struct nfs_copy_res *res)
+{
+	nfs4_stateid scratch;
+	__be32 *p;
+	int status;
+
+	status = decode_op_hdr(xdr, OP_COPY);
+	if (status) {
+		/*
+		 * Failure arm.  The byte count is best effort: if it is
+		 * missing the header error is still what gets reported.
+		 * NOTE(review): decode_op_hdr() may also fail for reasons
+		 * other than an NFS status; callers should range-check
+		 * bytes_copied before trusting it.
+		 */
+		p = xdr_inline_decode(xdr, 8);
+		if (likely(p))
+			xdr_decode_hyper(p, &res->bytes_copied);
+		return status;
+	}
+
+	p = xdr_inline_decode(xdr, 4);
+	if (unlikely(!p))
+		goto out_overflow;
+	res->callback_id_length = be32_to_cpup(p);
+	if (res->callback_id_length == 0)
+		return 0;
+	if (res->callback_id_length != 1)
+		return -EIO;
+
+	/*
+	 * The caller may not have provided storage for the stateid
+	 * (_nfs4_proc_copy() leaves callback_id NULL); decode into a
+	 * scratch buffer then so the stream stays in sync without writing
+	 * through a NULL pointer.
+	 */
+	return decode_stateid(xdr, res->callback_id ? res->callback_id :
+						      &scratch);
+out_overflow:
+	print_overflow_msg(__func__, xdr);
+	return -EIO;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
/*
* We create the owner, so we know a proper owner.id length is 4.
*/
@@ -7047,6 +7141,43 @@ static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
out:
return status;
}
+
+/*
+ * Decode COPY response
+ *
+ * Walks the reply in the order the request was built: SEQUENCE, PUTFH,
+ * SAVEFH, PUTFH, COPY, GETFH, GETATTR.  The first failing operation ends
+ * decoding and its status is returned.  The result of the trailing
+ * GETATTR decode is not checked.
+ *
+ * The decoder receives an already initialised xdr_stream, so it must not
+ * treat its second argument as a raw buffer pointer.
+ */
+static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+			     struct nfs_copy_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_savefh(xdr);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_copy(xdr, res);
+	if (status)
+		goto out;
+	status = decode_getfh(xdr, res->fh);
+	if (status)
+		goto out;
+	decode_getfattr(xdr, res->fattr, res->server);
+out:
+	return status;
+}
#endif /* CONFIG_NFS_V4_1 */
/**
@@ -7257,6 +7388,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(BIND_CONN_TO_SESSION,
enc_bind_conn_to_session, dec_bind_conn_to_session),
PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid),
+ PROC(COPY, enc_copy, dec_copy),
#endif /* CONFIG_NFS_V4_1 */
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 27d74a2..2f62ebb 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -986,6 +986,37 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
+/*
+ * COPY: copy a byte range from the saved filehandle's file to the current
+ * filehandle's file.
+ *
+ * The reply body is a union: nfsd4_encode_copy() sends u.ok when the
+ * operation succeeded and u.cr_bytes_copied when it failed.  Exactly one
+ * arm is therefore filled in on every return path, so the encoder never
+ * puts stale memory on the wire.
+ *
+ * XXX:
+ *  - do something with stateids :)
+ *  - implement callback results and OFFLOAD_ABORT
+ *  - inter-server copies?
+ */
+static __be32
+nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+	   struct nfsd4_copy *copy)
+{
+	__be32 status;
+
+	/* failure arm: nothing has been copied until proven otherwise */
+	copy->u.cr_bytes_copied = 0;
+
+	/* only support copying data to an existing file */
+	if (copy->ca_destinationlen)
+		return nfserr_inval;
+
+	if (!cstate->current_fh.fh_dentry || !cstate->save_fh.fh_dentry)
+		return nfserr_nofilehandle;
+
+	status = nfsd_copy_range(rqstp, &cstate->save_fh, copy->ca_src_offset,
+				 &cstate->current_fh, copy->ca_dst_offset,
+				 copy->ca_count);
+	if (status == nfs_ok) {
+		/* success arm; don't support async callbacks yet */
+		copy->u.ok.cr_callback_id_length = 0;
+		copy->u.ok.cr_callback_id = NULL;
+	}
+
+	return status;
+}
+
/* This routine never returns NFS_OK! If there are no other errors, it
* will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
* attributes matched. VERIFY is implemented by mapping NFSERR_SAME
@@ -1798,6 +1829,10 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
+ [OP_COPY] = {
+ .op_func = (nfsd4op_func)nfsd4_copy,
+ .op_name = "OP_COPY",
+ },
};
#ifdef NFSD_DEBUG
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 6cd86e0..d2978e9 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1445,6 +1445,26 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
}
static __be32
+nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+{
+	DECODE_HEAD;
+
+	/* three 64-bit fields, the flags word and the name length */
+	READ_BUF(32);
+	READ64(copy->ca_src_offset);
+	READ64(copy->ca_dst_offset);
+	READ64(copy->ca_count);
+	READ32(copy->ca_flags);
+	READ32(copy->ca_destinationlen);
+
+	/*
+	 * nfsd4_copy() only accepts requests with an empty destination
+	 * name, so an empty name has to get through the decoder.  Only a
+	 * name that is actually present is fetched and validated.
+	 * NOTE(review): this assumes check_filename() rejects zero-length
+	 * names - confirm against its definition.
+	 */
+	if (copy->ca_destinationlen) {
+		READ_BUF(copy->ca_destinationlen);
+		SAVEMEM(copy->ca_destination, copy->ca_destinationlen);
+		status = check_filename(copy->ca_destination,
+					copy->ca_destinationlen);
+		if (status)
+			return status;
+	} else {
+		copy->ca_destination = NULL;
+	}
+
+	DECODE_TAIL;
+}
+
+static __be32
nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
{
return nfs_ok;
@@ -1557,6 +1577,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
[OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
+ [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
};
struct nfsd4_minorversion_ops {
@@ -3394,6 +3415,27 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
}
static __be32
+nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+		  struct nfsd4_copy *copy)
+{
+	__be32 *p;
+
+	/* failed operation: the body is the 64-bit count of bytes copied */
+	if (nfserr) {
+		RESERVE_SPACE(8);
+		WRITE64(copy->u.cr_bytes_copied);
+		ADJUST_ARGS();
+		return nfserr;
+	}
+
+	/* successful operation: callback id count, then the id if present */
+	RESERVE_SPACE(4);
+	WRITE32(copy->u.ok.cr_callback_id_length);
+	ADJUST_ARGS();
+	if (copy->u.ok.cr_callback_id_length == 1)
+		nfsd4_encode_stateid(resp, copy->u.ok.cr_callback_id);
+
+	return nfserr;
+}
+
+static __be32
nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
{
return nfserr;
@@ -3465,6 +3507,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
[OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
};
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 84ce601..0c1b427 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -28,6 +28,8 @@
#include <asm/uaccess.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
+#include <linux/fs_struct.h>
+#include <linux/kmod.h>
#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
@@ -621,6 +623,45 @@ int nfsd4_is_junction(struct dentry *dentry)
return 0;
return 1;
}
+
+/*
+ * Copy @count bytes from offset @pos_in of the file behind @fhp_in to
+ * offset @pos_out of the file behind @fhp_out using vfs_copy_range().
+ * On exports with the sync option the written range is flushed before
+ * returning.
+ *
+ * Returns nfs_ok or an NFS error status.  The __be32 status from
+ * nfsd_open() and the errno-style results of the VFS calls are kept in
+ * separate variables so that neither is misread as the other.
+ */
+__be32
+nfsd_copy_range(struct svc_rqst *rqstp, struct svc_fh *fhp_in, u64 pos_in,
+		struct svc_fh *fhp_out, u64 pos_out, u64 count)
+{
+	struct file *filp_in = NULL;
+	struct file *filp_out = NULL;
+	__be32 status;
+	ssize_t copied;
+	int err;
+
+	/* XXX verify pos and count within sane limits? */
+
+	status = nfsd_open(rqstp, fhp_in, S_IFREG, NFSD_MAY_READ, &filp_in);
+	if (status)
+		goto out;
+
+	status = nfsd_open(rqstp, fhp_out, S_IFREG, NFSD_MAY_WRITE, &filp_out);
+	if (status)
+		goto out;
+
+	copied = vfs_copy_range(filp_in, pos_in, filp_out, pos_out, count);
+	/* XXX fall back if .copy_range isn't supported */
+	if (copied < 0) {
+		status = nfserrno(copied);
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): a short copy (copied < count) is still reported as
+	 * success here; the caller has no way to learn the partial count.
+	 */
+	if (copied > 0 && EX_ISSYNC(fhp_out->fh_export)) {
+		err = vfs_fsync_range(filp_out, pos_out,
+				      pos_out + copied - 1, 0);
+		if (err)
+			status = nfserrno(err);
+	}
+
+out:
+	if (filp_in)
+		nfsd_close(filp_in);
+	if (filp_out)
+		nfsd_close(filp_out);
+
+	return status;
+}
+
#endif /* defined(CONFIG_NFSD_V4) */
#ifdef CONFIG_NFSD_V3
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 5b58941..bbc9483 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -85,6 +85,9 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
struct svc_fh *res, struct iattr *);
__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
char *, int, struct svc_fh *);
+__be32 nfsd_copy_range(struct svc_rqst *,
+ struct svc_fh *, u64,
+ struct svc_fh *, u64, u64);
__be32 nfsd_rename(struct svc_rqst *,
struct svc_fh *, char *, int,
struct svc_fh *, char *, int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 3b271d2..95fd1c3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -426,6 +426,26 @@ struct nfsd4_reclaim_complete {
u32 rca_one_fs;
};
+/* Server-side state for one COPY operation: decoded arguments plus reply. */
+struct nfsd4_copy {
+ /* request: filled in by nfsd4_decode_copy() in this field order */
+ u64 ca_src_offset;
+ u64 ca_dst_offset;
+ u64 ca_count;
+ u32 ca_flags;
+ /* byte length of ca_destination; nfsd4_copy() rejects non-zero values */
+ u32 ca_destinationlen;
+ char * ca_destination;
+
+ /*
+  * response: nfsd4_encode_copy() sends the "ok" arm when the operation
+  * succeeded and cr_bytes_copied when it failed.  The arms share storage.
+  */
+ union {
+ struct {
+ /* 0 or 1; the stateid is only encoded when this is 1 */
+ u32 cr_callback_id_length;
+ stateid_t * cr_callback_id;
+ } ok;
+ u64 cr_bytes_copied;
+ } u;
+
+};
+
struct nfsd4_op {
int opnum;
__be32 status;
@@ -471,6 +491,7 @@ struct nfsd4_op {
struct nfsd4_reclaim_complete reclaim_complete;
struct nfsd4_test_stateid test_stateid;
struct nfsd4_free_stateid free_stateid;
+ struct nfsd4_copy copy;
} u;
struct nfs4_replay * replay;
};
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 7b8fc73..6be484e 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -100,6 +100,7 @@ enum nfs_opnum4 {
OP_WANT_DELEGATION = 56,
OP_DESTROY_CLIENTID = 57,
OP_RECLAIM_COMPLETE = 58,
+ OP_COPY = 59,
OP_ILLEGAL = 10044,
};
@@ -108,7 +109,7 @@ enum nfs_opnum4 {
Needs to be updated if more operations are defined in future.*/
#define FIRST_NFS4_OP OP_ACCESS
-#define LAST_NFS4_OP OP_RECLAIM_COMPLETE
+#define LAST_NFS4_OP OP_COPY
enum nfsstat4 {
NFS4_OK = 0,
@@ -456,6 +457,9 @@ enum {
NFSPROC4_CLNT_GETDEVICELIST,
NFSPROC4_CLNT_BIND_CONN_TO_SESSION,
NFSPROC4_CLNT_DESTROY_CLIENTID,
+
+ /* nfs42 */
+ NFSPROC4_CLNT_COPY,
};
/* nfs41 types */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 104b62f..2256e31 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1184,6 +1184,28 @@ struct nfs41_free_stateid_res {
unsigned int status;
};
+/* Client-side arguments for the COPY compound built by nfs4_xdr_enc_copy(). */
+struct nfs_copy_args {
+ /* put first and then saved with SAVEFH before COPY is encoded */
+ struct nfs_fh *fh;
+ /* put second, so it is the current filehandle when COPY is encoded */
+ struct nfs_fh *dir_fh;
+ /* attribute bitmask handed to the trailing GETATTR */
+ u32 *bitmask;
+ /* the fields below are encoded by encode_copy() in this order */
+ __u64 src_offset;
+ __u64 dst_offset;
+ __u64 count;
+ __u32 flags;
+ /* destination name; nfs_copy_range() passes NULL for it */
+ const struct qstr *destination;
+ struct nfs4_sequence_args seq_args;
+};
+
+/* Client-side results of the COPY compound, filled by nfs4_xdr_dec_copy(). */
+struct nfs_copy_res {
+ /* receives the filehandle decoded from the GETFH reply */
+ struct nfs_fh *fh;
+ /* allocated by the caller; receives the GETATTR reply */
+ struct nfs_fattr *fattr;
+ /* number of callback stateids in the reply, as read by decode_copy() */
+ __u32 callback_id_length;
+ /* where decode_copy() stores the stateid; _nfs4_proc_copy() sets it to 0 */
+ nfs4_stateid *callback_id;
+ /* if non-zero, _nfs4_proc_copy() returns this value as its result */
+ __u64 bytes_copied;
+ /* passed to decode_getfattr() */
+ const struct nfs_server *server;
+ struct nfs4_sequence_res seq_res;
+};
+
#else
struct pnfs_ds_commit_info {
@@ -1433,6 +1455,8 @@ struct nfs_rpc_ops {
struct nfs_server *(*create_server)(struct nfs_mount_info *, struct nfs_subversion *);
struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *, rpc_authflavor_t);
+ loff_t (*copy) (struct inode *, struct inode *, struct qstr *,
+ int, loff_t, loff_t, loff_t);
};
/*
--
1.7.11.7
^ permalink raw reply related [flat|nested] 16+ messages in thread