linux-cifs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
To: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Cc: viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org,
	tao.peng-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org,
	jeff.layton-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org,
	bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-cifs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer
Date: Sun, 6 Dec 2015 16:53:31 -0800	[thread overview]
Message-ID: <20151207005331.GA10582@birch.djwong.org> (raw)
In-Reply-To: <1449143992-7415-3-git-send-email-hch-jcswGhMUV9g@public.gmane.org>

On Thu, Dec 03, 2015 at 12:59:50PM +0100, Christoph Hellwig wrote:
> The btrfs clone ioctls are now adopted by other file systems, with NFS
> and CIFS already having support for them, and XFS being under active
> development.  To avoid growth of various slightly incompatible
> implementations, add one to the VFS.  Note that clones are different from
> file copies in several ways:
> 
>  - they are atomic vs other writers
>  - they support whole file clones
>  - they support 64-bit legth clones
>  - they do not allow partial success (aka short writes)
>  - clones are expected to be a fast metadata operation
> 
> Because of that it would be rather cumbersome to try to piggyback them on
> top of the recent clone_file_range infrastructure.  The converse isn't
> true and the clone_file_range system call could try clone file range as
> a first attempt to copy, something that further patches will enable.
> 
> Based on earlier work from Peng Tao.

<snip>

> diff --git a/fs/read_write.c b/fs/read_write.c
> index 6c1aa73..9e3dd8f 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1451,3 +1451,75 @@ out1:
>  out2:
>  	return ret;
>  }
> +
> +static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
> +{
> +	struct inode *inode = file_inode(file);
> +
> +	if (unlikely(pos < 0))
> +		return -EINVAL;
> +
> +	 if (unlikely((loff_t) (pos + len) < 0))
> +		return -EINVAL;
> +
> +	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
> +		loff_t end = len ? pos + len - 1 : OFFSET_MAX;
> +		int retval;
> +
> +		retval = locks_mandatory_area(file, pos, end,
> +				write ? F_WRLCK : F_RDLCK);
> +		if (retval < 0)
> +			return retval;
> +	}
> +
> +	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
> +}
> +
> +int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
> +		struct file *file_out, loff_t pos_out, u64 len)
> +{
> +	struct inode *inode_in = file_inode(file_in);
> +	struct inode *inode_out = file_inode(file_out);
> +	int ret;
> +
> +	if (inode_in->i_sb != inode_out->i_sb ||
> +	    file_in->f_path.mnt != file_out->f_path.mnt)
> +		return -EXDEV;
> +
> +	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> +		return -EISDIR;
> +	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> +		return -EOPNOTSUPP;

I thought we were moving to -EINVAL for wrong file types?

Though, perhaps "I've also prepared a btrfs patch for this and clone" from the
earlier thread about generic/157 wasn't referring to /this/ patch. :)

In any case, I'm ok with EINVAL, and I haven't heard any objections to
changing -EOPNOTSUPP -> -EINVAL when trying to reflink/dedupe/whatever
non-file non-dir fds.

<shrug> Anyone object?

--D

> +
> +	if (!(file_in->f_mode & FMODE_READ) ||
> +	    !(file_out->f_mode & FMODE_WRITE) ||
> +	    (file_out->f_flags & O_APPEND) ||
> +	    !file_in->f_op->clone_file_range)
> +		return -EBADF;
> +
> +	ret = clone_verify_area(file_in, pos_in, len, false);
> +	if (ret)
> +		return ret;
> +
> +	ret = clone_verify_area(file_out, pos_out, len, true);
> +	if (ret)
> +		return ret;
> +
> +	if (pos_in + len > i_size_read(inode_in))
> +		return -EINVAL;
> +
> +	ret = mnt_want_write_file(file_out);
> +	if (ret)
> +		return ret;
> +
> +	ret = file_in->f_op->clone_file_range(file_in, pos_in,
> +			file_out, pos_out, len);
> +	if (!ret) {
> +		fsnotify_access(file_in);
> +		fsnotify_modify(file_out);
> +	}
> +
> +	mnt_drop_write_file(file_out);
> +	return ret;
> +}
> +EXPORT_SYMBOL(vfs_clone_file_range);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index af559ac..59bf96d 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1629,7 +1629,10 @@ struct file_operations {
>  #ifndef CONFIG_MMU
>  	unsigned (*mmap_capabilities)(struct file *);
>  #endif
> -	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
> +	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
> +			loff_t, size_t, unsigned int);
> +	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
> +			u64);
>  };
>  
>  struct inode_operations {
> @@ -1683,6 +1686,8 @@ extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
>  		unsigned long, loff_t *);
>  extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
>  				   loff_t, size_t, unsigned int);
> +extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
> +		struct file *file_out, loff_t pos_out, u64 len);
>  
>  struct super_operations {
>     	struct inode *(*alloc_inode)(struct super_block *sb);
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index f15d980..cd5db7f 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -39,6 +39,13 @@
>  #define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
>  #define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
>  
> +struct file_clone_range {
> +	__s64 src_fd;
> +	__u64 src_offset;
> +	__u64 src_length;
> +	__u64 dest_offset;
> +};
> +
>  struct fstrim_range {
>  	__u64 start;
>  	__u64 len;
> @@ -159,6 +166,8 @@ struct inodes_stat_t {
>  #define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
>  #define FITHAW		_IOWR('X', 120, int)	/* Thaw */
>  #define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
> +#define FICLONE		_IOW(0x94, 9, int)
> +#define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)
>  
>  #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
>  #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
> -- 
> 1.9.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2015-12-07  0:53 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-03 11:59 move btrfs clone ioctls to common code V2 Christoph Hellwig
     [not found] ` <1449143992-7415-1-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-12-03 11:59   ` [PATCH 1/4] locks: new locks_mandatory_area calling convention Christoph Hellwig
2015-12-08  4:05     ` Al Viro
     [not found]       ` <20151208040504.GA2791-3bDd1+5oDREiFSDQTTA3OLVCufUGDwFn@public.gmane.org>
2015-12-08 14:54         ` Christoph Hellwig
     [not found]           ` <20151208145453.GA3621-jcswGhMUV9g@public.gmane.org>
2015-12-08 16:16             ` Al Viro
2015-12-03 11:59   ` [PATCH 2/4] vfs: pull btrfs clone API to vfs layer Christoph Hellwig
     [not found]     ` <1449143992-7415-3-git-send-email-hch-jcswGhMUV9g@public.gmane.org>
2015-12-07  0:53       ` Darrick J. Wong [this message]
     [not found]         ` <20151207005331.GA10582-PTl6brltDGh4DFYR7WNSRA@public.gmane.org>
2015-12-07 15:13           ` Christoph Hellwig
2015-12-07 21:09             ` Darrick J. Wong
     [not found]             ` <20151207151319.GA2472-jcswGhMUV9g@public.gmane.org>
2015-12-08  1:54               ` Darrick J. Wong
2015-12-14 16:34           ` [PATCH 5/4] vfs: return EINVAL for unsupported file types in clone Christoph Hellwig
2015-12-09 20:40       ` [PATCH 2/4] vfs: pull btrfs clone API to vfs layer Darrick J. Wong
     [not found]         ` <20151209204033.GB10582-PTl6brltDGh4DFYR7WNSRA@public.gmane.org>
2015-12-14 16:34           ` Christoph Hellwig
2015-12-14 17:08           ` Darrick J. Wong
2015-12-03 11:59 ` [PATCH 3/4] nfsd: Pass filehandle to nfs4_preprocess_stateid_op() Christoph Hellwig
2015-12-03 11:59 ` [PATCH 4/4] nfsd: implement the NFSv4.2 CLONE operation Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151207005331.GA10582@birch.djwong.org \
    --to=darrick.wong-qhclzuegtsvqt0dzr+alfa@public.gmane.org \
    --cc=bfields-uC3wQj2KruNg9hUCZPvPmw@public.gmane.org \
    --cc=hch-jcswGhMUV9g@public.gmane.org \
    --cc=jeff.layton-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org \
    --cc=linux-btrfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-cifs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-nfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=tao.peng-7I+n7zu2hftEKMMhf/gKZA@public.gmane.org \
    --cc=viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).