linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] vfs: fix copy_file_range() averts filesystem freeze protection
@ 2022-11-10 15:55 Amir Goldstein
  2022-11-11 14:53 ` Namjae Jeon
  2022-11-14 11:33 ` Luís Henriques
  0 siblings, 2 replies; 3+ messages in thread
From: Amir Goldstein @ 2022-11-10 15:55 UTC (permalink / raw)
  To: Al Viro
  Cc: Linus Torvalds, Namjae Jeon, Olga Kornievskaia, Luis Henriques,
	linux-fsdevel, linux-cifs, linux-nfs

Commit 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs
copies") removed fallback to generic_copy_file_range() for cross-fs
cases inside vfs_copy_file_range().

To preserve behavior of nfsd and ksmbd server-side-copy, the fallback to
generic_copy_file_range() was added in nfsd and ksmbd code, but that
call is missing sb_start_write(), fsnotify hooks and more.

Ideally, nfsd and ksmbd would pass a flag to vfs_copy_file_range() that
will take care of the fallback, but that code would be subtle and we got
vfs_copy_file_range() logic wrong too many times already.

Instead, add a flag to explicitly request vfs_copy_file_range() to
perform only generic_copy_file_range() and let nfsd and ksmbd use this
flag only in the fallback path.

This choise keeps the logic changes to minimum in the non-nfsd/ksmbd code
paths to reduce the risk of further regressions.

Fixes: 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---

Hi Al,

Another fix for the long tradition of copy_file_range() regressions.
This one only affected cross-fs server-side-copy from nfsd/ksmbd.

I ran the copy_range fstests group on ext4/xfs/overlay to verify no
regressions in local fs and nfsv3/nfsv4 to test server-side-copy.

I also patched copy_file_range() to test the nfsd fallback code on
local fs.

Namje, could you please test ksmbd.

Thanks,
Amir.

 fs/ksmbd/vfs.c     |  6 +++---
 fs/nfsd/vfs.c      |  4 ++--
 fs/read_write.c    | 19 +++++++++++++++----
 include/linux/fs.h |  8 ++++++++
 4 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
index 8de970d6146f..94b8ed4ef870 100644
--- a/fs/ksmbd/vfs.c
+++ b/fs/ksmbd/vfs.c
@@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
 		ret = vfs_copy_file_range(src_fp->filp, src_off,
 					  dst_fp->filp, dst_off, len, 0);
 		if (ret == -EOPNOTSUPP || ret == -EXDEV)
-			ret = generic_copy_file_range(src_fp->filp, src_off,
-						      dst_fp->filp, dst_off,
-						      len, 0);
+			ret = vfs_copy_file_range(src_fp->filp, src_off,
+						  dst_fp->filp, dst_off, len,
+						  COPY_FILE_SPLICE);
 		if (ret < 0)
 			return ret;
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index f650afedd67f..5cf11cde51f8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
 	ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
 
 	if (ret == -EOPNOTSUPP || ret == -EXDEV)
-		ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
-					      count, 0);
+		ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+					  COPY_FILE_SPLICE);
 	return ret;
 }
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 328ce8cf9a85..24b9668d6377 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
 				struct file *file_out, loff_t pos_out,
 				size_t len, unsigned int flags)
 {
+	lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
+
 	return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
 				len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
 }
@@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
 	 * and several different sets of file_operations, but they all end up
 	 * using the same ->copy_file_range() function pointer.
 	 */
-	if (file_out->f_op->copy_file_range) {
+	if (flags & COPY_FILE_SPLICE) {
+		/* cross sb splice is allowed */
+	} else if (file_out->f_op->copy_file_range) {
 		if (file_in->f_op->copy_file_range !=
 		    file_out->f_op->copy_file_range)
 			return -EXDEV;
@@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 			    size_t len, unsigned int flags)
 {
 	ssize_t ret;
+	bool splice = flags & COPY_FILE_SPLICE;
 
-	if (flags != 0)
+	if (flags & ~COPY_FILE_SPLICE)
 		return -EINVAL;
 
 	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
@@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	 * same sb using clone, but for filesystems where both clone and copy
 	 * are supported (e.g. nfs,cifs), we only call the copy method.
 	 */
-	if (file_out->f_op->copy_file_range) {
+	if (!splice && file_out->f_op->copy_file_range) {
 		ret = file_out->f_op->copy_file_range(file_in, pos_in,
 						      file_out, pos_out,
 						      len, flags);
 		goto done;
 	}
 
-	if (file_in->f_op->remap_file_range &&
+	if (!splice && file_in->f_op->remap_file_range &&
 	    file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
 		ret = file_in->f_op->remap_file_range(file_in, pos_in,
 				file_out, pos_out,
@@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	 * consistent story about which filesystems support copy_file_range()
 	 * and which filesystems do not, that will allow userspace tools to
 	 * make consistent desicions w.r.t using copy_file_range().
+	 *
+	 * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
 	 */
 	ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
 				      flags);
@@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
 		pos_out = f_out.file->f_pos;
 	}
 
+	ret = -EINVAL;
+	if (flags != 0)
+		goto out;
+
 	ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
 				  flags);
 	if (ret > 0) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e654435f1651..59ae95ddb679 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2089,6 +2089,14 @@ struct dir_context {
  */
 #define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 
+/*
+ * These flags control the behavior of vfs_copy_file_range().
+ * They are not available to the user via syscall.
+ *
+ * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
+ */
+#define COPY_FILE_SPLICE		(1 << 0)
+
 struct iov_iter;
 struct io_uring_cmd;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] vfs: fix copy_file_range() averts filesystem freeze protection
  2022-11-10 15:55 [PATCH] vfs: fix copy_file_range() averts filesystem freeze protection Amir Goldstein
@ 2022-11-11 14:53 ` Namjae Jeon
  2022-11-14 11:33 ` Luís Henriques
  1 sibling, 0 replies; 3+ messages in thread
From: Namjae Jeon @ 2022-11-11 14:53 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Al Viro, Linus Torvalds, Olga Kornievskaia, Luis Henriques,
	linux-fsdevel, linux-cifs, linux-nfs

2022-11-11 0:55 GMT+09:00, Amir Goldstein <amir73il@gmail.com>:
> Commit 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs
> copies") removed fallback to generic_copy_file_range() for cross-fs
> cases inside vfs_copy_file_range().
>
> To preserve behavior of nfsd and ksmbd server-side-copy, the fallback to
> generic_copy_file_range() was added in nfsd and ksmbd code, but that
> call is missing sb_start_write(), fsnotify hooks and more.
>
> Ideally, nfsd and ksmbd would pass a flag to vfs_copy_file_range() that
> will take care of the fallback, but that code would be subtle and we got
> vfs_copy_file_range() logic wrong too many times already.
>
> Instead, add a flag to explicitly request vfs_copy_file_range() to
> perform only generic_copy_file_range() and let nfsd and ksmbd use this
> flag only in the fallback path.
>
> This choise keeps the logic changes to minimum in the non-nfsd/ksmbd code
> paths to reduce the risk of further regressions.
>
> Fixes: 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs
> copies")
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>
> Hi Al,
>
> Another fix for the long tradition of copy_file_range() regressions.
> This one only affected cross-fs server-side-copy from nfsd/ksmbd.
>
> I ran the copy_range fstests group on ext4/xfs/overlay to verify no
> regressions in local fs and nfsv3/nfsv4 to test server-side-copy.
>
> I also patched copy_file_range() to test the nfsd fallback code on
> local fs.
>
> Namje, could you please test ksmbd.
Works fine. You can add tested-by tag for ksmbd.
Tested-by: Namjae Jeon <linkinjeon@kernel.org>

>
> Thanks,
> Amir.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] vfs: fix copy_file_range() averts filesystem freeze protection
  2022-11-10 15:55 [PATCH] vfs: fix copy_file_range() averts filesystem freeze protection Amir Goldstein
  2022-11-11 14:53 ` Namjae Jeon
@ 2022-11-14 11:33 ` Luís Henriques
  1 sibling, 0 replies; 3+ messages in thread
From: Luís Henriques @ 2022-11-14 11:33 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Al Viro, Linus Torvalds, Namjae Jeon, Olga Kornievskaia,
	Luis Henriques, linux-fsdevel, linux-cifs, linux-nfs

Amir Goldstein <amir73il@gmail.com> writes:

> Commit 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs
> copies") removed fallback to generic_copy_file_range() for cross-fs
> cases inside vfs_copy_file_range().
>
> To preserve behavior of nfsd and ksmbd server-side-copy, the fallback to
> generic_copy_file_range() was added in nfsd and ksmbd code, but that
> call is missing sb_start_write(), fsnotify hooks and more.
>
> Ideally, nfsd and ksmbd would pass a flag to vfs_copy_file_range() that
> will take care of the fallback, but that code would be subtle and we got
> vfs_copy_file_range() logic wrong too many times already.
>
> Instead, add a flag to explicitly request vfs_copy_file_range() to
> perform only generic_copy_file_range() and let nfsd and ksmbd use this
> flag only in the fallback path.
>
> This choise keeps the logic changes to minimum in the non-nfsd/ksmbd code
> paths to reduce the risk of further regressions.
>
> Fixes: 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies")
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>
> Hi Al,
>
> Another fix for the long tradition of copy_file_range() regressions.
> This one only affected cross-fs server-side-copy from nfsd/ksmbd.
>
> I ran the copy_range fstests group on ext4/xfs/overlay to verify no
> regressions in local fs and nfsv3/nfsv4 to test server-side-copy.
>
> I also patched copy_file_range() to test the nfsd fallback code on
> local fs.
>
> Namje, could you please test ksmbd.

For what is worth, I've also done some testing with ceph and I didn't saw
any regression either.  So, feel free to add my

Tested-by: Luís Henriques <lhenriques@suse.de>

Cheers,
-- 
Luís

>
> Thanks,
> Amir.
>
>  fs/ksmbd/vfs.c     |  6 +++---
>  fs/nfsd/vfs.c      |  4 ++--
>  fs/read_write.c    | 19 +++++++++++++++----
>  include/linux/fs.h |  8 ++++++++
>  4 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
> index 8de970d6146f..94b8ed4ef870 100644
> --- a/fs/ksmbd/vfs.c
> +++ b/fs/ksmbd/vfs.c
> @@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work,
>  		ret = vfs_copy_file_range(src_fp->filp, src_off,
>  					  dst_fp->filp, dst_off, len, 0);
>  		if (ret == -EOPNOTSUPP || ret == -EXDEV)
> -			ret = generic_copy_file_range(src_fp->filp, src_off,
> -						      dst_fp->filp, dst_off,
> -						      len, 0);
> +			ret = vfs_copy_file_range(src_fp->filp, src_off,
> +						  dst_fp->filp, dst_off, len,
> +						  COPY_FILE_SPLICE);
>  		if (ret < 0)
>  			return ret;
>  
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index f650afedd67f..5cf11cde51f8 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
>  	ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
>  
>  	if (ret == -EOPNOTSUPP || ret == -EXDEV)
> -		ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
> -					      count, 0);
> +		ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
> +					  COPY_FILE_SPLICE);
>  	return ret;
>  }
>  
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 328ce8cf9a85..24b9668d6377 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
>  				struct file *file_out, loff_t pos_out,
>  				size_t len, unsigned int flags)
>  {
> +	lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
> +
>  	return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
>  				len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
>  }
> @@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
>  	 * and several different sets of file_operations, but they all end up
>  	 * using the same ->copy_file_range() function pointer.
>  	 */
> -	if (file_out->f_op->copy_file_range) {
> +	if (flags & COPY_FILE_SPLICE) {
> +		/* cross sb splice is allowed */
> +	} else if (file_out->f_op->copy_file_range) {
>  		if (file_in->f_op->copy_file_range !=
>  		    file_out->f_op->copy_file_range)
>  			return -EXDEV;
> @@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  			    size_t len, unsigned int flags)
>  {
>  	ssize_t ret;
> +	bool splice = flags & COPY_FILE_SPLICE;
>  
> -	if (flags != 0)
> +	if (flags & ~COPY_FILE_SPLICE)
>  		return -EINVAL;
>  
>  	ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len,
> @@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  	 * same sb using clone, but for filesystems where both clone and copy
>  	 * are supported (e.g. nfs,cifs), we only call the copy method.
>  	 */
> -	if (file_out->f_op->copy_file_range) {
> +	if (!splice && file_out->f_op->copy_file_range) {
>  		ret = file_out->f_op->copy_file_range(file_in, pos_in,
>  						      file_out, pos_out,
>  						      len, flags);
>  		goto done;
>  	}
>  
> -	if (file_in->f_op->remap_file_range &&
> +	if (!splice && file_in->f_op->remap_file_range &&
>  	    file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
>  		ret = file_in->f_op->remap_file_range(file_in, pos_in,
>  				file_out, pos_out,
> @@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
>  	 * consistent story about which filesystems support copy_file_range()
>  	 * and which filesystems do not, that will allow userspace tools to
>  	 * make consistent desicions w.r.t using copy_file_range().
> +	 *
> +	 * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
>  	 */
>  	ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
>  				      flags);
> @@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
>  		pos_out = f_out.file->f_pos;
>  	}
>  
> +	ret = -EINVAL;
> +	if (flags != 0)
> +		goto out;
> +
>  	ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
>  				  flags);
>  	if (ret > 0) {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index e654435f1651..59ae95ddb679 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2089,6 +2089,14 @@ struct dir_context {
>   */
>  #define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
>  
> +/*
> + * These flags control the behavior of vfs_copy_file_range().
> + * They are not available to the user via syscall.
> + *
> + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
> + */
> +#define COPY_FILE_SPLICE		(1 << 0)
> +
>  struct iov_iter;
>  struct io_uring_cmd;
>  
> -- 
>
> 2.25.1
>


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-11-14 11:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-11-10 15:55 [PATCH] vfs: fix copy_file_range() averts filesystem freeze protection Amir Goldstein
2022-11-11 14:53 ` Namjae Jeon
2022-11-14 11:33 ` Luís Henriques

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).