linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1] xfs: allow read IO and FICLONE to run concurrently
@ 2023-10-04 20:58 Catherine Hoang
  2023-10-05  0:04 ` Darrick J. Wong
  0 siblings, 1 reply; 2+ messages in thread
From: Catherine Hoang @ 2023-10-04 20:58 UTC (permalink / raw)
  To: linux-xfs

Clone operations and read IO do not change any data in the source file, so they
should be able to run concurrently. Demote the exclusive locks taken by FICLONE
to shared locks to allow reads while cloning. While a clone is in progress,
writes will take the IOLOCK_EXCL, so they block until the clone completes.

Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
---
 fs/xfs/xfs_file.c    | 41 +++++++++++++++++++++++++++++++++++------
 fs/xfs/xfs_inode.h   |  3 +++
 fs/xfs/xfs_reflink.c | 31 +++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h |  2 ++
 4 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 203700278ddb..1ec987fcabb9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -554,6 +554,15 @@ xfs_file_dio_write_aligned(
 	ret = xfs_ilock_iocb(iocb, iolock);
 	if (ret)
 		return ret;
+
+	if (xfs_iflags_test(ip, XFS_ICLONING)) {
+		xfs_iunlock(ip, iolock);
+		iolock = XFS_IOLOCK_EXCL;
+		ret = xfs_ilock_iocb(iocb, iolock);
+		if (ret)
+			return ret;
+	}
+
 	ret = xfs_file_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out_unlock;
@@ -563,7 +572,7 @@ xfs_file_dio_write_aligned(
 	 * the iolock back to shared if we had to take the exclusive lock in
 	 * xfs_file_write_checks() for other reasons.
 	 */
-	if (iolock == XFS_IOLOCK_EXCL) {
+	if (iolock == XFS_IOLOCK_EXCL && !xfs_iflags_test(ip, XFS_ICLONING)) {
 		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
 		iolock = XFS_IOLOCK_SHARED;
 	}
@@ -622,6 +631,14 @@ xfs_file_dio_write_unaligned(
 	if (ret)
 		return ret;
 
+	if (xfs_iflags_test(ip, XFS_ICLONING) && iolock != XFS_IOLOCK_EXCL) {
+		xfs_iunlock(ip, iolock);
+		iolock = XFS_IOLOCK_EXCL;
+		ret = xfs_ilock_iocb(iocb, iolock);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * We can't properly handle unaligned direct I/O to reflink files yet,
 	 * as we can't unshare a partial block.
@@ -1180,7 +1197,8 @@ xfs_file_remap_range(
 	if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
 		xfs_log_force_inode(dest);
 out_unlock:
-	xfs_iunlock2_io_mmap(src, dest);
+	xfs_reflink_unlock(src, dest);
+	xfs_iflags_clear(src, XFS_ICLONING);
 	if (ret)
 		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
 	return remapped > 0 ? remapped : ret;
@@ -1328,6 +1346,7 @@ __xfs_filemap_fault(
 	struct inode		*inode = file_inode(vmf->vma->vm_file);
 	struct xfs_inode	*ip = XFS_I(inode);
 	vm_fault_t		ret;
+	uint                    mmaplock = XFS_MMAPLOCK_SHARED;
 
 	trace_xfs_filemap_fault(ip, order, write_fault);
 
@@ -1339,17 +1358,27 @@ __xfs_filemap_fault(
 	if (IS_DAX(inode)) {
 		pfn_t pfn;
 
-		xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+		xfs_ilock(XFS_I(inode), mmaplock);
+		if (xfs_iflags_test(ip, XFS_ICLONING)) {
+			xfs_iunlock(ip, mmaplock);
+			mmaplock = XFS_MMAPLOCK_EXCL;
+			xfs_ilock(XFS_I(inode), mmaplock);
+		}
 		ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
 		if (ret & VM_FAULT_NEEDDSYNC)
 			ret = dax_finish_sync_fault(vmf, order, pfn);
-		xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+		xfs_iunlock(XFS_I(inode), mmaplock);
 	} else {
 		if (write_fault) {
-			xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+			xfs_ilock(XFS_I(inode), mmaplock);
+			if (xfs_iflags_test(ip, XFS_ICLONING)) {
+				xfs_iunlock(ip, mmaplock);
+				mmaplock = XFS_MMAPLOCK_EXCL;
+				xfs_ilock(XFS_I(inode), mmaplock);
+			}
 			ret = iomap_page_mkwrite(vmf,
 					&xfs_page_mkwrite_iomap_ops);
-			xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+			xfs_iunlock(XFS_I(inode), mmaplock);
 		} else {
 			ret = filemap_fault(vmf);
 		}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0c5bdb91152e..e3ff059c69f7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -347,6 +347,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
 /* Quotacheck is running but inode has not been added to quota counts. */
 #define XFS_IQUOTAUNCHECKED	(1 << 14)
 
+/* Clone in progress, do not allow modifications. */
+#define XFS_ICLONING (1 << 15)
+
 /* All inode state flags related to inode reclaim. */
 #define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \
 				 XFS_IRECLAIM | \
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index eb9102453aff..645cc196ee13 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep(
 	if (ret)
 		goto out_unlock;
 
+	xfs_iflags_set(src, XFS_ICLONING);
+	if (inode_in != inode_out)
+		xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
+
 	return 0;
 out_unlock:
 	xfs_iunlock2_io_mmap(src, dest);
@@ -1718,3 +1722,30 @@ xfs_reflink_unshare(
 	trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
 	return error;
 }
+
+/* Unlock both inodes after the reflink completes. */
+void
+xfs_reflink_unlock(
+	struct xfs_inode	*ip1,
+	struct xfs_inode	*ip2)
+{
+	struct address_space	*mapping1;
+	struct address_space	*mapping2;
+
+	if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
+		if (ip1 != ip2)
+			xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED);
+		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
+	} else {
+		mapping1 = VFS_I(ip1)->i_mapping;
+		mapping2 = VFS_I(ip2)->i_mapping;
+		if (mapping1 && mapping1 != mapping2)
+			up_read(&mapping1->invalidate_lock);
+		if (mapping2)
+			up_write(&mapping2->invalidate_lock);
+	}
+
+	if (ip1 != ip2)
+		inode_unlock_shared(VFS_I(ip1));
+	inode_unlock(VFS_I(ip2));
+}
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 65c5dfe17ecf..89f4d2a2f52e 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -53,4 +53,6 @@ extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in,
 extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
 		xfs_extlen_t cowextsize, unsigned int remap_flags);
 
+void xfs_reflink_unlock(struct xfs_inode *ip1, struct xfs_inode *ip2);
+
 #endif /* __XFS_REFLINK_H */
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v1] xfs: allow read IO and FICLONE to run concurrently
  2023-10-04 20:58 [PATCH v1] xfs: allow read IO and FICLONE to run concurrently Catherine Hoang
@ 2023-10-05  0:04 ` Darrick J. Wong
  0 siblings, 0 replies; 2+ messages in thread
From: Darrick J. Wong @ 2023-10-05  0:04 UTC (permalink / raw)
  To: Catherine Hoang; +Cc: linux-xfs

On Wed, Oct 04, 2023 at 01:58:07PM -0700, Catherine Hoang wrote:
> Clone operations and read IO do not change any data in the source file, so they
> should be able to run concurrently. Demote the exclusive locks taken by FICLONE
> to shared locks to allow reads while cloning. While a clone is in progress,
> writes will take the IOLOCK_EXCL, so they block until the clone completes.
> 
> Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
> ---
>  fs/xfs/xfs_file.c    | 41 +++++++++++++++++++++++++++++++++++------
>  fs/xfs/xfs_inode.h   |  3 +++
>  fs/xfs/xfs_reflink.c | 31 +++++++++++++++++++++++++++++++
>  fs/xfs/xfs_reflink.h |  2 ++
>  4 files changed, 71 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 203700278ddb..1ec987fcabb9 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -554,6 +554,15 @@ xfs_file_dio_write_aligned(
>  	ret = xfs_ilock_iocb(iocb, iolock);
>  	if (ret)
>  		return ret;
> +
> +	if (xfs_iflags_test(ip, XFS_ICLONING)) {
> +		xfs_iunlock(ip, iolock);
> +		iolock = XFS_IOLOCK_EXCL;
> +		ret = xfs_ilock_iocb(iocb, iolock);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	ret = xfs_file_write_checks(iocb, from, &iolock);
>  	if (ret)
>  		goto out_unlock;
> @@ -563,7 +572,7 @@ xfs_file_dio_write_aligned(
>  	 * the iolock back to shared if we had to take the exclusive lock in
>  	 * xfs_file_write_checks() for other reasons.
>  	 */
> -	if (iolock == XFS_IOLOCK_EXCL) {
> +	if (iolock == XFS_IOLOCK_EXCL && !xfs_iflags_test(ip, XFS_ICLONING)) {
>  		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
>  		iolock = XFS_IOLOCK_SHARED;
>  	}
> @@ -622,6 +631,14 @@ xfs_file_dio_write_unaligned(
>  	if (ret)
>  		return ret;
>  
> +	if (xfs_iflags_test(ip, XFS_ICLONING) && iolock != XFS_IOLOCK_EXCL) {

xfs_iflags_test cycles a spinlock; you ought to put the cheaper
condition (the iolock check) first so that the compiler's shorcutting
can make this cheaper.

> +		xfs_iunlock(ip, iolock);
> +		iolock = XFS_IOLOCK_EXCL;
> +		ret = xfs_ilock_iocb(iocb, iolock);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	/*
>  	 * We can't properly handle unaligned direct I/O to reflink files yet,
>  	 * as we can't unshare a partial block.
> @@ -1180,7 +1197,8 @@ xfs_file_remap_range(
>  	if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
>  		xfs_log_force_inode(dest);
>  out_unlock:
> -	xfs_iunlock2_io_mmap(src, dest);
> +	xfs_reflink_unlock(src, dest);
> +	xfs_iflags_clear(src, XFS_ICLONING);

Clear the ICLONING flag before dropping the locks so that a caller
trying to get IOLOCK_EXCL won't wake up until the flag is really gone.

>  	if (ret)
>  		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
>  	return remapped > 0 ? remapped : ret;
> @@ -1328,6 +1346,7 @@ __xfs_filemap_fault(
>  	struct inode		*inode = file_inode(vmf->vma->vm_file);
>  	struct xfs_inode	*ip = XFS_I(inode);
>  	vm_fault_t		ret;
> +	uint                    mmaplock = XFS_MMAPLOCK_SHARED;
>  
>  	trace_xfs_filemap_fault(ip, order, write_fault);
>  
> @@ -1339,17 +1358,27 @@ __xfs_filemap_fault(
>  	if (IS_DAX(inode)) {
>  		pfn_t pfn;
>  
> -		xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> +		xfs_ilock(XFS_I(inode), mmaplock);
> +		if (xfs_iflags_test(ip, XFS_ICLONING)) {
> +			xfs_iunlock(ip, mmaplock);
> +			mmaplock = XFS_MMAPLOCK_EXCL;
> +			xfs_ilock(XFS_I(inode), mmaplock);
> +		}
>  		ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
>  		if (ret & VM_FAULT_NEEDDSYNC)
>  			ret = dax_finish_sync_fault(vmf, order, pfn);
> -		xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> +		xfs_iunlock(XFS_I(inode), mmaplock);
>  	} else {
>  		if (write_fault) {
> -			xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> +			xfs_ilock(XFS_I(inode), mmaplock);
> +			if (xfs_iflags_test(ip, XFS_ICLONING)) {
> +				xfs_iunlock(ip, mmaplock);
> +				mmaplock = XFS_MMAPLOCK_EXCL;
> +				xfs_ilock(XFS_I(inode), mmaplock);
> +			}
>  			ret = iomap_page_mkwrite(vmf,
>  					&xfs_page_mkwrite_iomap_ops);
> -			xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> +			xfs_iunlock(XFS_I(inode), mmaplock);
>  		} else {
>  			ret = filemap_fault(vmf);
>  		}
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 0c5bdb91152e..e3ff059c69f7 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -347,6 +347,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
>  /* Quotacheck is running but inode has not been added to quota counts. */
>  #define XFS_IQUOTAUNCHECKED	(1 << 14)
>  
> +/* Clone in progress, do not allow modifications. */
> +#define XFS_ICLONING (1 << 15)

Somewhere we need to capture the locking behavior around this flag.

/*
 * Remap in progress.  Callers that wish to update file data while
 * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake
 * the lock in exclusive mode.  Relocking the file will block until
 * ICLONING is cleared.
 */
#define XFS_IREMAPPING		(1U << 15)

On second thought, perhaps this should be called XFS_IREMAPPING?  Since
these semantics apply to dedupe in addition to clone.

> +
>  /* All inode state flags related to inode reclaim. */
>  #define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \
>  				 XFS_IRECLAIM | \
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index eb9102453aff..645cc196ee13 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
> @@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep(
>  	if (ret)
>  		goto out_unlock;
>  
> +	xfs_iflags_set(src, XFS_ICLONING);
> +	if (inode_in != inode_out)
> +		xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
> +
>  	return 0;
>  out_unlock:
>  	xfs_iunlock2_io_mmap(src, dest);
> @@ -1718,3 +1722,30 @@ xfs_reflink_unshare(
>  	trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
>  	return error;
>  }
> +
> +/* Unlock both inodes after the reflink completes. */
> +void
> +xfs_reflink_unlock(
> +	struct xfs_inode	*ip1,
> +	struct xfs_inode	*ip2)
> +{
> +	struct address_space	*mapping1;
> +	struct address_space	*mapping2;
> +
> +	if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
> +		if (ip1 != ip2)
> +			xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED);
> +		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
> +	} else {
> +		mapping1 = VFS_I(ip1)->i_mapping;
> +		mapping2 = VFS_I(ip2)->i_mapping;
> +		if (mapping1 && mapping1 != mapping2)
> +			up_read(&mapping1->invalidate_lock);
> +		if (mapping2)
> +			up_write(&mapping2->invalidate_lock);

XFS_MMAPLOCK is the same thing mapping->invalidate-lock; won't
the first version work for the !DAX case too?

--D

> +	}
> +
> +	if (ip1 != ip2)
> +		inode_unlock_shared(VFS_I(ip1));
> +	inode_unlock(VFS_I(ip2));
> +}
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index 65c5dfe17ecf..89f4d2a2f52e 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -53,4 +53,6 @@ extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in,
>  extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
>  		xfs_extlen_t cowextsize, unsigned int remap_flags);
>  
> +void xfs_reflink_unlock(struct xfs_inode *ip1, struct xfs_inode *ip2);
> +
>  #endif /* __XFS_REFLINK_H */
> -- 
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-10-05  0:04 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-10-04 20:58 [PATCH v1] xfs: allow read IO and FICLONE to run concurrently Catherine Hoang
2023-10-05  0:04 ` Darrick J. Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).